예제 #1
0
def build_module(module, required):
    """Compile one Ripe module into product/modules/<module>/<module>.o.

    module : str, module name; also names its source subdirectory
    required : bool, True for default packages that are loaded by default

    Side effects: creates output directories, copies the module's .meta
    file (if present), invokes the ripe compiler, and — for an optional
    module whose build fails — appends the name to the global
    failed_modules list.
    """
    import os.path
    tools.mkdir_safe('product/modules/%s' % module)
    out = 'product/modules/%s/%s.o' % (module, module)
    srcs = []

    metafile = 'modules/%s/%s.meta' % (module, module)
    if os.path.exists(metafile):
        # NOTE(review): the product/ path is passed first, which reads as
        # (dest, src) — confirm tools.copy_file's argument order.
        tools.copy_file('product/modules/%s/%s.meta' % (module, module),
                        metafile)
        srcs.append(metafile)
    # NOTE(review): load_meta is called even when metafile does not exist;
    # presumably it tolerates a missing file — confirm.
    meta = tools.load_meta(metafile)
    extra_objs = ''
    if 'objs' in meta:
        extra_objs = meta['objs']
    src = 'modules/%s/%s.rip' % (module, module)
    # Rebuild only when `out` is out of date w.r.t. module deps + source.
    if tools.depends(out, module_deps + [src]):
        tools.pprint('MOD', src, out)
        # NOTE(review): srcs (a list) and conf["RFLAGS"] are nested inside
        # args — presumably tools.call flattens nested lists; confirm.
        args = [
            'product/ripe', conf["RFLAGS"], '-n', module, '-c', srcs, src,
            extra_objs, '-o', out
        ]
        # Required (default) packages have already been typed, and are
        # loaded by default.  Hence, they do not need to be typed.
        if required:
            args.append('--omit-typing')
        if conf["VERBOSITY"] > 1:
            args.append('-v')
        if required:
            # A required module must build; tools.call aborts on failure.
            tools.call(args)
        else:
            # Optional modules may fail: record the failure and continue.
            if not tools.try_call(args):
                failed_modules.append(module)
예제 #2
0
파일: build.py 프로젝트: merolle/ripe
def build_module(module, required):
    """Compile one Ripe module into product/modules/<module>/<module>.o.

    module : str, module name; also names its source subdirectory
    required : bool, True for default packages that are loaded by default

    Side effects: creates output directories, copies the module's .meta
    file (if present), invokes the ripe compiler, and — for an optional
    module whose build fails — appends the name to the global
    failed_modules list.
    """
    import os.path
    tools.mkdir_safe('product/modules/%s' % module)
    out = 'product/modules/%s/%s.o' % (module, module)
    srcs = []

    metafile = 'modules/%s/%s.meta' % (module, module)
    if os.path.exists(metafile):
        tools.copy_file('product/modules/%s/%s.meta' % (module, module),
                        metafile)
        srcs.append(metafile)
    # NOTE(review): load_meta is called even when metafile does not exist;
    # presumably it tolerates a missing file — confirm.
    meta = tools.load_meta(metafile)
    extra_objs = ''
    if 'objs' in meta:
        extra_objs = meta['objs']
    src = 'modules/%s/%s.rip' % (module, module)
    # Rebuild only when `out` is out of date w.r.t. module deps + source.
    if tools.depends(out, module_deps + [src]):
        tools.pprint('MOD', src, out)
        args = ['product/ripe', conf["RFLAGS"], '-n', module,
                '-c', srcs, src, extra_objs, '-o', out]
        # Required (default) packages have already been typed, and are
        # loaded by default.  Hence, they do not need to be typed.
        if required:
            # Fixed: this line was indented with 10 spaces (2-space step)
            # in an otherwise 4-space-indented file.
            args.append('--omit-typing')
        if conf["VERBOSITY"] > 1:
            args.append('-v')
        if required:
            # A required module must build; tools.call aborts on failure.
            tools.call(args)
        else:
            # Optional modules may fail: record the failure and continue.
            if not tools.try_call(args):
                failed_modules.append(module)
예제 #3
0
파일: build.py 프로젝트: merolle/ripe
def type_module(module):
    """Produce typing information (.typ) for a module.

    Returns the path of the .typ output file, whether or not it had
    to be regenerated.
    """
    rip_src = 'modules/%s/%s.rip' % (module, module)
    typ_out = 'product/modules/%s/%s.typ' % (module, module)
    # Up to date: nothing to do.
    if not tools.depends(typ_out, type_deps + [rip_src]):
        return typ_out
    # Progress indicator: module name in the "source" color.
    sys.stdout.write(tools.color_src + module + tools.color_reset + " ")
    tools.mkdir_safe('product/modules/%s' % module)
    # NOTE(review): '>' only redirects if tools.call goes through a shell.
    tools.call(['product/ripe', '-t', rip_src, '>', typ_out])
    return typ_out
예제 #4
0
def type_module(module):
    """Produce typing information (.typ) for a module; return the .typ path."""
    path = 'modules/%s/%s.rip' % (module, module)
    out = 'product/modules/%s/%s.typ' % (module, module)
    # Regenerate only when `out` is out of date w.r.t. type deps + source.
    if tools.depends(out, type_deps + [path]):
        # Progress indicator: module name in the "source" color.
        sys.stdout.write(tools.color_src + module + tools.color_reset + " ")
        tools.mkdir_safe('product/modules/%s' % module)
        # NOTE(review): '>' only redirects if tools.call goes through a
        # shell — confirm, otherwise it is passed as a literal argument.
        tools.call(['product/ripe', '-t', path, '>', out])
    return out
예제 #5
0
def qgen_stream(p, templates_dir, dialect, scale, qual=None,
                verbose=False, verbose_out=False):
    """Generate DS query text for query stream number p

    Parameters
    ----------
    p : int, query stream number; dsqgen is invoked with streams=p+1 and
        this stream's output file query_<p>.sql is read back
    templates_dir : str, absolute path to directory of query templates
        to draw from
    dialect : str, SQL dialect template set passed through to dsqgen
    scale : int, scale factor of db being queried
    qual : bool, generate qualification queries in ascending order
    verbose : bool, print debug statements
    verbose_out : bool, print std_out and std_err output

    Returns
    -------
    query_text : str, query text generated for query
    """

    # Use the configured RNG seed when set, so streams are reproducible.
    if config.random_seed is not None:
        r = config.random_seed
    else:
        r = None

    # make temporary query directory
    temp_dir = config.fp_ds_output + config.sep + "temp_queries"
    tools.mkdir_safe(temp_dir)

    std_out, err_out = dsqgen(directory=templates_dir,
                              dialect=dialect,
                              scale=scale,
                              # filter="Y",  # write to std_out
                              streams=p+1,
                              input=templates_dir + config.sep + "templates.lst",
                              rngseed=r,
                              qualify=qual,
                              verbose=verbose,
                              output_dir=temp_dir,
                              )

    # dsqgen writes one file per stream; read back the stream-p file.
    query_fp = temp_dir + config.sep + "query_{}.sql".format(p)
    with open(query_fp, "r") as f:
        query_text = f.read()

    if verbose_out:
        print("QUERY STREAM:", p)
        print("=================")
        print()
        print("Source File")
        print("===========")
        print(query_fp)
        print()

        std_err_print(std_out, err_out)

    return query_text
예제 #6
0
    def write_query_text(self, query_text, query_n):
        """Persist the SQL text of an executed query to the results folder.

        Parameters
        ----------
        query_text : str, TPC query SQL executed
        query_n : int, TPC query number
        """
        out_dir = self.results_dir + config.sep
        tools.mkdir_safe(out_dir)
        out_path = out_dir + "query_text_bq_{0:02d}.sql".format(query_n)
        with open(out_path, "w") as out_file:
            out_file.write(query_text)
예제 #7
0
    def write_results_csv(self, df, query_n):
        """Save a TPC query's result DataFrame as CSV in the results folder.

        Parameters
        ----------
        df : Pandas DataFrame
        query_n : int, query number in TPC test
        """
        target_dir = self.results_dir + config.sep
        tools.mkdir_safe(target_dir)
        target_fp = target_dir + "query_result_bq_{0:02d}.csv".format(query_n)
        # Normalize values for cross-system comparison before writing.
        consistent = tools.to_consistent(df, n=config.float_precision)
        consistent.to_csv(target_fp, index=False, float_format="%.3f")
예제 #8
0
    def set_timestamp_dir(self):
        """Set a shared run timestamp and create the results directory."""
        now = pd.Timestamp.now()  # naive local time, not "UTC"
        # Filesystem-safe timestamp string shared across systems under test.
        self.shared_timestamp = str(now).replace(" ", "_")
        self.data_source = "_".join(
            [self.test, str(self.scale) + "GB", self.cid])
        self.results_dir, _ = tools.make_name(
            db="".join(self.systems),
            test=self.test,
            cid=self.cid,
            kind="results",
            datasource=self.data_source,
            desc=self.desc,
            ext="",
            timestamp=self.shared_timestamp)
        tools.mkdir_safe(self.results_dir)

        if self.verbose:
            print("Result Folder Name:")
            print(self.results_dir)
예제 #9
0
    def write_times_csv(self, results_list, columns):
        """Record local query timing data to a CSV file.

        Parameters
        ----------
        results_list : list, data as recorded on the local machine
        columns : list, column names for output CSV
        """
        _, csv_name = tools.make_name(db="bq",
                                      test=self.test,
                                      cid=self.cid,
                                      kind="times",
                                      datasource=self.dataset,
                                      desc=self.desc,
                                      ext=".csv",
                                      timestamp=self.timestamp)
        self.results_csv_fp = self.results_dir + config.sep + csv_name
        tools.mkdir_safe(self.results_dir)
        times_df = pd.DataFrame(results_list, columns=columns)
        times_df.to_csv(self.results_csv_fp, index=False)
예제 #10
0
    def query_seq(self,
                  seq,
                  seq_n=None,
                  qual=None,
                  save=False,
                  verbose_iter=False):
        """Query BigQuery with TPC-DS or TPC-H query template number n

        Parameters
        ----------
        seq : iterable sequence int, query numbers to execute between
            1 and 99 for ds and 1 and 22 for h
        seq_n : int, stream sequence number for test - i.e. 0 or 4 etc
        qual : None, or True to use qualifying values (to test 1GB qualification db)
        save : bool, save data about this query sequence to disk
        verbose_iter : bool, print per iteration status statements

        Returns
        -------
        n_time_data : list, timing data for query stream, with:
            db : str, database system under test name ("sf" or "bq")
            test : str, test name ("ds" or "h")
            scale : int, TPC scale factor in GB
            source : str, source dataset/database
            cid : str, configuration id
            desc : str, description of stream test
            query_n : int, benchmark query number
            seq_n : int, benchmark query sequence/stream number
            driver_t0 : datetime, time on the driver when query was started
            driver_t1 : datatime, time on the driver when query returned
            qid : str, database system under test query id for the query run
        """
        # Snapshot run metadata to disk before any query executes.
        self.test_stage = "start"
        metadata_fp = self.results_dir + config.sep + "metadata_bq_initial.json"
        tools.mkdir_safe(self.results_dir)
        with open(metadata_fp, "w") as f:
            f.write(self.to_json(indent="  "))

        # Normalize the stream number to a string for use in labels.
        if seq_n is None:
            seq_n = "sNA"
        else:
            seq_n = str(seq_n)
        n_time_data = []
        columns = [
            "db", "test", "scale", "source", "cid", "desc", "query_n", "seq_n",
            "driver_t0", "driver_t1", "qid"
        ]

        t0_seq = pd.Timestamp.now("UTC")
        i_total = len(seq)
        for i, n in enumerate(seq):
            # Per-query label, e.g. "<dataset>-q5-<seq_n>-<desc>" (lowercased).
            qn_label = self.dataset + "-q" + str(
                n) + "-" + seq_n + "-" + self.desc
            qn_label = qn_label.lower()

            if verbose_iter:
                print("=" * 40)
                print("BigQuery Start Query:", n)
                print("-" * 20)
                print("Stream Completion: {} / {}".format(i + 1, i_total))
                print("Query Label:", qn_label)
                print("-" * 20)
                print()

            self.set_query_label(qn_label)

            # Run the query; t0/t1 are driver-side timestamps, qid is the
            # BigQuery-assigned id for this run.
            (t0, t1, df_result, query_text, qid) = self.query_n(n=n,
                                                                qual=qual,
                                                                std_out=False)

            _d = [
                "bq", self.test, self.scale, self.dataset, self.cid, self.desc,
                n, seq_n, t0, t1, qid
            ]
            n_time_data.append(_d)

            # write results as collected by each query
            if save:
                self.write_query_text(query_text=query_text, query_n=n)

                if len(df_result) > 0:
                    self.write_results_csv(df=df_result, query_n=n)
                else:
                    # filler for statistics when the query returns no values
                    df_result.loc[0, :] = ["filler"] * df_result.shape[1]
                    if verbose_iter:
                        print("No result rows, FILLER DataFrame created.")
                    self.write_results_csv(df=df_result, query_n=n)

            if verbose_iter:
                dt = t1 - t0
                print("Query ID: {}".format(qid))
                print("Total Time Elapsed: {}".format(dt))
                print("-" * 40)
                print()

            if self.verbose:
                if len(df_result) < 25:
                    print("Result:")
                    print("-------")
                    print(df_result)
                    print()
                else:
                    print("Head of Result:")
                    print("---------------")
                    print(df_result.head())
                    print()

        t1_seq = pd.Timestamp.now("UTC")

        #if self.verbose:
        # NOTE(review): the summary below is printed unconditionally even
        # though the guard above was commented out — confirm intended.
        dt_seq = t1_seq - t0_seq
        print()
        print("=" * 40)
        print("BigQuery Query Stream Done!")
        print("Total Time Elapsed: {}".format(dt_seq))
        print()

        # write local timing results to file
        self.write_times_csv(results_list=n_time_data, columns=columns)

        self.test_stage = "end"
        # NOTE(review): the end-of-run file is "metadata_initial.json" while
        # the start-of-run file is "metadata_bq_initial.json" — confirm the
        # differing names are intentional.
        metadata_fp = self.results_dir + config.sep + "metadata_initial.json"
        with open(metadata_fp, "w") as f:
            f.write(self.to_json(indent="  "))

        return pd.DataFrame(n_time_data, columns=columns)
예제 #11
0
파일: build.py 프로젝트: merolle/ripe
# Disabled build flags (profiling, stack/thread/memlog toggles); the
# enclosing conditionals are above this excerpt.
#    conf["CFLAGS"].extend(["-pg", "-fno-omit-frame-pointer", "-O3", "-DNDEBUG"])
#    conf["LFLAGS"].append("-pg")
#    conf["RFLAGS"].append("--optim-verify")
#if "nostack" in sys.argv:
#    conf["CFLAGS"].append("-DNOSTACK")
#if "nothreads" in sys.argv:
#    conf["CFLAGS"].append("-DNOTHREADS")
#if "memlog" in sys.argv:
#    conf["CFLAGS"].append("-DMEMLOG")

# Construct required directories
required_dirs = ['bin', 'product', 'product/include', 'product/include/clib',
                 'product/include/vm', 'product/include/modules',
                 'product/include/lang', 'product/modules']
for d in required_dirs:
    tools.mkdir_safe(d)

###############################################################################
# CLIB

# Headers and C sources of the support library.
clib_hs =   [ 'clib/clib.h' ]
clib_srcs = [ 'clib/array.c',
              'clib/dict.c',
              'clib/hash.c',
              'clib/mem.c',
              'clib/path.c',
              'clib/stringbuf.c',
              'clib/structs.c',
              'clib/tok.c',
              'clib/utf8.c',
              'clib/util.c' ]
예제 #12
0
    def compare_sum(self):
        """Compare per-table key-column sums between Snowflake and BigQuery.

        For each table in the test's schema, run
        ``select sum(<key column>) from <table>`` on both systems, compare
        the results as int64, write a comparison CSV, and return the data.

        Returns
        -------
        pd.DataFrame with columns ["table", "column", "sf", "bq", "equal"];
        "equal" is None for a table whose results could not be compared.
        """

        # Key column summed for each TPC-DS table.
        ds_col = {
            "call_center": "cc_call_center_sk",  # integer
            "catalog_page": "cp_catalog_page_sk",
            "catalog_returns": "cr_order_number",
            "catalog_sales": "cs_order_number",
            "customer": "c_customer_sk",
            "customer_address": "ca_address_sk",
            "customer_demographics": "cd_demo_sk",
            "date_dim": "d_date_sk",  # integer
            # skip dbgen
            "household_demographics": "hd_demo_sk",
            "income_band": "ib_income_band_sk",
            "inventory": "inv_item_sk",  # integer
            "item": "i_item_sk",
            "promotion": "p_promo_sk",
            "reason": "r_reason_sk",
            "ship_mode": "sm_ship_mode_sk",
            "store": "s_store_sk",
            "store_returns": "sr_item_sk",
            "store_sales": "ss_item_sk",
            "time_dim": "t_time_sk",
            "warehouse": "w_warehouse_sk",
            "web_page": "wp_web_page_sk",
            "web_returns": "wr_item_sk",
            "web_sales": "ws_item_sk",
            "web_site": "web_site_sk"
        }

        # Key column summed for each TPC-H table.
        h_col = {
            "customer": "c_custkey",
            "lineitem": "l_linenumber",
            "nation": "n_nationkey",
            "orders": "o_orderkey",
            "part": "p_partkey",
            "partsupp": "ps_partkey",
            "region": "r_regionkey",
            "supplier": "s_suppkey"
        }

        col_names = {"ds": ds_col, "h": h_col}[self.test]

        sf = sf_tpc.SFTPC(test=self.test,
                          scale=self.scale,
                          cid=self.cid,
                          warehouse="TEST9000",
                          desc=self.desc,
                          verbose=self.verbose,
                          verbose_query=self.verbose_query)

        if self.verbose:
            print('Using database:', sf.database)

        sf.timestamp = self.shared_timestamp
        sf.results_dir = self.results_dir
        sf.connect()

        bq = bq_tpc.BQTPC(test=self.test,
                          scale=self.scale,
                          cid=self.cid,
                          desc=self.desc,
                          verbose_query=self.verbose_query,
                          verbose=self.verbose)

        bq.timestamp = self.shared_timestamp
        bq.results_dir = self.results_dir

        d = []
        for table, column in col_names.items():
            if self.verbose_iter:
                print(f"TABLE & COLUMN: {table} >> {column}")

            query_text = f"select sum({column}) from {table}"

            sf_query_result = sf.sfc.query(query_text=query_text)
            df_sf_result = sf_query_result.fetch_pandas_all()
            df_sf_result.columns = ["r"]
            sf_r = df_sf_result.loc[0, "r"]

            bq_query_result = bq.query(query_text=query_text)
            df_bq_result = bq_query_result.result().to_dataframe()
            df_bq_result.columns = ["r"]
            bq_r = df_bq_result.loc[0, "r"]

            if self.verbose_iter:
                print("RESULT: SF | BQ")
                print("SF Type:", type(sf_r))
                print("BQ Type:", type(bq_r))
                print(sf_r, "|", bq_r)
                print("-" * 40)
                print()

            # type convert to assure numerical comparison
            # is the only comparison being done
            # Fix: `equal` was previously unbound when a TypeError fired,
            # raising NameError at the append below; the np.int64
            # conversions were also outside the try they were guarding.
            equal = None
            try:
                sf_r_a = np.int64(sf_r)
                bq_r_a = np.int64(bq_r)
                equal = sf_r_a == bq_r_a
            except TypeError:
                print("Error comparing query results.")
                print("SF Reply:")
                print(sf_r)
                print("-" * 30)
                print("BQ Reply:")
                print(bq_r)
                print("-" * 30)

            d.append([table, column, sf_r, bq_r, equal])
        sf.close()

        df = pd.DataFrame(d, columns=["table", "column", "sf", "bq", "equal"])

        db_name = self.test + "_" + "{:02d}".format(
            self.scale) + "_" + self.cid
        rdir, rfp = tools.make_name(db="bqsf",
                                    test=self.test,
                                    cid=self.cid,
                                    kind="qc-comparison",
                                    datasource=db_name,
                                    desc=self.desc,
                                    ext=".csv",
                                    timestamp=None)
        tools.mkdir_safe(rdir)
        fp = rdir + config.sep + rfp
        df.to_csv(fp, index=False)
        return df
예제 #13
0
#    conf["RFLAGS"].append("--optim-verify")
#if "nostack" in sys.argv:
#    conf["CFLAGS"].append("-DNOSTACK")
#if "nothreads" in sys.argv:
#    conf["CFLAGS"].append("-DNOTHREADS")
#if "memlog" in sys.argv:
#    conf["CFLAGS"].append("-DMEMLOG")

# Make sure the product/ output tree exists before any build step runs.
required_dirs = [
    'bin',
    'product',
    'product/include',
    'product/include/clib',
    'product/include/vm',
    'product/include/modules',
    'product/include/lang',
    'product/modules',
]
for d in required_dirs:
    tools.mkdir_safe(d)

###############################################################################
# CLIB

# Headers and C sources of the support library.
clib_hs = ['clib/clib.h']
clib_srcs = [
    'clib/array.c',
    'clib/dict.c',
    'clib/hash.c',
    'clib/mem.c',
    'clib/path.c',
    'clib/stringbuf.c',
    'clib/structs.c',
    'clib/tok.c',
    'clib/utf8.c',
    'clib/util.c',
]
clib_objs = tools.cons_objs(clib_srcs, clib_hs)

###############################################################################
# LANG