Example #1
0
def load_ycsb_raw_2_ts(ycsb_file, nrows2read=None):
    """Parse a raw YCSB output file and load the parsed result.

    The raw file is first run through ``cpp_parse_ycsb_file``, which writes
    its output next to the input as ``<ycsb_file>.<PARSED_FILE_EXTENSION>``.
    Only the READ/UPDATE/RawREAD/RawUPDATE prefixes are passed as valid.
    The parsed file is then handed to ``load_ycsb_parsed_2_ts`` together
    with ``DEFAULT_YCSB_COLUMNS``.

    Args:
        ycsb_file: path of the raw YCSB output file.
        nrows2read: forwarded unchanged to ``load_ycsb_parsed_2_ts``
            (``None`` by default).

    Returns:
        Whatever ``load_ycsb_parsed_2_ts`` returns for the parsed file
        (a time series, judging by the helper's name).
    """
    parsed_path = "%s.%s" % (ycsb_file, PARSED_FILE_EXTENSION)
    accepted_prefixes = ["READ", "UPDATE", "RawREAD", "RawUPDATE"]

    # The parser always runs, even if a parsed file is already on disk.
    cpp_parse_ycsb_file(ycsb_file, parsed_path,
                        valid_prefixes=accepted_prefixes)

    return load_ycsb_parsed_2_ts(parsed_path, DEFAULT_YCSB_COLUMNS,
                                 nrows2read)
Example #2
0
def load_ycsb_raw_2_df(ycsb_file, columns=DEFAULT_YCSB_COLUMNS, nrows2read=None):
    """Parse a raw YCSB output file and load the parsed result.

    ``cpp_parse_ycsb_file`` is invoked with ``PARSING_VALID_PREFIXES`` and
    writes to ``<ycsb_file>.<PARSED_FILE_EXTENSION>``; that file is then
    loaded through ``load_ycsb_parsed_2_df``.

    Args:
        ycsb_file: path of the raw YCSB output file.
        columns: forwarded to ``load_ycsb_parsed_2_df``; defaults to
            ``DEFAULT_YCSB_COLUMNS``.
        nrows2read: forwarded unchanged to ``load_ycsb_parsed_2_df``
            (``None`` by default).

    Returns:
        Whatever ``load_ycsb_parsed_2_df`` returns for the parsed file
        (a data frame, judging by the helper's name).
    """
    parsed_path = "%s.%s" % (ycsb_file, PARSED_FILE_EXTENSION)

    # The parser always runs, even if a parsed file is already on disk.
    cpp_parse_ycsb_file(ycsb_file, parsed_path,
                        valid_prefixes=PARSING_VALID_PREFIXES)

    return load_ycsb_parsed_2_df(parsed_path, columns, nrows2read)
Example #3
0
def find_target_percentile4file(ifile, target_value_us, offsets_us,
                                percentiles, skip_first_samples_min,
                                reuse_parsed_files):

    ## Parse file into time series
    out_file_name = "%s.%s" % (ifile, PARSED_FILE_EXTENSION)

    # It might be possible that the file has been parsed already, so we can skip this process
    if reuse_parsed_files and os.path.isfile(out_file_name):
        logging.info("Parsed file [%s] already exists, loading it ...",
                     load_ycsb_parsed_2_ts)
        ts = load_ycsb_parsed_2_ts(out_file_name)

    else:

        cpp_parse_ycsb_file(ifile,
                            out_file_name,
                            valid_prefixes=PARSING_VALID_PREFIXES)

        ts = load_ycsb_raw_2_ts(out_file_name)

        ts = skip_ts_head(ts, skip_first_samples_min)

    tasks = (joblib.delayed(_find_percentile)(ts, target_value_us, offset)
             for offset in offsets_us)
    res_tuples = joblib.Parallel(n_jobs=-1, verbose=50)(tasks)
    res = []
    for r in res_tuples:
        res.append(list(r))
    res_tuples = res

    # <2.> Find target percentiles
    pers = []
    for p in percentiles:
        print "Searchign for percentile, ", p, "value ", np.percentile(ts, p)
        pers.append(np.percentile(ts, p))

    offered_load = get_tracefile_offered_load_from_ycsb(ifile)
    achieved_load = get_achieved_load(ifile)

    for r in res_tuples:
        print pers
        r.append(float(offered_load))
        r.append(float(achieved_load))
        r += pers

    return res_tuples
Example #4
0
def parse_ycsb_if_necessary(src_file_name):
    """Return the path of the parsed version of a YCSB file, parsing on demand.

    If ``src_file_name`` already ends with ``PARSED_FILE_EXTENSION`` it is
    returned unchanged. Otherwise the parsed file is expected at
    ``<src_file_name>.<PARSED_FILE_EXTENSION>``; when that file does not
    exist yet it is produced with ``cpp_parse_ycsb_file`` using
    ``PARSING_VALID_PREFIXES``.

    Args:
        src_file_name: path of a raw or already parsed YCSB file.

    Returns:
        The path of the parsed file. This is returned whether the file was
        just created or was already present; previously the raw path was
        returned in the latter case, contradicting the documented contract.
    """
    if src_file_name.endswith(PARSED_FILE_EXTENSION):
        return src_file_name

    parsed_file_name = "%s.%s" % (src_file_name, PARSED_FILE_EXTENSION)

    if not os.path.isfile(parsed_file_name):
        logging.info("Parsed ycsb file [%s] not found, doing cpp parsing...",
                     parsed_file_name)
        cpp_parse_ycsb_file(src_file_name,
                            parsed_file_name,
                            valid_prefixes=PARSING_VALID_PREFIXES)

    return parsed_file_name
Example #5
0
def postproc_plot_cdfs_per_experiment(out_fldr,
                                      exp_id,
                                      minutes2skip_from_ycsb_trace,
                                      target_slo_us,
                                      mask=YCSB_MASK):
    separator(sys._getframe().f_code.co_name)

    old_dir = os.getcwd()
    os.chdir(out_fldr)

    try:

        files = get_files_matching_mask(where="./", mask="i%s*.ycsb" % exp_id)
        print os.getcwd()
        print "Files, ", files
        cdf_files = []
        for f in files:
            out_file_name = "%s.%s" % (f, PARSED_FILE_EXTENSION)
            cdf_files.append(out_file_name)
            if not os.path.isfile(out_file_name):
                cpp_parse_ycsb_file(f, out_file_name, valid_prefixes=["READ"])

        cmd = "ycsb_plot_cdfs.py --src_files {src_files} --out_name {out_name} "\
            "--skip_first_samples_min {skip} --target_latency_slo_us {slo} "\
            "--consider_siblings --pre_parsed_input".format(
            src_files=" ".join(cdf_files),
            out_name="i%s_cdfs.png" % exp_id,
            skip=minutes2skip_from_ycsb_trace,
            slo=target_slo_us)

        logging.info("Executing postproc_plot_cdfs_per_experiment \n cmd[%s]",
                     cmd)
        proc = subprocess.Popen(cmd,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE,
                                shell=True)
        check_pid_output(proc)

    except:
        print_generic_exception()

    finally:
        os.chdir(old_dir)
Example #6
0
def postproc_visualise_traceworkload(conf,
                                     out_fldr,
                                     exp_id=None,
                                     mask=YCSB_MASK,
                                     warm_up_period_sec=0):

    separator(sys._getframe().f_code.co_name)

    def get_exp_and_workload_ids(ycsb_out_file_name):
        # i1_wl1_ycsb.ycsb
        tokens = ycsb_out_file_name.split("_")
        exp_id = int(tokens[0][1:])
        workload_id = int(tokens[1][2:])

        return exp_id, workload_id

    old_dir = os.getcwd()
    os.chdir(out_fldr)

    if exp_id != None:
        mask = "i%s%s" % (exp_id, mask)

    try:
        files = get_files_matching_mask(where="./", mask=mask)
        print os.getcwd()
        print "Files", files
        for f in files:
            print "Processing file: ", f

            exp_id, workload_id = get_exp_and_workload_ids(os.path.basename(f))
            tmp_R_file = "%s.r" % f

            cpp_parse_ycsb_file(f, tmp_R_file, valid_prefixes=["R"], pattern=2)

            workload_name = "workload_%s" % workload_id

            cmd = "visualize_ycsb_trace.R "\
                    "--intended_ts_file {trace_file} --actual_ts_file {tmp_R_file}"\
                    " --duration 1000 --interval 1 --id {id} --warmup {warmup} ".format(
                trace_file=conf[workload_name]["p_tracefile"],
                tmp_R_file=tmp_R_file, warmup=warm_up_period_sec,
                id=exp_id)

            logging.info(
                "Executing post processing for workloadTrace 2/2 \n cmd[%s]",
                cmd)
            proc = subprocess.Popen(cmd,
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE,
                                    shell=True)
            check_pid_output(proc)

            logging.info("Removing tmp file [%s]", tmp_R_file)
            remove_files([tmp_R_file])

        # exp_index = os.path.basename(of_ycsb_out)[:2] # taking iX from i1_ycsb.ycsb

        # tmp_out_file = "%s_r.data" %exp_index

        # # cmd = 'cat %s | grep " R " |  awk "NF==3{print}{}" > %s' % (
        # #     of_ycsb_out, tmp_out_file)

        # # logging.info("Executing post processing for WorkloadTrace 1/2 \n cmd[%s]", cmd)
        # # proc = subprocess.Popen(cmd,
        # #     stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
        # # check_pid_output(proc)

        # cmd = "visualize_ycsb_trace.R "\
        #         "--intended_ts_file {trace_file} --actual_ts_file {tmp_out_file}"\
        #         " --duration 1000 --interval 1 --id {id}".format(
        #     trace_file=conf[workload_name]["p_tracefile"],
        #     tmp_out_file=tmp_out_file,
        #     id=exp_index)

        # logging.info("Executing post processing for workloadTrace 2/2 \n cmd[%s]", cmd)
        # proc = subprocess.Popen(cmd,
        #     stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
        # check_pid_output(proc)

    except:
        print_generic_exception()

    finally:
        os.chdir(old_dir)