def Plot():
  with Cons.MT("Plotting latency ..."):
    env = os.environ.copy()
    env["FN_IN"] = "%s/client/%s" % (Conf.GetDir("log_dir"), Conf.Get("simulation_time_begin"))
    dn = "%s/%s" % (Conf.GetDir("output_dir"), Conf.Get("simulation_time_begin"))
    Util.MkDirs(dn)
    fn_out = "%s/latency.pdf" % dn
    env["FN_OUT"] = fn_out
    with Cons.MT("Plotting ..."):
      Util.RunSubp("gnuplot %s/latency.gnuplot" % os.path.dirname(__file__), env=env)
      Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
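# The plot parameters are handed to gnuplot through environment variables. A
# gnuplot script would read them back with something like the following (a
# sketch of the idiom; the actual latency.gnuplot is not shown here):
#   fn_in  = system("echo $FN_IN")
#   fn_out = system("echo $FN_OUT")
#   set output fn_out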
def GetHourlyFn(): fn_out = "%s/cpu-hourly-usage" % Conf.GetOutDir() if os.path.exists(fn_out): return fn_out with Cons.MT("Generating file for cpu usage comparison ..."): dn_base = Conf.GetDir("dn_base") fn_ycsb_0 = "%s/%s" % (dn_base, Conf.Get(0)) fn_ycsb_1 = "%s/%s" % (dn_base, Conf.Get(1)) hour_cpustat_0 = _GetCpuStatByHour(fn_ycsb_0) hour_cpustat_1 = _GetCpuStatByHour(fn_ycsb_1) #Cons.P(hour_cpustat_0) #Cons.P(hour_cpustat_1) with open(fn_out, "w") as fo: fo.write("# 0: %s\n" % fn_ycsb_0) fo.write("# 1: %s\n" % fn_ycsb_1) fo.write("#\n") fmt = "%2d" \ " %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f" \ " %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f" fo.write(Util.BuildHeader(fmt, "hour" \ " 0_avg 0_min 0_1 0_25 0_50 0_75 0_99 0_max" \ " 1_avg 1_min 1_1 1_25 1_50 1_75 1_99 1_max" ) + "\n") for h, s0 in sorted(hour_cpustat_0.iteritems()): s1 = hour_cpustat_1[h] fo.write((fmt + "\n") % (h, s0.avg, s0.min, s0._1, s0._25, s0._50, s0._75, s0._99, s0.max, s1.avg, s1.min, s1._1, s1._25, s1._50, s1._75, s1._99, s1.max)) Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out))) return fn_out
def _GetFnCpuOverhead():
  fn_out = "%s/cpu-overhead-by-time" % Conf.GetOutDir()
  if os.path.exists(fn_out):
    return fn_out

  dn_base = Conf.GetDir("dn_base")
  fn_ycsb_0 = "%s/%s" % (dn_base, Conf.Get("unmodified_db"))
  fn_ycsb_1 = "%s/%s" % (dn_base, Conf.Get("computation_overhead"))
  hour_cpustat_0 = _GetCpuStatByHour(fn_ycsb_0)
  hour_cpustat_1 = _GetCpuStatByHour(fn_ycsb_1)
  #Cons.P(hour_cpustat_0)
  #Cons.P(hour_cpustat_1)

  with open(fn_out, "w") as fo:
    fo.write("# u: unmodified\n")
    fo.write("# c: with SSTable access monitoring and SSTable placement computation\n")
    fo.write("#\n")
    fmt = "%2d" \
        " %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f" \
        " %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f"
    fo.write(Util.BuildHeader(fmt, "hour" \
        " u_avg u_min u_1 u_25 u_50 u_75 u_99 u_max" \
        " c_avg c_min c_1 c_25 c_50 c_75 c_99 c_max"
        ) + "\n")
    for h, s0 in sorted(hour_cpustat_0.iteritems()):
      s1 = hour_cpustat_1[h]
      fo.write((fmt + "\n") % (h
          , s0.avg, s0.min, s0._1, s0._25, s0._50, s0._75, s0._99, s0.max
          , s1.avg, s1.min, s1._1, s1._25, s1._50, s1._75, s1._99, s1.max))
  Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
  return fn_out
def Get1minAvgFn():
  exp_dts = []
  for i in range(2):
    #Cons.P(Conf.Get(i))
    mo = re.match(r".+/(?P<exp_dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d)-d", Conf.Get(i))
    exp_dts.append(mo.group("exp_dt"))
  fn_out = "%s/mem-1minavg-%s" % (Conf.GetOutDir(), "-".join(exp_dts))
  if os.path.exists(fn_out):
    return fn_out

  with Cons.MT("Creating avg memory usage comparison file for plotting ..."):
    records = []
    dn_base = Conf.GetDir("dn_base")
    for i in range(2):
      fn_ycsb_log = "%s/%s" % (dn_base, Conf.Get(i))
      hm_mem = _GetHmMem(fn_ycsb_log)
      for hm, mem in hm_mem.iteritems():
        records.append(_RecordMemAvg(hm, i * 30, mem, i))
    records.sort(key=operator.attrgetter("ts"))

    fmt = "%8s %6.3f %1d"
    header = Util.BuildHeader(fmt, "timestamp mem_avg_in_gb exp_type")
    with open(fn_out, "w") as fo:
      i = 0
      for r in records:
        if i % 40 == 0:
          fo.write(header + "\n")
        i += 1
        fo.write("%s\n" % r.ToStr(fmt))
    Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
  return fn_out
def Get1minAvgFn():
  exp_dts = []
  for i in range(2):
    #Cons.P(Conf.Get(i))
    # computation/180126-142513/ycsb/180126-193525.769-d
    mo = re.match(r".+/(?P<exp_dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d)-d", Conf.Get(i))
    exp_dts.append(mo.group("exp_dt"))
  fn_out = "%s/cpu-1minavg-%s" % (Conf.GetOutDir(), "-".join(exp_dts))
  if os.path.exists(fn_out):
    return fn_out

  with Cons.MT("Creating avg cpu usage comparison file for plotting ..."):
    records = []
    dn_base = Conf.GetDir("dn_base")
    for i in range(2):
      fn_ycsb_log = "%s/%s" % (dn_base, Conf.Get(i))
      hm_cpu = _GetHmCpu(fn_ycsb_log)
      for hm, cpu in hm_cpu.iteritems():
        records.append(_RecordCpuAvg(hm, i * 30, cpu, i))
    records.sort(key=operator.attrgetter("ts"))

    fmt = "%8s %6.2f %1d"
    header = Util.BuildHeader(fmt, "timestamp cpu_avg exp_type")
    with open(fn_out, "w") as fo:
      i = 0
      for r in records:
        if i % 40 == 0:
          fo.write(header + "\n")
        i += 1
        fo.write("%s\n" % r.ToStr(fmt))
    Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
  return fn_out
def GenDataFileForGnuplot(dt):
  SimTime.Init(dt)

  dn = "%s/%s" % (Conf.GetDir("output_dir"), dt)
  Util.MkDirs(dn)
  fn = "%s/dstat-data" % dn
  if os.path.isfile(fn):
    return fn

  with Cons.MT("Generating data file for plot ..."):
    global _header_idx
    global _body_rows
    _header_idx = None
    _body_rows = None
    _Parse(dt)

    fmt = "%9.0f %9.0f %9.0f %9.0f %9.0f %9.0f %9.0f %9.0f" \
        " %6.1f %6.1f %6.1f %6.1f %6.1f %6.1f %6.1f %6.1f" \
        " %8.0f %8.0f %8.0f %8.0f" \
        " %3.0f %3.0f" \
        " %3.0f %3.0f %11s" \
        " %3.1f %6.2f %3.1f %6.2f %6.2f %6.3f"
    header = Util.BuildHeader(fmt, " ".join(k for k, v in sorted(_header_idx.iteritems())))
    with open(fn, "w") as fo:
      i = 0
      for r in _body_rows:
        if i % 50 == 0:
          fo.write("%s\n" % header)
        i += 1
        #Cons.P(fmt % tuple(r.Prepared()))
        fo.write((fmt + "\n") % tuple(r.Prepared()))
    Cons.P("Created %s %d" % (fn, os.path.getsize(fn)))
  return fn
def GenDataFilesForGnuplot():
  dn_base = Conf.GetDir("dn_base")

  # Analyze the number of compactions and migrations with
  #   (a) an unmodified DB as a baseline and
  #   (b) Mutant.
  log_readers = []
  for db_type in ["unmodified_db", "io_overhead"]:
    fn_ycsb = "%s/%s" % (dn_base, Conf.Get(db_type))
    mo = re.match(r"(?P<dn_log>.+)/(?P<job_id>\d\d\d\d\d\d-\d\d\d\d\d\d)/ycsb/(?P<exp_dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d).+", fn_ycsb)
    dn_log = mo.group("dn_log")
    job_id = mo.group("job_id")
    exp_dt = mo.group("exp_dt")
    #Cons.P(dn_log)
    #Cons.P(job_id)
    #Cons.P(exp_dt)
    dn_log_job = "%s/%s" % (dn_log, job_id)
    log_readers.append(RocksdbLogReader(dn_log_job, exp_dt))

  fn_metrics_by_time_0 = log_readers[0].FnMetricByTime()
  fn_metrics_by_time_1 = log_readers[1].FnMetricByTime()
  fn_rdb_compmigr = CompMigrStat.GetFnStat(fn_metrics_by_time_0, fn_metrics_by_time_1)
  return (fn_metrics_by_time_0, fn_metrics_by_time_1, fn_rdb_compmigr)
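# Example of a path parsed by the regex above (the same layout appears in
# comments elsewhere in these scripts):
#   .../computation/180126-142513/ycsb/180126-193525.769-d
# yields dn_log=".../computation", job_id="180126-142513",
# exp_dt="180126-193525.769".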
def main(argv):
  Conf.ParseArgs()
  Util.MkDirs(Conf.GetDir("output_dir"))

  dist_sq_thresholds = [0, 0.008, 0.01, 0.02, 0.05]
  #dist_sq_thresholds = [0.02]
  dist_sq_thresholds_str = []
  for d in dist_sq_thresholds:
    dist_sq_thresholds_str.append(_NoTrailing0s(d))

  reduced_files = []
  reduced_file_sizes = []
  with Cons.MT("Generating reduced size usa map ..."):
    for d in dist_sq_thresholds_str:
      fn_co_loc = "filter-out-almost-duplicate-points/.output/usa-map-smallsize-%s" % d
      reduced_files.append(fn_co_loc)
      if not os.path.exists(fn_co_loc):
        cmd = "cd filter-out-almost-duplicate-points && ./build-and-run.sh --dist_sq_threshold=%s" % d
        Util.RunSubp(cmd)
      reduced_file_sizes.append(os.path.getsize(fn_co_loc))

  dn_out = "%s/.output" % os.path.dirname(__file__)
  fn_out = "%s/usa-map.pdf" % dn_out
  with Cons.MT("Plotting ..."):
    env = os.environ.copy()
    env["IN_FNS"] = " ".join(reduced_files)
    env["IN_FN_SIZES"] = " ".join(str(s) for s in reduced_file_sizes)
    env["DIST_SQ_THRESHOLDS"] = " ".join(dist_sq_thresholds_str)
    env["OUT_FN"] = fn_out
    Util.RunSubp("gnuplot %s/usa-map.gnuplot" % os.path.dirname(__file__), env=env)
    Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
def _Parse(dt):
  dn = "%s/dstat" % Conf.GetDir("log_dir")
  fn = "%s/%s.csv" % (dn, dt)
  if not os.path.isfile(fn):
    fn_7z = "%s.7z" % fn
    if not os.path.isfile(fn_7z):
      raise RuntimeError("Unexpected")
    Util.RunSubp("cd %s && 7z e %s" % (dn, fn_7z))
    if not os.path.isfile(fn):
      raise RuntimeError("Unexpected")

  with Cons.MT("Parsing %s ..." % fn):
    header_rows = []
    global _body_rows
    _body_rows = []
    with open(fn, "rb") as f:
      header_detected = False
      reader = csv.reader(f)
      for row in reader:
        if (len(row) > 0) and (row[0] in ["system", "time"]):
          header_rows.append(row)
          header_detected = True
        elif header_detected:
          _body_rows.append(BodyRow(row))
      #Cons.P(pprint.pformat(header_rows))

    # Make sure the rows are all the same size
    num_cols = None
    for r in header_rows:
      if num_cols is None:
        num_cols = len(r)
      else:
        if num_cols != len(r):
          raise RuntimeError("Unexpected")
    for r in _body_rows:
      if num_cols != r.NumCols():
        raise RuntimeError("Unexpected")

    # Get column headers
    global _header_idx
    _header_idx = {}
    header_rows_0_prev = None
    for i in range(num_cols):
      if len(header_rows[0][i]) > 0:
        #Cons.P("%s, %s" % (header_rows[0][i], header_rows[1][i]))
        _header_idx["%s:%s" % (header_rows[0][i].replace(" ", "_"), header_rows[1][i].replace(" ", "_"))] = i
        header_rows_0_prev = header_rows[0][i].replace(" ", "_")
      else:
        #Cons.P("%s, %s" % (header_rows_0_prev, header_rows[1][i]))
        _header_idx["%s:%s" % (header_rows_0_prev.replace(" ", "_"), header_rows[1][i].replace(" ", "_"))] = i
    #Cons.P(pprint.pformat(_header_idx))

    # Sort the data in the header order and convert strings to numbers
    for b in _body_rows:
      b.PrepareData()
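# Illustration of the two-row dstat CSV header handling above. dstat leaves
# the category cell blank for all but the first column of a group, so the
# previous non-empty category is carried forward (values are illustrative):
#
#   row 0: ["total cpu usage", "", "", "dsk/xvda", ""]
#   row 1: ["usr", "sys", "idl", "read", "writ"]
#
# yields _header_idx like:
#   {"total_cpu_usage:usr": 0, "total_cpu_usage:sys": 1,
#    "total_cpu_usage:idl": 2, "dsk/xvda:read": 3, "dsk/xvda:writ": 4}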
def Init(simulation_time_begin):
  with Cons.MT("Init Conf ...", print_time=False):
    global _simulation_time_begin
    global _simulation_time_end
    global _simulated_time_begin
    global _simulated_time_end
    _simulation_time_begin = None
    _simulation_time_end = None
    _simulated_time_begin = None
    _simulated_time_end = None

    dn = "%s/client" % Conf.GetDir("log_dir")
    fn = "%s/%s" % (dn, simulation_time_begin)
    if not os.path.isfile(fn):
      fn_7z = "%s.7z" % fn
      if not os.path.isfile(fn_7z):
        raise RuntimeError("Unexpected")
      Util.RunSubp("cd %s && 7z e %s" % (dn, fn_7z))
      if not os.path.isfile(fn):
        raise RuntimeError("Unexpected")

    with open(fn) as fo:
      for line in fo:
        #Cons.P(line)
        # Example: # simulation_time_begin: 161227-162418.288
        mo = re.match(r"# simulation_time_begin: (?P<dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d)", line)
        if mo is not None:
          _simulation_time_begin = mo.group("dt")
          if _simulation_time_begin != simulation_time_begin:
            raise RuntimeError("Unexpected")
          _simulation_time_begin = datetime.datetime.strptime(_simulation_time_begin, "%y%m%d-%H%M%S.%f")
          continue

        mo = re.match(r"# simulation_time_end : (?P<dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d)", line)
        if mo is not None:
          _simulation_time_end = mo.group("dt")
          _simulation_time_end = datetime.datetime.strptime(_simulation_time_end, "%y%m%d-%H%M%S.%f")
          continue

        mo = re.match(r"# simulated_time_begin : (?P<dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d)", line)
        if mo is not None:
          _simulated_time_begin = mo.group("dt")
          _simulated_time_begin = datetime.datetime.strptime(_simulated_time_begin, "%y%m%d-%H%M%S.%f")
          continue

        mo = re.match(r"# simulated_time_end : (?P<dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d)", line)
        if mo is not None:
          _simulated_time_end = mo.group("dt")
          _simulated_time_end = datetime.datetime.strptime(_simulated_time_end, "%y%m%d-%H%M%S.%f")
          # Got all we need. Break to save time.
          break

    Cons.P("simulation_time_begin: %s" % _simulation_time_begin)
    Cons.P("simulation_time_end : %s" % _simulation_time_end)
    Cons.P("simulated_time_begin : %s" % _simulated_time_begin)
    Cons.P("simulated_time_end : %s" % _simulated_time_end)
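# The timestamp format used throughout these logs parses as follows (the
# literal value is illustrative):
#   datetime.datetime.strptime("161227-162418.288", "%y%m%d-%H%M%S.%f")
#   # -> datetime.datetime(2016, 12, 27, 16, 24, 18, 288000)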
def GetHourlyFn():
  exp_dts = []
  for i in range(2):
    #Cons.P(Conf.Get(i))
    mo = re.match(r".+/(?P<exp_dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d)-d", Conf.Get(i))
    exp_dts.append(mo.group("exp_dt"))
  fn_out = "%s/memory-usage-by-time-%s" % (Conf.GetOutDir(), "-".join(exp_dts))
  if os.path.exists(fn_out):
    return fn_out

  with Cons.MT("Generating file for memory usage comparison ..."):
    dn_base = Conf.GetDir("dn_base")
    fn_ycsb_0 = "%s/%s" % (dn_base, Conf.Get(0))
    fn_ycsb_1 = "%s/%s" % (dn_base, Conf.Get(1))
    hour_memstat_0 = _GetMemStatByHour(fn_ycsb_0)
    hour_memstat_1 = _GetMemStatByHour(fn_ycsb_1)
    #Cons.P(hour_memstat_0)
    #Cons.P(hour_memstat_1)

    with open(fn_out, "w") as fo:
      fo.write("# 0: %s\n" % fn_ycsb_0)
      fo.write("# 1: %s\n" % fn_ycsb_1)
      fo.write("#\n")
      fmt = "%2d" \
          " %5.3f %5.3f %5.3f %5.3f %5.3f %5.3f %5.3f %5.3f" \
          " %5.3f %5.3f %5.3f %5.3f %5.3f %5.3f %5.3f %5.3f"
      fo.write(Util.BuildHeader(fmt, "hour" \
          " 0_avg 0_min 0_1 0_25 0_50 0_75 0_99 0_max" \
          " 1_avg 1_min 1_1 1_25 1_50 1_75 1_99 1_max"
          ) + "\n")

      def to_gb(v):
        # Bytes to GB
        return float(v) / 1024 / 1024 / 1024

      for h, s0 in sorted(hour_memstat_0.iteritems()):
        s1 = hour_memstat_1[h]
        fo.write((fmt + "\n") % (h
            , to_gb(s0.avg), to_gb(s0.min), to_gb(s0._1), to_gb(s0._25)
            , to_gb(s0._50), to_gb(s0._75), to_gb(s0._99), to_gb(s0.max)
            , to_gb(s1.avg), to_gb(s1.min), to_gb(s1._1), to_gb(s1._25)
            , to_gb(s1._50), to_gb(s1._75), to_gb(s1._99), to_gb(s1.max)))
    Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
  return fn_out
def _GetLog(dt):
  dn = "%s/client" % Conf.GetDir("log_dir")
  fn = "%s/%s" % (dn, dt)
  if os.path.isfile(fn):
    return fn
  fn_7z = "%s.7z" % fn
  if not os.path.isfile(fn_7z):
    raise RuntimeError("Unexpected")
  Util.RunSubp("cd %s && 7z e %s" % (dn, fn_7z))
  return fn
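# The check-then-extract dance above is repeated in several of these modules
# (client, dstat, and rocksdb logs). A sketch of a shared helper it could be
# factored into; the name is illustrative, not part of the existing codebase:
#
#   def _EnsureUnzipped(fn):
#     # Return fn, extracting fn.7z in place first if fn itself is missing.
#     if os.path.isfile(fn):
#       return fn
#     fn_7z = "%s.7z" % fn
#     if not os.path.isfile(fn_7z):
#       raise RuntimeError("Unexpected: neither %s nor %s exists" % (fn, fn_7z))
#     Util.RunSubp("cd %s && 7z e %s" % (os.path.dirname(fn), os.path.basename(fn_7z)))
#     return fn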
def _Write():
  with Cons.MT("Plotting write latency ..."):
    env = os.environ.copy()
    env["FN_IN_LOCAL_SSD"] = _GetLog(Conf.Manifest.Get("Local SSD"))
    env["FN_IN_EBS_GP2"] = _GetLog(Conf.Manifest.Get("EBS gp2"))
    env["FN_IN_EBS_ST1"] = _GetLog(Conf.Manifest.Get("EBS st1"))
    env["FN_IN_EBS_SC1"] = _GetLog(Conf.Manifest.Get("EBS sc1"))
    fn_out = "%s/baseline-time-vs-write-latency-all-stg-devs.pdf" % Conf.GetDir("output_dir")
    env["FN_OUT"] = fn_out
    Util.RunSubp("gnuplot %s/write-latency.gnuplot" % os.path.dirname(__file__), env=env)
    Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
def _Cpu():
  with Cons.MT("Plotting cpu ..."):
    env = os.environ.copy()
    env["FN_IN"] = CsvFile.GenDataFileForGnuplot()
    dn = "%s/%s" % (Conf.GetDir("output_dir"), Conf.Get("simulation_time_begin"))
    fn_out = "%s/cpu.pdf" % dn
    env["FN_OUT"] = fn_out
    with Cons.MT("Plotting ..."):
      Util.RunSubp("gnuplot %s/cpu.gnuplot" % os.path.dirname(__file__), env=env)
      Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
def GenDataThrpVsLat():
  fn_out = "%s/rocksdb-ycsb-thrp-vs-lat-by-stgdevs" % Conf.GetOutDir()
  if os.path.exists(fn_out):
    return fn_out

  with Cons.MT("Generating thrp vs lat data file ..."):
    dn_base = Conf.GetDir("dn_base")

    # {stg_dev: {target_iops: YcsbLogReader}}
    stgdev_tio_ylr = {}
    #for stgdev in ["local-ssd", "ebs-st1"]:
    for stgdev in ["ebs-st1"]:
      if stgdev not in stgdev_tio_ylr:
        stgdev_tio_ylr[stgdev] = {}
      for target_iops, v in sorted(Conf.Get(stgdev).iteritems()):
        fn = "%s/%s" % (dn_base, v["fn"])
        t = v["time"].split("-")
        time_begin = t[0]
        time_end = t[1]
        overloaded = ("overloaded" in v) and v["overloaded"]
        stgdev_tio_ylr[stgdev][target_iops] = YcsbLogReader(fn, time_begin, time_end, overloaded)

    with open(fn_out, "w") as fo:
      fmt = "%9s %6.0f %1d %6.0f" \
          " %8.2f %8.2f %9.2f %10.2f %10.2f" \
          " %8.2f %8.2f %8.2f %9.2f %9.2f"
      fo.write("%s\n" % Util.BuildHeader(fmt,
          "stg_dev target_iops overloaded iops" \
          " r_avg r_90 r_99 r_99.9 r_99.99" \
          " w_avg w_90 w_99 w_99.9 w_99.99"))
      for stgdev, v in sorted(stgdev_tio_ylr.iteritems()):
        for tio, ylr in sorted(v.iteritems()):
          fo.write((fmt + "\n") % (
              stgdev
              , tio
              , (1 if ylr.overloaded else 0)
              , ylr.db_iops_stat.avg
              , ylr.r_avg, ylr.r_90, ylr.r_99, ylr.r_999, ylr.r_9999
              , ylr.w_avg, ylr.w_90, ylr.w_99, ylr.w_999, ylr.w_9999))
    Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
  return fn_out
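# The per-device config consumed above is assumed to look roughly like this
# (keys inferred from the accesses; values are illustrative):
#   "ebs-st1": {
#     1000: {"fn": "ycsb/171013-204101.000-d", "time": "00:10:00-00:50:00"}
#     , 2000: {"fn": "...", "time": "...", "overloaded": True}
#   }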
def GetFnCompareTwo():
  dn_base = Conf.GetDir("dn_base")

  # Analyze the number of compactions and migrations with
  #   (a) an unmodified DB as a baseline and
  #   (b) Mutant.
  fn_metrics_by_time = []
  for i in range(2):
    fn_ycsb = "%s/%s" % (dn_base, Conf.Get(i))
    fn_metrics_by_time.append(GetFnTimeVsMetrics(fn_ycsb))

  fn_rdb_compmigr = CompMigrStat.GetFnStat(fn_metrics_by_time[0], fn_metrics_by_time[1])
  return (fn_metrics_by_time, fn_rdb_compmigr)
def Plot(param):
  job_id = param[0]
  exp_dt = param[1]
  dn_log_job = "%s/work/mutant/log/quizup/sla-admin/%s" % (os.path.expanduser("~"), job_id)
  fn_log_quizup = "%s/quizup/%s" % (dn_log_job, exp_dt)
  fn_log_rocksdb = "%s/rocksdb/%s" % (dn_log_job, exp_dt)
  fn_log_dstat = "%s/dstat/%s.csv" % (dn_log_job, exp_dt)

  log_q = QuizupLog(fn_log_quizup)
  SimTime.Init(log_q.SimTime("simulated_time_begin"), log_q.SimTime("simulated_time_end"),
      log_q.SimTime("simulation_time_begin"), log_q.SimTime("simulation_time_end"))

  qz_std_max = _QzSimTimeDur(log_q.quizup_options["simulation_time_dur_in_sec"])
  qz_opt_str = _QuizupOptionsFormattedStr(log_q.quizup_options)
  error_adj_ranges = log_q.quizup_options["error_adj_ranges"].replace(",", " ")

  (fn_rocksdb_sla_admin_log, pid_params, num_sla_adj) = RocksdbLog.ParseLog(fn_log_rocksdb, exp_dt)

  fn_dstat = DstatLog.GenDataFileForGnuplot(fn_log_dstat, exp_dt)

  fn_out = "%s/sla-admin-by-time-%s.pdf" % (Conf.GetDir("output_dir"), exp_dt)
  with Cons.MT("Plotting ..."):
    env = os.environ.copy()
    env["STD_MAX"] = qz_std_max
    env["ERROR_ADJ_RANGES"] = error_adj_ranges
    env["IN_FN_QZ"] = fn_log_quizup
    env["IN_FN_SLA_ADMIN"] = "" if num_sla_adj == 0 else fn_rocksdb_sla_admin_log
    env["QUIZUP_OPTIONS"] = qz_opt_str
    env["PID_PARAMS"] = "%s %s %s %s" % (pid_params["target_value"], pid_params["p"], pid_params["i"], pid_params["d"])
    env["WORKLOAD_EVENTS"] = " ".join(str(t) for t in log_q.simulation_time_events)
    env["IN_FN_DS"] = fn_dstat
    env["OUT_FN"] = fn_out
    Util.RunSubp("gnuplot %s/sla-admin-by-time.gnuplot" % os.path.dirname(__file__), env=env)
    Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
def main(argv):
  Util.MkDirs(Conf.GetOutDir())

  dn_base = Conf.GetDir("dn_base")
  params = []
  for db_type in ["unmodified_db", "computation_overhead", "io_overhead"]:
    fn = "%s/%s" % (dn_base, Conf.Get(db_type))
    params.append((fn, ))

  parallel_processing = True
  if parallel_processing:
    p = multiprocessing.Pool()
    p.map(PlotByTime, params)
  else:
    for p in params:
      PlotByTime(p)
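# Note: multiprocessing.Pool.map requires the worker (PlotByTime) to be a
# module-level function and each element of params to be picklable; both hold
# here since params contains plain tuples of strings.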
def PlotByTime(params):
  exp_set_id = params[0]
  stg_dev = params[1]
  p0 = params[2]
  jobid_expdt = p0["jobid_expdt"]
  time_window = p0["time_window"]

  (fn_ycsb, time_max, params1) = YcsbLog.GenDataMetricsByTime(exp_set_id, stg_dev)
  #Cons.P(time_max)

  params_formatted = exp_set_id + "\n" + pprint.pformat(params1[0]) + "\n" + pprint.pformat(params1[1])
  params_formatted = params_formatted.replace("_", "\\\\_").replace(" ", "\\ ").replace("\n", "\\n").replace("{", "\{").replace("}", "\}")
  #Cons.P(params_formatted)

  t = jobid_expdt.split("/")
  job_id = t[0]
  exp_dt = t[1]
  dn_log = Conf.GetDir("dn")
  dn_log_job = "%s/%s" % (dn_log, job_id)
  fn_dstat = DstatLog.GenDataFileForGnuplot(dn_log_job, exp_dt)
  fn_rocksdb = RocksdbLog.GenDataFileForGnuplot(dn_log_job, exp_dt)

  fn_out = "%s/rocksdb-ycsb_d-%s-by-time-%s.pdf" % (Conf.GetOutDir(), stg_dev, exp_dt)
  with Cons.MT("Plotting ..."):
    env = os.environ.copy()
    env["EXP_SET_ID"] = exp_set_id
    env["PARAMS"] = params_formatted
    env["STG_DEV"] = stg_dev
    env["TIME_MAX"] = str(time_max)
    env["IN_FN_DSTAT"] = fn_dstat
    env["IN_FN_YCSB"] = fn_ycsb
    env["IN_FN_ROCKSDB"] = fn_rocksdb
    env["OUT_FN"] = fn_out
    Util.RunSubp("gnuplot %s/rocksdb-ycsb-by-time.gnuplot" % os.path.dirname(__file__), env=env)
    Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
def PlotTimeVsMetrics():
  with Cons.MT("Plotting time vs metrics ..."):
    dn_base = Conf.GetDir("dn_base")
    params = []
    for cost_changes, v in sorted(Conf.Get("by_costchanges_targetiops").iteritems()):
      for target_iops, e in sorted(v.iteritems()):
        #Cons.P("%s %s %s" % (cost_changes, target_iops, e))
        params.append("%s/%s" % (dn_base, e))

    parallel_processing = False
    if parallel_processing:
      with terminating(Pool()) as pool:
        pool.map(_PlotTimeVsAllMetrics, params)
    else:
      for p in params:
        _PlotTimeVsAllMetrics(p)
def Plot(): with Cons.MT("Plotting ..."): env = os.environ.copy() env["FN_IN_LOCAL_SSD"] = CsvFile.GenDataFileForGnuplot( Conf.Manifest.Get("Local SSD")) env["FN_IN_EBS_GP2"] = CsvFile.GenDataFileForGnuplot( Conf.Manifest.Get("EBS gp2")) env["FN_IN_EBS_ST1"] = CsvFile.GenDataFileForGnuplot( Conf.Manifest.Get("EBS st1")) env["FN_IN_EBS_SC1"] = CsvFile.GenDataFileForGnuplot( Conf.Manifest.Get("EBS sc1")) fn_out = "%s/baseline-time-vs-resource-usage-all-stg-devs.pdf" % Conf.GetDir( "output_dir") env["FN_OUT"] = fn_out Util.RunSubp("gnuplot %s/res-usage.gnuplot" % os.path.dirname(__file__), env=env) Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
def Init():
  with Cons.MT("Init Conf ...", print_time=False):
    global _simulation_time_begin
    global _simulation_time_end
    global _simulated_time_begin
    global _simulated_time_end

    fn = "%s/client/%s" % (Conf.GetDir("log_dir"), Conf.Get("simulation_time_begin"))
    with open(fn) as fo:
      for line in fo:
        #Cons.P(line)
        # Example: # simulation_time_begin: 161227-162418.288
        mo = re.match(r"# simulation_time_begin: (?P<dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d)", line)
        if mo is not None:
          _simulation_time_begin = mo.group("dt")
          if _simulation_time_begin != Conf.Get("simulation_time_begin"):
            raise RuntimeError("Unexpected")
          _simulation_time_begin = datetime.datetime.strptime(_simulation_time_begin, "%y%m%d-%H%M%S.%f")
          continue

        mo = re.match(r"# simulation_time_end : (?P<dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d)", line)
        if mo is not None:
          _simulation_time_end = mo.group("dt")
          _simulation_time_end = datetime.datetime.strptime(_simulation_time_end, "%y%m%d-%H%M%S.%f")
          continue

        mo = re.match(r"# simulated_time_begin : (?P<dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d)", line)
        if mo is not None:
          _simulated_time_begin = mo.group("dt")
          _simulated_time_begin = datetime.datetime.strptime(_simulated_time_begin, "%y%m%d-%H%M%S.%f")
          continue

        mo = re.match(r"# simulated_time_end : (?P<dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d)", line)
        if mo is not None:
          _simulated_time_end = mo.group("dt")
          _simulated_time_end = datetime.datetime.strptime(_simulated_time_end, "%y%m%d-%H%M%S.%f")
          continue

    Cons.P("simulation_time_begin: %s" % _simulation_time_begin)
    Cons.P("simulation_time_end : %s" % _simulation_time_end)
    Cons.P("simulated_time_begin : %s" % _simulated_time_begin)
    Cons.P("simulated_time_end : %s" % _simulated_time_end)
def GenDataFileForGnuplot(fn, dt):
  fn_out = "%s/dstat-%s" % (Conf.GetDir("output_dir"), dt)
  if os.path.isfile(fn_out):
    return fn_out

  with Cons.MT("Generating dstat data file for plot ..."):
    global _header_idx
    global _body_rows
    _header_idx = None
    _body_rows = None

    # Unzip when the file is not there
    if not os.path.exists(fn):
      fn_zipped = "%s.7z" % fn
      if not os.path.exists(fn_zipped):
        raise RuntimeError("Unexpected: %s" % fn)
      Util.RunSubp("cd %s && 7z e %s > /dev/null" % (os.path.dirname(fn_zipped), os.path.basename(fn_zipped)))
      if not os.path.exists(fn):
        raise RuntimeError("Unexpected")

    _Parse(fn)

    fmt = "%9.0f %9.0f %9.0f %9.0f %9.0f %9.0f %9.0f %9.0f" \
        " %6.1f %6.1f %6.1f %6.1f %6.1f %6.1f %6.1f %6.1f" \
        " %8.0f %8.0f %8.0f %8.0f" \
        " %3.0f %3.0f" \
        " %3.0f %3.0f %11s" \
        " %3.1f %6.2f %3.1f %6.2f %6.2f %6.3f"
    header = Util.BuildHeader(fmt, " ".join(k for k, v in sorted(_header_idx.iteritems())))
    with open(fn_out, "w") as fo:
      i = 0
      for r in _body_rows:
        if i % 50 == 0:
          fo.write("%s\n" % header)
        i += 1
        #Cons.P(fmt % tuple(r.Prepared()))
        fo.write((fmt + "\n") % tuple(r.Prepared()))
    Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
  return fn_out
def PlotTimeVsMetrics():
  with Cons.MT("Plotting time vs metrics ..."):
    dn_base = Conf.GetDir("dn_base")
    exps_rocksdb = Conf.Get("rocksdb")
    exps_computation = Conf.Get("computation")
    #Cons.P(pprint.pformat(exps_rocksdb))
    #Cons.P(pprint.pformat(exps_computation))

    params = []
    for e in exps_rocksdb:
      params.append("%s/%s" % (dn_base, e))
    for e in exps_computation:
      params.append("%s/%s" % (dn_base, e))

    parallel_processing = True
    if parallel_processing:
      with terminating(Pool()) as pool:
        pool.map(_PlotTimeVsAllMetrics, params)
    else:
      for p in params:
        _PlotTimeVsAllMetrics(p)
def CalcCompareTwo():
  with Cons.MT("Calculating the overhead of pairs ..."):
    dn_base = Conf.GetDir("dn_base")
    exps_rocksdb = Conf.Get("rocksdb")
    exps_computation = Conf.Get("computation")

    params = []
    for r in exps_rocksdb:
      for c in exps_computation:
        params.append(("%s/%s" % (dn_base, r), "%s/%s" % (dn_base, c)))

    parallel_processing = True
    if parallel_processing:
      with terminating(Pool()) as pool:
        pool.map(_CalcCompareTwo, params)
    else:
      for p in params:
        _CalcCompareTwo(p)

    # Find the closest pair: we want the computation-overhead experiment with
    # the minimal overhead, but no smaller than the unmodified RocksDB one.
    exp_tuples = []
    for p in params:
      o_cpu = CompareCpu.GetOverhead(p[0], p[1])
      o_mem = CompareMem.GetOverhead(p[0], p[1])
      if (o_cpu < 1.0) or (o_mem < 1.0):
        continue
      exp_tuples.append(ExpTuple(o_cpu, o_mem, p))

    fmt = "%8.6f %8.6f %17s %17s"
    Cons.P(Util.BuildHeader(fmt, "cpu_overhead mem_overhead expdt_rocksdb expdt_computation"))
    for e in sorted(exp_tuples):
      Cons.P(fmt % (e.o_cpu, e.o_mem, e.GetExpDt("r"), e.GetExpDt("c")))
    return exp_tuples
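# The overheads are presumably ratios relative to the unmodified run: e.g. an
# ExpTuple with o_cpu=1.02 and o_mem=1.01 (2% more CPU, 1% more memory) passes
# the filter above, while any pair below 1.0 is skipped.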
def _BuildMemtSstLives():
  with Cons.MT("Building memt and sst lives ..."):
    #global _memt_lives
    global _sst_lives
    if _sst_lives is not None:
      return
    #_memt_lives = {}
    _sst_lives = {}

    dn = "%s/rocksdb" % Conf.GetDir("log_dir")
    fn = "%s/%s" % (dn, Conf.Get("simulation_time_begin"))
    if not os.path.isfile(fn):
      fn_7z = "%s.7z" % fn
      if not os.path.isfile(fn_7z):
        raise RuntimeError("Unexpected")
      Util.RunSubp("cd %s && 7z e %s" % (dn, fn_7z))
      if not os.path.isfile(fn):
        raise RuntimeError("Unexpected")

    line_no = 0
    with open(fn) as fo:
      for line in fo:
        line_no += 1
        if line_no % 100 == 0:
          Cons.ClearLine()
          Cons.Pnnl("Processing line %d" % line_no)

        # The timestamp at the first column and the time_micros are 5 hours
        # apart. One is in local time (EDT) and the other is in UTC. Follow the
        # former.
        # TODO: this needs to be fixed at the other place too
        #
        # 2016/12/21-02:17:14.329266 7f702d7fa700 EVENT_LOG_v1 {"time_micros":
        # 1482304634329023, "mutant_table_acc_cnt": {"memt":
        # "0x7f69fc00c350:51723 0x7f6bec011200:26942", "sst": "1069:0:30.123:20.123
        # 1059:980:30.123:20.123"}
        mo = re.match(r"(?P<ts>\d\d\d\d/\d\d/\d\d-\d\d:\d\d:\d\d\.\d\d\d\d\d\d) .+ EVENT_LOG_v1 {" \
            ".+" \
            ", \"mutant_table_acc_cnt\": {(\"memt\": \"(?P<memt_acc_cnt>(\w|\d|:| )+)\")?" \
            "(, )?" \
            "(\"sst\": \"(?P<sst_acc_cnt>(\w|\d|:|-|\.| )+)\")?" \
            "}" \
            ".*"
            , line)
        if mo is not None:
          _SetTabletAccess(mo)
          continue

        # 2016/12/21-02:15:58.341853 7f702dffb700 EVENT_LOG_v1 {"time_micros":
        # 1482304558341847, "job": 227, "event": "table_file_deletion",
        # "file_number": 1058}
        mo = re.match(r"(?P<ts>\d\d\d\d/\d\d/\d\d-\d\d:\d\d:\d\d\.\d\d\d\d\d\d) .+ EVENT_LOG_v1 {" \
            ".+" \
            ", \"job\": \d+" \
            ", \"event\": \"table_file_deletion\"" \
            ", \"file_number\": (?P<file_number>\d+)" \
            "}" \
            ".*"
            , line)
        if mo is not None:
          _SetTabletDeleted(mo)
          continue

        # 2016/12/21-01:27:40.840324 7f702dffb700 EVENT_LOG_v1 {"time_micros":
        # 1482301660840289, "cf_name": "default", "job": 4, "event":
        # "table_file_creation", "file_number": 15, "file_size": 67569420,
        # "table_properties": {"data_size": 67110556, "index_size": 458020,
        # "filter_size": 0, "raw_key_size": 1752468, "raw_average_key_size": 25,
        # "raw_value_size": 65132550, "raw_average_value_size": 966,
        # "num_data_blocks": 16857, "num_entries": 67425, "filter_policy_name":
        # "", "reason": kCompaction, "kDeletedKeys": "0", "kMergeOperands": "0"}}
        mo = re.match(r"(?P<ts>\d\d\d\d/\d\d/\d\d-\d\d:\d\d:\d\d\.\d\d\d\d\d\d) .+ EVENT_LOG_v1 {" \
            ".+" \
            ", \"cf_name\": \"default\"" \
            ", \"job\": (?P<job>\d+)" \
            ", \"event\": \"table_file_creation\"" \
            ", \"file_number\": (?P<file_number>\d+)" \
            ", \"file_size\": (?P<file_size>\d+)" \
            ".+" \
            ", \"reason\": (?P<reason>\w+)" \
            ".*"
            , line)
        if mo is not None:
          _SetTabletCreated(mo)
          continue

        # 2016/12/21-01:28:41.835596 7f683c58d700 EVENT_LOG_v1 {"time_micros":
        # 1482301721835586, "job": 8, "event": "flush_started", "num_memtables":
        # 2, "num_entries": 257306, "num_deletes": 0, "memory_usage": 260052944}
        mo = re.match(r"(?P<ts>\d\d\d\d/\d\d/\d\d-\d\d:\d\d:\d\d\.\d\d\d\d\d\d) .+ EVENT_LOG_v1 {" \
            ".+" \
            ", \"job\": \d+" \
            ", \"event\": \"flush_started\"" \
            ".*"
            , line)
        if mo is not None:
          continue

        # 2016/12/21-01:27:25.893816 7f683c58d700 (Original Log Time
        # 2016/12/21-01:27:25.893597) EVENT_LOG_v1 {"time_micros":
        # 1482301645893590, "job": 2, "event": "flush_finished", "lsm_state": [1,
        # 0, 0, 0, 0, 0, 0], "immutable_memtables": 0}
        mo = re.match(r"(?P<ts>\d\d\d\d/\d\d/\d\d-\d\d:\d\d:\d\d\.\d\d\d\d\d\d) .+ EVENT_LOG_v1 {" \
            ".+" \
            ", \"job\": \d+" \
            ", \"event\": \"flush_finished\"" \
            ".*"
            , line)
        if mo is not None:
          continue

        # 2016/12/21-01:27:40.010374 7f702dffb700 EVENT_LOG_v1 {"time_micros":
        # 1482301660010345, "job": 4, "event": "compaction_started", "files_L0":
        # [12, 8], "score": 1, "input_data_size": 241744688}
        mo = re.match(r"(?P<ts>\d\d\d\d/\d\d/\d\d-\d\d:\d\d:\d\d\.\d\d\d\d\d\d) .+ EVENT_LOG_v1 {" \
            ".+" \
            ", \"job\": \d+" \
            ", \"event\": \"compaction_started\"" \
            ".*"
            , line)
        if mo is not None:
          continue

        # 2016/12/21-01:27:40.960792 7f702dffb700 (Original Log Time
        # 2016/12/21-01:27:40.959919) EVENT_LOG_v1 {"time_micros":
        # 1482301660959908, "job": 4, "event": "compaction_finished",
        # "compaction_time_micros": 949251, "output_level": 1,
        # "num_output_files": 4, "total_output_size": 229662756,
        # "num_input_records": 241171, "num_output_records": 229148,
        # "num_subcompactions": 1, "lsm_state": [0, 4, 0, 0, 0, 0, 0]}
        mo = re.match(r"(?P<ts>\d\d\d\d/\d\d/\d\d-\d\d:\d\d:\d\d\.\d\d\d\d\d\d) .+ EVENT_LOG_v1 {" \
            ".+" \
            ", \"job\": (?P<job>\d+)" \
            ", \"event\": \"compaction_finished\"" \
            ".+" \
            ", \"output_level\": (?P<output_level>\d+)" \
            ".*"
            , line)
        if mo is not None:
          _SetCompactinFinished(mo)
          continue

        # You can check out the other events here. All useful ones were covered
        # above.
        #Cons.P(line)

    Cons.ClearLine()
    Cons.P("Processed %d lines" % line_no)

    deleted = 0
    not_deleted = 0
    for sst_id, sl in _sst_lives.iteritems():
      if sl.TsDeleted() is None:
        not_deleted += 1
      else:
        deleted += 1
    Cons.P("Created %d SstLives. %d not-deleted, %d deleted"
        % (len(_sst_lives), not_deleted, deleted))
def GetFnCostSloEpsilonVsMetrics():
  fn_out = "%s/cost-slo-epsilon-vs-metrics" % Conf.GetOutDir()
  if os.path.isfile(fn_out):
    return fn_out

  dn_base = Conf.GetDir("dn_base")
  # {cost_slo_epsilon: fn}
  cse_fn = {}
  for cost_slo_epsilon, fn0 in Conf.Get("by_cost_slo_epsilons").iteritems():
    fn = "%s/%s" % (dn_base, fn0)
    cse_fn[cost_slo_epsilon] = fn
  #Cons.P(pprint.pformat(cse_fn))

  params = []
  for cost_slo_epsilon, fn_ycsb_log in sorted(cse_fn.iteritems()):
    params.append(fn_ycsb_log)

  parallel_processing = True
  if parallel_processing:
    with terminating(Pool()) as pool:
      result = pool.map(GetFnTimeVsMetrics, params)
  else:
    result = []
    for p in params:
      result.append(GetFnTimeVsMetrics(p))
  #Cons.P(result)

  cse_outfn = {}
  i = 0
  for cost_slo_epsilon, fn_ycsb_log in sorted(cse_fn.iteritems()):
    cse_outfn[cost_slo_epsilon] = result[i]
    i += 1

  with open(fn_out, "w") as fo:
    fo.write("# CSE: Storage cost SLO epsilon\n")
    fo.write("# JR: jobs_recovery\n")
    fo.write("# JF: jobs_flush\n")
    fo.write("# JC: jobs_compaction\n")
    fo.write("# JCL: jobs_comp_leveled_organization_triggered\n")
    fo.write("# SSCL: total_sst_size_comp_level_triggered_in_gb\n")
    fo.write("# SSCLCM: total_sst_size_comp_level_triggered_comp_migrs_in_gb\n")
    fo.write("# SSCLCMS: total_sst_size_comp_level_triggered_comp_migrs_to_slow_in_gb\n")
    fo.write("# SSCLCMF: total_sst_size_comp_level_triggered_comp_migrs_to_fast_in_gb\n")
    fo.write("# JCT: jobs_comp_temp_triggered_migr\n")
    fo.write("# SSCT: total_sst_size_comp_temp_triggered_migr_in_gb\n")
    fo.write("# SSCTS: To slow storage\n")
    fo.write("# SSCTF: To fast storage\n")
    fo.write("\n")

    fmt = "%4.2f %8.6f %8.6f %8.6f %8.6f %1d %2d %4d" \
        " %4d %7.3f %7.3f %7.3f %7.3f" \
        " %4d %7.3f %7.3f %7.3f"
    header = Util.BuildHeader(fmt, "CSE" \
        " stg_unit_cost_$_gb_month" \
        " stg_cost_$" \
        " fast_stg_cost_$" \
        " slow_stg_cost_$" \
        " JR" \
        " JF" \
        " JC" \
        " JCL" \
        " SSCL" \
        " SSCLCM" \
        " SSCLCMS" \
        " SSCLCMF" \
        " JCT" \
        " SSCT" \
        " SSCTS" \
        " SSCTF")
    fo.write(header + "\n")

    for cost_slo_epsilon, fn1 in sorted(cse_outfn.iteritems()):
      kvs = [
          ["total_stg_unit_cost", None]
          , ["total_stg_cost", None]
          , ["fast_stg_cost", None]
          , ["slow_stg_cost", None]
          , ["num_jobs_recovery", None]
          , ["num_jobs_flush", None]
          , ["num_jobs_comp_all", None]
          , ["num_jobs_comp_level_triggered", None]
          , ["total_sst_size_comp_level_triggered_in_gb", None]
          , ["total_sst_size_comp_level_triggered_comp_migrs_in_gb", None]
          , ["total_sst_size_comp_level_triggered_comp_migrs_to_slow_in_gb", None]
          , ["total_sst_size_comp_level_triggered_comp_migrs_to_fast_in_gb", None]
          , ["num_jobs_comp_temp_triggered_migr", None]
          , ["total_sst_size_comp_temp_triggered_migr", None]
          , ["total_sst_size_comp_temp_triggered_migr_to_slow", None]
          , ["total_sst_size_comp_temp_triggered_migr_to_fast", None]
          ]
      with open(fn1) as fo1:
        for line in fo1:
          if not line.startswith("#"):
            continue
          for kv in kvs:
            k = kv[0]
            mo = re.match(r".+%s=(?P<v>(\d|\.)+)" % k, line)
            if mo:
              kv[1] = float(mo.group("v"))
              continue
      try:
        fo.write((fmt + "\n") % (
            cost_slo_epsilon
            , kvs[0][1], kvs[1][1], kvs[2][1], kvs[3][1]
            , kvs[4][1], kvs[5][1], kvs[6][1], kvs[7][1]
            , kvs[8][1], kvs[9][1], kvs[10][1], kvs[11][1]
            , kvs[12][1], kvs[13][1], kvs[14][1], kvs[15][1]))
      except TypeError as e:
        Cons.P(fn1)
        raise e
  Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
  return fn_out
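# The kvs loop above scrapes summary values out of the comment header of each
# per-experiment file. An illustrative line it would match:
#   "# total_stg_cost=1.234567"
# where re.match(r".+total_stg_cost=(?P<v>(\d|\.)+)", line) yields v="1.234567".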
def main(argv):
  Conf.ParseArgs()
  Util.MkDirs(Conf.GetDir("output_dir"))
  Plot.Plot()
def main(argv):
  Conf.ParseArgs()
  Util.MkDirs(Conf.GetDir("output_dir"))

  #dist_sq_thresholds = [
  #    0
  #    , 0.008
  #    , 0.01
  #    , 0.02
  #    , 0.05]
  # Still quite big. 277 KB
  dist_sq_thresholds = [0.02]
  dist_sq_thresholds_str = []
  for d in dist_sq_thresholds:
    dist_sq_thresholds_str.append(_NoTrailing0s(d))

  fns_co_loc = []
  co_loc_file_sizes = []
  with Cons.MT("Generating reduced central office locations ..."):
    for d in dist_sq_thresholds_str:
      fn_co_loc = "filter-out-almost-duplicate-points/.output/centraloffices-wo-almost-dup-points-%s" % d
      fns_co_loc.append(fn_co_loc)
      if not os.path.exists(fn_co_loc):
        cmd = "cd filter-out-almost-duplicate-points && ./build-and-run.sh --dist_sq_threshold=%s" % d
        Util.RunSubp(cmd)
      co_loc_file_sizes.append(os.path.getsize(fn_co_loc))

  dn_out = "%s/.output" % os.path.dirname(__file__)
  fn_out = "%s/central-office-locations.pdf" % dn_out
  with Cons.MT("Plotting ..."):
    env = os.environ.copy()
    env["IN_FNS"] = " ".join(fns_co_loc)
    env["IN_FN_SIZES"] = " ".join(str(s) for s in co_loc_file_sizes)
    env["DIST_SQ_THRESHOLDS"] = " ".join(dist_sq_thresholds_str)
    env["OUT_FN"] = fn_out
    Util.RunSubp("gnuplot %s/central-office-on-map.gnuplot" % os.path.dirname(__file__), env=env)
    Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
  sys.exit(0)

  # Unreachable below the sys.exit(0): kept from an earlier version of this
  # script. Note that `exps` is not defined in this scope.
  if False:
    # Parallel processing
    params = []
    for line in re.split(r"\s+", exps):
      t = line.split("/quizup/")
      if len(t) != 2:
        raise RuntimeError("Unexpected")
      job_id = t[0]
      exp_dt = t[1]
      params.append((job_id, exp_dt))
    p = multiprocessing.Pool(8)
    p.map(Plot, params)
  else:
    for line in re.split(r"\s+", exps):
      t = line.split("/quizup/")
      if len(t) != 2:
        raise RuntimeError("Unexpected")
      job_id = t[0]
      exp_dt = t[1]
      Plot((job_id, exp_dt))
def __init__(self, exp_set_id, stg_dev):
  conf_sd = Conf.Get(exp_set_id)[stg_dev]
  t = conf_sd["jobid_expdt"].split("/")
  job_id = t[0]
  exp_dt = t[1]
  t = conf_sd["time_window"].split("-")
  exp_time_begin = t[0]
  exp_time_end = t[1]
  dn_log = Conf.GetDir("dn")
  dn_log_job = "%s/%s" % (dn_log, job_id)

  self.fn_out = "%s/ycsb-by-time-%s" % (Conf.GetOutDir(), exp_dt)
  if os.path.isfile(self.fn_out):
    return

  self.exp_begin_dt = datetime.datetime.strptime(exp_dt, "%y%m%d-%H%M%S.%f")
  #Cons.P(self.exp_begin_dt)

  with Cons.MT("Generating ycsb time-vs-metrics file for plot ..."):
    fn_log_ycsb = "%s/ycsb/%s-d" % (dn_log_job, exp_dt)
    # Unzip when the file is not there
    if not os.path.exists(fn_log_ycsb):
      fn_zipped = "%s.bz2" % fn_log_ycsb
      if not os.path.exists(fn_zipped):
        raise RuntimeError("Unexpected: %s" % fn_log_ycsb)
      Util.RunSubp("cd %s && bzip2 -dk %s > /dev/null" % (os.path.dirname(fn_zipped), os.path.basename(fn_zipped)))
      if not os.path.exists(fn_log_ycsb):
        raise RuntimeError("Unexpected")

    mo_list = []
    line_params = None
    line_run = None
    with open(fn_log_ycsb) as fo:
      for line in fo:
        #Cons.P(line)
        # 2017-10-13 20:41:01:258 2 sec: 34 operations; 34 current ops/sec; est completion in 68 days 1 hours [READ: Count=28, Max=46943, Min=33,
        # Avg=32239.54, 90=45343, 99=46943, 99.9=46943, 99.99=46943] [INSERT: Count=8, Max=9343, Min=221, Avg=4660.88, 90=8695, 99=9343, 99.9=9343,
        # 99.99=9343]
        mo = re.match(r"\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d:\d\d\d (?P<rel_time>\d+) sec: \d+ operations; " \
            "(?P<db_iops>(\d|\.)+) current ops\/sec; .*" \
            "\[READ: Count=(?P<r_cnt>\d+), Max=(?P<r_max>\d+), Min=(?P<r_min>\d+), Avg=(?P<r_avg>(\d|\.)+)," \
            " 90=(?P<r_90>\d+), 99=(?P<r_99>\d+), 99.9=(?P<r_999>\d+), 99.99=(?P<r_9999>\d+)\] " \
            "\[INSERT: Count=(?P<w_cnt>\d+), Max=(?P<w_max>\d+), Min=(?P<w_min>\d+), Avg=(?P<w_avg>(\d|\.)+)," \
            " 90=(?P<w_90>\d+), 99=(?P<w_99>\d+), 99.9=(?P<w_999>\d+), 99.99=(?P<w_9999>\d+)\] "
            , line)
        if mo is not None:
          total_seconds = int(mo.group("rel_time"))
          s = total_seconds % 60
          total_seconds -= s
          total_mins = total_seconds / 60
          m = total_mins % 60
          total_mins -= m
          h = total_mins / 60
          rel_time = "%02d:%02d:%02d" % (h, m, s)
          mo_list.append((rel_time, mo))
          continue
        if line.startswith("params = {"):
          line_params = line
          continue
        if line.startswith("run = {"):
          line_run = line
          continue

    cnt = 0
    db_iops = []
    r_cnt = 0
    r_avg = 0.0
    r_min = 0
    r_max = 0
    r_90 = 0
    r_99 = 0
    r_999 = 0
    r_9999 = 0
    w_cnt = 0
    w_avg = 0.0
    w_min = 0
    w_max = 0
    w_90 = 0
    w_99 = 0
    w_999 = 0
    w_9999 = 0
    for e in mo_list:
      rel_time = e[0]
      if (exp_time_begin < rel_time) and (rel_time < exp_time_end):
        mo = e[1]
        db_iops.append(float(mo.group("db_iops")))
        r_cnt += int(mo.group("r_cnt"))
        r_avg += float(mo.group("r_avg"))
        r_min += int(mo.group("r_min"))
        r_max += int(mo.group("r_max"))
        r_90 += int(mo.group("r_90"))
        r_99 += int(mo.group("r_99"))
        r_999 += int(mo.group("r_999"))
        r_9999 += int(mo.group("r_9999"))
        w_cnt += int(mo.group("w_cnt"))
        w_avg += float(mo.group("w_avg"))
        w_min += int(mo.group("w_min"))
        w_max += int(mo.group("w_max"))
        w_90 += int(mo.group("w_90"))
        w_99 += int(mo.group("w_99"))
        w_999 += int(mo.group("w_999"))
        w_9999 += int(mo.group("w_9999"))
        cnt += 1
    db_iops_stat = Stat.Gen(db_iops)

    with open(self.fn_out, "w") as fo_out:
      fo_out.write("# %s" % line_params)
      fo_out.write("# %s" % line_run)
      fo_out.write("\n")
      fo_out.write("# In the time range (%s, %s):\n" % (exp_time_begin, exp_time_end))
      fo_out.write("# db_iops.avg= %14f\n" % db_iops_stat.avg)
      fo_out.write("# db_iops.min= %14f\n" % db_iops_stat.min)
      fo_out.write("# db_iops.max= %14f\n" % db_iops_stat.max)
      fo_out.write("# db_iops._25= %14f\n" % db_iops_stat._25)
      fo_out.write("# db_iops._50= %14f\n" % db_iops_stat._50)
      fo_out.write("# db_iops._75= %14f\n" % db_iops_stat._75)
      fo_out.write("# r_cnt = %14f\n" % (float(r_cnt) / cnt))
      fo_out.write("# r_avg = %14f\n" % (float(r_avg) / cnt))
      fo_out.write("# r_min = %14f\n" % (float(r_min) / cnt))
      fo_out.write("# r_max = %14f\n" % (float(r_max) / cnt))
      fo_out.write("# r_90 = %14f\n" % (float(r_90) / cnt))
      fo_out.write("# r_99 = %14f\n" % (float(r_99) / cnt))
      fo_out.write("# r_999 = %14f\n" % (float(r_999) / cnt))
      fo_out.write("# r_9999 = %14f\n" % (float(r_9999) / cnt))
      fo_out.write("# w_cnt = %14f\n" % (float(w_cnt) / cnt))
      fo_out.write("# w_avg = %14f\n" % (float(w_avg) / cnt))
      fo_out.write("# w_min = %14f\n" % (float(w_min) / cnt))
      fo_out.write("# w_max = %14f\n" % (float(w_max) / cnt))
      fo_out.write("# w_90 = %14f\n" % (float(w_90) / cnt))
      fo_out.write("# w_99 = %14f\n" % (float(w_99) / cnt))
      fo_out.write("# w_999 = %14f\n" % (float(w_999) / cnt))
      fo_out.write("# w_9999 = %14f\n" % (float(w_9999) / cnt))
      fo_out.write("\n")

      fmt = "%8s" \
          " %9.2f" \
          " %6d %8.2f %3d %6d" \
          " %6d %6d %6d %6d" \
          " %6d %8.2f %3d %6d" \
          " %6d %6d %6d %6d"
      header = Util.BuildHeader(fmt, "rel_time" \
          " db_iops" \
          " read_cnt read_lat_avg read_lat_min read_lat_max" \
          " read_lat_90p read_lat_99p read_lat_99.9p read_lat_99.99p" \
          " write_cnt write_lat_avg write_lat_min write_lat_max" \
          " write_lat_90p write_lat_99p write_lat_99.9p write_lat_99.99p")
      i = 0
      for e in mo_list:
        rel_time = e[0]
        mo = e[1]
        if i % 40 == 0:
          fo_out.write(header + "\n")
        fo_out.write((fmt + "\n") % (rel_time
            , float(mo.group("db_iops"))
            , int(mo.group("r_cnt")), float(mo.group("r_avg")), int(mo.group("r_min")), int(mo.group("r_max"))
            , int(mo.group("r_90")), int(mo.group("r_99")), int(mo.group("r_999")), int(mo.group("r_9999"))
            , int(mo.group("w_cnt")), float(mo.group("w_avg")), int(mo.group("w_min")), int(mo.group("w_max"))
            , int(mo.group("w_90")), int(mo.group("w_99")), int(mo.group("w_999")), int(mo.group("w_9999"))))
        i += 1
    Cons.P("Created %s %d" % (self.fn_out, os.path.getsize(self.fn_out)))
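# Note on the time-window filter in __init__ above: rel_time and the
# exp_time_begin/exp_time_end bounds are zero-padded "HH:MM:SS" strings, so
# plain string comparison orders them chronologically
# (e.g. "00:10:00" < "01:02:02"). Example of the seconds-to-HH:MM:SS
# conversion: a "3722 sec" status line becomes rel_time "01:02:02".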