def PlotSstAccfreqByAgeIndividual():
  """Plot one access-frequency-by-age pdf per SSTable.

  Runs the gnuplot script once per SSTable, passing the input data file,
  level, deletion age, and output file through environment variables.
  """
  with Cons.MT("Plotting individual SSTable access frequencies by their ages ..."):
    dn_out = "%s/%s/sst-age-accfreq-plot" % (
        Conf.Get("dn_result"), Conf.Get("simulation_time_begin"))
    Util.MkDirs(dn_out)
    env = os.environ.copy()
    sst_lives = RocksDbLogReader.GetSstAccFreqByAgeDataFiles()
    for sl in sst_lives:
      # Per-SSTable data file generated earlier by RocksDbLogReader.
      env["IN_FN"] = "%s/%s/sst-age-accfreq-data/%d" \
          % (Conf.Get("dn_result"), Conf.Get("simulation_time_begin"), sl.Id())
      env["LEVEL"] = str(sl.Level())
      # Only deleted (fully lived-out) SSTables are expected here.
      if sl.TsDeleted() is None:
        raise RuntimeError("Unexpected")
      env["AGE_DELETED"] = str(SimTime.ToSimulatedTimeDur(
          (sl.TsDeleted() - sl.TsCreated()).total_seconds()))
      out_fn = "%s/L%d-%d.pdf" % (dn_out, sl.Level(), sl.Id())
      env["OUT_FN"] = out_fn
      # Time each gnuplot invocation for the progress message below.
      start_time = time.time()
      Util.RunSubp("gnuplot %s/sst-accfreq-by-age-individual.gnuplot"
                   % os.path.dirname(__file__), env=env, print_cmd=False)
      dur = time.time() - start_time
      Cons.P("Created %s %d in %.0f ms"
             % (out_fn, os.path.getsize(out_fn), dur * 1000.0))
def Get1minAvgFn():
  """Build (or reuse) the 1-minute-average memory usage comparison file.

  Merges the per-minute memory averages of the two configured experiments
  into one time-sorted file for plotting. Returns the output file path.
  Python 2 (dict.iteritems).
  """
  exp_dts = []
  for i in range(2):
    #Cons.P(Conf.Get(i))
    # Extract the experiment datetime from the configured log path.
    mo = re.match(r".+/(?P<exp_dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d)-d", Conf.Get(i))
    exp_dts.append(mo.group("exp_dt"))
  fn_out = "%s/mem-1minavg-%s" % (Conf.GetOutDir(), "-".join(exp_dts))
  if os.path.exists(fn_out):
    return fn_out
  with Cons.MT("Creating avg memory usage comparison file for plotting ..."):
    records = []
    dn_base = Conf.GetDir("dn_base")
    for i in range(2):
      fn_ycsb_log = "%s/%s" % (dn_base, Conf.Get(i))
      hm_mem = _GetHmMem(fn_ycsb_log)
      for hm, mem in hm_mem.iteritems():
        # i * 30: offsets the second experiment's points by 30 sec,
        # presumably so the two series do not overlap -- TODO confirm.
        records.append(_RecordMemAvg(hm, i * 30, mem, i))
    records.sort(key=operator.attrgetter("ts"))
    fmt = "%8s %6.3f %1d"
    header = Util.BuildHeader(fmt, "timestamp mem_avg_in_gb exp_type")
    with open(fn_out, "w") as fo:
      i = 0
      for r in records:
        # Repeat the header every 40 rows for readability.
        if i % 40 == 0:
          fo.write(header + "\n")
        i += 1
        fo.write("%s\n" % r.ToStr(fmt))
    Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
  return fn_out
def _GetFnCpuOverhead():
  """Generate (or reuse) the hourly CPU overhead comparison data file.

  Compares an unmodified DB run ("u") against a run with SSTable access
  monitoring / placement computation ("c"). Returns the output file path.
  """
  fn_out = "%s/cpu-overhead-by-time" % Conf.GetOutDir()
  if os.path.exists(fn_out):
    return fn_out
  dn_base = Conf.GetDir("dn_base")
  fn_ycsb_0 = "%s/%s" % (dn_base, Conf.Get("unmodified_db"))
  fn_ycsb_1 = "%s/%s" % (dn_base, Conf.Get("computation_overhead"))
  hour_cpustat_0 = _GetCpuStatByHour(fn_ycsb_0)
  hour_cpustat_1 = _GetCpuStatByHour(fn_ycsb_1)
  #Cons.P(hour_cpustat_0)
  #Cons.P(hour_cpustat_1)
  with open(fn_out, "w") as fo:
    fo.write("# u: unmodified\n")
    fo.write(
        "# c: with SSTable access monitoring and SSTable placement computation\n"
    )
    fo.write("#\n")
    # Per-hour avg/min/percentiles/max for each of the two runs.
    fmt = "%2d" \
        " %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f" \
        " %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f"
    fo.write(Util.BuildHeader(fmt, "hour" \
        " u_avg u_min u_1 u_25 u_50 u_75 u_99 u_max" \
        " c_avg c_min c_1 c_25 c_50 c_75 c_99 c_max"
        ) + "\n")
    for h, s0 in sorted(hour_cpustat_0.iteritems()):
      s1 = hour_cpustat_1[h]
      fo.write((fmt + "\n") % (h,
          s0.avg, s0.min, s0._1, s0._25, s0._50, s0._75, s0._99, s0.max,
          s1.avg, s1.min, s1._1, s1._25, s1._50, s1._75, s1._99, s1.max))
  Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
  return fn_out
def main(argv):
  """Entry point: parse args, create the result dir, init SimTime, plot."""
  Conf.ParseArgs()
  Util.MkDirs(Conf.Get("dn_result"))
  SimTime.Init(Conf.Get("simulation_time_begin"))
  Plot.Plot()
def GetHourlyFn():
  """Generate (or reuse) the hourly CPU usage comparison file for 2 exps.

  Returns the output file path. Python 2 (dict.iteritems).
  """
  fn_out = "%s/cpu-hourly-usage" % Conf.GetOutDir()
  if os.path.exists(fn_out):
    return fn_out
  with Cons.MT("Generating file for cpu usage comparison ..."):
    dn_base = Conf.GetDir("dn_base")
    fn_ycsb_0 = "%s/%s" % (dn_base, Conf.Get(0))
    fn_ycsb_1 = "%s/%s" % (dn_base, Conf.Get(1))
    hour_cpustat_0 = _GetCpuStatByHour(fn_ycsb_0)
    hour_cpustat_1 = _GetCpuStatByHour(fn_ycsb_1)
    #Cons.P(hour_cpustat_0)
    #Cons.P(hour_cpustat_1)
    with open(fn_out, "w") as fo:
      # Record which log file maps to which column group.
      fo.write("# 0: %s\n" % fn_ycsb_0)
      fo.write("# 1: %s\n" % fn_ycsb_1)
      fo.write("#\n")
      # Per-hour avg/min/percentiles/max for each of the two runs.
      fmt = "%2d" \
          " %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f" \
          " %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f"
      fo.write(Util.BuildHeader(fmt, "hour" \
          " 0_avg 0_min 0_1 0_25 0_50 0_75 0_99 0_max" \
          " 1_avg 1_min 1_1 1_25 1_50 1_75 1_99 1_max"
          ) + "\n")
      for h, s0 in sorted(hour_cpustat_0.iteritems()):
        s1 = hour_cpustat_1[h]
        fo.write((fmt + "\n") % (h,
            s0.avg, s0.min, s0._1, s0._25, s0._50, s0._75, s0._99, s0.max,
            s1.avg, s1.min, s1._1, s1._25, s1._50, s1._75, s1._99, s1.max))
    Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
  return fn_out
def Get1minAvgFn():
  """Build (or reuse) the 1-minute-average CPU usage comparison file.

  Merges the per-minute CPU averages of the two configured experiments
  into one time-sorted file for plotting. Returns the output file path.
  Python 2 (dict.iteritems).
  """
  exp_dts = []
  for i in range(2):
    #Cons.P(Conf.Get(i))
    # Example path: computation/180126-142513/ycsb/180126-193525.769-d
    mo = re.match(r".+/(?P<exp_dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d)-d", Conf.Get(i))
    exp_dts.append(mo.group("exp_dt"))
  fn_out = "%s/cpu-1minavg-%s" % (Conf.GetOutDir(), "-".join(exp_dts))
  if os.path.exists(fn_out):
    return fn_out
  with Cons.MT("Creating avg cpu usage comparison file for plotting ..."):
    records = []
    dn_base = Conf.GetDir("dn_base")
    for i in range(2):
      fn_ycsb_log = "%s/%s" % (dn_base, Conf.Get(i))
      hm_cpu = _GetHmCpu(fn_ycsb_log)
      for hm, cpu in hm_cpu.iteritems():
        # i * 30: offsets the second experiment's points by 30 sec,
        # presumably so the two series do not overlap -- TODO confirm.
        records.append(_RecordCpuAvg(hm, i * 30, cpu, i))
    records.sort(key=operator.attrgetter("ts"))
    fmt = "%8s %6.2f %1d"
    header = Util.BuildHeader(fmt, "timestamp cpu_avg exp_type")
    with open(fn_out, "w") as fo:
      i = 0
      for r in records:
        # Repeat the header every 40 rows for readability.
        if i % 40 == 0:
          fo.write(header + "\n")
        i += 1
        fo.write("%s\n" % r.ToStr(fmt))
    Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
  return fn_out
def PlotSstAccfreqByAgeIndividualMultiplot():
  """Plot, per level, a multiplot of SSTable access frequencies by age.

  Iterates levels from 0 upward until a level has no SSTables, passing
  per-SSTable metadata to the gnuplot script through environment variables.

  Bug fix: the env var name "ACCFREQ_MAX_ALL_SST_IN LEVEL" contained a
  space. Renamed to "ACCFREQ_MAX_ALL_SST_IN_LEVEL" to match its
  TEMP_MAX_ALL_SST_IN_LEVEL sibling; a name with a space is not readable
  as a variable by the gnuplot script.
  """
  with Cons.MT("Plotting individual SSTable access frequencies by their ages ..."):
    dn_out = "%s/%s/sst-age-accfreq-plot" % (Conf.Get("dn_result"), Conf.Get("simulation_time_begin"))
    Util.MkDirs(dn_out)
    env = os.environ.copy()
    dn = "%s/%s/sst-age-accfreq-data" % (Conf.Get("dn_result"), Conf.Get("simulation_time_begin"))
    env["IN_DN"] = dn
    # Plot for all levels. Stop when there is no sstable at a level.
    level = 0
    while True:
      env["LEVEL"] = str(level)
      sst_lives = RocksDbLogReader.GetSstAccFreqByAgeDataFiles(level)
      if len(sst_lives) == 0:
        break
      env["SST_IDS"] = " ".join(str(sl.Id()) for sl in sst_lives)
      # Deletion ages (in simulated time), one per SSTable.
      age_deleted = []
      for sl in sst_lives:
        age_deleted.append(SimTime.ToSimulatedTimeDur((sl.TsDeleted() - sl.TsCreated()).total_seconds()))
      env["AGE_DELETED"] = " ".join(str(i) for i in age_deleted)
      # Age deleted max. Round up with an hour granularity.
      age_deleted_max = max(age_deleted)
      age_deleted_max = math.ceil(age_deleted_max / 3600.0) * 3600
      env["AGE_DELETED_MAX"] = str(age_deleted_max)
      # Level-wide and per-SSTable maxima of access frequency and
      # temperature, for common axis scaling in the multiplot.
      # accfreq[4] / accfreq[5]: presumably access frequency / temperature
      # columns of AgeAccfreq() -- TODO confirm against the writer.
      accfreq_max_all_sst_in_level = 0.0
      temp_max_all_sst_in_level = 0.0
      accfreq_max_list = []
      temp_max_list = []
      for sl in sst_lives:
        accfreq_max = 0.0
        temp_max = 0.0
        for accfreq in sl.AgeAccfreq():
          accfreq_max_all_sst_in_level = max(accfreq_max_all_sst_in_level, accfreq[4])
          temp_max_all_sst_in_level = max(temp_max_all_sst_in_level, accfreq[5])
          accfreq_max = max(accfreq_max, accfreq[4])
          temp_max = max(temp_max, accfreq[5])
        accfreq_max_list.append(accfreq_max)
        temp_max_list.append(temp_max)
      env["ACCFREQ_MAX_ALL_SST_IN_LEVEL"] = str(accfreq_max_all_sst_in_level)
      env["TEMP_MAX_ALL_SST_IN_LEVEL"] = str(temp_max_all_sst_in_level)
      env["ACCFREQ_MAX"] = " ".join(str(e) for e in accfreq_max_list)
      env["TEMP_MAX"] = " ".join(str(e) for e in temp_max_list)
      out_fn = "%s/L%d.pdf" % (dn_out, level)
      env["OUT_FN"] = out_fn
      with Cons.MT("Plotting level %d ..." % level):
        Util.RunSubp("gnuplot %s/sst-accfreq-by-age-multiplot-by-level.gnuplot" % os.path.dirname(__file__), env=env, print_cmd=False)
        Cons.P("Created %s %d" % (out_fn, os.path.getsize(out_fn)))
      level += 1
def GetHourlyFn():
  """Generate (or reuse) the hourly memory usage comparison file for 2 exps.

  Stats are divided by 1024^3 for the output columns (raw values
  presumably in bytes, output in GB -- TODO confirm with _GetMemStatByHour).
  Returns the output file path. Python 2 (dict.iteritems).
  """
  exp_dts = []
  for i in range(2):
    #Cons.P(Conf.Get(i))
    # Extract the experiment datetime from the configured log path.
    mo = re.match(r".+/(?P<exp_dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d)-d", Conf.Get(i))
    exp_dts.append(mo.group("exp_dt"))
  fn_out = "%s/memory-usage-by-time-%s" % (Conf.GetOutDir(), "-".join(exp_dts))
  if os.path.exists(fn_out):
    return fn_out
  with Cons.MT("Generating file for memory usage comparison ..."):
    dn_base = Conf.GetDir("dn_base")
    fn_ycsb_0 = "%s/%s" % (dn_base, Conf.Get(0))
    fn_ycsb_1 = "%s/%s" % (dn_base, Conf.Get(1))
    hour_memstat_0 = _GetMemStatByHour(fn_ycsb_0)
    hour_memstat_1 = _GetMemStatByHour(fn_ycsb_1)
    #Cons.P(hour_memstat_0)
    #Cons.P(hour_memstat_1)
    with open(fn_out, "w") as fo:
      # Record which log file maps to which column group.
      fo.write("# 0: %s\n" % fn_ycsb_0)
      fo.write("# 1: %s\n" % fn_ycsb_1)
      fo.write("#\n")
      fmt = "%2d" \
          " %5.3f %5.3f %5.3f %5.3f %5.3f %5.3f %5.3f %5.3f" \
          " %5.3f %5.3f %5.3f %5.3f %5.3f %5.3f %5.3f %5.3f"
      fo.write(Util.BuildHeader(fmt, "hour" \
          " 0_avg 0_min 0_1 0_25 0_50 0_75 0_99 0_max" \
          " 1_avg 1_min 1_1 1_25 1_50 1_75 1_99 1_max"
          ) + "\n")
      for h, s0 in sorted(hour_memstat_0.iteritems()):
        s1 = hour_memstat_1[h]
        fo.write((fmt + "\n") % (h
            , (float(s0.avg) / 1024 / 1024 / 1024)
            , (float(s0.min) / 1024 / 1024 / 1024)
            , (float(s0._1 ) / 1024 / 1024 / 1024)
            , (float(s0._25) / 1024 / 1024 / 1024)
            , (float(s0._50) / 1024 / 1024 / 1024)
            , (float(s0._75) / 1024 / 1024 / 1024)
            , (float(s0._99) / 1024 / 1024 / 1024)
            , (float(s0.max) / 1024 / 1024 / 1024)
            , (float(s1.avg) / 1024 / 1024 / 1024)
            , (float(s1.min) / 1024 / 1024 / 1024)
            , (float(s1._1 ) / 1024 / 1024 / 1024)
            , (float(s1._25) / 1024 / 1024 / 1024)
            , (float(s1._50) / 1024 / 1024 / 1024)
            , (float(s1._75) / 1024 / 1024 / 1024)
            , (float(s1._99) / 1024 / 1024 / 1024)
            , (float(s1.max) / 1024 / 1024 / 1024)
            ))
    Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
  return fn_out
def SstHeatAtLastTime():
  """Write the per-SSTable temperatures sampled near the end of the exp.

  Samples at the last of num_times evenly spaced points over the interval
  from the first SSTable-open time to the experiment finish time, then
  writes (sst_gen, level, temperature) rows sorted hottest-first.
  Returns the output file name; reuses the file when it already exists.
  """
  # Set Conf.Get("simulation_time_begin"), if not already set.
  if Conf.Get("simulation_time_begin") is None:
    MutantLogReader.Get()
  fn_hlt = "%s/sst-heat-last-time-%s" % (Conf.dn_result, Conf.Get("simulation_time_begin"))
  if os.path.isfile(fn_hlt):
    return fn_hlt
  sst_lives = MemtSstLife.GetSstLives()
  with Cons.MT("Generating Sst heats at the last time ..."):
    # Gather temperature info at n different times
    num_times = Conf.heatmap_by_time_num_times
    if Conf.ExpFinishTime() is None:
      MemtSstLife.SetExpEndTimeFromSstLives()
    # Earliest SSTable-open timestamp over all SSTables.
    min_sst_opened = None
    for sst_gen, sl in sorted(sst_lives.iteritems()):
      min_sst_opened = sl.TsCreated() if min_sst_opened is None else min(min_sst_opened, sl.TsCreated())
    # Start time is when the first Sstable is opened, not the experiment start
    # time, when no SSTable exists yet.
    #   Exp start time:          160927-143257.395
    #   First Sstable open time: 160927-143411.273
    st = datetime.datetime.strptime(min_sst_opened, "%y%m%d-%H%M%S.%f")
    et = datetime.datetime.strptime(Conf.ExpFinishTime(), "%y%m%d-%H%M%S.%f")
    dur = (et - st).total_seconds()
    sstgen_heat = []
    # Sample time: the last of the num_times evenly spaced points.
    # time_offset_in_sec is defined at module level -- TODO confirm.
    t = st + datetime.timedelta(seconds=(float(dur) * (num_times - 1) / num_times + time_offset_in_sec))
    for sst_gen, sl in sorted(sst_lives.iteritems()):
      h = sl.TempAtTime(t)
      # Skip SSTables with no temperature at the sample time.
      if h is None:
        continue
      sstgen_heat.append((sst_gen, h))
    # Hottest first.
    sstgen_heat.sort(key=lambda sh: sh[1], reverse=True)
    # Note: Don't bother with the width proportional to the tablet size for now
    fmt = "%4d %1d %8.3f"
    with open(fn_hlt, "w") as fo:
      # y0 is smaller than y1 (y0 is placed higher in the plot than y1).
      fo.write("%s\n" % Util.BuildHeader(fmt, "sst_gen level temperature"))
      for sh in sstgen_heat:
        sst_gen = sh[0]
        temp = sh[1]
        fo.write((fmt + "\n") % (sst_gen, sst_lives[sst_gen].level, temp))
    Cons.P("Created %s %d" % (fn_hlt, os.path.getsize(fn_hlt)))
  return fn_hlt
def GenDataCostVsMetrics(exp_set_id):
  """Write the cost-vs-performance data file for one experiment set.

  One row per storage device: cost ($/GB/month from Conf "stg_cost"),
  DB IOPS stats, and read/write latency stats from the YCSB log.
  Returns the output file name. Python 2 (dict.iteritems).
  """
  fn_out = "%s/rocksdb-ycsb-cost-vs-perf-%s" % (Conf.GetOutDir(), exp_set_id)
  fmt = "%5s %5.3f" \
      " %14.6f" \
      " %14.6f" \
      " %14.6f" \
      " %14.6f" \
      " %14.6f" \
      " %14.6f" \
      " %13.6f %10.6f %14.6f %14.6f %14.6f %14.6f %14.6f" \
      " %13.6f %10.6f %14.6f %14.6f %14.6f %14.6f %14.6f"
  with open(fn_out, "w") as fo:
    fo.write(Util.BuildHeader(fmt, "stg_dev cost_dollar_per_gb_per_month" \
        " db_iops.avg" \
        " db_iops.min" \
        " db_iops.max" \
        " db_iops._25" \
        " db_iops._50" \
        " db_iops._75" \
        " r_avg r_min r_max r_90 r_99 r_999 r_9999" \
        " w_avg w_min w_max w_90 w_99 w_999 w_9999"
        ) + "\n")
    for stg_dev, v in Conf.Get(exp_set_id).iteritems():
      lr = YcsbLogReader(exp_set_id, stg_dev)
      fo.write((fmt + "\n") % (
          stg_dev, float(Conf.Get("stg_cost")[stg_dev])
          , lr.GetStat("db_iops.avg")
          , lr.GetStat("db_iops.min")
          , lr.GetStat("db_iops.max")
          , lr.GetStat("db_iops._25")
          , lr.GetStat("db_iops._50")
          , lr.GetStat("db_iops._75")
          , lr.GetStat("r_avg")
          , lr.GetStat("r_min")
          , lr.GetStat("r_max")
          , lr.GetStat("r_90")
          , lr.GetStat("r_99")
          , lr.GetStat("r_999")
          , lr.GetStat("r_9999")
          , lr.GetStat("w_avg")
          , lr.GetStat("w_min")
          , lr.GetStat("w_max")
          , lr.GetStat("w_90")
          , lr.GetStat("w_99")
          , lr.GetStat("w_999")
          , lr.GetStat("w_9999")
          ))
  Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
  return fn_out
def GetSstAccFreqAtSpecificTime(at_simulated_time):
  """Write a snapshot of per-SSTable access frequencies at a given time.

  at_simulated_time: position in the simulated time range (passed to
    SimTime.SimulatedTimeAt(); presumably a 0.0-1.0 fraction -- TODO confirm).
  For each SSTable alive at the snapshot time t0, reports the access-count
  interval straddling t0. Returns the output file name; reuses the file
  when it already exists.

  Bug fix: the ts_before/ts_after columns were formatted with
  "%y%d%m-%H%M%S" (day before month), inconsistent with the
  "%y%m%d-%H%M%S" year-month-day format used throughout this code base;
  corrected to "%y%m%d-%H%M%S".
  """
  fn = "%s/sst-accfreq-%s-at-%.04f" % (Conf.Get("dn_result"), Conf.Get("simulation_time_begin"), at_simulated_time)
  if os.path.isfile(fn):
    return fn
  # t0: a specific time when we take the snapshot
  t0 = SimTime.SimulatedTimeAt(at_simulated_time)
  Cons.P("t0 (time of snapshot): %s" % t0)
  sst_lives = GetSstLives()
  with open(fn, "w") as fo:
    fmt = "%4d %13s %13s %7.3f %1d %5.2f"
    fo.write("# t0 (time of snapshot): %s\n" % t0)
    fo.write("%s\n" % Util.BuildHeader(
        fmt, "sst_id ts_before ts_after reads_per_64MB_per_sec level age_in_day"))
    for sst_id, sl in sst_lives.iteritems():
      # Skip SSTables not yet created at t0.
      if t0 < SimTime.ToSimulatedTime(sl.ts_created):
        continue
      # Skip SSTables already deleted at t0.
      if (sl.ts_deleted is not None) and (SimTime.ToSimulatedTime(sl.ts_deleted) < t0):
        continue
      # Find the access-count interval [ts_prev, ts] straddling t0.
      ts_prev = None
      for ts, v in sorted(sl.ts_acccnt.iteritems()):
        ts_simulated = SimTime.ToSimulatedTime(ts)
        if ts_simulated < t0:
          ts_prev = ts_simulated
          continue
        #Cons.P("ts_simulated: %s" % ts_simulated)
        if ts_prev is not None:
          #cnt = v[0]
          cnt_per_64MB_per_sec = v[1]
          #temp = v[2]
          fo.write((fmt + "\n") % (sst_id,
              ts_prev.strftime("%y%m%d-%H%M%S"),
              ts_simulated.strftime("%y%m%d-%H%M%S"),
              cnt_per_64MB_per_sec,
              sl.Level(),
              ((t0 - SimTime.ToSimulatedTime(sl.TsCreated())).total_seconds() / 3600.0 / 24)))
        break
  Cons.P("Created %s %d" % (fn, os.path.getsize(fn)))
  return fn
def Plot():
  """Render the client latency plot with gnuplot."""
  with Cons.MT("Plotting latency ..."):
    sim_time_begin = Conf.Get("simulation_time_begin")
    out_dn = "%s/%s" % (Conf.GetDir("output_dir"), sim_time_begin)
    Util.MkDirs(out_dn)
    out_fn = "%s/latency.pdf" % out_dn
    # The gnuplot script takes its input/output paths from the environment.
    gp_env = os.environ.copy()
    gp_env["FN_IN"] = "%s/client/%s" % (Conf.GetDir("log_dir"), sim_time_begin)
    gp_env["FN_OUT"] = out_fn
    with Cons.MT("Plotting ..."):
      Util.RunSubp("gnuplot %s/latency.gnuplot" % os.path.dirname(__file__), env=gp_env)
      Cons.P("Created %s %d" % (out_fn, os.path.getsize(out_fn)))
def GenDataFileForGnuplot():
  """Generate (or reuse) the gnuplot data file from the parsed dstat csv.

  Relies on _Parse() filling the module-level _header_idx and _body_rows.
  Returns the data file name.
  """
  dn = "%s/%s" % (Conf.GetDir("output_dir"), Conf.Get("simulation_time_begin"))
  Util.MkDirs(dn)
  fn = "%s/data" % dn
  if os.path.isfile(fn):
    return fn
  with Cons.MT("Generating data file for plot ..."):
    _Parse()
    fmt = "%9.0f %9.0f %9.0f %9.0f %9.0f %9.0f %9.0f %9.0f" \
        " %6.1f %6.1f %6.1f %6.1f %6.1f %6.1f %6.1f %6.1f" \
        " %8.0f %8.0f %8.0f %8.0f" \
        " %3.0f %3.0f" \
        " %3.0f %3.0f %11s" \
        " %3.1f %6.2f %3.1f %6.2f %6.2f %6.3f"
    # Column names follow the sorted _header_idx key order.
    header = Util.BuildHeader(
        fmt, " ".join(k for k, v in sorted(_header_idx.iteritems())))
    with open(fn, "w") as fo:
      i = 0
      for r in _body_rows:
        # Repeat the header every 50 rows for readability.
        if i % 50 == 0:
          fo.write("%s\n" % header)
        i += 1
        #Cons.P(fmt % tuple(r.Prepared()))
        fo.write((fmt + "\n") % tuple(r.Prepared()))
  Cons.P("Created %s %d" % (fn, os.path.getsize(fn)))
  return fn
def GenDataFilesForGnuplot():
  """Build the metric-by-time files for the two experiments and the
  compaction/migration stat file comparing them.

  Compares (a) an unmodified DB as a baseline and (b) Mutant (io_overhead).
  Returns (fn_metrics_by_time_0, fn_metrics_by_time_1, fn_rdb_compmigr).

  Cleanup: removed the unused local `fns_ycsb` (assigned an empty list and
  never read or appended to).
  """
  dn_base = Conf.GetDir("dn_base")
  log_readers = []
  for db_type in ["unmodified_db", "io_overhead"]:
    fn_ycsb = "%s/%s" % (dn_base, Conf.Get(db_type))
    # Path layout: <dn_log>/<job_id>/ycsb/<exp_dt>...
    mo = re.match(
        r"(?P<dn_log>.+)/(?P<job_id>\d\d\d\d\d\d-\d\d\d\d\d\d)/ycsb/(?P<exp_dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d).+",
        fn_ycsb)
    dn_log = mo.group("dn_log")
    job_id = mo.group("job_id")
    exp_dt = mo.group("exp_dt")
    #Cons.P(dn_log)
    #Cons.P(job_id)
    #Cons.P(exp_dt)
    dn_log_job = "%s/%s" % (dn_log, job_id)
    log_readers.append(RocksdbLogReader(dn_log_job, exp_dt))
  fn_metrics_by_time_0 = log_readers[0].FnMetricByTime()
  fn_metrics_by_time_1 = log_readers[1].FnMetricByTime()
  fn_rdb_compmigr = CompMigrStat.GetFnStat(fn_metrics_by_time_0, fn_metrics_by_time_1)
  return (fn_metrics_by_time_0, fn_metrics_by_time_1, fn_rdb_compmigr)
def main(argv):
  """Plot per-storage-device time series and cost-vs-latency for one exp set."""
  Util.MkDirs(Conf.GetOutDir())
  exp_set_id = "171013-134330"
  #exp_set_id = "171022-160102"
  conf_exp_set = Conf.Get(exp_set_id)
  if True:
    parallel_processing = True
    if parallel_processing:
      params = []
      for stg_dev, v in conf_exp_set.iteritems():
        params.append((exp_set_id, stg_dev, v))
      p = multiprocessing.Pool()
      p.map(PlotByTime, params)
    else:
      # Sequential fallback, useful when debugging PlotByTime.
      for stg_dev, v in conf_exp_set.iteritems():
        PlotByTime((exp_set_id, stg_dev, v))
  # Plot (cost vs latency) by storage devices
  # Latency in avg and tail latencies
  #
  # The goal:
  #   to show there are limited options
  #   and show the baseline performances.
  #
  # Finish this and show that this was not a fair comparison.
  if True:
    PlotCostLatency(exp_set_id)
def PlotCompareTwo(): (fns_rocksdb, fn_sst_creation_stat) = RocksdbLog.GenDataFilesForGnuplot() #fn_cpu_stat_by_time = CompareCpu.GetHourlyFn() fn_cpu_1min_avg = CompareCpu.Get1minAvgFn() fn_mem_stat_by_time = CompareMem.GetHourlyFn() fn_mem_1min_avg = CompareMem.Get1minAvgFn() #time_max = "09:00:00" #time_max = "08:00:00" time_max = "07:50:00" exp_dts = [] for i in range(2): mo = re.match(r".+/(?P<exp_dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d)-d", Conf.Get(i)) exp_dts.append(mo.group("exp_dt")) fn_out = "%s/mutant-overhead-%s.pdf" % (Conf.GetOutDir(), "-".join(exp_dts)) with Cons.MT("Plotting ..."): env = os.environ.copy() env["TIME_MAX"] = str(time_max) #env["CPU_STAT"] = fn_cpu_stat_by_time env["FN_CPU_1MIN_AVG"] = fn_cpu_1min_avg #env["MEM_STAT"] = fn_mem_stat_by_time env["FN_MEM_1MIN_AVG"] = fn_mem_1min_avg env["ROCKSDB0"] = fns_rocksdb[0] env["ROCKSDB1"] = fns_rocksdb[1] env["OUT_FN"] = fn_out Util.RunSubp("gnuplot %s/compare-two-exps.gnuplot" % os.path.dirname(__file__), env=env) Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
def main(argv):
  """Entry point: parse config, init the simulated clock, make the result
  dir, and plot."""
  Conf.ParseArgs()
  SimTime.Init(Conf.Get("simulation_time_begin"))
  Util.MkDirs(Conf.dn_result)
  Plot.Plot()
def GenDataThrpVsAllMetrics(root_str):
  """Print throughput vs dstat metrics for the experiments under the Conf
  root named root_str.

  Note: unlike sibling generators, this prints to the console rather than
  writing a data file (the fn_out logic is commented out).
  Python 2 (dict.iteritems).
  """
  #fn_out = "%s/rocksdb-ycsb-thrp-vs-dstat-metrics-by-stgdevs" % Conf.GetOutDir()
  #if os.path.exists(fn_out):
  #  return fn_out
  root = Conf.Get(root_str)
  with Cons.MT("Generating thrp vs dstat data: %s ..." % root_str):
    dn_base = root["dn_base"].replace("~", os.path.expanduser("~"))
    # {stg_dev: {target_iops: DstatLogReader}}
    stgdev_tio_dlr = {}
    for stgdev in ["local-ssd", "ebs-st1"]:
      if stgdev not in stgdev_tio_dlr:
        stgdev_tio_dlr[stgdev] = {}
      for target_iops, v in sorted(root[stgdev].iteritems()):
        # Example fn: 171125-110758/ycsb/171125-161934.339-d
        mo = re.match(
            r"(?P<dn>.+)/ycsb/(?P<exp_dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d).*", v["fn"])
        dn = "%s/%s" % (dn_base, mo.group("dn"))
        exp_dt = mo.group("exp_dt")
        #Cons.P("%s %s" % (dn, exp_dt))
        # "time" is a "begin-end" pair.
        t = v["time"].split("-")
        time_begin = t[0]
        time_end = t[1]
        overloaded = ("overloaded" in v) and v["overloaded"]
        # Fast enough. Takes about 5 secs. No need for a parallelization.
        stgdev_tio_dlr[stgdev][target_iops] = DstatLogReader(
            dn, exp_dt, time_begin, time_end, overloaded)
    fmt = "%17s %9s %6.0f %1d" \
        " %6.0f %4.0f %9.0f %8.0f %7.0f %6.0f" \
        " %6.3f %6.3f %9.3f %8.3f %8.3f %8.3f" \
        " %8.3f %8.3f %9.3f %8.3f"
    Cons.P("%s" % Util.BuildHeader(fmt, "exp_dt stg_dev target_iops overloaded" \
        " dsk/xvda:read dsk/xvda:writ dsk/xvdc:read dsk/xvdc:writ dsk/xvde:read dsk/xvde:writ" \
        " io/xvda:read io/xvda:writ io/xvdc:read io/xvdc:writ io/xvde:read io/xvde:writ" \
        " memory_usage:buff memory_usage:cach memory_usage:free memory_usage:used"
        ))
    for stgdev, v in sorted(stgdev_tio_dlr.iteritems()):
      for tio, dlr in sorted(v.iteritems()):
        # Memory averages divided by 1024^2 for the output columns
        # (raw unit not visible here -- TODO confirm with DstatLogReader).
        Cons.P(
            fmt % (dlr.exp_dt, stgdev, tio, (1 if dlr.overloaded else 0),
                   dlr.GetAvg("dsk/xvda:read"), dlr.GetAvg("dsk/xvda:writ"),
                   dlr.GetAvg("dsk/xvdc:read"), dlr.GetAvg("dsk/xvdc:writ"),
                   dlr.GetAvg("dsk/xvde:read"), dlr.GetAvg("dsk/xvde:writ"),
                   dlr.GetAvg("io/xvda:read"), dlr.GetAvg("io/xvda:writ"),
                   dlr.GetAvg("io/xvdc:read"), dlr.GetAvg("io/xvdc:writ"),
                   dlr.GetAvg("io/xvde:read"), dlr.GetAvg("io/xvde:writ"),
                   dlr.GetAvg("memory_usage:buff") / (1024 * 1024),
                   dlr.GetAvg("memory_usage:cach") / (1024 * 1024),
                   dlr.GetAvg("memory_usage:free") / (1024 * 1024),
                   dlr.GetAvg("memory_usage:used") / (1024 * 1024)))
def _Parse():
  """Parse the dstat csv into the module-level _header_idx and _body_rows.

  Idempotent: a second call returns immediately via the _parsed guard.
  """
  global _parsed
  if _parsed:
    return
  fn = "%s/dstat/%s.csv" % (Conf.GetDir("log_dir"), Conf.Get("simulation_time_begin"))
  with Cons.MT("Parsing %s ..." % fn):
    header_rows = []
    global _body_rows
    _body_rows = []
    with open(fn, "rb") as f:
      header_detected = False
      reader = csv.reader(f)
      for row in reader:
        # dstat csv header rows start with "system" or "time"; every row
        # after the header block is data.
        if (len(row) > 0) and (row[0] in ["system", "time"]):
          header_rows.append(row)
          header_detected = True
        elif header_detected:
          _body_rows.append(BodyRow(row))
      #Cons.P(pprint.pformat(header_rows))
    # Make sure the rows are all the same size
    num_cols = None
    for r in header_rows:
      if num_cols is None:
        num_cols = len(r)
      else:
        if num_cols != len(r):
          raise RuntimeError("Unexpected")
    for r in _body_rows:
      if num_cols != r.NumCols():
        raise RuntimeError("Unexpected")
    # Get column headers. The first header row spans categories, leaving
    # blanks under a spanning cell, so carry the previous value forward and
    # key each column as "category:subheader" with spaces replaced by "_".
    global _header_idx
    _header_idx = {}
    header_rows_0_prev = None
    for i in range(num_cols):
      if len(header_rows[0][i]) > 0:
        #Cons.P("%s, %s" % (header_rows[0][i], header_rows[1][i]))
        _header_idx["%s:%s" % (header_rows[0][i].replace(
            " ", "_"), header_rows[1][i].replace(" ", "_"))] = i
        header_rows_0_prev = header_rows[0][i].replace(" ", "_")
      else:
        #Cons.P("%s, %s" % (header_rows_0_prev, header_rows[1][i]))
        _header_idx["%s:%s" % (header_rows_0_prev.replace(
            " ", "_"), header_rows[1][i].replace(" ", "_"))] = i
    #Cons.P(pprint.pformat(_header_idx))
    # Sort the data in the header order and convert strings to numbers
    for b in _body_rows:
      b.PrepareData()
  _parsed = True
def _GenSstAccFreqByAgeDataFiles():
  """Generate one access-frequency-by-age data file per SSTable.

  Idempotent via the module-level flag. Builds the Memt/Sst life records
  first, then writes one file per SSTable id under the
  sst-age-accfreq-data directory.
  """
  global _gen_sst_accfreq_by_age_data_files
  if _gen_sst_accfreq_by_age_data_files:
    return
  _BuildMemtSstLives()
  #_CheckSizes()
  with Cons.MT("Generating Sst acc freq by age files ..."):
    dn0 = "%s/%s" % (Conf.Get("dn_result"), Conf.Get("simulation_time_begin"))
    dn1 = "%s/sst-age-accfreq-data" % dn0;
    Util.MkDirs(dn1)
    fmt = "%10.3f %10.3f %10.3f" \
        " %10.3f %10.3f %10.3f" \
        " %6.2f %7s"
    header = Util.BuildHeader(fmt, \
        "age_begin_simulation_time_in_sec age_end_simulation_time_in_sec age_dur_simulation_time_in_sec" \
        " age_begin_simulated_time_in_sec age_end_simulated_time_in_sec age_dur_simulated_time_in_sec" \
        " reads_per_64MB_per_sec temperature")
    i = 0
    for sst_id, sl in _sst_lives.iteritems():
      # TODO: why?
      # Exclude sstables without any accesses. None seems to be such though.
      #if sl.LenTsAcccnt() == 0:
      #  continue
      # TODO: Why do you want to exclude live SSTables? I don't see any reason.
      # Exclude ones without deleted ts. Those are current SSTables.
      #if sl.TsDeleted() is None:
      #  continue
      fn = "%s/%d" % (dn1, sst_id)
      with open(fn, "w") as fo:
        # Per-SSTable metadata as comment lines, then the column header.
        fo.write("# sst_id: %d\n" % sst_id)
        fo.write("# level: %d\n" % sl.Level())
        fo.write("# size: %d\n" % sl.Size())
        fo.write("#\n")
        fo.write("%s\n" % header)
        sl.WriteAgeAccfreq(fo, fmt)
      #Cons.P("Created %s %d" % (fn, os.path.getsize(fn)))
      i += 1
    Cons.P("Created %d files in %s" % (i, dn1))
  _gen_sst_accfreq_by_age_data_files = True
def Init():
  """Parse the simulation/simulated begin/end timestamps from the client
  log header into module-level datetime globals, then print them.
  """
  with Cons.MT("Init Conf ...", print_time=False):
    global _simulation_time_begin
    global _simulation_time_end
    global _simulated_time_begin
    global _simulated_time_end
    fn = "%s/client/%s" % (Conf.GetDir("log_dir"), Conf.Get("simulation_time_begin"))
    with open(fn) as fo:
      for line in fo:
        #Cons.P(line)
        # Header lines look like: "# simulation_time_begin: 161227-162418.288"
        mo = re.match(r"# simulation_time_begin: (?P<dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d)", line)
        if mo is not None:
          _simulation_time_begin = mo.group("dt")
          # The log must agree with the configured value.
          if _simulation_time_begin != Conf.Get("simulation_time_begin"):
            raise RuntimeError("Unexpected")
          _simulation_time_begin = datetime.datetime.strptime(_simulation_time_begin, "%y%m%d-%H%M%S.%f")
          continue
        mo = re.match(r"# simulation_time_end  : (?P<dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d)", line)
        if mo is not None:
          _simulation_time_end = mo.group("dt")
          _simulation_time_end = datetime.datetime.strptime(_simulation_time_end, "%y%m%d-%H%M%S.%f")
          continue
        mo = re.match(r"# simulated_time_begin : (?P<dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d)", line)
        if mo is not None:
          _simulated_time_begin = mo.group("dt")
          _simulated_time_begin = datetime.datetime.strptime(_simulated_time_begin, "%y%m%d-%H%M%S.%f")
          continue
        mo = re.match(r"# simulated_time_end   : (?P<dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d)", line)
        if mo is not None:
          _simulated_time_end = mo.group("dt")
          _simulated_time_end = datetime.datetime.strptime(_simulated_time_end, "%y%m%d-%H%M%S.%f")
          continue
    Cons.P("simulation_time_begin: %s" % _simulation_time_begin)
    Cons.P("simulation_time_end  : %s" % _simulation_time_end)
    Cons.P("simulated_time_begin : %s" % _simulated_time_begin)
    Cons.P("simulated_time_end   : %s" % _simulated_time_end)
def PlotSstAccfreqByAgeAll():
  """Plot all SSTables' access frequencies by age in one graph."""
  with Cons.MT("Plotting all SSTable access frequencies by their ages ..."):
    env = os.environ.copy()
    # TODO: Think about grouping once it's done. Like by their starting
    # temperatures or levels.
    #
    # TODO: Think about how to plot Sst ID labels.
    #
    sst_lives = RocksDbLogReader.GetSstAccFreqByAgeDataFiles()
    env["IN_SST_IDS"] = " ".join(str(sl.Id()) for sl in sst_lives)
    # Directory holding the per-SSTable data files.
    dn = "%s/%s/sst-age-accfreq-data" % (Conf.Get("dn_result"), Conf.Get("simulation_time_begin"))
    env["IN_DN"] = dn
    fn_out = "%s/%s/sst-accfreq-by-age-all-%s.pdf" \
        % (Conf.Get("dn_result"), Conf.Get("simulation_time_begin"), Conf.Get("simulation_time_begin"))
    env["OUT_FN"] = fn_out
    with Cons.MT("Plotting ..."):
      Util.RunSubp("gnuplot %s/sst-accfreq-by-age-all.gnuplot" % os.path.dirname(__file__), env=env)
      Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
def _Cpu():
  """Render the CPU usage plot from the generated dstat data file."""
  with Cons.MT("Plotting cpu ..."):
    gp_env = os.environ.copy()
    gp_env["FN_IN"] = CsvFile.GenDataFileForGnuplot()
    out_fn = "%s/%s/cpu.pdf" % (Conf.GetDir("output_dir"), Conf.Get("simulation_time_begin"))
    gp_env["FN_OUT"] = out_fn
    with Cons.MT("Plotting ..."):
      Util.RunSubp("gnuplot %s/cpu.gnuplot" % os.path.dirname(__file__), env=gp_env)
      Cons.P("Created %s %d" % (out_fn, os.path.getsize(out_fn)))
def PlotTimeVsMetrics():
  """Plot time-vs-metrics for every configured rocksdb and computation exp."""
  with Cons.MT("Plotting time vs metrics ..."):
    dn_base = Conf.GetDir("dn_base")
    exps_rocksdb = Conf.Get("rocksdb")
    exps_computation = Conf.Get("computation")
    #Cons.P(pprint.pformat(exps_rocksdb))
    #Cons.P(pprint.pformat(exps_computation))
    # One work item (full log path) per experiment of either type.
    params = []
    for e in exps_rocksdb:
      params.append("%s/%s" % (dn_base, e))
    for e in exps_computation:
      params.append("%s/%s" % (dn_base, e))
    parallel_processing = True
    if parallel_processing:
      with terminating(Pool()) as pool:
        pool.map(_PlotTimeVsAllMetrics, params)
    else:
      # Sequential fallback, useful when debugging.
      for p in params:
        _PlotTimeVsAllMetrics(p)
def GenDataThrpVsLat():
  """Generate (or reuse) the throughput-vs-latency data file by stg device.

  Returns the output file path. Python 2 (dict.iteritems).
  """
  fn_out = "%s/rocksdb-ycsb-thrp-vs-lat-by-stgdevs" % Conf.GetOutDir()
  if os.path.exists(fn_out):
    return fn_out
  with Cons.MT("Generating thrp vs lat data file ..."):
    dn_base = Conf.GetDir("dn_base")
    # {stg_dev: {target_iops: YcsbLogReader}}
    stgdev_tio_ylr = {}
    #for stgdev in ["local-ssd", "ebs-st1"]:
    for stgdev in ["ebs-st1"]:
      if stgdev not in stgdev_tio_ylr:
        stgdev_tio_ylr[stgdev] = {}
      for target_iops, v in sorted(Conf.Get(stgdev).iteritems()):
        fn = "%s/%s" % (dn_base, v["fn"])
        # "time" is a "begin-end" pair.
        t = v["time"].split("-")
        time_begin = t[0]
        time_end = t[1]
        overloaded = ("overloaded" in v) and v["overloaded"]
        stgdev_tio_ylr[stgdev][target_iops] = YcsbLogReader(fn, time_begin, time_end, overloaded)
    with open(fn_out, "w") as fo:
      fmt = "%9s %6.0f %1d %6.0f" \
          " %8.2f %8.2f %9.2f %10.2f %10.2f" \
          " %8.2f %8.2f %8.2f %9.2f %9.2f"
      fo.write("%s\n" % Util.BuildHeader(fmt, "stg_dev target_iops overloaded iops" \
          " r_avg r_90 r_99 r_99.9 r_99.99" \
          " w_avg w_90 w_99 w_99.9 w_99.99"
          ))
      for stgdev, v in sorted(stgdev_tio_ylr.iteritems()):
        for tio, ylr in sorted(v.iteritems()):
          fo.write((fmt + "\n") % (
              stgdev
              , tio
              , (1 if ylr.overloaded else 0)
              , ylr.db_iops_stat.avg
              , ylr.r_avg
              , ylr.r_90
              , ylr.r_99
              , ylr.r_999
              , ylr.r_9999
              , ylr.w_avg
              , ylr.w_90
              , ylr.w_99
              , ylr.w_999
              , ylr.w_9999
              ))
  Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
  return fn_out
def GetFnCompareTwo():
  """Build the time-vs-metrics files for the two configured experiments and
  the compaction/migration stat file comparing them.

  Returns (fn_metrics_by_time, fn_rdb_compmigr) where the first element is
  a 2-element list.
  """
  # Analyze the number of compactions and migrations with
  # (a) an unmodified DB as a baseline and (b) Mutant.
  base = Conf.GetDir("dn_base")
  fn_metrics_by_time = [
      GetFnTimeVsMetrics("%s/%s" % (base, Conf.Get(i))) for i in range(2)]
  fn_rdb_compmigr = CompMigrStat.GetFnStat(fn_metrics_by_time[0], fn_metrics_by_time[1])
  return (fn_metrics_by_time, fn_rdb_compmigr)
def GetClusteredPoints():
  """Return the clustered-points file, running _cluster.sh to build it when
  it does not exist yet."""
  threshold = Conf.Get("dist_sq_threshold")
  workload_fn = Conf.GetFn("youtube_workload")
  out_fn = "%s/%s-clustered-with-dist-sq-%s" % (
      Conf.DnOut(), os.path.basename(workload_fn), threshold)
  if not os.path.isfile(out_fn):
    # Delegate the clustering to the helper shell script.
    Util.RunSubp("%s/_cluster.sh --youtube_workload=%s --dist_sq_threshold=%s"
                 % (os.path.dirname(__file__), workload_fn, threshold))
  return out_fn
def CalcCompareTwo():
  """Compute cpu/mem overheads for every (rocksdb, computation) exp pair,
  print the pairs whose overheads are both >= 1.0 in sorted order, and
  return them as a list of ExpTuple.
  """
  with Cons.MT("Calculating the overhead of pairs ..."):
    dn_base = Conf.GetDir("dn_base")
    exps_rocksdb = Conf.Get("rocksdb")
    exps_computation = Conf.Get("computation")
    # Cartesian product: one work item per (rocksdb, computation) pair.
    params = []
    for r in exps_rocksdb:
      for c in exps_computation:
        params.append(("%s/%s" % (dn_base, r), "%s/%s" % (dn_base, c)))
    parallel_processing = True
    if parallel_processing:
      with terminating(Pool()) as pool:
        pool.map(_CalcCompareTwo, params)
    else:
      # Sequential fallback, useful when debugging.
      for p in params:
        _CalcCompareTwo(p)
    # Find the closest pair
    # You want the computation overhead one has the minimal overhead, but no smaller than the rocksdb one.
    exp_tuples = []
    for p in params:
      o_cpu = CompareCpu.GetOverhead(p[0], p[1])
      o_mem = CompareMem.GetOverhead(p[0], p[1])
      # Drop pairs where either overhead is below the baseline.
      if (o_cpu < 1.0) or (o_mem < 1.0):
        continue
      exp_tuples.append(ExpTuple(o_cpu, o_mem, p))
    fmt = "%8.6f %8.6f %17s %17s"
    Cons.P(
        Util.BuildHeader(
            fmt, "cpu_overhead mem_overhead expdt_rocksdb expdt_computation"))
    for e in sorted(exp_tuples):
      Cons.P(fmt % (e.o_cpu, e.o_mem, e.GetExpDt("r"), e.GetExpDt("c")))
    return exp_tuples
def Plot():
  """Plot the CloudFront edge and Whole Foods store locations with gnuplot."""
  cf_locs_fn = Conf.Get("cf_locs")
  wf_locs_fn = GetWfLocFile()
  out_fn = "%s/cloudfront-wholefoods-locations.pdf" % Conf.DnOut()
  with Cons.MT("Plotting Whole Foods store locations ..."):
    # The gnuplot script reads its input/output paths from the environment.
    gp_env = os.environ.copy()
    gp_env["FN_CF"] = cf_locs_fn
    gp_env["FN_WF"] = wf_locs_fn
    gp_env["FN_OUT"] = out_fn
    Util.RunSubp("gnuplot %s/edge-server-locs.gnuplot" % os.path.dirname(__file__), env=gp_env)
    Cons.P("Created %s %d" % (out_fn, os.path.getsize(out_fn)))
def main(argv):
  """Plot by-time graphs for the three experiment types, in parallel by
  default."""
  Util.MkDirs(Conf.GetOutDir())
  dn_base = Conf.GetDir("dn_base")
  db_types = ["unmodified_db", "computation_overhead", "io_overhead"]
  # Each worker receives a 1-tuple: (full path of the experiment log,)
  params = [("%s/%s" % (dn_base, Conf.Get(t)), ) for t in db_types]
  parallel_processing = True
  if parallel_processing:
    multiprocessing.Pool().map(PlotByTime, params)
  else:
    # Sequential fallback, useful when debugging PlotByTime.
    for prm in params:
      PlotByTime(prm)