def PlotSstAccfreqByAgeIndividual():
    """Render one access-frequency-by-age PDF per SSTable with gnuplot.

    PDFs are written to <dn_result>/<simulation_time_begin>/sst-age-accfreq-plot
    and named L<level>-<id>.pdf. The gnuplot script gets its parameters through
    the environment variables IN_FN, LEVEL, AGE_DELETED and OUT_FN.

    Raises:
        RuntimeError: if an SSTable's TsDeleted() is None.
    """
    with Cons.MT("Plotting individual SSTable access frequencies by their ages ..."):
        plot_dn = "%s/%s/sst-age-accfreq-plot" % (
            Conf.Get("dn_result"), Conf.Get("simulation_time_begin"))
        Util.MkDirs(plot_dn)

        # The script lives next to this module; the command is the same for
        # every SSTable, only the environment changes.
        gnuplot_cmd = "gnuplot %s/sst-accfreq-by-age-individual.gnuplot" \
            % os.path.dirname(__file__)

        plot_env = os.environ.copy()
        for sst in RocksDbLogReader.GetSstAccFreqByAgeDataFiles():
            plot_env["IN_FN"] = "%s/%s/sst-age-accfreq-data/%d" % (
                Conf.Get("dn_result"),
                Conf.Get("simulation_time_begin"),
                sst.Id())
            plot_env["LEVEL"] = str(sst.Level())

            deleted_at = sst.TsDeleted()
            if deleted_at is None:
                raise RuntimeError("Unexpected")

            # Lifetime of the SSTable, converted to simulated time.
            lifetime_sec = (deleted_at - sst.TsCreated()).total_seconds()
            plot_env["AGE_DELETED"] = str(SimTime.ToSimulatedTimeDur(lifetime_sec))

            pdf_fn = "%s/L%d-%d.pdf" % (plot_dn, sst.Level(), sst.Id())
            plot_env["OUT_FN"] = pdf_fn

            started_at = time.time()
            Util.RunSubp(gnuplot_cmd, env=plot_env, print_cmd=False)
            elapsed_ms = (time.time() - started_at) * 1000.0
            Cons.P("Created %s %d in %.0f ms"
                   % (pdf_fn, os.path.getsize(pdf_fn), elapsed_ms))
def PlotSstAccfreqByAgeIndividualMultiplot():
    """Plot SSTable access frequencies by age, one multiplot PDF per level.

    Levels are visited from 0 upward; the loop stops at the first level for
    which RocksDbLogReader.GetSstAccFreqByAgeDataFiles(level) returns nothing.
    PDFs are written to <dn_result>/<simulation_time_begin>/sst-age-accfreq-plot
    as L<level>.pdf.

    The gnuplot script is parameterized through environment variables:
      IN_DN, OUT_FN, LEVEL         paths and the level being plotted
      SST_IDS, AGE_DELETED,
      ACCFREQ_MAX, TEMP_MAX        space-separated, one entry per SSTable
      AGE_DELETED_MAX              max age, rounded up to a whole hour (secs)
      ACCFREQ_MAX_ALL_SST_IN_LEVEL,
      TEMP_MAX_ALL_SST_IN_LEVEL    maxima over every SSTable in the level

    Raises:
        RuntimeError: if an SSTable's TsDeleted() is None.
    """
    with Cons.MT("Plotting individual SSTable access frequencies by their ages ..."):
        dn_out = "%s/%s/sst-age-accfreq-plot" % (
            Conf.Get("dn_result"), Conf.Get("simulation_time_begin"))
        Util.MkDirs(dn_out)

        env = os.environ.copy()
        env["IN_DN"] = "%s/%s/sst-age-accfreq-data" % (
            Conf.Get("dn_result"), Conf.Get("simulation_time_begin"))

        gnuplot_cmd = "gnuplot %s/sst-accfreq-by-age-multiplot-by-level.gnuplot" \
            % os.path.dirname(__file__)

        # Plot for all levels. Stop when there is no sstable at a level.
        level = 0
        while True:
            env["LEVEL"] = str(level)
            sst_lives = RocksDbLogReader.GetSstAccFreqByAgeDataFiles(level)
            if not sst_lives:
                break
            env["SST_IDS"] = " ".join(str(sl.Id()) for sl in sst_lives)

            age_deleted = []
            for sl in sst_lives:
                ts_deleted = sl.TsDeleted()
                if ts_deleted is None:
                    # Fail with a clear error instead of a TypeError from
                    # subtracting a datetime from None.
                    raise RuntimeError(
                        "SSTable %s at level %d has no deletion timestamp"
                        % (sl.Id(), level))
                age_deleted.append(SimTime.ToSimulatedTimeDur(
                    (ts_deleted - sl.TsCreated()).total_seconds()))
            env["AGE_DELETED"] = " ".join(str(a) for a in age_deleted)

            # Age deleted max. Round up with an hour granularity.
            age_deleted_max = math.ceil(max(age_deleted) / 3600.0) * 3600
            env["AGE_DELETED_MAX"] = str(age_deleted_max)

            # Per-SSTable maxima. Each starts from 0.0, so an SSTable with no
            # records (or only non-positive values) reports 0.0.
            accfreq_max_list = []
            temp_max_list = []
            for sl in sst_lives:
                accfreq_max = 0.0
                temp_max = 0.0
                for rec in sl.AgeAccfreq():
                    # rec[4] is the access frequency, rec[5] the temperature.
                    accfreq_max = max(accfreq_max, rec[4])
                    temp_max = max(temp_max, rec[5])
                accfreq_max_list.append(accfreq_max)
                temp_max_list.append(temp_max)

            # Level-wide maxima are the maxima of the per-SSTable maxima.
            accfreq_max_all_sst_in_level = max([0.0] + accfreq_max_list)
            temp_max_all_sst_in_level = max([0.0] + temp_max_list)

            env["ACCFREQ_MAX_ALL_SST_IN_LEVEL"] = str(accfreq_max_all_sst_in_level)
            # Legacy spelling (with a space) kept so that nothing that may
            # still look it up breaks. A shell cannot expand a variable whose
            # name contains a space, so new code should use the key above.
            env["ACCFREQ_MAX_ALL_SST_IN LEVEL"] = env["ACCFREQ_MAX_ALL_SST_IN_LEVEL"]
            env["TEMP_MAX_ALL_SST_IN_LEVEL"] = str(temp_max_all_sst_in_level)
            env["ACCFREQ_MAX"] = " ".join(str(e) for e in accfreq_max_list)
            env["TEMP_MAX"] = " ".join(str(e) for e in temp_max_list)

            out_fn = "%s/L%d.pdf" % (dn_out, level)
            env["OUT_FN"] = out_fn

            with Cons.MT("Plotting level %d ..." % level):
                Util.RunSubp(gnuplot_cmd, env=env, print_cmd=False)
                Cons.P("Created %s %d" % (out_fn, os.path.getsize(out_fn)))

            level += 1
def _CalcAgeAccfreq(self):
    """Populate self.age_accfreq from self.ts_acccnt; no-op if already done.

    self.ts_acccnt maps a timestamp to a record v. Entries are processed in
    timestamp order and each one appends a tuple to self.age_accfreq:

        (age_begin, age_end,
         age_begin_simulated_time, age_end_simulated_time,
         v[1],   # cnt_per_64MB_per_sec
         v[2])   # temp

    age_end is the number of seconds between self.ts_created and the
    timestamp. age_begin is one second earlier, clamped so that it is never
    negative and never earlier than the previous entry's age_end. The
    *_simulated_time values are the same ages passed through
    SimTime.ToSimulatedTimeDur().
    """
    if self.age_accfreq is not None:
        return

    # Temperature is not calculated here; it is already calculated by
    # RocksDB-Mutant and arrives as v[2]. For reference, the model drops the
    # temperature by a factor of 0.99 each time unit: with a time unit of one
    # second, temperature 1 decays to 0.001 in 687.31586483 seconds
    # (0.99 ^ n = 0.001), i.e. 11 min 27 secs.
    self.age_accfreq = []
    age_end_prev = None

    # items() (rather than the Python 2-only iteritems()) keeps this working
    # on both Python 2 and Python 3.
    for ts, v in sorted(self.ts_acccnt.items()):
        age_end = (ts - self.ts_created).total_seconds()

        # Each record covers at most the second leading up to ts.
        age_begin = age_end - 1.0
        if age_begin < 0.0:
            age_begin = 0.0
        # Intervals must not overlap the previous record's interval.
        if (age_end_prev is not None) and (age_begin < age_end_prev):
            age_begin = age_end_prev

        self.age_accfreq.append((
            age_begin,
            age_end,
            SimTime.ToSimulatedTimeDur(age_begin),
            SimTime.ToSimulatedTimeDur(age_end),
            v[1],  # cnt_per_64MB_per_sec
            v[2],  # temp
        ))

        age_end_prev = age_end
def _CalcAgeAccfreq(self):
    """Populate self.age_accfreq from self.ts_acccnt; no-op if already done.

    self.ts_acccnt maps a timestamp to a record v. Entries are processed in
    timestamp order and each one appends a tuple to self.age_accfreq:

        (age_begin, age_end,
         age_begin_simulated_time, age_end_simulated_time,
         v[1],   # cnt_per_64MB_per_sec
         v[2])   # temp

    age_end is the number of seconds between self.ts_created and the
    timestamp. age_begin is one second earlier, clamped so that it is never
    negative and never earlier than the previous entry's age_end. The
    *_simulated_time values are the same ages passed through
    SimTime.ToSimulatedTimeDur().
    """
    if self.age_accfreq is not None:
        return

    # Neither the access frequency nor the temperature is calculated here:
    # both were already calculated by RocksDB-Mutant and arrive as v[1] and
    # v[2]. The model that used to be computed in this loop, for reference:
    #
    # Access frequency (unit: num / 64 MB / sec), calculated as if the
    # accesses all happened at the time ts:
    #   cnt_per_size = v[0] / (self.size / (64.0 * 1024 * 1024))
    #   acc_freq = cnt_per_size / (age_end_simulated_time - age_begin_simulated_time)
    #
    # Temperature:
    # - Defined using simulated time. The assumption is that RocksDB knows the
    #   simulated time; in practice it is the wall clock time.
    # - It drops by temp_drop_alpha = 0.99 each time unit. When the time unit
    #   is a second, temperature 1 drops to 0.001 in 687.31586483 seconds
    #   (0.99 ^ n = 0.001), i.e. 11 min 27 secs, which seems reasonable.
    # - Initial temperature: while age_end_simulated_time < 5 min,
    #   cnt_per_size is only accumulated. The first record at or after 5 min
    #   sets temp = accumulated cnt_per_size / age_end_simulated_time.
    # - Afterwards:
    #     temp = temp * temp_drop_alpha ^ (age_end_simulated_time - previous age_end_simulated_time)
    #            + cnt_per_size * (1.0 - temp_drop_alpha)
    # - Mutant updates it every 5 or 10 minutes; here it only matters for
    #   plotting.
    self.age_accfreq = []
    age_end_prev = None

    # items() (rather than the Python 2-only iteritems()) keeps this working
    # on both Python 2 and Python 3.
    for ts, v in sorted(self.ts_acccnt.items()):
        age_end = (ts - self.ts_created).total_seconds()

        # Each record covers at most the second leading up to ts.
        age_begin = age_end - 1.0
        if age_begin < 0.0:
            age_begin = 0.0
        # Intervals must not overlap the previous record's interval.
        if (age_end_prev is not None) and (age_begin < age_end_prev):
            age_begin = age_end_prev

        self.age_accfreq.append((
            age_begin,
            age_end,
            SimTime.ToSimulatedTimeDur(age_begin),
            SimTime.ToSimulatedTimeDur(age_end),
            v[1],  # cnt_per_64MB_per_sec
            v[2],  # temp
        ))

        age_end_prev = age_end
def PlotSstAccfreqAtSpecificTime(at_simulated_time):
    """Plot SSTable access frequencies at one point in simulated time.

    The input file name comes from
    RocksDbLogReader.GetSstAccFreqAtSpecificTime(at_simulated_time). The
    gnuplot script receives it as IN_FN and is asked to write two PDFs next to
    it, <in_fn>.pdf (OUT_FN) and <in_fn>-2.pdf (OUT_FN2).

    Args:
        at_simulated_time: forwarded unchanged to
            RocksDbLogReader.GetSstAccFreqAtSpecificTime().
    """
    in_fn = RocksDbLogReader.GetSstAccFreqAtSpecificTime(at_simulated_time)
    out_fn = "%s.pdf" % in_fn
    out_fn2 = "%s-2.pdf" % in_fn

    with Cons.MT("Plotting SSTable access frequencies at specific time ..."):
        env = os.environ.copy()
        env["IN_FN"] = in_fn
        env["OUT_FN"] = out_fn
        env["OUT_FN2"] = out_fn2

        Util.RunSubp("gnuplot %s/sst-accfreq-at-specific-time.gnuplot"
                     % os.path.dirname(__file__), env=env)
        Cons.P("Created %s %d" % (out_fn, os.path.getsize(out_fn)))
        Cons.P("Created %s %d" % (out_fn2, os.path.getsize(out_fn2)))

    # TODO: plot by rank, sst, age, level and (sst or age) and explain why none
    # of them works perfectly. This motivates the need for direct sstable
    # access monitoring.
    # - Also good for guaranteeing latency SLOs, since we are directly working
    #   on the access frequencies rather than indirect metrics.
    #
    # NOTE: a draft per-level plotting loop used to follow an unconditional
    # return here. It could never run, and it referenced an undefined dn_out,
    # so it was dropped.