Esempio n. 1
0
def PlotSstAccfreqByAgeIndividual():
    # Produce one access-frequency-by-age pdf per SSTable via gnuplot.
    with Cons.MT(
            "Plotting individual SSTable access frequencies by their ages ..."
    ):
        out_dir = "%s/%s/sst-age-accfreq-plot" % (
            Conf.Get("dn_result"), Conf.Get("simulation_time_begin"))
        Util.MkDirs(out_dir)

        gp_env = os.environ.copy()
        for sst in RocksDbLogReader.GetSstAccFreqByAgeDataFiles():
            gp_env["IN_FN"] = "%s/%s/sst-age-accfreq-data/%d" \
              % (Conf.Get("dn_result"), Conf.Get("simulation_time_begin"), sst.Id())
            gp_env["LEVEL"] = str(sst.Level())

            # Every SSTable plotted here is expected to have a deletion time.
            if sst.TsDeleted() is None:
                raise RuntimeError("Unexpected")
            gp_env["AGE_DELETED"] = str(
                SimTime.ToSimulatedTimeDur(
                    (sst.TsDeleted() - sst.TsCreated()).total_seconds()))

            fn_plot = "%s/L%d-%d.pdf" % (out_dir, sst.Level(), sst.Id())
            gp_env["OUT_FN"] = fn_plot
            t0 = time.time()
            Util.RunSubp("gnuplot %s/sst-accfreq-by-age-individual.gnuplot" %
                         os.path.dirname(__file__),
                         env=gp_env,
                         print_cmd=False)
            Cons.P("Created %s %d in %.0f ms" %
                   (fn_plot, os.path.getsize(fn_plot),
                    (time.time() - t0) * 1000.0))
Esempio n. 2
0
def Get1minAvgFn():
  """Build (or reuse) a gnuplot data file of 1-minute average memory usage.

  Compares the two experiments configured at Conf.Get(0) and Conf.Get(1).
  Returns the path of the generated file.
  """
  exp_dts = []
  for i in range(2):
    # Log path ends like .../180126-193525.769-d; extract the datetime stamp.
    mo = re.match(r".+/(?P<exp_dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d)-d", Conf.Get(i))
    exp_dts.append(mo.group("exp_dt"))
  fn_out = "%s/mem-1minavg-%s" % (Conf.GetOutDir(), "-".join(exp_dts))
  if os.path.exists(fn_out):
    return fn_out

  with Cons.MT("Creating avg memory usage comparison file for plotting ..."):
    records = []
    dn_base = Conf.GetDir("dn_base")
    for i in range(2):
      fn_ycsb_log = "%s/%s" % (dn_base, Conf.Get(i))
      hm_mem = _GetHmMem(fn_ycsb_log)
      for hm, mem in hm_mem.iteritems():
        records.append(_RecordMemAvg(hm, i * 30, mem, i))
    records.sort(key=operator.attrgetter("ts"))

  fmt = "%8s %6.3f %1d"
  header = Util.BuildHeader(fmt, "timestamp mem_avg_in_gb exp_type")
  with open(fn_out, "w") as fo:
    i = 0
    for r in records:
      # Repeat the header every 40 data rows for readability.
      if i % 40 == 0:
        fo.write(header + "\n")
      # BUG FIX: increment per record, not only when the header is written;
      # previously i stuck at 1 and the header was never repeated.
      i += 1
      fo.write("%s\n" % r.ToStr(fmt))
  Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
  return fn_out
Esempio n. 3
0
def _GetFnCpuOverhead():
    # Generate (or reuse) a data file comparing hourly CPU usage of the
    # unmodified DB and the computation-overhead experiment; return its path.
    fn_out = "%s/cpu-overhead-by-time" % Conf.GetOutDir()
    if os.path.exists(fn_out):
        return fn_out

    dn_base = Conf.GetDir("dn_base")
    stat_u = _GetCpuStatByHour("%s/%s" % (dn_base, Conf.Get("unmodified_db")))
    stat_c = _GetCpuStatByHour("%s/%s" % (dn_base, Conf.Get("computation_overhead")))

    with open(fn_out, "w") as fo:
        fo.write("# u: unmodified\n")
        fo.write(
            "# c: with SSTable access monitoring and SSTable placement computation\n"
        )
        fo.write("#\n")
        fmt = "%2d" \
            " %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f" \
            " %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f"
        fo.write(Util.BuildHeader(fmt, "hour" \
            " u_avg u_min u_1 u_25 u_50 u_75 u_99 u_max" \
            " c_avg c_min c_1 c_25 c_50 c_75 c_99 c_max"
            ) + "\n")
        for h, s0 in sorted(stat_u.iteritems()):
            s1 = stat_c[h]
            row = (h,
                   s0.avg, s0.min, s0._1, s0._25, s0._50, s0._75, s0._99, s0.max,
                   s1.avg, s1.min, s1._1, s1._25, s1._50, s1._75, s1._99, s1.max)
            fo.write((fmt + "\n") % row)
    Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
    return fn_out
Esempio n. 4
0
def main(argv):
    # Parse CLI arguments, prepare the result directory, and plot.
    Conf.ParseArgs()
    Util.MkDirs(Conf.Get("dn_result"))

    SimTime.Init(Conf.Get("simulation_time_begin"))
    Plot.Plot()
Esempio n. 5
0
def GetHourlyFn():
    # Generate (or reuse) the hourly CPU-usage comparison file for the two
    # experiments at Conf.Get(0) and Conf.Get(1); return its path.
    fn_out = "%s/cpu-hourly-usage" % Conf.GetOutDir()
    if os.path.exists(fn_out):
        return fn_out

    with Cons.MT("Generating file for cpu usage comparison ..."):
        dn_base = Conf.GetDir("dn_base")
        fn_ycsb_0 = "%s/%s" % (dn_base, Conf.Get(0))
        fn_ycsb_1 = "%s/%s" % (dn_base, Conf.Get(1))

        stat0 = _GetCpuStatByHour(fn_ycsb_0)
        stat1 = _GetCpuStatByHour(fn_ycsb_1)

        with open(fn_out, "w") as fo:
            fo.write("# 0: %s\n" % fn_ycsb_0)
            fo.write("# 1: %s\n" % fn_ycsb_1)
            fo.write("#\n")
            fmt = "%2d" \
                " %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f" \
                " %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f"
            fo.write(Util.BuildHeader(fmt, "hour" \
                " 0_avg 0_min 0_1 0_25 0_50 0_75 0_99 0_max" \
                " 1_avg 1_min 1_1 1_25 1_50 1_75 1_99 1_max"
                ) + "\n")
            for h, s0 in sorted(stat0.iteritems()):
                s1 = stat1[h]
                vals = (h,
                        s0.avg, s0.min, s0._1, s0._25, s0._50, s0._75, s0._99, s0.max,
                        s1.avg, s1.min, s1._1, s1._25, s1._50, s1._75, s1._99, s1.max)
                fo.write((fmt + "\n") % vals)
        Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
        return fn_out
Esempio n. 6
0
def Get1minAvgFn():
    """Build (or reuse) a gnuplot data file of 1-minute average CPU usage.

    Compares the two experiments configured at Conf.Get(0) and Conf.Get(1).
    Returns the path of the generated file.
    """
    exp_dts = []
    for i in range(2):
        # Log path looks like computation/180126-142513/ycsb/180126-193525.769-d
        mo = re.match(r".+/(?P<exp_dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d)-d",
                      Conf.Get(i))
        exp_dts.append(mo.group("exp_dt"))
    fn_out = "%s/cpu-1minavg-%s" % (Conf.GetOutDir(), "-".join(exp_dts))
    if os.path.exists(fn_out):
        return fn_out

    with Cons.MT("Creating avg cpu usage comparison file for plotting ..."):
        records = []
        dn_base = Conf.GetDir("dn_base")
        for i in range(2):
            fn_ycsb_log = "%s/%s" % (dn_base, Conf.Get(i))
            hm_cpu = _GetHmCpu(fn_ycsb_log)
            for hm, cpu in hm_cpu.iteritems():
                records.append(_RecordCpuAvg(hm, i * 30, cpu, i))
        records.sort(key=operator.attrgetter("ts"))

    fmt = "%8s %6.2f %1d"
    header = Util.BuildHeader(fmt, "timestamp cpu_avg exp_type")
    with open(fn_out, "w") as fo:
        i = 0
        for r in records:
            # Repeat the header every 40 data rows for readability.
            if i % 40 == 0:
                fo.write(header + "\n")
            # BUG FIX: increment per record, not only when the header is
            # written; previously i stuck at 1 and the header never repeated.
            i += 1
            fo.write("%s\n" % r.ToStr(fmt))
    Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
    return fn_out
Esempio n. 7
0
def PlotSstAccfreqByAgeIndividualMultiplot():
	"""Multiplot SSTable access frequencies by age, one pdf per LSM level.

	Iterates levels from 0 upward; stops at the first level with no SSTables.
	"""
	with Cons.MT("Plotting individual SSTable access frequencies by their ages ..."):
		dn_out = "%s/%s/sst-age-accfreq-plot" % (Conf.Get("dn_result"), Conf.Get("simulation_time_begin"))
		Util.MkDirs(dn_out)

		env = os.environ.copy()
		dn = "%s/%s/sst-age-accfreq-data" % (Conf.Get("dn_result"), Conf.Get("simulation_time_begin"))
		env["IN_DN"] = dn

		# Plot for all levels. Stop when there is no sstable at a level.
		level = 0
		while True:
			env["LEVEL"] = str(level)
			sst_lives = RocksDbLogReader.GetSstAccFreqByAgeDataFiles(level)
			if len(sst_lives) == 0:
				break
			env["SST_IDS"] = " ".join(str(sl.Id()) for sl in sst_lives)

			# Deletion age (in simulated time) of each SSTable at this level.
			age_deleted = []
			for sl in sst_lives:
				age_deleted.append(SimTime.ToSimulatedTimeDur((sl.TsDeleted() - sl.TsCreated()).total_seconds()))
			env["AGE_DELETED"] = " ".join(str(i) for i in age_deleted)

			# Age deleted max. Round up with an hour granularity.
			age_deleted_max = max(age_deleted)
			age_deleted_max = math.ceil(age_deleted_max / 3600.0) * 3600
			env["AGE_DELETED_MAX"] = str(age_deleted_max)

			# Per-SSTable and per-level maxima of the access-frequency column
			# (accfreq[4]) and temperature column (accfreq[5]).
			accfreq_max_all_sst_in_level = 0.0
			temp_max_all_sst_in_level = 0.0
			accfreq_max_list = []
			temp_max_list = []
			for sl in sst_lives:
				accfreq_max = 0.0
				temp_max = 0.0
				for accfreq in sl.AgeAccfreq():
					accfreq_max_all_sst_in_level = max(accfreq_max_all_sst_in_level, accfreq[4])
					temp_max_all_sst_in_level = max(temp_max_all_sst_in_level, accfreq[5])
					accfreq_max = max(accfreq_max, accfreq[4])
					temp_max = max(temp_max, accfreq[5])
				accfreq_max_list.append(accfreq_max)
				temp_max_list.append(temp_max)

			# BUG FIX: the key previously contained a space
			# ("ACCFREQ_MAX_ALL_SST_IN LEVEL"), which a gnuplot script cannot
			# read as an environment variable name.
			env["ACCFREQ_MAX_ALL_SST_IN_LEVEL"] = str(accfreq_max_all_sst_in_level)
			env["TEMP_MAX_ALL_SST_IN_LEVEL"] = str(temp_max_all_sst_in_level)
			env["ACCFREQ_MAX"] = " ".join(str(e) for e in accfreq_max_list)
			env["TEMP_MAX"] = " ".join(str(e) for e in temp_max_list)

			out_fn = "%s/L%d.pdf" % (dn_out, level)
			env["OUT_FN"] = out_fn

			with Cons.MT("Plotting level %d ..." % level):
				Util.RunSubp("gnuplot %s/sst-accfreq-by-age-multiplot-by-level.gnuplot" % os.path.dirname(__file__), env=env, print_cmd=False)
				Cons.P("Created %s %d" % (out_fn, os.path.getsize(out_fn)))

			level += 1
Esempio n. 8
0
def GetHourlyFn():
  # Generate (or reuse) the hourly memory-usage comparison file (values
  # divided down to GB) for the two experiments at Conf.Get(0) and Conf.Get(1).
  exp_dts = []
  for i in range(2):
    mo = re.match(r".+/(?P<exp_dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d)-d", Conf.Get(i))
    exp_dts.append(mo.group("exp_dt"))
  fn_out = "%s/memory-usage-by-time-%s" % (Conf.GetOutDir(), "-".join(exp_dts))
  if os.path.exists(fn_out):
    return fn_out

  def to_gb(v):
    # Same sequential divisions as before to keep float results identical.
    return float(v) / 1024 / 1024 / 1024

  with Cons.MT("Generating file for memory usage comparison ..."):
    dn_base = Conf.GetDir("dn_base")
    fn_ycsb_0 = "%s/%s" % (dn_base, Conf.Get(0))
    fn_ycsb_1 = "%s/%s" % (dn_base, Conf.Get(1))

    stat0 = _GetMemStatByHour(fn_ycsb_0)
    stat1 = _GetMemStatByHour(fn_ycsb_1)

    with open(fn_out, "w") as fo:
      fo.write("# 0: %s\n" % fn_ycsb_0)
      fo.write("# 1: %s\n" % fn_ycsb_1)
      fo.write("#\n")
      fmt = "%2d" \
          " %5.3f %5.3f %5.3f %5.3f %5.3f %5.3f %5.3f %5.3f" \
          " %5.3f %5.3f %5.3f %5.3f %5.3f %5.3f %5.3f %5.3f"
      fo.write(Util.BuildHeader(fmt, "hour" \
          " 0_avg 0_min 0_1 0_25 0_50 0_75 0_99 0_max" \
          " 1_avg 1_min 1_1 1_25 1_50 1_75 1_99 1_max"
          ) + "\n")
      for h, s0 in sorted(stat0.iteritems()):
        s1 = stat1[h]
        row = [h]
        for s in (s0, s1):
          row.extend([to_gb(s.avg), to_gb(s.min), to_gb(s._1), to_gb(s._25),
                      to_gb(s._50), to_gb(s._75), to_gb(s._99), to_gb(s.max)])
        fo.write((fmt + "\n") % tuple(row))
    Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
  return fn_out
Esempio n. 9
0
def SstHeatAtLastTime():
	"""Generate a file of per-SSTable temperatures sampled near the end of the
	run; return its path (reused if it already exists)."""
	# Set Conf.Get("simulation_time_begin"), if not already set.
	if Conf.Get("simulation_time_begin") is None:
		MutantLogReader.Get()

	fn_hlt = "%s/sst-heat-last-time-%s" % (Conf.dn_result, Conf.Get("simulation_time_begin"))
	if os.path.isfile(fn_hlt):
		return fn_hlt

	sst_lives = MemtSstLife.GetSstLives()

	with Cons.MT("Generating Sst heats at the last time ..."):
		# Gather temperature info at n different times
		num_times = Conf.heatmap_by_time_num_times

		if Conf.ExpFinishTime() is None:
			MemtSstLife.SetExpEndTimeFromSstLives()

		# Earliest SSTable-open timestamp across all SSTable lives.
		min_sst_opened = None
		for sst_gen, sl in sorted(sst_lives.iteritems()):
			min_sst_opened = sl.TsCreated() if min_sst_opened is None else min(min_sst_opened, sl.TsCreated())

		# Start time is when the first Sstable is opened, not the experiment start
		# time, when no SSTable exists yet.
		#   Exp start time:          160927-143257.395
		#   First Sstable open time: 160927-143411.273
		st = datetime.datetime.strptime(min_sst_opened, "%y%m%d-%H%M%S.%f")
		et = datetime.datetime.strptime(Conf.ExpFinishTime(),   "%y%m%d-%H%M%S.%f")
		dur = (et - st).total_seconds()

		# Sample at the last of num_times evenly spaced instants.
		# NOTE(review): time_offset_in_sec is not defined in this function —
		# presumably a module-level value; confirm it is set before this runs.
		sstgen_heat = []
		t = st + datetime.timedelta(seconds=(float(dur) * (num_times - 1) / num_times + time_offset_in_sec))
		for sst_gen, sl in sorted(sst_lives.iteritems()):
			h = sl.TempAtTime(t)
			# Skip SSTables with no temperature at time t.
			if h is None:
				continue
			sstgen_heat.append((sst_gen, h))

		# Hottest first.
		sstgen_heat.sort(key=lambda sh: sh[1], reverse=True)

		# Note: Don't bother with the width proportional to the tablet size for now

		fmt = "%4d %1d %8.3f"
		with open(fn_hlt, "w") as fo:
			# y0 is smaller than y1 (y0 is placed higher in the plot than y1).
			fo.write("%s\n" % Util.BuildHeader(fmt, "sst_gen level temperature"))

			for sh in sstgen_heat:
				sst_gen = sh[0]
				temp = sh[1]
				fo.write((fmt + "\n") % (sst_gen, sst_lives[sst_gen].level, temp))
		Cons.P("Created %s %d" % (fn_hlt, os.path.getsize(fn_hlt)))
	return fn_hlt
Esempio n. 10
0
def GenDataCostVsMetrics(exp_set_id):
  # Write one cost-vs-performance summary row per storage device for the
  # given experiment set; return the output file path.
  fn_out = "%s/rocksdb-ycsb-cost-vs-perf-%s" % (Conf.GetOutDir(), exp_set_id)

  fmt = "%5s %5.3f" \
      " %14.6f" \
      " %14.6f" \
      " %14.6f" \
      " %14.6f" \
      " %14.6f" \
      " %14.6f" \
      " %13.6f %10.6f %14.6f %14.6f %14.6f %14.6f %14.6f" \
      " %13.6f %10.6f %14.6f %14.6f %14.6f %14.6f %14.6f"
  # Stat names in the exact column order of fmt.
  stat_names = [
      "db_iops.avg", "db_iops.min", "db_iops.max",
      "db_iops._25", "db_iops._50", "db_iops._75",
      "r_avg", "r_min", "r_max", "r_90", "r_99", "r_999", "r_9999",
      "w_avg", "w_min", "w_max", "w_90", "w_99", "w_999", "w_9999",
  ]
  with open(fn_out, "w") as fo:
    fo.write(Util.BuildHeader(fmt, "stg_dev cost_dollar_per_gb_per_month" \
        " db_iops.avg" \
        " db_iops.min" \
        " db_iops.max" \
        " db_iops._25" \
        " db_iops._50" \
        " db_iops._75" \
        " r_avg r_min r_max r_90 r_99 r_999 r_9999" \
        " w_avg w_min w_max w_90 w_99 w_999 w_9999"
        ) + "\n")
    for stg_dev, v in Conf.Get(exp_set_id).iteritems():
      lr = YcsbLogReader(exp_set_id, stg_dev)
      row = [stg_dev, float(Conf.Get("stg_cost")[stg_dev])]
      row.extend(lr.GetStat(s) for s in stat_names)
      fo.write((fmt + "\n") % tuple(row))
  Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
  return fn_out
Esempio n. 11
0
def GetSstAccFreqAtSpecificTime(at_simulated_time):
    """Generate (or reuse) a snapshot file of per-SSTable access frequencies
    at the given simulated time; return the file path.
    """
    fn = "%s/sst-accfreq-%s-at-%.04f" % (Conf.Get("dn_result"),
                                         Conf.Get("simulation_time_begin"),
                                         at_simulated_time)
    if os.path.isfile(fn):
        return fn

    # t0: a specific time when we take the snapshot
    t0 = SimTime.SimulatedTimeAt(at_simulated_time)
    Cons.P("t0 (time of snapshot): %s" % t0)

    sst_lives = GetSstLives()

    with open(fn, "w") as fo:
        fmt = "%4d %13s %13s %7.3f %1d %5.2f"
        fo.write("# t0 (time of snapshot): %s\n" % t0)
        fo.write("%s\n" % Util.BuildHeader(
            fmt,
            "sst_id ts_before ts_after reads_per_64MB_per_sec level age_in_day"
        ))

        for sst_id, sl in sst_lives.iteritems():
            # Skip SSTables not alive at t0.
            if t0 < SimTime.ToSimulatedTime(sl.ts_created):
                continue
            if (sl.ts_deleted is not None) and (SimTime.ToSimulatedTime(
                    sl.ts_deleted) < t0):
                continue

            # Find the access-count interval straddling t0 and report it.
            ts_prev = None
            for ts, v in sorted(sl.ts_acccnt.iteritems()):
                ts_simulated = SimTime.ToSimulatedTime(ts)
                if ts_simulated < t0:
                    ts_prev = ts_simulated
                    continue
                if ts_prev is not None:
                    #cnt = v[0]
                    cnt_per_64MB_per_sec = v[1]
                    #temp = v[2]
                    # BUG FIX: timestamps were formatted "%y%d%m-..." (day
                    # before month); use "%y%m%d-..." like every other
                    # datetime format in this codebase.
                    fo.write(
                        (fmt + "\n") %
                        (sst_id, ts_prev.strftime("%y%m%d-%H%M%S"),
                         ts_simulated.strftime("%y%m%d-%H%M%S"),
                         cnt_per_64MB_per_sec, sl.Level(),
                         ((t0 - SimTime.ToSimulatedTime(
                             sl.TsCreated())).total_seconds() / 3600.0 / 24)))
                    break
    Cons.P("Created %s %d" % (fn, os.path.getsize(fn)))
    return fn
Esempio n. 12
0
def Plot():
    # Plot client latency with gnuplot; paths come from Conf.
    with Cons.MT("Plotting latency ..."):
        env = os.environ.copy()
        sim_begin = Conf.Get("simulation_time_begin")
        env["FN_IN"] = "%s/client/%s" % (Conf.GetDir("log_dir"), sim_begin)
        out_dir = "%s/%s" % (Conf.GetDir("output_dir"), sim_begin)
        Util.MkDirs(out_dir)
        fn_out = "%s/latency.pdf" % out_dir
        env["FN_OUT"] = fn_out
        with Cons.MT("Plotting ..."):
            Util.RunSubp("gnuplot %s/latency.gnuplot" %
                         os.path.dirname(__file__),
                         env=env)
            Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
Esempio n. 13
0
def GenDataFileForGnuplot():
    # Generate (or reuse) the parsed dstat data file for gnuplot; return its path.
    out_dir = "%s/%s" % (Conf.GetDir("output_dir"),
                         Conf.Get("simulation_time_begin"))
    Util.MkDirs(out_dir)
    fn = "%s/data" % out_dir
    if os.path.isfile(fn):
        return fn

    with Cons.MT("Generating data file for plot ..."):
        _Parse()

        fmt = "%9.0f %9.0f %9.0f %9.0f %9.0f %9.0f %9.0f %9.0f" \
          " %6.1f %6.1f %6.1f %6.1f %6.1f %6.1f %6.1f %6.1f" \
          " %8.0f %8.0f %8.0f %8.0f" \
          " %3.0f %3.0f" \
          " %3.0f %3.0f %11s" \
          " %3.1f %6.2f %3.1f %6.2f %6.2f %6.3f"
        header = Util.BuildHeader(
            fmt, " ".join(k for k, v in sorted(_header_idx.iteritems())))
        with open(fn, "w") as fo:
            for row_idx, r in enumerate(_body_rows):
                # Repeat the header every 50 data rows for readability.
                if row_idx % 50 == 0:
                    fo.write("%s\n" % header)
                fo.write((fmt + "\n") % tuple(r.Prepared()))
        Cons.P("Created %s %d" % (fn, os.path.getsize(fn)))
        return fn
Esempio n. 14
0
def GenDataFilesForGnuplot():
    """Generate per-experiment RocksDB metric files and a compaction/migration
    stat file comparing the unmodified DB with the io-overhead experiment.

    Returns (fn_metrics_by_time_0, fn_metrics_by_time_1, fn_rdb_compmigr).
    """
    dn_base = Conf.GetDir("dn_base")

    # Analyze the number of compactions and migrations with
    #   (a) an unmodified DB as a baseline
    #   and (b) Mutant
    # (removed unused local fns_ycsb, which was never appended to)
    log_readers = []
    for db_type in ["unmodified_db", "io_overhead"]:
        fn_ycsb = "%s/%s" % (dn_base, Conf.Get(db_type))
        # Path shape: <dn_log>/<job_id>/ycsb/<exp_dt>... with
        # job_id = yymmdd-HHMMSS and exp_dt = yymmdd-HHMMSS.sss
        mo = re.match(
            r"(?P<dn_log>.+)/(?P<job_id>\d\d\d\d\d\d-\d\d\d\d\d\d)/ycsb/(?P<exp_dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d).+",
            fn_ycsb)
        dn_log = mo.group("dn_log")
        job_id = mo.group("job_id")
        exp_dt = mo.group("exp_dt")
        dn_log_job = "%s/%s" % (dn_log, job_id)
        log_readers.append(RocksdbLogReader(dn_log_job, exp_dt))

    fn_metrics_by_time_0 = log_readers[0].FnMetricByTime()
    fn_metrics_by_time_1 = log_readers[1].FnMetricByTime()
    fn_rdb_compmigr = CompMigrStat.GetFnStat(fn_metrics_by_time_0,
                                             fn_metrics_by_time_1)
    return (fn_metrics_by_time_0, fn_metrics_by_time_1, fn_rdb_compmigr)
Esempio n. 15
0
def main(argv):
    # Plot per-storage-device time series (optionally in parallel), then the
    # cost-vs-latency comparison.
    Util.MkDirs(Conf.GetOutDir())

    exp_set_id = "171013-134330"
    #exp_set_id = "171022-160102"
    conf_exp_set = Conf.Get(exp_set_id)

    if True:
        parallel_processing = True
        if parallel_processing:
            params = [(exp_set_id, stg_dev, v)
                      for stg_dev, v in conf_exp_set.iteritems()]
            pool = multiprocessing.Pool()
            pool.map(PlotByTime, params)
        else:
            for stg_dev, v in conf_exp_set.iteritems():
                PlotByTime((exp_set_id, stg_dev, v))

    # Plot (cost vs latency) by storage devices
    #   Latency in avg and tail latencies
    #
    # The goal:
    #   to show there are limited options
    #   and show the baseline performances.
    #
    # Finish this and show that this was not a fair comparison.
    if True:
        PlotCostLatency(exp_set_id)
Esempio n. 16
0
def PlotCompareTwo():
    # Plot the overhead comparison of the two configured experiments into
    # a single pdf via gnuplot.
    (fns_rocksdb, fn_sst_creation_stat) = RocksdbLog.GenDataFilesForGnuplot()
    #fn_cpu_stat_by_time = CompareCpu.GetHourlyFn()
    fn_cpu_1min_avg = CompareCpu.Get1minAvgFn()
    fn_mem_stat_by_time = CompareMem.GetHourlyFn()
    fn_mem_1min_avg = CompareMem.Get1minAvgFn()
    #time_max = "09:00:00"
    #time_max = "08:00:00"
    time_max = "07:50:00"

    exp_dts = []
    for i in range(2):
        mo = re.match(r".+/(?P<exp_dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d)-d",
                      Conf.Get(i))
        exp_dts.append(mo.group("exp_dt"))
    fn_out = "%s/mutant-overhead-%s.pdf" % (Conf.GetOutDir(),
                                            "-".join(exp_dts))

    with Cons.MT("Plotting ..."):
        env = os.environ.copy()
        env.update({
            "TIME_MAX": str(time_max),
            "FN_CPU_1MIN_AVG": fn_cpu_1min_avg,
            "FN_MEM_1MIN_AVG": fn_mem_1min_avg,
            "ROCKSDB0": fns_rocksdb[0],
            "ROCKSDB1": fns_rocksdb[1],
            "OUT_FN": fn_out,
        })
        Util.RunSubp("gnuplot %s/compare-two-exps.gnuplot" %
                     os.path.dirname(__file__),
                     env=env)
        Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
Esempio n. 17
0
def main(argv):
    # Parse CLI arguments, set up simulated time and the result directory,
    # then plot.
    Conf.ParseArgs()

    SimTime.Init(Conf.Get("simulation_time_begin"))
    Util.MkDirs(Conf.dn_result)

    Plot.Plot()
Esempio n. 18
0
def GenDataThrpVsAllMetrics(root_str):
    """Print a throughput-vs-dstat-metrics table for the experiment set named
    by root_str (a Conf key).

    Output goes to the console via Cons.P; the file-output path below is
    commented out.
    """
    #fn_out = "%s/rocksdb-ycsb-thrp-vs-dstat-metrics-by-stgdevs" % Conf.GetOutDir()
    #if os.path.exists(fn_out):
    #  return fn_out

    root = Conf.Get(root_str)

    with Cons.MT("Generating thrp vs dstat data: %s ..." % root_str):
        dn_base = root["dn_base"].replace("~", os.path.expanduser("~"))

        # {stg_dev: {target_iops: DstatLogReader}}
        stgdev_tio_dlr = {}

        for stgdev in ["local-ssd", "ebs-st1"]:
            if stgdev not in stgdev_tio_dlr:
                stgdev_tio_dlr[stgdev] = {}

            for target_iops, v in sorted(root[stgdev].iteritems()):
                # 171125-110758/ycsb/171125-161934.339-d
                mo = re.match(
                    r"(?P<dn>.+)/ycsb/(?P<exp_dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d).*",
                    v["fn"])
                dn = "%s/%s" % (dn_base, mo.group("dn"))
                exp_dt = mo.group("exp_dt")
                #Cons.P("%s %s" % (dn, exp_dt))

                # v["time"] is a "begin-end" range string.
                t = v["time"].split("-")
                time_begin = t[0]
                time_end = t[1]
                overloaded = ("overloaded" in v) and v["overloaded"]
                # Fast enough. Takes about 5 secs. No need for a parallelization.
                stgdev_tio_dlr[stgdev][target_iops] = DstatLogReader(
                    dn, exp_dt, time_begin, time_end, overloaded)

        fmt = "%17s %9s %6.0f %1d" \
            " %6.0f %4.0f %9.0f %8.0f %7.0f %6.0f" \
            " %6.3f %6.3f %9.3f %8.3f %8.3f %8.3f" \
            " %8.3f %8.3f %9.3f %8.3f"
        Cons.P("%s" % Util.BuildHeader(fmt, "exp_dt stg_dev target_iops overloaded" \
            " dsk/xvda:read dsk/xvda:writ dsk/xvdc:read dsk/xvdc:writ dsk/xvde:read dsk/xvde:writ" \
            " io/xvda:read io/xvda:writ io/xvdc:read io/xvdc:writ io/xvde:read io/xvde:writ" \
            " memory_usage:buff memory_usage:cach memory_usage:free memory_usage:used"
            ))
        for stgdev, v in sorted(stgdev_tio_dlr.iteritems()):
            for tio, dlr in sorted(v.iteritems()):
                # Memory columns are divided by 1024*1024 — presumably bytes
                # to MB; confirm the units DstatLogReader reports.
                Cons.P(
                    fmt %
                    (dlr.exp_dt, stgdev, tio, (1 if dlr.overloaded else 0),
                     dlr.GetAvg("dsk/xvda:read"), dlr.GetAvg("dsk/xvda:writ"),
                     dlr.GetAvg("dsk/xvdc:read"), dlr.GetAvg("dsk/xvdc:writ"),
                     dlr.GetAvg("dsk/xvde:read"), dlr.GetAvg("dsk/xvde:writ"),
                     dlr.GetAvg("io/xvda:read"), dlr.GetAvg("io/xvda:writ"),
                     dlr.GetAvg("io/xvdc:read"), dlr.GetAvg("io/xvdc:writ"),
                     dlr.GetAvg("io/xvde:read"), dlr.GetAvg("io/xvde:writ"),
                     dlr.GetAvg("memory_usage:buff") /
                     (1024 * 1024), dlr.GetAvg("memory_usage:cach") /
                     (1024 * 1024), dlr.GetAvg("memory_usage:free") /
                     (1024 * 1024), dlr.GetAvg("memory_usage:used") /
                     (1024 * 1024)))
Esempio n. 19
0
def _Parse():
    """Parse the dstat csv log into the module globals _header_idx and
    _body_rows.

    Idempotent: returns immediately on a second call via _parsed.
    """
    global _parsed
    if _parsed:
        return

    fn = "%s/dstat/%s.csv" % (Conf.GetDir("log_dir"),
                              Conf.Get("simulation_time_begin"))
    with Cons.MT("Parsing %s ..." % fn):
        header_rows = []
        global _body_rows
        _body_rows = []
        with open(fn, "rb") as f:
            header_detected = False
            reader = csv.reader(f)
            for row in reader:
                # Header rows begin with "system" or "time"; every row after
                # the last header row is data.
                if (len(row) > 0) and (row[0] in ["system", "time"]):
                    header_rows.append(row)
                    header_detected = True
                elif header_detected:
                    _body_rows.append(BodyRow(row))
            #Cons.P(pprint.pformat(header_rows))

        # Make sure the rows are all the same size
        num_cols = None
        for r in header_rows:
            if num_cols is None:
                num_cols = len(r)
            else:
                if num_cols != len(r):
                    raise RuntimeError("Unexpected")

        for r in _body_rows:
            if num_cols != r.NumCols():
                raise RuntimeError("Unexpected")

        # Get column headers
        global _header_idx
        _header_idx = {}
        # The first header row leaves blanks under a spanning category name;
        # carry the last non-empty category forward to build "category:column"
        # keys mapping to the column index.
        header_rows_0_prev = None
        for i in range(num_cols):
            if len(header_rows[0][i]) > 0:
                #Cons.P("%s, %s" % (header_rows[0][i], header_rows[1][i]))
                _header_idx["%s:%s" % (header_rows[0][i].replace(
                    " ", "_"), header_rows[1][i].replace(" ", "_"))] = i
                header_rows_0_prev = header_rows[0][i].replace(" ", "_")
            else:
                #Cons.P("%s, %s" % (header_rows_0_prev, header_rows[1][i]))
                _header_idx["%s:%s" % (header_rows_0_prev.replace(
                    " ", "_"), header_rows[1][i].replace(" ", "_"))] = i
        #Cons.P(pprint.pformat(_header_idx))

        # Sort the data in the header order and convert strings to numbers
        for b in _body_rows:
            b.PrepareData()

    _parsed = True
Esempio n. 20
0
def _GenSstAccFreqByAgeDataFiles():
  """Write one access-frequency-by-age data file per SSTable under
  <dn_result>/<simulation_time_begin>/sst-age-accfreq-data/<sst_id>.

  Idempotent via the module flag _gen_sst_accfreq_by_age_data_files.
  """
  global _gen_sst_accfreq_by_age_data_files
  if _gen_sst_accfreq_by_age_data_files:
    return

  _BuildMemtSstLives()
  #_CheckSizes()

  with Cons.MT("Generating Sst acc freq by age files ..."):
    dn0 = "%s/%s" % (Conf.Get("dn_result"), Conf.Get("simulation_time_begin"))
    dn1 = "%s/sst-age-accfreq-data" % dn0;
    Util.MkDirs(dn1)
    fmt = "%10.3f %10.3f %10.3f" \
        " %10.3f %10.3f %10.3f" \
        " %6.2f %7s"
    header = Util.BuildHeader(fmt, \
        "age_begin_simulation_time_in_sec age_end_simulation_time_in_sec age_dur_simulation_time_in_sec" \
        " age_begin_simulated_time_in_sec age_end_simulated_time_in_sec age_dur_simulated_time_in_sec" \
        " reads_per_64MB_per_sec temperature")
    # i counts the files actually written.
    i = 0
    for sst_id, sl in _sst_lives.iteritems():
      # TODO: why?
      # Exclude sstables without any accesses. None seems to be such though.
      #if sl.LenTsAcccnt() == 0:
      #  continue

      # TODO: Why do you want to exclude live SSTables? I don't see any reason.
      # Exclude ones without deleted ts. Those are current SSTables.
      #if sl.TsDeleted() is None:
      #  continue

      fn = "%s/%d" % (dn1, sst_id)
      with open(fn, "w") as fo:
        fo.write("# sst_id: %d\n" % sst_id)
        fo.write("# level: %d\n" % sl.Level())
        fo.write("# size: %d\n" % sl.Size())
        fo.write("#\n")
        fo.write("%s\n" % header)
        sl.WriteAgeAccfreq(fo, fmt)
      #Cons.P("Created %s %d" % (fn, os.path.getsize(fn)))
      i += 1
    Cons.P("Created %d files in %s" % (i, dn1))
  _gen_sst_accfreq_by_age_data_files = True
Esempio n. 21
0
def Init():
	"""Parse the client log header and set the module-global simulation and
	simulated begin/end datetimes."""
	with Cons.MT("Init Conf ...", print_time=False):
		global _simulation_time_begin
		global _simulation_time_end
		global _simulated_time_begin
		global _simulated_time_end

		fn = "%s/client/%s" % (Conf.GetDir("log_dir"), Conf.Get("simulation_time_begin"))
		with open(fn) as fo:
			for line in fo:
				#Cons.P(line)
				# simulation_time_end  : 161227-162418.288
				mo = re.match(r"# simulation_time_begin: (?P<dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d)", line)
				if mo is not None:
					_simulation_time_begin = mo.group("dt")
					# The log's begin timestamp must agree with the configured one.
					if _simulation_time_begin != Conf.Get("simulation_time_begin"):
						raise RuntimeError("Unexpected")
					_simulation_time_begin = datetime.datetime.strptime(_simulation_time_begin, "%y%m%d-%H%M%S.%f")
					continue

				mo = re.match(r"# simulation_time_end  : (?P<dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d)", line)
				if mo is not None:
					_simulation_time_end = mo.group("dt")
					_simulation_time_end = datetime.datetime.strptime(_simulation_time_end, "%y%m%d-%H%M%S.%f")
					continue

				mo = re.match(r"# simulated_time_begin : (?P<dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d)", line)
				if mo is not None:
					_simulated_time_begin = mo.group("dt")
					_simulated_time_begin = datetime.datetime.strptime(_simulated_time_begin, "%y%m%d-%H%M%S.%f")
					continue

				mo = re.match(r"# simulated_time_end   : (?P<dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d)", line)
				if mo is not None:
					_simulated_time_end = mo.group("dt")
					_simulated_time_end = datetime.datetime.strptime(_simulated_time_end, "%y%m%d-%H%M%S.%f")
					continue

		Cons.P("simulation_time_begin: %s" % _simulation_time_begin)
		Cons.P("simulation_time_end  : %s" % _simulation_time_end)
		Cons.P("simulated_time_begin : %s" % _simulated_time_begin)
		Cons.P("simulated_time_end   : %s" % _simulated_time_end)
Esempio n. 22
0
def PlotSstAccfreqByAgeAll():
	# Plot access frequency by age for all SSTables into a single pdf.
	with Cons.MT("Plotting all SSTable access frequencies by their ages ..."):
		env = os.environ.copy()
		# TODO: Think about grouping once it's done. Like by their starting
		# temperatures or levels.
		#
		# TODO: Think about how to plot Sst ID labels.
		#
		sst_lives = RocksDbLogReader.GetSstAccFreqByAgeDataFiles()
		env["IN_SST_IDS"] = " ".join(str(sl.Id()) for sl in sst_lives)

		dn_result = Conf.Get("dn_result")
		sim_begin = Conf.Get("simulation_time_begin")
		env["IN_DN"] = "%s/%s/sst-age-accfreq-data" % (dn_result, sim_begin)

		fn_out = "%s/%s/sst-accfreq-by-age-all-%s.pdf" % (dn_result, sim_begin, sim_begin)
		env["OUT_FN"] = fn_out

		with Cons.MT("Plotting ..."):
			Util.RunSubp("gnuplot %s/sst-accfreq-by-age-all.gnuplot" % os.path.dirname(__file__), env=env)
			Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
Esempio n. 23
0
def _Cpu():
    # Plot cpu usage from the generated csv data file.
    with Cons.MT("Plotting cpu ..."):
        env = os.environ.copy()
        env["FN_IN"] = CsvFile.GenDataFileForGnuplot()
        fn_out = "%s/%s/cpu.pdf" % (Conf.GetDir("output_dir"),
                                    Conf.Get("simulation_time_begin"))
        env["FN_OUT"] = fn_out
        with Cons.MT("Plotting ..."):
            Util.RunSubp("gnuplot %s/cpu.gnuplot" % os.path.dirname(__file__),
                         env=env)
            Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
Esempio n. 24
0
def PlotTimeVsMetrics():
    """Plot time-vs-metrics for every configured rocksdb and computation experiment.

    Builds the list of experiment log paths and runs _PlotTimeVsAllMetrics on
    each, in parallel by default (worker pool) with a serial fallback kept for
    debugging.
    """
    with Cons.MT("Plotting time vs metrics ..."):
        dn_base = Conf.GetDir("dn_base")

        # One path per experiment, rocksdb experiments first.
        params = ["%s/%s" % (dn_base, e) for e in Conf.Get("rocksdb")] \
            + ["%s/%s" % (dn_base, e) for e in Conf.Get("computation")]

        parallel_processing = True
        if not parallel_processing:
            for p in params:
                _PlotTimeVsAllMetrics(p)
        else:
            with terminating(Pool()) as pool:
                pool.map(_PlotTimeVsAllMetrics, params)
Esempio n. 25
0
def GenDataThrpVsLat():
  """Generate the throughput-vs-latency data file for gnuplot and return its path.

  The file is cached: if it already exists it is returned immediately. Each row
  holds, per storage device and target IOPS, the measured IOPS plus read/write
  latency percentiles taken from the YCSB log for that run's time window.
  """
  fn_out = "%s/rocksdb-ycsb-thrp-vs-lat-by-stgdevs" % Conf.GetOutDir()
  if os.path.exists(fn_out):
    return fn_out

  with Cons.MT("Generating thrp vs lat data file ..."):
    dn_base = Conf.GetDir("dn_base")

    # {stg_dev: {target_iops: YcsbLogReader}}
    by_stgdev = {}

    #for stgdev in ["local-ssd", "ebs-st1"]:
    for stgdev in ["ebs-st1"]:
      tio_ylr = by_stgdev.setdefault(stgdev, {})
      for target_iops, v in sorted(Conf.Get(stgdev).iteritems()):
        # "time" is "begin-end"; "overloaded" is an optional boolean flag.
        t = v["time"].split("-")
        overloaded = ("overloaded" in v) and v["overloaded"]
        tio_ylr[target_iops] = YcsbLogReader("%s/%s" % (dn_base, v["fn"]), t[0], t[1], overloaded)

    with open(fn_out, "w") as fo:
      fmt = "%9s %6.0f %1d %6.0f" \
          " %8.2f %8.2f %9.2f %10.2f %10.2f" \
          " %8.2f %8.2f %8.2f %9.2f %9.2f"
      fo.write("%s\n" % Util.BuildHeader(fmt, "stg_dev target_iops overloaded iops" \
          " r_avg r_90 r_99 r_99.9 r_99.99" \
          " w_avg w_90 w_99 w_99.9 w_99.99"
          ))
      for stgdev, tio_ylr in sorted(by_stgdev.iteritems()):
        for tio, ylr in sorted(tio_ylr.iteritems()):
          row = (stgdev
                 , tio
                 , (1 if ylr.overloaded else 0)
                 , ylr.db_iops_stat.avg
                 , ylr.r_avg, ylr.r_90, ylr.r_99, ylr.r_999, ylr.r_9999
                 , ylr.w_avg, ylr.w_90, ylr.w_99, ylr.w_999, ylr.w_9999)
          fo.write((fmt + "\n") % row)
    Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
    return fn_out
Esempio n. 26
0
def GetFnCompareTwo():
  """Build the time-vs-metrics files for the two configured experiments and
  the compaction/migration stat file comparing them.

  Returns a 2-tuple: ([fn_metrics_0, fn_metrics_1], fn_rdb_compmigr).
  """
  dn_base = Conf.GetDir("dn_base")

  # Analyze the number of compactions and migrations with
  #   (a) an unmodified DB as a baseline
  #   and (b) Mutant
  fn_metrics_by_time = [
      GetFnTimeVsMetrics("%s/%s" % (dn_base, Conf.Get(i))) for i in range(2)]

  fn_rdb_compmigr = CompMigrStat.GetFnStat(fn_metrics_by_time[0], fn_metrics_by_time[1])
  return (fn_metrics_by_time, fn_rdb_compmigr)
Esempio n. 27
0
def GetClusteredPoints():
    """Cluster the YouTube workload points and return the result file's path.

    The output is cached on disk; _cluster.sh runs only when the clustered
    file for the configured dist_sq_threshold does not exist yet.
    """
    dist_sq_threshold = Conf.Get("dist_sq_threshold")
    fn_in = Conf.GetFn("youtube_workload")
    fn_out = "%s/%s-clustered-with-dist-sq-%s" % (
        Conf.DnOut(), os.path.basename(fn_in), dist_sq_threshold)
    if not os.path.isfile(fn_out):
        Util.RunSubp("%s/_cluster.sh --youtube_workload=%s --dist_sq_threshold=%s" \
            % (os.path.dirname(__file__), fn_in, dist_sq_threshold))
    return fn_out
Esempio n. 28
0
def CalcCompareTwo():
    """Compare every rocksdb experiment against every computation experiment.

    Runs _CalcCompareTwo on each (rocksdb, computation) path pair (in parallel
    by default), keeps the pairs whose CPU and memory overheads are both at
    least 1.0, prints them sorted, and returns the list of ExpTuple-s.
    """
    with Cons.MT("Calculating the overhead of pairs ..."):
        dn_base = Conf.GetDir("dn_base")
        exps_rocksdb = Conf.Get("rocksdb")
        exps_computation = Conf.Get("computation")

        # Cartesian product of the two experiment lists, as full paths.
        pairs = [("%s/%s" % (dn_base, r), "%s/%s" % (dn_base, c))
                 for r in exps_rocksdb for c in exps_computation]

        parallel_processing = True
        if parallel_processing:
            with terminating(Pool()) as pool:
                pool.map(_CalcCompareTwo, pairs)
        else:
            for pr in pairs:
                _CalcCompareTwo(pr)

        # Find the closest pair
        #   You want the computation overhead one has the minimal overhead, but no smaller than the rocksdb one.
        exp_tuples = []
        for pr in pairs:
            o_cpu = CompareCpu.GetOverhead(pr[0], pr[1])
            o_mem = CompareMem.GetOverhead(pr[0], pr[1])
            # Keep only pairs where neither overhead is below 1.
            if (o_cpu >= 1.0) and (o_mem >= 1.0):
                exp_tuples.append(ExpTuple(o_cpu, o_mem, pr))

        fmt = "%8.6f %8.6f %17s %17s"
        Cons.P(
            Util.BuildHeader(
                fmt,
                "cpu_overhead mem_overhead expdt_rocksdb expdt_computation"))
        for e in sorted(exp_tuples):
            Cons.P(fmt % (e.o_cpu, e.o_mem, e.GetExpDt("r"), e.GetExpDt("c")))
        return exp_tuples
Esempio n. 29
0
def Plot():
    """Plot CloudFront edge-server and Whole Foods store locations to a PDF.

    Input file paths (configured CF locations and the generated WF location
    file) and the output path are passed to gnuplot via environment variables.
    """
    fn_cf = Conf.Get("cf_locs")
    fn_wf = GetWfLocFile()
    fn_out = "%s/cloudfront-wholefoods-locations.pdf" % Conf.DnOut()

    with Cons.MT("Plotting Whole Foods store locations ..."):
        gp_env = os.environ.copy()
        gp_env.update({"FN_CF": fn_cf, "FN_WF": fn_wf, "FN_OUT": fn_out})
        Util.RunSubp("gnuplot %s/edge-server-locs.gnuplot" % os.path.dirname(__file__),
                     env=gp_env)
        Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
Esempio n. 30
0
def main(argv):
    """Plot by-time graphs for the three configured experiment types.

    Builds one (log_path,) tuple per experiment type and fans PlotByTime out
    over them, in parallel by default with a serial fallback for debugging.
    argv is unused; kept for the conventional main(argv) signature.
    """
    Util.MkDirs(Conf.GetOutDir())

    dn_base = Conf.GetDir("dn_base")

    # One 1-tuple per experiment log file, matching PlotByTime's parameter.
    params = []
    for db_type in ["unmodified_db", "computation_overhead", "io_overhead"]:
        fn = "%s/%s" % (dn_base, Conf.Get(db_type))
        params.append((fn, ))

    parallel_processing = True
    if parallel_processing:
        pool = multiprocessing.Pool()
        try:
            pool.map(PlotByTime, params)
        finally:
            # Bug fix: the original leaked the pool's worker processes.
            # close()/join() (rather than the with-statement, which is
            # Python 3 only) shuts them down on both Python 2 and 3.
            pool.close()
            pool.join()
    else:
        for p in params:
            PlotByTime(p)