Example #1
def Plot():
    with Cons.MT("Plotting latency ..."):
        env = os.environ.copy()
        env["FN_IN"] = "%s/client/%s" % (Conf.GetDir("log_dir"),
                                         Conf.Get("simulation_time_begin"))
        dn = "%s/%s" % (Conf.GetDir("output_dir"),
                        Conf.Get("simulation_time_begin"))
        Util.MkDirs(dn)
        fn_out = "%s/latency.pdf" % dn
        env["FN_OUT"] = fn_out
        with Cons.MT("Plotting ..."):
            Util.RunSubp("gnuplot %s/latency.gnuplot" %
                         os.path.dirname(__file__),
                         env=env)
            Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
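
The pattern above recurs throughout these examples: plot parameters are handed to gnuplot through environment variables (FN_IN, FN_OUT) rather than command-line arguments, and Util.RunSubp runs the command. The Util module itself is not shown here; a minimal sketch of what RunSubp presumably does, assuming it simply shells out and fails loudly on a non-zero exit code:

import subprocess

def RunSubp(cmd, env=None):
    # Run a shell command; shell=True so that compound commands like
    # "cd dir && 7z e file" work. env, when given, replaces the inherited
    # environment (the callers build it with os.environ.copy()).
    rc = subprocess.call(cmd, shell=True, env=env)
    if rc != 0:
        raise RuntimeError("Command failed (%d): %s" % (rc, cmd))

On the gnuplot side, the script would read the values back with something like fn_in = system("echo $FN_IN").
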
Example #2
def GetHourlyFn():
    fn_out = "%s/cpu-hourly-usage" % Conf.GetOutDir()
    if os.path.exists(fn_out):
        return fn_out

    with Cons.MT("Generating file for cpu usage comparison ..."):
        dn_base = Conf.GetDir("dn_base")
        fn_ycsb_0 = "%s/%s" % (dn_base, Conf.Get(0))
        fn_ycsb_1 = "%s/%s" % (dn_base, Conf.Get(1))

        hour_cpustat_0 = _GetCpuStatByHour(fn_ycsb_0)
        hour_cpustat_1 = _GetCpuStatByHour(fn_ycsb_1)
        #Cons.P(hour_cpustat_0)
        #Cons.P(hour_cpustat_1)

        with open(fn_out, "w") as fo:
            fo.write("# 0: %s\n" % fn_ycsb_0)
            fo.write("# 1: %s\n" % fn_ycsb_1)
            fo.write("#\n")
            fmt = "%2d" \
                " %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f" \
                " %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f"
            fo.write(Util.BuildHeader(fmt, "hour" \
                " 0_avg 0_min 0_1 0_25 0_50 0_75 0_99 0_max" \
                " 1_avg 1_min 1_1 1_25 1_50 1_75 1_99 1_max"
                ) + "\n")
            for h, s0 in sorted(hour_cpustat_0.iteritems()):
                s1 = hour_cpustat_1[h]
                fo.write((fmt + "\n") %
                         (h, s0.avg, s0.min, s0._1, s0._25, s0._50, s0._75,
                          s0._99, s0.max, s1.avg, s1.min, s1._1, s1._25,
                          s1._50, s1._75, s1._99, s1.max))
        Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
        return fn_out
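
Util.BuildHeader appears in nearly every example: it takes a printf-style row format and a space-separated list of column names and returns a commented header line whose columns line up with the data rows. A rough sketch under that assumption (hypothetical; the real helper may also emit column numbers):

import re

def BuildHeader(fmt, col_names):
    # Take the field width of each %-conversion (e.g. "%6.2f" -> 6) and
    # right-align the matching column name to that width.
    widths = [int(re.match(r"%(-?\d+)", f).group(1)) for f in fmt.split()]
    names = col_names.split()
    return "# " + " ".join("%*s" % (w, n) for w, n in zip(widths, names))
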
Example #3
def _GetFnCpuOverhead():
    fn_out = "%s/cpu-overhead-by-time" % Conf.GetOutDir()
    if os.path.exists(fn_out):
        return fn_out

    dn_base = Conf.GetDir("dn_base")
    fn_ycsb_0 = "%s/%s" % (dn_base, Conf.Get("unmodified_db"))
    fn_ycsb_1 = "%s/%s" % (dn_base, Conf.Get("computation_overhead"))

    hour_cpustat_0 = _GetCpuStatByHour(fn_ycsb_0)
    hour_cpustat_1 = _GetCpuStatByHour(fn_ycsb_1)
    #Cons.P(hour_cpustat_0)
    #Cons.P(hour_cpustat_1)

    with open(fn_out, "w") as fo:
        fo.write("# u: unmodified\n")
        fo.write(
            "# c: with SSTable access monitoring and SSTable placement computation\n"
        )
        fo.write("#\n")
        fmt = "%2d" \
            " %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f" \
            " %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f"
        fo.write(Util.BuildHeader(fmt, "hour" \
            " u_avg u_min u_1 u_25 u_50 u_75 u_99 u_max" \
            " c_avg c_min c_1 c_25 c_50 c_75 c_99 c_max"
            ) + "\n")
        for h, s0 in sorted(hour_cpustat_0.iteritems()):
            s1 = hour_cpustat_1[h]
            fo.write(
                (fmt + "\n") % (h, s0.avg, s0.min, s0._1, s0._25, s0._50,
                                s0._75, s0._99, s0.max, s1.avg, s1.min, s1._1,
                                s1._25, s1._50, s1._75, s1._99, s1.max))
    Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
    return fn_out
Example #4
def Get1minAvgFn():
  exp_dts = []
  for i in range(2):
    #Cons.P(Conf.Get(i))
    mo = re.match(r".+/(?P<exp_dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d)-d", Conf.Get(i))
    exp_dts.append(mo.group("exp_dt"))
  fn_out = "%s/mem-1minavg-%s" % (Conf.GetOutDir(), "-".join(exp_dts))
  if os.path.exists(fn_out):
    return fn_out

  with Cons.MT("Creating avg memory usage comparison file for plotting ..."):
    records = []
    dn_base = Conf.GetDir("dn_base")
    for i in range(2):
      fn_ycsb_log = "%s/%s" % (dn_base, Conf.Get(i))
      hm_mem = _GetHmMem(fn_ycsb_log)
      for hm, mem in hm_mem.iteritems():
        records.append(_RecordMemAvg(hm, i * 30, mem, i))
    records.sort(key=operator.attrgetter("ts"))

  fmt = "%8s %6.3f %1d"
  header = Util.BuildHeader(fmt, "timestamp mem_avg_in_gb exp_type")
  with open(fn_out, "w") as fo:
    i = 0
    for r in records:
      if i % 40 == 0:
        # Repeat the column header every 40 rows.
        fo.write(header + "\n")
      i += 1
      fo.write("%s\n" % r.ToStr(fmt))
  Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
  return fn_out
Example #5
def Get1minAvgFn():
    exp_dts = []
    for i in range(2):
        #Cons.P(Conf.Get(i))
        # computation/180126-142513/ycsb/180126-193525.769-d
        mo = re.match(r".+/(?P<exp_dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d)-d",
                      Conf.Get(i))
        exp_dts.append(mo.group("exp_dt"))
    fn_out = "%s/cpu-1minavg-%s" % (Conf.GetOutDir(), "-".join(exp_dts))
    if os.path.exists(fn_out):
        return fn_out

    with Cons.MT("Creating avg cpu usage comparison file for plotting ..."):
        records = []
        dn_base = Conf.GetDir("dn_base")
        for i in range(2):
            fn_ycsb_log = "%s/%s" % (dn_base, Conf.Get(i))
            hm_cpu = _GetHmCpu(fn_ycsb_log)
            for hm, cpu in hm_cpu.iteritems():
                records.append(_RecordCpuAvg(hm, i * 30, cpu, i))
        records.sort(key=operator.attrgetter("ts"))

    fmt = "%8s %6.2f %1d"
    header = Util.BuildHeader(fmt, "timestamp cpu_avg exp_type")
    with open(fn_out, "w") as fo:
        i = 0
        for r in records:
            if i % 40 == 0:
                # Repeat the column header every 40 rows.
                fo.write(header + "\n")
            i += 1
            fo.write("%s\n" % r.ToStr(fmt))
    Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
    return fn_out
Example #6
def GenDataFileForGnuplot(dt):
    SimTime.Init(dt)

    dn = "%s/%s" % (Conf.GetDir("output_dir"), dt)
    Util.MkDirs(dn)
    fn = "%s/dstat-data" % dn
    if os.path.isfile(fn):
        return fn

    with Cons.MT("Generating data file for plot ..."):
        global _header_idx
        global _body_rows
        _header_idx = None
        _body_rows = None

        _Parse(dt)

        fmt = "%9.0f %9.0f %9.0f %9.0f %9.0f %9.0f %9.0f %9.0f" \
          " %6.1f %6.1f %6.1f %6.1f %6.1f %6.1f %6.1f %6.1f" \
          " %8.0f %8.0f %8.0f %8.0f" \
          " %3.0f %3.0f" \
          " %3.0f %3.0f %11s" \
          " %3.1f %6.2f %3.1f %6.2f %6.2f %6.3f"
        header = Util.BuildHeader(
            fmt, " ".join(k for k, v in sorted(_header_idx.iteritems())))
        with open(fn, "w") as fo:
            i = 0
            for r in _body_rows:
                if i % 50 == 0:
                    fo.write("%s\n" % header)
                i += 1
                #Cons.P(fmt % tuple(r.Prepared()))
                fo.write((fmt + "\n") % tuple(r.Prepared()))
        Cons.P("Created %s %d" % (fn, os.path.getsize(fn)))
        return fn
Example #7
def GenDataFilesForGnuplot():
    dn_base = Conf.GetDir("dn_base")

    # Analyze the number of compactions and migrations with
    #   (a) an unmodified DB as a baseline
    #   and (b) Mutant
    fns_ycsb = []
    log_readers = []
    for db_type in ["unmodified_db", "io_overhead"]:
        fn_ycsb = "%s/%s" % (dn_base, Conf.Get(db_type))
        mo = re.match(
            r"(?P<dn_log>.+)/(?P<job_id>\d\d\d\d\d\d-\d\d\d\d\d\d)/ycsb/(?P<exp_dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d).+",
            fn_ycsb)
        dn_log = mo.group("dn_log")
        job_id = mo.group("job_id")
        exp_dt = mo.group("exp_dt")
        #Cons.P(dn_log)
        #Cons.P(job_id)
        #Cons.P(exp_dt)
        dn_log_job = "%s/%s" % (dn_log, job_id)
        log_readers.append(RocksdbLogReader(dn_log_job, exp_dt))

    fn_metrics_by_time_0 = log_readers[0].FnMetricByTime()
    fn_metrics_by_time_1 = log_readers[1].FnMetricByTime()
    fn_rdb_compmigr = CompMigrStat.GetFnStat(fn_metrics_by_time_0,
                                             fn_metrics_by_time_1)
    return (fn_metrics_by_time_0, fn_metrics_by_time_1, fn_rdb_compmigr)
Example #8
def main(argv):
    Conf.ParseArgs()
    Util.MkDirs(Conf.GetDir("output_dir"))

    dist_sq_thresholds = [0, 0.008, 0.01, 0.02, 0.05]

    #dist_sq_thresholds = [0.02]

    dist_sq_thresholds_str = []
    for d in dist_sq_thresholds:
        dist_sq_thresholds_str.append(_NoTrailing0s(d))

    reduced_files = []
    reduced_file_sizes = []
    with Cons.MT("Generating reduced size usa map ..."):
        for d in dist_sq_thresholds_str:
            fn_co_loc = "filter-out-almost-duplicate-points/.output/usa-map-smallsize-%s" % d
            reduced_files.append(fn_co_loc)
            if not os.path.exists(fn_co_loc):
                cmd = "cd filter-out-almost-duplicate-points && ./build-and-run.sh --dist_sq_threshold=%s" % d
                Util.RunSubp(cmd)
            reduced_file_sizes.append(os.path.getsize(fn_co_loc))

    dn_out = "%s/.output" % os.path.dirname(__file__)
    fn_out = "%s/usa-map.pdf" % dn_out
    with Cons.MT("Plotting ..."):
        env = os.environ.copy()
        env["IN_FNS"] = " ".join(reduced_files)
        env["IN_FN_SIZES"] = " ".join(str(s) for s in reduced_file_sizes)
        env["DIST_SQ_THRESHOLDS"] = " ".join(dist_sq_thresholds_str)
        env["OUT_FN"] = fn_out
        Util.RunSubp("gnuplot %s/usa-map.gnuplot" % os.path.dirname(__file__),
                     env=env)
        Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
Example #9
def _Parse(dt):
    dn = "%s/dstat" % Conf.GetDir("log_dir")
    fn = "%s/%s.csv" % (dn, dt)
    if not os.path.isfile(fn):
        fn_7z = "%s.7z" % fn
        if not os.path.isfile(fn_7z):
            raise RuntimeError("Unexpected")
        Util.RunSubp("cd %s && 7z e %s" % (dn, fn_7z))
    if not os.path.isfile(fn):
        raise RuntimeError("Unexpected")

    with Cons.MT("Parsing %s ..." % fn):
        header_rows = []
        global _body_rows
        _body_rows = []
        with open(fn, "rb") as f:
            header_detected = False
            reader = csv.reader(f)
            for row in reader:
                if (len(row) > 0) and (row[0] in ["system", "time"]):
                    header_rows.append(row)
                    header_detected = True
                elif header_detected:
                    _body_rows.append(BodyRow(row))
            #Cons.P(pprint.pformat(header_rows))

        # Make sure the rows are all the same size
        num_cols = None
        for r in header_rows:
            if num_cols is None:
                num_cols = len(r)
            else:
                if num_cols != len(r):
                    raise RuntimeError("Unexpected")

        for r in _body_rows:
            if num_cols != r.NumCols():
                raise RuntimeError("Unexpected")

        # Get column headers
        global _header_idx
        _header_idx = {}
        header_rows_0_prev = None
        for i in range(num_cols):
            if len(header_rows[0][i]) > 0:
                #Cons.P("%s, %s" % (header_rows[0][i], header_rows[1][i]))
                _header_idx["%s:%s" % (header_rows[0][i].replace(
                    " ", "_"), header_rows[1][i].replace(" ", "_"))] = i
                header_rows_0_prev = header_rows[0][i].replace(" ", "_")
            else:
                #Cons.P("%s, %s" % (header_rows_0_prev, header_rows[1][i]))
                _header_idx["%s:%s" % (header_rows_0_prev.replace(
                    " ", "_"), header_rows[1][i].replace(" ", "_"))] = i
        #Cons.P(pprint.pformat(_header_idx))

        # Sort the data in the header order and convert strings to numbers
        for b in _body_rows:
            b.PrepareData()
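
For reference, a dstat CSV carries two header rows: the first holds group names with blank cells under each group, the second holds the metric names. The merging loop above carries each group name forward over the blanks. An abridged illustration (assumed column layout):

header_rows = [
    ["system", "total cpu usage", "", "", "dsk/total", ""],
    ["time", "usr", "sys", "idl", "read", "writ"],
]
# The loop above would produce:
# {"system:time": 0,
#  "total_cpu_usage:usr": 1, "total_cpu_usage:sys": 2, "total_cpu_usage:idl": 3,
#  "dsk/total:read": 4, "dsk/total:writ": 5}
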
Example #10
def Init(simulation_time_begin):
	with Cons.MT("Init Conf ...", print_time=False):
		global _simulation_time_begin
		global _simulation_time_end
		global _simulated_time_begin
		global _simulated_time_end
		_simulation_time_begin = None
		_simulation_time_end = None
		_simulated_time_begin = None
		_simulated_time_end = None

		dn = "%s/client" % Conf.GetDir("log_dir")
		fn = "%s/%s" % (dn, simulation_time_begin)
		if not os.path.isfile(fn):
			fn_7z = "%s.7z" % fn
			if not os.path.isfile(fn_7z):
				raise RuntimeError("Unexpected")
			Util.RunSubp("cd %s && 7z e %s" % (dn, fn_7z))
		if not os.path.isfile(fn):
			raise RuntimeError("Unexpected")

		with open(fn) as fo:
			for line in fo:
				#Cons.P(line)
				# simulation_time_end  : 161227-162418.288
				mo = re.match(r"# simulation_time_begin: (?P<dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d)", line)
				if mo is not None:
					_simulation_time_begin = mo.group("dt")
					if _simulation_time_begin != simulation_time_begin:
						raise RuntimeError("Unexpected")
					_simulation_time_begin = datetime.datetime.strptime(_simulation_time_begin, "%y%m%d-%H%M%S.%f")
					continue

				mo = re.match(r"# simulation_time_end  : (?P<dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d)", line)
				if mo is not None:
					_simulation_time_end = mo.group("dt")
					_simulation_time_end = datetime.datetime.strptime(_simulation_time_end, "%y%m%d-%H%M%S.%f")
					continue

				mo = re.match(r"# simulated_time_begin : (?P<dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d)", line)
				if mo is not None:
					_simulated_time_begin = mo.group("dt")
					_simulated_time_begin = datetime.datetime.strptime(_simulated_time_begin, "%y%m%d-%H%M%S.%f")
					continue

				mo = re.match(r"# simulated_time_end   : (?P<dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d)", line)
				if mo is not None:
					_simulated_time_end = mo.group("dt")
					_simulated_time_end = datetime.datetime.strptime(_simulated_time_end, "%y%m%d-%H%M%S.%f")
					# Got everything we need; break early to save time
					break

		Cons.P("simulation_time_begin: %s" % _simulation_time_begin)
		Cons.P("simulation_time_end  : %s" % _simulation_time_end)
		Cons.P("simulated_time_begin : %s" % _simulated_time_begin)
		Cons.P("simulated_time_end   : %s" % _simulated_time_end)
Example #11
def GetHourlyFn():
  exp_dts = []
  for i in range(2):
    #Cons.P(Conf.Get(i))
    mo = re.match(r".+/(?P<exp_dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d)-d", Conf.Get(i))
    exp_dts.append(mo.group("exp_dt"))
  fn_out = "%s/memory-usage-by-time-%s" % (Conf.GetOutDir(), "-".join(exp_dts))
  if os.path.exists(fn_out):
    return fn_out

  with Cons.MT("Generating file for memory usage comparison ..."):
    dn_base = Conf.GetDir("dn_base")
    fn_ycsb_0 = "%s/%s" % (dn_base, Conf.Get(0))
    fn_ycsb_1 = "%s/%s" % (dn_base, Conf.Get(1))

    hour_memstat_0 = _GetMemStatByHour(fn_ycsb_0)
    hour_memstat_1 = _GetMemStatByHour(fn_ycsb_1)
    #Cons.P(hour_memstat_0)
    #Cons.P(hour_memstat_1)

    with open(fn_out, "w") as fo:
      fo.write("# 0: %s\n" % fn_ycsb_0)
      fo.write("# 1: %s\n" % fn_ycsb_1)
      fo.write("#\n")
      fmt = "%2d" \
          " %5.3f %5.3f %5.3f %5.3f %5.3f %5.3f %5.3f %5.3f" \
          " %5.3f %5.3f %5.3f %5.3f %5.3f %5.3f %5.3f %5.3f"
      fo.write(Util.BuildHeader(fmt, "hour" \
          " 0_avg 0_min 0_1 0_25 0_50 0_75 0_99 0_max" \
          " 1_avg 1_min 1_1 1_25 1_50 1_75 1_99 1_max"
          ) + "\n")
      for h, s0 in sorted(hour_memstat_0.iteritems()):
        s1 = hour_memstat_1[h]
        fo.write((fmt + "\n") % (h
          , (float(s0.avg) / 1024 / 1024 / 1024)
          , (float(s0.min) / 1024 / 1024 / 1024)
          , (float(s0._1 ) / 1024 / 1024 / 1024)
          , (float(s0._25) / 1024 / 1024 / 1024)
          , (float(s0._50) / 1024 / 1024 / 1024)
          , (float(s0._75) / 1024 / 1024 / 1024)
          , (float(s0._99) / 1024 / 1024 / 1024)
          , (float(s0.max) / 1024 / 1024 / 1024)

          , (float(s1.avg) / 1024 / 1024 / 1024)
          , (float(s1.min) / 1024 / 1024 / 1024)
          , (float(s1._1 ) / 1024 / 1024 / 1024)
          , (float(s1._25) / 1024 / 1024 / 1024)
          , (float(s1._50) / 1024 / 1024 / 1024)
          , (float(s1._75) / 1024 / 1024 / 1024)
          , (float(s1._99) / 1024 / 1024 / 1024)
          , (float(s1.max) / 1024 / 1024 / 1024)
          ))
    Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
  return fn_out
Example #12
def _GetLog(dt):
	dn = "%s/client" % Conf.GetDir("log_dir")
	fn = "%s/%s" % (dn, dt)
	if os.path.isfile(fn):
		return fn

	fn_7z = "%s.7z" % fn
	if not os.path.isfile(fn_7z):
		raise RuntimeError("Unexpected")
	Util.RunSubp("cd %s && 7z e %s" % (dn, fn_7z))
	return fn
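
_GetLog is the same lazy-extraction idiom that Examples #9, #10, and #26 inline: use the log file if it is already on disk, otherwise extract it from the .7z archive sitting next to it. Factored out, the idiom looks like this (a sketch assuming a RunSubp helper as above and 7z on the PATH):

import os

def EnsureExtracted(dn, basename):
    # Return dn/basename, extracting it from dn/basename.7z first if needed.
    fn = "%s/%s" % (dn, basename)
    if os.path.isfile(fn):
        return fn
    fn_7z = "%s.7z" % fn
    if not os.path.isfile(fn_7z):
        raise RuntimeError("Neither %s nor %s exists" % (fn, fn_7z))
    RunSubp("cd %s && 7z e %s" % (dn, fn_7z))
    if not os.path.isfile(fn):
        raise RuntimeError("Extraction did not produce %s" % fn)
    return fn
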
Example #13
def _Write():
	with Cons.MT("Plotting write latency ..."):
		env = os.environ.copy()
		env["FN_IN_LOCAL_SSD"] = _GetLog(Conf.Manifest.Get("Local SSD"))
		env["FN_IN_EBS_GP2"] = _GetLog(Conf.Manifest.Get("EBS gp2"))
		env["FN_IN_EBS_ST1"] = _GetLog(Conf.Manifest.Get("EBS st1"))
		env["FN_IN_EBS_SC1"] = _GetLog(Conf.Manifest.Get("EBS sc1"))
		fn_out = "%s/baseline-time-vs-write-latency-all-stg-devs.pdf" % Conf.GetDir("output_dir")
		env["FN_OUT"] = fn_out
		Util.RunSubp("gnuplot %s/write-latency.gnuplot" % os.path.dirname(__file__), env=env)
		Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
Example #14
def _Cpu():
    with Cons.MT("Plotting cpu ..."):
        env = os.environ.copy()
        env["FN_IN"] = CsvFile.GenDataFileForGnuplot()
        dn = "%s/%s" % (Conf.GetDir("output_dir"),
                        Conf.Get("simulation_time_begin"))
        fn_out = "%s/cpu.pdf" % dn
        env["FN_OUT"] = fn_out
        with Cons.MT("Plotting ..."):
            Util.RunSubp("gnuplot %s/cpu.gnuplot" % os.path.dirname(__file__),
                         env=env)
            Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
Example #15
def GenDataThrpVsLat():
  fn_out = "%s/rocksdb-ycsb-thrp-vs-lat-by-stgdevs" % Conf.GetOutDir()
  if os.path.exists(fn_out):
    return fn_out

  with Cons.MT("Generating thrp vs lat data file ..."):
    dn_base = Conf.GetDir("dn_base")

    # {stg_dev: {target_iops: YcsbLogReader}}
    stgdev_tio_ylr = {}

    #for stgdev in ["local-ssd", "ebs-st1"]:
    for stgdev in ["ebs-st1"]:
      if stgdev not in stgdev_tio_ylr:
        stgdev_tio_ylr[stgdev] = {}

      for target_iops, v in sorted(Conf.Get(stgdev).iteritems()):
        fn = "%s/%s" % (dn_base, v["fn"])
        t = v["time"].split("-")
        time_begin = t[0]
        time_end = t[1]
        overloaded = ("overloaded" in v) and v["overloaded"]
        stgdev_tio_ylr[stgdev][target_iops] = YcsbLogReader(fn, time_begin, time_end, overloaded)

    with open(fn_out, "w") as fo:
      fmt = "%9s %6.0f %1d %6.0f" \
          " %8.2f %8.2f %9.2f %10.2f %10.2f" \
          " %8.2f %8.2f %8.2f %9.2f %9.2f"
      fo.write("%s\n" % Util.BuildHeader(fmt, "stg_dev target_iops overloaded iops" \
          " r_avg r_90 r_99 r_99.9 r_99.99" \
          " w_avg w_90 w_99 w_99.9 w_99.99"
          ))
      for stgdev, v in sorted(stgdev_tio_ylr.iteritems()):
        for tio, ylr in sorted(v.iteritems()):
          fo.write((fmt + "\n") % (
            stgdev
            , tio
            , (1 if ylr.overloaded else 0)
            , ylr.db_iops_stat.avg
            , ylr.r_avg
            , ylr.r_90
            , ylr.r_99
            , ylr.r_999
            , ylr.r_9999
            , ylr.w_avg
            , ylr.w_90
            , ylr.w_99
            , ylr.w_999
            , ylr.w_9999
            ))
    Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
    return fn_out
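
The Conf entry consumed above is a {target_iops: {...}} mapping per storage device. The config file itself is not shown; from the accesses (v["fn"], v["time"], optional v["overloaded"]), each entry presumably has this shape (hypothetical values):

conf_ebs_st1 = {
    3000: {"fn": "171013-204101.123-d", "time": "00:10:00-00:50:00"},
    4000: {"fn": "171013-221502.456-d", "time": "00:10:00-00:50:00",
           "overloaded": True},
}
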
Example #16
def GetFnCompareTwo():
  dn_base = Conf.GetDir("dn_base")

  # Analyze the number of compactions and migrations with
  #   (a) an unmodified DB as a baseline
  #   and (b) Mutant
  fn_metrics_by_time = []
  for i in range(2):
    fn_ycsb = "%s/%s" % (dn_base, Conf.Get(i))
    fn_metrics_by_time.append(GetFnTimeVsMetrics(fn_ycsb))

  fn_rdb_compmigr = CompMigrStat.GetFnStat(fn_metrics_by_time[0], fn_metrics_by_time[1])
  return (fn_metrics_by_time, fn_rdb_compmigr)
Example #17
def Plot(param):
    job_id = param[0]
    exp_dt = param[1]
    dn_log_job = "%s/work/mutant/log/quizup/sla-admin/%s" % (
        os.path.expanduser("~"), job_id)

    fn_log_quizup = "%s/quizup/%s" % (dn_log_job, exp_dt)
    fn_log_rocksdb = "%s/rocksdb/%s" % (dn_log_job, exp_dt)
    fn_log_dstat = "%s/dstat/%s.csv" % (dn_log_job, exp_dt)

    log_q = QuizupLog(fn_log_quizup)
    SimTime.Init(log_q.SimTime("simulated_time_begin"),
                 log_q.SimTime("simulated_time_end"),
                 log_q.SimTime("simulation_time_begin"),
                 log_q.SimTime("simulation_time_end"))

    qz_std_max = _QzSimTimeDur(
        log_q.quizup_options["simulation_time_dur_in_sec"])
    qz_opt_str = _QuizupOptionsFormattedStr(log_q.quizup_options)
    error_adj_ranges = log_q.quizup_options["error_adj_ranges"].replace(
        ",", " ")

    (fn_rocksdb_sla_admin_log, pid_params,
     num_sla_adj) = RocksdbLog.ParseLog(fn_log_rocksdb, exp_dt)

    fn_dstat = DstatLog.GenDataFileForGnuplot(fn_log_dstat, exp_dt)

    fn_out = "%s/sla-admin-by-time-%s.pdf" % (Conf.GetDir("output_dir"),
                                              exp_dt)

    with Cons.MT("Plotting ..."):
        env = os.environ.copy()
        env["STD_MAX"] = qz_std_max
        env["ERROR_ADJ_RANGES"] = error_adj_ranges
        env["IN_FN_QZ"] = fn_log_quizup
        env["IN_FN_SLA_ADMIN"] = "" if num_sla_adj == 0 else fn_rocksdb_sla_admin_log
        env["QUIZUP_OPTIONS"] = qz_opt_str
        env["PID_PARAMS"] = "%s %s %s %s" % (pid_params["target_value"],
                                             pid_params["p"], pid_params["i"],
                                             pid_params["d"])
        env["WORKLOAD_EVENTS"] = " ".join(
            str(t) for t in log_q.simulation_time_events)
        env["IN_FN_DS"] = fn_dstat
        env["OUT_FN"] = fn_out
        Util.RunSubp("gnuplot %s/sla-admin-by-time.gnuplot" %
                     os.path.dirname(__file__),
                     env=env)
        Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
Example #18
def main(argv):
    Util.MkDirs(Conf.GetOutDir())

    dn_base = Conf.GetDir("dn_base")

    params = []
    for db_type in ["unmodified_db", "computation_overhead", "io_overhead"]:
        fn = "%s/%s" % (dn_base, Conf.Get(db_type))
        params.append((fn, ))

    parallel_processing = True
    if parallel_processing:
        p = multiprocessing.Pool()
        p.map(PlotByTime, params)
    else:
        for p in params:
            PlotByTime(p)
Example #19
def PlotByTime(params):
    exp_set_id = params[0]
    stg_dev = params[1]
    p0 = params[2]

    jobid_expdt = p0["jobid_expdt"]
    time_window = p0["time_window"]

    (fn_ycsb, time_max,
     params1) = YcsbLog.GenDataMetricsByTime(exp_set_id, stg_dev)
    #Cons.P(time_max)

    params_formatted = exp_set_id + "\n" + pprint.pformat(
        params1[0]) + "\n" + pprint.pformat(params1[1])
    params_formatted = params_formatted.replace("_", "\\\\_").replace(
        " ", "\\ ").replace("\n", "\\n").replace("{", "\{").replace("}", "\}")
    #Cons.P(params_formatted)

    t = jobid_expdt.split("/")
    job_id = t[0]
    exp_dt = t[1]

    dn_log = Conf.GetDir("dn")
    dn_log_job = "%s/%s" % (dn_log, job_id)

    fn_dstat = DstatLog.GenDataFileForGnuplot(dn_log_job, exp_dt)
    fn_rocksdb = RocksdbLog.GenDataFileForGnuplot(dn_log_job, exp_dt)

    fn_out = "%s/rocksdb-ycsb_d-%s-by-time-%s.pdf" % (Conf.GetOutDir(),
                                                      stg_dev, exp_dt)

    with Cons.MT("Plotting ..."):
        env = os.environ.copy()
        env["EXP_SET_ID"] = exp_set_id
        env["PARAMS"] = params_formatted
        env["STG_DEV"] = stg_dev
        env["TIME_MAX"] = str(time_max)
        env["IN_FN_DSTAT"] = fn_dstat
        env["IN_FN_YCSB"] = fn_ycsb
        env["IN_FN_ROCKSDB"] = fn_rocksdb
        env["OUT_FN"] = fn_out
        Util.RunSubp("gnuplot %s/rocksdb-ycsb-by-time.gnuplot" %
                     os.path.dirname(__file__),
                     env=env)
        Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
Example #20
def PlotTimeVsMetrics():
    with Cons.MT("Plotting time vs metrics ..."):
        dn_base = Conf.GetDir("dn_base")

        params = []
        for cost_changes, v in sorted(
                Conf.Get("by_costchanges_targetiops").iteritems()):
            for target_iops, e in sorted(v.iteritems()):
                #Cons.P("%s %s %s" % (cost_changes, target_iops, e))
                params.append("%s/%s" % (dn_base, e))

        parallel_processing = False
        if parallel_processing:
            with terminating(Pool()) as pool:
                pool.map(_PlotTimeVsAllMetrics, params)
        else:
            for p in params:
                _PlotTimeVsAllMetrics(p)
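
The terminating() context manager wrapped around Pool here (and in Examples #24, #25, and #27) is not defined in these excerpts. It is presumably the standard recipe for making a Python 2 multiprocessing.Pool usable with the with statement, roughly:

from contextlib import contextmanager

@contextmanager
def terminating(thing):
    # Yield the pool, and terminate it on the way out even if map() raised.
    try:
        yield thing
    finally:
        thing.terminate()
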
Example #21
def Plot():
    with Cons.MT("Plotting ..."):
        env = os.environ.copy()
        env["FN_IN_LOCAL_SSD"] = CsvFile.GenDataFileForGnuplot(
            Conf.Manifest.Get("Local SSD"))
        env["FN_IN_EBS_GP2"] = CsvFile.GenDataFileForGnuplot(
            Conf.Manifest.Get("EBS gp2"))
        env["FN_IN_EBS_ST1"] = CsvFile.GenDataFileForGnuplot(
            Conf.Manifest.Get("EBS st1"))
        env["FN_IN_EBS_SC1"] = CsvFile.GenDataFileForGnuplot(
            Conf.Manifest.Get("EBS sc1"))
        fn_out = "%s/baseline-time-vs-resource-usage-all-stg-devs.pdf" % Conf.GetDir(
            "output_dir")
        env["FN_OUT"] = fn_out
        Util.RunSubp("gnuplot %s/res-usage.gnuplot" %
                     os.path.dirname(__file__),
                     env=env)
        Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
Example #22
def Init():
	with Cons.MT("Init Conf ...", print_time=False):
		global _simulation_time_begin
		global _simulation_time_end
		global _simulated_time_begin
		global _simulated_time_end

		fn = "%s/client/%s" % (Conf.GetDir("log_dir"), Conf.Get("simulation_time_begin"))
		with open(fn) as fo:
			for line in fo:
				#Cons.P(line)
				# simulation_time_end  : 161227-162418.288
				mo = re.match(r"# simulation_time_begin: (?P<dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d)", line)
				if mo is not None:
					_simulation_time_begin = mo.group("dt")
					if _simulation_time_begin != Conf.Get("simulation_time_begin"):
						raise RuntimeError("Unexpected")
					_simulation_time_begin = datetime.datetime.strptime(_simulation_time_begin, "%y%m%d-%H%M%S.%f")
					continue

				mo = re.match(r"# simulation_time_end  : (?P<dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d)", line)
				if mo is not None:
					_simulation_time_end = mo.group("dt")
					_simulation_time_end = datetime.datetime.strptime(_simulation_time_end, "%y%m%d-%H%M%S.%f")
					continue

				mo = re.match(r"# simulated_time_begin : (?P<dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d)", line)
				if mo is not None:
					_simulated_time_begin = mo.group("dt")
					_simulated_time_begin = datetime.datetime.strptime(_simulated_time_begin, "%y%m%d-%H%M%S.%f")
					continue

				mo = re.match(r"# simulated_time_end   : (?P<dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d)", line)
				if mo is not None:
					_simulated_time_end = mo.group("dt")
					_simulated_time_end = datetime.datetime.strptime(_simulated_time_end, "%y%m%d-%H%M%S.%f")
					continue

		Cons.P("simulation_time_begin: %s" % _simulation_time_begin)
		Cons.P("simulation_time_end  : %s" % _simulation_time_end)
		Cons.P("simulated_time_begin : %s" % _simulated_time_begin)
		Cons.P("simulated_time_end   : %s" % _simulated_time_end)
Example #23
def GenDataFileForGnuplot(fn, dt):
    fn_out = "%s/dstat-%s" % (Conf.GetDir("output_dir"), dt)
    if os.path.isfile(fn_out):
        return fn_out

    with Cons.MT("Generating dstat data file for plot ..."):
        global _header_idx
        global _body_rows
        _header_idx = None
        _body_rows = None

        # Unzip when the file is not there
        if not os.path.exists(fn):
            fn_zipped = "%s.7z" % fn
            if not os.path.exists(fn_zipped):
                raise RuntimeError("Unexpected: %s" % fn)
            Util.RunSubp(
                "cd %s && 7z e %s > /dev/null" %
                (os.path.dirname(fn_zipped), os.path.basename(fn_zipped)))
        if not os.path.exists(fn):
            raise RuntimeError("Unexpected")

        _Parse(fn)

        fmt = "%9.0f %9.0f %9.0f %9.0f %9.0f %9.0f %9.0f %9.0f" \
            " %6.1f %6.1f %6.1f %6.1f %6.1f %6.1f %6.1f %6.1f" \
            " %8.0f %8.0f %8.0f %8.0f" \
            " %3.0f %3.0f" \
            " %3.0f %3.0f %11s" \
            " %3.1f %6.2f %3.1f %6.2f %6.2f %6.3f"
        header = Util.BuildHeader(
            fmt, " ".join(k for k, v in sorted(_header_idx.iteritems())))
        with open(fn_out, "w") as fo:
            i = 0
            for r in _body_rows:
                if i % 50 == 0:
                    fo.write("%s\n" % header)
                i += 1
                #Cons.P(fmt % tuple(r.Prepared()))
                fo.write((fmt + "\n") % tuple(r.Prepared()))
        Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
        return fn_out
Example #24
def PlotTimeVsMetrics():
    with Cons.MT("Plotting time vs metrics ..."):
        dn_base = Conf.GetDir("dn_base")
        exps_rocksdb = Conf.Get("rocksdb")
        exps_computation = Conf.Get("computation")
        #Cons.P(pprint.pformat(exps_rocksdb))
        #Cons.P(pprint.pformat(exps_computation))

        params = []
        for e in exps_rocksdb:
            params.append("%s/%s" % (dn_base, e))
        for e in exps_computation:
            params.append("%s/%s" % (dn_base, e))

        parallel_processing = True
        if parallel_processing:
            with terminating(Pool()) as pool:
                pool.map(_PlotTimeVsAllMetrics, params)
        else:
            for p in params:
                _PlotTimeVsAllMetrics(p)
Example #25
def CalcCompareTwo():
    with Cons.MT("Calculating the overhead of pairs ..."):
        dn_base = Conf.GetDir("dn_base")
        exps_rocksdb = Conf.Get("rocksdb")
        exps_computation = Conf.Get("computation")

        params = []
        for r in exps_rocksdb:
            for c in exps_computation:
                params.append(("%s/%s" % (dn_base, r), "%s/%s" % (dn_base, c)))

        parallel_processing = True
        if parallel_processing:
            with terminating(Pool()) as pool:
                pool.map(_CalcCompareTwo, params)
        else:
            for p in params:
                _CalcCompareTwo(p)

        # Find the closest pair.
        #   You want the computation-overhead run with the smallest overhead
        #   that is still no smaller than the rocksdb one.
        exp_tuples = []
        for p in params:
            o_cpu = CompareCpu.GetOverhead(p[0], p[1])
            o_mem = CompareMem.GetOverhead(p[0], p[1])
            if (o_cpu < 1.0) or (o_mem < 1.0):
                continue
            exp_tuples.append(ExpTuple(o_cpu, o_mem, p))

        fmt = "%8.6f %8.6f %17s %17s"
        Cons.P(
            Util.BuildHeader(
                fmt,
                "cpu_overhead mem_overhead expdt_rocksdb expdt_computation"))

        for e in sorted(exp_tuples):
            Cons.P(fmt % (e.o_cpu, e.o_mem, e.GetExpDt("r"), e.GetExpDt("c")))
        return exp_tuples
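
ExpTuple is not defined in these examples. For sorted(exp_tuples) and GetExpDt() to behave as used above, it would have to look roughly like this hypothetical reconstruction, ordering pairs by CPU overhead and then by memory overhead:

import os

class ExpTuple(object):
    def __init__(self, o_cpu, o_mem, p):
        self.o_cpu = o_cpu
        self.o_mem = o_mem
        self.p = p  # (rocksdb exp path, computation exp path)

    def __lt__(self, other):
        return (self.o_cpu, self.o_mem) < (other.o_cpu, other.o_mem)

    def GetExpDt(self, r_or_c):
        # The experiment datetime is the file name component of the
        # rocksdb ("r") or computation ("c") experiment path.
        return os.path.basename(self.p[0] if r_or_c == "r" else self.p[1])
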
Example #26
def _BuildMemtSstLives():
  with Cons.MT("Building memt and sst lives ..."):
    #global _memt_lives
    global _sst_lives

    if _sst_lives is not None:
      return

    #_memt_lives = {}
    _sst_lives = {}

    dn = "%s/rocksdb" % Conf.GetDir("log_dir")
    fn = "%s/%s" % (dn, Conf.Get("simulation_time_begin"))
    if not os.path.isfile(fn):
      fn_7z = "%s.7z" % fn
      if not os.path.isfile(fn_7z):
        raise RuntimeError("Unexpected")
      Util.RunSubp("cd %s && 7z e %s" % (dn, fn_7z))
    if not os.path.isfile(fn):
      raise RuntimeError("Unexpected")

    line_no = 0
    with open(fn) as fo:
      for line in fo:
        line_no += 1
        if line_no % 100 == 0:
          Cons.ClearLine()
          Cons.Pnnl("Processing line %d" % line_no)

        # The timestamp in the first column and the time_micros are 5 hours
        # apart: one is in local time (EDT), the other in UTC. Follow the former.
        # TODO: this needs to be fixed in the other place too

        # 2016/12/21-02:17:14.329266 7f702d7fa700 EVENT_LOG_v1 {"time_micros":
        # 1482304634329023, "mutant_table_acc_cnt": {"memt":
        # "0x7f69fc00c350:51723 0x7f6bec011200:26942", "sst": "1069:0:30.123:20.123
        # 1059:980:30.123:20.123"}
        mo = re.match(r"(?P<ts>\d\d\d\d/\d\d/\d\d-\d\d:\d\d:\d\d\.\d\d\d\d\d\d) .+ EVENT_LOG_v1 {" \
            ".+"
            ", \"mutant_table_acc_cnt\": {(\"memt\": \"(?P<memt_acc_cnt>(\w|\d|:| )+)\")?" \
            "(, )?" \
            "(\"sst\": \"(?P<sst_acc_cnt>(\w|\d|:|-|\.| )+)\")?" \
            "}" \
            ".*"
            , line)
        if mo is not None:
          _SetTabletAccess(mo)
          continue

        # 2016/12/21-02:15:58.341853 7f702dffb700 EVENT_LOG_v1 {"time_micros":
        # 1482304558341847, "job": 227, "event": "table_file_deletion",
        # "file_number": 1058}
        mo = re.match(r"(?P<ts>\d\d\d\d/\d\d/\d\d-\d\d:\d\d:\d\d\.\d\d\d\d\d\d) .+ EVENT_LOG_v1 {" \
            ".+"
            ", \"job\": \d+" \
            ", \"event\": \"table_file_deletion\"" \
            ", \"file_number\": (?P<file_number>\d+)" \
            "}" \
            ".*"
            , line)
        if mo is not None:
          _SetTabletDeleted(mo)
          continue

        # 2016/12/21-01:27:40.840324 7f702dffb700 EVENT_LOG_v1 {"time_micros":
        # 1482301660840289, "cf_name": "default", "job": 4, "event":
        # "table_file_creation", "file_number": 15, "file_size": 67569420,
        # "table_properties": {"data_size": 67110556, "index_size": 458020,
        # "filter_size": 0, "raw_key_size": 1752468, "raw_average_key_size": 25,
        # "raw_value_size": 65132550, "raw_average_value_size": 966,
        # "num_data_blocks": 16857, "num_entries": 67425, "filter_policy_name":
        # "", "reason": kCompaction, "kDeletedKeys": "0", "kMergeOperands": "0"}}
        mo = re.match(r"(?P<ts>\d\d\d\d/\d\d/\d\d-\d\d:\d\d:\d\d\.\d\d\d\d\d\d) .+ EVENT_LOG_v1 {" \
            ".+"
            ", \"cf_name\": \"default\"" \
            ", \"job\": (?P<job>\d+)" \
            ", \"event\": \"table_file_creation\"" \
            ", \"file_number\": (?P<file_number>\d+)" \
            ", \"file_size\": (?P<file_size>\d+)" \
            ".+" \
            ", \"reason\": (?P<reason>\w+)" \
            ".*"
            , line)
        if mo is not None:
          _SetTabletCreated(mo)
          continue

        # 2016/12/21-01:28:41.835596 7f683c58d700 EVENT_LOG_v1 {"time_micros":
        # 1482301721835586, "job": 8, "event": "flush_started", "num_memtables":
        # 2, "num_entries": 257306, "num_deletes": 0, "memory_usage": 260052944}
        mo = re.match(r"(?P<ts>\d\d\d\d/\d\d/\d\d-\d\d:\d\d:\d\d\.\d\d\d\d\d\d) .+ EVENT_LOG_v1 {" \
            ".+"
            ", \"job\": \d+" \
            ", \"event\": \"flush_started\"" \
            ".*"
            , line)
        if mo is not None:
          continue

        # 2016/12/21-01:27:25.893816 7f683c58d700 (Original Log Time
        # 2016/12/21-01:27:25.893597) EVENT_LOG_v1 {"time_micros":
        # 1482301645893590, "job": 2, "event": "flush_finished", "lsm_state": [1,
        # 0, 0, 0, 0, 0, 0], "immutable_memtables": 0}
        mo = re.match(r"(?P<ts>\d\d\d\d/\d\d/\d\d-\d\d:\d\d:\d\d\.\d\d\d\d\d\d) .+ EVENT_LOG_v1 {" \
            ".+"
            ", \"job\": \d+" \
            ", \"event\": \"flush_finished\"" \
            ".*"
            , line)
        if mo is not None:
          continue

        # 2016/12/21-01:27:40.010374 7f702dffb700 EVENT_LOG_v1 {"time_micros":
        # 1482301660010345, "job": 4, "event": "compaction_started", "files_L0":
        # [12, 8], "score": 1, "input_data_size": 241744688}
        mo = re.match(r"(?P<ts>\d\d\d\d/\d\d/\d\d-\d\d:\d\d:\d\d\.\d\d\d\d\d\d) .+ EVENT_LOG_v1 {" \
            ".+"
            ", \"job\": \d+" \
            ", \"event\": \"compaction_started\"" \
            ".*"
            , line)
        if mo is not None:
          continue

        # 2016/12/21-01:27:40.960792 7f702dffb700 (Original Log Time
        # 2016/12/21-01:27:40.959919) EVENT_LOG_v1 {"time_micros":
        # 1482301660959908, "job": 4, "event": "compaction_finished",
        # "compaction_time_micros": 949251, "output_level": 1,
        # "num_output_files": 4, "total_output_size": 229662756,
        # "num_input_records": 241171, "num_output_records": 229148,
        # "num_subcompactions": 1, "lsm_state": [0, 4, 0, 0, 0, 0, 0]}
        mo = re.match(r"(?P<ts>\d\d\d\d/\d\d/\d\d-\d\d:\d\d:\d\d\.\d\d\d\d\d\d) .+ EVENT_LOG_v1 {" \
            ".+"
            ", \"job\": (?P<job>\d+)" \
            ", \"event\": \"compaction_finished\"" \
            ".+" \
            ", \"output_level\": (?P<output_level>\d+)" \
            ".*"
            , line)
        if mo is not None:
          _SetCompactinFinished(mo)
          continue

        # You can check out the other events here. All useful ones were covered above.
        #Cons.P(line)
      Cons.ClearLine()
      Cons.P("Processed %d lines" % line_no)

    deleted = 0
    not_deleted = 0
    for sst_id, sl in _sst_lives.iteritems():
      if sl.TsDeleted() is None:
        not_deleted += 1
      else:
        deleted += 1
    Cons.P("Created %d SstLives. %d not-deleted, %d deleted"
        % (len(_sst_lives), not_deleted, deleted))
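
The regex dispatch above is easiest to sanity-check against one of the sample lines quoted in the comments. Using the table_file_deletion line and the same pattern as in the function:

import re

line = ('2016/12/21-02:15:58.341853 7f702dffb700 EVENT_LOG_v1 {"time_micros": '
        '1482304558341847, "job": 227, "event": "table_file_deletion", '
        '"file_number": 1058}')
mo = re.match(r"(?P<ts>\d\d\d\d/\d\d/\d\d-\d\d:\d\d:\d\d\.\d\d\d\d\d\d) .+ EVENT_LOG_v1 {"
              r".+"
              r", \"job\": \d+"
              r", \"event\": \"table_file_deletion\""
              r", \"file_number\": (?P<file_number>\d+)"
              r"}"
              r".*", line)
print(mo.group("ts") + " " + mo.group("file_number"))
# -> 2016/12/21-02:15:58.341853 1058
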
Example #27
def GetFnCostSloEpsilonVsMetrics():
  fn_out = "%s/cost-slo-epsilon-vs-metrics" % Conf.GetOutDir()
  if os.path.isfile(fn_out):
    return fn_out

  dn_base = Conf.GetDir("dn_base")
  # {cost_slo_epsilon: fn}
  cse_fn = {}
  for cost_slo_epsilon, fn0 in Conf.Get("by_cost_slo_epsilons").iteritems():
    fn = "%s/%s" % (dn_base, fn0)
    cse_fn[cost_slo_epsilon] = fn
  #Cons.P(pprint.pformat(cse_fn))

  params = []
  for cost_slo_epsilon, fn_ycsb_log in sorted(cse_fn.iteritems()):
    params.append(fn_ycsb_log)

  parallel_processing = True
  if parallel_processing:
    with terminating(Pool()) as pool:
      result = pool.map(GetFnTimeVsMetrics, params)
  else:
    result = []
    for p in params:
      result.append(GetFnTimeVsMetrics(p))
  #Cons.P(result)

  cse_outfn = {}
  i = 0
  for cost_slo_epsilon, fn_ycsb_log in sorted(cse_fn.iteritems()):
    cse_outfn[cost_slo_epsilon] = result[i]
    i += 1

  with open(fn_out, "w") as fo:
    fo.write("# CSE: Storage cost SLO epsilon\n")
    fo.write("# JR: jobs_recovery\n")
    fo.write("# JF: jobs_flush\n")
    fo.write("# JC: jobs_compaction\n")
    fo.write("#   JCL: jobs_comp_leveled_organization_triggered\n")
    fo.write("#   SSCL: total_sst_size_comp_level_triggered_in_gb\n")
    fo.write("#   SSCLCM: total_sst_size_comp_level_triggered_comp_migrs_in_gb\n")
    fo.write("#     SSCLCMS: total_sst_size_comp_level_triggered_comp_migrs_to_slow_in_gb\n")
    fo.write("#     SSCLCMF: total_sst_size_comp_level_triggered_comp_migrs_to_fast_in_gb\n")
    fo.write("# JCT: jobs_comp_temp_triggered_migr\n")
    fo.write("#   SSCT: total_sst_size_comp_temp_triggered_migr_in_gb\n")
    fo.write("#     SSCTS: To slow storage\n")
    fo.write("#     SSCTF: To fast storage\n")
    fo.write("\n")

    fmt = "%4.2f %8.6f %8.6f %8.6f %8.6f %1d %2d %4d" \
        " %4d %7.3f %7.3f %7.3f %7.3f" \
        " %4d %7.3f %7.3f %7.3f"
    header = Util.BuildHeader(fmt, "CSE" \
        " stg_unit_cost_$_gb_month" \
        " stg_cost_$" \
        " fast_stg_cost_$" \
        " slow_stg_cost_$" \
        " JR" \
        " JF" \
        " JC" \
          " JCL" \
          " SSCL" \
          " SSCLCM" \
            " SSCLCMS" \
            " SSCLCMF" \
        " JCT" \
          " SSCT" \
            " SSCTS" \
            " SSCTF" \
        )
    fo.write(header + "\n")

    for cost_slo_epsilon, fn1 in sorted(cse_outfn.iteritems()):
      kvs = [
          ["total_stg_unit_cost", None]
          , ["total_stg_cost", None]
          , ["fast_stg_cost", None]
          , ["slow_stg_cost", None]
          , ["num_jobs_recovery", None]
          , ["num_jobs_flush", None]
          , ["num_jobs_comp_all", None]
            , ["num_jobs_comp_level_triggered", None]
            , ["total_sst_size_comp_level_triggered_in_gb", None]
            , ["total_sst_size_comp_level_triggered_comp_migrs_in_gb", None]
              , ["total_sst_size_comp_level_triggered_comp_migrs_to_slow_in_gb", None]
              , ["total_sst_size_comp_level_triggered_comp_migrs_to_fast_in_gb", None]
          , ["num_jobs_comp_temp_triggered_migr", None]
            , ["total_sst_size_comp_temp_triggered_migr", None]
              , ["total_sst_size_comp_temp_triggered_migr_to_slow", None]
              , ["total_sst_size_comp_temp_triggered_migr_to_fast", None]
          ]

      with open(fn1) as fo1:
        for line in fo1:
          if not line.startswith("#"):
            continue

          for kv in kvs:
            k = kv[0]
            mo = re.match(r".+%s=(?P<v>(\d|\.)+)" % k, line)
            if mo:
              kv[1] = float(mo.group("v"))
              continue

      try:
        fo.write((fmt + "\n") % (
          cost_slo_epsilon
          , kvs[0][1]
          , kvs[1][1]
          , kvs[2][1]
          , kvs[3][1]
          , kvs[4][1]
          , kvs[5][1]
          , kvs[6][1]
          , kvs[7][1]
          , kvs[8][1]
          , kvs[9][1]
          , kvs[10][1]
          , kvs[11][1]
          , kvs[12][1]
          , kvs[13][1]
          , kvs[14][1]
          , kvs[15][1]
          ))
      except TypeError as e:
        Cons.P(fn1)
        raise e

  Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
  return fn_out
Example #28
def main(argv):
	Conf.ParseArgs()

	Util.MkDirs(Conf.GetDir("output_dir"))

	Plot.Plot()
Example #29
def main(argv):
    Conf.ParseArgs()
    Util.MkDirs(Conf.GetDir("output_dir"))

    #dist_sq_thresholds = [
    #    0
    #    , 0.008
    #    , 0.01
    #    , 0.02
    #    , 0.05]

    # Still quite big. 277 KB
    dist_sq_thresholds = [0.02]

    dist_sq_thresholds_str = []
    for d in dist_sq_thresholds:
        dist_sq_thresholds_str.append(_NoTrailing0s(d))

    fns_co_loc = []
    co_loc_file_sizes = []
    with Cons.MT("Generating reduced central office locations ..."):
        for d in dist_sq_thresholds_str:
            fn_co_loc = "filter-out-almost-duplicate-points/.output/centraloffices-wo-almost-dup-points-%s" % d
            fns_co_loc.append(fn_co_loc)
            if not os.path.exists(fn_co_loc):
                cmd = "cd filter-out-almost-duplicate-points && ./build-and-run.sh --dist_sq_threshold=%s" % d
                Util.RunSubp(cmd)
            co_loc_file_sizes.append(os.path.getsize(fn_co_loc))

    dn_out = "%s/.output" % os.path.dirname(__file__)
    fn_out = "%s/central-office-locations.pdf" % dn_out
    with Cons.MT("Plotting ..."):
        env = os.environ.copy()
        env["IN_FNS"] = " ".join(fns_co_loc)
        env["IN_FN_SIZES"] = " ".join(str(s) for s in co_loc_file_sizes)
        env["DIST_SQ_THRESHOLDS"] = " ".join(dist_sq_thresholds_str)
        env["OUT_FN"] = fn_out
        Util.RunSubp("gnuplot %s/central-office-on-map.gnuplot" %
                     os.path.dirname(__file__),
                     env=env)
        Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))

    sys.exit(0)
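
    # NOTE: everything below is unreachable because of the sys.exit(0) above,
    # and "exps" is not defined in this excerpt.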

    if False:
        # Parallel processing
        params = []
        for line in re.split(r"\s+", exps):
            t = line.split("/quizup/")
            if len(t) != 2:
                raise RuntimeError("Unexpected")
            job_id = t[0]
            exp_dt = t[1]
            params.append((job_id, exp_dt))
        p = multiprocessing.Pool(8)
        p.map(Plot, params)
    else:
        for line in re.split(r"\s+", exps):
            t = line.split("/quizup/")
            if len(t) != 2:
                raise RuntimeError("Unexpected")
            job_id = t[0]
            exp_dt = t[1]
            Plot((job_id, exp_dt))
Example #30
    def __init__(self, exp_set_id, stg_dev):
        conf_sd = Conf.Get(exp_set_id)[stg_dev]

        t = conf_sd["jobid_expdt"].split("/")
        job_id = t[0]
        exp_dt = t[1]

        t = conf_sd["time_window"].split("-")
        exp_time_begin = t[0]
        exp_time_end = t[1]

        dn_log = Conf.GetDir("dn")
        dn_log_job = "%s/%s" % (dn_log, job_id)

        self.fn_out = "%s/ycsb-by-time-%s" % (Conf.GetOutDir(), exp_dt)
        if os.path.isfile(self.fn_out):
            return

        self.exp_begin_dt = datetime.datetime.strptime(exp_dt,
                                                       "%y%m%d-%H%M%S.%f")
        #Cons.P(self.exp_begin_dt)

        with Cons.MT("Generating ycsb time-vs-metrics file for plot ..."):
            fn_log_ycsb = "%s/ycsb/%s-d" % (dn_log_job, exp_dt)
            # Unzip when the file is not there
            if not os.path.exists(fn_log_ycsb):
                fn_zipped = "%s.bz2" % fn_log_ycsb
                if not os.path.exists(fn_zipped):
                    raise RuntimeError("Unexpected: %s" % fn_log_ycsb)
                Util.RunSubp(
                    "cd %s && bzip2 -dk %s > /dev/null" %
                    (os.path.dirname(fn_zipped), os.path.basename(fn_zipped)))
            if not os.path.exists(fn_log_ycsb):
                raise RuntimeError("Unexpected")

            mo_list = []
            line_params = None
            line_run = None
            with open(fn_log_ycsb) as fo:
                for line in fo:
                    #Cons.P(line)
                    # 2017-10-13 20:41:01:258 2 sec: 34 operations; 34 current ops/sec; est completion in 68 days 1 hours [READ: Count=28, Max=46943, Min=33,
                    # Avg=32239.54, 90=45343, 99=46943, 99.9=46943, 99.99=46943] [INSERT: Count=8, Max=9343, Min=221, Avg=4660.88, 90=8695, 99=9343, 99.9=9343,
                    # 99.99=9343]
                    mo = re.match(r"\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d:\d\d\d (?P<rel_time>\d+) sec: \d+ operations; " \
                        "(?P<db_iops>(\d|\.)+) current ops\/sec; .*" \
                        "\[READ: Count=(?P<r_cnt>\d+), Max=(?P<r_max>\d+), Min=(?P<r_min>\d+), Avg=(?P<r_avg>(\d|\.)+)," \
                        " 90=(?P<r_90>\d+), 99=(?P<r_99>\d+), 99.9=(?P<r_999>\d+), 99.99=(?P<r_9999>\d+)\] " \
                        "\[INSERT: Count=(?P<w_cnt>\d+), Max=(?P<w_max>\d+), Min=(?P<w_min>\d+), Avg=(?P<w_avg>(\d|\.)+)," \
                        " 90=(?P<w_90>\d+), 99=(?P<w_99>\d+), 99.9=(?P<w_999>\d+), 99.99=(?P<w_9999>\d+)\] " \
                        , line)
                    if mo is not None:
                        total_seconds = int(mo.group("rel_time"))
                        s = total_seconds % 60
                        total_seconds -= s
                        total_mins = total_seconds / 60
                        m = total_mins % 60
                        total_mins -= m
                        h = total_mins / 60
                        rel_time = "%02d:%02d:%02d" % (h, m, s)
                        mo_list.append((rel_time, mo))
                        continue

                    if line.startswith("params = {"):
                        line_params = line
                        continue

                    if line.startswith("run = {"):
                        line_run = line
                        continue

            cnt = 0
            db_iops = []
            r_cnt = 0
            r_avg = 0.0
            r_min = 0
            r_max = 0
            r_90 = 0
            r_99 = 0
            r_999 = 0
            r_9999 = 0
            w_cnt = 0
            w_avg = 0.0
            w_min = 0
            w_max = 0
            w_90 = 0
            w_99 = 0
            w_999 = 0
            w_9999 = 0
            for e in mo_list:
                rel_time = e[0]
                if (exp_time_begin < rel_time) and (rel_time < exp_time_end):
                    mo = e[1]
                    db_iops.append(float(mo.group("db_iops")))
                    r_cnt += int(mo.group("r_cnt"))
                    r_avg += float(mo.group("r_avg"))
                    r_min += int(mo.group("r_min"))
                    r_max += int(mo.group("r_max"))
                    r_90 += int(mo.group("r_90"))
                    r_99 += int(mo.group("r_99"))
                    r_999 += int(mo.group("r_999"))
                    r_9999 += int(mo.group("r_9999"))
                    w_cnt += int(mo.group("w_cnt"))
                    w_avg += float(mo.group("w_avg"))
                    w_min += int(mo.group("w_min"))
                    w_max += int(mo.group("w_max"))
                    w_90 += int(mo.group("w_90"))
                    w_99 += int(mo.group("w_99"))
                    w_999 += int(mo.group("w_999"))
                    w_9999 += int(mo.group("w_9999"))
                    cnt += 1

            db_iops_stat = Stat.Gen(db_iops)

            with open(self.fn_out, "w") as fo_out:
                fo_out.write("# %s" % line_params)
                fo_out.write("# %s" % line_run)
                fo_out.write("\n")
                fo_out.write("# In the time range (%s, %s):\n" %
                             (exp_time_begin, exp_time_end))
                fo_out.write("#   db_iops.avg= %14f\n" % db_iops_stat.avg)
                fo_out.write("#   db_iops.min= %14f\n" % db_iops_stat.min)
                fo_out.write("#   db_iops.max= %14f\n" % db_iops_stat.max)
                fo_out.write("#   db_iops._25= %14f\n" % db_iops_stat._25)
                fo_out.write("#   db_iops._50= %14f\n" % db_iops_stat._50)
                fo_out.write("#   db_iops._75= %14f\n" % db_iops_stat._75)
                fo_out.write("#   r_cnt  = %14f\n" % (float(r_cnt) / cnt))
                fo_out.write("#   r_avg  = %14f\n" % (float(r_avg) / cnt))
                fo_out.write("#   r_min  = %14f\n" % (float(r_min) / cnt))
                fo_out.write("#   r_max  = %14f\n" % (float(r_max) / cnt))
                fo_out.write("#   r_90   = %14f\n" % (float(r_90) / cnt))
                fo_out.write("#   r_99   = %14f\n" % (float(r_99) / cnt))
                fo_out.write("#   r_999  = %14f\n" % (float(r_999) / cnt))
                fo_out.write("#   r_9999 = %14f\n" % (float(r_9999) / cnt))
                fo_out.write("#   w_cnt  = %14f\n" % (float(w_cnt) / cnt))
                fo_out.write("#   w_avg  = %14f\n" % (float(w_avg) / cnt))
                fo_out.write("#   w_min  = %14f\n" % (float(w_min) / cnt))
                fo_out.write("#   w_max  = %14f\n" % (float(w_max) / cnt))
                fo_out.write("#   w_90   = %14f\n" % (float(w_90) / cnt))
                fo_out.write("#   w_99   = %14f\n" % (float(w_99) / cnt))
                fo_out.write("#   w_999  = %14f\n" % (float(w_999) / cnt))
                fo_out.write("#   w_9999 = %14f\n" % (float(w_9999) / cnt))
                fo_out.write("\n")

                fmt = "%8s" \
                    " %9.2f" \
                    " %6d %8.2f %3d %6d" \
                    " %6d %6d %6d %6d" \
                    " %6d %8.2f %3d %6d" \
                    " %6d %6d %6d %6d"
                header = Util.BuildHeader(fmt, "rel_time" \
                      " db_iops" \
                      " read_cnt read_lat_avg read_lat_min read_lat_max" \
                      " read_lat_90p read_lat_99p read_lat_99.9p read_lat_99.99p" \
                      " write_cnt write_lat_avg write_lat_min write_lat_max" \
                      " write_lat_90p write_lat_99p write_lat_99.9p write_lat_99.99p" \
                      )

                i = 0
                for e in mo_list:
                    rel_time = e[0]
                    mo = e[1]
                    if i % 40 == 0:
                        fo_out.write(header + "\n")
                    fo_out.write(
                        (fmt + "\n") %
                        (rel_time, float(mo.group("db_iops")),
                         int(mo.group("r_cnt")), float(mo.group("r_avg")),
                         int(mo.group("r_min")), int(mo.group("r_max")),
                         int(mo.group("r_90")), int(mo.group("r_99")),
                         int(mo.group("r_999")), int(mo.group("r_9999")),
                         int(mo.group("w_cnt")), float(mo.group("w_avg")),
                         int(mo.group("w_min")), int(mo.group("w_max")),
                         int(mo.group("w_90")), int(mo.group("w_99")),
                         int(mo.group("w_999")), int(mo.group("w_9999"))))
                    i += 1
            Cons.P("Created %s %d" %
                   (self.fn_out, os.path.getsize(self.fn_out)))
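
One small note on the rel_time computation in this example: it relies on Python 2 integer division and mutates total_seconds and total_mins along the way. The same conversion can be written more directly with divmod (an equivalent sketch, not part of the original):

total_seconds = int(mo.group("rel_time"))
total_mins, s = divmod(total_seconds, 60)
h, m = divmod(total_mins, 60)
rel_time = "%02d:%02d:%02d" % (h, m, s)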