Example #1
0
    def Stop():
        # Stop the dstat daemon, then rename the current dstat log so its
        # name matches the most recent client (quizup) log's datetime, and
        # compress it with 7z.
        with Cons.MT("Stopping dstat ...", print_time=False):
            Dstat._Stop()

        # Change the current dstat log file name to the simulation_time_begin of
        # the simulator.
        if _cur_datetime is not None:
            with Cons.MT("Renaming the log file and zipping ..."):
                dn_client = "%s/quizup" % Conf.GetDir("log_archive_dn")
                global _latest_client_log_dt
                _latest_client_log_dt = None
                # Find the lexicographically largest (i.e., most recent)
                # client log name of the form yymmdd-HHMMSS.fff.
                for f in os.listdir(dn_client):
                    mo = re.match(
                        r"(?P<dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d)$", f)
                    if mo is not None:
                        if _latest_client_log_dt is None:
                            _latest_client_log_dt = mo.group("dt")
                        else:
                            _latest_client_log_dt = max(
                                _latest_client_log_dt, mo.group("dt"))
                # There should be a client log file whose dt is bigger than
                # _cur_datetime
                if _latest_client_log_dt <= _cur_datetime:
                    raise RuntimeError("Unexpected")
                fn0 = "%s/dstat/%s.csv" % (Conf.GetDir("log_archive_dn"),
                                           _cur_datetime)
                fn1 = "%s/dstat/%s.csv" % (Conf.GetDir("log_archive_dn"),
                                           _latest_client_log_dt)
                Cons.P("renaming %s to %s" % (fn0, fn1))
                os.rename(fn0, fn1)
                # 7z a -mx: add to archive with maximum compression.
                Util.RunSubp("7z a -mx %s.7z %s" % (fn1, fn1))
Example #2
0
def _EvictCache():
    # Drop the OS page cache so the experiment starts from a cold cache.
    with Cons.MT("Drop caches ..."):
        Util.RunSubp("sudo sh -c \"echo 3 >/proc/sys/vm/drop_caches\"")

    # NOTE: dead code below (if False) kept for reference — per-file eviction
    # with vmtouch, superseded by the full drop_caches above.
    if False:
        # Evict the DB data files from cache
        with Cons.MT("Evicting DB data ..."):
            Util.RunSubp("%s/work/vmtouch/vmtouch -e %s" %
                         (os.path.expanduser("~"), Conf.GetDir("db_path")))

        # Evict the input data files from cache so that the read IO is consistent
        # throughout the experiment.
        # Caching them would have been nice, but you can do it only when you limit
        # the memory smaller than 3.4 GB (= 15 (total ram) - 0.4 (for the OS) - 11.2
        # (the 100% quizup data size)
        evict = True
        if evict:
            with Cons.MT("Evicting workload data ..."):
                Util.RunSubp(
                    "%s/work/vmtouch/vmtouch -e %s" %
                    (os.path.expanduser("~"), Conf.GetDir("workload_dir")))
        else:
            with Cons.MT("Caching workload data ..."):
                Util.RunSubp(
                    "%s/work/vmtouch/vmtouch -t %s" %
                    (os.path.expanduser("~"), Conf.GetDir("workload_dir")))
Example #3
0
def PlotSstAccDistAtSpecificTimes():
    """Plot SSTables by level with heat at the configured (n, m) times.

    Each entry of Conf.times_sst_by_levels_with_heat is (n, m): m seconds
    after the n-th SSTable is created.
    """
    # At the time m sec after the n-th SSTable is created (time t).  To get the
    # max_plot_height, all plot data files need to be generated before plotting
    # the first one.
    plot_data_fns_at_n = {}
    with Cons.MT(
            "Generating plot data for SSTables by levels with heat at specific times ..."
    ):
        for (n, m) in Conf.times_sst_by_levels_with_heat:
            (fn_in_boxes,
             fn_in_level_seps) = SstByLevelsWithHeatAtSpecificTimes.Boxes(
                 n, m)
            plot_data_fns_at_n[n] = (fn_in_boxes, fn_in_level_seps)

    with Cons.MT(
            "Plotting SSTables by levels with heat at specific times ..."):
        dn = "%s/sst-by-level-by-ks-range-with-heat" % Conf.dn_result

        # One plot per n; the data file names are passed to gnuplot via the
        # environment.
        for n, (fn_in_boxes,
                fn_in_level_seps) in sorted(plot_data_fns_at_n.iteritems()):
            env = os.environ.copy()
            env["FN_IN_BOXES"] = fn_in_boxes
            env["FN_IN_LEVEL_INFO"] = fn_in_level_seps
            env["MAX_PLOT_HEIGHT"] = str(
                SstByLevelsWithHeatAtSpecificTimes.max_plot_height)
            fn_out = "%s/sst-by-level-by-ks-range-with-heat-%s-%s.pdf" % (
                dn, Conf.ExpStartTime(), n)
            env["FN_OUT"] = fn_out

            Util.RunSubp(
                "gnuplot %s/sst-by-level-by-ks-range-with-heat-at-specific-time.gnuplot"
                % os.path.dirname(__file__),
                env=env)
            Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
Example #4
0
    def __init__(self):
        """Open the Whole Foods location SQLite DB, creating the file and
        its table when it does not exist yet.

        Sets self.conn to a sqlite3.Connection whose row_factory is
        sqlite3.Row, so columns can be accessed by name.
        """
        self.conn = None

        # Open or create DB.
        # Note: sqlite3.connect() never returns None — it raises
        # sqlite3.Error on failure — so the old `if conn is None` checks
        # were dead code and have been removed. The connect/row_factory/
        # cursor setup, previously duplicated in both branches, is done once.
        fn_db = Conf.GetFn("whole_foods_loc_db")
        db_existed = os.path.exists(fn_db)
        msg = "Opening the existing db ..." if db_existed else "Creating a new db ..."
        with Cons.MT(msg):
            conn = sqlite3.connect(fn_db)
            conn.row_factory = sqlite3.Row
            cur = conn.cursor()
            if db_existed:
                q = "SELECT count(*) as cnt FROM whole_foods_loc"
                cur.execute(q)
                r = cur.fetchone()
                Cons.P("There are %d records" % r["cnt"])
            else:
                q = """CREATE TABLE IF NOT EXISTS whole_foods_loc (
                 addr text NOT NULL
                 , lat real NOT NULL
                 , lon real NOT NULL
                 , PRIMARY KEY (addr)
               ); """
                cur.execute(q)
        self.conn = conn
Example #5
0
def _ReadStoredLog():
	"""Return the stored Cassandra Mutant log as a list of stripped lines.

	Returns None when no experiment start time is configured or no log
	(plain or 7z-compressed) exists for it. Uncompresses the .7z archive
	on demand.
	"""
	if Conf.ExpStartTime() is None:
		return None

	dn = "%s/work/mutant/misc/logs/cassandra" % os.path.expanduser("~")
	fn = "%s/system-%s" % (dn, Conf.ExpStartTime())
	if not os.path.isfile(fn):
		# If there is a 7z file, uncompress it; otherwise give up.
		fn_7z = "%s.7z" % fn
		if not os.path.isfile(fn_7z):
			return None
		with Cons.MT("Found a 7z file. Uncompressing"):
			Util.RunSubp("7z e -o%s %s" % (dn, fn_7z))

	with Cons.MT("Reading the stored Cassandra Mutant log file %s" % fn, print_time=False):
		# Optional cap on the number of lines read, for quick testing.
		# Hoisted out of the loop: it is a config value, constant per run.
		max_lines = Conf.MaxCassLogLines()
		lines = []
		# Iterate the file object directly instead of readlines(), so the
		# whole file is not materialized when the cap cuts reading short.
		with open(fn) as fo:
			for line in fo:
				lines.append(line.strip())
				if 0 < max_lines < len(lines):
					break
		#Cons.P(len(lines))

		return lines
Example #6
0
def main(argv):
    """Generate reduced-size USA maps at several point-filtering thresholds
    and plot them, annotated with the resulting file sizes."""
    Conf.ParseArgs()
    Util.MkDirs(Conf.GetDir("output_dir"))

    # Squared-distance thresholds for filtering out almost-duplicate points.
    dist_sq_thresholds = [0, 0.008, 0.01, 0.02, 0.05]

    #dist_sq_thresholds = [0.02]

    dist_sq_thresholds_str = []
    for d in dist_sq_thresholds:
        dist_sq_thresholds_str.append(_NoTrailing0s(d))

    reduced_files = []
    reduced_file_sizes = []
    with Cons.MT("Generating reduced size usa map ..."):
        for d in dist_sq_thresholds_str:
            fn_co_loc = "filter-out-almost-duplicate-points/.output/usa-map-smallsize-%s" % d
            reduced_files.append(fn_co_loc)
            # Generate only when not already cached from a previous run.
            if not os.path.exists(fn_co_loc):
                cmd = "cd filter-out-almost-duplicate-points && ./build-and-run.sh --dist_sq_threshold=%s" % d
                Util.RunSubp(cmd)
            reduced_file_sizes.append(os.path.getsize(fn_co_loc))

    dn_out = "%s/.output" % os.path.dirname(__file__)
    fn_out = "%s/usa-map.pdf" % dn_out
    with Cons.MT("Plotting ..."):
        # Inputs are handed to gnuplot through environment variables.
        env = os.environ.copy()
        env["IN_FNS"] = " ".join(reduced_files)
        env["IN_FN_SIZES"] = " ".join(str(s) for s in reduced_file_sizes)
        env["DIST_SQ_THRESHOLDS"] = " ".join(dist_sq_thresholds_str)
        env["OUT_FN"] = fn_out
        Util.RunSubp("gnuplot %s/usa-map.gnuplot" % os.path.dirname(__file__),
                     env=env)
        Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
Example #7
0
def PlotSstAccfreqByAgeIndividualMultiplot():
	"""Plot per-SSTable access frequency by age, one multiplot per level.

	Iterates levels from 0 until a level has no SSTables, passing the plot
	inputs to gnuplot through environment variables.
	"""
	with Cons.MT("Plotting individual SSTable access frequencies by their ages ..."):
		dn_out = "%s/%s/sst-age-accfreq-plot" % (Conf.Get("dn_result"), Conf.Get("simulation_time_begin"))
		Util.MkDirs(dn_out)

		env = os.environ.copy()
		dn = "%s/%s/sst-age-accfreq-data" % (Conf.Get("dn_result"), Conf.Get("simulation_time_begin"))
		env["IN_DN"] = dn

		# Plot for all levels. Stop when there is no sstable at a level.
		level = 0
		while True:
			env["LEVEL"] = str(level)
			sst_lives = RocksDbLogReader.GetSstAccFreqByAgeDataFiles(level)
			if len(sst_lives) == 0:
				break
			env["SST_IDS"] = " ".join(str(sl.Id()) for sl in sst_lives)

			age_deleted = []
			for sl in sst_lives:
				age_deleted.append(SimTime.ToSimulatedTimeDur((sl.TsDeleted() - sl.TsCreated()).total_seconds()))
			env["AGE_DELETED"] = " ".join(str(i) for i in age_deleted)

			# Age deleted max. Round up with an hour granularity.
			age_deleted_max = max(age_deleted)
			age_deleted_max = math.ceil(age_deleted_max / 3600.0) * 3600
			env["AGE_DELETED_MAX"] = str(age_deleted_max)

			# Per-SSTable and per-level maxima of the values at indices 4
			# and 5 of each AgeAccfreq() record.
			accfreq_max_all_sst_in_level = 0.0
			temp_max_all_sst_in_level = 0.0
			accfreq_max_list = []
			temp_max_list = []
			for sl in sst_lives:
				accfreq_max = 0.0
				temp_max = 0.0
				for accfreq in sl.AgeAccfreq():
					accfreq_max_all_sst_in_level = max(accfreq_max_all_sst_in_level, accfreq[4])
					temp_max_all_sst_in_level = max(temp_max_all_sst_in_level, accfreq[5])
					accfreq_max = max(accfreq_max, accfreq[4])
					temp_max = max(temp_max, accfreq[5])
				accfreq_max_list.append(accfreq_max)
				temp_max_list.append(temp_max)

			# BUG FIX: the key used to contain a space ("ACCFREQ_MAX_ALL_SST_IN LEVEL"),
			# which does not match the underscore naming of the sibling
			# TEMP_MAX_ALL_SST_IN_LEVEL variable the gnuplot script reads.
			env["ACCFREQ_MAX_ALL_SST_IN_LEVEL"] = str(accfreq_max_all_sst_in_level)
			env["TEMP_MAX_ALL_SST_IN_LEVEL"] = str(temp_max_all_sst_in_level)
			env["ACCFREQ_MAX"] = " ".join(str(e) for e in accfreq_max_list)
			env["TEMP_MAX"] = " ".join(str(e) for e in temp_max_list)

			out_fn = "%s/L%d.pdf" % (dn_out, level)
			env["OUT_FN"] = out_fn

			with Cons.MT("Plotting level %d ..." % level):
				Util.RunSubp("gnuplot %s/sst-accfreq-by-age-multiplot-by-level.gnuplot" % os.path.dirname(__file__), env=env, print_cmd=False)
				Cons.P("Created %s %d" % (out_fn, os.path.getsize(out_fn)))

			level += 1
Example #8
0
def _Cpu():
    # Render the CPU usage plot for the current simulation run via gnuplot.
    with Cons.MT("Plotting cpu ..."):
        out_dn = "%s/%s" % (Conf.GetDir("output_dir"),
                            Conf.Get("simulation_time_begin"))
        out_fn = "%s/cpu.pdf" % out_dn

        plot_env = os.environ.copy()
        plot_env["FN_IN"] = CsvFile.GenDataFileForGnuplot()
        plot_env["FN_OUT"] = out_fn

        with Cons.MT("Plotting ..."):
            gnuplot_fn = "%s/cpu.gnuplot" % os.path.dirname(__file__)
            Util.RunSubp("gnuplot %s" % gnuplot_fn, env=plot_env)
            Cons.P("Created %s %d" % (out_fn, os.path.getsize(out_fn)))
Example #9
0
def Plot():
    # Plot the key access frequency data file produced by GetKeyCntData().
    with Cons.MT("Plotting ..."):
        fn_in = GetKeyCntData()
        fn_out = "%s.pdf" % fn_in

        env = os.environ.copy()
        env["FN_IN"] = fn_in
        env["FN_OUT"] = fn_out

        with Cons.MT("Plotting ..."):
            gnuplot_fn = "%s/key-access-freq.gnuplot" % os.path.dirname(__file__)
            Util.RunSubp("gnuplot %s" % gnuplot_fn, env=env)
            Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
Example #10
0
def Plot():
    # Plot the client-side latency of the current simulation run.
    with Cons.MT("Plotting latency ..."):
        env = os.environ.copy()
        # Input: the client log of the current run.
        env["FN_IN"] = "%s/client/%s" % (Conf.GetDir("log_dir"),
                                         Conf.Get("simulation_time_begin"))
        dn = "%s/%s" % (Conf.GetDir("output_dir"),
                        Conf.Get("simulation_time_begin"))
        Util.MkDirs(dn)
        fn_out = "%s/latency.pdf" % dn
        env["FN_OUT"] = fn_out
        with Cons.MT("Plotting ..."):
            Util.RunSubp("gnuplot %s/latency.gnuplot" %
                         os.path.dirname(__file__),
                         env=env)
            Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
Example #11
0
def Plot(dts, fns):
    # Plot timestamps by files. dts and fns are parallel lists of datetime
    # strings and file names, joined into space-separated env vars for gnuplot.
    with Cons.MT("Plotting timestamp by files"):
        env = os.environ.copy()

        env["DTS"] = " ".join(dts)
        env["FNS"] = " ".join(fns)

        fn_out = "%s/timestamp-by-files.pdf" % _dn_out
        env["FN_OUT"] = fn_out

        with Cons.MT("Plotting ..."):
            Util.RunSubp("gnuplot %s/timestamp-by-files.gnuplot" %
                         os.path.dirname(__file__),
                         env=env)
            Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
def Plot():
    # Plot the aggregate number of reads by object age, from the running
    # average file produced by GetRunningAvg().
    with Cons.MT("Plotting ..."):
        env = os.environ.copy()

        env["FN_IN"] = GetRunningAvg()
        # NOTE(review): magic constants passed through to the gnuplot
        # script; their meaning is defined there.
        env["NUM_OBJS"] = str(2047471)
        env["PER_OBJ"] = str(0)
        fn_out = "%s/quizup-num-reads-by-obj-age-aggregate.pdf" % _dn_out
        env["FN_OUT"] = fn_out

        with Cons.MT("Plotting ..."):
            Util.RunSubp("gnuplot %s/num-reads-by-obj-ages.gnuplot" %
                         os.path.dirname(__file__),
                         env=env)
            Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
Example #13
0
def PrepareBlockDevs():
	"""Format and mount the EC2 instance's block devices (local SSDs and
	any attached EBS volumes) as ext4 under /mnt, owned by ubuntu."""
	with Cons.MT("Preparing block storage devices ..."):
		# Make sure we are using the known machine types
		inst_type = Util.RunSubp("curl -s http://169.254.169.254/latest/meta-data/instance-type", print_cmd = False, print_output = False)

		# {dev_name: directory_name}
		# ext4 label is the same as the directory_name
		blk_devs = {"xvdb": "local-ssd0"}
		# All c3 types have 2 SSDs
		if inst_type.startswith("c3."):
			# Not needed for now
			#blk_devs["xvdc"] = "local-ssd1"
			pass
		elif inst_type in ["r3.large", "r3.xlarge", "r3.2xlarge", "r3.4xlarge"
				, "i2.xlarge"]:
			pass
		else:
			raise RuntimeError("Unexpected instance type %s" % inst_type)
		# Optional EBS volumes, identified by their device nodes.
		if os.path.exists("/dev/xvdd"):
			blk_devs["xvdd"] = "ebs-gp2"
		if os.path.exists("/dev/xvde"):
			blk_devs["xvde"] = "ebs-st1"
		if os.path.exists("/dev/xvdf"):
			blk_devs["xvdf"] = "ebs-sc1"

		# Init local SSDs
		# - https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/disk-performance.html
		# - Skip for Castnet, in which local SSD speed doesn't matter.

		Util.RunSubp("sudo umount /mnt || true")
		for dev_name, dir_name in blk_devs.iteritems():
			Cons.P("Setting up %s ..." % dev_name)
			Util.RunSubp("sudo umount /dev/%s || true" % dev_name)
			Util.RunSubp("sudo mkdir -p /mnt/%s" % dir_name)

			# Prevent lazy Initialization
			# - "When creating an Ext4 file system, the existing regions of the inode
			#   tables must be cleaned (overwritten with nulls, or "zeroed"). The
			#   "lazyinit" feature should significantly accelerate the creation of a
			#   file system, because it does not immediately initialize all inode
			#   tables, initializing them gradually instead during the initial mounting
			#   process in background (from Kernel version 2.6.37)."
			#   - https://www.thomas-krenn.com/en/wiki/Ext4_Filesystem
			# - Default values are 1s, which do lazy init.
			#   - man mkfs.ext4
			#
			# nodiscard is in the documentation
			# - https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ssd-instance-store.html
			# - Without nodiscard, it takes about 80 secs for a 800GB SSD.
			Util.RunSubp("sudo mkfs.ext4 -m 0 -E nodiscard,lazy_itable_init=0,lazy_journal_init=0 -L %s /dev/%s"
					% (dir_name, dev_name), measure_time=True)

			# Some are already mounted. I suspect /etc/fstab does the magic when the
			# file system is created. Give it some time and umount
			time.sleep(1)
			Util.RunSubp("sudo umount /dev/%s || true" % dev_name)

			# -o discard for TRIM
			Util.RunSubp("sudo mount -t ext4 -o discard /dev/%s /mnt/%s" % (dev_name, dir_name))
			Util.RunSubp("sudo chown -R ubuntu /mnt/%s" % dir_name)
def _CloneAndBuildYcsb():
    """Clone YCSB onto the local SSD, symlink it into ~/work, switch to the
    mutant branch, and build the rocksdb binding with Maven."""
    with Cons.MT("Cloning YCSB and build ..."):
        # Git clone
        Util.RunSubp("rm -rf /mnt/local-ssd0/mutant/YCSB")
        # Util.RunSubp("git clone https://github.com/hobinyoon/YCSB /mnt/local-ssd0/mutant/YCSB")
        Util.RunSubp(
            "git clone https://github.com/1a1a11a/YCSB /mnt/local-ssd0/mutant/YCSB"
        )

        # Symlink
        Util.RunSubp("rm -rf /home/ubuntu/work/mutant/YCSB")
        Util.RunSubp(
            "ln -s /mnt/local-ssd0/mutant/YCSB /home/ubuntu/work/mutant/YCSB")

        # Edit the git source repository for easy development.
        # Util.RunSubp("sed -i 's/" \
        #     "^\\turl = https:\\/\\/github.com\\/hobinyoon\\/YCSB" \
        #     "/\\turl = [email protected]:hobinyoon\/YCSB.git" \
        #     "/g' %s" % "~/work/mutant/YCSB/.git/config")

        # Rewrite the remote URL from https to ssh so pushes work.
        Util.RunSubp("sed -i 's/" \
            "^\\turl = https:\\/\\/github.com\\/1a1a11a\\/YCSB" \
            "/\\turl = [email protected]:1a1a11a\/YCSB.git" \
            "/g' %s" % "~/work/mutant/YCSB/.git/config")

        # Switch to mutant branch
        Util.RunSubp("cd /home/ubuntu/work/mutant/YCSB" \
            " && git branch -f mutant origin/mutant" \
            " && git checkout mutant")

        # Build
        #Util.RunSubp("cd /home/ubuntu/work/mutant/YCSB && mvn -pl com.yahoo.ycsb:cassandra-binding -am clean package -DskipTests >/dev/null 2>&1")
        Util.RunSubp(
            "cd /home/ubuntu/work/mutant/YCSB && mvn -pl com.yahoo.ycsb:rocksdb-binding -am clean package -DskipTests >/dev/null 2>&1"
        )
def GetRunningAvg():
    """Return the name of the num-reads-by-obj-age data file with a running
    average column, generating it from _fn_in when not already cached."""
    fn = "%s/num-reads-by-obj-age" % _dn_out
    if os.path.isfile(fn):
        return fn

    with Cons.MT("Generating running average ..."):
        # Running average of the last n items, not in the last n-minute time
        # window. Should generate smoother curve for plotting.
        q = Q(60)

        with open(_fn_in) as fo, open(fn, "w") as fo_out:
            for line in fo:
                # Each input line: "<ts> <num_reads>"
                t = line.strip().split(" ")
                if len(t) != 2:
                    raise RuntimeError("Unexpected [%s]" % line)
                ts = int(t[0])
                num_reads = int(t[1])
                q.Enq(num_reads)

                # NOTE: dead branch (if False) kept for reference; it used to
                # omit the average for the first 2 days.
                if False:
                    if ts < 2 * 24 * 60:
                        fo_out.write("%d %d -\n" % (ts, num_reads))
                    else:
                        fo_out.write("%d %d %.1f\n" % (ts, num_reads, q.Avg()))
                else:
                    fo_out.write("%d %d %.1f\n" % (ts, num_reads, q.Avg()))

        Cons.P("Created %s %d" % (fn, os.path.getsize(fn)))
        return fn
Example #16
0
def _CalcCostAll():
    """Run calc.py for every experiment (Mutant experiments grouped by
    migration temperature threshold, plus the RocksDB baseline)."""
    with Cons.MT("Calculating all cost ..."):
        if True:
            # Parallel processing: write one calc.py command per line to a
            # temp file and feed it to GNU parallel.
            with tempfile.NamedTemporaryFile() as fo:
                for k, exps in _exp_list_mutant_by_mig_temp_thrd.iteritems():
                    for e in exps:
                        fo.write(
                            "%s/../calc/calc.py --simulation_time_begin=%s\n" %
                            (os.path.dirname(__file__), e))
                fo.write("%s/../calc/calc.py --simulation_time_begin=%s\n" %
                         (os.path.dirname(__file__),
                          _exp_rocksdb_simulation_time_begin))
                fo.flush()
                Util.RunSubp("parallel :::: %s" % fo.name)
        else:
            # Serial processing. Useful for debugging
            for k, exps in _exp_list_mutant_by_mig_temp_thrd.iteritems():
                for e in exps:
                    Util.RunSubp(
                        "%s/../calc/calc.py --simulation_time_begin=%s" %
                        (os.path.dirname(__file__), e))
            Util.RunSubp("%s/../calc/calc.py --simulation_time_begin=%s" %
                         (os.path.dirname(__file__),
                          _exp_rocksdb_simulation_time_begin))
Example #17
0
def Get1minAvgFn():
  """Return the name of the 1-min average memory usage comparison file for
  the two configured experiments, generating it when not cached."""
  exp_dts = []
  for i in range(2):
    #Cons.P(Conf.Get(i))
    mo = re.match(r".+/(?P<exp_dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d)-d", Conf.Get(i))
    exp_dts.append(mo.group("exp_dt"))
  fn_out = "%s/mem-1minavg-%s" % (Conf.GetOutDir(), "-".join(exp_dts))
  if os.path.exists(fn_out):
    return fn_out

  with Cons.MT("Creating avg memory usage comparison file for plotting ..."):
    records = []
    dn_base = Conf.GetDir("dn_base")
    for i in range(2):
      fn_ycsb_log = "%s/%s" % (dn_base, Conf.Get(i))
      hm_mem = _GetHmMem(fn_ycsb_log)
      for hm, mem in hm_mem.iteritems():
        records.append(_RecordMemAvg(hm, i * 30, mem, i))
    records.sort(key=operator.attrgetter("ts"))

  fmt = "%8s %6.3f %1d"
  header = Util.BuildHeader(fmt, "timestamp mem_avg_in_gb exp_type")
  with open(fn_out, "w") as fo:
    i = 0
    for r in records:
      # Repeat the header every 40 records for readability.
      if i % 40 == 0:
        fo.write(header + "\n")
      # BUG FIX: the increment used to be inside the `if`, so i stuck at 1
      # after the first record and the header was never repeated. It must
      # advance once per record (same pattern as GenDataFileForGnuplot).
      i += 1
      fo.write("%s\n" % r.ToStr(fmt))
  Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
  return fn_out
Example #18
0
def GenDataFileForGnuplot(dt):
    """Generate (or return the cached) dstat data file for gnuplot for the
    run identified by datetime string dt."""
    SimTime.Init(dt)

    dn = "%s/%s" % (Conf.GetDir("output_dir"), dt)
    Util.MkDirs(dn)
    fn = "%s/dstat-data" % dn
    # Cached from a previous run.
    if os.path.isfile(fn):
        return fn

    with Cons.MT("Generating data file for plot ..."):
        # Reset the module-level parse state before re-parsing.
        global _header_idx
        global _body_rows
        _header_idx = None
        _body_rows = None

        _Parse(dt)

        fmt = "%9.0f %9.0f %9.0f %9.0f %9.0f %9.0f %9.0f %9.0f" \
          " %6.1f %6.1f %6.1f %6.1f %6.1f %6.1f %6.1f %6.1f" \
          " %8.0f %8.0f %8.0f %8.0f" \
          " %3.0f %3.0f" \
          " %3.0f %3.0f %11s" \
          " %3.1f %6.2f %3.1f %6.2f %6.2f %6.3f"
        header = Util.BuildHeader(
            fmt, " ".join(k for k, v in sorted(_header_idx.iteritems())))
        with open(fn, "w") as fo:
            i = 0
            for r in _body_rows:
                # Repeat the header every 50 rows for readability.
                if i % 50 == 0:
                    fo.write("%s\n" % header)
                i += 1
                #Cons.P(fmt % tuple(r.Prepared()))
                fo.write((fmt + "\n") % tuple(r.Prepared()))
        Cons.P("Created %s %d" % (fn, os.path.getsize(fn)))
        return fn
Example #19
0
def PlotCompareTwo():
    """Plot the Mutant overhead comparison of the two configured
    experiments (CPU, memory, and RocksDB stats)."""
    (fns_rocksdb, fn_sst_creation_stat) = RocksdbLog.GenDataFilesForGnuplot()
    #fn_cpu_stat_by_time = CompareCpu.GetHourlyFn()
    fn_cpu_1min_avg = CompareCpu.Get1minAvgFn()
    fn_mem_stat_by_time = CompareMem.GetHourlyFn()
    fn_mem_1min_avg = CompareMem.Get1minAvgFn()
    # X-axis cutoff for the plot.
    #time_max = "09:00:00"
    #time_max = "08:00:00"
    time_max = "07:50:00"

    # Output name is derived from the two experiments' datetimes.
    exp_dts = []
    for i in range(2):
        mo = re.match(r".+/(?P<exp_dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d)-d",
                      Conf.Get(i))
        exp_dts.append(mo.group("exp_dt"))
    fn_out = "%s/mutant-overhead-%s.pdf" % (Conf.GetOutDir(),
                                            "-".join(exp_dts))

    with Cons.MT("Plotting ..."):
        env = os.environ.copy()
        env["TIME_MAX"] = str(time_max)
        #env["CPU_STAT"] = fn_cpu_stat_by_time
        env["FN_CPU_1MIN_AVG"] = fn_cpu_1min_avg
        #env["MEM_STAT"] = fn_mem_stat_by_time
        env["FN_MEM_1MIN_AVG"] = fn_mem_1min_avg
        env["ROCKSDB0"] = fns_rocksdb[0]
        env["ROCKSDB1"] = fns_rocksdb[1]
        env["OUT_FN"] = fn_out
        Util.RunSubp("gnuplot %s/compare-two-exps.gnuplot" %
                     os.path.dirname(__file__),
                     env=env)
        Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
Example #20
0
def UnzipQuizupData():
	# Unzip the QuizUp dataset onto the local SSD and symlink it into ~/work.
	with Cons.MT("Unzipping QuizUp data ..."):
		home = os.path.expanduser("~")
		for cmd in [
				"mkdir -p /mnt/local-ssd0/quizup-data"
				, "rm -rf %s/work/quizup-data" % home
				, "ln -s /mnt/local-ssd0/quizup-data %s/work/quizup-data" % home
				, "mkdir -p %s/work/quizup-data/memcached-2w/simulator-data" % home
				, "cd %s/work/quizup-data-zipped && ./unzip.sh" % home]:
			Util.RunSubp(cmd)
def _EditCassConfDataFileDir(fn):
    """Point data_file_directories in the Cassandra config file fn at the
    local-SSD data directory, replacing any existing list items."""
    with Cons.MT("Edit data_file_directories ..."):
        # data_file_directories:
        # - Can't get the bracket notation working. Go for the dash one.
        dn = "/mnt/local-ssd1/cassandra-data"
        Util.MkDirs(dn)

        # FIX: read first and close the file before rewriting it. The old
        # code opened fn for writing inside the reading `with`, shadowing
        # the read handle and truncating the file while it was still open.
        with open(fn) as fo:
            lines = fo.readlines()

        lines_new = []
        i = 0
        while i < len(lines):
            line = lines[i].rstrip()
            #Cons.P("line=[%s]" % line)
            if re.match(r"(# )?data_file_directories:", line):
                # Skip the following list-item lines, commented or not,
                # e.g. "#     - /var/lib/cassandra/data".
                # FIX: always advance i here; the old inner loop left i
                # unchanged when the header was the last line, looping forever.
                i += 1
                while i < len(lines):
                    if re.match(r"\#? +- .+", lines[i].rstrip()) is None:
                        break
                    i += 1
                # Insert new one
                lines_new.append("data_file_directories:")
                lines_new.append("    - %s" % dn)
            else:
                lines_new.append(line)
                i += 1

        # Save lines_new back to the file
        with open(fn, "w") as fo:
            for l in lines_new:
                fo.write("%s\n" % l)
Example #22
0
def PlotWorkload(workload_type):
    """Plot throughput vs latency for the given YCSB workload type,
    comparing Mutant and RocksDB storage configurations."""
    Util.MkDirs(_dn_output)
    #(fn_plot_data_m_ls_st1, fn_plot_data_ind) = GetPlotDataMutant(workload_type, "ls-st1", "~/work/mutant/log/ycsb/workload-%s/mutant-ls-st1" % workload_type)
    (fn_plot_data_m_ls_st1, fn_plot_data_ind) = GetPlotDataMutant(
        workload_type, "ls-st1",
        "~/work/mutant/log/ycsb/workload-%s/170822-022606-d-ls-st1-short-exps"
        % workload_type)
    # NOTE(review): debug leftover — this exit makes everything below
    # unreachable; remove it to restore the full plot.
    sys.exit(1)
    (fn_plot_data_r_st1, fn_plot_data_ind) = GetPlotDataRocksdb(
        workload_type, "st1",
        "~/work/mutant/log/ycsb/workload-%s/rocksdb-st1" % workload_type)
    (fn_plot_data_r_ls, fn_plot_data_ind) = GetPlotDataRocksdb(
        workload_type, "ls",
        "~/work/mutant/log/ycsb/workload-%s/rocksdb-ls" % workload_type)

    fn_out = "%s/ycsb-%s-thp-vs-latency.pdf" % (_dn_output, workload_type)

    with Cons.MT("Plotting ..."):
        env = os.environ.copy()
        env["FN_ROCKSDB_ST1"] = fn_plot_data_r_st1
        env["FN_ROCKSDB_LS"] = fn_plot_data_r_ls
        env["FN_MUTANT_LS_ST1"] = fn_plot_data_m_ls_st1
        env["FN_OUT"] = fn_out
        Util.RunSubp("gnuplot %s/thrp-vs-lat.gnuplot" %
                     os.path.dirname(__file__),
                     env=env)
        Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
Example #23
0
def StgSizetimeCostMutant():
	"""Return {sst_mig_temp_threshold: StatSizetimeCostByStgdev} for the
	2-level Mutant experiments listed in the manifest."""
	exp_list_root = Conf.Manifest.Get("2-level Mutant storage by SSTable migration temperature thresholds. On EC2")

	# Make sure the per-experiment cost files exist before aggregating.
	all_exps = []
	for sst_mig_temp_threshold, v in exp_list_root["Mutant"]["By SSTable migration temperature thresholds"].iteritems():
		for simulation_time_begin in v:
			all_exps.append(simulation_time_begin)
	_CalcCost(all_exps)

	# {sst_mig_temp_threshold: StatSizetimeCostByStgdev}
	mutant_migth_stg_stat = {}
	with Cons.MT("Calculating storage cost ..."):
		for sst_mig_temp_threshold, v in exp_list_root["Mutant"]["By SSTable migration temperature thresholds"].iteritems():
			sst_mig_temp_threshold = float(sst_mig_temp_threshold)
			for simulation_time_begin in v:
				# Validate if the log is for the correct sst_mig_temp_threshold
				if sst_mig_temp_threshold != _GetSstMigTempThreshold(simulation_time_begin):
					raise RuntimeError("Unexpected [%s] != [%s]" % (sst_mig_temp_threshold, _GetSstMigTempThreshold(simulation_time_begin)))

				if sst_mig_temp_threshold not in mutant_migth_stg_stat:
					mutant_migth_stg_stat[sst_mig_temp_threshold] = StatSizetimeCostByStgdev()
				mutant_migth_stg_stat[sst_mig_temp_threshold].Add(SizetimeCostByStgdev(simulation_time_begin))
		#Cons.P(pprint.pformat(mutant_migth_stg_stat))

	return mutant_migth_stg_stat
def SstInfo():
    """Generate (or return the cached) SSTable info by time by levels data
    file for the current experiment start time."""
    # Set Conf.ExpStartTime(), if not already set.
    if Conf.ExpStartTime() is None:
        MutantLogReader.Get()

    fn = "%s/sst-info-by-time-by-levels-%s" % (Conf.dn_result,
                                               Conf.ExpStartTime())
    # Cached from a previous run.
    if os.path.isfile(fn):
        return fn

    (sst_lives, memt_lives) = MemtSstLife.Get()

    with Cons.MT("Generating Sst info by time by levels data file ..."):
        #with open(fn_m, "w") as fo:
        #	fo.write("%s\n" % Memt.Header())
        #	for addr, l in sorted(_memt_lives.iteritems()):
        #		fo.write("%s\n" % l)
        #Cons.P("Created %s %d" % (fn_m, os.path.getsize(fn_m)))

        # One line per SSTable life, ordered by sst_gen.
        with open(fn, "w") as fo:
            fo.write("%s\n" % MemtSstLife.SstLife.Header())
            for sst_gen, l in sorted(sst_lives.iteritems()):
                fo.write("%s\n" % l)
        Cons.P("Created %s %d" % (fn, os.path.getsize(fn)))
    return fn
    def GenStat(self, fn):
        """Parse an ioping output file and generate a latency CDF data file.

        Lap times are normalized to microseconds. The CDF file name is
        appended to self.fns_cdf.
        """
        with Cons.MT(fn, print_time=False):
            lap_times = []
            with open(fn) as fo:
                for line in fo.readlines():
                    line = line.rstrip()
                    if len(line) == 0:
                        continue
                    if line.startswith("#"):
                        continue

                    # 4 KiB from /mnt/local-ssd0/ioping-test-data (ext4 /dev/xvdb): request=1 time=219.1 us
                    # 4 KiB from /mnt/local-ssd0/ioping-test-data (ext4 /dev/xvdb): request=394 time=1.51 ms
                    #
                    # BUG FIX: re.match() anchors at the start of the line, so
                    # it could never match lines like the above where the lap
                    # time is at the end; re.search() scans the whole line.
                    m = re.search(r"(?P<lap_time>(\d|\.)+ (us|ms))", line)
                    if m:
                        lt = m.group("lap_time")
                        if lt.endswith(" us"):
                            lt = float(lt[:-3])
                        elif lt.endswith(" ms"):
                            # Normalize ms to us.
                            lt = (float(lt[:-3]) * 1000)
                        lap_times.append(lt)
                        continue

                    raise RuntimeError("Unexpected [%s]" % line)
            #Cons.P(len(lap_times))
            fn_cdf = "%s/%s-cdf" % (_dn_output, os.path.basename(fn))
            self.fns_cdf.append(fn_cdf)
            Stat.GenStat(lap_times, fn_cdf)
def StopDstat():
	"""Kill the running dstat logger processes and wait until each one has
	actually terminated."""
	with Cons.MT("Stopping dstat ...", print_time=False):
		cmd = "ps -e -o pid,ppid,user,args"
		lines = Util.RunSubp(cmd, print_cmd=False, print_output=False)
		#Cons.P(lines)
		pids = []
		for line in lines.split("\n"):
			line = line.strip()
			if "dstat" not in line:
				continue
			if "csv" not in line:
				continue

			# Get the second-level processes, skipping the root-level ones.
			t = re.split(" +", line)
			if t[1] == "1":
				continue
			pids.append(t[0])
			#Cons.P("[%s]" % line)

		if len(pids) > 0:
			#Cons.P("[%s]" % " ".join(pids))
			Util.RunSubp("kill %s" % " ".join(pids))

			# Make sure each of the processes has terminated. `kill -0 pid`
			# exits non-zero once the process no longer exists.
			for pid in pids:
				cmd = "kill -0 %s" % pid
				while True:
					r = 0
					with open(os.devnull, "w") as devnull:
						# BUG FIX: this used subprocess.Popen(), whose return
						# value is a Popen object — always != 0 — so the loop
						# broke immediately without waiting. subprocess.call()
						# blocks and returns the actual exit status.
						r = subprocess.call(cmd, shell=True, stdin=devnull, stdout=devnull, stderr=devnull)
					if r != 0:
						Cons.P("Process %s has terminated" % pid)
						break
					time.sleep(0.1)
def SstLevelInfo():
    """Generate (or return the cached) level-separator data file for the
    SSTable info-by-time-by-levels plot."""
    # Set Conf.ExpStartTime(), if not already set.
    if Conf.ExpStartTime() is None:
        MutantLogReader.Get()

    fn = "%s/sst-info-by-time-by-levels-level-seps-%s" % (Conf.dn_result,
                                                          Conf.ExpStartTime())
    # Cached from a previous run.
    if os.path.isfile(fn):
        return fn

    # {level: y-coordinate of the top of that level's band}
    sst_y_cord_level_sep_highs = SstYCord.LevelSepHigh()

    with Cons.MT(
            "Generating Sst info by time by levels: level separators data file ..."
    ):
        with open(fn, "w") as fo:
            fmt = "%1d %10d %10s"
            fo.write("%s\n" % Util.BuildHeader(
                fmt, "level level_mid_for_labels level_low_for_separators"))
            lh_prev = 0
            for l, lh in sorted(sst_y_cord_level_sep_highs.iteritems()):
                # Label y-coordinate: the midpoint of the level's band.
                lm = (lh + lh_prev) / 2
                fo.write((fmt + "\n") % (l, lm, lh_prev))
                lh_prev = lh
        Cons.P("Created %s %d" % (fn, os.path.getsize(fn)))
    return fn
Example #28
0
    def Restart():
        # Restart dstat as a daemon, logging all block devices to a CSV file
        # named after the current datetime (stored in _cur_datetime so Stop()
        # can rename it later).
        with Cons.MT("Restarting dstat ...", print_time=False):
            Dstat._Stop()

            dn = "%s/dstat" % Conf.GetDir("log_archive_dn")
            Util.MkDirs(dn)

            # Get a list of all block devices
            devs = []
            for f in os.listdir("/dev"):
                mo = None
                # node3 uses sdX device names; the others use Xen xvdX names.
                if socket.gethostname() == "node3":
                    mo = re.match(r"sd\w$", f)
                else:
                    mo = re.match(r"xvd\w$", f)
                if mo is not None:
                    devs.append(f)

            global _cur_datetime
            # yymmdd-HHMMSS.fff (millisecond precision)
            _cur_datetime = datetime.datetime.now().strftime(
                "%y%m%d-%H%M%S.%f")[:-3]
            Cons.P(_cur_datetime)

            # Run dstat as a daemon
            fn_out = "%s/%s.csv" % (dn, _cur_datetime)
            cmd = "dstat -tcdnrmy -D %s --output %s" % (",".join(devs), fn_out)
            Util.RunDaemon(cmd)
Example #29
0
def RunYcsb():
    # Launch the YCSB driver script with the configured workload type and
    # parameters.
    with Cons.MT("Running YCSB ..."):
        script = "%s/work/mutant/YCSB/mutant/cassandra/restart-dstat-run-workload.py" % os.path.expanduser("~")
        workload_type = Ec2InitUtil.GetParam(["client", "ycsb", "workload_type"])
        params = Ec2InitUtil.GetParam(["client", "ycsb", "params"])
        Util.RunSubp("%s %s %s" % (script, workload_type, params))
Example #30
0
def _CloneAndBuildRocksDb():
	"""Clone RocksDB onto the local SSD, symlink it into ~/work, switch to
	the mutant branch, build the shared library, and set up the data dir.

	No-op when the "rocksdb" param is absent.
	"""
	if Ec2InitUtil.GetParam(["rocksdb"]) is None:
		return

	with Cons.MT("Cloning RocksDB src and build ..."):
		# Git clone
		Util.RunSubp("rm -rf /mnt/local-ssd0/mutant/rocksdb")
		Util.RunSubp("git clone https://github.com/hobinyoon/rocksdb /mnt/local-ssd0/mutant/rocksdb")

		# Symlink
		Util.RunSubp("rm -rf /home/ubuntu/work/mutant/rocksdb")
		Util.RunSubp("ln -s /mnt/local-ssd0/mutant/rocksdb /home/ubuntu/work/mutant/rocksdb")

		# Edit the git source repository for easy development.
		# (Rewrites the remote URL from https to ssh.)
		Util.RunSubp("sed -i 's/" \
				"^\\turl = https:\\/\\/github.com\\/hobinyoon\\/rocksdb" \
				"/\\turl = [email protected]:hobinyoon\/rocksdb.git" \
				"/g' %s" % "~/work/mutant/rocksdb/.git/config")

		# Switch to the mutant branch
		Util.RunSubp("cd /home/ubuntu/work/mutant/rocksdb" \
		" && git branch -f mutant origin/mutant" \
		" && git checkout mutant")

		# Build. Takes about 5 mins
		Util.RunSubp("cd /home/ubuntu/work/mutant/rocksdb && make -j16 shared_lib", measure_time=True)

		# Create data directory
		dn = "/mnt/local-ssd1/rocksdb-data"
		Util.RunSubp("sudo mkdir -p %s && sudo chown ubuntu %s" % (dn, dn))
		Util.RunSubp("rm -rf ~/work/rocksdb-data")
		Util.RunSubp("ln -s %s ~/work/rocksdb-data" % dn)