def Stop():
  with Cons.MT("Stopping dstat ...", print_time=False):
    Dstat._Stop()

    # Change the current dstat log file name to the simulation_time_begin of
    # the simulator.
    if _cur_datetime is not None:
      with Cons.MT("Renaming the log file and zipping ..."):
        dn_client = "%s/quizup" % Conf.GetDir("log_archive_dn")
        global _latest_client_log_dt
        _latest_client_log_dt = None
        for f in os.listdir(dn_client):
          mo = re.match(r"(?P<dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d)$", f)
          if mo is not None:
            if _latest_client_log_dt is None:
              _latest_client_log_dt = mo.group("dt")
            else:
              _latest_client_log_dt = max(_latest_client_log_dt, mo.group("dt"))

        # There should be a client log file whose dt is bigger than
        # _cur_datetime.
        if _latest_client_log_dt <= _cur_datetime:
          raise RuntimeError("Unexpected")

        fn0 = "%s/dstat/%s.csv" % (Conf.GetDir("log_archive_dn"), _cur_datetime)
        fn1 = "%s/dstat/%s.csv" % (Conf.GetDir("log_archive_dn"), _latest_client_log_dt)
        Cons.P("renaming %s to %s" % (fn0, fn1))
        os.rename(fn0, fn1)
        Util.RunSubp("7z a -mx %s.7z %s" % (fn1, fn1))
def _EvictCache():
  with Cons.MT("Drop caches ..."):
    Util.RunSubp("sudo sh -c \"echo 3 >/proc/sys/vm/drop_caches\"")

  if False:
    # Evict the DB data files from cache
    with Cons.MT("Evicting DB data ..."):
      Util.RunSubp("%s/work/vmtouch/vmtouch -e %s" % (os.path.expanduser("~"), Conf.GetDir("db_path")))

  # Evict the input data files from cache so that the read IO is consistent
  # throughout the experiment.
  # Caching them would have been nice, but you can do it only when you limit
  # the memory to less than 3.4 GB (= 15 (total RAM) - 0.4 (for the OS) - 11.2
  # (the 100% quizup data size)).
  evict = True
  if evict:
    with Cons.MT("Evicting workload data ..."):
      Util.RunSubp("%s/work/vmtouch/vmtouch -e %s" % (os.path.expanduser("~"), Conf.GetDir("workload_dir")))
  else:
    with Cons.MT("Caching workload data ..."):
      Util.RunSubp("%s/work/vmtouch/vmtouch -t %s" % (os.path.expanduser("~"), Conf.GetDir("workload_dir")))
def PlotSstAccDistAtSpecificTimes():
  # At the time m sec after the n-th SSTable is created (time t). To get the
  # max_plot_height, all plot data files need to be generated before plotting
  # the first one.
  plot_data_fns_at_n = {}
  with Cons.MT("Generating plot data for SSTables by levels with heat at specific times ..."):
    for (n, m) in Conf.times_sst_by_levels_with_heat:
      (fn_in_boxes, fn_in_level_seps) = SstByLevelsWithHeatAtSpecificTimes.Boxes(n, m)
      plot_data_fns_at_n[n] = (fn_in_boxes, fn_in_level_seps)

  with Cons.MT("Plotting SSTables by levels with heat at specific times ..."):
    dn = "%s/sst-by-level-by-ks-range-with-heat" % Conf.dn_result
    for n, (fn_in_boxes, fn_in_level_seps) in sorted(plot_data_fns_at_n.iteritems()):
      env = os.environ.copy()
      env["FN_IN_BOXES"] = fn_in_boxes
      env["FN_IN_LEVEL_INFO"] = fn_in_level_seps
      env["MAX_PLOT_HEIGHT"] = str(SstByLevelsWithHeatAtSpecificTimes.max_plot_height)
      fn_out = "%s/sst-by-level-by-ks-range-with-heat-%s-%s.pdf" % (dn, Conf.ExpStartTime(), n)
      env["FN_OUT"] = fn_out
      Util.RunSubp("gnuplot %s/sst-by-level-by-ks-range-with-heat-at-specific-time.gnuplot" % os.path.dirname(__file__), env=env)
      Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
def __init__(self):
  self.conn = None

  # Open or create the DB
  fn_db = Conf.GetFn("whole_foods_loc_db")
  conn = None
  if os.path.exists(fn_db):
    with Cons.MT("Opening the existing db ..."):
      conn = sqlite3.connect(fn_db)
      if conn is None:
        raise RuntimeError("Error! cannot create the database connection.")
      conn.row_factory = sqlite3.Row
      cur = conn.cursor()
      q = "SELECT count(*) as cnt FROM whole_foods_loc"
      cur.execute(q)
      r = cur.fetchone()
      Cons.P("There are %d records" % r["cnt"])
  else:
    with Cons.MT("Creating a new db ..."):
      conn = sqlite3.connect(fn_db)
      if conn is None:
        raise RuntimeError("Error! cannot create the database connection.")
      conn.row_factory = sqlite3.Row
      cur = conn.cursor()
      q = """CREATE TABLE IF NOT EXISTS whole_foods_loc (
          addr text NOT NULL
          , lat real NOT NULL
          , lon real NOT NULL
          , PRIMARY KEY (addr)
        );"""
      cur.execute(q)
  self.conn = conn
def _ReadStoredLog():
  if Conf.ExpStartTime() is None:
    return None

  dn = "%s/work/mutant/misc/logs/cassandra" % os.path.expanduser("~")
  fn = "%s/system-%s" % (dn, Conf.ExpStartTime())
  if not os.path.isfile(fn):
    # If there is a 7z file, uncompress it
    fn_7z = "%s.7z" % fn
    if os.path.isfile(fn_7z):
      with Cons.MT("Found a 7z file. Uncompressing"):
        Util.RunSubp("7z e -o%s %s" % (dn, fn_7z))
    else:
      return None

  with Cons.MT("Reading the stored Cassandra Mutant log file %s" % fn, print_time=False):
    lines = []
    with open(fn) as fo:
      for line in fo.readlines():
        lines.append(line.strip())
        # Stop after reading n lines for testing
        if 0 < Conf.MaxCassLogLines():
          if Conf.MaxCassLogLines() < len(lines):
            break
    #Cons.P(len(lines))
    return lines
def main(argv):
  Conf.ParseArgs()
  Util.MkDirs(Conf.GetDir("output_dir"))

  dist_sq_thresholds = [0, 0.008, 0.01, 0.02, 0.05]
  #dist_sq_thresholds = [0.02]
  dist_sq_thresholds_str = []
  for d in dist_sq_thresholds:
    dist_sq_thresholds_str.append(_NoTrailing0s(d))

  reduced_files = []
  reduced_file_sizes = []
  with Cons.MT("Generating reduced size usa map ..."):
    for d in dist_sq_thresholds_str:
      fn_co_loc = "filter-out-almost-duplicate-points/.output/usa-map-smallsize-%s" % d
      reduced_files.append(fn_co_loc)
      if not os.path.exists(fn_co_loc):
        cmd = "cd filter-out-almost-duplicate-points && ./build-and-run.sh --dist_sq_threshold=%s" % d
        Util.RunSubp(cmd)
      reduced_file_sizes.append(os.path.getsize(fn_co_loc))

  dn_out = "%s/.output" % os.path.dirname(__file__)
  fn_out = "%s/usa-map.pdf" % dn_out
  with Cons.MT("Plotting ..."):
    env = os.environ.copy()
    env["IN_FNS"] = " ".join(reduced_files)
    env["IN_FN_SIZES"] = " ".join(str(s) for s in reduced_file_sizes)
    env["DIST_SQ_THRESHOLDS"] = " ".join(dist_sq_thresholds_str)
    env["OUT_FN"] = fn_out
    Util.RunSubp("gnuplot %s/usa-map.gnuplot" % os.path.dirname(__file__), env=env)
    Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
def PlotSstAccfreqByAgeIndividualMultiplot():
  with Cons.MT("Plotting individual SSTable access frequencies by their ages ..."):
    dn_out = "%s/%s/sst-age-accfreq-plot" % (Conf.Get("dn_result"), Conf.Get("simulation_time_begin"))
    Util.MkDirs(dn_out)

    env = os.environ.copy()
    dn = "%s/%s/sst-age-accfreq-data" % (Conf.Get("dn_result"), Conf.Get("simulation_time_begin"))
    env["IN_DN"] = dn

    # Plot for all levels. Stop when there is no sstable at a level.
    level = 0
    while True:
      env["LEVEL"] = str(level)
      sst_lives = RocksDbLogReader.GetSstAccFreqByAgeDataFiles(level)
      if len(sst_lives) == 0:
        break
      env["SST_IDS"] = " ".join(str(sl.Id()) for sl in sst_lives)

      age_deleted = []
      for sl in sst_lives:
        age_deleted.append(SimTime.ToSimulatedTimeDur((sl.TsDeleted() - sl.TsCreated()).total_seconds()))
      env["AGE_DELETED"] = " ".join(str(i) for i in age_deleted)

      # Age deleted max. Round up with an hour granularity.
      age_deleted_max = max(age_deleted)
      age_deleted_max = math.ceil(age_deleted_max / 3600.0) * 3600
      env["AGE_DELETED_MAX"] = str(age_deleted_max)

      accfreq_max_all_sst_in_level = 0.0
      temp_max_all_sst_in_level = 0.0
      accfreq_max_list = []
      temp_max_list = []
      for sl in sst_lives:
        accfreq_max = 0.0
        temp_max = 0.0
        for accfreq in sl.AgeAccfreq():
          accfreq_max_all_sst_in_level = max(accfreq_max_all_sst_in_level, accfreq[4])
          temp_max_all_sst_in_level = max(temp_max_all_sst_in_level, accfreq[5])
          accfreq_max = max(accfreq_max, accfreq[4])
          temp_max = max(temp_max, accfreq[5])
        accfreq_max_list.append(accfreq_max)
        temp_max_list.append(temp_max)

      env["ACCFREQ_MAX_ALL_SST_IN_LEVEL"] = str(accfreq_max_all_sst_in_level)
      env["TEMP_MAX_ALL_SST_IN_LEVEL"] = str(temp_max_all_sst_in_level)
      env["ACCFREQ_MAX"] = " ".join(str(e) for e in accfreq_max_list)
      env["TEMP_MAX"] = " ".join(str(e) for e in temp_max_list)

      out_fn = "%s/L%d.pdf" % (dn_out, level)
      env["OUT_FN"] = out_fn

      with Cons.MT("Plotting level %d ..." % level):
        Util.RunSubp("gnuplot %s/sst-accfreq-by-age-multiplot-by-level.gnuplot" % os.path.dirname(__file__), env=env, print_cmd=False)
        Cons.P("Created %s %d" % (out_fn, os.path.getsize(out_fn)))
      level += 1
def _Cpu():
  with Cons.MT("Plotting cpu ..."):
    env = os.environ.copy()
    env["FN_IN"] = CsvFile.GenDataFileForGnuplot()
    dn = "%s/%s" % (Conf.GetDir("output_dir"), Conf.Get("simulation_time_begin"))
    fn_out = "%s/cpu.pdf" % dn
    env["FN_OUT"] = fn_out
    with Cons.MT("Plotting ..."):
      Util.RunSubp("gnuplot %s/cpu.gnuplot" % os.path.dirname(__file__), env=env)
      Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
def Plot(): with Cons.MT("Plotting ..."): env = os.environ.copy() fn_in = GetKeyCntData() env["FN_IN"] = fn_in fn_out = "%s.pdf" % fn_in env["FN_OUT"] = fn_out with Cons.MT("Plotting ..."): Util.RunSubp("gnuplot %s/key-access-freq.gnuplot" % os.path.dirname(__file__), env=env) Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
def Plot(): with Cons.MT("Plotting latency ..."): env = os.environ.copy() env["FN_IN"] = "%s/client/%s" % (Conf.GetDir("log_dir"), Conf.Get("simulation_time_begin")) dn = "%s/%s" % (Conf.GetDir("output_dir"), Conf.Get("simulation_time_begin")) Util.MkDirs(dn) fn_out = "%s/latency.pdf" % dn env["FN_OUT"] = fn_out with Cons.MT("Plotting ..."): Util.RunSubp("gnuplot %s/latency.gnuplot" % os.path.dirname(__file__), env=env) Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
def Plot(dts, fns):
  with Cons.MT("Plotting timestamp by files"):
    env = os.environ.copy()
    env["DTS"] = " ".join(dts)
    env["FNS"] = " ".join(fns)
    fn_out = "%s/timestamp-by-files.pdf" % _dn_out
    env["FN_OUT"] = fn_out
    with Cons.MT("Plotting ..."):
      Util.RunSubp("gnuplot %s/timestamp-by-files.gnuplot" % os.path.dirname(__file__), env=env)
      Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
def Plot(): with Cons.MT("Plotting ..."): env = os.environ.copy() env["FN_IN"] = GetRunningAvg() env["NUM_OBJS"] = str(2047471) env["PER_OBJ"] = str(0) fn_out = "%s/quizup-num-reads-by-obj-age-aggregate.pdf" % _dn_out env["FN_OUT"] = fn_out with Cons.MT("Plotting ..."): Util.RunSubp("gnuplot %s/num-reads-by-obj-ages.gnuplot" % os.path.dirname(__file__), env=env) Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
def PrepareBlockDevs():
  with Cons.MT("Preparing block storage devices ..."):
    # Make sure we are using the known machine types
    inst_type = Util.RunSubp("curl -s http://169.254.169.254/latest/meta-data/instance-type", print_cmd=False, print_output=False)

    # {dev_name: directory_name}
    # The ext4 label is the same as the directory_name.
    blk_devs = {"xvdb": "local-ssd0"}
    # All c3 types have 2 SSDs
    if inst_type.startswith("c3."):
      # Not needed for now
      #blk_devs["xvdc"] = "local-ssd1"
      pass
    elif inst_type in ["r3.large", "r3.xlarge", "r3.2xlarge", "r3.4xlarge", "i2.xlarge"]:
      pass
    else:
      raise RuntimeError("Unexpected instance type %s" % inst_type)
    if os.path.exists("/dev/xvdd"):
      blk_devs["xvdd"] = "ebs-gp2"
    if os.path.exists("/dev/xvde"):
      blk_devs["xvde"] = "ebs-st1"
    if os.path.exists("/dev/xvdf"):
      blk_devs["xvdf"] = "ebs-sc1"

    # Init local SSDs
    # - https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/disk-performance.html
    # - Skip for Castnet, in which local SSD speed doesn't matter.
    Util.RunSubp("sudo umount /mnt || true")
    for dev_name, dir_name in blk_devs.iteritems():
      Cons.P("Setting up %s ..." % dev_name)
      Util.RunSubp("sudo umount /dev/%s || true" % dev_name)
      Util.RunSubp("sudo mkdir -p /mnt/%s" % dir_name)

      # Prevent lazy initialization
      # - "When creating an Ext4 file system, the existing regions of the inode
      #   tables must be cleaned (overwritten with nulls, or "zeroed"). The
      #   "lazyinit" feature should significantly accelerate the creation of a
      #   file system, because it does not immediately initialize all inode
      #   tables, initializing them gradually instead during the initial mounting
      #   process in background (from Kernel version 2.6.37)."
      #   - https://www.thomas-krenn.com/en/wiki/Ext4_Filesystem
      # - Default values are 1s, which do lazy init.
      #   - man mkfs.ext4
      #
      # nodiscard is in the documentation
      # - https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ssd-instance-store.html
      # - Without nodiscard, it takes about 80 secs for an 800GB SSD.
      Util.RunSubp("sudo mkfs.ext4 -m 0 -E nodiscard,lazy_itable_init=0,lazy_journal_init=0 -L %s /dev/%s" % (dir_name, dev_name), measure_time=True)

      # Some are already mounted. I suspect /etc/fstab does the magic when the
      # file system is created. Give it some time and umount.
      time.sleep(1)
      Util.RunSubp("sudo umount /dev/%s || true" % dev_name)

      # -o discard for TRIM
      Util.RunSubp("sudo mount -t ext4 -o discard /dev/%s /mnt/%s" % (dev_name, dir_name))
      Util.RunSubp("sudo chown -R ubuntu /mnt/%s" % dir_name)
def _CloneAndBuildYcsb():
  with Cons.MT("Cloning YCSB and build ..."):
    # Git clone
    Util.RunSubp("rm -rf /mnt/local-ssd0/mutant/YCSB")
    #Util.RunSubp("git clone https://github.com/hobinyoon/YCSB /mnt/local-ssd0/mutant/YCSB")
    Util.RunSubp("git clone https://github.com/1a1a11a/YCSB /mnt/local-ssd0/mutant/YCSB")

    # Symlink
    Util.RunSubp("rm -rf /home/ubuntu/work/mutant/YCSB")
    Util.RunSubp("ln -s /mnt/local-ssd0/mutant/YCSB /home/ubuntu/work/mutant/YCSB")

    # Edit the git source repository for easy development.
    #Util.RunSubp("sed -i 's/" \
    #    "^\\turl = https:\\/\\/github.com\\/hobinyoon\\/YCSB" \
    #    "/\\turl = git@github.com:hobinyoon\/YCSB.git" \
    #    "/g' %s" % "~/work/mutant/YCSB/.git/config")
    Util.RunSubp("sed -i 's/" \
        "^\\turl = https:\\/\\/github.com\\/1a1a11a\\/YCSB" \
        "/\\turl = git@github.com:1a1a11a\/YCSB.git" \
        "/g' %s" % "~/work/mutant/YCSB/.git/config")

    # Switch to the mutant branch
    Util.RunSubp("cd /home/ubuntu/work/mutant/YCSB" \
        " && git branch -f mutant origin/mutant" \
        " && git checkout mutant")

    # Build
    #Util.RunSubp("cd /home/ubuntu/work/mutant/YCSB && mvn -pl com.yahoo.ycsb:cassandra-binding -am clean package -DskipTests >/dev/null 2>&1")
    Util.RunSubp("cd /home/ubuntu/work/mutant/YCSB && mvn -pl com.yahoo.ycsb:rocksdb-binding -am clean package -DskipTests >/dev/null 2>&1")
def GetRunningAvg():
  fn = "%s/num-reads-by-obj-age" % _dn_out
  if os.path.isfile(fn):
    return fn

  with Cons.MT("Generating running average ..."):
    # Running average of the last n items, not in the last n-minute time
    # window. Should generate a smoother curve for plotting.
    q = Q(60)
    with open(_fn_in) as fo, open(fn, "w") as fo_out:
      for line in fo:
        t = line.strip().split(" ")
        if len(t) != 2:
          raise RuntimeError("Unexpected [%s]" % line)
        ts = int(t[0])
        num_reads = int(t[1])
        q.Enq(num_reads)
        if False:
          if ts < 2 * 24 * 60:
            fo_out.write("%d %d -\n" % (ts, num_reads))
          else:
            fo_out.write("%d %d %.1f\n" % (ts, num_reads, q.Avg()))
        else:
          fo_out.write("%d %d %.1f\n" % (ts, num_reads, q.Avg()))
    Cons.P("Created %s %d" % (fn, os.path.getsize(fn)))
  return fn
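# The Q class used above is not defined in this snippet. A minimal sketch of
# what it is assumed to be -- a fixed-capacity queue that keeps the last n
# values and reports their running average -- follows. This is a hypothetical
# implementation for illustration, not the original one.
import collections

class Q(object):
  def __init__(self, n):
    # Keep at most the last n values; older ones are dropped automatically.
    self._items = collections.deque(maxlen=n)

  def Enq(self, v):
    self._items.append(v)

  def Avg(self):
    # Average of the values currently held. Assumes at least one Enq() call.
    return float(sum(self._items)) / len(self._items)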
def _CalcCostAll():
  with Cons.MT("Calculating all cost ..."):
    if True:
      # Parallel processing
      with tempfile.NamedTemporaryFile() as fo:
        for k, exps in _exp_list_mutant_by_mig_temp_thrd.iteritems():
          for e in exps:
            fo.write("%s/../calc/calc.py --simulation_time_begin=%s\n" % (os.path.dirname(__file__), e))
        fo.write("%s/../calc/calc.py --simulation_time_begin=%s\n" % (os.path.dirname(__file__), _exp_rocksdb_simulation_time_begin))
        fo.flush()
        Util.RunSubp("parallel :::: %s" % fo.name)
    else:
      # Serial processing. Useful for debugging.
      for k, exps in _exp_list_mutant_by_mig_temp_thrd.iteritems():
        for e in exps:
          Util.RunSubp("%s/../calc/calc.py --simulation_time_begin=%s" % (os.path.dirname(__file__), e))
      Util.RunSubp("%s/../calc/calc.py --simulation_time_begin=%s" % (os.path.dirname(__file__), _exp_rocksdb_simulation_time_begin))
def Get1minAvgFn(): exp_dts = [] for i in range(2): #Cons.P(Conf.Get(i)) mo = re.match(r".+/(?P<exp_dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d)-d", Conf.Get(i)) exp_dts.append(mo.group("exp_dt")) fn_out = "%s/mem-1minavg-%s" % (Conf.GetOutDir(), "-".join(exp_dts)) if os.path.exists(fn_out): return fn_out with Cons.MT("Creating avg memory usage comparison file for plotting ..."): records = [] dn_base = Conf.GetDir("dn_base") for i in range(2): fn_ycsb_log = "%s/%s" % (dn_base, Conf.Get(i)) hm_mem = _GetHmMem(fn_ycsb_log) for hm, mem in hm_mem.iteritems(): records.append(_RecordMemAvg(hm, i * 30, mem, i)) records.sort(key=operator.attrgetter("ts")) fmt = "%8s %6.3f %1d" header = Util.BuildHeader(fmt, "timestamp mem_avg_in_gb exp_type") with open(fn_out, "w") as fo: i = 0 for r in records: if i % 40 == 0: fo.write(header + "\n") i += 1 fo.write("%s\n" % r.ToStr(fmt)) Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out))) return fn_out
def GenDataFileForGnuplot(dt):
  SimTime.Init(dt)

  dn = "%s/%s" % (Conf.GetDir("output_dir"), dt)
  Util.MkDirs(dn)
  fn = "%s/dstat-data" % dn
  if os.path.isfile(fn):
    return fn

  with Cons.MT("Generating data file for plot ..."):
    global _header_idx
    global _body_rows
    _header_idx = None
    _body_rows = None
    _Parse(dt)

    fmt = "%9.0f %9.0f %9.0f %9.0f %9.0f %9.0f %9.0f %9.0f" \
        " %6.1f %6.1f %6.1f %6.1f %6.1f %6.1f %6.1f %6.1f" \
        " %8.0f %8.0f %8.0f %8.0f" \
        " %3.0f %3.0f" \
        " %3.0f %3.0f %11s" \
        " %3.1f %6.2f %3.1f %6.2f %6.2f %6.3f"
    header = Util.BuildHeader(fmt, " ".join(k for k, v in sorted(_header_idx.iteritems())))
    with open(fn, "w") as fo:
      i = 0
      for r in _body_rows:
        if i % 50 == 0:
          fo.write("%s\n" % header)
        i += 1
        #Cons.P(fmt % tuple(r.Prepared()))
        fo.write((fmt + "\n") % tuple(r.Prepared()))
    Cons.P("Created %s %d" % (fn, os.path.getsize(fn)))
  return fn
def PlotCompareTwo():
  (fns_rocksdb, fn_sst_creation_stat) = RocksdbLog.GenDataFilesForGnuplot()
  #fn_cpu_stat_by_time = CompareCpu.GetHourlyFn()
  fn_cpu_1min_avg = CompareCpu.Get1minAvgFn()
  fn_mem_stat_by_time = CompareMem.GetHourlyFn()
  fn_mem_1min_avg = CompareMem.Get1minAvgFn()

  #time_max = "09:00:00"
  #time_max = "08:00:00"
  time_max = "07:50:00"

  exp_dts = []
  for i in range(2):
    mo = re.match(r".+/(?P<exp_dt>\d\d\d\d\d\d-\d\d\d\d\d\d\.\d\d\d)-d", Conf.Get(i))
    exp_dts.append(mo.group("exp_dt"))
  fn_out = "%s/mutant-overhead-%s.pdf" % (Conf.GetOutDir(), "-".join(exp_dts))

  with Cons.MT("Plotting ..."):
    env = os.environ.copy()
    env["TIME_MAX"] = str(time_max)
    #env["CPU_STAT"] = fn_cpu_stat_by_time
    env["FN_CPU_1MIN_AVG"] = fn_cpu_1min_avg
    #env["MEM_STAT"] = fn_mem_stat_by_time
    env["FN_MEM_1MIN_AVG"] = fn_mem_1min_avg
    env["ROCKSDB0"] = fns_rocksdb[0]
    env["ROCKSDB1"] = fns_rocksdb[1]
    env["OUT_FN"] = fn_out
    Util.RunSubp("gnuplot %s/compare-two-exps.gnuplot" % os.path.dirname(__file__), env=env)
    Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
def UnzipQuizupData():
  with Cons.MT("Unzipping QuizUp data ..."):
    Util.RunSubp("mkdir -p /mnt/local-ssd0/quizup-data")
    Util.RunSubp("rm -rf %s/work/quizup-data" % os.path.expanduser("~"))
    Util.RunSubp("ln -s /mnt/local-ssd0/quizup-data %s/work/quizup-data" % os.path.expanduser("~"))
    Util.RunSubp("mkdir -p %s/work/quizup-data/memcached-2w/simulator-data" % os.path.expanduser("~"))
    Util.RunSubp("cd %s/work/quizup-data-zipped && ./unzip.sh" % os.path.expanduser("~"))
def _EditCassConfDataFileDir(fn):
  with Cons.MT("Edit data_file_directories ..."):
    # data_file_directories:
    # - Can't get the bracket notation working. Go for the dash one.
    dn = "/mnt/local-ssd1/cassandra-data"
    Util.MkDirs(dn)

    lines_new = []
    with open(fn) as fo:
      lines = fo.readlines()
      i = 0
      while i < len(lines):
        line = lines[i].rstrip()
        #Cons.P("line=[%s]" % line)
        if re.match(r"(# )?data_file_directories:", line):
          # Skip all the following lines starting with "-", which are list items,
          # e.g. "# - /var/lib/cassandra/data"
          while i < len(lines) - 1:
            i += 1
            line = lines[i].rstrip()
            if re.match(r"\#? +- .+", line) is None:
              break
          # Insert the new one
          lines_new.append("data_file_directories:")
          lines_new.append(" - %s" % dn)
        else:
          lines_new.append(line)
          i += 1

    # Save lines_new back to the file
    with open(fn, "w") as fo:
      for l in lines_new:
        fo.write("%s\n" % l)
def PlotWorkload(workload_type):
  Util.MkDirs(_dn_output)

  #(fn_plot_data_m_ls_st1, fn_plot_data_ind) = GetPlotDataMutant(workload_type, "ls-st1", "~/work/mutant/log/ycsb/workload-%s/mutant-ls-st1" % workload_type)
  (fn_plot_data_m_ls_st1, fn_plot_data_ind) = GetPlotDataMutant(workload_type, "ls-st1",
      "~/work/mutant/log/ycsb/workload-%s/170822-022606-d-ls-st1-short-exps" % workload_type)
  # Note: debugging early exit; the code below is not reached.
  sys.exit(1)

  (fn_plot_data_r_st1, fn_plot_data_ind) = GetPlotDataRocksdb(workload_type, "st1",
      "~/work/mutant/log/ycsb/workload-%s/rocksdb-st1" % workload_type)
  (fn_plot_data_r_ls, fn_plot_data_ind) = GetPlotDataRocksdb(workload_type, "ls",
      "~/work/mutant/log/ycsb/workload-%s/rocksdb-ls" % workload_type)

  fn_out = "%s/ycsb-%s-thp-vs-latency.pdf" % (_dn_output, workload_type)
  with Cons.MT("Plotting ..."):
    env = os.environ.copy()
    env["FN_ROCKSDB_ST1"] = fn_plot_data_r_st1
    env["FN_ROCKSDB_LS"] = fn_plot_data_r_ls
    env["FN_MUTANT_LS_ST1"] = fn_plot_data_m_ls_st1
    env["FN_OUT"] = fn_out
    Util.RunSubp("gnuplot %s/thrp-vs-lat.gnuplot" % os.path.dirname(__file__), env=env)
    Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
def StgSizetimeCostMutant():
  exp_list_root = Conf.Manifest.Get("2-level Mutant storage by SSTable migration temperature thresholds. On EC2")

  all_exps = []
  for sst_mig_temp_threshold, v in exp_list_root["Mutant"]["By SSTable migration temperature thresholds"].iteritems():
    for simulation_time_begin in v:
      all_exps.append(simulation_time_begin)
  _CalcCost(all_exps)

  # {sst_mig_temp_threshold: StatSizetimeCostByStgdev}
  mutant_migth_stg_stat = {}
  with Cons.MT("Calculating storage cost ..."):
    for sst_mig_temp_threshold, v in exp_list_root["Mutant"]["By SSTable migration temperature thresholds"].iteritems():
      sst_mig_temp_threshold = float(sst_mig_temp_threshold)
      for simulation_time_begin in v:
        # Validate if the log is for the correct sst_mig_temp_threshold
        if sst_mig_temp_threshold != _GetSstMigTempThreshold(simulation_time_begin):
          raise RuntimeError("Unexpected [%s] != [%s]" % (sst_mig_temp_threshold, _GetSstMigTempThreshold(simulation_time_begin)))
        if sst_mig_temp_threshold not in mutant_migth_stg_stat:
          mutant_migth_stg_stat[sst_mig_temp_threshold] = StatSizetimeCostByStgdev()
        mutant_migth_stg_stat[sst_mig_temp_threshold].Add(SizetimeCostByStgdev(simulation_time_begin))
  #Cons.P(pprint.pformat(mutant_migth_stg_stat))
  return mutant_migth_stg_stat
def SstInfo():
  # Set Conf.ExpStartTime(), if not already set.
  if Conf.ExpStartTime() is None:
    MutantLogReader.Get()

  fn = "%s/sst-info-by-time-by-levels-%s" % (Conf.dn_result, Conf.ExpStartTime())
  if os.path.isfile(fn):
    return fn

  (sst_lives, memt_lives) = MemtSstLife.Get()

  with Cons.MT("Generating Sst info by time by levels data file ..."):
    #with open(fn_m, "w") as fo:
    #  fo.write("%s\n" % Memt.Header())
    #  for addr, l in sorted(_memt_lives.iteritems()):
    #    fo.write("%s\n" % l)
    #Cons.P("Created %s %d" % (fn_m, os.path.getsize(fn_m)))

    with open(fn, "w") as fo:
      fo.write("%s\n" % MemtSstLife.SstLife.Header())
      for sst_gen, l in sorted(sst_lives.iteritems()):
        fo.write("%s\n" % l)
    Cons.P("Created %s %d" % (fn, os.path.getsize(fn)))
  return fn
def GenStat(self, fn):
  with Cons.MT(fn, print_time=False):
    lap_times = []
    with open(fn) as fo:
      for line in fo.readlines():
        line = line.rstrip()
        if len(line) == 0:
          continue
        if line.startswith("#"):
          continue

        # Example ioping output lines:
        #   4 KiB from /mnt/local-ssd0/ioping-test-data (ext4 /dev/xvdb): request=1 time=219.1 us
        #   4 KiB from /mnt/local-ssd0/ioping-test-data (ext4 /dev/xvdb): request=394 time=1.51 ms
        #
        # re.search rather than re.match, since the latency appears in the
        # middle of the line.
        m = re.search(r"(?P<lap_time>(\d|\.)+ (us|ms))", line)
        if m:
          lt = m.group("lap_time")
          if lt.endswith(" us"):
            lt = float(lt[:-3])
          elif lt.endswith(" ms"):
            # Normalize to us
            lt = float(lt[:-3]) * 1000
          lap_times.append(lt)
          continue
        raise RuntimeError("Unexpected [%s]" % line)
    #Cons.P(len(lap_times))

    fn_cdf = "%s/%s-cdf" % (_dn_output, os.path.basename(fn))
    self.fns_cdf.append(fn_cdf)
    Stat.GenStat(lap_times, fn_cdf)
def StopDstat():
  with Cons.MT("Stopping dstat ...", print_time=False):
    cmd = "ps -e -o pid,ppid,user,args"
    lines = Util.RunSubp(cmd, print_cmd=False, print_output=False)
    #Cons.P(lines)
    pids = []
    for line in lines.split("\n"):
      line = line.strip()
      if "dstat" not in line:
        continue
      if "csv" not in line:
        continue
      # Get the second-level processes, skipping the root-level ones.
      t = re.split(" +", line)
      if t[1] == "1":
        continue
      pids.append(t[0])
      #Cons.P("[%s]" % line)

    if len(pids) > 0:
      #Cons.P("[%s]" % " ".join(pids))
      Util.RunSubp("kill %s" % " ".join(pids))

      # Make sure each of the processes has terminated
      for pid in pids:
        cmd = "kill -0 %s" % pid
        while True:
          r = 0
          with open(os.devnull, "w") as devnull:
            # subprocess.call() returns the exit code; "kill -0" fails
            # (non-zero) once the process no longer exists.
            r = subprocess.call(cmd, shell=True, stdin=devnull, stdout=devnull, stderr=devnull)
          if r != 0:
            Cons.P("Process %s has terminated" % pid)
            break
          time.sleep(0.1)
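# A note on the "kill -0" check above: the same liveness test can be done
# in-process with os.kill(pid, 0), which avoids spawning a shell per poll.
# A minimal sketch (hypothetical helper, assuming pid is an integer and that
# os, time, and Cons are imported as in the rest of this module):
def _WaitForTermination(pid, poll_interval=0.1):
  while True:
    try:
      os.kill(pid, 0)  # Signal 0 sends nothing; it only checks existence.
    except OSError:
      Cons.P("Process %s has terminated" % pid)
      return
    time.sleep(poll_interval)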
def SstLevelInfo():
  # Set Conf.ExpStartTime(), if not already set.
  if Conf.ExpStartTime() is None:
    MutantLogReader.Get()

  fn = "%s/sst-info-by-time-by-levels-level-seps-%s" % (Conf.dn_result, Conf.ExpStartTime())
  if os.path.isfile(fn):
    return fn

  sst_y_cord_level_sep_highs = SstYCord.LevelSepHigh()

  with Cons.MT("Generating Sst info by time by levels: level separators data file ..."):
    with open(fn, "w") as fo:
      fmt = "%1d %10d %10s"
      fo.write("%s\n" % Util.BuildHeader(fmt, "level level_mid_for_labels level_low_for_separators"))
      lh_prev = 0
      for l, lh in sorted(sst_y_cord_level_sep_highs.iteritems()):
        lm = (lh + lh_prev) / 2
        fo.write((fmt + "\n") % (l, lm, lh_prev))
        lh_prev = lh
    Cons.P("Created %s %d" % (fn, os.path.getsize(fn)))
  return fn
def Restart():
  with Cons.MT("Restarting dstat ...", print_time=False):
    Dstat._Stop()

    dn = "%s/dstat" % Conf.GetDir("log_archive_dn")
    Util.MkDirs(dn)

    # Get a list of all block devices
    devs = []
    for f in os.listdir("/dev"):
      mo = None
      if socket.gethostname() == "node3":
        mo = re.match(r"sd\w$", f)
      else:
        mo = re.match(r"xvd\w$", f)
      if mo is not None:
        devs.append(f)

    global _cur_datetime
    _cur_datetime = datetime.datetime.now().strftime("%y%m%d-%H%M%S.%f")[:-3]
    Cons.P(_cur_datetime)

    # Run dstat as a daemon
    fn_out = "%s/%s.csv" % (dn, _cur_datetime)
    cmd = "dstat -tcdnrmy -D %s --output %s" % (",".join(devs), fn_out)
    Util.RunDaemon(cmd)
def RunYcsb():
  with Cons.MT("Running YCSB ..."):
    cmd = "%s/work/mutant/YCSB/mutant/cassandra/restart-dstat-run-workload.py %s %s" \
        % (os.path.expanduser("~")
            , Ec2InitUtil.GetParam(["client", "ycsb", "workload_type"])
            , Ec2InitUtil.GetParam(["client", "ycsb", "params"]))
    Util.RunSubp(cmd)
def _CloneAndBuildRocksDb():
  if Ec2InitUtil.GetParam(["rocksdb"]) is None:
    return

  with Cons.MT("Cloning RocksDB src and build ..."):
    # Git clone
    Util.RunSubp("rm -rf /mnt/local-ssd0/mutant/rocksdb")
    Util.RunSubp("git clone https://github.com/hobinyoon/rocksdb /mnt/local-ssd0/mutant/rocksdb")

    # Symlink
    Util.RunSubp("rm -rf /home/ubuntu/work/mutant/rocksdb")
    Util.RunSubp("ln -s /mnt/local-ssd0/mutant/rocksdb /home/ubuntu/work/mutant/rocksdb")

    # Edit the git source repository for easy development.
    Util.RunSubp("sed -i 's/" \
        "^\\turl = https:\\/\\/github.com\\/hobinyoon\\/rocksdb" \
        "/\\turl = git@github.com:hobinyoon\/rocksdb.git" \
        "/g' %s" % "~/work/mutant/rocksdb/.git/config")

    # Switch to the mutant branch
    Util.RunSubp("cd /home/ubuntu/work/mutant/rocksdb" \
        " && git branch -f mutant origin/mutant" \
        " && git checkout mutant")

    # Build. Takes about 5 mins.
    Util.RunSubp("cd /home/ubuntu/work/mutant/rocksdb && make -j16 shared_lib", measure_time=True)

    # Create the data directory
    dn = "/mnt/local-ssd1/rocksdb-data"
    Util.RunSubp("sudo mkdir -p %s && sudo chown ubuntu %s" % (dn, dn))
    Util.RunSubp("rm -rf ~/work/rocksdb-data")
    Util.RunSubp("ln -s %s ~/work/rocksdb-data" % dn)