def Read():
  """Load the raw, anonymized Redis command log into the module-level _ops list.

  Lines that do not parse into a recognized command (op.cmd_type is None) are
  dropped. Progress is reported every 10000 lines, as a percentage of the
  known line count of the input file.
  """
  fn_in = "%s/work/quizup/86400-sec-redis-ephemeral-cmds.anonymized.log" \
      % os.path.expanduser("~")
  # Pre-counted with `wc -l` (took ~12 s); used only for the progress ratio.
  num_lines = 119874265
  with Cons.MT("Reading %s" % fn_in):
    global _ops
    _ops = []
    line_cnt = 0
    with open(fn_in) as fo:
      for line in fo:
        line_cnt += 1
        op = Op(line)
        # Unrecognized command: drop the line and skip the progress check too.
        if op.cmd_type is None:
          continue
        _ops.append(op)
        if line_cnt % 10000 == 0:
          Cons.ClearLine()
          Cons.Pnnl("%d OPs read (%.2f%%), %d kept"
              % (line_cnt, 100.0 * line_cnt / num_lines, len(_ops)))
    Cons.ClearLine()
    Cons.P("%d OPs read (%.2f%%), %d kept"
        % (line_cnt, 100.0 * line_cnt / num_lines, len(_ops)))
def Pnnl(self, msg, target="log"):
  """Write msg with no trailing newline.

  target: "log" writes only to the log file; "both" also echoes to the
  console via Cons.Pnnl. Any other value raises RuntimeError.
  Thread-safe: file access is serialized with self.fo_lock.
  """
  with self.fo_lock:
    # Validate the target first; an unknown target writes nothing.
    if target not in ("log", "both"):
      raise RuntimeError("Unexpected target: %s" % target)
    self.fo.write("%s" % msg)
    if target == "both":
      Cons.Pnnl(msg)
def SeqRead():
  """Sequentially read each test-data file fully into memory, forever,
  printing the wall-clock duration of each read in milliseconds.

  Never returns; intended as a disk-read benchmark loop.
  """
  while True:
    for i in range(_num_files):
      fn = "%s/%02d" %(_dn_test_data, i)
      Cons.Pnnl("Reading %s into memory: " % fn)
      start_time = time.time()
      with open(fn, "rb") as fo:
        # The content is intentionally discarded; only the read time matters.
        data = fo.read(_file_size)
      dur = time.time() - start_time
      # Python 2 print statement.
      print "%3.0f ms" % (dur * 1000.0)
def Write():
  """Dump the module-level _ops list to a compact, space-separated log file.

  Each output line is "<ts> <cmd> <key ...>". Progress is reported every
  10000 OPs; the final line includes the resulting file size.
  """
  fn_out = "%s/work/quizup/86400-sec-redis-ephemeral-cmds.anonymized.compact.log" \
      % os.path.expanduser("~")
  with Cons.MT("Writing %s" % fn_out):
    n_written = 0
    with open(fn_out, "w") as fo:
      for op in _ops:
        n_written += 1
        fo.write("%s %s %s\n" % (op.ts, op.cmd, " ".join(op.key)))
        if n_written % 10000 == 0:
          Cons.ClearLine()
          Cons.Pnnl("Wrote %d OPs (%.2f%%)"
              % (n_written, 100.0 * n_written / len(_ops)))
    Cons.ClearLine()
    Cons.P("Wrote %d OPs (%.2f%%). File size %d"
        % (n_written, 100.0 * n_written / len(_ops), os.path.getsize(fn_out)))
def Flush(self):
  """Repaint the buffered message in place on the terminal.

  Erases the lines printed by the previous Flush using ANSI escape
  sequences, prints the accumulated message, records how many lines it
  occupied, and clears the buffer. Thread-safe via self.msg_lock.
  """
  with self.msg_lock:
    esc = chr(27)
    # Walk back up over the previously printed lines, erasing each one.
    for _ in range(self.lines_printed):
      sys.stdout.write(esc + "[2K")  # erase entire current line
      sys.stdout.write(esc + "[1A")  # move cursor up one line
      sys.stdout.write(esc + "[1G")  # move cursor to column 1
    # Erase the line the cursor ends up on before repainting.
    sys.stdout.write(esc + "[2K")
    Cons.Pnnl(self.msg)
    # Number of line breaks printed; the last line has no trailing newline.
    self.lines_printed = len(self.msg.split("\n")) - 1
    self.msg = ""
def GetKeyCntData():
  """Build a per-key access-count file, sorted by count descending.

  Returns the output file name. If the file already exists it is reused
  without re-reading the input log. Input lines are "<ts> <cmd> <key ...>";
  every key column is counted.
  """
  fn_out = "%s/work/quizup/86400-sec-redis-ephemeral-cmds.anonymized.compact.key-accesscnt" \
      % os.path.expanduser("~")
  if os.path.isfile(fn_out):
    return fn_out

  fn_in = "%s/work/quizup/86400-sec-redis-ephemeral-cmds.anonymized.compact.log" \
      % os.path.expanduser("~")
  # Pre-counted line total, used only for the progress percentage.
  num_lines = 77836639
  with Cons.MT("Reading %s" % fn_in):
    key_cnt = {}
    line_cnt = 0
    with open(fn_in) as fo:
      for line in fo:
        line_cnt += 1
        # Skip the timestamp and command columns; count each key.
        for k in line.split()[2:]:
          key_cnt[k] = key_cnt.get(k, 0) + 1
        if line_cnt % 10000 == 0:
          Cons.ClearLine()
          Cons.Pnnl("%d OPs read (%.2f%%)" % (line_cnt, 100.0 * line_cnt / num_lines))
    Cons.ClearLine()
    Cons.P("%d OPs read (%.2f%%)" % (line_cnt, 100.0 * line_cnt / num_lines))

  with Cons.MT("Writing %s" % fn_out):
    by_cnt_desc = sorted(key_cnt.items(), key=operator.itemgetter(1), reverse=True)
    with open(fn_out, "w") as fo:
      for k, c in by_cnt_desc:
        fo.write("%s %d\n" % (k, c))
    Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
  return fn_out
def _GetQ():
  """Return the SQS queue named sqs_q_name, creating it if it doesn't exist.

  AWS requires a 60-second wait before re-creating a recently deleted queue
  and doesn't report the remaining wait, so creation is retried once per
  second until it succeeds. Any other ClientError is re-raised.
  """
  # Get the queue. Create one if not exists.
  try:
    queue = _sqs.get_queue_by_name(QueueName=sqs_q_name,
        # QueueOwnerAWSAccountId='string'
        )
    #Cons.P(pprint.pformat(vars(queue), indent=2))
    #{ '_url': 'https://queue.amazonaws.com/998754746880/mutant-exps',
    #  'meta': ResourceMeta('sqs', identifiers=[u'url'])}
    return queue
  except botocore.exceptions.ClientError as e:
    #Cons.P(pprint.pformat(e, indent=2))
    #Cons.P(pprint.pformat(vars(e), indent=2))
    # Only "queue doesn't exist" is expected here; fall through to create it.
    if e.response["Error"]["Code"] == "AWS.SimpleQueueService.NonExistentQueue":
      pass
    else:
      raise e

  Cons.Pnnl("The queue doesn't exists. Creating one ")
  while True:
    response = None
    try:
      response = _bc.create_queue(QueueName=sqs_q_name)
      # Default message retention period is 4 days.
      # Python 2 print statement: terminates the dotted progress line.
      print ""
      break
    except botocore.exceptions.ClientError as e:
      # When calling the CreateQueue operation: You must wait 60 seconds after
      # deleting a queue before you can create another with the same name.
      # It doesn't give me how much more you need to wait. Polling until succeed.
      if e.response["Error"]["Code"] == "AWS.SimpleQueueService.QueueDeletedRecently":
        sys.stdout.write(".")
        sys.stdout.flush()
        time.sleep(1)
      else:
        raise e
  # Re-fetch so the returned object is a Queue resource, not the raw
  # create_queue response.
  return _sqs.get_queue_by_name(QueueName=sqs_q_name)
def ByTags(tags, job_id_none_requested):
  """Terminate running EC2 instances matching `tags` in every region, in parallel.

  tags: tag filter passed to each per-region _TermInst worker.
  job_id_none_requested: forwarded to _TermInst.Init (semantics owned by
    _TermInst; presumably whether untagged/job-id-less instances were
    requested -- confirm against _TermInst).

  Spawns one daemon thread per region, waits for all of them, then prints
  a result table (header + one row per region).
  """
  Cons.Pnnl("Terminating running instances:")
  _TermInst.Init(job_id_none_requested)
  tis = []
  for r in Ec2Region.All():
    tis.append(_TermInst(r, tags))
  threads = []
  for ti in tis:
    t = threading.Thread(target=ti.Run)
    t.daemon = True
    threads.append(t)
    t.start()
  for t in threads:
    t.join()
  # Python 2 print statement: ends the progress line started by Pnnl.
  print ""
  Cons.P(_TermInst.Header())
  for ti in tis:
    ti.PrintResult()
def Read(self):
  """Parse the compaction log (one JSON record per line) and build
  self.sstid_howcreated: {sst generation: CompactionLog._HowCreated},
  recording whether each SSTable was flushed or compacted (with its inputs).

  Exits the process if there are many pending compactions and the user
  declines to proceed. Raises RuntimeError on any unexpected record.
  """
  # {ssd_id: how_created}
  self.sstid_howcreated = {}
  # Set to the user's y/n answer the first time an overload is detected,
  # so the prompt appears at most once.
  self.proceed_with_pending_compactions = None

  # Read the log
  with open(self.fn_compaction_log) as fo:
    for line in fo.readlines():
      # Looks like they are in json format
      #Cons.P(line)
      e = json.loads(line)
      #Cons.P(pprint.pformat(e))
      # This reader only understands the ycsb.usertable workload.
      if (e["keyspace"] != "ycsb") or (e["table"] != "usertable"):
        raise RuntimeError("Unexpected\n%s" % pprint.pformat(e))
      if e["type"] == "enable":
        continue
      elif e["type"] == "flush":
        # A flush creates brand-new SSTables; each generation must be unseen.
        for t in e["tables"]:
          sst_gen = int(t["table"]["generation"])
          if sst_gen in self.sstid_howcreated:
            raise RuntimeError("Unexpected")
          self.sstid_howcreated[sst_gen] = CompactionLog._HowCreated("flushed")
        continue
      elif e["type"] == "pending":
        # A small backlog (<= 2 pending compactions) is treated as normal.
        if int(e["pending"]) <= 2:
          continue
        # Prompt for pending compactions
        if self.proceed_with_pending_compactions is None:
          Cons.P("There are pending compactions, which indicate the server is overloaded.")
          Cons.P(pprint.pformat(e), ind=2)
          Cons.Pnnl("Would you like to proceed ")
          # Python 2 raw_input(). NOTE(review): the prompt literal was garbled
          # in the source; "(Y/N)? " is the best reconstruction -- confirm.
          self.proceed_with_pending_compactions = raw_input("(Y/N)? ")
          if self.proceed_with_pending_compactions.lower() != "y":
            sys.exit(0)
          else:
            continue
      elif e["type"] == "compaction":
        # Example record (abridged):
        # {u'start': u'1474839003093', u'end': u'1474839005076',
        #  u'time': 1474839005076, u'keyspace': u'ycsb', u'table': u'usertable',
        #  u'input':  [{u'strategyId': u'1',
        #               u'table': {u'generation': 1, u'size': 14061373,
        #                          u'details': {u'level': 0,
        #                                       u'max_token': u'9223342625191452161',
        #                                       u'min_token': u'-9222223702120529941'},
        #                          u'version': u'mc'}}, ...],
        #  u'output': [{u'strategyId': u'1',
        #               u'table': {u'generation': 3, u'size': 28117799, ...}}]}
        #
        # time and end seem to have the same value.
        # NOTE: the local name 'time' shadows the time module in this scope.
        time = int(e["time"])
        start = int(e["start"])
        end = int(e["end"])
        # Set experiment end time: last compaction event + the duration of
        # the compaction. Or, the last seen Mutant event timestamp can be
        # used too, depending on whether you want a stabilized snapshot of
        # SSTables or a snapshot while data is being added.
        #
        #Conf.SetExpFinishTime(datetime.datetime.fromtimestamp(int(end + (end - start)) / 1000.0)
        #    .strftime("%y%m%d-%H%M%S.%f")
        #    # Trim the last 3 digits
        #    [:-3])
        input_tables = []
        output_tables = []
        for e1 in e["input"]:
          if e1["strategyId"] != "1":
            raise RuntimeError("Unexpected\n%s" % pprint.pformat(e))
          # Bare subscript expressions below are schema checks only: they
          # raise KeyError when a field is missing; the values are unused.
          e1["table"]["details"]["level"]
          e1["table"]["details"]["max_token"]
          e1["table"]["details"]["min_token"]
          input_tables.append(int(e1["table"]["generation"]))
          e1["table"]["size"]
        for e1 in e["output"]:
          if e1["strategyId"] != "1":
            raise RuntimeError("Unexpected\n%s" % pprint.pformat(e))
          e1["table"]["details"]["level"]
          e1["table"]["details"]["max_token"]
          e1["table"]["details"]["min_token"]
          sst_gen = int(e1["table"]["generation"])
          e1["table"]["size"]
          # An output SSTable must not have been seen before.
          if sst_gen in self.sstid_howcreated:
            raise RuntimeError("Unexpected")
          self.sstid_howcreated[sst_gen] = CompactionLog._HowCreated("compacted", input_tables)
        #Cons.P("\n%s\n%s\n%s" % (time, start, end))
        continue
      else:
        raise RuntimeError("Unexpected\n%s" % pprint.pformat(e))
def _BuildMemtSstLives():
  """Parse the RocksDB LOG for this simulation run and populate the
  module-level _sst_lives map. No-op if already built.

  The plain log is extracted from its .7z archive when missing. Recognized
  EVENT_LOG_v1 events: mutant_table_acc_cnt, table_file_deletion,
  table_file_creation, flush_started/flush_finished (ignored),
  compaction_started (ignored), compaction_finished.
  """
  with Cons.MT("Building memt and sst lives ..."):
    #global _memt_lives
    global _sst_lives
    # Idempotent per process: bail out if already built.
    if _sst_lives is not None:
      return

    #_memt_lives = {}
    _sst_lives = {}

    dn = "%s/rocksdb" % Conf.GetDir("log_dir")
    fn = "%s/%s" % (dn, Conf.Get("simulation_time_begin"))
    if not os.path.isfile(fn):
      # Fall back to the compressed log and extract it in place.
      fn_7z = "%s.7z" % fn
      if not os.path.isfile(fn_7z):
        raise RuntimeError("Unexpected")
      Util.RunSubp("cd %s && 7z e %s" % (dn, fn_7z))
    if not os.path.isfile(fn):
      raise RuntimeError("Unexpected")

    line_no = 0
    with open(fn) as fo:
      for line in fo:
        line_no += 1
        if line_no % 100 == 0:
          Cons.ClearLine()
          Cons.Pnnl("Processing line %d" % line_no)

        # The timestamp at the first column and the time_micros are 5 hours
        # apart. One is in local time (EDT) and the other is in UTC. Follow the former.
        # TODO: this needs to be fixed at the other place too
        # 2016/12/21-02:17:14.329266 7f702d7fa700 EVENT_LOG_v1 {"time_micros":
        # 1482304634329023, "mutant_table_acc_cnt": {"memt":
        # "0x7f69fc00c350:51723 0x7f6bec011200:26942", "sst": "1069:0:30.123:20.123
        # 1059:980:30.123:20.123"}
        mo = re.match(r"(?P<ts>\d\d\d\d/\d\d/\d\d-\d\d:\d\d:\d\d\.\d\d\d\d\d\d) .+ EVENT_LOG_v1 {" \
            ".+" \
            ", \"mutant_table_acc_cnt\": {(\"memt\": \"(?P<memt_acc_cnt>(\w|\d|:| )+)\")?" \
            "(, )?" \
            "(\"sst\": \"(?P<sst_acc_cnt>(\w|\d|:|-|\.| )+)\")?" \
            "}" \
            ".*"
            , line)
        if mo is not None:
          _SetTabletAccess(mo)
          continue

        # 2016/12/21-02:15:58.341853 7f702dffb700 EVENT_LOG_v1 {"time_micros":
        # 1482304558341847, "job": 227, "event": "table_file_deletion",
        # "file_number": 1058}
        mo = re.match(r"(?P<ts>\d\d\d\d/\d\d/\d\d-\d\d:\d\d:\d\d\.\d\d\d\d\d\d) .+ EVENT_LOG_v1 {" \
            ".+" \
            ", \"job\": \d+" \
            ", \"event\": \"table_file_deletion\"" \
            ", \"file_number\": (?P<file_number>\d+)" \
            "}" \
            ".*"
            , line)
        if mo is not None:
          _SetTabletDeleted(mo)
          continue

        # 2016/12/21-01:27:40.840324 7f702dffb700 EVENT_LOG_v1 {"time_micros":
        # 1482301660840289, "cf_name": "default", "job": 4, "event":
        # "table_file_creation", "file_number": 15, "file_size": 67569420,
        # "table_properties": {"data_size": 67110556, "index_size": 458020,
        # "filter_size": 0, "raw_key_size": 1752468, "raw_average_key_size": 25,
        # "raw_value_size": 65132550, "raw_average_value_size": 966,
        # "num_data_blocks": 16857, "num_entries": 67425, "filter_policy_name":
        # "", "reason": kCompaction, "kDeletedKeys": "0", "kMergeOperands": "0"}}
        mo = re.match(r"(?P<ts>\d\d\d\d/\d\d/\d\d-\d\d:\d\d:\d\d\.\d\d\d\d\d\d) .+ EVENT_LOG_v1 {" \
            ".+" \
            ", \"cf_name\": \"default\"" \
            ", \"job\": (?P<job>\d+)" \
            ", \"event\": \"table_file_creation\"" \
            ", \"file_number\": (?P<file_number>\d+)" \
            ", \"file_size\": (?P<file_size>\d+)" \
            ".+" \
            ", \"reason\": (?P<reason>\w+)" \
            ".*"
            , line)
        if mo is not None:
          _SetTabletCreated(mo)
          continue

        # 2016/12/21-01:28:41.835596 7f683c58d700 EVENT_LOG_v1 {"time_micros":
        # 1482301721835586, "job": 8, "event": "flush_started", "num_memtables":
        # 2, "num_entries": 257306, "num_deletes": 0, "memory_usage": 260052944}
        # Recognized but ignored.
        mo = re.match(r"(?P<ts>\d\d\d\d/\d\d/\d\d-\d\d:\d\d:\d\d\.\d\d\d\d\d\d) .+ EVENT_LOG_v1 {" \
            ".+" \
            ", \"job\": \d+" \
            ", \"event\": \"flush_started\"" \
            ".*"
            , line)
        if mo is not None:
          continue

        # 2016/12/21-01:27:25.893816 7f683c58d700 (Original Log Time
        # 2016/12/21-01:27:25.893597) EVENT_LOG_v1 {"time_micros":
        # 1482301645893590, "job": 2, "event": "flush_finished", "lsm_state": [1,
        # 0, 0, 0, 0, 0, 0], "immutable_memtables": 0}
        # Recognized but ignored.
        mo = re.match(r"(?P<ts>\d\d\d\d/\d\d/\d\d-\d\d:\d\d:\d\d\.\d\d\d\d\d\d) .+ EVENT_LOG_v1 {" \
            ".+" \
            ", \"job\": \d+" \
            ", \"event\": \"flush_finished\"" \
            ".*"
            , line)
        if mo is not None:
          continue

        # 2016/12/21-01:27:40.010374 7f702dffb700 EVENT_LOG_v1 {"time_micros":
        # 1482301660010345, "job": 4, "event": "compaction_started", "files_L0":
        # [12, 8], "score": 1, "input_data_size": 241744688}
        # Recognized but ignored.
        mo = re.match(r"(?P<ts>\d\d\d\d/\d\d/\d\d-\d\d:\d\d:\d\d\.\d\d\d\d\d\d) .+ EVENT_LOG_v1 {" \
            ".+" \
            ", \"job\": \d+" \
            ", \"event\": \"compaction_started\"" \
            ".*"
            , line)
        if mo is not None:
          continue

        # 2016/12/21-01:27:40.960792 7f702dffb700 (Original Log Time
        # 2016/12/21-01:27:40.959919) EVENT_LOG_v1 {"time_micros":
        # 1482301660959908, "job": 4, "event": "compaction_finished",
        # "compaction_time_micros": 949251, "output_level": 1,
        # "num_output_files": 4, "total_output_size": 229662756,
        # "num_input_records": 241171, "num_output_records": 229148,
        # "num_subcompactions": 1, "lsm_state": [0, 4, 0, 0, 0, 0, 0]}
        mo = re.match(r"(?P<ts>\d\d\d\d/\d\d/\d\d-\d\d:\d\d:\d\d\.\d\d\d\d\d\d) .+ EVENT_LOG_v1 {" \
            ".+" \
            ", \"job\": (?P<job>\d+)" \
            ", \"event\": \"compaction_finished\"" \
            ".+" \
            ", \"output_level\": (?P<output_level>\d+)" \
            ".*"
            , line)
        if mo is not None:
          _SetCompactinFinished(mo)
          continue

        # You can check out the other events here. All useful ones were covered above.
        #Cons.P(line)

    Cons.ClearLine()
    Cons.P("Processed %d lines" % line_no)

    # Summarize how many SSTables are still alive vs deleted.
    deleted = 0
    not_deleted = 0
    for sst_id, sl in _sst_lives.iteritems():
      if sl.TsDeleted() is None:
        not_deleted += 1
      else:
        deleted += 1
    Cons.P("Created %d SstLives. %d not-deleted, %d deleted"
        % (len(_sst_lives), not_deleted, deleted))
def GetData():
  """Generate (or reuse cached) SSTable heatmap-by-time data files.

  Returns (heatmap_fn, vertical_lines_fn, max_temperature). The heatmap file
  has one box per SSTable per time slice; the vertical-lines file marks the
  slice boundaries. Both use seconds relative to the first slice.
  """
  fn_hm = "%s/sst-heatmap-by-time-%s" % (Conf.dn_result, Conf.Get("simulation_time_begin"))
  fn_vl = "%s/sst-heatmap-by-time-vertical-lines-%s" % (Conf.dn_result, Conf.Get("simulation_time_begin"))
  # Reuse previously generated files when both exist.
  if os.path.isfile(fn_hm) and os.path.isfile(fn_vl):
    return (fn_hm, fn_vl, _MaxHeatFromHeatmapByTimeData(fn_hm))

  # {sst_id, SstLife()}
  sst_lives = RocksdbLogReader.GetSstLives()

  with Cons.MT("Generating Sst heatmap by time ..."):
    # Gather temperature info at n different times
    num_times = Conf.heatmap_by_time_num_times

    # Set start and end times
    # Start time is when the first Sstable is created, not the
    # simulation_time_begin, when no SSTable exists yet.
    #   Exp start time:          160927-143257.395
    #   First Sstable open time: 160927-143411.273
    min_sst_opened = None
    for sst_gen, sl in sorted(sst_lives.iteritems()):
      min_sst_opened = sl.TsCreated() if min_sst_opened is None else min(min_sst_opened, sl.TsCreated())
    st = min_sst_opened
    et = SimTime.SimulationTimeEnd()
    dur = (et - st).total_seconds()

    # { t0: {HeatBoxes} }
    time_heatboxes = {}
    max_temperature = None
    for i in range(0, num_times):
      t0 = st + datetime.timedelta(seconds=(float(dur) * i / num_times + time_offset_in_sec))
      t1 = st + datetime.timedelta(seconds=(float(dur) * (i+1) / num_times + time_offset_in_sec))

      # Sort boxes by their temperature. Heights are proportional to their sizes.
      boxes = []
      for sst_gen, sl in sorted(sst_lives.iteritems()):
        temp = sl.TempAtTime(t0)
        # NOTE(review): max_temperature is updated before the None check, so a
        # None temp participates in max(); Python 2 orders None below all
        # numbers, which makes this harmless there -- confirm if porting.
        if max_temperature is None:
          max_temperature = temp
        else:
          max_temperature = max(max_temperature, temp)
        if temp is None:
          continue
        boxes.append(_Box(sl, t0, t1, temp))
      boxes.sort(key=lambda b: b.temp, reverse=True)
      time_heatboxes[t0] = boxes
      Cons.ClearLine()
      Cons.Pnnl("%4d/%4d" % (i + 1, num_times))
    # Python 2 print statement: ends the progress line.
    print ""

    for t, boxes in sorted(time_heatboxes.iteritems()):
      for b in boxes:
        b.SetTempColor(max_temperature)

    # Set y-coordinate of each box
    for t, boxes in sorted(time_heatboxes.iteritems()):
      total_size = 0
      for b in boxes:
        total_size += b.sl.Size()
      s = 0
      for b in boxes:
        b.y0 = float(s) / total_size
        s += b.sl.Size()
        b.y1 = float(s) / total_size

    # Convert to simulated time
    # { t0: {HeatBoxes} }
    time_heatboxes1 = {}
    for t, boxes in sorted(time_heatboxes.iteritems()):
      t1 = SimTime.ToSimulatedTime(t)
      for b in boxes:
        b.t0 = SimTime.ToSimulatedTime(b.t0)
        b.t1 = SimTime.ToSimulatedTime(b.t1)
      time_heatboxes1[t1] = boxes

    # Make leftmost time to 0.
    t_first = None
    #t_base = datetime.datetime(2000, 1, 1)
    vertical_lines = []
    for t, boxes in sorted(time_heatboxes1.iteritems()):
      if t_first is None:
        t_first = t
      vl = None
      for b in boxes:
        #b.t0 = t_base + (b.t0 - t_first)
        #b.t1 = t_base + (b.t1 - t_first)
        b.t0 = (b.t0 - t_first).total_seconds()
        b.t1 = (b.t1 - t_first).total_seconds()
        vl = b.t1
      vertical_lines.append(vl)
    # The right edge of the last slice is the plot boundary; not drawn.
    del vertical_lines[-1]

    fmt = "%4d %1d" \
        " %8d %8d" \
        " %6.4f %6.4f" \
        " %8.3f %11.6f %8d %6s"
    with open(fn_hm, "w") as fo:
      fo.write("# max_temperature=%f\n" % max_temperature)
      # y0 is smaller than y1 (y0 is placed higher in the plot than y1).
      fo.write("%s\n" % Util.BuildHeader(fmt, \
          "sst_gen level t0 t1 y0 y1" \
          " temp temp_relative temp_color heat_color_hex"))
      for t, boxes in sorted(time_heatboxes1.iteritems()):
        for b in boxes:
          fo.write((fmt + "\n") % ( \
              b.sl.Id(), b.sl.Level()
              #, b.t0.strftime("%y%m%d-%H%M%S.%f")[:-3], b.t1.strftime("%y%m%d-%H%M%S.%f")[:-3]
              , b.t0, b.t1
              , b.y0, b.y1
              , b.temp, (float(b.temp) / max_temperature)
              , b.temp_color, ("%0.6X" % b.temp_color)
              ))
        # Blank line between time slices (gnuplot-style block separator).
        fo.write("\n")
      Cons.P("Created %s %d" % (fn_hm, os.path.getsize(fn_hm)))

    with open(fn_vl, "w") as fo:
      for vl in vertical_lines:
        #fo.write("%s\n" % vl.strftime("%y%m%d-%H%M%S.%f")[:-3])
        fo.write("%8d\n" % vl)
    Cons.P("Created %s %d" % (fn_vl, os.path.getsize(fn_vl)))
  return (fn_hm, fn_vl, max_temperature)
def _BuildMemtSstLives():
  """Parse the RocksDB LOG for this simulation run and populate the
  module-level _sst_lives map (variant for storage-cost calculation).

  Unlike the sibling reader, this one needs only table_file_creation
  (with path_id), table_file_deletion, and mutant_trivial_move events;
  table-access counts are skipped.
  """
  with Cons.MT("Building memt and sst lives ..."):
    #global _memt_lives
    global _sst_lives

    #_memt_lives = {}
    _sst_lives = {}

    dn = "%s/work/mutant/misc/rocksdb/log/rocksdb" % os.path.expanduser("~")
    fn = "%s/%s" % (dn, Conf.Get("simulation_time_begin"))
    if not os.path.isfile(fn):
      # Fall back to the compressed log and extract it in place.
      fn_7z = "%s.7z" % fn
      if not os.path.isfile(fn_7z):
        raise RuntimeError("Unexpected")
      Util.RunSubp("cd %s && 7z e %s" % (dn, fn_7z))
    if not os.path.isfile(fn):
      raise RuntimeError("Unexpected")

    line_no = 0
    with open(fn) as fo:
      for line in fo:
        line_no += 1
        # It's fast enough and often times excuted in parallel. No need to
        # show the progress.
        if False:
          if line_no % 100 == 0:
            Cons.ClearLine()
            Cons.Pnnl("Processing line %d" % line_no)

        # Not needed for storage cost calculation
        #
        # 2016/12/21-02:17:14.329266 7f702d7fa700 EVENT_LOG_v1 {"time_micros":
        # 1482304634329023, "mutant_table_acc_cnt": {"memt":
        # "0x7f69fc00c350:51723 0x7f6bec011200:26942", "sst": "1069:0:30.123:20.123
        # 1059:980:30.123:20.123"}
        #mo = re.match(r".+ EVENT_LOG_v1 {\"time_micros\": (?P<time_micros>\d+)" \
        #    ", \"mutant_table_acc_cnt\": {(\"memt\": \"(?P<memt_acc_cnt>(\w|\d|:| )+)\")?" \
        #    "(, )?" \
        #    "(\"sst\": \"(?P<sst_acc_cnt>(\w|\d|:|-|\.| )+)\")?" \
        #    "}" \
        #    ".*"
        #    , line)
        #if mo is not None:
        #  _SetTabletAccess(mo)
        #  continue

        # 2016/12/21-01:27:40.840324 7f702dffb700 EVENT_LOG_v1 {"time_micros":
        # 1482301660840289, "cf_name": "default", "job": 4, "event":
        # "table_file_creation", "file_number": 15, "file_size": 67569420,
        # "path_id": 0, "table_properties": {"data_size": 67110556, "index_size": 458020,
        # "filter_size": 0, "raw_key_size": 1752468, "raw_average_key_size": 25,
        # "raw_value_size": 65132550, "raw_average_value_size": 966,
        # "num_data_blocks": 16857, "num_entries": 67425, "filter_policy_name":
        # "", "reason": kCompaction, "kDeletedKeys": "0", "kMergeOperands": "0"}}
        mo = re.match(r"(?P<ts>\d\d\d\d/\d\d/\d\d-\d\d:\d\d:\d\d\.\d\d\d\d\d\d)" \
            ".+ EVENT_LOG_v1 {\"time_micros\": (?P<time_micros>\d+)" \
            ", \"cf_name\": \"default\"" \
            ", \"job\": (?P<job>\d+)" \
            ", \"event\": \"table_file_creation\"" \
            ", \"file_number\": (?P<file_number>\d+)" \
            ", \"file_size\": (?P<file_size>\d+)" \
            ", \"path_id\": (?P<path_id>\d+)" \
            ".+" \
            ", \"reason\": (?P<reason>\w+)" \
            ".*"
            , line)
        if mo is not None:
          _SetTabletCreated(mo)
          continue

        # 2016/12/21-02:15:58.341853 7f702dffb700 EVENT_LOG_v1 {"time_micros":
        # 1482304558341847, "job": 227, "event": "table_file_deletion",
        # "file_number": 1058}
        mo = re.match(r"(?P<ts>\d\d\d\d/\d\d/\d\d-\d\d:\d\d:\d\d\.\d\d\d\d\d\d)" \
            ".+ EVENT_LOG_v1 {\"time_micros\": (?P<time_micros>\d+)" \
            ", \"job\": \d+" \
            ", \"event\": \"table_file_deletion\"" \
            ", \"file_number\": (?P<file_number>\d+)" \
            "}" \
            ".*"
            , line)
        if mo is not None:
          _SetTabletDeleted(mo)
          continue

        # 2017/01/20-23:03:00.960592 7fc8037fe700 EVENT_LOG_v1 {"time_micros":
        # 1484971380960590, "mutant_trivial_move": {"in_sst": "sst_id=145
        # level=1 path_id=0 temp=20.721", "out_sst_path_id": 0}}
        mo = re.match(r"(?P<ts>\d\d\d\d/\d\d/\d\d-\d\d:\d\d:\d\d\.\d\d\d\d\d\d)" \
            ".+ EVENT_LOG_v1 {\"time_micros\": (?P<time_micros>\d+)" \
            ", \"mutant_trivial_move\": {" \
            "\"in_sst\": \"" \
            "sst_id=(?P<in_sst_id>\d+)" \
            " level=(?P<in_level>\d+)" \
            " path_id=(?P<in_path_id>\d+)" \
            " temp=(?P<in_temp>(\d|\.)+)" \
            "\", \"out_sst_path_id\": (?P<out_path_id>\d+)}}" \
            ".*"
            , line)
        if mo is not None:
          _SetTrivialMove(mo)
          continue

        # You can check out the other events here. All useful ones were covered above.
        #Cons.P(line)

    Cons.ClearLine()
    Cons.P("Processed %d lines" % line_no)

    # Summarize how many SSTables are still alive vs deleted.
    deleted = 0
    not_deleted = 0
    for sst_id, sl in _sst_lives.iteritems():
      if sl.TsDeleted() is None:
        not_deleted += 1
      else:
        deleted += 1
    Cons.P("Created %d SstLives. %d not-deleted, %d deleted"
        % (len(_sst_lives), not_deleted, deleted))
def P(self, msg):
  """Append msg to the internal message buffer; echo it immediately when
  the writer is unbuffered.

  msg: string to append.
  Thread-safe: buffer access is serialized with self.msg_lock.
  """
  with self.msg_lock:
    self.msg += msg
    # Fixed: idiomatic truthiness test instead of '== False' equality
    # comparison. Assumes self.buffered is a bool (as the original
    # comparison implies) -- behavior is unchanged for True/False.
    if not self.buffered:
      Cons.Pnnl(msg)
      sys.stdout.flush()
def Heatmap():
  """Generate (or reuse cached) SSTable heatmap-by-time data files from the
  Mutant log.

  Returns (heatmap_fn, vertical_lines_fn, max_heat). The heatmap file has
  one box per SSTable per time slice; box times are rebased onto a fixed
  epoch (2000-01-01) so the leftmost slice starts at 0.
  """
  # Set Conf.ExpStartTime(), if not already set.
  if Conf.ExpStartTime() is None:
    MutantLogReader.Get()

  fn_hm = "%s/sst-heatmap-by-time-%s" % (Conf.dn_result, Conf.ExpStartTime())
  fn_vl = "%s/sst-heatmap-by-time-vertical-lines-%s" % (Conf.dn_result, Conf.ExpStartTime())
  # Reuse previously generated files when both exist.
  if os.path.isfile(fn_hm) and os.path.isfile(fn_vl):
    return (fn_hm, fn_vl, _MaxHeatFromHeatmapByTimeData(fn_hm))

  sst_lives = MemtSstLife.GetSstLives()

  with Cons.MT("Generating Sst heatmap by time ..."):
    # Gather heat info at n different times
    num_times = Conf.heatmap_by_time_num_times

    if Conf.ExpFinishTime() is None:
      MemtSstLife.SetExpEndTimeFromSstLives()

    min_sst_opened = None
    for sst_gen, sl in sorted(sst_lives.iteritems()):
      min_sst_opened = sl.Opened() if min_sst_opened is None else min(min_sst_opened, sl.Opened())

    # Start time is when the first Sstable is opened, not the experiment start
    # time, when no SSTable exists yet.
    #   Exp start time:          160927-143257.395
    #   First Sstable open time: 160927-143411.273
    #st = datetime.datetime.strptime(Conf.ExpStartTime(), "%y%m%d-%H%M%S.%f")
    st = datetime.datetime.strptime(min_sst_opened, "%y%m%d-%H%M%S.%f")
    et = datetime.datetime.strptime(Conf.ExpFinishTime(), "%y%m%d-%H%M%S.%f")
    dur = (et - st).total_seconds()

    # { t0: {HeatBoxes} }
    time_heatboxes = {}
    vertical_lines = []
    for i in range(0, num_times):
      t0 = st + datetime.timedelta(seconds=(float(dur) * i / num_times + time_offset_in_sec))
      t1 = st + datetime.timedelta(seconds=(float(dur) * (i + 1) / num_times + time_offset_in_sec))
      vertical_lines.append(t1)

      # Heat boxes are sorted by their heat and plotted with the heights
      # proportional to the size.
      boxes = []
      for sst_gen, sl in sorted(sst_lives.iteritems()):
        h = sl.HeatAtTime(t0)
        # SSTables with no heat at this time are not plotted in this slice.
        if h is None:
          continue
        boxes.append(_Box(sl, t0, t1, h))
      boxes.sort(key=lambda b: b.heat, reverse=True)
      time_heatboxes[t0] = boxes
      Cons.ClearLine()
      Cons.Pnnl("%4d/%4d" % (i + 1, num_times))
    # Python 2 print statement: ends the progress line.
    print ""
    # The right edge of the last slice is the plot boundary; not drawn.
    del vertical_lines[-1]

    # Set y-coordinate of each box
    for t, boxes in sorted(time_heatboxes.iteritems()):
      total_size = 0
      for b in boxes:
        total_size += b.sl.Size()
      s = 0
      for b in boxes:
        b.y0 = float(s) / total_size
        s += b.sl.Size()
        b.y1 = float(s) / total_size

    # Make leftmost time to 0.
    t_first = None
    t_base = datetime.datetime(2000, 1, 1)
    for t, boxes in sorted(time_heatboxes.iteritems()):
      if t_first is None:
        t_first = t
      for b in boxes:
        b.t0 = t_base + (b.t0 - t_first)
        b.t1 = t_base + (b.t1 - t_first)
    for i in range(len(vertical_lines)):
      vertical_lines[i] = t_base + (vertical_lines[i] - t_first)

    fmt = "%4d %1d %17s %17s %6.4f %6.4f" \
        " %8.3f %8d %6s"
    with open(fn_hm, "w") as fo:
      fo.write("# heat_max=%f\n" % MemtSstLife.SstLife.max_heat)
      # y0 is smaller than y1 (y0 is placed higher in the plot than y1).
      fo.write("%s\n" % Util.BuildHeader(fmt, \
          "sst_gen level t0 t1 y0 y1" \
          " heat heat_color heat_color_hex"))
      for t, boxes in sorted(time_heatboxes.iteritems()):
        for b in boxes:
          fo.write((fmt + "\n") % ( \
              b.sl.sst_gen, b.sl.level
              , b.t0.strftime("%y%m%d-%H%M%S.%f")[:-3], b.t1.strftime("%y%m%d-%H%M%S.%f")[:-3]
              , b.y0, b.y1
              , b.heat, b.heat_color, ("%0.6X" % b.heat_color)
              ))
        # Blank line between time slices (gnuplot-style block separator).
        fo.write("\n")
      Cons.P("Created %s %d" % (fn_hm, os.path.getsize(fn_hm)))

    with open(fn_vl, "w") as fo:
      for vl in vertical_lines:
        fo.write("%s\n" % vl.strftime("%y%m%d-%H%M%S.%f")[:-3])
    Cons.P("Created %s %d" % (fn_vl, os.path.getsize(fn_vl)))
  return (fn_hm, fn_vl, MemtSstLife.SstLife.max_heat)