def Read():
    """Load the anonymized Redis command log into the global _ops list.

    Lines that do not parse into a recognized command (op.cmd_type is None)
    are dropped.  Progress is reported on a single console line every 10000
    input lines.
    """
    fn_in = "%s/work/quizup/86400-sec-redis-ephemeral-cmds.anonymized.log" \
      % os.path.expanduser("~")
    # Known input line count, used only for the progress percentage.
    # $ time wc -l 86400-sec-redis-ephemeral-cmds.anonymized.log
    #   119,874,265
    #   real 0m11.896s
    num_lines = 119874265
    with Cons.MT("Reading %s" % fn_in):
        global _ops
        _ops = []
        with open(fn_in) as fo:
            line_cnt = 0
            for line in fo:
                line_cnt += 1
                op = Op(line)
                # Skip unparsable/unrecognized commands.
                if op.cmd_type is None:
                    continue
                _ops.append(op)
                if line_cnt % 10000 == 0:
                    Cons.ClearLine()
                    Cons.Pnnl("%d OPs read (%.2f%%), %d kept" %
                              (line_cnt, 100.0 * line_cnt / num_lines, len(_ops)))
            Cons.ClearLine()
            Cons.P("%d OPs read (%.2f%%), %d kept" %
                   (line_cnt, 100.0 * line_cnt / num_lines, len(_ops)))
Beispiel #2
0
 def Pnnl(self, msg, target="log"):
     """Write msg, without a trailing newline, to the log file and
     optionally echo it to the console.

     target is "log" (file only) or "both" (file and console); any other
     value raises RuntimeError.
     """
     with self.fo_lock:
         # Reject unknown targets before writing anything.
         if target not in ("log", "both"):
             raise RuntimeError("Unexpected target: %s" % target)
         self.fo.write("%s" % msg)
         if target == "both":
             Cons.Pnnl(msg)
Beispiel #3
0
def SeqRead():
	"""Endlessly sequential-read every test data file, timing each read.

	Reads each of the _num_files files (named 00, 01, ...) fully into
	memory and prints the per-file read latency in ms.  Loops forever;
	stop with Ctrl-C.  NOTE: Python 2 print statement below.
	"""
	while True:
		for i in range(_num_files):
			fn = "%s/%02d" %(_dn_test_data, i)
			Cons.Pnnl("Reading %s into memory: " % fn)
			start_time = time.time()
			with open(fn, "rb") as fo:
				data = fo.read(_file_size)
			dur = time.time() - start_time
			print "%3.0f ms" % (dur * 1000.0)
def Write():
    """Write the parsed OPs (global _ops) to the compact log file.

    One line per op: "<ts> <cmd> <key...>".  Progress is reported every
    10000 ops.  Fix: the final percentage no longer divides by zero when
    _ops is empty.
    """
    fn_out = "%s/work/quizup/86400-sec-redis-ephemeral-cmds.anonymized.compact.log" \
      % os.path.expanduser("~")
    with Cons.MT("Writing %s" % fn_out):
        # Hoisted out of the loop; also used for the empty-input guard.
        num_ops = len(_ops)
        with open(fn_out, "w") as fo:
            i = 0
            for op in _ops:
                i += 1
                fo.write("%s %s %s\n" % (op.ts, op.cmd, " ".join(op.key)))
                if i % 10000 == 0:
                    Cons.ClearLine()
                    Cons.Pnnl("Wrote %d OPs (%.2f%%)" %
                              (i, 100.0 * i / num_ops))
        Cons.ClearLine()
        # Guard against ZeroDivisionError when there was nothing to write.
        pct = (100.0 * i / num_ops) if num_ops else 0.0
        Cons.P("Wrote %d OPs (%.2f%%). File size %d" %
               (i, pct, os.path.getsize(fn_out)))
    def Flush(self):
        """Redraw the buffered message, erasing what was printed last time.

        Clears self.lines_printed previously printed lines using ANSI
        escape sequences, prints the buffered self.msg, then resets the
        buffer.
        """
        esc = chr(27)
        with self.msg_lock:
            # For each previously printed line: clear it ([2K), move the
            # cursor up one row ([1A), and back to column 1 ([1G).
            for _ in range(self.lines_printed):
                sys.stdout.write(esc + "[2K" + esc + "[1A" + esc + "[1G")
            # Clear the line the cursor now sits on.
            sys.stdout.write(esc + "[2K")

            Cons.Pnnl(self.msg)
            self.lines_printed = len(self.msg.split("\n")) - 1
            self.msg = ""
Beispiel #6
0
def GetKeyCntData():
    """Return the path of the per-key access-count file, building it on demand.

    When the output file already exists it is reused as a cache.  Otherwise
    the compact command log is scanned, per-key access counts are tallied,
    and the counts are written out sorted by descending count.
    """
    home = os.path.expanduser("~")
    fn_out = "%s/work/quizup/86400-sec-redis-ephemeral-cmds.anonymized.compact.key-accesscnt" \
      % home
    if os.path.isfile(fn_out):
        return fn_out

    fn_in = "%s/work/quizup/86400-sec-redis-ephemeral-cmds.anonymized.compact.log" \
      % home
    # Known input line count, used only for the progress percentage.
    num_lines = 77836639
    with Cons.MT("Reading %s" % fn_in):
        key_cnt = {}
        with open(fn_in) as fo:
            i = 0
            for line in fo:
                i += 1
                # Columns: timestamp, command, then one or more keys.
                for k in line.split()[2:]:
                    key_cnt[k] = key_cnt.get(k, 0) + 1

                if i % 10000 == 0:
                    Cons.ClearLine()
                    Cons.Pnnl("%d OPs read (%.2f%%)" %
                              (i, 100.0 * i / num_lines))
        Cons.ClearLine()
        Cons.P("%d OPs read (%.2f%%)" % (i, 100.0 * i / num_lines))

    with Cons.MT("Writing %s" % fn_out):
        # Most-accessed keys first.
        with open(fn_out, "w") as fo:
            for k, cnt in sorted(key_cnt.items(),
                                 key=operator.itemgetter(1),
                                 reverse=True):
                fo.write("%s %d\n" % (k, cnt))
        Cons.P("Created %s %d" % (fn_out, os.path.getsize(fn_out)))
        return fn_out
Beispiel #7
0
def _GetQ():
    """Return the SQS queue named sqs_q_name, creating it when missing.

    AWS rejects re-creating a queue for 60 seconds after deletion and does
    not say how long remains, so creation is retried once a second until it
    succeeds.  Unexpected ClientErrors are re-raised.
    NOTE: Python 2 print statement below.
    """
    # Get the queue. Create one if not exists.
    try:
        queue = _sqs.get_queue_by_name(QueueName=sqs_q_name,
                                       # QueueOwnerAWSAccountId='string'
                                       )
        #Cons.P(pprint.pformat(vars(queue), indent=2))
        #{ '_url': 'https://queue.amazonaws.com/998754746880/mutant-exps',
        #		  'meta': ResourceMeta('sqs', identifiers=[u'url'])}
        return queue
    except botocore.exceptions.ClientError as e:
        #Cons.P(pprint.pformat(e, indent=2))
        #Cons.P(pprint.pformat(vars(e), indent=2))
        # A missing queue is the expected case; fall through to creation.
        if e.response["Error"][
                "Code"] == "AWS.SimpleQueueService.NonExistentQueue":
            pass
        else:
            raise e

    Cons.Pnnl("The queue doesn't exists. Creating one ")
    while True:
        response = None
        try:
            response = _bc.create_queue(QueueName=sqs_q_name)
            # Default message retention period is 4 days.
            print ""
            break
        except botocore.exceptions.ClientError as e:
            # When calling the CreateQueue operation: You must wait 60 seconds after
            # deleting a queue before you can create another with the same name.
            # It doesn't give me how much more you need to wait. Polling until succeed.
            if e.response["Error"][
                    "Code"] == "AWS.SimpleQueueService.QueueDeletedRecently":
                sys.stdout.write(".")
                sys.stdout.flush()
                time.sleep(1)
            else:
                raise e

    return _sqs.get_queue_by_name(QueueName=sqs_q_name)
Beispiel #8
0
def ByTags(tags, job_id_none_requested):
    Cons.Pnnl("Terminating running instances:")
    _TermInst.Init(job_id_none_requested)

    tis = []
    for r in Ec2Region.All():
        tis.append(_TermInst(r, tags))

    threads = []
    for ti in tis:
        t = threading.Thread(target=ti.Run)
        t.daemon = True
        threads.append(t)
        t.start()

    for t in threads:
        t.join()
    print ""

    Cons.P(_TermInst.Header())
    for ti in tis:
        ti.PrintResult()
    def Read(self):
        """Parse the json-per-line compaction log into self.sstid_howcreated.

        For each SSTable generation number, records whether the table was
        flushed from a memtable or produced by a compaction (and from which
        input tables).  On a "pending" event with more than 2 pending
        compactions, asks the user once whether to proceed and exits when
        the answer is not "y".  Raises RuntimeError on an unexpected
        keyspace/table, duplicate generation, or event type.
        NOTE: raw_input below is Python 2.
        """
        # {ssd_id: how_created}
        self.sstid_howcreated = {}

        # Set lazily from the user's answer the first time an overloaded
        # "pending" event is seen.
        self.proceed_with_pending_compactions = None

        # Read the log
        with open(self.fn_compaction_log) as fo:
            for line in fo.readlines():
                # Looks like they are in json format
                #Cons.P(line)
                e = json.loads(line)
                #Cons.P(pprint.pformat(e))

                if (e["keyspace"] != "ycsb") or (e["table"] != "usertable"):
                    raise RuntimeError("Unexpected\n%s" % pprint.pformat(e))

                if e["type"] == "enable":
                    continue
                elif e["type"] == "flush":
                    # Each flushed SSTable generation may appear only once.
                    for t in e["tables"]:
                        sst_gen = int(t["table"]["generation"])
                        if sst_gen in self.sstid_howcreated:
                            raise RuntimeError("Unexpected")
                        self.sstid_howcreated[
                            sst_gen] = CompactionLog._HowCreated("flushed")
                    continue
                elif e["type"] == "pending":
                    if int(e["pending"]) <= 2:
                        continue
                    # Prompt for pending compactions
                    if self.proceed_with_pending_compactions is None:
                        Cons.P(
                            "There are pending compactions, which indicate the server is overloaded."
                        )
                        Cons.P(pprint.pformat(e), ind=2)
                        Cons.Pnnl("Would you like to proceed ")
                        self.proceed_with_pending_compactions = raw_input(
                            "(Y/N)? ")
                        if self.proceed_with_pending_compactions.lower(
                        ) != "y":
                            sys.exit(0)
                        else:
                            continue
                elif e["type"] == "compaction":
                    # {u'end': u'1474839005076',
                    #  u'input': [{u'strategyId': u'1',
                    #              u'table': {u'details': {u'level': 0,
                    #                                      u'max_token': u'9223342625191452161',
                    #                                      u'min_token': u'-9222223702120529941'},
                    #                         u'generation': 1,
                    #                         u'size': 14061373,
                    #                         u'version': u'mc'}},
                    #             {u'strategyId': u'1',
                    #              u'table': {u'details': {u'level': 0,
                    #                                      u'max_token': u'9222611853699094103',
                    #                                      u'min_token': u'-9221429980877775654'},
                    #                         u'generation': 2,
                    #                         u'size': 14050279,
                    #                         u'version': u'mc'}}],
                    #  u'keyspace': u'ycsb',
                    #  u'output': [{u'strategyId': u'1',
                    #               u'table': {u'details': {u'level': 0,
                    #                                       u'max_token': u'9223342625191452161',
                    #                                       u'min_token': u'-9222223702120529941'},
                    #                          u'generation': 3,
                    #                          u'size': 28117799,
                    #                          u'version': u'mc'}}],
                    #  u'start': u'1474839003093',
                    #  u'table': u'usertable',
                    #  u'time': 1474839005076,
                    #  u'type': u'compaction'}
                    #
                    # time and end seem to have the same value
                    # NOTE: `time` shadows the time module name locally.
                    time = int(e["time"])
                    start = int(e["start"])
                    end = int(e["end"])

                    # Set experiment end time: last compaction event + the duration of
                    # the compaction. Or, the last seen Mutant event timestamp can be
                    # used too, depending on whether you want a stabilized snapshot of
                    # SSTables or a snapshot while data is being added.
                    #
                    #Conf.SetExpFinishTime(datetime.datetime.fromtimestamp(int(end + (end - start)) / 1000.0)
                    #		.strftime("%y%m%d-%H%M%S.%f")
                    #		# Trim the last 3 digits
                    #		[:-3])

                    input_tables = []
                    output_tables = []
                    for e1 in e["input"]:
                        if e1["strategyId"] != "1":
                            raise RuntimeError("Unexpected\n%s" %
                                               pprint.pformat(e))
                        # The bare subscripts below are schema checks only:
                        # they raise KeyError when a key is missing.
                        e1["table"]["details"]["level"]
                        e1["table"]["details"]["max_token"]
                        e1["table"]["details"]["min_token"]
                        input_tables.append(int(e1["table"]["generation"]))
                        e1["table"]["size"]

                    for e1 in e["output"]:
                        if e1["strategyId"] != "1":
                            raise RuntimeError("Unexpected\n%s" %
                                               pprint.pformat(e))
                        # Schema checks, as above.
                        e1["table"]["details"]["level"]
                        e1["table"]["details"]["max_token"]
                        e1["table"]["details"]["min_token"]
                        sst_gen = int(e1["table"]["generation"])
                        e1["table"]["size"]

                        if sst_gen in self.sstid_howcreated:
                            raise RuntimeError("Unexpected")
                        self.sstid_howcreated[
                            sst_gen] = CompactionLog._HowCreated(
                                "compacted", input_tables)

                    #Cons.P("\n%s\n%s\n%s" % (time, start, end))
                    continue
                else:
                    raise RuntimeError("Unexpected\n%s" % pprint.pformat(e))
Beispiel #10
0
def _BuildMemtSstLives():
  """Parse the RocksDB event log into the global _sst_lives map.

  No-op when _sst_lives was already built.  Extracts the log from its .7z
  archive when needed, then matches each EVENT_LOG_v1 line against the
  known event patterns (table access counts, table file creation/deletion,
  compaction finished) and dispatches to the _Set* helpers.  Other events
  (flush/compaction started/finished markers) are recognized and skipped.
  NOTE: iteritems() below is Python 2.
  """
  with Cons.MT("Building memt and sst lives ..."):
    #global _memt_lives
    global _sst_lives

    if _sst_lives is not None:
      return

    #_memt_lives = {}
    _sst_lives = {}

    dn = "%s/rocksdb" % Conf.GetDir("log_dir")
    fn = "%s/%s" % (dn, Conf.Get("simulation_time_begin"))
    if not os.path.isfile(fn):
      fn_7z = "%s.7z" % fn
      if not os.path.isfile(fn_7z):
        raise RuntimeError("Unexpected")
      Util.RunSubp("cd %s && 7z e %s" % (dn, fn_7z))
    if not os.path.isfile(fn):
      raise RuntimeError("Unexpected")

    line_no = 0
    with open(fn) as fo:
      for line in fo:
        line_no += 1
        if line_no % 100 == 0:
          Cons.ClearLine()
          Cons.Pnnl("Processing line %d" % line_no)

        # The timestamp at the first column and the time_micros are 5 hours
        # apart. One is in local time (EDT) and the other is in UTC. Follow the former.
        # TODO: this needs to be fixed at the other place too

        # NOTE: in each pattern below only the first segment is a raw
        # string; the continuation segments rely on "\w", "\d" etc. passing
        # through non-raw string literals unchanged.

        # 2016/12/21-02:17:14.329266 7f702d7fa700 EVENT_LOG_v1 {"time_micros":
        # 1482304634329023, "mutant_table_acc_cnt": {"memt":
        # "0x7f69fc00c350:51723 0x7f6bec011200:26942", "sst": "1069:0:30.123:20.123
        # 1059:980:30.123:20.123"}
        mo = re.match(r"(?P<ts>\d\d\d\d/\d\d/\d\d-\d\d:\d\d:\d\d\.\d\d\d\d\d\d) .+ EVENT_LOG_v1 {" \
            ".+"
            ", \"mutant_table_acc_cnt\": {(\"memt\": \"(?P<memt_acc_cnt>(\w|\d|:| )+)\")?" \
            "(, )?" \
            "(\"sst\": \"(?P<sst_acc_cnt>(\w|\d|:|-|\.| )+)\")?" \
            "}" \
            ".*"
            , line)
        if mo is not None:
          _SetTabletAccess(mo)
          continue

        # 2016/12/21-02:15:58.341853 7f702dffb700 EVENT_LOG_v1 {"time_micros":
        # 1482304558341847, "job": 227, "event": "table_file_deletion",
        # "file_number": 1058}
        mo = re.match(r"(?P<ts>\d\d\d\d/\d\d/\d\d-\d\d:\d\d:\d\d\.\d\d\d\d\d\d) .+ EVENT_LOG_v1 {" \
            ".+"
            ", \"job\": \d+" \
            ", \"event\": \"table_file_deletion\"" \
            ", \"file_number\": (?P<file_number>\d+)" \
            "}" \
            ".*"
            , line)
        if mo is not None:
          _SetTabletDeleted(mo)
          continue

        # 2016/12/21-01:27:40.840324 7f702dffb700 EVENT_LOG_v1 {"time_micros":
        # 1482301660840289, "cf_name": "default", "job": 4, "event":
        # "table_file_creation", "file_number": 15, "file_size": 67569420,
        # "table_properties": {"data_size": 67110556, "index_size": 458020,
        # "filter_size": 0, "raw_key_size": 1752468, "raw_average_key_size": 25,
        # "raw_value_size": 65132550, "raw_average_value_size": 966,
        # "num_data_blocks": 16857, "num_entries": 67425, "filter_policy_name":
        # "", "reason": kCompaction, "kDeletedKeys": "0", "kMergeOperands": "0"}}
        mo = re.match(r"(?P<ts>\d\d\d\d/\d\d/\d\d-\d\d:\d\d:\d\d\.\d\d\d\d\d\d) .+ EVENT_LOG_v1 {" \
            ".+"
            ", \"cf_name\": \"default\"" \
            ", \"job\": (?P<job>\d+)" \
            ", \"event\": \"table_file_creation\"" \
            ", \"file_number\": (?P<file_number>\d+)" \
            ", \"file_size\": (?P<file_size>\d+)" \
            ".+" \
            ", \"reason\": (?P<reason>\w+)" \
            ".*"
            , line)
        if mo is not None:
          _SetTabletCreated(mo)
          continue

        # 2016/12/21-01:28:41.835596 7f683c58d700 EVENT_LOG_v1 {"time_micros":
        # 1482301721835586, "job": 8, "event": "flush_started", "num_memtables":
        # 2, "num_entries": 257306, "num_deletes": 0, "memory_usage": 260052944}
        mo = re.match(r"(?P<ts>\d\d\d\d/\d\d/\d\d-\d\d:\d\d:\d\d\.\d\d\d\d\d\d) .+ EVENT_LOG_v1 {" \
            ".+"
            ", \"job\": \d+" \
            ", \"event\": \"flush_started\"" \
            ".*"
            , line)
        if mo is not None:
          continue

        # 2016/12/21-01:27:25.893816 7f683c58d700 (Original Log Time
        # 2016/12/21-01:27:25.893597) EVENT_LOG_v1 {"time_micros":
        # 1482301645893590, "job": 2, "event": "flush_finished", "lsm_state": [1,
        # 0, 0, 0, 0, 0, 0], "immutable_memtables": 0}
        mo = re.match(r"(?P<ts>\d\d\d\d/\d\d/\d\d-\d\d:\d\d:\d\d\.\d\d\d\d\d\d) .+ EVENT_LOG_v1 {" \
            ".+"
            ", \"job\": \d+" \
            ", \"event\": \"flush_finished\"" \
            ".*"
            , line)
        if mo is not None:
          continue

        # 2016/12/21-01:27:40.010374 7f702dffb700 EVENT_LOG_v1 {"time_micros":
        # 1482301660010345, "job": 4, "event": "compaction_started", "files_L0":
        # [12, 8], "score": 1, "input_data_size": 241744688}
        mo = re.match(r"(?P<ts>\d\d\d\d/\d\d/\d\d-\d\d:\d\d:\d\d\.\d\d\d\d\d\d) .+ EVENT_LOG_v1 {" \
            ".+"
            ", \"job\": \d+" \
            ", \"event\": \"compaction_started\"" \
            ".*"
            , line)
        if mo is not None:
          continue

        # 2016/12/21-01:27:40.960792 7f702dffb700 (Original Log Time
        # 2016/12/21-01:27:40.959919) EVENT_LOG_v1 {"time_micros":
        # 1482301660959908, "job": 4, "event": "compaction_finished",
        # "compaction_time_micros": 949251, "output_level": 1,
        # "num_output_files": 4, "total_output_size": 229662756,
        # "num_input_records": 241171, "num_output_records": 229148,
        # "num_subcompactions": 1, "lsm_state": [0, 4, 0, 0, 0, 0, 0]}
        mo = re.match(r"(?P<ts>\d\d\d\d/\d\d/\d\d-\d\d:\d\d:\d\d\.\d\d\d\d\d\d) .+ EVENT_LOG_v1 {" \
            ".+"
            ", \"job\": (?P<job>\d+)" \
            ", \"event\": \"compaction_finished\"" \
            ".+" \
            ", \"output_level\": (?P<output_level>\d+)" \
            ".*"
            , line)
        if mo is not None:
          _SetCompactinFinished(mo)
          continue

        # You can check out the other events here. All useful ones were covered above.
        #Cons.P(line)
      Cons.ClearLine()
      Cons.P("Processed %d lines" % line_no)

    # Summarize how many SSTables are still alive vs already deleted.
    deleted = 0
    not_deleted = 0
    for sst_id, sl in _sst_lives.iteritems():
      if sl.TsDeleted() is None:
        not_deleted += 1
      else:
        deleted += 1
    Cons.P("Created %d SstLives. %d not-deleted, %d deleted"
        % (len(_sst_lives), not_deleted, deleted))
Beispiel #11
0
def GetData():
	"""Return (heatmap_file, vertical_lines_file, max_temperature).

	When both output files already exist they are reused as a cache.
	Otherwise SSTable temperatures are sampled at num_times points between
	the first SSTable creation and the simulation end, the heatmap and
	vertical-line data files are written, and their paths plus the maximum
	observed temperature are returned.
	NOTE: iteritems() and the bare print statement below are Python 2.
	"""
	fn_hm = "%s/sst-heatmap-by-time-%s" % (Conf.dn_result, Conf.Get("simulation_time_begin"))
	fn_vl = "%s/sst-heatmap-by-time-vertical-lines-%s" % (Conf.dn_result, Conf.Get("simulation_time_begin"))
	if os.path.isfile(fn_hm) and os.path.isfile(fn_vl):
		return (fn_hm, fn_vl, _MaxHeatFromHeatmapByTimeData(fn_hm))

	# {sst_id, SstLife()}
	sst_lives = RocksdbLogReader.GetSstLives()

	with Cons.MT("Generating Sst heatmap by time ..."):
		# Gather temperature info at n different times
		num_times = Conf.heatmap_by_time_num_times

		# Set start and end times
		# Start time is when the first Sstable is created, not the simulation_time_begin, when no SSTable exists yet.
		#   Exp start time:          160927-143257.395
		#   First Sstable open time: 160927-143411.273
		min_sst_opened = None
		for sst_gen, sl in sorted(sst_lives.iteritems()):
			min_sst_opened = sl.TsCreated() if min_sst_opened is None else min(min_sst_opened, sl.TsCreated())
		st = min_sst_opened
		et = SimTime.SimulationTimeEnd()
		dur = (et - st).total_seconds()

		# { t0: {HeatBoxes} }
		time_heatboxes = {}
		max_temperature = None
		for i in range(0, num_times):
			t0 = st + datetime.timedelta(seconds=(float(dur) * i / num_times + time_offset_in_sec))
			t1 = st + datetime.timedelta(seconds=(float(dur) * (i+1) / num_times + time_offset_in_sec))

			# Sort boxes by their temperature. Heights are proportional to their sizes.
			boxes = []
			for sst_gen, sl in sorted(sst_lives.iteritems()):
				temp = sl.TempAtTime(t0)
				# NOTE: max_temperature is updated before the None check, so
				# it can be set to or compared with None here (a comparison
				# that is only legal in Python 2) -- looks unintended; verify.
				if max_temperature is None:
					max_temperature = temp
				else:
					max_temperature = max(max_temperature, temp)
				if temp is None:
					continue
				boxes.append(_Box(sl, t0, t1, temp))
			boxes.sort(key=lambda b: b.temp, reverse=True)
			time_heatboxes[t0] = boxes

			Cons.ClearLine()
			Cons.Pnnl("%4d/%4d" % (i + 1, num_times))
		print ""

		for t, boxes in sorted(time_heatboxes.iteritems()):
			for b in boxes:
				b.SetTempColor(max_temperature)

		# Set y-coordinate of each box
		for t, boxes in sorted(time_heatboxes.iteritems()):
			total_size = 0
			for b in boxes:
				total_size += b.sl.Size()
			s = 0
			for b in boxes:
				b.y0 = float(s) / total_size
				s += b.sl.Size()
				b.y1 = float(s) / total_size

		# Convert to simulated time
		# { t0: {HeatBoxes} }
		time_heatboxes1 = {}
		for t, boxes in sorted(time_heatboxes.iteritems()):
			t1 = SimTime.ToSimulatedTime(t)
			for b in boxes:
				b.t0 = SimTime.ToSimulatedTime(b.t0)
				b.t1 = SimTime.ToSimulatedTime(b.t1)
			time_heatboxes1[t1] = boxes

		# Make leftmost time to 0.
		t_first = None
		#t_base = datetime.datetime(2000, 1, 1)
		vertical_lines = []
		for t, boxes in sorted(time_heatboxes1.iteritems()):
			if t_first is None:
				t_first = t
			vl = None
			for b in boxes:
				#b.t0 = t_base + (b.t0 - t_first)
				#b.t1 = t_base + (b.t1 - t_first)
				b.t0 = (b.t0 - t_first).total_seconds()
				b.t1 = (b.t1 - t_first).total_seconds()
				vl = b.t1
			vertical_lines.append(vl)
		del vertical_lines[-1]

		fmt = "%4d %1d" \
				" %8d %8d" \
				" %6.4f %6.4f" \
				" %8.3f %11.6f %8d %6s"
		with open(fn_hm, "w") as fo:
			fo.write("# max_temperature=%f\n" % max_temperature)

			# y0 is smaller than y1 (y0 is placed higher in the plot than y1).
			fo.write("%s\n" % Util.BuildHeader(fmt, \
					"sst_gen level t0 t1 y0 y1" \
					" temp temp_relative temp_color heat_color_hex"))

			for t, boxes in sorted(time_heatboxes1.iteritems()):
				for b in boxes:
					fo.write((fmt + "\n") % ( \
							b.sl.Id(), b.sl.Level()
							#, b.t0.strftime("%y%m%d-%H%M%S.%f")[:-3], b.t1.strftime("%y%m%d-%H%M%S.%f")[:-3]
							, b.t0, b.t1
							, b.y0, b.y1
							, b.temp, (float(b.temp) / max_temperature)
							, b.temp_color, ("%0.6X" % b.temp_color)
							))
				fo.write("\n")
		Cons.P("Created %s %d" % (fn_hm, os.path.getsize(fn_hm)))

		with open(fn_vl, "w") as fo:
			for vl in vertical_lines:
				#fo.write("%s\n" % vl.strftime("%y%m%d-%H%M%S.%f")[:-3])
				fo.write("%8d\n" % vl)
		Cons.P("Created %s %d" % (fn_vl, os.path.getsize(fn_vl)))
	return (fn_hm, fn_vl, max_temperature)
def _BuildMemtSstLives():
    """Parse the RocksDB event log into the global _sst_lives map.

    Variant used for storage-cost calculation: unconditionally rebuilds
    _sst_lives, extracts the log from its .7z archive when needed, and
    matches table_file_creation, table_file_deletion, and
    mutant_trivial_move events, dispatching to the _Set* helpers.
    NOTE: iteritems() below is Python 2.
    """
    with Cons.MT("Building memt and sst lives ..."):
        #global _memt_lives
        global _sst_lives
        #_memt_lives = {}
        _sst_lives = {}

        dn = "%s/work/mutant/misc/rocksdb/log/rocksdb" % os.path.expanduser(
            "~")
        fn = "%s/%s" % (dn, Conf.Get("simulation_time_begin"))
        if not os.path.isfile(fn):
            fn_7z = "%s.7z" % fn
            if not os.path.isfile(fn_7z):
                raise RuntimeError("Unexpected")
            Util.RunSubp("cd %s && 7z e %s" % (dn, fn_7z))
        if not os.path.isfile(fn):
            raise RuntimeError("Unexpected")

        line_no = 0
        with open(fn) as fo:
            for line in fo:
                line_no += 1
                # It's fast enough and often times excuted in parallel. No need to show the progress.
                if False:
                    if line_no % 100 == 0:
                        Cons.ClearLine()
                        Cons.Pnnl("Processing line %d" % line_no)

                # Not needed for storage cost calculation
                #
                # 2016/12/21-02:17:14.329266 7f702d7fa700 EVENT_LOG_v1 {"time_micros":
                # 1482304634329023, "mutant_table_acc_cnt": {"memt":
                # "0x7f69fc00c350:51723 0x7f6bec011200:26942", "sst": "1069:0:30.123:20.123
                # 1059:980:30.123:20.123"}
                #mo = re.match(r".+ EVENT_LOG_v1 {\"time_micros\": (?P<time_micros>\d+)" \
                #		", \"mutant_table_acc_cnt\": {(\"memt\": \"(?P<memt_acc_cnt>(\w|\d|:| )+)\")?" \
                #		"(, )?" \
                #		"(\"sst\": \"(?P<sst_acc_cnt>(\w|\d|:|-|\.| )+)\")?" \
                #		"}" \
                #		".*"
                #		, line)
                #if mo is not None:
                #	_SetTabletAccess(mo)
                #	continue

                # 2016/12/21-01:27:40.840324 7f702dffb700 EVENT_LOG_v1 {"time_micros":
                # 1482301660840289, "cf_name": "default", "job": 4, "event":
                # "table_file_creation", "file_number": 15, "file_size": 67569420,
                # "path_id": 0, "table_properties": {"data_size": 67110556, "index_size": 458020,
                # "filter_size": 0, "raw_key_size": 1752468, "raw_average_key_size": 25,
                # "raw_value_size": 65132550, "raw_average_value_size": 966,
                # "num_data_blocks": 16857, "num_entries": 67425, "filter_policy_name":
                # "", "reason": kCompaction, "kDeletedKeys": "0", "kMergeOperands": "0"}}
                mo = re.match(r"(?P<ts>\d\d\d\d/\d\d/\d\d-\d\d:\d\d:\d\d\.\d\d\d\d\d\d)" \
                  ".+ EVENT_LOG_v1 {\"time_micros\": (?P<time_micros>\d+)" \
                  ", \"cf_name\": \"default\"" \
                  ", \"job\": (?P<job>\d+)" \
                  ", \"event\": \"table_file_creation\"" \
                  ", \"file_number\": (?P<file_number>\d+)" \
                  ", \"file_size\": (?P<file_size>\d+)" \
                  ", \"path_id\": (?P<path_id>\d+)" \
                  ".+" \
                  ", \"reason\": (?P<reason>\w+)" \
                  ".*"
                  , line)
                if mo is not None:
                    _SetTabletCreated(mo)
                    continue

                # 2016/12/21-02:15:58.341853 7f702dffb700 EVENT_LOG_v1 {"time_micros":
                # 1482304558341847, "job": 227, "event": "table_file_deletion",
                # "file_number": 1058}
                mo = re.match(r"(?P<ts>\d\d\d\d/\d\d/\d\d-\d\d:\d\d:\d\d\.\d\d\d\d\d\d)" \
                  ".+ EVENT_LOG_v1 {\"time_micros\": (?P<time_micros>\d+)" \
                  ", \"job\": \d+" \
                  ", \"event\": \"table_file_deletion\"" \
                  ", \"file_number\": (?P<file_number>\d+)" \
                  "}" \
                  ".*"
                  , line)
                if mo is not None:
                    _SetTabletDeleted(mo)
                    continue

                # 2017/01/20-23:03:00.960592 7fc8037fe700 EVENT_LOG_v1 {"time_micros":
                # 1484971380960590, "mutant_trivial_move": {"in_sst": "sst_id=145
                # level=1 path_id=0 temp=20.721", "out_sst_path_id": 0}}
                mo = re.match(r"(?P<ts>\d\d\d\d/\d\d/\d\d-\d\d:\d\d:\d\d\.\d\d\d\d\d\d)" \
                  ".+ EVENT_LOG_v1 {\"time_micros\": (?P<time_micros>\d+)" \
                  ", \"mutant_trivial_move\": {" \
                  "\"in_sst\": \"" \
                  "sst_id=(?P<in_sst_id>\d+)" \
                  " level=(?P<in_level>\d+)" \
                  " path_id=(?P<in_path_id>\d+)" \
                  " temp=(?P<in_temp>(\d|\.)+)" \
                  "\", \"out_sst_path_id\": (?P<out_path_id>\d+)}}" \
                  ".*"
                  , line)
                if mo is not None:
                    _SetTrivialMove(mo)
                    continue

                # You can check out the other events here. All useful ones were covered above.
                #Cons.P(line)
            Cons.ClearLine()
            Cons.P("Processed %d lines" % line_no)

        # Summarize how many SSTables are still alive vs already deleted.
        deleted = 0
        not_deleted = 0
        for sst_id, sl in _sst_lives.iteritems():
            if sl.TsDeleted() is None:
                not_deleted += 1
            else:
                deleted += 1
        Cons.P("Created %d SstLives. %d not-deleted, %d deleted" %
               (len(_sst_lives), not_deleted, deleted))
Beispiel #13
0
 def P(self, msg):
     """Append msg to the buffered message; echo it immediately when
     unbuffered.

     Assumes self.buffered is a plain bool -- TODO confirm.
     """
     with self.msg_lock:
         self.msg += msg
         # Idiomatic boolean test instead of `== False`.
         if not self.buffered:
             Cons.Pnnl(msg)
             sys.stdout.flush()
Beispiel #14
0
def Heatmap():
    """Return (heatmap_file, vertical_lines_file, max_heat).

    When both output files already exist they are reused as a cache.
    Otherwise SSTable heat is sampled at num_times points between the first
    SSTable open time and the experiment finish time, the heatmap and
    vertical-line data files are written, and their paths plus
    MemtSstLife.SstLife.max_heat are returned.
    NOTE: iteritems() and the bare print statement below are Python 2.
    """
    # Set Conf.ExpStartTime(), if not already set.
    if Conf.ExpStartTime() is None:
        MutantLogReader.Get()

    fn_hm = "%s/sst-heatmap-by-time-%s" % (Conf.dn_result, Conf.ExpStartTime())
    fn_vl = "%s/sst-heatmap-by-time-vertical-lines-%s" % (Conf.dn_result,
                                                          Conf.ExpStartTime())
    if os.path.isfile(fn_hm) and os.path.isfile(fn_vl):
        return (fn_hm, fn_vl, _MaxHeatFromHeatmapByTimeData(fn_hm))

    sst_lives = MemtSstLife.GetSstLives()

    with Cons.MT("Generating Sst heatmap by time ..."):
        # Gather heat info at n different times
        num_times = Conf.heatmap_by_time_num_times

        if Conf.ExpFinishTime() is None:
            MemtSstLife.SetExpEndTimeFromSstLives()

        min_sst_opened = None
        for sst_gen, sl in sorted(sst_lives.iteritems()):
            min_sst_opened = sl.Opened() if min_sst_opened is None else min(
                min_sst_opened, sl.Opened())

        # Start time is when the first Sstable is opened, not the experiment start
        # time, when no SSTable exists yet.
        #   Exp start time:          160927-143257.395
        #   First Sstable open time: 160927-143411.273
        #st = datetime.datetime.strptime(Conf.ExpStartTime(), "%y%m%d-%H%M%S.%f")
        st = datetime.datetime.strptime(min_sst_opened, "%y%m%d-%H%M%S.%f")
        et = datetime.datetime.strptime(Conf.ExpFinishTime(),
                                        "%y%m%d-%H%M%S.%f")
        dur = (et - st).total_seconds()

        # { t0: {HeatBoxes} }
        time_heatboxes = {}
        vertical_lines = []
        for i in range(0, num_times):
            t0 = st + datetime.timedelta(seconds=(float(dur) * i / num_times +
                                                  time_offset_in_sec))
            t1 = st + datetime.timedelta(seconds=(float(dur) *
                                                  (i + 1) / num_times +
                                                  time_offset_in_sec))
            vertical_lines.append(t1)

            # Heat boxes are sorted by their heat and plotted with the heights
            # proportional to the size.
            boxes = []
            for sst_gen, sl in sorted(sst_lives.iteritems()):
                h = sl.HeatAtTime(t0)
                if h is None:
                    continue
                boxes.append(_Box(sl, t0, t1, h))
            boxes.sort(key=lambda b: b.heat, reverse=True)
            time_heatboxes[t0] = boxes

            Cons.ClearLine()
            Cons.Pnnl("%4d/%4d" % (i + 1, num_times))
        print ""
        # The last boundary is the plot's right edge, not an interior line.
        del vertical_lines[-1]

        # Set y-coordinate of each box
        for t, boxes in sorted(time_heatboxes.iteritems()):
            total_size = 0
            for b in boxes:
                total_size += b.sl.Size()
            s = 0
            for b in boxes:
                b.y0 = float(s) / total_size
                s += b.sl.Size()
                b.y1 = float(s) / total_size

        # Make leftmost time to 0.
        t_first = None
        t_base = datetime.datetime(2000, 1, 1)
        for t, boxes in sorted(time_heatboxes.iteritems()):
            if t_first is None:
                t_first = t
            for b in boxes:
                b.t0 = t_base + (b.t0 - t_first)
                b.t1 = t_base + (b.t1 - t_first)
        for i in range(len(vertical_lines)):
            vertical_lines[i] = t_base + (vertical_lines[i] - t_first)

        fmt = "%4d %1d %17s %17s %6.4f %6.4f" \
          " %8.3f %8d %6s"
        with open(fn_hm, "w") as fo:
            fo.write("# heat_max=%f\n" % MemtSstLife.SstLife.max_heat)

            # y0 is smaller than y1 (y0 is placed higher in the plot than y1).
            fo.write("%s\n" % Util.BuildHeader(fmt, \
              "sst_gen level t0 t1 y0 y1" \
              " heat heat_color heat_color_hex"))

            for t, boxes in sorted(time_heatboxes.iteritems()):
                for b in boxes:
                    fo.write((fmt + "\n") % ( \
                      b.sl.sst_gen, b.sl.level
                      , b.t0.strftime("%y%m%d-%H%M%S.%f")[:-3], b.t1.strftime("%y%m%d-%H%M%S.%f")[:-3]
                      , b.y0, b.y1
                      , b.heat, b.heat_color, ("%0.6X" % b.heat_color)
                      ))
                fo.write("\n")
        Cons.P("Created %s %d" % (fn_hm, os.path.getsize(fn_hm)))

        with open(fn_vl, "w") as fo:
            for vl in vertical_lines:
                fo.write("%s\n" % vl.strftime("%y%m%d-%H%M%S.%f")[:-3])
        Cons.P("Created %s %d" % (fn_vl, os.path.getsize(fn_vl)))
    return (fn_hm, fn_vl, MemtSstLife.SstLife.max_heat)