def rd_freq_popularity_2d(reader, logX=True, logY=True, cdf=False,
                          figname="rdFreq_popularity_2d.png", **kwargs):
    """
    plot the reuse distance distribution in a two dimensional figure,
    X axis is reuse distance frequency
    Y axis is the number of requests in percentage

    :param reader: trace reader
    :param logX: whether to use log scale on the X axis
    :param logY: whether to use log scale on the Y axis
    :param cdf: whether to plot the cumulative distribution
    :param figname: name of the output figure
    :return: the list of data points
    """
    plot_kwargs = dict(kwargs)
    plot_kwargs["logX"] = logX
    plot_kwargs["logY"] = logY
    plot_kwargs["cdf"] = cdf
    plot_kwargs.setdefault("xlabel", "Reuse Distance Frequency")
    plot_kwargs.setdefault("ylabel", "Requests Percentage")
    # x ticks rendered as percentage of the number of plotted points;
    # the lambda resolves `points` lazily, after it is built below
    plot_kwargs.setdefault(
        "xticks",
        ticker.FuncFormatter(lambda x, _: '{:.0%}'.format(x / len(points))))

    # count how often each reuse distance value occurs
    rd_occurrences = defaultdict(int)
    for rd in LRUProfiler(reader).get_reuse_distance():
        rd_occurrences[rd] += 1

    # histogram over those occurrence counts: frequency -> #distinct rd values
    freq_histogram = defaultdict(int)
    highest_freq = -1
    for occurrence in rd_occurrences.values():
        freq_histogram[occurrence] += 1
        if occurrence > highest_freq:
            highest_freq = occurrence

    points = [0] * highest_freq
    for freq, cnt in freq_histogram.items():
        points[freq - 1] = cnt

    if cdf:
        plot_kwargs["ylabel"] = kwargs.get("ylabel", "Requests Percentage (CDF)")
        # in-place prefix sum, then normalize by the grand total
        for i in range(1, len(points)):
            points[i] += points[i - 1]
        for i in range(len(points)):
            points[i] = points[i] / points[-1]

    draw2d(points, figname=figname, **plot_kwargs)
    reader.reset()
    return points
def rd_popularity_2d(reader, logX=True, logY=False, cdf=True,
                     figname="rd_popularity_2d.png", **kwargs):
    """
    plot the reuse distance distribution in two dimension, cold miss is ignored
    X axis is reuse distance
    Y axis is number of requests (not in percentage)

    :param reader: trace reader
    :param logX: whether to use log scale on the X axis
    :param logY: whether to use log scale on the Y axis
    :param cdf: whether to plot the cumulative distribution
    :param figname: name of the output figure
    :return: the list of data points
    """
    if not (logX and not logY and cdf):
        WARNING("recommend using logX without logY with cdf")

    plot_kwargs = dict(kwargs)
    plot_kwargs["logX"] = logX
    plot_kwargs["logY"] = logY
    plot_kwargs["cdf"] = cdf
    plot_kwargs.setdefault("xlabel", "Reuse Distance")
    plot_kwargs.setdefault("ylabel", "Num of Requests")

    # reuse distance -> number of requests with that distance
    rd_counts = defaultdict(int)
    for rd in LRUProfiler(reader).get_reuse_distance():
        rd_counts[rd] += 1

    max_rd = max(rd_counts.keys(), default=-1)

    # index rd+1 holds the count for reuse distance rd
    histogram = [0] * (max_rd + 2)
    for rd, cnt in rd_counts.items():
        if rd != -1:                 # rd == -1 marks a cold miss; skip it
            histogram[rd + 1] = cnt

    if cdf:
        plot_kwargs["ylabel"] = kwargs.get("ylabel", "Num of Requests (CDF)")
        # in-place prefix sum, then normalize by the grand total
        for i in range(1, len(histogram)):
            histogram[i] += histogram[i - 1]
        for i in range(len(histogram)):
            histogram[i] = histogram[i] / histogram[-1]

    draw2d(histogram, figname=figname, **plot_kwargs)
    reader.reset()
    return histogram
def profiler(self, algorithm, cache_params=None, cache_size=-1,
             bin_size=-1, use_general_profiler=False, **kwargs):
    """
    get a profiler instance, this should not be used by most users

    :param algorithm: name of algorithm
    :param cache_params: parameters of given cache replacement algorithm
    :param cache_size: size of cache
    :param bin_size: bin_size for generalProfiler
    :param use_general_profiler: this option is for LRU only, if it is True,
                                then return a cGeneralProfiler for LRU,
                                otherwise, return a LRUProfiler for LRU.

                                Note: LRUProfiler does not require cache_size/bin_size params,
                                it does not sample thus provides a smooth curve, however, it is
                                O(logN) at each step, in contrast, cGeneralProfiler samples the
                                curve, but uses O(1) at each step
    :param kwargs: num_of_threads
    :return: a profiler instance
    """
    num_of_threads = kwargs.get("num_of_threads", DEF_NUM_THREADS)
    no_load_rd = kwargs.get("no_load_rd", False)
    assert self.reader is not None, "you haven't opened a trace yet"

    # LRU gets the exact (non-sampling) profiler unless explicitly overridden
    if algorithm.lower() == "lru" and not use_general_profiler:
        return LRUProfiler(self.reader, cache_size, cache_params,
                           no_load_rd=no_load_rd)

    assert cache_size != -1, "you didn't provide size for cache"
    assert cache_size <= self.num_of_req(), \
        "you cannot specify cache size({}) " \
        "larger than trace length({})".format(cache_size, self.num_of_req())

    if isinstance(algorithm, str):
        # use the C implementation only when it is both enabled and available
        use_c_profiler = ALLOW_C_MIMIRCACHE and algorithm.lower() in C_AVAIL_CACHE
        profiler_cls = CGeneralProfiler if use_c_profiler else PyGeneralProfiler
        return profiler_cls(self.reader,
                            CACHE_NAME_CONVRETER[algorithm.lower()],
                            cache_size, bin_size,
                            cache_params=cache_params,
                            num_of_threads=num_of_threads)

    # algorithm given as a cache class rather than a name
    return PyGeneralProfiler(self.reader, algorithm, cache_size, bin_size,
                             cache_params=cache_params,
                             num_of_threads=num_of_threads)
def compute_heatmap(self, reader, plot_type, time_mode, time_interval,
                    cache_size=-1, num_of_pixel_of_time_dim=-1,
                    num_of_threads=os.cpu_count(), **kwargs):
    """
    calculate the data for plotting heatmap

    :param reader: reader for data
    :param plot_type: types of data, see heatmap (function) for details
    :param time_mode: real time (r) or virtual time (v)
    :param time_interval: the window size in computation
    :param cache_size: size of cache
    :param num_of_pixel_of_time_dim: as an alternative to time_interval,
                                     useful when you don't know the trace time span
    :param num_of_threads: number of threads/processes to use for computation, default: all
                           (note: the default is evaluated once at import time)
    :param kwargs: cache_params,
    :return: a two-dimension list, the first dimension is x, the second dimension is y,
             the value is the heat value
    """
    # split the time axis into windows; each window index becomes one
    # column of the heatmap, computed by one worker task
    bp = get_breakpoints(reader, time_mode, time_interval,
                         num_of_pixel_of_time_dim)
    ppe = ProcessPoolExecutor(max_workers=num_of_threads)
    futures_dict = {}   # future -> index of the column it computes
    progress = 0
    xydict = np.zeros((len(bp) - 1, len(bp) - 1))

    if plot_type in [
            "avg_rd_st_et", "rd_distribution", "rd_distribution_CDF",
            "future_rd_distribution", "dist_distribution", "rt_distribution"
    ]:
        # NOTE(review): no tasks are submitted for these plot types, so
        # xydict stays all-zero — presumably computed elsewhere; confirm
        pass
    elif plot_type == "hr_st_et":
        # weight of history in the exponential moving average of hit ratio
        ema_coef = kwargs.get("ema_coef", DEF_EMA_HISTORY_WEIGHT)
        # two kwarg spellings are accepted for enabling interval hit ratio
        enable_ihr = kwargs.get("interval_hit_ratio", False) or kwargs.get(
            "enable_ihr", False)
        if kwargs.get("algorithm", "LRU").lower() == "lru":
            #TODO: replace CLRUProfiler with PyLRUProfiler
            # LRU hit ratio can be derived from reuse distances alone
            rd = LRUProfiler(reader).get_reuse_distance()
            last_access_dist = get_last_access_dist(reader)
            for i in range(len(bp) - 1):
                futures_dict[ppe.submit(cal_hr_list_LRU, rd,
                                        last_access_dist,
                                        cache_size, bp, i,
                                        enable_ihr=enable_ihr,
                                        ema_coef=ema_coef)] = i
        else:
            # non-LRU: workers rebuild the reader from its params
            # (open_c_reader disabled — presumably the C reader cannot
            # cross the process boundary; confirm) and simulate the cache
            reader_params = reader.get_params()
            reader_params["open_c_reader"] = False
            cache_class = cache_name_to_class(kwargs.get("algorithm"))
            cache_params = kwargs.get("cache_params", {})
            for i in range(len(bp) - 1):
                futures_dict[ppe.submit(cal_hr_list_general,
                                        reader.__class__,
                                        reader_params, cache_class,
                                        cache_size, bp, i,
                                        cache_params=cache_params)] = i
    elif plot_type == "hr_st_size":
        raise RuntimeError("Not Implemented")
    elif plot_type == "KL_st_et":
        rd = LRUProfiler(reader).get_reuse_distance()
        for i in range(len(bp) - 1):
            futures_dict[ppe.submit(cal_KL, rd, bp, i)] = i
    else:
        # shut the pool down before raising so worker processes don't leak
        ppe.shutdown()
        raise RuntimeError(
            "{} is not a valid heatmap type".format(plot_type))

    last_progress_print_time = time.time()
    for future in as_completed(futures_dict):
        result = future.result()
        # a column may be shorter than the full height; align it to the end
        xydict[-len(result):, futures_dict[future]] = np.array(result)
        # print("{} {}".format(xydict[futures_dict[future]], np.array(result)))
        progress += 1
        # throttle progress reporting to at most once every 20 seconds
        if time.time() - last_progress_print_time > 20:
            INFO("{:.2f}%".format(progress / len(futures_dict) * 100), end="\r")
            last_progress_print_time = time.time()

    ppe.shutdown()
    return xydict
def plotHRCs(self, algorithm_list, cache_params=(),
             cache_size=-1, bin_size=-1,
             auto_resize=True, figname="HRC.png", **kwargs):
    """
    this function provides hit ratio curve plotting

    :param algorithm_list: a list of algorithm(s)
    :param cache_params: the corresponding cache params for the algorithms,
                        use None for algorithms that don't require cache params,
                        if none of the alg requires cache params, you don't need to set this
    :param cache_size: maximal size of cache, use -1 for max possible size
    :param bin_size: bin size for non-LRU profiling
    :param auto_resize: when using max possible size or specified cache size too large,
                        you will get a huge plateau at the end of hit ratio curve,
                        set auto_resize to True to cutoff most of the big plateau
    :param figname: name of figure
    :param kwargs: options: block_unit_size, num_of_threads, auto_resize_threshold,
                    xlimit, ylimit, cache_unit_size

                    save_gradually - save a figure everytime computation for one algorithm finishes,

                    label - instead of using algorithm list as label, specify user-defined label
    :return: a dict mapping algorithm name -> {cache size: hit ratio}
    """
    hit_ratio_dict = {}

    num_of_threads = kwargs.get("num_of_threads", os.cpu_count())
    no_load_rd = kwargs.get("no_load_rd", False)
    cache_unit_size = kwargs.get("cache_unit_size", 0)
    use_general_profiler = kwargs.get("use_general_profiler", False)
    save_gradually = kwargs.get("save_gradually", False)
    threshold = kwargs.get('auto_resize_threshold', 0.98)
    label = kwargs.get("label", algorithm_list)
    xlabel = kwargs.get("xlabel", "Cache Size (Items)")
    ylabel = kwargs.get("ylabel", "Hit Ratio")
    title = kwargs.get("title", "Hit Ratio Curve")

    profiling_with_size = False
    LRU_HR = None

    assert self.reader is not None, "you must open trace before profiling"
    if cache_size == -1 and auto_resize:
        # let the LRU profiler pick a cache size that cuts off the plateau;
        # the returned curve is reused later for the LRU plot
        LRU_HR = LRUProfiler(self.reader, no_load_rd=no_load_rd).plotHRC(
            auto_resize=True, threshold=threshold, no_save=True)
        cache_size = len(LRU_HR)
    else:
        assert cache_size <= self.num_of_req(), \
            "you cannot specify cache size larger than trace length"

    if bin_size == -1:
        bin_size = cache_size // DEF_NUM_BIN_PROF + 1

    # check whether profiling with size
    block_unit_size = 0
    for i in range(len(algorithm_list)):
        if i < len(cache_params) and cache_params[i]:
            block_unit_size = cache_params[i].get("block_unit_size", 0)
            if block_unit_size != 0:
                profiling_with_size = True
                break
    # all algorithms on one plot must agree on the block unit size
    if profiling_with_size and cache_unit_size != 0 and \
            block_unit_size != cache_unit_size:
        raise RuntimeError("cache_unit_size and block_unit_size is not equal {} {}".
                           format(cache_unit_size, block_unit_size))

    for i in range(len(algorithm_list)):
        alg = algorithm_list[i]
        if cache_params and i < len(cache_params):
            cache_param = cache_params[i]
            if profiling_with_size:
                if cache_param is None or 'block_unit_size' not in cache_param:
                    ERROR("it seems you want to profiling with size, "
                          "but you didn't provide block_unit_size in "
                          "cache params {}".format(cache_param))
                elif cache_param["block_unit_size"] != block_unit_size:
                    ERROR("only same block unit size for single plot is allowed")
        else:
            cache_param = None
        profiler = self.profiler(alg, cache_param, cache_size,
                                 bin_size=bin_size,
                                 use_general_profiler=use_general_profiler,
                                 num_of_threads=num_of_threads,
                                 no_load_rd=no_load_rd)

        t1 = time.time()
        if alg.lower() == "lru":
            if LRU_HR is None:
                # no auto_resize
                hr = profiler.get_hit_ratio()
                if use_general_profiler:
                    # save the computed hit ratio
                    hit_ratio_dict["LRU"] = {}
                    for j in range(len(hr)):
                        hit_ratio_dict["LRU"][j * bin_size] = hr[j]
                    plt.plot([j * bin_size for j in range(len(hr))], hr,
                             label=label[i])
                else:
                    # save the computed hit ratio
                    # NOTE(review): the last two entries of the LRUProfiler
                    # curve are dropped here — presumably sentinel values
                    # (e.g. cold-miss stats); confirm against LRUProfiler
                    hit_ratio_dict["LRU"] = {}
                    for j in range(len(hr)-2):
                        hit_ratio_dict["LRU"][j] = hr[j]
                    plt.plot(hr[:-2], label=label[i])
            else:
                # reuse the curve already computed during auto-resize
                # save the computed hit ratio
                hit_ratio_dict["LRU"] = {}
                for j in range(len(LRU_HR)):
                    hit_ratio_dict["LRU"][j] = LRU_HR[j]
                plt.plot(LRU_HR, label=label[i])
        else:
            hr = profiler.get_hit_ratio()
            # save the computed hit ratio
            hit_ratio_dict[alg] = {}
            for j in range(len(hr)):
                hit_ratio_dict[alg][j * bin_size] = hr[j]
            plt.plot([j * bin_size for j in range(len(hr))], hr,
                     label=label[i])
        self.reader.reset()
        INFO("HRC plotting {} computation finished using time {} s".format(
            alg, time.time() - t1))
        if save_gradually:
            plt.savefig(figname, dpi=600)

    set_fig(xlabel=xlabel, ylabel=ylabel, title=title, **kwargs)
    if cache_unit_size != 0:
        # relabel the x axis in MB instead of item counts
        plt.xlabel("Cache Size (MB)")
        plt.gca().xaxis.set_major_formatter(
            FuncFormatter(lambda x, p: int(x * cache_unit_size // 1024 // 1024)))

    if not 'no_save' in kwargs or not kwargs['no_save']:
        plt.savefig(figname, dpi=600)
        INFO("HRC plot is saved as {}".format(figname))
    try:
        plt.show()
    except:
        # NOTE(review): bare except — presumably to tolerate headless
        # matplotlib backends that cannot open a window; confirm
        pass
    plt.clf()
    return hit_ratio_dict
def get_reuse_distance(self):
    """
    compute the reuse distance of each request in the opened trace

    :return: an array of reuse distance
    """
    lru_profiler = LRUProfiler(self.reader)
    return lru_profiler.get_reuse_distance()
def interval_hit_ratio_2d(reader, cache_size, decay_coef=0.2,
                          time_mode="v", time_interval=10000,
                          figname="IHRC_2d.png", **kwargs):
    """
    The hit ratio curve over time interval, each pixel in the plot represents
    the exponential weight moving average (ewma) of hit ratio of the interval

    :param reader: trace reader
    :param cache_size: size of cache (in items); a request is counted as a
                       hit when its reuse distance is not -1 and <= cache_size
    :param decay_coef: weight of the history in the ewma
    :param time_mode: "v" for virtual time (request count), "r" for real time
    :param time_interval: interval length (requests in "v" mode,
                          timestamp units in "r" mode)
    :param figname: name of the output figure
    :return: the list of data points
    """
    p = LRUProfiler(reader)
    # reuse distance list; -1 marks a cold miss
    rd_list = p.get_reuse_distance()

    hit_ratio_list = []
    ewma_hit_ratio = 0
    hit_cnt_interval = 0

    if time_mode == "v":
        for n, rd in enumerate(rd_list):
            if rd != -1 and rd <= cache_size:
                hit_cnt_interval += 1
            # BUGFIX: close the interval after every time_interval requests;
            # the previous check (n % time_interval == 0) fired at n == 0,
            # emitting a spurious first point computed from a single request
            # (divided by time_interval) and shifting all later boundaries
            if (n + 1) % time_interval == 0:
                hit_ratio_interval = hit_cnt_interval / time_interval
                ewma_hit_ratio = ewma_hit_ratio * decay_coef + \
                                 hit_ratio_interval * (1 - decay_coef)
                hit_cnt_interval = 0
                hit_ratio_list.append(ewma_hit_ratio)

    elif time_mode == "r":
        ind = 0
        req_cnt_interval = 0

        # read time and request label
        line = reader.read_time_req()
        t, req = line
        last_time_interval_cutoff = line[0]
        while line:
            last_time = t
            t, req = line
            # close the interval once its time span is exceeded
            if t - last_time_interval_cutoff > time_interval:
                hit_ratio_interval = hit_cnt_interval / req_cnt_interval
                ewma_hit_ratio = ewma_hit_ratio * decay_coef + \
                                 hit_ratio_interval * (1 - decay_coef)
                hit_cnt_interval = 0
                req_cnt_interval = 0
                last_time_interval_cutoff = last_time
                hit_ratio_list.append(ewma_hit_ratio)
            rd = rd_list[ind]
            req_cnt_interval += 1
            if rd != -1 and rd <= cache_size:
                hit_cnt_interval += 1
            line = reader.read_time_req()
            ind += 1

    kwargs_plot = {}
    kwargs_plot.update(kwargs)
    kwargs_plot["logX"] = kwargs_plot.get("logX", False)
    kwargs_plot["logY"] = kwargs_plot.get("logY", False)
    kwargs_plot["xlabel"] = kwargs_plot.get(
        "xlabel", "{} Time".format({
            "r": "Real",
            "v": "Virtual"
        }.get(time_mode, "")))
    kwargs_plot["ylabel"] = kwargs_plot.get(
        "ylabel", "Interval Hit Ratio (decay {})".format(decay_coef))
    # x ticks as percentage of the trace covered so far
    kwargs_plot["xticks"] = kwargs_plot.get(
        "xticks",
        ticker.FuncFormatter(
            lambda x, _: '{:.0%}'.format(x / len(hit_ratio_list))))

    reader.reset()
    draw2d(hit_ratio_list, figname=figname, **kwargs_plot)
    return hit_ratio_list