# Example #1
def rd_freq_popularity_2d(reader,
                          logX=True,
                          logY=True,
                          cdf=False,
                          figname="rdFreq_popularity_2d.png",
                          **kwargs):
    """
    plot the reuse distance frequency distribution in a two dimensional figure,
    X axis is reuse distance frequency
    Y axis is the number of requests in percentage

    :param reader: trace reader
    :param logX: use log scale on the X axis
    :param logY: use log scale on the Y axis
    :param cdf: plot the cumulative distribution instead of the histogram
    :param figname: name of the output figure
    :param kwargs: passed through to draw2d (xlabel, ylabel, xticks, ...)
    :return: the list of data points
    """

    kwargs_plot = {}
    kwargs_plot.update(kwargs)

    kwargs_plot["logX"], kwargs_plot["logY"] = logX, logY
    kwargs_plot["cdf"] = cdf
    kwargs_plot["xlabel"] = kwargs_plot.get("xlabel",
                                            "Reuse Distance Frequency")
    kwargs_plot["ylabel"] = kwargs_plot.get("ylabel", "Requests Percentage")
    # NOTE: the formatter closes over `l`, which is assigned below; the lambda
    # is only evaluated at draw time, after `l` has been computed
    kwargs_plot["xticks"] = kwargs_plot.get(
        "xticks",
        ticker.FuncFormatter(lambda x, _: '{:.0%}'.format(x / len(l))))

    # count how often each reuse distance occurs
    rd_list = LRUProfiler(reader).get_reuse_distance()
    rd_dict = defaultdict(int)  # rd -> count
    for rd in rd_list:
        rd_dict[rd] += 1

    # count how many distinct reuse distances share each frequency
    rd_count_dict = defaultdict(int)  # rd_count -> count of rd_count
    for freq in rd_dict.values():
        rd_count_dict[freq] += 1
    max_freq = max(rd_dict.values(), default=0)

    # frequency k is stored at index k - 1 (histogram shared by both modes)
    l = [0] * max_freq
    for k, v in rd_count_dict.items():
        l[k - 1] = v

    if cdf:
        # read the user's ylabel from the raw kwargs so the CDF default is
        # used only when the caller did not supply one
        kwargs_plot["ylabel"] = kwargs.get("ylabel",
                                           "Requests Percentage (CDF)")
        # running sum, then normalize by the total so values end at 1.0
        for i in range(1, len(l)):
            l[i] = l[i - 1] + l[i]
        if l and l[-1]:  # guard: empty trace produces an empty histogram
            for i in range(len(l)):
                l[i] = l[i] / l[-1]

    draw2d(l, figname=figname, **kwargs_plot)
    reader.reset()
    return l
# Example #2
def rd_popularity_2d(reader,
                     logX=True,
                     logY=False,
                     cdf=True,
                     figname="rd_popularity_2d.png",
                     **kwargs):
    """
    plot the reuse distance distribution in two dimension, cold miss is ignored
    X axis is reuse distance
    Y axis is number of requests (not in percentage)

    :param reader: trace reader
    :param logX: use log scale on the X axis
    :param logY: use log scale on the Y axis
    :param cdf: plot the cumulative distribution instead of the histogram
    :param figname: name of the output figure
    :param kwargs: passed through to draw2d (xlabel, ylabel, ...)
    :return: the list of data points
    """

    if not logX or logY or not cdf:
        WARNING("recommend using logX without logY with cdf")

    kwargs_plot = {}
    kwargs_plot.update(kwargs)

    kwargs_plot["logX"], kwargs_plot["logY"] = logX, logY
    kwargs_plot["cdf"] = cdf
    kwargs_plot["xlabel"] = kwargs_plot.get("xlabel", "Reuse Distance")
    kwargs_plot["ylabel"] = kwargs_plot.get("ylabel", "Num of Requests")

    # count how many requests have each reuse distance
    rd_list = LRUProfiler(reader).get_reuse_distance()
    rd_dict = defaultdict(int)  # rd -> count
    for rd in rd_list:
        rd_dict[rd] += 1

    # largest observed reuse distance (-1 when the trace is empty)
    max_rd = max(rd_dict.keys(), default=-1)

    # reuse distance rd is stored at index rd + 1 (histogram shared by both
    # modes); index 0 stays 0 as padding
    l = [0] * (max_rd + 2)
    for rd, rd_count in rd_dict.items():
        if rd != -1:  # rd == -1 marks a cold miss, which is ignored
            l[rd + 1] = rd_count

    if cdf:
        # read the user's ylabel from the raw kwargs so the CDF default is
        # used only when the caller did not supply one
        kwargs_plot["ylabel"] = kwargs.get("ylabel", "Num of Requests (CDF)")
        # running sum, then normalize by the total so values end at 1.0
        for i in range(1, len(l)):
            l[i] = l[i - 1] + l[i]
        # guard: a trace consisting only of cold misses leaves the total at 0
        # (the original raised ZeroDivisionError here)
        if l and l[-1]:
            for i in range(len(l)):
                l[i] = l[i] / l[-1]

    draw2d(l, figname=figname, **kwargs_plot)
    reader.reset()
    return l
    def profiler(self, algorithm, cache_params=None, cache_size=-1, bin_size=-1,
                 use_general_profiler=False, **kwargs):
        """
        get a profiler instance, this should not be used by most users

        :param algorithm:  name of algorithm (a string), or a cache class
        :param cache_params: parameters of given cache replacement algorithm
        :param cache_size: size of cache
        :param bin_size: bin_size for generalProfiler
        :param use_general_profiler: this option is for LRU only, if it is True,
                                        then return a cGeneralProfiler for LRU,
                                        otherwise, return a LRUProfiler for LRU.

                                        Note: LRUProfiler does not require cache_size/bin_size params,
                                        it does not sample thus provides a smooth curve, however, it is O(logN) at each step,
                                        in contrast, cGeneralProfiler samples the curve, but use O(1) at each step
        :param kwargs: num_of_threads, no_load_rd
        :return: a profiler instance
        """

        num_of_threads = kwargs.get("num_of_threads", DEF_NUM_THREADS)
        no_load_rd = kwargs.get("no_load_rd", False)
        assert self.reader is not None, "you haven't opened a trace yet"

        # `algorithm` may also be a cache class (see the isinstance dispatch
        # below); guard before calling .lower(), which the original code
        # invoked unconditionally and crashed on class arguments
        if isinstance(algorithm, str) and algorithm.lower() == "lru" \
                and not use_general_profiler:
            profiler = LRUProfiler(self.reader, cache_size, cache_params, no_load_rd=no_load_rd)
        else:
            assert cache_size != -1, "you didn't provide size for cache"
            assert cache_size <= self.num_of_req(), "you cannot specify cache size({}) " \
                                                        "larger than trace length({})".format(cache_size,
                                                                                              self.num_of_req())
            if isinstance(algorithm, str):
                cache_class = CACHE_NAME_CONVRETER[algorithm.lower()]
                # use the C implementation only when it is both enabled and
                # available for this algorithm; the two Python fallback
                # branches of the original were identical and are merged here
                if ALLOW_C_MIMIRCACHE and algorithm.lower() in C_AVAIL_CACHE:
                    profiler = CGeneralProfiler(self.reader, cache_class,
                                                cache_size, bin_size,
                                                cache_params=cache_params,
                                                num_of_threads=num_of_threads)
                else:
                    profiler = PyGeneralProfiler(self.reader, cache_class,
                                                 cache_size, bin_size,
                                                 cache_params=cache_params,
                                                 num_of_threads=num_of_threads)
            else:
                # algorithm is already a cache class
                profiler = PyGeneralProfiler(self.reader, algorithm, cache_size, bin_size,
                                             cache_params=cache_params, num_of_threads=num_of_threads)

        return profiler
# Example #4
    def compute_heatmap(self,
                        reader,
                        plot_type,
                        time_mode,
                        time_interval,
                        cache_size=-1,
                        num_of_pixel_of_time_dim=-1,
                        num_of_threads=os.cpu_count(),
                        **kwargs):
        """
            calculate the data for plotting heatmap

        :param reader: reader for data
        :param plot_type: types of data, see heatmap (function) for details
        :param time_mode: real time (r) or virtual time (v)
        :param time_interval: the window size in computation
        :param cache_size: size of cache
        :param num_of_pixel_of_time_dim: as an alternative to time_interval, useful when you don't know the trace time span
        :param num_of_threads: number of threads/processes to use for computation, default: all
        :param kwargs: cache_params, algorithm, ema_coef, interval_hit_ratio/enable_ihr
        :return:  a two-dimension list, the first dimension is x, the second dimension is y, the value is the heat value
        """

        # split the trace into time windows; each window becomes one
        # row/column of the heatmap
        bp = get_breakpoints(reader, time_mode, time_interval,
                             num_of_pixel_of_time_dim)
        ppe = ProcessPoolExecutor(max_workers=num_of_threads)
        futures_dict = {}  # future -> start-window index i (heatmap column)
        progress = 0
        xydict = np.zeros((len(bp) - 1, len(bp) - 1))

        # these plot types submit no work here; futures_dict stays empty and
        # an all-zero matrix is returned
        # NOTE(review): presumably these types are computed elsewhere --
        # confirm against the heatmap() caller
        if plot_type in [
                "avg_rd_st_et", "rd_distribution", "rd_distribution_CDF",
                "future_rd_distribution", "dist_distribution",
                "rt_distribution"
        ]:
            pass

        elif plot_type == "hr_st_et":
            # hit ratio from start-time window to end-time window
            ema_coef = kwargs.get("ema_coef", DEF_EMA_HISTORY_WEIGHT)
            # interval hit ratio may be requested under either keyword
            enable_ihr = kwargs.get("interval_hit_ratio", False) or kwargs.get(
                "enable_ihr", False)

            if kwargs.get("algorithm", "LRU").lower() == "lru":
                #TODO: replace CLRUProfiler with PyLRUProfiler
                # LRU hit ratio is derived directly from reuse distances, so
                # compute them once and share across all worker tasks
                rd = LRUProfiler(reader).get_reuse_distance()
                last_access_dist = get_last_access_dist(reader)

                # one task per start window i
                for i in range(len(bp) - 1):
                    futures_dict[ppe.submit(cal_hr_list_LRU,
                                            rd,
                                            last_access_dist,
                                            cache_size,
                                            bp,
                                            i,
                                            enable_ihr=enable_ihr,
                                            ema_coef=ema_coef)] = i
            else:
                # non-LRU: each worker process re-opens the reader and
                # simulates the cache, so pass the reader class and params
                # (readers themselves are not picklable across processes)
                reader_params = reader.get_params()
                reader_params["open_c_reader"] = False
                cache_class = cache_name_to_class(kwargs.get("algorithm"))
                cache_params = kwargs.get("cache_params", {})

                for i in range(len(bp) - 1):
                    futures_dict[ppe.submit(cal_hr_list_general,
                                            reader.__class__,
                                            reader_params,
                                            cache_class,
                                            cache_size,
                                            bp,
                                            i,
                                            cache_params=cache_params)] = i

        elif plot_type == "hr_st_size":
            raise RuntimeError("Not Implemented")

        elif plot_type == "KL_st_et":
            # KL divergence between the reuse-distance distributions of the
            # start and end windows
            rd = LRUProfiler(reader).get_reuse_distance()

            for i in range(len(bp) - 1):
                futures_dict[ppe.submit(cal_KL, rd, bp, i)] = i

        else:
            ppe.shutdown()
            raise RuntimeError(
                "{} is not a valid heatmap type".format(plot_type))

        # collect results as they finish; each result is the column for start
        # window i, aligned to the bottom rows of the matrix (results may be
        # shorter than the full dimension)
        last_progress_print_time = time.time()
        for future in as_completed(futures_dict):
            result = future.result()
            xydict[-len(result):, futures_dict[future]] = np.array(result)
            # print("{} {}".format(xydict[futures_dict[future]], np.array(result)))
            progress += 1
            # throttle progress output to at most once every 20 seconds
            if time.time() - last_progress_print_time > 20:
                INFO("{:.2f}%".format(progress / len(futures_dict) * 100),
                     end="\r")
                last_progress_print_time = time.time()

        ppe.shutdown()
        return xydict
    def plotHRCs(self, algorithm_list, cache_params=(),
                 cache_size=-1, bin_size=-1,
                 auto_resize=True, figname="HRC.png", **kwargs):
        """
        this function provides hit ratio curve plotting

        :param algorithm_list: a list of algorithm(s)
        :param cache_params: the corresponding cache params for the algorithms,
                                use None for algorithms that don't require cache params,
                                if none of the alg requires cache params, you don't need to set this
        :param cache_size:  maximal size of cache, use -1 for max possible size
        :param bin_size:    bin size for non-LRU profiling
        :param auto_resize:   when using max possible size or specified cache size too large,
                                you will get a huge plateau at the end of hit ratio curve,
                                set auto_resize to True to cutoff most of the big plateau
        :param figname: name of figure
        :param kwargs: options: block_unit_size, num_of_threads,
                        auto_resize_threshold, xlimit, ylimit, cache_unit_size

                        save_gradually - save a figure everytime computation for one algorithm finishes,

                        label - instead of using algorithm list as label, specify user-defined label
        :return: a dict mapping algorithm name -> {cache size: hit ratio}
        """

        # algorithm name -> {cache size: hit ratio}; filled while plotting
        hit_ratio_dict = {}

        num_of_threads          =       kwargs.get("num_of_threads",        os.cpu_count())
        no_load_rd              =       kwargs.get("no_load_rd",            False)
        cache_unit_size         =       kwargs.get("cache_unit_size",       0)
        use_general_profiler    =       kwargs.get("use_general_profiler",  False)
        save_gradually          =       kwargs.get("save_gradually",        False)
        threshold               =       kwargs.get('auto_resize_threshold', 0.98)
        label                   =       kwargs.get("label",                 algorithm_list)
        xlabel                  =       kwargs.get("xlabel",                "Cache Size (Items)")
        ylabel                  =       kwargs.get("ylabel",                "Hit Ratio")
        title                   =       kwargs.get("title",                 "Hit Ratio Curve")

        profiling_with_size = False
        # LRU curve computed as a side effect of auto-resize; reused below so
        # LRU is not profiled twice
        LRU_HR = None

        assert self.reader is not None, "you must open trace before profiling"
        # with auto_resize, profile LRU first to find where the curve flattens
        # (per threshold) and use that point as the cache size for all algorithms
        if cache_size == -1 and auto_resize:
            LRU_HR = LRUProfiler(self.reader, no_load_rd=no_load_rd).plotHRC(auto_resize=True, threshold=threshold, no_save=True)
            cache_size = len(LRU_HR)
        else:
            assert cache_size <= self.num_of_req(), "you cannot specify cache size larger than trace length"

        if bin_size == -1:
            # default: spread DEF_NUM_BIN_PROF bins across the cache size range
            bin_size = cache_size // DEF_NUM_BIN_PROF + 1

        # check whether profiling with size
        # (enabled when any algorithm's params carry a non-zero block_unit_size)
        block_unit_size = 0
        for i in range(len(algorithm_list)):
            if i < len(cache_params) and cache_params[i]:
                block_unit_size = cache_params[i].get("block_unit_size", 0)
                if block_unit_size != 0:
                    profiling_with_size = True
                    break
        if profiling_with_size and cache_unit_size != 0 and block_unit_size != cache_unit_size:
            raise RuntimeError("cache_unit_size and block_unit_size is not equal {} {}".\
                                format(cache_unit_size, block_unit_size))


        for i in range(len(algorithm_list)):
            alg = algorithm_list[i]
            if cache_params and i < len(cache_params):
                cache_param = cache_params[i]
                if profiling_with_size:
                    # when profiling with size, every algorithm on the plot
                    # must use the same block_unit_size
                    if cache_param is None or 'block_unit_size' not in cache_param:
                        ERROR("it seems you want to profiling with size, "
                              "but you didn't provide block_unit_size in "
                              "cache params {}".format(cache_param))
                    elif cache_param["block_unit_size"] != block_unit_size:
                        ERROR("only same block unit size for single plot is allowed")

            else:
                cache_param = None
            profiler = self.profiler(alg, cache_param, cache_size, bin_size=bin_size,
                                     use_general_profiler=use_general_profiler,
                                     num_of_threads=num_of_threads, no_load_rd=no_load_rd)
            t1 = time.time()

            if alg.lower() == "lru":
                if LRU_HR is None:  # no auto_resize
                    hr = profiler.get_hit_ratio()
                    if use_general_profiler:
                        # general profiler samples: x coordinate is j * bin_size
                        # save the computed hit ratio
                        hit_ratio_dict["LRU"] = {}
                        for j in range(len(hr)):
                            hit_ratio_dict["LRU"][j * bin_size] = hr[j]
                        plt.plot([j * bin_size for j in range(len(hr))], hr, label=label[i])
                    else:
                        # LRUProfiler yields one point per cache size; the last
                        # two entries are dropped
                        # NOTE(review): they appear to be special/sentinel
                        # values rather than curve points -- confirm
                        # save the computed hit ratio
                        hit_ratio_dict["LRU"] = {}
                        for j in range(len(hr)-2):
                            hit_ratio_dict["LRU"][j] = hr[j]
                        plt.plot(hr[:-2], label=label[i])
                else:
                    # reuse the curve computed during auto-resize
                    # save the computed hit ratio
                    hit_ratio_dict["LRU"] = {}
                    for j in range(len(LRU_HR)):
                        hit_ratio_dict["LRU"][j] = LRU_HR[j]
                    plt.plot(LRU_HR, label=label[i])
            else:
                hr = profiler.get_hit_ratio()
                # save the computed hit ratio
                hit_ratio_dict[alg] = {}
                for j in range(len(hr)):
                    hit_ratio_dict[alg][j * bin_size] = hr[j]
                plt.plot([j * bin_size for j in range(len(hr))], hr, label=label[i])
            self.reader.reset()
            INFO("HRC plotting {} computation finished using time {} s".format(alg, time.time() - t1))
            if save_gradually:
                plt.savefig(figname, dpi=600)

        set_fig(xlabel=xlabel, ylabel=ylabel, title=title, **kwargs)

        if cache_unit_size != 0:
            # relabel the item-count x axis in MB using the per-item byte size
            plt.xlabel("Cache Size (MB)")
            plt.gca().xaxis.set_major_formatter(
                FuncFormatter(lambda x, p: int(x * cache_unit_size // 1024 // 1024)))

        if not 'no_save' in kwargs or not kwargs['no_save']:
            plt.savefig(figname, dpi=600)
            INFO("HRC plot is saved as {}".format(figname))
        # best-effort display; plt.show can fail in headless environments
        try: plt.show()
        except: pass
        plt.clf()
        return hit_ratio_dict
    def get_reuse_distance(self):
        """
        Compute the reuse distance of every request in the currently
        opened trace.

        :return: an array of reuse distance
        """
        lru_profiler = LRUProfiler(self.reader)
        return lru_profiler.get_reuse_distance()
# Example #7
def interval_hit_ratio_2d(reader,
                          cache_size,
                          decay_coef=0.2,
                          time_mode="v",
                          time_interval=10000,
                          figname="IHRC_2d.png",
                          **kwargs):
    """
    The hit ratio curve over time interval, each pixel in the plot represents the
    exponential weight moving average (ewma) of hit ratio of the interval

    :param reader: trace reader
    :param cache_size: cache size (in items) used to decide hit vs miss
    :param decay_coef: used in ewma
    :param time_mode: "v" for virtual (request-count) time, "r" for real time
    :param time_interval: interval length (requests for "v", time units for "r")
    :param figname: name of the output figure
    :param kwargs: passed through to draw2d
    :return: the list of data points
    """
    p = LRUProfiler(reader)
    # reuse distance list
    rd_list = p.get_reuse_distance()

    hit_ratio_list = []
    ewma_hit_ratio = 0
    hit_cnt_interval = 0

    # NOTE(review): a time_mode that is neither "v" nor "r" silently produces
    # an empty plot -- confirm whether it should raise instead
    if time_mode == "v":
        for n, rd in enumerate(rd_list):
            # a request is a hit when its reuse distance fits in the cache;
            # rd == -1 marks a cold miss
            if rd > cache_size or rd == -1:
                # this is a miss
                pass
            else:
                hit_cnt_interval += 1
            # NOTE(review): this also fires at n == 0, so the first emitted
            # point covers a single request -- confirm the boundary is intended
            if n % time_interval == 0:
                hit_ratio_interval = hit_cnt_interval / time_interval
                # ewma: old value decays by decay_coef, the new interval
                # contributes weight (1 - decay_coef)
                ewma_hit_ratio = ewma_hit_ratio * decay_coef + hit_ratio_interval * (
                    1 - decay_coef)
                hit_cnt_interval = 0
                hit_ratio_list.append(ewma_hit_ratio)

    elif time_mode == "r":
        ind = 0  # index into rd_list, advanced once per request read
        req_cnt_interval = 0

        # read time and request label
        line = reader.read_time_req()
        t, req = line
        last_time_interval_cutoff = line[0]

        while line:
            last_time = t
            t, req = line
            # close the current interval once its time span is exceeded;
            # req_cnt_interval is at least 1 here because every loop iteration
            # below counts the request after this check
            if t - last_time_interval_cutoff > time_interval:
                hit_ratio_interval = hit_cnt_interval / req_cnt_interval
                ewma_hit_ratio = ewma_hit_ratio * decay_coef + hit_ratio_interval * (
                    1 - decay_coef)
                hit_cnt_interval = 0
                req_cnt_interval = 0
                last_time_interval_cutoff = last_time
                hit_ratio_list.append(ewma_hit_ratio)

            rd = rd_list[ind]
            req_cnt_interval += 1
            if rd != -1 and rd <= cache_size:
                hit_cnt_interval += 1

            line = reader.read_time_req()
            ind += 1

    kwargs_plot = {}
    kwargs_plot.update(kwargs)

    kwargs_plot["logX"] = kwargs_plot.get("logX", False)
    kwargs_plot["logY"] = kwargs_plot.get("logY", False)
    kwargs_plot["xlabel"] = kwargs_plot.get(
        "xlabel", "{} Time".format({
            "r": "Real",
            "v": "Virtual"
        }.get(time_mode, "")))
    kwargs_plot["ylabel"] = kwargs_plot.get(
        "ylabel", "Interval Hit Ratio (decay {})".format(decay_coef))

    # show the x axis as a percentage of the full trace duration
    kwargs_plot["xticks"] = kwargs_plot.get(
        "xticks",
        ticker.FuncFormatter(
            # both works
            # lambda x, _: '{:.0f}%'.format(x * 100 / len(hit_ratio_list))))
            lambda x, _: '{:.0%}'.format(x / len(hit_ratio_list))))

    reader.reset()
    draw2d(hit_ratio_list, figname=figname, **kwargs_plot)

    return hit_ratio_list