Ejemplo n.º 1
0
def run(path_in: str, path_out: str) -> None:
    """Plot per-epoch RDB overlap CSVs plus one aggregate figure.

    Reads every ``rdb.olap.e*.csv`` under *path_in*, emits one PDF per
    epoch into *path_out*, and finishes with an aggregate PDF that
    combines all epochs on a single axis.
    """
    print(path_in)
    print("""
    [plot_rdb] parsing RDB to compute RDB overlaps
    """)

    epoch_csvs = glob.glob(path_in + '/rdb.olap.e*.csv')
    print('{0} epoch files found\n'.format(len(epoch_csvs)))
    epoch_csvs.sort()

    fig_aggr, ax_aggr = plt.subplots(1, 1)

    for csv_in in epoch_csvs:
        # Mirror the CSV's filename, swapping the extension to .pdf.
        pdf_out = Path(path_out) / re.sub('csv$', 'pdf', Path(csv_in).name)
        plot_epoch(csv_in, pdf_out, ax_aggr=ax_aggr)
        print('Plot saved: {0} ==> {1}\n'.format(abbrv_path(csv_in),
                                                 abbrv_path(pdf_out)))

    ax_aggr.set_xlabel('Attribute Range')
    ax_aggr.set_ylabel('Overlap Percent')
    ax_aggr.set_title('Aggregate Manifest Stats')
    ax_aggr.legend()

    # fig_aggr.show()
    aggr_out = Path(path_out) / 'manifest.aggr.pdf'
    fig_aggr.savefig(aggr_out, dpi=300)
Ejemplo n.º 2
0
def plot_reneg_std(bincnts: Iterable[np.ndarray], fig_path: str) -> None:
    """Plot per-interval load CoV vs cumulative data volume, one line per epoch.

    Args:
        bincnts: per-epoch 2D arrays; rows are successive cumulative
            bin-count snapshots, columns are bins/ranks
            (assumed layout — TODO confirm against read_all's output).
        fig_path: directory into which reneg_vs_std.pdf is written.
    """
    fig, ax = plt.subplots(1, 1)

    linestyle = '-'

    for epoch, epoch_stats in enumerate(bincnts):
        # Per-interval loads: difference between consecutive cumulative rows.
        epoch_loads = np.diff(epoch_stats, n=1, axis=0)

        # Fewer than two snapshots -> no intervals to plot for this epoch.
        if epoch_loads.shape[0] == 0:
            continue

        # Cumulative volume at each snapshot; drop the first row so the
        # x-values line up with the diffed interval loads.
        epoch_cum_loads = np.delete(epoch_stats.sum(1), 0)
        # Coefficient of variation of load across bins for each interval.
        epoch_stds = epoch_loads.std(1) / epoch_loads.mean(1)

        ax.plot(epoch_cum_loads, epoch_stds, linestyle, mec='purple',
                label='Epoch {0}'.format(epoch))

    ax.set_xlabel('Total Data Volume')
    ax.set_ylabel('Normalized Stddev (CoV)')
    ax.set_title('Renegotiation Events vs Interval Stddev')
    ax.legend()
    fig.show()

    plot_out = fig_path + '/reneg_vs_std.pdf'
    # BUG FIX: savefig was commented out, so the "Plot saved" message below
    # was printed without any file being written.  Re-enabled to match the
    # savefig-then-print pattern used by run().
    fig.savefig(plot_out, dpi=300)
    print('Plot saved: ', abbrv_path(plot_out))
Ejemplo n.º 3
0
def read_all(perf_path: str) -> Tuple[Iterable, Iterable]:
    """Read RTP perflogs (or the cached aggregate) and split them by epoch.

    Args:
        perf_path: directory containing perflog files matching PERFLOGFMT.

    Returns:
        (epoch_pivots, epoch_bincnts): per-epoch pivot arrays and per-epoch
        bin-count arrays, each produced by np.split at the epoch boundaries
        reported by read_epoch_counts.
    """
    global USE_CACHE

    print('Reading perflogs from: {0}'.format(abbrv_path(perf_path)))
    aggr_bincnts = None

    cache_obj = cache.Cache()
    cache_miss = True
    if cache_obj.exists(perf_path):
        if not USE_CACHE:
            # Entry exists but caching is disabled; fall through and recompute.
            print('Cache Entry available, SKIPPING')
        else:
            aggr_bincnts = cache_obj.get(perf_path)
            print('Cache entry LOADED')
            cache_miss = False

    # Needed on both paths: the miss path parses every file, and the epoch
    # boundaries are always read from the first perflog below.
    all_fpaths = sorted(glob.glob(perf_path + PERFLOGFMT.format('*')))

    if cache_miss:
        with multiprocessing.Pool(8) as pool:
            parsed_fpaths = pool.map(read_bincnt, all_fpaths)

        aggr_bincnts = sum(parsed_fpaths)
        cache_obj.put(perf_path, aggr_bincnts)

    # BUG FIX: epoch_counts was previously populated only on a cache miss;
    # a cache hit left it None and epoch_counts[:-1] raised TypeError.
    epoch_counts = read_epoch_counts(all_fpaths[0])
    print(epoch_counts)

    # The final boundary marks end-of-data; np.split only needs the
    # interior split points.
    epoch_counts = epoch_counts[:-1]

    epoch_bincnts = np.split(aggr_bincnts, epoch_counts)

    aggr_pivots = read_pivots(perf_path)
    epoch_pivots = np.split(aggr_pivots, epoch_counts)

    print('RTP data read from perflogs')
    print('RTP Total Epochs: ', len(epoch_bincnts))
    total_mass = sum(aggr_bincnts[-1])
    print('RTP Total Mass: ', f'{total_mass:,}')

    return epoch_pivots, epoch_bincnts
Ejemplo n.º 4
0
def get_manifest_overlaps(data_path: str, epoch: int,
                          probe_points: List[float]) -> Tuple[int, List[int]]:
    """Count overlapping manifest items at each probe point.

    Returns a pair (total item count, per-probe overlap counts), or
    (0, []) when the manifest for *epoch* is empty.
    """
    mf_items = read_entire_manifest(data_path, epoch)
    if len(mf_items) == 0:
        return 0, []

    _, _, item_sum = get_stats(mf_items)

    print('\nReading MockDB Manifest (path: ... {0}): {1}M items'.format(
        abbrv_path(data_path),
        int(item_sum / 1e6)))

    # Element [1] of each result is the overlap count for that probe point.
    overlap_stats = [get_overlapping_count(mf_items, pt)[1]
                     for pt in probe_points]
    return item_sum, overlap_stats
Ejemplo n.º 5
0
def compute_mdb_overlap(out_path: str):
    """Compute MockDB overlap stats for every epoch and dump each to CSV."""
    data_path = out_path + '/../plfs/manifests'
    path_fmt_mdb = '{0}/mdb.olap.e{1}.csv'

    for epoch, (points, total, overlaps) in enumerate(
            gen_mdb_overlaps(data_path)):
        max_pct = max(overlaps) * 100.0 / total
        print('Epoch {:d}: Max MDB Overlap: {:.2f}%'.format(epoch, max_pct))

        csv_path_mdb = path_fmt_mdb.format(out_path, epoch)
        dump_csv(epoch, points, overlaps, total, csv_path_mdb)

        print('Epoch {0} Written: {1}'.format(epoch, abbrv_path(csv_path_mdb)))
Ejemplo n.º 6
0
def compute_rtp_overlap(all_pivots: List[np.ndarray],
                    all_counts: List[np.ndarray],
                    out_path: str):
    """Analyze per-epoch RTP overlap and write one CSV per epoch."""
    path_fmt = '{0}/rtp.olap.e{1}.csv'
    path_fmt_mdb = '{0}/mdb.olap.e{1}.csv'
    npts = 100

    for epoch, (pivots, counts) in enumerate(zip(all_pivots, all_counts)):
        points, overlaps, total = analyze_overlap_epoch(pivots, counts, npts)
        max_pct = max(overlaps) * 100.0 / total
        print('Epoch {:d}: Max RTP Overlap: {:.2f}%'.format(epoch, max_pct))

        csv_path = path_fmt.format(out_path, epoch)
        dump_csv(epoch, points, overlaps, total, csv_path)

        print('Epoch {0} Written: {1}'.format(epoch, abbrv_path(csv_path)))
Ejemplo n.º 7
0
def gen_overlaps(data_path: str) -> List[List]:
    """Compute per-epoch overlap counts for a MockDB manifest.

    Iterates epochs starting at 0 until read_entire_manifest returns an
    empty manifest; for each epoch, overlap counts are probed in parallel.

    Returns:
        One list per epoch: [probe_points, item_sum, overlap_stats], where
        overlap_stats is an int64 ndarray of per-probe overlap counts.
        (Annotation fixed: the original declared Tuple[List, int, List],
        but a list of per-epoch lists is what is actually returned.)
    """
    epoch = 0
    all_data = []

    while True:
        mf_items = read_entire_manifest(data_path, epoch)
        if len(mf_items) == 0:
            break

        # Probe range spans the smallest range-start to the largest range-end.
        probe_min = min(mf_items, key=lambda x: x[0])[0]
        probe_max = max(mf_items, key=lambda x: x[1])[1]
        probe_points = gen_probe_points(probe_min, probe_max)

        _, _, item_sum = get_stats(mf_items)

        print('\nReading MockDB Manifest E{2}, (path: ... {0}): {1}M items'.format(
            abbrv_path(data_path),
            int(item_sum / 1e6),
            epoch))

        # mf_items is shared with workers via the pool initializer instead
        # of being pickled into every task.
        with multiprocessing.Pool(16,
                                  worker_initialize,
                                  [mf_items]) as pool:
            point_overlaps = pool.map(get_overlapping_count_parutil,
                                      probe_points)
            # Element [1] of each result is the overlap count.
            overlap_stats = np.fromiter((x[1] for x in point_overlaps),
                                        dtype=np.int64)

        # Removed dead locals (overlap_pct / overlap_max / overlap_fmt) that
        # fed only a commented-out print.
        all_data.append([probe_points, item_sum, overlap_stats])
        epoch += 1

    return all_data