def plot_relative_max_throughputs_stream_table_join(dirname):
    prefix = "stream_table_join"
    experiment = "stream-table-join"
    uids = 2
    page_view_nodes = [1] + list(range(2, 21, 2))
    thin_uids = 0
    rate_multipliers = list(range(100))

    maximums = []
    max_rates = []

    ## Sequential baseline (single node)
    dirnames = [
        '%s_1_1_1_%d' % (prefix, rate_multiplier)
        for rate_multiplier in rate_multipliers
    ]
    path_dirnames = [os.path.join(dirname, name) for name in dirnames]
    throughputs = [
        results.get_erlang_throughput(path_dirname)
        for path_dirname in path_dirnames
    ]
    max_throughput = max(throughputs)
    index = throughputs.index(max_throughput)
    max_rates.append(rate_multipliers[index])
    maximums.append(max_throughput)

    ## Parallel configurations
    for pvn in page_view_nodes:
        dirnames = [
            '%s_%d_%d_%d_%d' % (prefix, uids, pvn, thin_uids, rate_multiplier)
            for rate_multiplier in rate_multipliers
        ]
        path_dirnames = [os.path.join(dirname, name) for name in dirnames]
        throughputs = [
            results.get_erlang_throughput(path_dirname)
            for path_dirname in path_dirnames
        ]
        max_throughput = max(throughputs)
        index = throughputs.index(max_throughput)
        max_rates.append(rate_multipliers[index])
        maximums.append(max_throughput)

    # print(maximums)
    print("Max rates:", max_rates)
    ticks = [1] + [pvn * 2 for pvn in page_view_nodes]
    flink_maximums = [447.0, 778, 996, 884, 1096, 1032, 1051, 979, 995]
    plot_relative_max_throughputs_common(experiment,
                                         ticks[:len(flink_maximums)],
                                         maximums[:len(flink_maximums)],
                                         flink_maximums)
def plot_relative_max_throughputs_full_value_barrier_example(dirname):
    prefix = "ab_exp_full_1"
    experiment = "full-value-barrier"
    ratio_ab = 10000
    heartbeat_rate = 100
    optimizer = "optimizer_greedy"
    ## Beyond 16 a-nodes the throughput starts to degrade
    a_nodes_numbers = [1] + list(range(2, 17, 2))
    rate_multipliers = list(range(100))

    maximums = []
    max_rates = []
    for a_nodes_number in a_nodes_numbers:
        dirnames = [
            '%s_%d_%d_%d_%d_%s' % (prefix, rate_mult, ratio_ab, heartbeat_rate,
                                   a_nodes_number, optimizer)
            for rate_mult in rate_multipliers
        ]
        path_dirnames = [os.path.join(dirname, name) for name in dirnames]
        throughputs = [
            results.get_erlang_throughput(path_dirname)
            for path_dirname in path_dirnames
        ]
        max_throughput = max(throughputs)
        index = throughputs.index(max_throughput)
        max_rates.append(rate_multipliers[index])
        maximums.append(max_throughput)

    # print(maximums)
    print("Max rates:", max_rates)
    flink_maximums = [337.0, 551, 466, 508, 497, 513, 485, 463, 441]
    plot_relative_max_throughputs_common(experiment, a_nodes_numbers, maximums,
                                         flink_maximums)
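
A hedged usage sketch (not in the original listing): both plotting helpers take the root directory of the results archive and scan it for subdirectories named after the experiment parameters. The path below is a placeholder.

# Hypothetical usage; 'results_archive' is a placeholder for the real archive root.
plot_relative_max_throughputs_stream_table_join('results_archive')
plot_relative_max_throughputs_full_value_barrier_example('results_archive')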
Example #3
def get_flumina_latencies_throughputs(flumina_dirs):
    flumina_dirs = list(flumina_dirs)
    flumina_latencies = [
        results.get_erlang_latencies(flumina_dir)
        for flumina_dir in flumina_dirs
    ]
    flumina_throughputs = [
        results.get_erlang_throughput(flumina_dir)
        for flumina_dir in flumina_dirs
    ]
    return flumina_latencies, flumina_throughputs
Example #4
def get_flumina_latencies_throughputs(flumina_dirs,
                                      experiment='value-barrier'):
    flumina_dirs = list(flumina_dirs)
    flumina_latencies = [
        results.get_erlang_latencies(flumina_dir, experiment)
        for flumina_dir in flumina_dirs
    ]
    flumina_throughputs = [
        results.get_erlang_throughput(flumina_dir)
        for flumina_dir in flumina_dirs
    ]
    return flumina_latencies, flumina_throughputs
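
A hedged usage sketch (not in the original listing): each entry of the returned latencies is a (p10, p50, p90) triple, as elsewhere in this listing, so it can be unpacked next to the corresponding throughput. The directory names are placeholders.

# Hypothetical result directories; substitute real archive paths.
dirs = ['archive/value_barrier_run_%d' % i for i in range(3)]
lats, tputs = get_flumina_latencies_throughputs(dirs, experiment='value-barrier')
for (p10, p50, p90), tput in zip(lats, tputs):
    print('median latency %.0f ms at throughput %.1f events/ms' % (p50, tput))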
Example #5
def plot_scaleup(erlang_subdirs, flink_subdirs, output_file):
    erlang_latencies = [
        results.get_erlang_latencies(subdir) for subdir in erlang_subdirs
    ]
    erlang_latencies_median = [p50 for p10, p50, p90 in erlang_latencies]
    erlang_latencies_diff_10 = [
        p50 - p10 for p10, p50, p90 in erlang_latencies
    ]
    erlang_latencies_diff_90 = [
        p90 - p50 for p10, p50, p90 in erlang_latencies
    ]
    erlang_throughputs = [
        results.get_erlang_throughput(subdir) for subdir in erlang_subdirs
    ]

    flink_latencies = [
        results.get_flink_latencies(subdir) for subdir in flink_subdirs
    ]
    flink_latencies_median = [p50 for p10, p50, p90 in flink_latencies]
    flink_latencies_diff_10 = [p50 - p10 for p10, p50, p90 in flink_latencies]
    flink_latencies_diff_90 = [p90 - p50 for p10, p50, p90 in flink_latencies]
    flink_throughputs = [
        results.get_flink_throughput(subdir) for subdir in flink_subdirs
    ]

    plt.rcParams.update({'font.size': 18})
    fig, ax = plt.subplots()
    ax.set_xlabel('Throughput (events/ms)')
    ax.set_ylabel('Latency (ms)')
    plt.yscale('log')
    ax.errorbar(erlang_throughputs,
                erlang_latencies_median,
                [erlang_latencies_diff_10, erlang_latencies_diff_90],
                linestyle='-',
                marker='o',
                label='Flumina',
                linewidth=1,
                capthick=1,
                capsize=3,
                color='tab:blue')
    ax.errorbar(flink_throughputs,
                flink_latencies_median,
                [flink_latencies_diff_10, flink_latencies_diff_90],
                linestyle='--',
                marker='^',
                label='Flink',
                linewidth=1,
                capthick=1,
                capsize=3,
                color='tab:red')
    ax.legend()

    plt.tight_layout()
    #plt.savefig(output_file)
    plt.show()
Example #6
def main():
    parser = argparse.ArgumentParser(description='Run Flink experiments')
    group = parser.add_mutually_exclusive_group()
    group.add_argument('-s', '--suite', help='Run the given experiment suite')
    group.add_argument('-l',
                       '--list',
                       help='List experiment suites',
                       action='store_true')
    group.add_argument(
        '-f',
        '--flink-results',
        help='Process Flink results from the given output directory')
    group.add_argument(
        '-e',
        '--erlang-results',
        help='Process Erlang results from the given output directory')
    args = parser.parse_args()

    if args.list:
        for name, suite in suites.items():
            print('\n\t'.join([f'{name}:\n' + '=' * (1 + len(name))] +
                              [str(exp) for exp in suite.experiments]) + '\n')
        exit(0)

    if args.flink_results is not None:
        p10, p50, p90 = results.get_flink_latencies(args.flink_results)
        throughput = results.get_flink_throughput(args.flink_results)
        network_data = results.get_network_data(
            args.flink_results) / 1024.0 / 1024.0
        print(f'Latency percentiles (ms):  {p10:.0f}  {p50:.0f}  {p90:.0f}')
        print(f'Mean throughput (events/ms): {throughput}')
        print(f'Network data (MB): {network_data:0.1f}')
        exit(0)

    if args.erlang_results is not None:
        p10, p50, p90 = results.get_erlang_latencies(args.erlang_results)
        throughput = results.get_erlang_throughput(args.erlang_results)
        network_data = results.get_network_data(
            args.erlang_results) / 1024.0 / 1024.0
        print(f'Latency percentiles (ms):  {p10:.0f}  {p50:.0f}  {p90:.0f}')
        print(f'Mean throughput (events/ms): {throughput}')
        print(f'Network data (MB): {network_data:0.1f}')
        exit(0)

    if args.suite not in suites:
        parser.print_usage()
        exit(1)

    suites[args.suite].run()
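
A hedged usage sketch (not in the original listing): the flags come from the argparse definitions above; the script name is a placeholder, and calling main() with a patched sys.argv is only for illustration (the --list branch will exit(0)).

import sys
# Hypothetical: simulate running "experiments.py --list" to print the available suites.
sys.argv = ['experiments.py', '--list']
main()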
Example #7
def get_max_throughput_flumina(archive_dir, prefix, suffix):

    subdirectories = [
        os.path.join(archive_dir, o) for o in os.listdir(archive_dir)
        if os.path.isdir(os.path.join(archive_dir, o)) and o.startswith(prefix)
        and o.endswith(suffix)
    ]

    throughputs = [
        results.get_erlang_throughput(subdir) for subdir in subdirectories
    ]
    try:
        max_throughput = max(throughputs)
    except ValueError:
        # No matching subdirectories: max() of an empty list raises ValueError.
        max_throughput = 0
    return max_throughput
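
A hedged usage sketch (not in the original listing): the prefix/suffix pair selects one experiment family from the archive; the strings below are placeholders that follow the directory naming pattern used by the plotting functions earlier in this listing.

# Hypothetical: best throughput among stream-table-join runs with 2 uids and 4 page-view nodes.
best = get_max_throughput_flumina('results_archive', 'stream_table_join_2_4_0_', '')
print('max throughput: %s events/ms' % best)
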
def common_plot_scaleup(dirname,
                        dirnames,
                        xticks,
                        xlabel,
                        output_name,
                        yscale='log',
                        experiment="value-barrier"):
    print("Plotting:", output_name, experiment)
    ## We assume that all directories are there
    path_dirnames = [os.path.join(dirname, name) for name in dirnames]
    latencies = [
        results.read_preprocess_latency_data(path_dirname, experiment)
        for path_dirname in path_dirnames
    ]
    throughputs = [
        results.get_erlang_throughput(path_dirname)
        for path_dirname in path_dirnames
    ]

    ## Get the median, 10th, and 90th percentile of the latencies
    median_latencies = [np.percentile(lats, 50) for ts, lats in latencies]
    ten_latencies = [np.percentile(lats, 10) for ts, lats in latencies]
    ninety_latencies = [np.percentile(lats, 90) for ts, lats in latencies]
    # mean_throughputs = [np.mean(ths) for ts, ths in throughputs]

    fig, ax = plt.subplots()
    ax.set_xlabel('Throughput (msgs/ms)')
    ax.set_ylabel('Latency (ms)')
    plt.yscale(yscale)
    ax.plot(throughputs,
            ninety_latencies,
            '-^',
            label='90th percentile',
            color='tab:red',
            linewidth=0.5)
    ax.plot(throughputs, median_latencies, '-o', label='median', linewidth=0.5)
    ax.plot(throughputs,
            ten_latencies,
            '-s',
            label='10th percentile',
            color='tab:green',
            linewidth=0.5)
    ax.legend()

    plt.tight_layout()
    plt.savefig(os.path.join('plots', output_name + ".pdf"))
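
A hedged usage sketch (not in the original listing): common_plot_scaleup takes the parent results directory, the per-configuration subdirectory names, the x ticks and label, and a name for the output PDF; the values below are illustrative placeholders.

# Hypothetical call with placeholder subdirectory names and ticks;
# assumes a 'plots' directory exists for the saved PDF.
node_counts = [1, 2, 4, 8]
subdirs = ['ab_exp_%d' % n for n in node_counts]  # assumed naming scheme
common_plot_scaleup('results_archive', subdirs, node_counts, 'Parallel nodes',
                    'ab_exp_scaleup', experiment='value-barrier')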
Example #9
def main():
    parser = argparse.ArgumentParser(description='Run Flink experiments')
    group = parser.add_mutually_exclusive_group()
    group.add_argument('-s', '--suite', help='Run the given experiment suite')
    group.add_argument('-e', '--experiment', help='Run the given experiment')
    group.add_argument('-l', '--list', help='List experiment suites', action='store_true')
    group.add_argument('-k', '--flink-results', help='Process Flink results from the given output directory')
    group.add_argument('-f', '--flumina-results', help='Process Flumina results from the given output directory')
    parser.add_argument('--flink-workers', help='File containing a list of Flink worker hostnames')
    parser.add_argument('--total-values', type=int)
    parser.add_argument('--value-nodes', type=int)
    parser.add_argument('--value-rate', type=float)
    parser.add_argument('--vb-ratio', type=int)
    parser.add_argument('--hb-ratio', type=int)
    parser.add_argument('--total-pageviews', type=int)
    parser.add_argument('--total-users', type=int, default=2)
    parser.add_argument('--pageview-parallelism', type=int)
    parser.add_argument('--pageview-rate', type=float)
    parser.add_argument('--sequential', action='store_true')
    parser.add_argument('--manual', help='Run Flink with a manually implemented Flumina-like synchronization',
                        action='store_true')
    parser.add_argument('--attempts', type=int, default=1)
    parser.add_argument('--rmi-host', help='Host that is running the Java RMI registry')
    args = parser.parse_args()

    if args.list:
        for name, suite in suites.items():
            print('\n\t'.join(
                [f'{name}:\n' + '=' * (1 + len(name))]
                + [str(exp) for exp in suite.experiments]) + '\n')
        exit(0)

    if args.flink_results is not None:
        p10, p50, p90 = results.get_flink_latencies(args.flink_results)
        throughput = results.get_flink_throughput(args.flink_results)
        #network_data = results.get_network_data(args.flink_results) / 1024.0 / 1024.0
        print(f'Latency percentiles (ms):  {p10:.0f}  {p50:.0f}  {p90:.0f}')
        print(f'Mean throughput (events/ms): {throughput}')
        #print(f'Network data (MB): {network_data:0.1f}')
        exit(0)

    if args.flumina_results is not None:
        p10, p50, p90 = results.get_erlang_latencies(args.flumina_results)
        throughput = results.get_erlang_throughput(args.flumina_results)
        network_data = results.get_network_data(args.flumina_results) / 1024.0 / 1024.0
        print(f'Latency percentiles (ms):  {p10:.0f}  {p50:.0f}  {p90:.0f}')
        print(f'Mean throughput (events/ms): {throughput}')
        print(f'Network data (MB): {network_data:0.1f}')
        exit(0)

    if args.experiment is not None:
        if args.experiment.startswith("value-barrier"):
            exps = [ValueBarrierEC2(args.value_nodes, args.total_values, args.value_rate, args.vb_ratio, args.hb_ratio,
                                    manual=args.manual, sequential=args.sequential, attempt=a)
                    for a in range(1, args.attempts + 1)]
        elif args.experiment.startswith("pageview"):
            exps = [PageViewEC2(args.total_pageviews, args.total_users, args.pageview_parallelism, args.pageview_rate,
                                manual=args.manual, sequential=args.sequential, attempt=a)
                    for a in range(1, args.attempts + 1)]
        elif args.experiment.startswith("fraud-detection"):
            exps = [
                FraudDetectionEC2(args.value_nodes, args.total_values, args.value_rate, args.vb_ratio, args.hb_ratio,
                                  manual=args.manual, attempt=a)
                for a in range(1, args.attempts + 1)]
        else:
            # Unknown experiment name: avoid a NameError on exps below.
            parser.print_usage()
            exit(1)
        ExperimentSuite(args.experiment, exps).run(args)
        exit(0)

    if args.suite not in suites:
        parser.print_usage()
        exit(1)

    suites[args.suite].run(args)