def plot_relative_max_throughputs_stream_table_join(dirname):
    """Plot Flumina-vs-Flink relative max throughput for stream-table-join.

    For the sequential run and each parallel page-view-node configuration,
    scan the result directories under *dirname* across all rate multipliers,
    keep the best observed throughput, and hand the series (truncated to the
    length of the hard-coded Flink baselines) to
    plot_relative_max_throughputs_common.
    """
    prefix = "stream_table_join"
    experiment = "stream-table-join"
    uids = 2
    page_view_nodes = [1] + list(range(2, 21, 2))
    thin_uids = 0
    rate_multipliers = list(range(100))

    def best_of(dir_names):
        # Return (peak throughput, rate multiplier that achieved it).
        paths = [os.path.join(dirname, d) for d in dir_names]
        tputs = [results.get_erlang_throughput(p) for p in paths]
        peak = max(tputs)
        return peak, rate_multipliers[tputs.index(peak)]

    maximums = []
    max_rates = []

    ## Sequential time
    seq_dirs = ['%s_1_1_1_%d' % (prefix, rm) for rm in rate_multipliers]
    peak, rate = best_of(seq_dirs)
    max_rates.append(rate)
    maximums.append(peak)

    ## Parallel times
    for pvn in page_view_nodes:
        par_dirs = [
            '%s_%d_%d_%d_%d' % (prefix, uids, pvn, thin_uids, rm)
            for rm in rate_multipliers
        ]
        peak, rate = best_of(par_dirs)
        max_rates.append(rate)
        maximums.append(peak)

    # print(maximums)
    print("Max rates:", max_rates)
    ticks = [1] + [pvn * 2 for pvn in page_view_nodes]
    flink_maximums = [447.0, 778, 996, 884, 1096, 1032, 1051, 979, 995]
    plot_relative_max_throughputs_common(experiment,
                                         ticks[:len(flink_maximums)],
                                         maximums[:len(flink_maximums)],
                                         flink_maximums)
def plot_relative_max_throughputs_full_value_barrier_example(dirname):
    """Plot Flumina-vs-Flink relative max throughput for full-value-barrier.

    For each a-node count, scan all rate multipliers under *dirname*, keep
    the best observed Flumina throughput, and compare the series against the
    hard-coded Flink baselines via plot_relative_max_throughputs_common.
    """
    prefix = "ab_exp_full_1"
    experiment = "full-value-barrier"
    ratio_ab = 10000
    heartbeat_rate = 100
    optimizer = "optimizer_greedy"
    ## After 16 it starts getting worse
    a_nodes_numbers = [1] + list(range(2, 17, 2))
    rate_multipliers = list(range(100))

    maximums = []
    max_rates = []
    for a_nodes in a_nodes_numbers:
        candidate_dirs = [
            '%s_%d_%d_%d_%d_%s' % (prefix, rm, ratio_ab, heartbeat_rate,
                                   a_nodes, optimizer)
            for rm in rate_multipliers
        ]
        tputs = [
            results.get_erlang_throughput(os.path.join(dirname, cand))
            for cand in candidate_dirs
        ]
        peak = max(tputs)
        # Remember which rate multiplier produced the peak, for the log below.
        max_rates.append(rate_multipliers[tputs.index(peak)])
        maximums.append(peak)

    # print(maximums)
    print(max_rates)
    flink_maximums = [337.0, 551, 466, 508, 497, 513, 485, 463, 441]
    plot_relative_max_throughputs_common(experiment, a_nodes_numbers,
                                         maximums, flink_maximums)
def get_flumina_latencies_throughputs(flumina_dirs):
    """Collect latency percentiles and throughputs for Flumina result dirs.

    :param flumina_dirs: iterable of result-directory paths (consumed once;
        materialized to a list so it can be traversed twice)
    :return: ``(latencies, throughputs)`` — two lists aligned with the input
        order, produced by ``results.get_erlang_latencies`` and
        ``results.get_erlang_throughput`` respectively
    """
    flumina_dirs = list(flumina_dirs)
    # 'result_dir' rather than 'dir', which shadowed the builtin dir().
    flumina_latencies = [
        results.get_erlang_latencies(result_dir)
        for result_dir in flumina_dirs
    ]
    flumina_throughputs = [
        results.get_erlang_throughput(result_dir)
        for result_dir in flumina_dirs
    ]
    return flumina_latencies, flumina_throughputs
def get_flumina_latencies_throughputs(flumina_dirs, experiment='value-barrier'):
    """Collect latency percentiles and throughputs for Flumina result dirs.

    :param flumina_dirs: iterable of result-directory paths (consumed once;
        materialized to a list so it can be traversed twice)
    :param experiment: experiment name forwarded to
        ``results.get_erlang_latencies``
    :return: ``(latencies, throughputs)`` — two lists aligned with the input
        order, produced by ``results.get_erlang_latencies`` and
        ``results.get_erlang_throughput`` respectively
    """
    flumina_dirs = list(flumina_dirs)
    # 'result_dir' rather than 'dir', which shadowed the builtin dir().
    flumina_latencies = [
        results.get_erlang_latencies(result_dir, experiment)
        for result_dir in flumina_dirs
    ]
    flumina_throughputs = [
        results.get_erlang_throughput(result_dir)
        for result_dir in flumina_dirs
    ]
    return flumina_latencies, flumina_throughputs
def plot_scaleup(erlang_subdirs, flink_subdirs, output_file):
    """Plot median latency vs. throughput for Flumina and Flink scaleup runs.

    Each point gets asymmetric error bars spanning the 10th-to-90th latency
    percentiles; the y axis is log-scaled. NOTE(review): saving to
    *output_file* is commented out, so the figure is only shown interactively.
    """
    # Flumina series: percentile triples -> median plus error-bar deltas.
    erlang_stats = [results.get_erlang_latencies(d) for d in erlang_subdirs]
    erlang_medians = [mid for low, mid, high in erlang_stats]
    erlang_err_low = [mid - low for low, mid, high in erlang_stats]
    erlang_err_high = [high - mid for low, mid, high in erlang_stats]
    erlang_tputs = [results.get_erlang_throughput(d) for d in erlang_subdirs]

    # Flink series, same shape.
    flink_stats = [results.get_flink_latencies(d) for d in flink_subdirs]
    flink_medians = [mid for low, mid, high in flink_stats]
    flink_err_low = [mid - low for low, mid, high in flink_stats]
    flink_err_high = [high - mid for low, mid, high in flink_stats]
    flink_tputs = [results.get_flink_throughput(d) for d in flink_subdirs]

    plt.rcParams.update({'font.size': 18})
    fig, ax = plt.subplots()
    ax.set_xlabel('Throughput (events/ms)')
    ax.set_ylabel('Latency (ms)')
    plt.yscale('log')
    ax.errorbar(erlang_tputs,
                erlang_medians,
                [erlang_err_low, erlang_err_high],
                linestyle='-',
                marker='o',
                label='Flumina',
                linewidth=1,
                capthick=1,
                capsize=3,
                color='tab:blue')
    ax.errorbar(flink_tputs,
                flink_medians,
                [flink_err_low, flink_err_high],
                linestyle='--',
                marker='^',
                label='Flink',
                linewidth=1,
                capthick=1,
                capsize=3,
                color='tab:red')
    ax.legend()
    plt.tight_layout()
    #plt.savefig(output_file)
    plt.show()
def main():
    """CLI entry point: run an experiment suite or summarize a result dir."""
    parser = argparse.ArgumentParser(description='Run Flink experiments')
    group = parser.add_mutually_exclusive_group()
    group.add_argument('-s', '--suite', help='Run the given experiment suite')
    group.add_argument('-l', '--list',
                       help='List experiment suites',
                       action='store_true')
    group.add_argument(
        '-f', '--flink-results',
        help='Process Flink results from the given output directory')
    group.add_argument(
        '-e', '--erlang-results',
        help='Process Erlang results from the given output directory')
    args = parser.parse_args()

    def report(percentiles, throughput, network_data):
        # Shared summary printer for both the Flink and Erlang branches.
        p10, p50, p90 = percentiles
        print(f'Latency percentiles (ms): {p10:.0f} {p50:.0f} {p90:.0f}')
        print(f'Mean throughput (events/ms): {throughput}')
        print(f'Network data (MB): {network_data:0.1f}')

    if args.list:
        for name, suite in suites.items():
            header = f'{name}:\n' + '=' * (1 + len(name))
            entries = [header] + [str(exp) for exp in suite.experiments]
            print('\n\t'.join(entries) + '\n')
        exit(0)

    if args.flink_results is not None:
        report(results.get_flink_latencies(args.flink_results),
               results.get_flink_throughput(args.flink_results),
               results.get_network_data(args.flink_results) / 1024.0 / 1024.0)
        exit(0)

    if args.erlang_results is not None:
        report(results.get_erlang_latencies(args.erlang_results),
               results.get_erlang_throughput(args.erlang_results),
               results.get_network_data(args.erlang_results) / 1024.0 / 1024.0)
        exit(0)

    if args.suite not in suites:
        parser.print_usage()
        exit(1)
    suites[args.suite].run()
def get_max_throughput_flumina(archive_dir, prefix, suffix):
    """Return the best Flumina throughput among matching result directories.

    Scans the immediate subdirectories of *archive_dir* whose names start
    with *prefix* and end with *suffix*, computes each one's throughput via
    ``results.get_erlang_throughput``, and returns the maximum.

    :return: the maximum throughput, or ``0`` when nothing matches
    """
    subdirectories = [
        os.path.join(archive_dir, entry)
        for entry in os.listdir(archive_dir)
        if os.path.isdir(os.path.join(archive_dir, entry))
        and entry.startswith(prefix) and entry.endswith(suffix)
    ]
    throughputs = [
        results.get_erlang_throughput(subdir) for subdir in subdirectories
    ]
    # max(..., default=0) replaces the previous bare `except:` clause, which
    # silently swallowed *every* error (not just the empty-sequence
    # ValueError it was meant to handle) and returned 0.
    return max(throughputs, default=0)
def common_plot_scaleup(dirname,
                        dirnames,
                        xticks,
                        xlabel,
                        output_name,
                        yscale='log',
                        experiment="value-barrier"):
    """Plot latency percentiles against throughput for one scaleup series
    and save the figure to plots/<output_name>.pdf.

    All entries in *dirnames* are assumed to exist under *dirname*.
    NOTE(review): *xticks* and *xlabel* are currently unused by the body.
    """
    print("Plotting:", output_name, experiment)
    ## We assume that all directories are there
    paths = [os.path.join(dirname, name) for name in dirnames]
    latencies = [
        results.read_preprocess_latency_data(path, experiment)
        for path in paths
    ]
    throughputs = [results.get_erlang_throughput(path) for path in paths]

    ## 10th/50th/90th latency percentiles per configuration
    ## (the timestamp half of each pair is ignored).
    median_latencies = [np.percentile(lats, 50) for _, lats in latencies]
    ten_latencies = [np.percentile(lats, 10) for _, lats in latencies]
    ninety_latencies = [np.percentile(lats, 90) for _, lats in latencies]
    # mean_throughputs = [np.mean(ths) for ts, ths in throughputs]

    fig, ax = plt.subplots()
    ax.set_xlabel('Throughput (msgs/ms)')
    ax.set_ylabel('Latency (ms)')
    plt.yscale(yscale)
    ax.plot(throughputs, ninety_latencies, '-^',
            label='90th percentile', color='tab:red', linewidth=0.5)
    ax.plot(throughputs, median_latencies, '-o',
            label='median', linewidth=0.5)
    ax.plot(throughputs, ten_latencies, '-s',
            label='10th percentile', color='tab:green', linewidth=0.5)
    ax.legend()
    plt.tight_layout()
    plt.savefig(os.path.join('plots', output_name + ".pdf"))
def main():
    """CLI entry point: run a suite/experiment or summarize a result dir.

    Exactly one of --suite / --experiment / --list / --flink-results /
    --flumina-results is expected (the first group is mutually exclusive);
    the remaining options parameterize the chosen experiment.
    """
    parser = argparse.ArgumentParser(description='Run Flink experiments')
    group = parser.add_mutually_exclusive_group()
    group.add_argument('-s', '--suite', help='Run the given experiment suite')
    group.add_argument('-e', '--experiment', help='Run the given experiment')
    group.add_argument('-l', '--list',
                       help='List experiment suites',
                       action='store_true')
    group.add_argument('-k', '--flink-results',
                       help='Process Flink results from the given output directory')
    group.add_argument('-f', '--flumina-results',
                       help='Process Flumina results from the given output directory')
    parser.add_argument('--flink-workers',
                        help='File containing a list of Flink worker hostnames')
    parser.add_argument('--total-values', type=int)
    parser.add_argument('--value-nodes', type=int)
    parser.add_argument('--value-rate', type=float)
    parser.add_argument('--vb-ratio', type=int)
    parser.add_argument('--hb-ratio', type=int)
    parser.add_argument('--total-pageviews', type=int)
    parser.add_argument('--total-users', type=int, default=2)
    parser.add_argument('--pageview-parallelism', type=int)
    parser.add_argument('--pageview-rate', type=float)
    parser.add_argument('--sequential', action='store_true')
    parser.add_argument('--manual',
                        help='Run Flink with a manually implemented Flumina-like synchronization',
                        action='store_true')
    parser.add_argument('--attempts', type=int, default=1)
    parser.add_argument('--rmi-host',
                        help='Host that is running the Java RMI registry')
    args = parser.parse_args()

    if args.list:
        for name, suite in suites.items():
            print('\n\t'.join(
                [f'{name}:\n' + '=' * (1 + len(name))] +
                [str(exp) for exp in suite.experiments]) + '\n')
        exit(0)

    if args.flink_results is not None:
        p10, p50, p90 = results.get_flink_latencies(args.flink_results)
        throughput = results.get_flink_throughput(args.flink_results)
        #network_data = results.get_network_data(args.flink_results) / 1024.0 / 1024.0
        print(f'Latency percentiles (ms): {p10:.0f} {p50:.0f} {p90:.0f}')
        print(f'Mean throughput (events/ms): {throughput}')
        #print(f'Network data (MB): {network_data:0.1f}')
        exit(0)

    if args.flumina_results is not None:
        # BUG FIX: this branch previously read args.erlang_results, an
        # attribute this parser never defines (the option is
        # --flumina-results), so it crashed with AttributeError at runtime.
        p10, p50, p90 = results.get_erlang_latencies(args.flumina_results)
        throughput = results.get_erlang_throughput(args.flumina_results)
        network_data = results.get_network_data(
            args.flumina_results) / 1024.0 / 1024.0
        print(f'Latency percentiles (ms): {p10:.0f} {p50:.0f} {p90:.0f}')
        print(f'Mean throughput (events/ms): {throughput}')
        print(f'Network data (MB): {network_data:0.1f}')
        exit(0)

    if args.experiment is not None:
        if args.experiment.startswith("value-barrier"):
            exps = [ValueBarrierEC2(args.value_nodes,
                                    args.total_values,
                                    args.value_rate,
                                    args.vb_ratio,
                                    args.hb_ratio,
                                    manual=args.manual,
                                    sequential=args.sequential,
                                    attempt=a)
                    for a in range(1, args.attempts + 1)]
        elif args.experiment.startswith("pageview"):
            exps = [PageViewEC2(args.total_pageviews,
                                args.total_users,
                                args.pageview_parallelism,
                                args.pageview_rate,
                                manual=args.manual,
                                sequential=args.sequential,
                                attempt=a)
                    for a in range(1, args.attempts + 1)]
        elif args.experiment.startswith("fraud-detection"):
            exps = [FraudDetectionEC2(args.value_nodes,
                                      args.total_values,
                                      args.value_rate,
                                      args.vb_ratio,
                                      args.hb_ratio,
                                      manual=args.manual,
                                      attempt=a)
                    for a in range(1, args.attempts + 1)]
        else:
            # Previously an unrecognized experiment name fell through and
            # crashed with NameError on 'exps'; fail with a clear message.
            parser.error(f'Unknown experiment: {args.experiment}')
        ExperimentSuite(args.experiment, exps).run(args)
        exit(0)

    if args.suite not in suites:
        parser.print_usage()
        exit(1)
    suites[args.suite].run(args)