def extract_summary_from_config(inputdir, workload, worker, vc, reps, xput_client, thinktime_ms, num_threads, middlewares, warmup_period_endtime, cooldown_period_starttime):
    """Print a middleware-metrics summary for one workload/vc/worker config.

    Aggregates request logs from every middleware across `reps`
    repetitions, strips the warmup/cooldown windows, and prints
    throughput, queue and memcached-RTT figures. A second throughput
    value is derived from the MW response time via the interactive
    response-time law: N / (R + Z).

    NOTE(review): a later definition with the same name exists in this
    module and shadows this one at import time.
    """
    # e.g. "writeOnly_32vc16workers"
    folder = "{}_{}vc{}workers".format(workload, str(int(vc)), str(int(worker)))
    per_rep_metrics = []
    for rep in range(1, reps + 1):
        rep_path = os.path.join(inputdir, folder, str(int(rep)))
        # Now we extract throughput, responsetime, average queuetime and
        # missrate from the middleware.
        mw_dirs = [
            dirfuncs.get_only_subdir(os.path.join(rep_path, d))
            for d in middlewares
        ]
        # request logs -> metrics -> trimmed -> per-second windows, per MW
        per_mw_windows = []
        for mw_dir in mw_dirs:
            reqs = gmws.concatenate_requestlogs(mw_dir)
            mets = gmws.extract_metrics(reqs)
            trimmed = cut.cut_away_warmup_cooldown(
                mets, warmup_period_endtime, cooldown_period_starttime)
            per_mw_windows.append(gmws.aggregate_over_windows(trimmed))
        per_rep_metrics.append(gmws.aggregate_over_middlewares(per_mw_windows))
    averages = gmws.aggregate_over_timesteps(
        gmws.aggregate_over_reps(per_rep_metrics))
    avg = averages.loc['mean', :]
    num_clients = vc * num_threads
    print("{} workers:\n".format(worker))
    print(
        "Throughput MW:\t\t\t\t\t\t\t\t{}\nThroughput (derived from MW ResponseTime):\t{}\nThroughput (Client):\t\t\t\t\t\t{}\nAvg Time in Queue:\t\t\t\t\t\t\t{}\nAvg Length of Queue:\t\t\t\t\t\t{}\nAvg Time waiting for MC:\t\t\t\t\t{}\n"
        .format(
            avg['throughput_mw'],
            num_clients / (float(avg['responseTime_ms']) + thinktime_ms) * 1000,
            xput_client, avg['queueTime_ms'], avg['queueLength'],
            avg['memcachedRTT_ms']))
def get_data(matching_dirs, num_repetitions, middlewares, client_logfiles, warmup_period_endtime, cooldown_period_starttime, num_threads):
    """Collect MW + client metrics for every multiget experiment directory.

    For each experiment directory the per-repetition middleware request
    logs are restricted to GET-type requests, trimmed of warmup/cooldown,
    windowed and aggregated; the client logs are aggregated and trimmed
    the same way. Returns one concatenated DataFrame that carries the
    experiment's multiget size in a 'multigetSize' column.
    """
    frames = []
    for experiment_dir in matching_dirs:
        multiget_size = find_num_multigets(experiment_dir)
        mw_reps = []
        mt_reps = []
        for rep in range(1, num_repetitions + 1):
            rep_root = os.path.join(experiment_dir, str(rep))
            windows = []
            for mw_dir in middlewares:
                log_dir = get_only_subdir(os.path.join(rep_root, mw_dir))
                reqs = gmws.concatenate_requestlogs(log_dir)
                # Only GET-type requests are relevant for this analysis.
                gets = reqs[reqs['requestType'].str.contains("GET")]
                mets = gmws.extract_metrics(gets)
                trimmed = cut_away_warmup_cooldown(
                    mets, warmup_period_endtime, cooldown_period_starttime)
                windows.append(gmws.aggregate_over_windows(trimmed))
            mw_reps.append(gmws.aggregate_over_middlewares(windows))
            client_paths = [os.path.join(rep_root, f) for f in client_logfiles]
            client_metrics = gmts.aggregate_over_clients(client_paths)
            mt_reps.append(
                cut_away_warmup_cooldown(client_metrics, warmup_period_endtime,
                                         cooldown_period_starttime))
        # We have three throughput/resptimes values now from the three repetitions
        mw_avg = gmws.aggregate_over_timesteps(gmws.aggregate_over_reps(mw_reps))
        mt_avg = gmts.aggregate_over_timesteps(gmts.aggregate_over_reps(mt_reps))
        combined = pd.concat([mw_avg, mt_avg], axis=1)
        combined['multigetSize'] = multiget_size
        frames.append(combined)
    return pd.concat(frames)
def plot_mw_metric_over_time(inputdir, worker_configuration, metric_to_plot, folder_prefix, reps, middlewares, ax):
    """Plot a middleware metric over time, one line per worker config.

    For every worker configuration the per-second windows of all
    repetitions are concatenated and averaged per window timestamp
    before being drawn onto `ax`.
    """
    greens = cycler('color', ['#66c2a4', '#41ae76', '#238b45', '#005824'])
    ax.set_xlim([-5, 87])
    ax.set_prop_cycle(greens)
    for worker_config in worker_configuration:
        windows_per_rep = []
        for rep in reps:
            log_folder_path = create_log_folder_path(inputdir, folder_prefix,
                                                     rep, worker_config)
            mw_dirs = [
                dirfuncs.get_only_subdir(os.path.join(log_folder_path, d))
                for d in middlewares
            ]
            requests = pd.concat(
                [gmws.concatenate_requestlogs(d) for d in mw_dirs])
            windows_per_rep.append(
                gmws.aggregate_over_windows(gmws.extract_metrics(requests)))
        series = pd.concat(windows_per_rep)[metric_to_plot]
        # Average the repetitions at each window timestamp.
        mean_per_window = series.groupby(series.index).agg('mean')
        ax.plot(mean_per_window.index, mean_per_window, label=worker_config)
    ax.legend(loc="upper left")
    ax.set_title(
        "Experiment 3.1:\nThroughput (MW) vs. Time for different number of workers"
    )
    ax.set_xlabel("Time (sec)")
    ax.set_ylabel("Throughput (ops/sec)")
def graph_responsetime(worker, vc, num_threads, workload, middlewares, client_logfiles, reps, inputdir, xlim, ax, warmup_period_endtime=10, cooldown_period_starttime=72):
    """Plot a horizontal bar chart of response-time components (mean +/- std).

    Aggregates middleware request logs and client logs over `reps`
    repetitions for one workload/vc/worker configuration, renames the
    timing columns to human-readable labels and plots them on `ax`.

    Generalization: the warmup/cooldown cut boundaries were hard-coded to
    10 s / 72 s; they are now keyword parameters with those defaults, so
    existing callers are unaffected.
    """
    all_mw_metrics_per_rep = []
    all_mt_metrics_per_rep = []
    for rep in reps:
        run_dir = os.path.join(
            inputdir, "{}_{}vc{}workers".format(workload, vc, worker), str(rep))
        # Get MW response time
        middleware_dirs = [
            dirfuncs.get_only_subdir(os.path.join(run_dir, mw_dir))
            for mw_dir in middlewares
        ]
        concatenated_requests = [
            gmws.concatenate_requestlogs(middleware_dir)
            for middleware_dir in middleware_dirs
        ]
        metrics = [gmws.extract_metrics(reqs) for reqs in concatenated_requests]
        cut_metrics = [
            cut.cut_away_warmup_cooldown(mets, warmup_period_endtime,
                                         cooldown_period_starttime)
            for mets in metrics
        ]
        windows = [
            gmws.aggregate_over_windows(cut_mets) for cut_mets in cut_metrics
        ]
        rep_metrics = gmws.aggregate_over_middlewares(windows)
        all_mw_metrics_per_rep.append(rep_metrics)
        # Get MT response time
        client_logfile_paths = [
            os.path.join(run_dir, client_logfile)
            for client_logfile in client_logfiles
        ]
        client_metrics = gmts.aggregate_over_clients(client_logfile_paths)
        cut_client_metrics = cut.cut_away_warmup_cooldown(
            client_metrics, warmup_period_endtime, cooldown_period_starttime)
        all_mt_metrics_per_rep.append(cut_client_metrics)
    mw_agg_over_reps = gmws.aggregate_over_reps(all_mw_metrics_per_rep)
    mw_averages = gmws.aggregate_over_timesteps(mw_agg_over_reps)
    mt_agg_over_reps = gmts.aggregate_over_reps(all_mt_metrics_per_rep)
    mt_averages = gmts.aggregate_over_timesteps(mt_agg_over_reps)
    metrics = pd.concat([mw_averages, mt_averages], axis=1)
    names = [
        'NetThread Service Time', 'Queue Time', 'Memcached RTT',
        'Worker Service Time', 'Total Response Time (MW)',
        'Total Response Time (MT)'
    ]
    metrics.rename(
        {
            'netthreadServiceTime_ms': names[0],
            'queueTime_ms': names[1],
            'memcachedRTT_ms': names[2],
            'workerServiceTime_ms': names[3],
            'responseTime_ms': names[4],
            'responsetime': names[5]
        },
        axis='columns',
        inplace=True)
    means = metrics.loc['mean', names]
    stds = metrics.loc['std', names]
    color_cycler = [
        '#bf812d', '#c7eae5', '#80cdc1', '#01665e', '#003c30', '#bf812d'
    ]
    means.plot(ax=ax, kind='barh', xerr=stds, color=color_cycler)
    ax.set_title("{}, {} clients, {} workers".format(workload,
                                                     vc * num_threads, worker))
    ax.set_xlabel("Time (msec)")
    ax.set_xlim([0, xlim])
def get_responsetime_data_old(sharded, multigets, middlewares, client_logfiles, reps, inputdir, warmup_period_endtime=10, cooldown_period_starttime=72):
    """Return the mean MW + client metrics per multiget size (legacy logs).

    Uses gmws.extract_metrics_old, i.e. the legacy middleware log layout.
    The result is the 'mean' row of the concatenated per-multiget metrics,
    tagged with a 'multigets' column.

    Generalization: the warmup/cooldown cut points were hard-coded to
    10 s / 72 s and are now overridable keyword parameters (defaults
    unchanged, so existing callers behave identically).

    NOTE(review): another function with this exact name but a different
    signature is defined later in this module and shadows this one.
    """
    all_metrics_per_multiget = []
    for multiget in multigets:
        all_mw_metrics_per_rep = []
        all_mt_metrics_per_rep = []
        for rep in reps:
            run_dir = os.path.join(inputdir,
                                   "{}_{}multiget".format(sharded, multiget),
                                   str(rep))
            # Get MW response time
            middleware_dirs = [
                dirfuncs.get_only_subdir(os.path.join(run_dir, mw_dir))
                for mw_dir in middlewares
            ]
            concatenated_requests = [
                gmws.concatenate_requestlogs(middleware_dir)
                for middleware_dir in middleware_dirs
            ]
            metrics = [
                gmws.extract_metrics_old(reqs) for reqs in concatenated_requests
            ]
            cut_metrics = [
                cut.cut_away_warmup_cooldown(mets, warmup_period_endtime,
                                             cooldown_period_starttime)
                for mets in metrics
            ]
            windows = [
                gmws.aggregate_over_windows(cut_mets)
                for cut_mets in cut_metrics
            ]
            rep_metrics = gmws.aggregate_over_middlewares(windows)
            all_mw_metrics_per_rep.append(rep_metrics)
            # Get MT response time
            client_logfile_paths = [
                os.path.join(run_dir, client_logfile)
                for client_logfile in client_logfiles
            ]
            client_metrics = gmts.aggregate_over_clients(client_logfile_paths)
            cut_client_metrics = cut.cut_away_warmup_cooldown(
                client_metrics, warmup_period_endtime,
                cooldown_period_starttime)
            all_mt_metrics_per_rep.append(cut_client_metrics)
        mw_agg_over_reps = gmws.aggregate_over_reps(all_mw_metrics_per_rep)
        mw_averages = gmws.aggregate_over_timesteps(mw_agg_over_reps)
        mt_agg_over_reps = gmts.aggregate_over_reps(all_mt_metrics_per_rep)
        mt_averages = gmts.aggregate_over_timesteps(mt_agg_over_reps)
        metrics = pd.concat([mw_averages, mt_averages], axis=1)
        metrics['multigets'] = multiget
        all_metrics_per_multiget.append(metrics)
    all_metrics = pd.concat(all_metrics_per_multiget)
    return all_metrics.loc['mean', :]
def get_worker_data(worker_config, matching_dirs, num_repetitions, middlewares, client_logfiles, warmup_period_endtime, cooldown_period_starttime, num_threads):
    """Collect MW + client metrics over all vc experiment dirs for a worker config.

    For each experiment directory the per-repetition middleware logs are
    windowed and aggregated (the last window of each middleware is
    dropped), client logs are aggregated and trimmed, and the per-vc
    results are concatenated into one DataFrame carrying 'vc_per_thread'
    and a derived 'num_clients' column.

    Bug fix: the original ended with a bare `return` statement followed by
    `all_metrics_for_worker` on the next line, so it always returned None
    and the result expression was dead code; it now returns the DataFrame.
    """
    all_metrics_per_vc = []
    for experiment_dir in matching_dirs:
        num_vc = find_num_vc(experiment_dir)
        all_mw_metrics_per_rep = []
        all_mt_metrics_per_rep = []
        for rep in range(1, num_repetitions + 1):
            middleware_dirs = [
                get_only_subdir(os.path.join(experiment_dir, str(rep), mw_dir))
                for mw_dir in middlewares
            ]
            concatenated_requests = [
                gmws.concatenate_requestlogs(middleware_dir)
                for middleware_dir in middleware_dirs
            ]
            metrics = [
                gmws.extract_metrics(reqs) for reqs in concatenated_requests
            ]
            cut_metrics = [
                cut_away_warmup_cooldown(mets, warmup_period_endtime,
                                         cooldown_period_starttime)
                for mets in metrics
            ]
            windows = [
                gmws.aggregate_over_windows(cut_mets)
                for cut_mets in cut_metrics
            ]
            # Drop the last (presumably incomplete) window of every
            # middleware — TODO confirm intent against gmws.
            windows = [win[:-1] for win in windows]
            rep_metrics = gmws.aggregate_over_middlewares(windows)
            all_mw_metrics_per_rep.append(rep_metrics)
            client_logfile_paths = [
                os.path.join(experiment_dir, str(rep), client_logfile)
                for client_logfile in client_logfiles
            ]
            client_metrics = gmts.aggregate_over_clients(client_logfile_paths)
            cut_client_metrics = cut_away_warmup_cooldown(
                client_metrics, warmup_period_endtime,
                cooldown_period_starttime)
            all_mt_metrics_per_rep.append(cut_client_metrics)
        # We have three throughput/resptimes values now from the three repetitions
        mw_agg_over_reps = gmws.aggregate_over_reps(all_mw_metrics_per_rep)
        mw_averages = gmws.aggregate_over_timesteps(mw_agg_over_reps)
        mt_agg_over_reps = gmts.aggregate_over_reps(all_mt_metrics_per_rep)
        mt_averages = gmts.aggregate_over_timesteps(mt_agg_over_reps)
        metrics_per_vc = pd.concat([mw_averages, mt_averages], axis=1)
        metrics_per_vc['vc_per_thread'] = num_vc
        all_metrics_per_vc.append(metrics_per_vc)
    all_metrics_for_worker = pd.concat(all_metrics_per_vc)
    all_metrics_for_worker['num_clients'] = (
        all_metrics_for_worker['vc_per_thread'] * num_threads)
    return all_metrics_for_worker
def get_responsetime_data_old(worker, vc_settings, num_threads, workload, middlewares, client_logfiles, reps, inputdir, warmup_period_endtime=10, cooldown_period_starttime=72):
    """Return the mean MW + client metrics per client count (legacy logs).

    Iterates over the virtual-client settings, aggregates the legacy-format
    middleware logs (gmws.extract_metrics_old) and the client logs over
    `reps` repetitions, and returns the 'mean' row of the concatenated
    result, tagged with a 'num_clients' column.

    Changes: the hard-coded 10 s / 72 s warmup/cooldown cut points became
    keyword parameters (defaults unchanged), and a stale commented-out
    column-renaming block was removed. This definition shadows an earlier
    function of the same name in this module.
    """
    all_metrics_per_vc = []
    for vc in vc_settings:
        all_mw_metrics_per_rep = []
        all_mt_metrics_per_rep = []
        for rep in reps:
            run_dir = os.path.join(
                inputdir, "{}_{}vc{}workers".format(workload, vc, worker),
                str(rep))
            # Get MW response time
            middleware_dirs = [
                dirfuncs.get_only_subdir(os.path.join(run_dir, mw_dir))
                for mw_dir in middlewares
            ]
            concatenated_requests = [
                gmws.concatenate_requestlogs(middleware_dir)
                for middleware_dir in middleware_dirs
            ]
            metrics = [
                gmws.extract_metrics_old(reqs) for reqs in concatenated_requests
            ]
            cut_metrics = [
                cut.cut_away_warmup_cooldown(mets, warmup_period_endtime,
                                             cooldown_period_starttime)
                for mets in metrics
            ]
            windows = [
                gmws.aggregate_over_windows(cut_mets)
                for cut_mets in cut_metrics
            ]
            rep_metrics = gmws.aggregate_over_middlewares(windows)
            all_mw_metrics_per_rep.append(rep_metrics)
            # Get MT response time
            client_logfile_paths = [
                os.path.join(run_dir, client_logfile)
                for client_logfile in client_logfiles
            ]
            client_metrics = gmts.aggregate_over_clients(client_logfile_paths)
            cut_client_metrics = cut.cut_away_warmup_cooldown(
                client_metrics, warmup_period_endtime,
                cooldown_period_starttime)
            all_mt_metrics_per_rep.append(cut_client_metrics)
        mw_agg_over_reps = gmws.aggregate_over_reps(all_mw_metrics_per_rep)
        mw_averages = gmws.aggregate_over_timesteps(mw_agg_over_reps)
        mt_agg_over_reps = gmts.aggregate_over_reps(all_mt_metrics_per_rep)
        mt_averages = gmts.aggregate_over_timesteps(mt_agg_over_reps)
        metrics = pd.concat([mw_averages, mt_averages], axis=1)
        metrics['num_clients'] = vc * num_threads
        all_metrics_per_vc.append(metrics)
    all_metrics = pd.concat(all_metrics_per_vc)
    return all_metrics.loc['mean', :]
def extract_summary_from_config(inputdir, workload, worker, vc, reps, xput_client, resptime_client, thinktime_ms, num_threads, middlewares, client_logfiles, warmup_period_endtime, cooldown_period_starttime):
    """Print MW and client (memtier) summary statistics for one config.

    Aggregates middleware logs over `reps` repetitions, computes the
    client-side miss rate from the client log totals, and prints both
    sides of the comparison.

    Bug fix: the value printed under "Miss rate MW" was
    `numKeysRequested - numHits`, i.e. an absolute miss count per window,
    not a rate. It is now normalized by numKeysRequested (guarding against
    division by zero) so it is comparable to the client-side miss rate.

    NOTE(review): the client logfile paths are built from the loop
    variable `log_folder_path` after the loop ends, so only the LAST
    repetition's client logs feed missrate_client — confirm intent.
    """
    unformatted_foldername = "{}_{}vc{}workers"
    foldername = unformatted_foldername.format(workload, str(int(vc)),
                                               str(int(worker)))
    all_reps = []
    for rep in range(1, reps + 1):
        log_folder_path = os.path.join(inputdir, foldername, str(int(rep)))
        # Now we extract throughput, responsetime, average queuetime and missrate from the middleware
        middleware_dirs = [
            dirfuncs.get_only_subdir(os.path.join(log_folder_path, mw_dir))
            for mw_dir in middlewares
        ]
        concatenated_requests = [
            gmws.concatenate_requestlogs(middleware_dir)
            for middleware_dir in middleware_dirs
        ]
        metrics = [gmws.extract_metrics(reqs) for reqs in concatenated_requests]
        cut_metrics = [
            cut.cut_away_warmup_cooldown(mets, warmup_period_endtime,
                                         cooldown_period_starttime)
            for mets in metrics
        ]
        windows = [
            gmws.aggregate_over_windows(cut_mets) for cut_mets in cut_metrics
        ]
        rep_metrics = gmws.aggregate_over_middlewares(windows)
        all_reps.append(rep_metrics)
    mw_agg_over_reps = gmws.aggregate_over_reps(all_reps)
    mw_averages = gmws.aggregate_over_timesteps(mw_agg_over_reps)
    avg = mw_averages.loc['mean', :]
    client_logfile_paths = [
        os.path.join(log_folder_path, client_logfile)
        for client_logfile in client_logfiles
    ]
    total_hits_misses = [
        gmts.extract_total_numbers(filepath)
        for filepath in client_logfile_paths
    ]
    totals_df = pd.DataFrame(
        data=total_hits_misses,
        columns=['total_opsec', 'hits_persec', 'misses_persec'])
    missrate_client = gmts.calculate_miss_rate(totals_df)
    # Miss rate = fraction of requested keys that missed.
    keys_requested = avg['numKeysRequested']
    missrate_mw = ((keys_requested - avg['numHits']) / keys_requested
                   if keys_requested else float('nan'))
    num_clients = vc * num_threads
    print("{} workers:\n".format(worker))
    print(
        "Throughput MW:\t\t\t\t\t\t\t\t{}\nResponse Time MW:\t\t\t\t\t\t{}\nAvg Time in Queue:\t\t\t\t\t\t{}\nMiss rate MW:\t\t\t\t\t\t\t{}\nThroughput MT:\t\t\t\t\t\t{}\nResponse Time MT:\t\t\t\t\t{}\nMiss rate MT:\t\t\t\t\t{}\nNum Clients:\t\t\t\t\t{}\n"
        .format(avg['throughput_mw'], avg['responseTime_ms'],
                avg['queueTime_ms'], missrate_mw, xput_client,
                resptime_client, missrate_client, num_clients))
def graph_queuelength(worker_configs, vc, num_threads, workload, middlewares, reps, inputdir, ylim, ax):
    """Bar-plot the average middleware queue length per worker config.

    For every worker count the middleware logs of all repetitions are
    aggregated (warmup/cooldown cut at 10 s / 72 s), and the mean queue
    length is drawn as a bar with the std as the error bar.

    Cleanup: removed an unused `names` list and a stray third argument to
    the two-placeholder title format string; plotted output is unchanged.
    """
    metrics_per_worker = []
    for worker in worker_configs:
        all_mw_metrics_per_rep = []
        for rep in reps:
            run_dir = os.path.join(
                inputdir, "{}_{}vc{}workers".format(workload, vc, worker),
                str(rep))
            # Get MW response time
            middleware_dirs = [
                dirfuncs.get_only_subdir(os.path.join(run_dir, mw_dir))
                for mw_dir in middlewares
            ]
            concatenated_requests = [
                gmws.concatenate_requestlogs(middleware_dir)
                for middleware_dir in middleware_dirs
            ]
            metrics = [
                gmws.extract_metrics(reqs) for reqs in concatenated_requests
            ]
            cut_metrics = [
                cut.cut_away_warmup_cooldown(mets, 10, 72) for mets in metrics
            ]
            windows = [
                gmws.aggregate_over_windows(cut_mets)
                for cut_mets in cut_metrics
            ]
            rep_metrics = gmws.aggregate_over_middlewares(windows)
            all_mw_metrics_per_rep.append(rep_metrics)
        mw_agg_over_reps = gmws.aggregate_over_reps(all_mw_metrics_per_rep)
        mw_averages = gmws.aggregate_over_timesteps(mw_agg_over_reps)
        mw_averages['worker'] = worker
        metrics = mw_averages.loc[:, ['queueLength', 'worker']]
        metrics_per_worker.append(metrics)
    metrics = pd.concat(metrics_per_worker)
    metrics.rename(index={0: '8', 1: '16', 2: '32', 3: '64'}, inplace=True)
    # Relabel the per-config rows with the worker counts for the x axis.
    y = metrics.loc['mean', ['queueLength']].reset_index(drop=True).rename(
        index={
            0: '8',
            1: '16',
            2: '32',
            3: '64'
        })
    stds = metrics.loc['std', ['queueLength']].reset_index(drop=True).rename(
        index={
            0: '8',
            1: '16',
            2: '32',
            3: '64'
        })
    color_cycler = ['#66c2a4', '#41ae76', '#238b45', '#005824']
    y.plot(ax=ax, kind='bar', yerr=stds, color=color_cycler)
    ax.set_title("{}, {} clients".format(workload, vc * num_threads))
    ax.set_xlabel("Workers per Middleware")
    ax.set_ylabel("Average Queue Length")
    ax.set_ylim([0, ylim])
    ax.legend().set_visible(False)