import os

import pandas as pd

# Project-local helper modules (import paths assumed; adjust to the package layout):
#   gmts / gmws  - parse and aggregate memtier / middleware request logs
#   cut          - trims the warmup and cooldown periods from a time series
#   dirfuncs     - helpers for navigating the experiment directory tree
# Helpers used unqualified below (get_only_subdir, cut_away_warmup_cooldown,
# find_num_multigets, find_num_vc) are assumed to be defined or imported
# elsewhere in this module.
import cut
import dirfuncs
import gmts
import gmws


def find_max_throughput(inputdir, workload, worker, vc_configs, reps,
                        client_logfiles, warmup_period_endtime,
                        cooldown_period_starttime):
    """Return the VC configuration with the highest mean client-side throughput."""
    unformatted_foldername = "{}_{}vc{}workers"
    config_throughput_maxima = []
    for vc in vc_configs:
        foldername = unformatted_foldername.format(workload, vc, worker)
        all_rep_windows = []
        for rep in range(1, reps + 1):
            log_folder_path = os.path.join(inputdir, foldername, str(rep))
            client_logfile_paths = [
                os.path.join(log_folder_path, client_logfile)
                for client_logfile in client_logfiles
            ]
            window = gmts.aggregate_over_clients(client_logfile_paths)
            window = cut.cut_away_warmup_cooldown(window, warmup_period_endtime,
                                                  cooldown_period_starttime)
            all_rep_windows.append(window)

        mt_agg_over_reps = gmts.aggregate_over_reps(all_rep_windows)
        mt_averages = gmts.aggregate_over_timesteps(mt_agg_over_reps)
        mean_vals = mt_averages.loc['mean', ['responsetime', 'throughput']]
        config_throughput_maxima.append(
            (vc, mean_vals['responsetime'], mean_vals['throughput']))

    all_config_maxima = pd.DataFrame(
        data=config_throughput_maxima,
        columns=['vc', 'responsetime', 'throughput'])
    max_configuration = all_config_maxima.iloc[
        all_config_maxima['throughput'].idxmax()]
    return max_configuration

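
# Example usage (hypothetical paths and parameters; the directory layout and log
# file names are assumptions, adjust them to the actual experiment output):
#
#   best = find_max_throughput(
#       inputdir="logs/experiment_3_1",
#       workload="writeonly",
#       worker=16,
#       vc_configs=[1, 4, 8, 16, 32],
#       reps=3,
#       client_logfiles=["client_01.log", "client_02.log", "client_03.log"],
#       warmup_period_endtime=10,
#       cooldown_period_starttime=72,
#   )
#   print(best)   # Series with 'vc', 'responsetime' and 'throughput' of the best config
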
def get_data(matching_dirs, num_repetitions, middlewares, client_logfiles,
             warmup_period_endtime, cooldown_period_starttime, num_threads):
    """Collect middleware- and client-side metrics for every multiget size."""
    all_metrics_per_multiget = []
    for experiment_dir in matching_dirs:
        num_multiget = find_num_multigets(experiment_dir)
        all_mw_metrics_per_rep = []
        all_mt_metrics_per_rep = []
        for rep in range(1, num_repetitions + 1):
            # Middleware-side metrics
            middleware_dirs = [
                get_only_subdir(os.path.join(experiment_dir, str(rep), mw_dir))
                for mw_dir in middlewares
            ]
            concatenated_requests = [
                gmws.concatenate_requestlogs(middleware_dir)
                for middleware_dir in middleware_dirs
            ]
            # Keep only GET (and multi-GET) requests
            filtered_requests = [
                reqs[reqs['requestType'].str.contains("GET")]
                for reqs in concatenated_requests
            ]
            metrics = [
                gmws.extract_metrics(reqs) for reqs in filtered_requests
            ]
            cut_metrics = [
                cut_away_warmup_cooldown(mets, warmup_period_endtime,
                                         cooldown_period_starttime)
                for mets in metrics
            ]
            windows = [
                gmws.aggregate_over_windows(cut_mets)
                for cut_mets in cut_metrics
            ]
            rep_metrics = gmws.aggregate_over_middlewares(windows)
            all_mw_metrics_per_rep.append(rep_metrics)

            # Client-side (memtier) metrics
            client_logfile_paths = [
                os.path.join(experiment_dir, str(rep), client_logfile)
                for client_logfile in client_logfiles
            ]
            client_metrics = gmts.aggregate_over_clients(client_logfile_paths)
            cut_client_metrics = cut_away_warmup_cooldown(
                client_metrics, warmup_period_endtime,
                cooldown_period_starttime)
            all_mt_metrics_per_rep.append(cut_client_metrics)

        # One throughput/response-time series per repetition: average them.
        mw_agg_over_reps = gmws.aggregate_over_reps(all_mw_metrics_per_rep)
        mw_averages = gmws.aggregate_over_timesteps(mw_agg_over_reps)

        mt_agg_over_reps = gmts.aggregate_over_reps(all_mt_metrics_per_rep)
        mt_averages = gmts.aggregate_over_timesteps(mt_agg_over_reps)

        metrics_per_vc = pd.concat([mw_averages, mt_averages], axis=1)
        metrics_per_vc['multigetSize'] = num_multiget
        all_metrics_per_multiget.append(metrics_per_vc)

    all_metrics = pd.concat(all_metrics_per_multiget)
    return all_metrics

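
# Example usage (hypothetical directory names and parameters; assumes each entry of
# matching_dirs contains one subdirectory per repetition):
#
#   import glob
#   dirs = sorted(glob.glob("logs/experiment_5_1/*multiget*"))
#   df = get_data(dirs, num_repetitions=3,
#                 middlewares=["middleware_04", "middleware_05"],
#                 client_logfiles=["client_01.log", "client_02.log", "client_03.log"],
#                 warmup_period_endtime=10, cooldown_period_starttime=72,
#                 num_threads=2)
#   # df holds mean/std rows of middleware and client metrics, one block per multiget size.
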
def graph_responsetime(worker, vc, num_threads, workload, middlewares,
                       client_logfiles, reps, inputdir, xlim, ax):
    """Plot the response-time breakdown (middleware components vs. client) as a bar chart."""
    all_mw_metrics_per_rep = []
    all_mt_metrics_per_rep = []
    for rep in reps:
        run_dir = os.path.join(
            inputdir, "{}_{}vc{}workers".format(workload, vc, worker), str(rep))

        # Get MW response time
        middleware_dirs = [
            dirfuncs.get_only_subdir(os.path.join(run_dir, mw_dir))
            for mw_dir in middlewares
        ]
        concatenated_requests = [
            gmws.concatenate_requestlogs(middleware_dir)
            for middleware_dir in middleware_dirs
        ]
        metrics = [
            gmws.extract_metrics(reqs) for reqs in concatenated_requests
        ]
        # Warmup ends at 10 s, cooldown starts at 72 s (hard-coded for this experiment)
        cut_metrics = [
            cut.cut_away_warmup_cooldown(mets, 10, 72) for mets in metrics
        ]
        windows = [
            gmws.aggregate_over_windows(cut_mets) for cut_mets in cut_metrics
        ]
        rep_metrics = gmws.aggregate_over_middlewares(windows)
        all_mw_metrics_per_rep.append(rep_metrics)

        # Get MT response time
        client_logfile_paths = [
            os.path.join(run_dir, client_logfile)
            for client_logfile in client_logfiles
        ]
        client_metrics = gmts.aggregate_over_clients(client_logfile_paths)
        cut_client_metrics = cut.cut_away_warmup_cooldown(client_metrics, 10, 72)
        all_mt_metrics_per_rep.append(cut_client_metrics)

    mw_agg_over_reps = gmws.aggregate_over_reps(all_mw_metrics_per_rep)
    mw_averages = gmws.aggregate_over_timesteps(mw_agg_over_reps)

    mt_agg_over_reps = gmts.aggregate_over_reps(all_mt_metrics_per_rep)
    mt_averages = gmts.aggregate_over_timesteps(mt_agg_over_reps)

    metrics = pd.concat([mw_averages, mt_averages], axis=1)

    names = [
        'NetThread Service Time', 'Queue Time', 'Memcached RTT',
        'Worker Service Time', 'Total Response Time (MW)',
        'Total Response Time (MT)'
    ]
    metrics.rename(
        {
            'netthreadServiceTime_ms': names[0],
            'queueTime_ms': names[1],
            'memcachedRTT_ms': names[2],
            'workerServiceTime_ms': names[3],
            'responseTime_ms': names[4],
            'responsetime': names[5]
        },
        axis='columns', inplace=True)

    means = metrics.loc['mean', names]
    stds = metrics.loc['std', names]
    color_cycler = [
        '#bf812d', '#c7eae5', '#80cdc1', '#01665e', '#003c30', '#bf812d'
    ]
    means.plot(ax=ax, kind='barh', xerr=stds, color=color_cycler)
    ax.set_title("{}, {} clients, {} workers".format(
        workload, vc * num_threads, worker))
    ax.set_xlabel("Time (msec)")
    ax.set_xlim([0, xlim])

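
# Example usage (hypothetical configuration; assumes matplotlib is available and the
# directory/log names match the experiment layout):
#
#   import matplotlib.pyplot as plt
#   fig, ax = plt.subplots(figsize=(8, 4))
#   graph_responsetime(worker=16, vc=8, num_threads=2, workload="writeonly",
#                      middlewares=["middleware_04", "middleware_05"],
#                      client_logfiles=["client_01.log", "client_02.log",
#                                       "client_03.log"],
#                      reps=[1, 2, 3], inputdir="logs/experiment_5_1",
#                      xlim=20, ax=ax)
#   fig.savefig("responsetime_breakdown.png", bbox_inches='tight')
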
def get_responsetime_data_old(sharded, multigets, middlewares, client_logfiles,
                              reps, inputdir):
    all_metrics_per_multiget = []
    for multiget in multigets:
        all_mw_metrics_per_rep = []
        all_mt_metrics_per_rep = []
        for rep in reps:
            run_dir = os.path.join(
                inputdir, "{}_{}multiget".format(sharded, multiget), str(rep))

            # Get MW response time
            middleware_dirs = [
                dirfuncs.get_only_subdir(os.path.join(run_dir, mw_dir))
                for mw_dir in middlewares
            ]
            concatenated_requests = [
                gmws.concatenate_requestlogs(middleware_dir)
                for middleware_dir in middleware_dirs
            ]
            metrics = [
                gmws.extract_metrics_old(reqs)
                for reqs in concatenated_requests
            ]
            cut_metrics = [
                cut.cut_away_warmup_cooldown(mets, 10, 72) for mets in metrics
            ]
            windows = [
                gmws.aggregate_over_windows(cut_mets)
                for cut_mets in cut_metrics
            ]
            rep_metrics = gmws.aggregate_over_middlewares(windows)
            all_mw_metrics_per_rep.append(rep_metrics)

            # Get MT response time
            client_logfile_paths = [
                os.path.join(run_dir, client_logfile)
                for client_logfile in client_logfiles
            ]
            client_metrics = gmts.aggregate_over_clients(client_logfile_paths)
            cut_client_metrics = cut.cut_away_warmup_cooldown(
                client_metrics, 10, 72)
            all_mt_metrics_per_rep.append(cut_client_metrics)

        mw_agg_over_reps = gmws.aggregate_over_reps(all_mw_metrics_per_rep)
        mw_averages = gmws.aggregate_over_timesteps(mw_agg_over_reps)

        mt_agg_over_reps = gmts.aggregate_over_reps(all_mt_metrics_per_rep)
        mt_averages = gmts.aggregate_over_timesteps(mt_agg_over_reps)

        metrics = pd.concat([mw_averages, mt_averages], axis=1)
        metrics['multigets'] = multiget
        all_metrics_per_multiget.append(metrics)

    all_metrics = pd.concat(all_metrics_per_multiget)
    return all_metrics.loc['mean', :]

def get_worker_data(worker_config, matching_dirs, num_repetitions, middlewares,
                    client_logfiles, warmup_period_endtime,
                    cooldown_period_starttime, num_threads):
    """Collect middleware- and client-side metrics for every VC setting of one worker config."""
    all_metrics_per_vc = []
    for experiment_dir in matching_dirs:
        num_vc = find_num_vc(experiment_dir)
        all_mw_metrics_per_rep = []
        all_mt_metrics_per_rep = []
        for rep in range(1, num_repetitions + 1):
            # Middleware-side metrics
            middleware_dirs = [
                get_only_subdir(os.path.join(experiment_dir, str(rep), mw_dir))
                for mw_dir in middlewares
            ]
            concatenated_requests = [
                gmws.concatenate_requestlogs(middleware_dir)
                for middleware_dir in middleware_dirs
            ]
            metrics = [
                gmws.extract_metrics(reqs) for reqs in concatenated_requests
            ]
            cut_metrics = [
                cut_away_warmup_cooldown(mets, warmup_period_endtime,
                                         cooldown_period_starttime)
                for mets in metrics
            ]
            windows = [
                gmws.aggregate_over_windows(cut_mets)
                for cut_mets in cut_metrics
            ]
            # Drop the last window of each middleware; the trailing window may be incomplete.
            windows = [win[:-1] for win in windows]
            rep_metrics = gmws.aggregate_over_middlewares(windows)
            all_mw_metrics_per_rep.append(rep_metrics)

            # Client-side (memtier) metrics
            client_logfile_paths = [
                os.path.join(experiment_dir, str(rep), client_logfile)
                for client_logfile in client_logfiles
            ]
            client_metrics = gmts.aggregate_over_clients(client_logfile_paths)
            cut_client_metrics = cut_away_warmup_cooldown(
                client_metrics, warmup_period_endtime,
                cooldown_period_starttime)
            all_mt_metrics_per_rep.append(cut_client_metrics)

        # One throughput/response-time series per repetition: average them.
        mw_agg_over_reps = gmws.aggregate_over_reps(all_mw_metrics_per_rep)
        mw_averages = gmws.aggregate_over_timesteps(mw_agg_over_reps)

        mt_agg_over_reps = gmts.aggregate_over_reps(all_mt_metrics_per_rep)
        mt_averages = gmts.aggregate_over_timesteps(mt_agg_over_reps)

        metrics_per_vc = pd.concat([mw_averages, mt_averages], axis=1)
        metrics_per_vc['vc_per_thread'] = num_vc
        all_metrics_per_vc.append(metrics_per_vc)

    all_metrics_for_worker = pd.concat(all_metrics_per_vc)
    all_metrics_for_worker['num_clients'] = (
        all_metrics_for_worker['vc_per_thread'] * num_threads)
    return all_metrics_for_worker

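
# Example usage (hypothetical values; the glob pattern and log names depend on the
# experiment layout):
#
#   import glob
#   dirs = sorted(glob.glob("logs/experiment_3_2/writeonly_*vc16workers"))
#   df16 = get_worker_data(worker_config=16, matching_dirs=dirs, num_repetitions=3,
#                          middlewares=["middleware_04", "middleware_05"],
#                          client_logfiles=["client_01.log", "client_02.log",
#                                           "client_03.log"],
#                          warmup_period_endtime=10, cooldown_period_starttime=72,
#                          num_threads=2)
#   # df16 gains a 'num_clients' column computed as 'vc_per_thread' * num_threads.
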
def get_responsetime_data_old(worker, vc_settings, num_threads, workload,
                              middlewares, client_logfiles, reps, inputdir):
    all_metrics_per_vc = []
    for vc in vc_settings:
        all_mw_metrics_per_rep = []
        all_mt_metrics_per_rep = []
        for rep in reps:
            run_dir = os.path.join(
                inputdir, "{}_{}vc{}workers".format(workload, vc, worker),
                str(rep))

            # Get MW response time
            middleware_dirs = [
                dirfuncs.get_only_subdir(os.path.join(run_dir, mw_dir))
                for mw_dir in middlewares
            ]
            concatenated_requests = [
                gmws.concatenate_requestlogs(middleware_dir)
                for middleware_dir in middleware_dirs
            ]
            metrics = [
                gmws.extract_metrics_old(reqs)
                for reqs in concatenated_requests
            ]
            cut_metrics = [
                cut.cut_away_warmup_cooldown(mets, 10, 72) for mets in metrics
            ]
            windows = [
                gmws.aggregate_over_windows(cut_mets)
                for cut_mets in cut_metrics
            ]
            rep_metrics = gmws.aggregate_over_middlewares(windows)
            all_mw_metrics_per_rep.append(rep_metrics)

            # Get MT response time
            client_logfile_paths = [
                os.path.join(run_dir, client_logfile)
                for client_logfile in client_logfiles
            ]
            client_metrics = gmts.aggregate_over_clients(client_logfile_paths)
            cut_client_metrics = cut.cut_away_warmup_cooldown(
                client_metrics, 10, 72)
            all_mt_metrics_per_rep.append(cut_client_metrics)

        mw_agg_over_reps = gmws.aggregate_over_reps(all_mw_metrics_per_rep)
        mw_averages = gmws.aggregate_over_timesteps(mw_agg_over_reps)

        mt_agg_over_reps = gmts.aggregate_over_reps(all_mt_metrics_per_rep)
        mt_averages = gmts.aggregate_over_timesteps(mt_agg_over_reps)

        metrics = pd.concat([mw_averages, mt_averages], axis=1)
        metrics['num_clients'] = vc * num_threads
        all_metrics_per_vc.append(metrics)

    all_metrics = pd.concat(all_metrics_per_vc)
    #names = ['NetThread Service Time', 'Queue Time', 'Memcached RTT', 'Worker Service Time',
    #         'Total Response Time (MW)', 'Total Response Time (MT)']
    #metrics.rename({'netthreadServiceTime_ms': names[0], 'queueTime_ms': names[1],
    #                'memcachedRTT_ms': names[2], 'workerServiceTime_ms': names[3],
    #                'responseTime_ms': names[4], 'responsetime': names[5]},
    #               axis='columns', inplace=True)
    return all_metrics.loc['mean', :]