Ejemplo n.º 1
0
def find_max_throughput(inputdir, workload, worker, vc_configs, reps,
                        client_logfiles, warmup_period_endtime,
                        cooldown_period_starttime):
    """Return the virtual-client configuration with the highest throughput.

    For every entry of ``vc_configs`` the client logs of repetitions
    ``1..reps`` are read from
    ``<inputdir>/<workload>_<vc>vc<worker>workers/<rep>/<client_logfile>``,
    combined with ``gmts.aggregate_over_clients``, trimmed with
    ``cut.cut_away_warmup_cooldown`` and then reduced with
    ``gmts.aggregate_over_reps`` and ``gmts.aggregate_over_timesteps``.
    The ``'mean'`` row of that result supplies the response time and
    throughput recorded for the configuration.

    Args:
        inputdir: Root directory containing the experiment folders.
        workload: Workload name used as the folder-name prefix.
        worker: Worker count embedded in the folder name.
        vc_configs: Iterable of virtual-client settings to compare.
        reps: Number of repetitions; repetition folders are named 1..reps.
        client_logfiles: File names of the client logs inside each
            repetition folder.
        warmup_period_endtime: Passed through to
            ``cut.cut_away_warmup_cooldown``.
        cooldown_period_starttime: Passed through to
            ``cut.cut_away_warmup_cooldown``.

    Returns:
        The row (with entries ``vc``, ``responsetime`` and ``throughput``)
        of the configuration whose mean throughput is largest.

    Raises:
        ValueError: If ``vc_configs`` yields no configuration.
    """
    foldername_template = "{}_{}vc{}workers"

    config_throughput_maxima = []
    for vc in vc_configs:
        foldername = foldername_template.format(workload, vc, worker)
        all_rep_windows = []
        for rep in range(1, reps + 1):
            log_folder_path = os.path.join(inputdir, foldername, str(rep))
            client_logfile_paths = [
                os.path.join(log_folder_path, client_logfile)
                for client_logfile in client_logfiles
            ]
            window = gmts.aggregate_over_clients(client_logfile_paths)
            window = cut.cut_away_warmup_cooldown(window,
                                                  warmup_period_endtime,
                                                  cooldown_period_starttime)
            all_rep_windows.append(window)

        mt_agg_over_reps = gmts.aggregate_over_reps(all_rep_windows)
        mt_averages = gmts.aggregate_over_timesteps(mt_agg_over_reps)
        mean_vals = mt_averages.loc['mean', ['responsetime', 'throughput']]
        # Access by label: integer keys on a label-indexed Series are a
        # deprecated positional fallback in pandas.
        config_throughput_maxima.append(
            (vc, mean_vals['responsetime'], mean_vals['throughput']))

    if not config_throughput_maxima:
        # Fail with a clear message instead of the opaque error idxmax()
        # raises on an empty column.
        raise ValueError("vc_configs must contain at least one configuration")

    all_config_maxima = pd.DataFrame(
        data=config_throughput_maxima,
        columns=['vc', 'responsetime', 'throughput'])
    # idxmax() returns an index *label*, so look the row up with .loc.
    best_label = all_config_maxima['throughput'].idxmax()
    return all_config_maxima.loc[best_label]
Ejemplo n.º 2
0
def plot_mt_metric_over_time(inputdir, worker_configuration, metric_to_plot,
                             folder_prefix, reps, client_logfiles, ax):
    """Plot one client-side metric over time, one line per worker setting.

    For each worker setting the client logs of every repetition are combined
    with ``gmts.aggregate_over_clients``; the ``metric_to_plot`` column of
    all repetitions is then averaged per index value and drawn on ``ax``.

    Note that the title and y-axis label are fixed throughput texts,
    independent of ``metric_to_plot``.

    Args:
        inputdir: Root directory handed to ``create_log_folder_path``.
        worker_configuration: Iterable of worker settings; each becomes one
            labelled line.
        metric_to_plot: Column name selected from the aggregated data.
        folder_prefix: Prefix handed to ``create_log_folder_path``.
        reps: Iterable of repetition identifiers.
        client_logfiles: File names of the client logs in each log folder.
        ax: Axes object to draw on.
    """
    ax.set_prop_cycle(
        cycler('color', ['#66c2a4', '#41ae76', '#238b45', '#005824']))

    for workers in worker_configuration:
        per_rep_frames = []
        for rep in reps:
            rep_folder = create_log_folder_path(inputdir, folder_prefix, rep,
                                                workers)
            logfile_paths = [
                os.path.join(rep_folder, name) for name in client_logfiles
            ]
            per_rep_frames.append(gmts.aggregate_over_clients(logfile_paths))

        # Stack all repetitions and average the rows sharing an index value.
        metric_series = pd.concat(per_rep_frames)[metric_to_plot]
        mean_per_step = metric_series.groupby(metric_series.index).mean()
        ax.plot(mean_per_step.index, mean_per_step, label=workers)

    ax.set_xlim([-5, 87])
    ax.legend(loc="upper left")
    ax.set_title(
        "Experiment 3.1:\nThroughput (MW) vs. Time for different number of workers"
    )
    ax.set_xlabel("Time (sec)")
    ax.set_ylabel("Throughput (ops/sec)")
Ejemplo n.º 3
0
def get_data(matching_dirs, num_repetitions, middlewares, client_logfiles,
             warmup_period_endtime, cooldown_period_starttime, num_threads):
    """Collect middleware and client metrics for each multiget experiment.

    For every directory in ``matching_dirs`` and every repetition
    ``1..num_repetitions`` the middleware request logs are loaded, restricted
    to rows whose ``requestType`` contains ``"GET"``, trimmed with
    ``cut_away_warmup_cooldown`` and aggregated; the client logs are
    aggregated and trimmed the same way. Both sides are then reduced over
    repetitions and timesteps and joined column-wise.

    Args:
        matching_dirs: Iterable of experiment directories.
        num_repetitions: Number of repetitions; folders are named
            1..num_repetitions.
        middlewares: Middleware sub-directory names inside a repetition.
        client_logfiles: Client log file names inside a repetition.
        warmup_period_endtime: Passed to ``cut_away_warmup_cooldown``.
        cooldown_period_starttime: Passed to ``cut_away_warmup_cooldown``.
        num_threads: Not used by this function.

    Returns:
        The concatenation of the per-experiment results, each tagged with a
        ``multigetSize`` column taken from ``find_num_multigets``.
    """
    per_experiment = []
    for experiment_dir in matching_dirs:
        multiget_size = find_num_multigets(experiment_dir)
        mw_reps = []
        mt_reps = []
        for rep in range(1, num_repetitions + 1):
            rep_dir = os.path.join(experiment_dir, str(rep))

            # Middleware side: each stage is completed for all middlewares
            # before the next stage starts.
            mw_dirs = [
                get_only_subdir(os.path.join(rep_dir, mw_name))
                for mw_name in middlewares
            ]
            request_logs = [
                gmws.concatenate_requestlogs(mw_dir) for mw_dir in mw_dirs
            ]
            get_requests = [
                log[log['requestType'].str.contains("GET")]
                for log in request_logs
            ]
            extracted = [gmws.extract_metrics(log) for log in get_requests]
            trimmed = [
                cut_away_warmup_cooldown(item, warmup_period_endtime,
                                         cooldown_period_starttime)
                for item in extracted
            ]
            windowed = [gmws.aggregate_over_windows(item) for item in trimmed]
            mw_reps.append(gmws.aggregate_over_middlewares(windowed))

            # Client side.
            logfile_paths = [
                os.path.join(rep_dir, name) for name in client_logfiles
            ]
            client_data = gmts.aggregate_over_clients(logfile_paths)
            mt_reps.append(
                cut_away_warmup_cooldown(client_data, warmup_period_endtime,
                                         cooldown_period_starttime))

        # Collapse the repetitions, then the timesteps, for both sides.
        mw_summary = gmws.aggregate_over_timesteps(
            gmws.aggregate_over_reps(mw_reps))
        mt_summary = gmts.aggregate_over_timesteps(
            gmts.aggregate_over_reps(mt_reps))

        combined = pd.concat([mw_summary, mt_summary], axis=1)
        combined['multigetSize'] = multiget_size
        per_experiment.append(combined)

    return pd.concat(per_experiment)
Ejemplo n.º 4
0
def graph_responsetime(worker, vc, num_threads, workload, middlewares,
                       client_logfiles, reps, inputdir, xlim, ax,
                       warmup_period_endtime=10,
                       cooldown_period_starttime=72):
    """Draw a horizontal bar chart of response-time components for one run.

    Middleware request logs and client logs of every repetition in ``reps``
    are read from ``<inputdir>/<workload>_<vc>vc<worker>workers/<rep>``,
    trimmed with ``cut.cut_away_warmup_cooldown`` and aggregated over
    repetitions and timesteps. The ``'mean'`` row of the combined result is
    plotted as bars and the ``'std'`` row as error bars.

    Args:
        worker: Worker count embedded in the folder name and the title.
        vc: Virtual-client setting embedded in the folder name.
        num_threads: Multiplied with ``vc`` for the client count in the title.
        workload: Workload name used as folder prefix and in the title.
        middlewares: Middleware sub-directory names inside a repetition.
        client_logfiles: Client log file names inside a repetition.
        reps: Iterable of repetition identifiers.
        inputdir: Root directory containing the experiment folders.
        xlim: Upper limit of the x axis.
        ax: Axes object to draw on.
        warmup_period_endtime: Passed to ``cut.cut_away_warmup_cooldown``;
            defaults to the previously hard-coded value 10.
        cooldown_period_starttime: Passed to
            ``cut.cut_away_warmup_cooldown``; defaults to the previously
            hard-coded value 72.
    """
    all_mw_metrics_per_rep = []
    all_mt_metrics_per_rep = []
    for rep in reps:
        run_dir = os.path.join(inputdir,
                               "{}_{}vc{}workers".format(workload, vc, worker),
                               str(rep))

        # Get MW response time
        middleware_dirs = [
            dirfuncs.get_only_subdir(os.path.join(run_dir, mw_dir))
            for mw_dir in middlewares
        ]
        concatenated_requests = [
            gmws.concatenate_requestlogs(middleware_dir)
            for middleware_dir in middleware_dirs
        ]
        metrics = [
            gmws.extract_metrics(reqs) for reqs in concatenated_requests
        ]
        cut_metrics = [
            cut.cut_away_warmup_cooldown(mets, warmup_period_endtime,
                                         cooldown_period_starttime)
            for mets in metrics
        ]
        windows = [
            gmws.aggregate_over_windows(cut_mets) for cut_mets in cut_metrics
        ]
        all_mw_metrics_per_rep.append(
            gmws.aggregate_over_middlewares(windows))

        # Get MT response time
        client_logfile_paths = [
            os.path.join(run_dir, client_logfile)
            for client_logfile in client_logfiles
        ]
        client_metrics = gmts.aggregate_over_clients(client_logfile_paths)
        all_mt_metrics_per_rep.append(
            cut.cut_away_warmup_cooldown(client_metrics,
                                         warmup_period_endtime,
                                         cooldown_period_starttime))

    mw_agg_over_reps = gmws.aggregate_over_reps(all_mw_metrics_per_rep)
    mw_averages = gmws.aggregate_over_timesteps(mw_agg_over_reps)
    mt_agg_over_reps = gmts.aggregate_over_reps(all_mt_metrics_per_rep)
    mt_averages = gmts.aggregate_over_timesteps(mt_agg_over_reps)

    # Raw column name -> human-readable bar label, in plotting order.
    display_names = {
        'netthreadServiceTime_ms': 'NetThread Service Time',
        'queueTime_ms': 'Queue Time',
        'memcachedRTT_ms': 'Memcached RTT',
        'workerServiceTime_ms': 'Worker Service Time',
        'responseTime_ms': 'Total Response Time (MW)',
        'responsetime': 'Total Response Time (MT)',
    }
    names = list(display_names.values())
    metrics = pd.concat([mw_averages, mt_averages],
                        axis=1).rename(columns=display_names)

    means = metrics.loc['mean', names]
    stds = metrics.loc['std', names]
    bar_colors = [
        '#bf812d', '#c7eae5', '#80cdc1', '#01665e', '#003c30', '#bf812d'
    ]
    means.plot(ax=ax, kind='barh', xerr=stds, color=bar_colors)
    ax.set_title("{}, {} clients, {} workers".format(workload,
                                                     vc * num_threads, worker))
    ax.set_xlabel("Time (msec)")
    ax.set_xlim([0, xlim])
Ejemplo n.º 5
0
def get_responsetime_data_old(sharded, multigets, middlewares, client_logfiles,
                              reps, inputdir, warmup_period_endtime=10,
                              cooldown_period_starttime=72):
    """Collect mean middleware and client metrics per multiget setting.

    For every entry of ``multigets`` and every repetition in ``reps`` the
    logs under ``<inputdir>/<sharded>_<multiget>multiget/<rep>`` are loaded
    (middleware metrics via ``gmws.extract_metrics_old``), trimmed with
    ``cut.cut_away_warmup_cooldown`` and aggregated over repetitions and
    timesteps.

    Args:
        sharded: Folder-name prefix of the experiment.
        multigets: Iterable of multiget settings.
        middlewares: Middleware sub-directory names inside a repetition.
        client_logfiles: Client log file names inside a repetition.
        reps: Iterable of repetition identifiers.
        inputdir: Root directory containing the experiment folders.
        warmup_period_endtime: Passed to ``cut.cut_away_warmup_cooldown``;
            defaults to the previously hard-coded value 10.
        cooldown_period_starttime: Passed to
            ``cut.cut_away_warmup_cooldown``; defaults to the previously
            hard-coded value 72.

    Returns:
        The ``'mean'`` rows of the concatenated per-setting results, each
        tagged with a ``multigets`` column.

    Raises:
        ValueError: From ``pandas.concat`` when ``multigets`` is empty.
    """
    all_metrics_per_multiget = []
    for multiget in multigets:
        all_mw_metrics_per_rep = []
        all_mt_metrics_per_rep = []
        for rep in reps:
            run_dir = os.path.join(inputdir,
                                   "{}_{}multiget".format(sharded, multiget),
                                   str(rep))

            # Get MW response time
            middleware_dirs = [
                dirfuncs.get_only_subdir(os.path.join(run_dir, mw_dir))
                for mw_dir in middlewares
            ]
            concatenated_requests = [
                gmws.concatenate_requestlogs(middleware_dir)
                for middleware_dir in middleware_dirs
            ]
            metrics = [
                gmws.extract_metrics_old(reqs)
                for reqs in concatenated_requests
            ]
            cut_metrics = [
                cut.cut_away_warmup_cooldown(mets, warmup_period_endtime,
                                             cooldown_period_starttime)
                for mets in metrics
            ]
            windows = [
                gmws.aggregate_over_windows(cut_mets)
                for cut_mets in cut_metrics
            ]
            all_mw_metrics_per_rep.append(
                gmws.aggregate_over_middlewares(windows))

            # Get MT response time
            client_logfile_paths = [
                os.path.join(run_dir, client_logfile)
                for client_logfile in client_logfiles
            ]
            client_metrics = gmts.aggregate_over_clients(client_logfile_paths)
            all_mt_metrics_per_rep.append(
                cut.cut_away_warmup_cooldown(client_metrics,
                                             warmup_period_endtime,
                                             cooldown_period_starttime))

        mw_agg_over_reps = gmws.aggregate_over_reps(all_mw_metrics_per_rep)
        mw_averages = gmws.aggregate_over_timesteps(mw_agg_over_reps)
        mt_agg_over_reps = gmts.aggregate_over_reps(all_mt_metrics_per_rep)
        mt_averages = gmts.aggregate_over_timesteps(mt_agg_over_reps)

        metrics = pd.concat([mw_averages, mt_averages], axis=1)
        metrics['multigets'] = multiget
        all_metrics_per_multiget.append(metrics)

    all_metrics = pd.concat(all_metrics_per_multiget)
    return all_metrics.loc['mean', :]
Ejemplo n.º 6
0
def get_worker_data(worker_config, matching_dirs, num_repetitions, middlewares,
                    client_logfiles, warmup_period_endtime,
                    cooldown_period_starttime, num_threads):
    """Collect middleware and client metrics for each experiment directory.

    For every directory in ``matching_dirs`` and every repetition
    ``1..num_repetitions`` the middleware request logs and the client logs
    are loaded, trimmed with ``cut_away_warmup_cooldown`` and aggregated.
    The last entry of each middleware's windowed data is dropped before the
    middlewares are combined. Both sides are then reduced over repetitions
    and timesteps and joined column-wise.

    Args:
        worker_config: Not used by this function.
        matching_dirs: Iterable of experiment directories.
        num_repetitions: Number of repetitions; folders are named
            1..num_repetitions.
        middlewares: Middleware sub-directory names inside a repetition.
        client_logfiles: Client log file names inside a repetition.
        warmup_period_endtime: Passed to ``cut_away_warmup_cooldown``.
        cooldown_period_starttime: Passed to ``cut_away_warmup_cooldown``.
        num_threads: Factor applied to ``vc_per_thread`` to obtain
            ``num_clients``.

    Returns:
        The concatenation of the per-directory results with two extra
        columns: ``vc_per_thread`` (from ``find_num_vc``) and
        ``num_clients`` (``vc_per_thread * num_threads``).
    """
    per_experiment = []
    for experiment_dir in matching_dirs:
        vc_per_thread = find_num_vc(experiment_dir)
        mw_reps = []
        mt_reps = []
        for rep in range(1, num_repetitions + 1):
            rep_dir = os.path.join(experiment_dir, str(rep))

            # Middleware side: each stage is completed for all middlewares
            # before the next stage starts.
            mw_dirs = [
                get_only_subdir(os.path.join(rep_dir, mw_name))
                for mw_name in middlewares
            ]
            request_logs = [
                gmws.concatenate_requestlogs(mw_dir) for mw_dir in mw_dirs
            ]
            extracted = [gmws.extract_metrics(log) for log in request_logs]
            trimmed = [
                cut_away_warmup_cooldown(item, warmup_period_endtime,
                                         cooldown_period_starttime)
                for item in extracted
            ]
            windowed = [gmws.aggregate_over_windows(item) for item in trimmed]
            # Discard the final entry of every middleware's windows.
            windowed = [item[:-1] for item in windowed]
            mw_reps.append(gmws.aggregate_over_middlewares(windowed))

            # Client side.
            logfile_paths = [
                os.path.join(rep_dir, name) for name in client_logfiles
            ]
            client_data = gmts.aggregate_over_clients(logfile_paths)
            mt_reps.append(
                cut_away_warmup_cooldown(client_data, warmup_period_endtime,
                                         cooldown_period_starttime))

        # Collapse the repetitions, then the timesteps, for both sides.
        mw_summary = gmws.aggregate_over_timesteps(
            gmws.aggregate_over_reps(mw_reps))
        mt_summary = gmts.aggregate_over_timesteps(
            gmts.aggregate_over_reps(mt_reps))

        combined = pd.concat([mw_summary, mt_summary], axis=1)
        combined['vc_per_thread'] = vc_per_thread
        per_experiment.append(combined)

    result = pd.concat(per_experiment)
    result['num_clients'] = result['vc_per_thread'] * num_threads
    return result
Ejemplo n.º 7
0
def get_responsetime_data_old(worker, vc_settings, num_threads, workload,
                              middlewares, client_logfiles, reps, inputdir,
                              warmup_period_endtime=10,
                              cooldown_period_starttime=72):
    """Collect mean middleware and client metrics per virtual-client setting.

    For every entry of ``vc_settings`` and every repetition in ``reps`` the
    logs under ``<inputdir>/<workload>_<vc>vc<worker>workers/<rep>`` are
    loaded (middleware metrics via ``gmws.extract_metrics_old``), trimmed
    with ``cut.cut_away_warmup_cooldown`` and aggregated over repetitions
    and timesteps.

    Args:
        worker: Worker count embedded in the folder name.
        vc_settings: Iterable of virtual-client settings.
        num_threads: Multiplied with each setting to fill ``num_clients``.
        workload: Workload name used as the folder-name prefix.
        middlewares: Middleware sub-directory names inside a repetition.
        client_logfiles: Client log file names inside a repetition.
        reps: Iterable of repetition identifiers.
        inputdir: Root directory containing the experiment folders.
        warmup_period_endtime: Passed to ``cut.cut_away_warmup_cooldown``;
            defaults to the previously hard-coded value 10.
        cooldown_period_starttime: Passed to
            ``cut.cut_away_warmup_cooldown``; defaults to the previously
            hard-coded value 72.

    Returns:
        The ``'mean'`` rows of the concatenated per-setting results, each
        tagged with a ``num_clients`` column (``vc * num_threads``).

    Raises:
        ValueError: From ``pandas.concat`` when ``vc_settings`` is empty.
    """
    all_metrics_per_vc = []
    for vc in vc_settings:
        all_mw_metrics_per_rep = []
        all_mt_metrics_per_rep = []
        for rep in reps:
            run_dir = os.path.join(
                inputdir, "{}_{}vc{}workers".format(workload, vc, worker),
                str(rep))

            # Get MW response time
            middleware_dirs = [
                dirfuncs.get_only_subdir(os.path.join(run_dir, mw_dir))
                for mw_dir in middlewares
            ]
            concatenated_requests = [
                gmws.concatenate_requestlogs(middleware_dir)
                for middleware_dir in middleware_dirs
            ]
            metrics = [
                gmws.extract_metrics_old(reqs)
                for reqs in concatenated_requests
            ]
            cut_metrics = [
                cut.cut_away_warmup_cooldown(mets, warmup_period_endtime,
                                             cooldown_period_starttime)
                for mets in metrics
            ]
            windows = [
                gmws.aggregate_over_windows(cut_mets)
                for cut_mets in cut_metrics
            ]
            all_mw_metrics_per_rep.append(
                gmws.aggregate_over_middlewares(windows))

            # Get MT response time
            client_logfile_paths = [
                os.path.join(run_dir, client_logfile)
                for client_logfile in client_logfiles
            ]
            client_metrics = gmts.aggregate_over_clients(client_logfile_paths)
            all_mt_metrics_per_rep.append(
                cut.cut_away_warmup_cooldown(client_metrics,
                                             warmup_period_endtime,
                                             cooldown_period_starttime))

        mw_agg_over_reps = gmws.aggregate_over_reps(all_mw_metrics_per_rep)
        mw_averages = gmws.aggregate_over_timesteps(mw_agg_over_reps)
        mt_agg_over_reps = gmts.aggregate_over_reps(all_mt_metrics_per_rep)
        mt_averages = gmts.aggregate_over_timesteps(mt_agg_over_reps)

        metrics = pd.concat([mw_averages, mt_averages], axis=1)
        metrics['num_clients'] = vc * num_threads
        all_metrics_per_vc.append(metrics)

    all_metrics = pd.concat(all_metrics_per_vc)
    return all_metrics.loc['mean', :]