예제 #1
0
def plot_cumm_mr(redis_db, num_iterations, workload_config, filter_config):
    """Plot performance against the cumulative number of MRs explored.

    For every iteration in ``range(num_iterations)`` the iteration summary is
    read from Redis and stored once per MR counted for that iteration, keyed
    by the running MR count. The resulting mapping is handed to
    ``get_by_mr_performance_charts`` for plotting.

    Args:
        redis_db: Redis client passed through to the datastore helpers.
        num_iterations: Number of iterations (starting at 0) to include.
        workload_config: Workload configuration; ``'tbot_metric'`` is read
            here and the whole dict is forwarded to the chart helper.
        filter_config: Filter configuration; ``'filter_policy'`` is read here.
    """
    mr_count = 0
    tbot_metric = workload_config['tbot_metric']
    all_mrs = get_all_mrs(redis_db)

    mr_to_performance = {}

    for exp_index in range(num_iterations):
        current_performance = tbot_datastore.read_summary_redis(redis_db, exp_index)

        # Count the MRs considered by the filtering process: the 'pipeline'
        # policy contributes one extra entry per iteration, any other policy
        # (including None) contributes nothing.
        if filter_config['filter_policy'] == 'pipeline':
            mr_to_performance[mr_count] = current_performance
            mr_count += 1

        # Count the MRs considered in the standard approach: every MR that
        # has a recorded result for this iteration adds one entry.
        for mr in all_mrs:
            metric = tbot_datastore.read_redis_result(redis_db, exp_index, mr, tbot_metric)
            if len(metric) != 0:
                mr_to_performance[mr_count] = current_performance
                mr_count += 1

    # Plot results from mr_to_performance
    get_by_mr_performance_charts(workload_config, num_iterations, mr_to_performance)

# INCOMPLETE
# Plot the results of the by MR performance
def get_by_mr_performance_charts(workload_config, num_iterations, mr_to_performance):
    """Save a chart of the values in ``mr_to_performance`` ordered by key.

    Args:
        workload_config: Workload configuration; ``'type'`` is used in the
            output directory, the chart title and the file name.
        num_iterations: Number embedded in the output file name.
        mr_to_performance: Mapping whose sorted items are plotted as
            (x, y) points.
    """
    experiment_type = workload_config['type']

    # NOTE(review): time_id is not defined in this function; it has to be
    # provided by the enclosing module.
    output_directory = 'results/graphs/mr/{}/'.format(experiment_type + str(time_id))

    # Sort by key, then transpose the (key, value) pairs into the separate
    # x and y sequences that plt.plot expects.
    ordered_points = sorted(mr_to_performance.items())
    plt.plot(*zip(*ordered_points))

    # NOTE(review): the title and x label talk about time, while the x values
    # are the keys of mr_to_performance -- confirm the intended wording.
    plt.title('{} Performance Over Time'.format(experiment_type))
    plt.xlabel('Elapsed Time (seconds)')
    plt.ylabel('Latency_99 (ms)')

    output_path = '{}{}{}performance.png'.format(
        output_directory, num_iterations, experiment_type)
    plt.savefig(output_path, bbox_inches='tight')
    # Clear the current figure so later charts start from a blank canvas.
    plt.clf()

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--config_file", help='Configuration file for Throttlebot execution')
    parser.add_argument("--reset_resources", action="store_true", help="Reset all resource allocation")
    parser.add_argument("--plot_cumm_mr", type=int, default=0, help="Plots the Performance vs cumulative MRs explored")
    args = parser.parse_args()

    sys_config,workload_config,filter_config = parse_config_file(args.config_file)
    
    redis_host = 'localhost'
    redis_db = redis.StrictRedis(host=redis_host, port=6379, db=0)
                                   
    if args.reset_resources:
        reset_resources()
    elif args.plot_cumm_mr != 0:
        print 'Plotting up to {} iterations'.format(args.plot_cumm_mr)
        plot_cumm_mr(redis_db, args.plot_cumm_mr, workload_config, filter_config)
예제 #2
0
def print_all_steps(redis_db, total_experiments):
    print 'Steps towards improving performance'
    for experiment_count in range(total_experiments):
        mimr, action_taken, perf_improvement = tbot_datastore.read_summary_redis(
            redis_db, experiment_count)
        print 'Iteration {}, Mimr = {}, New allocation = {}, Performance Improvement = {}'.format(
            experiment_count, mimr, action_taken, perf_improvement)
예제 #3
0
def get_performance_over_time_chart(redis_db, experiment_type,
                                    experiment_iteration_count,
                                    chart_directory):
    """Save a step chart of performance against elapsed time.

    Reads the summaries of iterations ``0`` through
    ``experiment_iteration_count`` (inclusive) and plots the fifth field of
    each summary against its sixth field.

    Args:
        redis_db: Redis client handed to ``tbot_datastore.read_summary_redis``.
        experiment_type: Label used in the chart title and the file name.
        experiment_iteration_count: Index of the last iteration to include.
        chart_directory: Prefix prepended verbatim to the output file name.
    """
    elapsed_times = []
    performances = []
    last_iteration = experiment_iteration_count + 1
    for iteration in range(last_iteration):
        summary = tbot_datastore.read_summary_redis(redis_db, iteration)
        # Only the performance and elapsed-time fields of the 7-field
        # summary are needed here.
        _, _, _, _, curr_perf, elaps_time, _ = summary
        elapsed_times.append(elaps_time)
        performances.append(curr_perf)

    plt.plot(elapsed_times, performances, drawstyle='steps-post')
    plt.title('{} Performance Over Time'.format(experiment_type))
    plt.xlabel('Elapsed Time (seconds)')
    plt.ylabel('Latency_99 (ms)')

    output_path = '{}{}{}performance_time.png'.format(
        chart_directory, experiment_iteration_count, experiment_type)
    plt.savefig(output_path)
    # Clear the current figure so the next chart starts empty.
    plt.clf()
예제 #4
0
def get_performance_over_mr_chart(redis_db, experiment_type,
                                  experiment_iteration_count, chart_directory):
    """Save a step chart of performance against the cumulative MR count.

    Reads the summaries of iterations ``0`` through
    ``experiment_iteration_count`` (inclusive) and plots the fifth field of
    each summary against its seventh field.

    Args:
        redis_db: Redis client handed to ``tbot_datastore.read_summary_redis``.
        experiment_type: Label used in the chart title and the file name.
        experiment_iteration_count: Index of the last iteration to include.
        chart_directory: Prefix prepended verbatim to the output file name.
    """
    mr_counts = []
    performances = []
    last_iteration = experiment_iteration_count + 1
    for iteration in range(last_iteration):
        summary = tbot_datastore.read_summary_redis(redis_db, iteration)
        # Only the performance and cumulative-MR fields of the 7-field
        # summary are needed here.
        _, _, _, _, curr_perf, _, cumulative_mr = summary
        mr_counts.append(cumulative_mr)
        performances.append(curr_perf)

    plt.plot(mr_counts, performances, drawstyle='steps-post')
    chart_title = '{} Performance Over Number of MRs Stressed'.format(
        experiment_type)
    plt.title(chart_title)
    plt.xlabel('Number of MRs Stressed')
    plt.ylabel('Latency_99 (ms)')

    output_path = '{}{}{}performance_mr.png'.format(
        chart_directory, experiment_iteration_count, experiment_type)
    plt.savefig(output_path)
    # Clear the current figure so the next chart starts empty.
    plt.clf()
예제 #5
0
def run(sys_config,
        workload_config,
        filter_config,
        default_mr_config,
        last_completed_iter=0):
    redis_host = sys_config['redis_host']
    baseline_trials = sys_config['baseline_trials']
    experiment_trials = sys_config['trials']
    stress_weights = sys_config['stress_weights']
    stress_policy = sys_config['stress_policy']
    resource_to_stress = sys_config['stress_these_resources']
    service_to_stress = sys_config['stress_these_services']
    vm_to_stress = sys_config['stress_these_machines']
    machine_type = sys_config['machine_type']
    quilt_overhead = sys_config['quilt_overhead']
    gradient_mode = sys_config['gradient_mode']

    preferred_performance_metric = workload_config['tbot_metric']
    optimize_for_lowest = workload_config['optimize_for_lowest']

    redis_db = redis.StrictRedis(host=redis_host, port=6379, db=0)
    if last_completed_iter == 0:
        redis_db.flushall()
    '''
    # Prompt the user to make sure they want to flush the db
    ok_to_flush = raw_input("Are you sure you want to flush the results of your last experiment? Please respond with Y or N: ")
    if ok_to_flush == 'Y':
        redis_db.flushall()
    elif ok_to_flush == 'N':
        print 'OK you said it boss. Exiting...'
        exit()
    else:
        print 'Only Y and N are acceptable responses. Exiting...'
        exit()
    '''

    print '\n' * 2
    print '*' * 20
    print 'INFO: INITIALIZING RESOURCE CONFIG'
    # Initialize Redis and Cluster based on the default resource configuration
    init_cluster_capacities_r(redis_db, machine_type, quilt_overhead)
    init_service_placement_r(redis_db, default_mr_config)
    init_resource_config(redis_db, default_mr_config, machine_type)

    print '*' * 20
    print 'INFO: INSTALLING DEPENDENCIES'
    #install_dependencies(workload_config)

    # Initialize time for data charts
    time_start = datetime.datetime.now()

    print '*' * 20
    print 'INFO: RUNNING BASELINE'

    # Get the Current Performance -- not used for any analysis, just to benchmark progress!!
    current_performance = measure_baseline(workload_config, baseline_trials)

    current_performance[preferred_performance_metric] = remove_outlier(
        current_performance[preferred_performance_metric])
    current_time_stop = datetime.datetime.now()
    time_delta = current_time_stop - time_start

    print 'Current (non-analytic) performance measured: {}'.format(
        current_performance)

    if last_completed_iter != 0:
        tbot_datastore.write_summary_redis(
            redis_db, 0, MR('initial', 'initial', []), 0, {},
            mean_list(current_performance[preferred_performance_metric]),
            mean_list(current_performance[preferred_performance_metric]),
            time_delta.seconds, 0)

    print '============================================'
    print '\n' * 2

    # Initialize the current configurations
    # Initialize the working set of MRs to all the MRs
    mr_working_set = resource_datastore.get_all_mrs(redis_db)
    resource_datastore.write_mr_working_set(redis_db, mr_working_set, 0)
    cumulative_mr_count = 0
    experiment_count = last_completed_iter + 1

    while experiment_count < 10:
        # Calculate the analytic baseline that is used to determine MRs
        analytic_provisions = prepare_analytic_baseline(
            redis_db, sys_config, min(stress_weights))
        print 'The Analytic provisions are as follows {}'.format(
            analytic_provisions)
        for mr in analytic_provisions:
            resource_modifier.set_mr_provision(mr, analytic_provisions[mr])
        analytic_baseline = measure_runtime(workload_config, experiment_trials)
        analytic_mean = mean_list(
            analytic_baseline[preferred_performance_metric])
        print 'The analytic baseline is {}'.format(analytic_baseline)
        print 'This current performance is {}'.format(current_performance)
        analytic_baseline[preferred_performance_metric] = remove_outlier(
            analytic_baseline[preferred_performance_metric])

        # Get a list of MRs to stress in the form of a list of MRs
        mr_to_consider = apply_filtering_policy(redis_db, mr_working_set,
                                                experiment_count, sys_config,
                                                workload_config, filter_config)

        for mr in mr_to_consider:
            print '\n' * 2
            print '*' * 20
            print 'Current MR is {}'.format(mr.to_string())
            increment_to_performance = {}
            current_mr_allocation = resource_datastore.read_mr_alloc(
                redis_db, mr)
            print 'Current MR allocation is {}'.format(current_mr_allocation)

            for stress_weight in stress_weights:
                # Calculate Gradient Schedule and provision resources accordingly
                mr_gradient_schedule = calculate_mr_gradient_schedule(
                    redis_db, [mr], sys_config, stress_weight)
                for change_mr in mr_gradient_schedule:
                    resource_modifier.set_mr_provision(
                        change_mr, mr_gradient_schedule[change_mr])

                experiment_results = measure_runtime(workload_config,
                                                     experiment_trials)

                # Write results of experiment to Redis
                # preferred_results = remove_outlier(experiment_results[preferred_performance_metric])
                preferred_results = experiment_results[
                    preferred_performance_metric]
                mean_result = mean_list(preferred_results)
                tbot_datastore.write_redis_ranking(
                    redis_db, experiment_count, preferred_performance_metric,
                    mean_result, mr, stress_weight)

                # Revert the Gradient schedule and provision resources accordingly
                mr_revert_gradient_schedule = revert_mr_gradient_schedule(
                    redis_db, [mr], sys_config, stress_weight)
                for change_mr in mr_revert_gradient_schedule:
                    resource_modifier.set_mr_provision(
                        change_mr, mr_revert_gradient_schedule[change_mr])

                increment_to_performance[stress_weight] = experiment_results

            # Write the results of the iteration to Redis
            tbot_datastore.write_redis_results(redis_db, mr,
                                               increment_to_performance,
                                               experiment_count,
                                               preferred_performance_metric)
            print '*' * 20
            print '\n' * 2

        # Timing Information for the purpose of experiments
        current_time_stop = datetime.datetime.now()
        time_delta = current_time_stop - time_start
        cumulative_mr_count += len(mr_to_consider)
        chart_generator.get_summary_mimr_charts(
            redis_db, workload_config, current_performance, mr_working_set,
            experiment_count, stress_weights, preferred_performance_metric,
            time_start)

        # Move back into the normal operating basis by removing the baseline prep stresses
        reverted_analytic_provisions = revert_analytic_baseline(
            redis_db, sys_config)
        for mr in reverted_analytic_provisions:
            resource_modifier.set_mr_provision(
                mr, reverted_analytic_provisions[mr])

        # Recover the results of the experiment from Redis
        max_stress_weight = min(stress_weights)
        mimr_list = tbot_datastore.get_top_n_mimr(
            redis_db,
            experiment_count,
            preferred_performance_metric,
            max_stress_weight,
            gradient_mode,
            optimize_for_lowest=optimize_for_lowest,
            num_results_returned=-1)

        imr_list, nimr_list = seperate_mr(
            mimr_list,
            mean_list(analytic_baseline[preferred_performance_metric]),
            optimize_for_lowest)
        if len(imr_list) == 0:
            print 'INFO: IMR list length is 0. Please choose a metric with more signal. Exiting...'
            break
        print 'INFO: IMR list is {}'.format(
            [mr.to_string() for mr in imr_list])
        print 'INFO: NIMR list is {}'.format(
            [mr.to_string() for mr in nimr_list])

        # Try all the MIMRs in the list until a viable improvement is determined
        # Improvement Amount
        mimr = None
        action_taken = {}

        for imr in imr_list:
            imr_improvement_percent = improve_mr_by(redis_db, imr,
                                                    max_stress_weight)
            current_imr_alloc = resource_datastore.read_mr_alloc(redis_db, imr)
            new_imr_alloc = convert_percent_to_raw(imr, current_imr_alloc,
                                                   imr_improvement_percent)
            imr_improvement_proposal = new_imr_alloc - current_imr_alloc

            # If the the Proposed MR cannot be improved by the proposed amount, there are two options
            # - Max out the resources to fill up the remaining resources on the machine
            # - Resource Stealing from NIMRs
            # Both functions will return VIABLE improvements to the IMR deployment
            nimr_diff_proposal = {}
            if check_improve_mr_viability(redis_db, imr,
                                          imr_improvement_proposal) is False:
                print 'INFO: MR {} to increase {} by {} is not viable'.format(
                    imr.to_string(), current_imr_alloc,
                    imr_improvement_proposal)
                print 'INFO: Attempting to max out the machines resources...'
                imr_improvement_proposal = fill_out_resource(redis_db, imr)

                if imr_improvement_proposal <= 0:
                    print 'INFO: No more space to fill out resources. Stealing from NIMRs'
                    # Calculate a plan to reduce the resource provisioning of NIMRs
                    nimr_diff_proposal, imr_improvement_proposal = create_decrease_nimr_schedule(
                        redis_db, imr, nimr_list, max_stress_weight)
                    print 'INFO: Proposed NIMR {}'.format(nimr_diff_proposal)
                    print 'INFO: New IMR improvement {}'.format(
                        imr_improvement_proposal)

                    if len(nimr_diff_proposal
                           ) == 0 or imr_improvement_proposal == 0:
                        action_taken[imr] = 0
                        continue

            # Decrease the amount of resources provisioned to the NIMR
            for nimr in nimr_diff_proposal:
                action_taken[nimr] = nimr_diff_proposal[nimr]
                new_nimr_alloc = resource_datastore.read_mr_alloc(
                    redis_db, nimr) + nimr_diff_proposal[nimr]
                print 'NIMR stealing: imposing a change of {} on {}'.format(
                    action_taken[nimr], nimr.to_string())
                finalize_mr_provision(redis_db, nimr, new_nimr_alloc)

            # Improving the resource should always be viable at this step
            if check_improve_mr_viability(redis_db, imr,
                                          imr_improvement_proposal):
                new_imr_alloc = imr_improvement_proposal + current_imr_alloc
                action_taken[imr] = imr_improvement_proposal
                finalize_mr_provision(redis_db, imr, new_imr_alloc)
                print 'Improvement Calculated: MR {} increase from {} to {}'.format(
                    mr.to_string(), current_imr_alloc, new_imr_alloc)
                mimr = imr
                break
            else:
                action_taken[imr] = 0
                print 'Improvement Calculated: MR {} failed to improve from {}'.format(
                    mr.to_string(), current_mr_allocation)
                print 'This IMR cannot be improved. Printing some debugging before exiting...'

                print 'Current MR allocation is {}'.format(current_imr_alloc)
                print 'Proposed (failed) allocation is {}, improved by {}'.format(
                    new_imr_alloc, imr_improvement_proposal)

                for deployment in imr.instances:
                    vm_ip, container = deployment
                    capacity = resource_datastore.read_machine_capacity(
                        redis_db, vm_ip)
                    consumption = resource_datastore.read_machine_consumption(
                        redis_db, vm_ip)
                    print 'Machine {} Capacity is {}, and consumption is currently {}'.format(
                        vm_ip, capacity, consumption)

        if mimr is None:
            print 'No viable improvement found'
            break

        #Compare against the baseline at the beginning of the program
        improved_performance = measure_runtime(workload_config,
                                               baseline_trials)
        # improved_performance[preferred_performance_metric] = remove_outlier(improved_performance[preferred_performance_metric])
        improved_mean = mean_list(
            improved_performance[preferred_performance_metric])
        previous_mean = mean_list(
            current_performance[preferred_performance_metric])
        performance_improvement = improved_mean - previous_mean

        # Write a summary of the experiment's iterations to Redis
        tbot_datastore.write_summary_redis(redis_db, experiment_count, mimr,
                                           performance_improvement,
                                           action_taken, analytic_mean,
                                           improved_mean, time_delta.seconds,
                                           cumulative_mr_count)
        current_performance = improved_performance

        # Generating overall performance improvement
        chart_generator.get_summary_performance_charts(redis_db,
                                                       workload_config,
                                                       experiment_count,
                                                       time_start)

        results = tbot_datastore.read_summary_redis(redis_db, experiment_count)
        print 'Results from iteration {} are {}'.format(
            experiment_count, results)

        # Checkpoint MR configurations and print
        current_mr_config = resource_datastore.read_all_mr_alloc(redis_db)
        print_csv_configuration(current_mr_config)

        experiment_count += 1

    print '{} experiments completed'.format(experiment_count)
    print_all_steps(redis_db, experiment_count)

    current_mr_config = resource_datastore.read_all_mr_alloc(redis_db)
    for mr in current_mr_config:
        print '{} = {}'.format(mr.to_string(), current_mr_config[mr])

    print_csv_configuration(current_mr_config)
예제 #6
0
def print_all_steps(redis_db, total_experiments):
    print 'Steps towards improving performance'
    net_improvement = 0
    for experiment_count in range(1, total_experiments):
        mimr, action_taken, perf_improvement, analytic_perf, current_perf, elapsed_time, cumm_mr = tbot_datastore.read_summary_redis(
            redis_db, experiment_count)
        print 'Iteration {}, Mimr = {}, New allocation = {}, Performance Improvement = {}, Analytic Performance = {}, Performance after improvement = {}, Elapsed Time = {}, Cummulative MR = {}'.format(
            experiment_count, mimr, action_taken, perf_improvement,
            analytic_perf, current_perf, elapsed_time, cumm_mr)

        # Append results to log file
        with open("experiment_logs.txt", "a") as myfile:
            log_msg = '{},{},{}\n'.format(experiment_count, mimr, action_taken)
            myfile.write(log_msg)

        net_improvement += float(perf_improvement)
    print 'Net Improvement: {}'.format(net_improvement)

    with open("experiment_logs.txt", "a") as myfile:
        myfile.write('net_improvement,{}\n'.format(net_improvement))
예제 #7
0
def run(system_config, workload_config, default_mr_config):
    redis_host = system_config['redis_host']
    baseline_trials = system_config['baseline_trials']
    experiment_trials = system_config['trials']
    stress_weights = system_config['stress_weights']
    stress_policy = system_config['stress_policy']
    resource_to_stress = system_config['stress_these_resources']
    service_to_stress = system_config['stress_these_services']
    vm_to_stress = system_config['stress_these_machines']
    machine_type = system_config['machine_type']
    quilt_overhead = system_config['quilt_overhead']

    preferred_performance_metric = workload_config['tbot_metric']
    optimize_for_lowest = workload_config['optimize_for_lowest']

    redis_db = redis.StrictRedis(host=redis_host, port=6379, db=0)
    redis_db.flushall()

    # Initialize Redis and Cluster based on the default resource configuration
    init_cluster_capacities_r(redis_db, machine_type, quilt_overhead)
    init_service_placement_r(redis_db, default_mr_config)
    init_resource_config(redis_db, default_mr_config, machine_type)

    # Run the baseline experiment
    experiment_count = 0
    baseline_performance = measure_baseline(workload_config, baseline_trials)

    # Initialize the current configurations
    # Invariant: MR are the same between iterations
    current_mr_config = resource_datastore.read_all_mr_alloc(redis_db)

    while experiment_count < 10:
        # Get a list of MRs to stress in the form of a list of MRs
        mr_to_stress = generate_mr_from_policy(redis_db, stress_policy)
        print mr_to_stress

        for mr in mr_to_stress:
            print 'Current MR is {}'.format(mr.to_string())
            increment_to_performance = {}
            current_mr_allocation = resource_datastore.read_mr_alloc(
                redis_db, mr)
            print 'Current MR allocation is {}'.format(current_mr_allocation)
            for stress_weight in stress_weights:
                new_alloc = convert_percent_to_raw(mr, current_mr_allocation,
                                                   stress_weight)
                set_mr_provision(mr, new_alloc)
                experiment_results = measure_runtime(workload_config,
                                                     experiment_trials)

                #Write results of experiment to Redis
                mean_result = float(
                    sum(experiment_results[preferred_performance_metric])
                ) / len(experiment_results[preferred_performance_metric])
                tbot_datastore.write_redis_ranking(
                    redis_db, experiment_count, preferred_performance_metric,
                    mean_result, mr, stress_weight)

                # Remove the effect of the resource stressing
                new_alloc = convert_percent_to_raw(mr, current_mr_allocation,
                                                   0)
                increment_to_performance[stress_weight] = experiment_results

            # Write the results of the iteration to Redis
            tbot_datastore.write_redis_results(redis_db, mr,
                                               increment_to_performance,
                                               experiment_count,
                                               preferred_performance_metric)

        # Recover the results of the experiment from Redis
        max_stress_weight = min(stress_weights)
        mimr_list = tbot_datastore.get_top_n_mimr(
            redis_db,
            experiment_count,
            preferred_performance_metric,
            max_stress_weight,
            optimize_for_lowest=optimize_for_lowest,
            num_results_returned=10)

        # Try all the MIMRs in the list until a viable improvement is determined
        # Improvement Amount
        mimr = None
        action_taken = 0
        print 'The MR improvement is {}'.format(max_stress_weight)
        for mr_score in mimr_list:
            mr, score = mr_score
            improvement_percent = improve_mr_by(redis_db, mr,
                                                max_stress_weight)
            current_mr_allocation = resource_datastore.read_mr_alloc(
                redis_db, mr)
            new_alloc = convert_percent_to_raw(mr, current_mr_allocation,
                                               improvement_percent)
            improvement_amount = new_alloc - current_mr_allocation
            action_taken = improvement_amount
            if check_improve_mr_viability(redis_db, mr, improvement_amount):
                set_mr_provision(mr, new_alloc)
                print 'Improvement Calculated: MR {} increase from {} to {}'.format(
                    mr.to_string(), current_mr_allocation, new_alloc)
                old_alloc = resource_datastore.read_mr_alloc(redis_db, mr)
                resource_datastore.write_mr_alloc(redis_db, mr, new_alloc)
                update_machine_consumption(redis_db, mr, new_alloc, old_alloc)
                current_mr_config = update_mr_config(redis_db,
                                                     current_mr_config)
                mimr = mr
                break
            else:
                print 'Improvement Calculated: MR {} failed to improve from {} to {}'.format(
                    mr.to_string(), current_mr_allocation, new_alloc)

        if mimr is None:
            print 'No viable improvement found'
            break

        #Compare against the baseline at the beginning of the program
        improved_performance = measure_runtime(workload_config,
                                               baseline_trials)
        print improved_performance
        improved_mean = sum(
            improved_performance[preferred_performance_metric]) / float(
                len(improved_performance[preferred_performance_metric]))
        baseline_mean = sum(
            baseline_performance[preferred_performance_metric]) / float(
                len(baseline_performance[preferred_performance_metric]))
        performance_improvement = improved_mean - baseline_mean

        # Write a summary of the experiment's iterations to Redis
        tbot_datastore.write_summary_redis(redis_db, experiment_count, mimr,
                                           performance_improvement,
                                           action_taken)
        baseline_performance = improved_performance

        results = tbot_datastore.read_summary_redis(redis_db, experiment_count)
        print 'Results from iteration {} are {}'.format(
            experiment_count, results)
        experiment_count += 1

        # TODO: Handle False Positive
        # TODO: Compare against performance condition -- for now only do some number of experiments

    print '{} experiments completed'.format(experiment_count)
    print_all_steps(redis_db, experiment_count)
    for mr in current_mr_config:
        print '{} = {}'.format(mr.to_string(), current_mr_config[mr])