Esempio n. 1
0
def init_resource_config(redis_db, default_mr_config, machine_type):
    print 'Initializing the Resource Configurations in the containers'
    instance_specs = get_instance_specs(machine_type)
    for mr in default_mr_config:
        new_resource_provision = default_mr_config[mr]
        if check_improve_mr_viability(redis_db, mr,
                                      new_resource_provision) is False:
            print 'Initial Resource provisioning for {} is too much. Exiting...'.format(
                mr.to_string())
            exit()

        # Enact the change in resource provisioning
        resource_modifier.set_mr_provision(mr, new_resource_provision)

        # Reflect the change in Redis
        resource_datastore.write_mr_alloc(redis_db, mr, new_resource_provision)
        update_machine_consumption(redis_db, mr, new_resource_provision, 0)
Esempio n. 2
0
def apply_pipeline_filter(redis_db,
                          mr_working_set,
                          experiment_iteration,
                          system_config,
                          workload_config,
                          filter_config):

    print '*' * 20
    print 'INFO: Applying Filtering Pipeline'

    print 'Filter config is {}'.format(filter_config)
    print 'MR working set is {}'.format(mr_working_set)
    
    machine_type = system_config['machine_type']

    pipeline_partitions = filter_config['pipeline_partitions']
    stress_weight = filter_config['stress_amount']
    experiment_trials = filter_config['filter_exp_trials']
    pipelined_services = filter_config['pipeline_services']

    pipeline_groups = []

    print 'Pipelined services are {}'.format(pipelined_services)
    # No specified pipelined services indicates that each pipeline is a service
    if pipelined_services[0][0] == 'BY_SERVICE':
        service_names = list(set([mr.service_name for mr in mr_working_set]))
        for service_name in service_names:
            mr_list = search_mr_working_set(mr_working_set, services)
            pipeline_groups.append(mr_list)
    elif pipelined_services[0][0] == 'RANDOM':
        pipeline_groups = gen_mr_random_split(mr_working_set, pipeline_partitions)

    print "The pipeline groups are being printed below: "
    for pipeline_group in pipeline_groups:
        pipeline_group = [mr.to_string() for mr in pipeline_group]
        print 'A pipeline is {}'.format(pipeline_group)
        
    tbot_metric = workload_config['tbot_metric']
    optimize_for_lowest = workload_config['optimize_for_lowest']

    pipeline_index = 0
    for pipeline in pipeline_groups:
        change_mr_schedule = calculate_mr_gradient_schedule(redis_db,
                                                            pipeline,
                                                            system_config,
                                                            stress_weight)
        # Simultaneously stress the MRs in a pipeline
        for mr in change_mr_schedule:
            resource_modifier.set_mr_provision(mr, change_mr_schedule[mr], workload_config)

        experiment_results = measure_runtime(workload_config, experiment_trials)
        exp_mean = mean_list(experiment_results[tbot_metric])
        repr_str = str(pipeline_index)
        tbot_datastore.write_filtered_results(redis_db,
                                              'pipeline',
                                              experiment_iteration,
                                              repr_str,
                                              exp_mean)

        # Revert the stressing
        change_mr_schedule = revert_mr_gradient_schedule(redis_db,
                                                         pipeline,
                                                         system_config,
                                                         stress_weight)
        for mr in change_mr_schedule:
            resource_modifier.set_mr_provision(mr, change_mr_schedule[mr], workload_config)

        pipeline_index += 1

    all_pipeline_score_list = tbot_datastore.get_top_n_filtered_results(redis_db,
                                                                        'pipeline',
                                                                        experiment_iteration,
                                                                        system_config,
                                                                        optimize_for_lowest=optimize_for_lowest)
    
    print 'INFO: The current pipeline score list is here {}'.format(all_pipeline_score_list)
    
    # Temporarily just choose the most impacted pipeline
    selected_pipeline_score_list = [all_pipeline_score_list[0]]

    # MIP = Most Impacted Pipeline
    mip = []
    for pipeline_score in selected_pipeline_score_list:
        pipeline_repr,score = pipeline_score
        mip += pipeline_groups[int(pipeline_repr)]
    print mip

    # Log results of the filtering
    print 'About to log to {}'.format(FILTER_LOGS)
    with open(FILTER_LOGS, "a") as myfile:
        # First output the result
        filter_str = '{},'.format(experiment_iteration)
        for mr in mip:
            filter_str += '{},'.format(mr.to_string())
        filter_str += '\n\n'
        myfile.write(filter_str)
        
    return mip
Esempio n. 3
0
def run(sys_config,
        workload_config,
        filter_config,
        default_mr_config,
        last_completed_iter=0):
    """Main tuning loop: repeatedly stress MRs, rank them by impact on the
    preferred performance metric, and shift resources toward the Most
    Impacted MR (MIMR), logging each iteration to Redis and to charts.

    Args:
        sys_config: system settings (redis host, trial counts, stress
            weights/policy, machine type, quilt overhead, gradient mode).
        workload_config: workload settings; 'tbot_metric' names the metric
            and 'optimize_for_lowest' gives its direction.
        filter_config: settings forwarded to the MR filtering policy.
        default_mr_config: dict mapping MR -> initial allocation.
        last_completed_iter: iteration to resume from; 0 means a fresh run
            (Redis is flushed).
    """
    redis_host = sys_config['redis_host']
    baseline_trials = sys_config['baseline_trials']
    experiment_trials = sys_config['trials']
    stress_weights = sys_config['stress_weights']
    # NOTE(review): stress_policy, resource_to_stress, service_to_stress and
    # vm_to_stress are read from config but never used in this function body.
    stress_policy = sys_config['stress_policy']
    resource_to_stress = sys_config['stress_these_resources']
    service_to_stress = sys_config['stress_these_services']
    vm_to_stress = sys_config['stress_these_machines']
    machine_type = sys_config['machine_type']
    quilt_overhead = sys_config['quilt_overhead']
    gradient_mode = sys_config['gradient_mode']

    preferred_performance_metric = workload_config['tbot_metric']
    optimize_for_lowest = workload_config['optimize_for_lowest']

    redis_db = redis.StrictRedis(host=redis_host, port=6379, db=0)
    # Only wipe previous results when starting from scratch.
    if last_completed_iter == 0:
        redis_db.flushall()
    '''
    # Prompt the user to make sure they want to flush the db
    ok_to_flush = raw_input("Are you sure you want to flush the results of your last experiment? Please respond with Y or N: ")
    if ok_to_flush == 'Y':
        redis_db.flushall()
    elif ok_to_flush == 'N':
        print 'OK you said it boss. Exiting...'
        exit()
    else:
        print 'Only Y and N are acceptable responses. Exiting...'
        exit()
    '''

    print '\n' * 2
    print '*' * 20
    print 'INFO: INITIALIZING RESOURCE CONFIG'
    # Initialize Redis and Cluster based on the default resource configuration
    init_cluster_capacities_r(redis_db, machine_type, quilt_overhead)
    init_service_placement_r(redis_db, default_mr_config)
    init_resource_config(redis_db, default_mr_config, machine_type)

    print '*' * 20
    print 'INFO: INSTALLING DEPENDENCIES'
    #install_dependencies(workload_config)

    # Initialize time for data charts
    time_start = datetime.datetime.now()

    print '*' * 20
    print 'INFO: RUNNING BASELINE'

    # Get the Current Performance -- not used for any analysis, just to benchmark progress!!
    current_performance = measure_baseline(workload_config, baseline_trials)

    current_performance[preferred_performance_metric] = remove_outlier(
        current_performance[preferred_performance_metric])
    current_time_stop = datetime.datetime.now()
    time_delta = current_time_stop - time_start

    print 'Current (non-analytic) performance measured: {}'.format(
        current_performance)

    # NOTE(review): writing the 'initial' summary only when RESUMING
    # (last_completed_iter != 0) looks inverted -- a fresh run would seem to
    # be the case that needs the initial record. Confirm whether this should
    # be `== 0`.
    if last_completed_iter != 0:
        tbot_datastore.write_summary_redis(
            redis_db, 0, MR('initial', 'initial', []), 0, {},
            mean_list(current_performance[preferred_performance_metric]),
            mean_list(current_performance[preferred_performance_metric]),
            time_delta.seconds, 0)

    print '============================================'
    print '\n' * 2

    # Initialize the current configurations
    # Initialize the working set of MRs to all the MRs
    mr_working_set = resource_datastore.get_all_mrs(redis_db)
    resource_datastore.write_mr_working_set(redis_db, mr_working_set, 0)
    cumulative_mr_count = 0
    experiment_count = last_completed_iter + 1

    # Hard cap of 10 iterations; the loop may also break early when no
    # viable improvement is found or the IMR list is empty.
    while experiment_count < 10:
        # Calculate the analytic baseline that is used to determine MRs
        analytic_provisions = prepare_analytic_baseline(
            redis_db, sys_config, min(stress_weights))
        print 'The Analytic provisions are as follows {}'.format(
            analytic_provisions)
        for mr in analytic_provisions:
            resource_modifier.set_mr_provision(mr, analytic_provisions[mr])
        analytic_baseline = measure_runtime(workload_config, experiment_trials)
        analytic_mean = mean_list(
            analytic_baseline[preferred_performance_metric])
        print 'The analytic baseline is {}'.format(analytic_baseline)
        print 'This current performance is {}'.format(current_performance)
        analytic_baseline[preferred_performance_metric] = remove_outlier(
            analytic_baseline[preferred_performance_metric])

        # Get a list of MRs to stress in the form of a list of MRs
        mr_to_consider = apply_filtering_policy(redis_db, mr_working_set,
                                                experiment_count, sys_config,
                                                workload_config, filter_config)

        # Stress each candidate MR at every stress weight and record the
        # resulting performance in Redis.
        for mr in mr_to_consider:
            print '\n' * 2
            print '*' * 20
            print 'Current MR is {}'.format(mr.to_string())
            increment_to_performance = {}
            current_mr_allocation = resource_datastore.read_mr_alloc(
                redis_db, mr)
            print 'Current MR allocation is {}'.format(current_mr_allocation)

            for stress_weight in stress_weights:
                # Calculate Gradient Schedule and provision resources accordingly
                mr_gradient_schedule = calculate_mr_gradient_schedule(
                    redis_db, [mr], sys_config, stress_weight)
                for change_mr in mr_gradient_schedule:
                    resource_modifier.set_mr_provision(
                        change_mr, mr_gradient_schedule[change_mr])

                experiment_results = measure_runtime(workload_config,
                                                     experiment_trials)

                # Write results of experiment to Redis
                # preferred_results = remove_outlier(experiment_results[preferred_performance_metric])
                preferred_results = experiment_results[
                    preferred_performance_metric]
                mean_result = mean_list(preferred_results)
                tbot_datastore.write_redis_ranking(
                    redis_db, experiment_count, preferred_performance_metric,
                    mean_result, mr, stress_weight)

                # Revert the Gradient schedule and provision resources accordingly
                mr_revert_gradient_schedule = revert_mr_gradient_schedule(
                    redis_db, [mr], sys_config, stress_weight)
                for change_mr in mr_revert_gradient_schedule:
                    resource_modifier.set_mr_provision(
                        change_mr, mr_revert_gradient_schedule[change_mr])

                increment_to_performance[stress_weight] = experiment_results

            # Write the results of the iteration to Redis
            tbot_datastore.write_redis_results(redis_db, mr,
                                               increment_to_performance,
                                               experiment_count,
                                               preferred_performance_metric)
            print '*' * 20
            print '\n' * 2

        # Timing Information for the purpose of experiments
        current_time_stop = datetime.datetime.now()
        time_delta = current_time_stop - time_start
        cumulative_mr_count += len(mr_to_consider)
        chart_generator.get_summary_mimr_charts(
            redis_db, workload_config, current_performance, mr_working_set,
            experiment_count, stress_weights, preferred_performance_metric,
            time_start)

        # Move back into the normal operating basis by removing the baseline prep stresses
        reverted_analytic_provisions = revert_analytic_baseline(
            redis_db, sys_config)
        for mr in reverted_analytic_provisions:
            resource_modifier.set_mr_provision(
                mr, reverted_analytic_provisions[mr])

        # Recover the results of the experiment from Redis
        # NOTE(review): min() is used as the "max" stress weight -- presumably
        # weights are negative so min is the largest magnitude; confirm.
        max_stress_weight = min(stress_weights)
        mimr_list = tbot_datastore.get_top_n_mimr(
            redis_db,
            experiment_count,
            preferred_performance_metric,
            max_stress_weight,
            gradient_mode,
            optimize_for_lowest=optimize_for_lowest,
            num_results_returned=-1)

        # Split into impacted (IMR) and non-impacted (NIMR) MRs relative to
        # the analytic baseline mean.
        imr_list, nimr_list = seperate_mr(
            mimr_list,
            mean_list(analytic_baseline[preferred_performance_metric]),
            optimize_for_lowest)
        if len(imr_list) == 0:
            print 'INFO: IMR list length is 0. Please choose a metric with more signal. Exiting...'
            break
        print 'INFO: IMR list is {}'.format(
            [mr.to_string() for mr in imr_list])
        print 'INFO: NIMR list is {}'.format(
            [mr.to_string() for mr in nimr_list])

        # Try all the MIMRs in the list until a viable improvement is determined
        # Improvement Amount
        mimr = None
        action_taken = {}

        for imr in imr_list:
            imr_improvement_percent = improve_mr_by(redis_db, imr,
                                                    max_stress_weight)
            current_imr_alloc = resource_datastore.read_mr_alloc(redis_db, imr)
            new_imr_alloc = convert_percent_to_raw(imr, current_imr_alloc,
                                                   imr_improvement_percent)
            imr_improvement_proposal = new_imr_alloc - current_imr_alloc

            # If the the Proposed MR cannot be improved by the proposed amount, there are two options
            # - Max out the resources to fill up the remaining resources on the machine
            # - Resource Stealing from NIMRs
            # Both functions will return VIABLE improvements to the IMR deployment
            nimr_diff_proposal = {}
            if check_improve_mr_viability(redis_db, imr,
                                          imr_improvement_proposal) is False:
                print 'INFO: MR {} to increase {} by {} is not viable'.format(
                    imr.to_string(), current_imr_alloc,
                    imr_improvement_proposal)
                print 'INFO: Attempting to max out the machines resources...'
                imr_improvement_proposal = fill_out_resource(redis_db, imr)

                if imr_improvement_proposal <= 0:
                    print 'INFO: No more space to fill out resources. Stealing from NIMRs'
                    # Calculate a plan to reduce the resource provisioning of NIMRs
                    nimr_diff_proposal, imr_improvement_proposal = create_decrease_nimr_schedule(
                        redis_db, imr, nimr_list, max_stress_weight)
                    print 'INFO: Proposed NIMR {}'.format(nimr_diff_proposal)
                    print 'INFO: New IMR improvement {}'.format(
                        imr_improvement_proposal)

                    if len(nimr_diff_proposal
                           ) == 0 or imr_improvement_proposal == 0:
                        action_taken[imr] = 0
                        continue

            # Decrease the amount of resources provisioned to the NIMR
            for nimr in nimr_diff_proposal:
                action_taken[nimr] = nimr_diff_proposal[nimr]
                new_nimr_alloc = resource_datastore.read_mr_alloc(
                    redis_db, nimr) + nimr_diff_proposal[nimr]
                print 'NIMR stealing: imposing a change of {} on {}'.format(
                    action_taken[nimr], nimr.to_string())
                finalize_mr_provision(redis_db, nimr, new_nimr_alloc)

            # Improving the resource should always be viable at this step
            if check_improve_mr_viability(redis_db, imr,
                                          imr_improvement_proposal):
                new_imr_alloc = imr_improvement_proposal + current_imr_alloc
                action_taken[imr] = imr_improvement_proposal
                finalize_mr_provision(redis_db, imr, new_imr_alloc)
                # NOTE(review): `mr` here is the stale loop variable from the
                # earlier `for mr in mr_to_consider` loop -- the log likely
                # intends `imr.to_string()`.
                print 'Improvement Calculated: MR {} increase from {} to {}'.format(
                    mr.to_string(), current_imr_alloc, new_imr_alloc)
                mimr = imr
                break
            else:
                action_taken[imr] = 0
                # NOTE(review): same stale-variable issue -- `mr` and
                # `current_mr_allocation` leak from the earlier loop; the log
                # likely intends `imr` and `current_imr_alloc`.
                print 'Improvement Calculated: MR {} failed to improve from {}'.format(
                    mr.to_string(), current_mr_allocation)
                print 'This IMR cannot be improved. Printing some debugging before exiting...'

                print 'Current MR allocation is {}'.format(current_imr_alloc)
                print 'Proposed (failed) allocation is {}, improved by {}'.format(
                    new_imr_alloc, imr_improvement_proposal)

                # Dump per-machine capacity vs consumption for every VM that
                # hosts an instance of this IMR.
                for deployment in imr.instances:
                    vm_ip, container = deployment
                    capacity = resource_datastore.read_machine_capacity(
                        redis_db, vm_ip)
                    consumption = resource_datastore.read_machine_consumption(
                        redis_db, vm_ip)
                    print 'Machine {} Capacity is {}, and consumption is currently {}'.format(
                        vm_ip, capacity, consumption)

        if mimr is None:
            print 'No viable improvement found'
            break

        #Compare against the baseline at the beginning of the program
        improved_performance = measure_runtime(workload_config,
                                               baseline_trials)
        # improved_performance[preferred_performance_metric] = remove_outlier(improved_performance[preferred_performance_metric])
        improved_mean = mean_list(
            improved_performance[preferred_performance_metric])
        previous_mean = mean_list(
            current_performance[preferred_performance_metric])
        performance_improvement = improved_mean - previous_mean

        # Write a summary of the experiment's iterations to Redis
        tbot_datastore.write_summary_redis(redis_db, experiment_count, mimr,
                                           performance_improvement,
                                           action_taken, analytic_mean,
                                           improved_mean, time_delta.seconds,
                                           cumulative_mr_count)
        current_performance = improved_performance

        # Generating overall performance improvement
        chart_generator.get_summary_performance_charts(redis_db,
                                                       workload_config,
                                                       experiment_count,
                                                       time_start)

        results = tbot_datastore.read_summary_redis(redis_db, experiment_count)
        print 'Results from iteration {} are {}'.format(
            experiment_count, results)

        # Checkpoint MR configurations and print
        current_mr_config = resource_datastore.read_all_mr_alloc(redis_db)
        print_csv_configuration(current_mr_config)

        experiment_count += 1

    print '{} experiments completed'.format(experiment_count)
    print_all_steps(redis_db, experiment_count)

    # Final dump of the converged MR configuration.
    current_mr_config = resource_datastore.read_all_mr_alloc(redis_db)
    for mr in current_mr_config:
        print '{} = {}'.format(mr.to_string(), current_mr_config[mr])

    print_csv_configuration(current_mr_config)
Esempio n. 4
0
def finalize_mr_provision(redis_db, mr, new_alloc):
    """Enact an MR allocation change and mirror it in the datastore.

    Pushes new_alloc to the live deployment, then updates Redis with the
    new allocation and the resulting machine-consumption delta.
    """
    # Apply the new provision to the running containers first.
    resource_modifier.set_mr_provision(mr, new_alloc)
    # Capture the previous allocation before overwriting it in Redis so the
    # consumption bookkeeping can account for the delta.
    prior_alloc = resource_datastore.read_mr_alloc(redis_db, mr)
    resource_datastore.write_mr_alloc(redis_db, mr, new_alloc)
    update_machine_consumption(redis_db, mr, new_alloc, prior_alloc)
Esempio n. 5
0
def find_colocated_nimrs(redis_db, imr, mr_working_set, baseline_mean,
                         sys_config, workload_config):
    print 'Finding colocated NIMRs'
    experiment_trials = sys_config['trials']
    stress_weights = sys_config['stress_weights']
    stress_weight = min(stress_weights)

    preferred_performance_metric = workload_config['tbot_metric']
    optimize_for_lowest = workload_config['optimize_for_lowest']

    vm_to_service = get_vm_to_service(get_actual_vms())

    colocated_services = []
    # Identify an unique list of relevant MRs colocated with IMR instances
    for deployment in imr.instances:
        vm_ip, container = deployment
        colocated_services = colocated_services + vm_to_service[vm_ip]
    print 'Colocated services are {}'.format(colocated_services)

    candidate_mrs = []
    for mr in mr_working_set:
        if mr.service_name in colocated_services and mr.resource == imr.resource:
            candidate_mrs.append(mr)
    print 'Candidate MRs are {}'.format(
        [mr.to_string() for mr in candidate_mrs])

    nimr_list = []
    for mr in candidate_mrs:
        print 'MR being considered is {}'.format(mr.to_string())
        mr_gradient_schedule = calculate_mr_gradient_schedule(
            redis_db, [mr], sys_config, stress_weight)

        for change_mr in mr_gradient_schedule:
            resource_modifier.set_mr_provision(change_mr,
                                               mr_gradient_schedule[change_mr],
                                               workload_config)

        experiment_results = measure_runtime(workload_config,
                                             experiment_trials)
        preferred_results = experiment_results[preferred_performance_metric]
        mean_result = mean_list(preferred_results)

        perf_diff = mean_result - baseline_mean
        if (perf_diff > 0.03 * baseline_mean) and optimize_for_lowest:
            print 'Do nothing for optimize lowest'
        elif (perf_diff <
              -0.03 * baseline_mean) and optimize_for_lowest is False:
            print 'Do nothing for optimize lowest'
        else:
            nimr_list.append(mr)

        # Revert the Gradient schedule and provision resources accordingly
        mr_revert_gradient_schedule = revert_mr_gradient_schedule(
            redis_db, [mr], sys_config, stress_weight)

        for change_mr in mr_revert_gradient_schedule:
            resource_modifier.set_mr_provision(
                change_mr, mr_revert_gradient_schedule[change_mr],
                workload_config)

    return nimr_list
Esempio n. 6
0
def apply_pipeline_filter(redis_db, mr_working_set, experiment_iteration,
                          system_config, workload_config, filter_config):
    """Stress MRs in groups ("pipelines") and score each pipeline's impact.

    Partitions the MR working set into pipelines (one per service, or a
    random split), stresses each pipeline as a unit, measures the workload,
    and records per-pipeline scores in Redis.

    Args:
        redis_db: StrictRedis handle.
        mr_working_set: list of MR objects currently under consideration.
        experiment_iteration: current iteration number (used as a Redis key).
        system_config: system settings; only 'machine_type' is read here.
        workload_config: provides 'tbot_metric' and 'optimize_for_lowest'.
        filter_config: 'pipeline_partitions', 'stress_amount',
            'filter_exp_trials', 'pipeline_services'.

    Returns:
        (all_pipeline_score_list, pipeline_groups): the ranked pipeline
        scores and the list of MR groups they index into.
    """

    logging.info('*' * 20)
    logging.info('Applying Filtering Pipeline')

    logging.info('Filter config is {}'.format(filter_config))
    logging.info('MR working set is {}'.format(mr_working_set))

    machine_type = system_config['machine_type']

    pipeline_partitions = filter_config['pipeline_partitions']
    stress_weight = filter_config['stress_amount']
    experiment_trials = filter_config['filter_exp_trials']
    pipelined_services = filter_config['pipeline_services']

    pipeline_groups = []

    logging.info('Pipelined services are {}'.format(pipelined_services))
    # No specified pipelined services indicates that each pipeline is a service
    if pipelined_services[0][0] == 'BY_SERVICE':
        service_names = list(set([mr.service_name for mr in mr_working_set]))
        for service_name in service_names:
            # BUGFIX: original referenced an undefined name `services`
            # (NameError). Search for the MRs of the current service.
            # Assumes search_mr_working_set takes a list of service names
            # -- TODO confirm against its definition.
            mr_list = search_mr_working_set(mr_working_set, [service_name])
            pipeline_groups.append(mr_list)
    elif pipelined_services[0][0] == 'RANDOM':
        pipeline_groups = gen_mr_random_split(mr_working_set,
                                              pipeline_partitions)

    logging.info("The pipeline groups are being printed below: ")
    for pipeline_group in pipeline_groups:
        pipeline_group = [mr.to_string() for mr in pipeline_group]
        logging.info('A pipeline is {}'.format(pipeline_group))

    tbot_metric = workload_config['tbot_metric']
    optimize_for_lowest = workload_config['optimize_for_lowest']

    pipeline_index = 0
    for pipeline in pipeline_groups:
        change_mr_schedule = calculate_mr_gradient_schedule(
            redis_db, pipeline, system_config, stress_weight)
        # Simultaneously stress the MRs in a pipeline
        for mr in change_mr_schedule:
            resource_modifier.set_mr_provision(mr, change_mr_schedule[mr],
                                               workload_config)

        experiment_results = measure_runtime(workload_config,
                                             experiment_trials)
        exp_mean = mean_list(experiment_results[tbot_metric])
        # Pipelines are keyed by their index in pipeline_groups.
        repr_str = str(pipeline_index)
        tbot_datastore.write_filtered_results(redis_db, 'pipeline',
                                              experiment_iteration, repr_str,
                                              exp_mean)

        # Revert the stressing
        change_mr_schedule = revert_mr_gradient_schedule(
            redis_db, pipeline, system_config, stress_weight)
        for mr in change_mr_schedule:
            resource_modifier.set_mr_provision(mr, change_mr_schedule[mr],
                                               workload_config)

        pipeline_index += 1

    all_pipeline_score_list = tbot_datastore.get_top_n_filtered_results(
        redis_db,
        'pipeline',
        experiment_iteration,
        system_config,
        optimize_for_lowest=optimize_for_lowest)

    return all_pipeline_score_list, pipeline_groups