Esempio n. 1
0
def revert_single_gradient(redis_db, mr_candidates, stress_weight):
    """Map each candidate MR back to the allocation recorded in Redis.

    `stress_weight` is accepted for signature parity with the other
    revert_* schedulers but is not used by this policy.
    """
    return {
        candidate: resource_datastore.read_mr_alloc(redis_db, candidate)
        for candidate in mr_candidates
    }
Esempio n. 2
0
def revert_inverted_gradient(redis_db, mr_candidates, stress_weight):
    """Build a revert schedule: each candidate MR is re-weighted from its
    recorded allocation by `stress_weight` via convert_percent_to_raw."""
    return {
        candidate: convert_percent_to_raw(
            candidate,
            resource_datastore.read_mr_alloc(redis_db, candidate),
            stress_weight)
        for candidate in mr_candidates
    }
Esempio n. 3
0
def revert_inverted_baseline(redis_db):
    """Return a schedule restoring every known MR to its unstressed amount
    (stress weight 0 applied to the allocation recorded in Redis)."""
    schedule = {}
    for mr in resource_datastore.get_all_mrs(redis_db):
        recorded_alloc = resource_datastore.read_mr_alloc(redis_db, mr)
        schedule[mr] = convert_percent_to_raw(mr, recorded_alloc, 0)
    return schedule
Esempio n. 4
0
def schedule_inverted_gradient(redis_db, mr_candidates, stress_weight):
    """Simulate ADDING a resource by scheduling each candidate MR back to
    its original, non-stressed amount as recorded in Redis.

    `stress_weight` is unused by this policy; it is kept for signature
    parity with the other schedule_* functions.
    """
    return dict(
        (candidate, resource_datastore.read_mr_alloc(redis_db, candidate))
        for candidate in mr_candidates)
Esempio n. 5
0
def schedule_single_gradient(redis_db, mr_candidates, stress_weight):
    """Build a stress schedule: each candidate MR's recorded allocation is
    re-weighted by `stress_weight` via convert_percent_to_raw."""
    schedule = {}
    for candidate in mr_candidates:
        recorded_alloc = resource_datastore.read_mr_alloc(redis_db, candidate)
        schedule[candidate] = convert_percent_to_raw(
            candidate, recorded_alloc, stress_weight)
    return schedule
Esempio n. 6
0
def run(sys_config,
        workload_config,
        filter_config,
        default_mr_config,
        last_completed_iter=0):
    """Gradient-based autotuning loop.

    Repeatedly (up to 10 iterations): provision an analytic baseline,
    stress each candidate MR at every stress weight, rank the results in
    Redis, then improve the most-impactful MR (MIMR) — stealing resources
    from non-impactful MRs (NIMRs) when the machine is full — and record
    a per-iteration summary and charts.

    Parameters:
        sys_config: dict of system/tuning settings (redis host, trial
            counts, stress weights/policy, machine type, etc.).
        workload_config: dict describing the workload; 'tbot_metric' and
            'optimize_for_lowest' select the performance metric.
        filter_config: passed through to apply_filtering_policy.
        default_mr_config: initial MR allocations used to seed Redis.
        last_completed_iter: resume point; when 0 the Redis DB is flushed.
    """
    redis_host = sys_config['redis_host']
    baseline_trials = sys_config['baseline_trials']
    experiment_trials = sys_config['trials']
    stress_weights = sys_config['stress_weights']
    stress_policy = sys_config['stress_policy']
    resource_to_stress = sys_config['stress_these_resources']
    service_to_stress = sys_config['stress_these_services']
    vm_to_stress = sys_config['stress_these_machines']
    machine_type = sys_config['machine_type']
    quilt_overhead = sys_config['quilt_overhead']
    gradient_mode = sys_config['gradient_mode']

    preferred_performance_metric = workload_config['tbot_metric']
    optimize_for_lowest = workload_config['optimize_for_lowest']

    # Fresh experiments start from an empty Redis; resumed runs keep state.
    redis_db = redis.StrictRedis(host=redis_host, port=6379, db=0)
    if last_completed_iter == 0:
        redis_db.flushall()
    '''
    # Prompt the user to make sure they want to flush the db
    ok_to_flush = raw_input("Are you sure you want to flush the results of your last experiment? Please respond with Y or N: ")
    if ok_to_flush == 'Y':
        redis_db.flushall()
    elif ok_to_flush == 'N':
        print 'OK you said it boss. Exiting...'
        exit()
    else:
        print 'Only Y and N are acceptable responses. Exiting...'
        exit()
    '''

    print '\n' * 2
    print '*' * 20
    print 'INFO: INITIALIZING RESOURCE CONFIG'
    # Initialize Redis and Cluster based on the default resource configuration
    init_cluster_capacities_r(redis_db, machine_type, quilt_overhead)
    init_service_placement_r(redis_db, default_mr_config)
    init_resource_config(redis_db, default_mr_config, machine_type)

    print '*' * 20
    print 'INFO: INSTALLING DEPENDENCIES'
    #install_dependencies(workload_config)

    # Initialize time for data charts
    time_start = datetime.datetime.now()

    print '*' * 20
    print 'INFO: RUNNING BASELINE'

    # Get the Current Performance -- not used for any analysis, just to benchmark progress!!
    current_performance = measure_baseline(workload_config, baseline_trials)

    current_performance[preferred_performance_metric] = remove_outlier(
        current_performance[preferred_performance_metric])
    current_time_stop = datetime.datetime.now()
    time_delta = current_time_stop - time_start

    print 'Current (non-analytic) performance measured: {}'.format(
        current_performance)

    # NOTE(review): this writes the initial summary only when RESUMING
    # (last_completed_iter != 0) — confirm the condition isn't inverted.
    if last_completed_iter != 0:
        tbot_datastore.write_summary_redis(
            redis_db, 0, MR('initial', 'initial', []), 0, {},
            mean_list(current_performance[preferred_performance_metric]),
            mean_list(current_performance[preferred_performance_metric]),
            time_delta.seconds, 0)

    print '============================================'
    print '\n' * 2

    # Initialize the current configurations
    # Initialize the working set of MRs to all the MRs
    mr_working_set = resource_datastore.get_all_mrs(redis_db)
    resource_datastore.write_mr_working_set(redis_db, mr_working_set, 0)
    cumulative_mr_count = 0
    experiment_count = last_completed_iter + 1

    # Hard cap of 10 improvement iterations.
    while experiment_count < 10:
        # Calculate the analytic baseline that is used to determine MRs.
        # min(stress_weights) is presumably the most aggressive weight —
        # TODO confirm weights are negative percentages.
        analytic_provisions = prepare_analytic_baseline(
            redis_db, sys_config, min(stress_weights))
        print 'The Analytic provisions are as follows {}'.format(
            analytic_provisions)
        for mr in analytic_provisions:
            resource_modifier.set_mr_provision(mr, analytic_provisions[mr])
        analytic_baseline = measure_runtime(workload_config, experiment_trials)
        analytic_mean = mean_list(
            analytic_baseline[preferred_performance_metric])
        print 'The analytic baseline is {}'.format(analytic_baseline)
        print 'This current performance is {}'.format(current_performance)
        analytic_baseline[preferred_performance_metric] = remove_outlier(
            analytic_baseline[preferred_performance_metric])

        # Get a list of MRs to stress in the form of a list of MRs
        mr_to_consider = apply_filtering_policy(redis_db, mr_working_set,
                                                experiment_count, sys_config,
                                                workload_config, filter_config)

        # Stress each candidate MR at each weight; record rankings in Redis.
        for mr in mr_to_consider:
            print '\n' * 2
            print '*' * 20
            print 'Current MR is {}'.format(mr.to_string())
            increment_to_performance = {}
            current_mr_allocation = resource_datastore.read_mr_alloc(
                redis_db, mr)
            print 'Current MR allocation is {}'.format(current_mr_allocation)

            for stress_weight in stress_weights:
                # Calculate Gradient Schedule and provision resources accordingly
                mr_gradient_schedule = calculate_mr_gradient_schedule(
                    redis_db, [mr], sys_config, stress_weight)
                for change_mr in mr_gradient_schedule:
                    resource_modifier.set_mr_provision(
                        change_mr, mr_gradient_schedule[change_mr])

                experiment_results = measure_runtime(workload_config,
                                                     experiment_trials)

                # Write results of experiment to Redis
                # preferred_results = remove_outlier(experiment_results[preferred_performance_metric])
                preferred_results = experiment_results[
                    preferred_performance_metric]
                mean_result = mean_list(preferred_results)
                tbot_datastore.write_redis_ranking(
                    redis_db, experiment_count, preferred_performance_metric,
                    mean_result, mr, stress_weight)

                # Revert the Gradient schedule and provision resources accordingly
                mr_revert_gradient_schedule = revert_mr_gradient_schedule(
                    redis_db, [mr], sys_config, stress_weight)
                for change_mr in mr_revert_gradient_schedule:
                    resource_modifier.set_mr_provision(
                        change_mr, mr_revert_gradient_schedule[change_mr])

                increment_to_performance[stress_weight] = experiment_results

            # Write the results of the iteration to Redis
            tbot_datastore.write_redis_results(redis_db, mr,
                                               increment_to_performance,
                                               experiment_count,
                                               preferred_performance_metric)
            print '*' * 20
            print '\n' * 2

        # Timing Information for the purpose of experiments
        current_time_stop = datetime.datetime.now()
        time_delta = current_time_stop - time_start
        cumulative_mr_count += len(mr_to_consider)
        chart_generator.get_summary_mimr_charts(
            redis_db, workload_config, current_performance, mr_working_set,
            experiment_count, stress_weights, preferred_performance_metric,
            time_start)

        # Move back into the normal operating basis by removing the baseline prep stresses
        reverted_analytic_provisions = revert_analytic_baseline(
            redis_db, sys_config)
        for mr in reverted_analytic_provisions:
            resource_modifier.set_mr_provision(
                mr, reverted_analytic_provisions[mr])

        # Recover the results of the experiment from Redis
        max_stress_weight = min(stress_weights)
        mimr_list = tbot_datastore.get_top_n_mimr(
            redis_db,
            experiment_count,
            preferred_performance_metric,
            max_stress_weight,
            gradient_mode,
            optimize_for_lowest=optimize_for_lowest,
            num_results_returned=-1)

        # Split ranked MRs into impactful (IMR) and non-impactful (NIMR)
        # relative to the analytic baseline mean.
        imr_list, nimr_list = seperate_mr(
            mimr_list,
            mean_list(analytic_baseline[preferred_performance_metric]),
            optimize_for_lowest)
        if len(imr_list) == 0:
            print 'INFO: IMR list length is 0. Please choose a metric with more signal. Exiting...'
            break
        print 'INFO: IMR list is {}'.format(
            [mr.to_string() for mr in imr_list])
        print 'INFO: NIMR list is {}'.format(
            [mr.to_string() for mr in nimr_list])

        # Try all the MIMRs in the list until a viable improvement is determined
        # Improvement Amount
        mimr = None
        action_taken = {}

        for imr in imr_list:
            imr_improvement_percent = improve_mr_by(redis_db, imr,
                                                    max_stress_weight)
            current_imr_alloc = resource_datastore.read_mr_alloc(redis_db, imr)
            new_imr_alloc = convert_percent_to_raw(imr, current_imr_alloc,
                                                   imr_improvement_percent)
            imr_improvement_proposal = new_imr_alloc - current_imr_alloc

            # If the the Proposed MR cannot be improved by the proposed amount, there are two options
            # - Max out the resources to fill up the remaining resources on the machine
            # - Resource Stealing from NIMRs
            # Both functions will return VIABLE improvements to the IMR deployment
            nimr_diff_proposal = {}
            if check_improve_mr_viability(redis_db, imr,
                                          imr_improvement_proposal) is False:
                print 'INFO: MR {} to increase {} by {} is not viable'.format(
                    imr.to_string(), current_imr_alloc,
                    imr_improvement_proposal)
                print 'INFO: Attempting to max out the machines resources...'
                imr_improvement_proposal = fill_out_resource(redis_db, imr)

                if imr_improvement_proposal <= 0:
                    print 'INFO: No more space to fill out resources. Stealing from NIMRs'
                    # Calculate a plan to reduce the resource provisioning of NIMRs
                    nimr_diff_proposal, imr_improvement_proposal = create_decrease_nimr_schedule(
                        redis_db, imr, nimr_list, max_stress_weight)
                    print 'INFO: Proposed NIMR {}'.format(nimr_diff_proposal)
                    print 'INFO: New IMR improvement {}'.format(
                        imr_improvement_proposal)

                    if len(nimr_diff_proposal
                           ) == 0 or imr_improvement_proposal == 0:
                        action_taken[imr] = 0
                        continue

            # Decrease the amount of resources provisioned to the NIMR
            for nimr in nimr_diff_proposal:
                action_taken[nimr] = nimr_diff_proposal[nimr]
                new_nimr_alloc = resource_datastore.read_mr_alloc(
                    redis_db, nimr) + nimr_diff_proposal[nimr]
                print 'NIMR stealing: imposing a change of {} on {}'.format(
                    action_taken[nimr], nimr.to_string())
                finalize_mr_provision(redis_db, nimr, new_nimr_alloc)

            # Improving the resource should always be viable at this step
            if check_improve_mr_viability(redis_db, imr,
                                          imr_improvement_proposal):
                new_imr_alloc = imr_improvement_proposal + current_imr_alloc
                action_taken[imr] = imr_improvement_proposal
                finalize_mr_provision(redis_db, imr, new_imr_alloc)
                # NOTE(review): `mr` below is a stale loop variable from the
                # reverted_analytic_provisions loop — likely should be `imr`.
                print 'Improvement Calculated: MR {} increase from {} to {}'.format(
                    mr.to_string(), current_imr_alloc, new_imr_alloc)
                mimr = imr
                break
            else:
                action_taken[imr] = 0
                # NOTE(review): `mr` and `current_mr_allocation` below are
                # stale from the earlier stressing loop — likely should be
                # `imr` / `current_imr_alloc`.
                print 'Improvement Calculated: MR {} failed to improve from {}'.format(
                    mr.to_string(), current_mr_allocation)
                print 'This IMR cannot be improved. Printing some debugging before exiting...'

                print 'Current MR allocation is {}'.format(current_imr_alloc)
                print 'Proposed (failed) allocation is {}, improved by {}'.format(
                    new_imr_alloc, imr_improvement_proposal)

                # Dump per-machine capacity/consumption for each deployment
                # of the failed IMR to aid debugging.
                for deployment in imr.instances:
                    vm_ip, container = deployment
                    capacity = resource_datastore.read_machine_capacity(
                        redis_db, vm_ip)
                    consumption = resource_datastore.read_machine_consumption(
                        redis_db, vm_ip)
                    print 'Machine {} Capacity is {}, and consumption is currently {}'.format(
                        vm_ip, capacity, consumption)

        if mimr is None:
            print 'No viable improvement found'
            break

        #Compare against the baseline at the beginning of the program
        improved_performance = measure_runtime(workload_config,
                                               baseline_trials)
        # improved_performance[preferred_performance_metric] = remove_outlier(improved_performance[preferred_performance_metric])
        improved_mean = mean_list(
            improved_performance[preferred_performance_metric])
        previous_mean = mean_list(
            current_performance[preferred_performance_metric])
        performance_improvement = improved_mean - previous_mean

        # Write a summary of the experiment's iterations to Redis
        tbot_datastore.write_summary_redis(redis_db, experiment_count, mimr,
                                           performance_improvement,
                                           action_taken, analytic_mean,
                                           improved_mean, time_delta.seconds,
                                           cumulative_mr_count)
        current_performance = improved_performance

        # Generating overall performance improvement
        chart_generator.get_summary_performance_charts(redis_db,
                                                       workload_config,
                                                       experiment_count,
                                                       time_start)

        results = tbot_datastore.read_summary_redis(redis_db, experiment_count)
        print 'Results from iteration {} are {}'.format(
            experiment_count, results)

        # Checkpoint MR configurations and print
        current_mr_config = resource_datastore.read_all_mr_alloc(redis_db)
        print_csv_configuration(current_mr_config)

        experiment_count += 1

    print '{} experiments completed'.format(experiment_count)
    print_all_steps(redis_db, experiment_count)

    # Final configuration dump.
    current_mr_config = resource_datastore.read_all_mr_alloc(redis_db)
    for mr in current_mr_config:
        print '{} = {}'.format(mr.to_string(), current_mr_config[mr])

    print_csv_configuration(current_mr_config)
Esempio n. 7
0
def update_mr_config(redis_db, mr_in_play):
    """Re-read the current allocation of each MR in `mr_in_play` from Redis
    and return the refreshed {mr: allocation} mapping."""
    return {
        mr: resource_datastore.read_mr_alloc(redis_db, mr)
        for mr in mr_in_play
    }
Esempio n. 8
0
def create_decrease_nimr_schedule(redis_db, imr, nimr_list, stress_weight):
    """Plan to steal resources from NIMRs colocated with the IMR.

    Returns (nimr_change, proposed_imr_improvement):
        nimr_change: {nimr: allocation delta} for the NIMRs on the chosen
            target VM (deltas come from convert_percent_to_raw, so their
            sign follows `stress_weight`).
        proposed_imr_improvement: amount the IMR can be improved by,
            -1 * total NIMR change divided by the IMR's max containers-per-VM.
    Returns ({}, 0) when no same-resource NIMRs or no colocated NIMRs exist.
    """
    print 'IMR is {}'.format(imr.to_string())

    # Filter out NIMRs that are not the same resource type as mr
    # (iterates a copy so in-place removal is safe).
    for nimr in list(nimr_list):
        print 'NIMR resource: {} '.format(nimr.resource)
        print 'IMR resource: {}'.format(imr.resource)
        if nimr.resource != imr.resource: nimr_list.remove(nimr)

    if len(nimr_list) == 0:
        return {}, 0

    print 'NIMR Debugging: Filtered nimr list is {}'.format(
        [nimr.to_string() for nimr in nimr_list])

    # NOTE(review): never appended to; appears to be dead code.
    reduction_proposal = []

    # Ensure that every deployment has at least one service losing a machine
    vm_to_nimr = {}
    vm_to_service = get_vm_to_service(get_actual_vms())

    # Identify an unique list of relevant NIMRs colocated with IMR instances
    for deployment in imr.instances:
        vm_ip, container = deployment
        if vm_ip in vm_to_nimr:
            continue

        colocated_services = vm_to_service[vm_ip]
        if imr.service_name in colocated_services:
            colocated_services.remove(imr.service_name)
        # Remove Duplicates
        colocated_services = list(set(colocated_services))

        vm_to_nimr[vm_ip] = []
        for nimr in nimr_list:
            if nimr.service_name in colocated_services:
                vm_to_nimr[vm_ip].append(nimr)

    # First pass: pick the VM whose NIMRs yield the SMALLEST total removal
    # (conservative stealing). min_mr_removal is used only for selection.
    min_mr_removal = float('inf')
    target_vm = None

    for vm_ip in vm_to_nimr:
        if len(vm_to_nimr[vm_ip]) == 0:
            print 'no suitable NIMRs for substitution found'
            return {}, 0
        total_removal_amount = 0
        for nimr in vm_to_nimr[vm_ip]:
            reduction_multiplier = containers_per_vm(nimr)
            nimr_alloc = resource_datastore.read_mr_alloc(redis_db, nimr)
            new_alloc = convert_percent_to_raw(nimr, nimr_alloc, stress_weight)
            # Multiply by reduction multiplier since you have multiple NIMR instances
            alloc_diff = (nimr_alloc - new_alloc) * reduction_multiplier[vm_ip]
            total_removal_amount += alloc_diff
        if total_removal_amount < min_mr_removal:
            min_mr_removal = total_removal_amount
            target_vm = vm_ip

    # Second pass: recompute per-NIMR deltas for the chosen VM.
    # NOTE(review): sign here is (new - old), the OPPOSITE of the first
    # pass; the -1 below compensates when deriving the IMR improvement.
    new_nimr_change = {}
    total_change = 0
    for nimr in vm_to_nimr[target_vm]:
        reduction_multiplier = containers_per_vm(nimr)
        nimr_alloc = resource_datastore.read_mr_alloc(redis_db, nimr)
        new_alloc = convert_percent_to_raw(nimr, nimr_alloc, stress_weight)
        # Multiply by reduction multiplier since you have multiple NIMR instances
        alloc_diff = (new_alloc - nimr_alloc) * reduction_multiplier[target_vm]
        new_nimr_change[nimr] = alloc_diff
        total_change += alloc_diff

    # Divide the min mr removal amount among the instances on the machine
    improvement_multiplier = containers_per_vm(imr)
    max_multiplier = max(improvement_multiplier.values())
    proposed_imr_improvement = -1 * float(total_change) / max_multiplier
    print 'New MR alloc {}'.format(new_nimr_change)
    print 'Minimum MR Removal {}'.format(proposed_imr_improvement)

    return new_nimr_change, proposed_imr_improvement
Esempio n. 9
0
def finalize_mr_provision(redis_db, mr, new_alloc):
    """Apply `new_alloc` to an MR and keep the datastore consistent.

    Reads the previous allocation before overwriting the record so that
    machine consumption can be adjusted by the delta.
    """
    previous_alloc = resource_datastore.read_mr_alloc(redis_db, mr)
    resource_modifier.set_mr_provision(mr, new_alloc)
    resource_datastore.write_mr_alloc(redis_db, mr, new_alloc)
    update_machine_consumption(redis_db, mr, new_alloc, previous_alloc)
Esempio n. 10
0
def run(system_config, workload_config, default_mr_config):
    """Older, simpler autotuning loop (no gradient schedules or filtering).

    For up to 10 iterations: stress every policy-selected MR at each
    stress weight, rank results in Redis, then apply the first viable MIMR
    improvement and record a summary.

    Parameters:
        system_config: dict of system/tuning settings.
        workload_config: dict describing the workload; 'tbot_metric' and
            'optimize_for_lowest' select the performance metric.
        default_mr_config: initial MR allocations used to seed Redis.
    """
    redis_host = system_config['redis_host']
    baseline_trials = system_config['baseline_trials']
    experiment_trials = system_config['trials']
    stress_weights = system_config['stress_weights']
    stress_policy = system_config['stress_policy']
    resource_to_stress = system_config['stress_these_resources']
    service_to_stress = system_config['stress_these_services']
    vm_to_stress = system_config['stress_these_machines']
    machine_type = system_config['machine_type']
    quilt_overhead = system_config['quilt_overhead']

    preferred_performance_metric = workload_config['tbot_metric']
    optimize_for_lowest = workload_config['optimize_for_lowest']

    # Always starts fresh (no resume support in this version).
    redis_db = redis.StrictRedis(host=redis_host, port=6379, db=0)
    redis_db.flushall()

    # Initialize Redis and Cluster based on the default resource configuration
    init_cluster_capacities_r(redis_db, machine_type, quilt_overhead)
    init_service_placement_r(redis_db, default_mr_config)
    init_resource_config(redis_db, default_mr_config, machine_type)

    # Run the baseline experiment
    experiment_count = 0
    baseline_performance = measure_baseline(workload_config, baseline_trials)

    # Initialize the current configurations
    # Invariant: MR are the same between iterations
    current_mr_config = resource_datastore.read_all_mr_alloc(redis_db)

    while experiment_count < 10:
        # Get a list of MRs to stress in the form of a list of MRs
        mr_to_stress = generate_mr_from_policy(redis_db, stress_policy)
        print mr_to_stress

        for mr in mr_to_stress:
            print 'Current MR is {}'.format(mr.to_string())
            increment_to_performance = {}
            current_mr_allocation = resource_datastore.read_mr_alloc(
                redis_db, mr)
            print 'Current MR allocation is {}'.format(current_mr_allocation)
            for stress_weight in stress_weights:
                # Provision the stressed allocation and measure the workload.
                new_alloc = convert_percent_to_raw(mr, current_mr_allocation,
                                                   stress_weight)
                set_mr_provision(mr, new_alloc)
                experiment_results = measure_runtime(workload_config,
                                                     experiment_trials)

                #Write results of experiment to Redis
                mean_result = float(
                    sum(experiment_results[preferred_performance_metric])
                ) / len(experiment_results[preferred_performance_metric])
                tbot_datastore.write_redis_ranking(
                    redis_db, experiment_count, preferred_performance_metric,
                    mean_result, mr, stress_weight)

                # Remove the effect of the resource stressing
                # NOTE(review): the reverted allocation is computed but
                # set_mr_provision is never called with it — the stress
                # appears to never actually be reverted here. Confirm.
                new_alloc = convert_percent_to_raw(mr, current_mr_allocation,
                                                   0)
                increment_to_performance[stress_weight] = experiment_results

            # Write the results of the iteration to Redis
            tbot_datastore.write_redis_results(redis_db, mr,
                                               increment_to_performance,
                                               experiment_count,
                                               preferred_performance_metric)

        # Recover the results of the experiment from Redis
        max_stress_weight = min(stress_weights)
        mimr_list = tbot_datastore.get_top_n_mimr(
            redis_db,
            experiment_count,
            preferred_performance_metric,
            max_stress_weight,
            optimize_for_lowest=optimize_for_lowest,
            num_results_returned=10)

        # Try all the MIMRs in the list until a viable improvement is determined
        # Improvement Amount
        mimr = None
        action_taken = 0
        print 'The MR improvement is {}'.format(max_stress_weight)
        for mr_score in mimr_list:
            mr, score = mr_score
            improvement_percent = improve_mr_by(redis_db, mr,
                                                max_stress_weight)
            current_mr_allocation = resource_datastore.read_mr_alloc(
                redis_db, mr)
            new_alloc = convert_percent_to_raw(mr, current_mr_allocation,
                                               improvement_percent)
            improvement_amount = new_alloc - current_mr_allocation
            action_taken = improvement_amount
            if check_improve_mr_viability(redis_db, mr, improvement_amount):
                # Apply the improvement and sync Redis + machine consumption.
                set_mr_provision(mr, new_alloc)
                print 'Improvement Calculated: MR {} increase from {} to {}'.format(
                    mr.to_string(), current_mr_allocation, new_alloc)
                old_alloc = resource_datastore.read_mr_alloc(redis_db, mr)
                resource_datastore.write_mr_alloc(redis_db, mr, new_alloc)
                update_machine_consumption(redis_db, mr, new_alloc, old_alloc)
                current_mr_config = update_mr_config(redis_db,
                                                     current_mr_config)
                mimr = mr
                break
            else:
                print 'Improvement Calculated: MR {} failed to improve from {} to {}'.format(
                    mr.to_string(), current_mr_allocation, new_alloc)

        if mimr is None:
            print 'No viable improvement found'
            break

        #Compare against the baseline at the beginning of the program
        improved_performance = measure_runtime(workload_config,
                                               baseline_trials)
        print improved_performance
        improved_mean = sum(
            improved_performance[preferred_performance_metric]) / float(
                len(improved_performance[preferred_performance_metric]))
        baseline_mean = sum(
            baseline_performance[preferred_performance_metric]) / float(
                len(baseline_performance[preferred_performance_metric]))
        performance_improvement = improved_mean - baseline_mean

        # Write a summary of the experiment's iterations to Redis
        tbot_datastore.write_summary_redis(redis_db, experiment_count, mimr,
                                           performance_improvement,
                                           action_taken)
        baseline_performance = improved_performance

        results = tbot_datastore.read_summary_redis(redis_db, experiment_count)
        print 'Results from iteration {} are {}'.format(
            experiment_count, results)
        experiment_count += 1

        # TODO: Handle False Positive
        # TODO: Compare against performance condition -- for now only do some number of experiments

    print '{} experiments completed'.format(experiment_count)
    print_all_steps(redis_db, experiment_count)
    for mr in current_mr_config:
        print '{} = {}'.format(mr.to_string(), current_mr_config[mr])