def simulate_job_flows(job_flows, pool, EC2):
    """Simulate the job flows with the given pool and with an empty pool.

    Two simulations are run over the same job flows: one using ``pool``
    and one using the pool returned by ``EC2.init_empty_reserve_pool()``
    (pure on-demand, no reserved instances). Both results are then passed
    to ``convert_to_yearly_estimated_hours`` together with the span
    between the earliest 'startdatetime' and the latest 'enddatetime'.

    Returns:
        optimal_logged_hours: The amount of hours that each reserved
            instance used from the given job flows.
        demand_logged_hours: The amount of hours used per instance on
            purely on-demand instances. Use this as a control group.
    """
    # Span covered by the job flows: earliest start to latest end.
    start_times = [flow.get("startdatetime") for flow in job_flows]
    first_start = min(start_times)
    end_times = [flow.get("enddatetime") for flow in job_flows]
    last_end = max(end_times)
    flows_span = last_end - first_start

    on_demand_only_pool = EC2.init_empty_reserve_pool()
    reserved_simulator = Simulator(job_flows, pool, EC2)
    on_demand_simulator = Simulator(job_flows, on_demand_only_pool, EC2)

    optimal_logged_hours = reserved_simulator.run()
    demand_logged_hours = on_demand_simulator.run()

    # The conversion's return value is not used, so it presumably rescales
    # the logged hours in place -- confirm against the helper.
    for logged_hours in (demand_logged_hours, optimal_logged_hours):
        convert_to_yearly_estimated_hours(logged_hours, flows_span)

    return optimal_logged_hours, demand_logged_hours
def record_log_data(self):
    """Run a simulation with a log-hours observer attached.

    Gathers the record information used to graph total hours logged in
    a simulation over time.

    Returns:
        A ``(logged_hours_per_hour, event_times)`` tuple: the structure
        built by ``EC2.init_empty_all_instance_types()`` and a dict, both
        of which were handed to the ``SimulationObserver`` before the
        simulation ran.
    """
    hours_record = self.EC2.init_empty_all_instance_types()
    timeline = {}

    simulation = Simulator(self.job_flows, self.pool, self.EC2)
    # The observer is given both containers; presumably it fills them
    # while the simulation runs -- confirm against SimulationObserver.
    simulation.attach_log_hours_observer(
        SimulationObserver(timeline, hours_record))
    simulation.run()

    return hours_record, timeline
def record_used_instances(self):
    """Run a simulation with a pool-use observer attached.

    Gathers information about which instances were in the 'used_pool'
    at all points of the job simulation.

    Returns:
        A ``(used_instances_over_time, event_times)`` tuple: the
        structure built by ``EC2.init_empty_all_instance_types()`` and a
        dict, both of which were handed to the ``SimulationObserver``
        before the simulation ran.
    """
    usage_record = self.EC2.init_empty_all_instance_types()
    timeline = {}

    simulation = Simulator(self.job_flows, self.pool, self.EC2)
    # The observer is given both containers; presumably it fills them
    # while the simulation runs -- confirm against SimulationObserver.
    simulation.attach_pool_use_observer(
        SimulationObserver(timeline, usage_record))
    simulation.run()

    return usage_record, timeline
def delta_reserved_instance_hours_generator(self, instance_type, pool):
    """Endlessly yield the change in logged hours per added instance.

    Works on a deep copy of ``pool``, so the caller's pool is never
    modified. Each step adds one instance of ``instance_type`` to the
    first entry of ``EC2.RESERVE_PRIORITIES``, re-runs the simulation and
    yields the difference in logged hours for that (utilization,
    instance type) slot compared with the previous run.

    Args:
        instance_type: instance type key to add instances for.
        pool: starting pool; it is copied, not mutated.

    Yields:
        Logged hours of the current run minus those of the previous run.
    """
    working_pool = copy.deepcopy(pool)
    priorities = self.EC2.RESERVE_PRIORITIES
    assert len(priorities) > 0
    top_util = priorities[0]

    # The simulator is built once and re-run after each change to
    # working_pool; presumably it keeps a reference to the pool it was
    # given -- confirm against Simulator.
    simulation = Simulator(self.job_flows, working_pool, self.EC2)
    last_hours = simulation.run()[top_util][instance_type]

    while True:
        working_pool[top_util][instance_type] += 1
        new_hours = simulation.run()[top_util][instance_type]
        yield new_hours - last_hours
        last_hours = new_hours
def delta_reserved_instance_hours_generator(self, instance_type, pool):
    """Endlessly yield the change in logged hours per added instance.

    Works on a deep copy of ``pool``, so the caller's pool is never
    modified. Each step adds one instance of ``instance_type`` to the
    first entry of ``EC2.RESERVE_PRIORITIES``, re-runs the simulation and
    yields the difference in logged hours for that (utilization,
    instance type) slot compared with the previous run.

    Args:
        instance_type: instance type key to add instances for.
        pool: starting pool; it is copied, not mutated.

    Yields:
        Logged hours of the current run minus those of the previous run.
    """
    scratch_pool = copy.deepcopy(pool)
    reserve_priorities = self.EC2.RESERVE_PRIORITIES
    assert len(reserve_priorities) > 0
    first_priority = reserve_priorities[0]

    # One simulator is reused for every run; presumably it reads
    # scratch_pool by reference each time -- confirm against Simulator.
    simulation = Simulator(self.job_flows, scratch_pool, self.EC2)
    hours_before = simulation.run()[first_priority][instance_type]

    while True:
        scratch_pool[first_priority][instance_type] += 1
        hours_after = simulation.run()[first_priority][instance_type]
        yield hours_after - hours_before
        hours_before = hours_after
def simulate_job_flows(job_flows, pool, EC2):
    """Simulate the job flows with the given pool and with an empty pool.

    Two simulations are run over the same job flows: one using ``pool``
    and one using the pool returned by ``EC2.init_empty_reserve_pool()``
    (pure on-demand, no reserved instances). Both results are then passed
    to ``convert_to_yearly_estimated_hours`` together with the span
    between the earliest 'startdatetime' and the latest 'enddatetime'.

    Returns:
        optimal_logged_hours: The amount of hours that each reserved
            instance used from the given job flows.
        demand_logged_hours: The amount of hours used per instance on
            purely on-demand instances. Use this as a control group.
    """
    # Span covered by the job flows: earliest start to latest end.
    start_times = [flow.get('startdatetime') for flow in job_flows]
    first_start = min(start_times)
    end_times = [flow.get('enddatetime') for flow in job_flows]
    last_end = max(end_times)
    flows_span = last_end - first_start

    on_demand_only_pool = EC2.init_empty_reserve_pool()
    reserved_simulator = Simulator(job_flows, pool, EC2)
    on_demand_simulator = Simulator(job_flows, on_demand_only_pool, EC2)

    optimal_logged_hours = reserved_simulator.run()
    demand_logged_hours = on_demand_simulator.run()

    # The conversion's return value is not used, so it presumably rescales
    # the logged hours in place -- confirm against the helper.
    for logged_hours in (demand_logged_hours, optimal_logged_hours):
        convert_to_yearly_estimated_hours(logged_hours, flows_span)

    return optimal_logged_hours, demand_logged_hours
def optimize_reserve_pool(self, instance_type, pool):
    """Optimize the reserved instance counts for a single instance type.

    Brute-force approach driven by simulations of ``self.job_flows``.
    Each round tries adding one instance of ``instance_type`` to every
    utilization class in turn, simulates, and records the yearly cost of
    each candidate. The cheapest candidate is kept if it is no more
    expensive than the best cost seen so far. Rounds repeat while the
    best candidate cost does not exceed the previous round's cost.

    Args:
        instance_type: instance type key whose counts are optimized.
        pool: mapping of utilization class to per-instance-type counts.
            It is handed to the simulator and edited in place between
            runs.

    Mutates: pool
    """
    simulator = Simulator(self.job_flows, pool, self.EC2)
    current_min_instances = self.EC2.init_reserve_counts(
        pool, instance_type)

    # Calculate the default cost first.
    logged_hours = simulator.run()
    convert_to_yearly_estimated_hours(logged_hours, self.job_flows_interval)
    current_min_cost, _ = self.EC2.calculate_cost(logged_hours, pool)
    previous_cost = float('inf')
    current_cost = current_min_cost

    delta_reserved_hours = self.delta_reserved_instance_hours_generator(
        instance_type, pool)

    while previous_cost >= current_cost:
        current_simulation_costs = self.EC2.init_reserve_costs(float('inf'))

        # Add a single instance to each utilization type, and
        # record the costs. Choose the minimum cost utilization type.
        # NOTE: the generator is advanced once per round even when debug
        # logging is disabled, because the argument is evaluated eagerly.
        logging.debug("Simulation hours added %d",
                      next(delta_reserved_hours))
        for utilization_class in pool:
            # Reset the min instances to the best values.
            for current_util in pool:
                pool[current_util][instance_type] = (
                    current_min_instances[current_util])

            pool[utilization_class][instance_type] = (
                current_min_instances[utilization_class] + 1)
            logged_hours = simulator.run()
            convert_to_yearly_estimated_hours(logged_hours,
                                              self.job_flows_interval)
            cost, _ = self.EC2.calculate_cost(logged_hours, pool)
            current_simulation_costs[utilization_class] = cost

        previous_cost = current_cost
        current_cost = min(current_simulation_costs.values())

        # On ties, the last utilization class in iteration order wins.
        min_util_level = None
        for utilization_class in current_simulation_costs:
            if current_simulation_costs[utilization_class] == current_cost:
                min_util_level = utilization_class

        # Record the new cost if adding one instance is no worse than the
        # best so far. Otherwise the loop condition ends the search, since
        # adding more will be worse.
        if current_cost <= current_min_cost:
            current_min_cost = current_cost
            current_min_instances[min_util_level] += 1

        # Reset to best instance pool. Each class gets its own best count.
        for current_util in pool:
            pool[current_util][instance_type] = (
                current_min_instances[current_util])
        logging.debug("Current best minimum cost for %s: %d",
                      instance_type, current_min_cost)

    for utilization_class in current_min_instances:
        pool[utilization_class][instance_type] = (
            current_min_instances[utilization_class])
def optimize_reserve_pool(self, instance_type, pool):
    """Optimize the reserved instance counts for a single instance type.

    Brute-force approach driven by simulations of ``self.job_flows``.
    Each round tries adding one instance of ``instance_type`` to every
    utilization class in turn, simulates, and records the yearly cost of
    each candidate. The cheapest candidate is kept if it is no more
    expensive than the best cost seen so far. Rounds repeat while the
    best candidate cost does not exceed the previous round's cost.

    Args:
        instance_type: instance type key whose counts are optimized.
        pool: mapping of utilization class to per-instance-type counts.
            It is handed to the simulator and edited in place between
            runs.

    Mutates: pool
    """
    simulator = Simulator(self.job_flows, pool, self.EC2)
    current_min_instances = self.EC2.init_reserve_counts(pool, instance_type)

    # Calculate the default cost first.
    logged_hours = simulator.run()
    convert_to_yearly_estimated_hours(logged_hours, self.job_flows_interval)
    current_min_cost, _ = self.EC2.calculate_cost(logged_hours, pool)
    # Lazy logging arguments: same output, no formatting when disabled.
    logging.debug('Current min cost: %s', current_min_cost)
    previous_cost = float('inf')
    current_cost = current_min_cost

    delta_reserved_hours = (
        self.delta_reserved_instance_hours_generator(instance_type, pool))

    while previous_cost >= current_cost:
        current_simulation_costs = (
            self.EC2.init_reserve_costs(float('inf')))

        # Add a single instance to each utilization type, and
        # record the costs. Choose the minimum cost utilization type.
        # NOTE: the generator is advanced once per round even when debug
        # logging is disabled, because the argument is evaluated eagerly.
        logging.debug("Simulation hours added %d",
                      next(delta_reserved_hours))
        for utilization_class in pool:
            # Reset the min instances to the best values.
            for current_util in pool:
                pool[current_util][instance_type] = (
                    current_min_instances[current_util])

            pool[utilization_class][instance_type] = (
                current_min_instances[utilization_class] + 1)
            logged_hours = simulator.run()
            convert_to_yearly_estimated_hours(logged_hours,
                                              self.job_flows_interval)
            cost, _ = self.EC2.calculate_cost(logged_hours, pool)
            current_simulation_costs[utilization_class] = cost

        previous_cost = current_cost
        current_cost = min(current_simulation_costs.values())

        # On ties, the last utilization class in iteration order wins.
        min_util_level = None
        for utilization_class in current_simulation_costs:
            if current_simulation_costs[utilization_class] == current_cost:
                min_util_level = utilization_class

        # Record the new cost if adding one instance is no worse than the
        # best so far. Otherwise the loop condition ends the search, since
        # adding more will be worse.
        if current_cost <= current_min_cost:
            current_min_cost = current_cost
            current_min_instances[min_util_level] += 1

        # Reset to best instance pool. Each class gets its own best count.
        for current_util in pool:
            pool[current_util][instance_type] = (
                current_min_instances[current_util])
        logging.debug("Current best minimum cost for %s: %d",
                      instance_type, current_min_cost)

    for utilization_class in current_min_instances:
        pool[utilization_class][instance_type] = (
            current_min_instances[utilization_class])