Esempio n. 1
0
def simulate_job_flows(job_flows, pool, EC2):
    """Run the job flows twice: once against `pool` and once against an
    empty reserve pool, and return the logged hours of both runs.

    Both results are handed to convert_to_yearly_estimated_hours together
    with the time spanned by the job flows (latest 'enddatetime' minus
    earliest 'startdatetime') before they are returned.

    Returns:
        optimal_logged_hours: The hours logged by the simulation that used
            the given reserved instance pool.

        demand_logged_hours: The hours logged by the simulation that used
            an empty reserve pool, i.e. no reserved instances. Use this as
            a control group.
    """
    earliest_start = min([flow.get("startdatetime") for flow in job_flows])
    latest_end = max([flow.get("enddatetime") for flow in job_flows])
    span = latest_end - earliest_start

    no_reserves = EC2.init_empty_reserve_pool()
    with_pool = Simulator(job_flows, pool, EC2)
    on_demand_only = Simulator(job_flows, no_reserves, EC2)
    optimal_logged_hours = with_pool.run()
    demand_logged_hours = on_demand_only.run()

    # The return value of the helper is ignored, so it presumably rescales
    # the logged hours in place.
    for logged in (demand_logged_hours, optimal_logged_hours):
        convert_to_yearly_estimated_hours(logged, span)
    return optimal_logged_hours, demand_logged_hours
Esempio n. 2
0
    def record_log_data(self):
        """Run a fresh simulation with a log-hours observer attached, so
        that total logged hours can be graphed over time.

        Returns:
            A (logged_hours_per_hour, event_times) tuple holding the two
            containers that were given to the SimulationObserver before
            the simulation ran.
        """
        event_times = {}
        hours_log = self.EC2.init_empty_all_instance_types()

        simulator = Simulator(self.job_flows, self.pool, self.EC2)
        simulator.attach_log_hours_observer(
            SimulationObserver(event_times, hours_log))
        simulator.run()

        return hours_log, event_times
Esempio n. 3
0
 def record_used_instances(self):
     """Run a fresh simulation with a pool-use observer attached, to
     capture which instances were in the 'used_pool' at every point of
     the simulation.

     Returns:
         A (used_instances_over_time, event_times) tuple holding the two
         containers that were given to the SimulationObserver before the
         simulation ran.
     """
     event_times = {}
     usage_log = self.EC2.init_empty_all_instance_types()

     simulator = Simulator(self.job_flows, self.pool, self.EC2)
     simulator.attach_pool_use_observer(
         SimulationObserver(event_times, usage_log))
     simulator.run()

     return usage_log, event_times
Esempio n. 4
0
    def delta_reserved_instance_hours_generator(self, instance_type, pool):
        """Endless generator of the change in logged hours caused by each
        additional reserved instance of `instance_type`.

        Works on a deep copy of `pool`, so the caller's pool is untouched.
        Only the first entry of EC2.RESERVE_PRIORITIES is incremented and
        measured.

        Yields:
            The logged hours for (first priority, instance_type) after one
            more instance was added, minus the value from the previous run.
        """
        working_pool = copy.deepcopy(pool)
        priorities = self.EC2.RESERVE_PRIORITIES
        assert (len(priorities) > 0)
        top_util = priorities[0]

        # The simulator is built once and re-run after each increment; it
        # presumably reads the (mutated) working_pool on every run.
        simulator = Simulator(self.job_flows, working_pool, self.EC2)
        last_hours = simulator.run()[top_util][instance_type]

        while True:
            working_pool[top_util][instance_type] += 1
            hours_now = simulator.run()[top_util][instance_type]
            yield (hours_now - last_hours)
            last_hours = hours_now
Esempio n. 5
0
    def delta_reserved_instance_hours_generator(self, instance_type, pool):
        """Endless generator of the change in logged hours caused by each
        additional reserved instance of `instance_type`.

        Works on a deep copy of `pool`, so the caller's pool is untouched.
        Only the first entry of EC2.RESERVE_PRIORITIES is incremented and
        measured.

        Yields:
            The logged hours for (first priority, instance_type) after one
            more instance was added, minus the value from the previous run.
        """
        working_pool = copy.deepcopy(pool)
        priorities = self.EC2.RESERVE_PRIORITIES
        assert (len(priorities) > 0)
        top_util = priorities[0]

        # The simulator is built once and re-run after each increment; it
        # presumably reads the (mutated) working_pool on every run.
        simulator = Simulator(self.job_flows, working_pool, self.EC2)
        last_hours = simulator.run()[top_util][instance_type]

        while True:
            working_pool[top_util][instance_type] += 1
            hours_now = simulator.run()[top_util][instance_type]
            yield (hours_now - last_hours)
            last_hours = hours_now
Esempio n. 6
0
def simulate_job_flows(job_flows, pool, EC2):
    """Run the job flows twice: once against `pool` and once against an
    empty reserve pool, and return the logged hours of both runs.

    Both results are handed to convert_to_yearly_estimated_hours together
    with the time spanned by the job flows (latest 'enddatetime' minus
    earliest 'startdatetime') before they are returned.

    Returns:
        optimal_logged_hours: The hours logged by the simulation that used
            the given reserved instance pool.

        demand_logged_hours: The hours logged by the simulation that used
            an empty reserve pool, i.e. no reserved instances. Use this as
            a control group.
    """
    earliest_start = min([flow.get('startdatetime') for flow in job_flows])
    latest_end = max([flow.get('enddatetime') for flow in job_flows])
    span = latest_end - earliest_start

    no_reserves = EC2.init_empty_reserve_pool()
    with_pool = Simulator(job_flows, pool, EC2)
    on_demand_only = Simulator(job_flows, no_reserves, EC2)
    optimal_logged_hours = with_pool.run()
    demand_logged_hours = on_demand_only.run()

    # The return value of the helper is ignored, so it presumably rescales
    # the logged hours in place.
    for logged in (demand_logged_hours, optimal_logged_hours):
        convert_to_yearly_estimated_hours(logged, span)
    return optimal_logged_hours, demand_logged_hours
Esempio n. 7
0
    def record_log_data(self):
        """Run a fresh simulation with a log-hours observer attached, so
        that total logged hours can be graphed over time.

        Returns:
            A (logged_hours_per_hour, event_times) tuple holding the two
            containers that were given to the SimulationObserver before
            the simulation ran.
        """
        event_times = {}
        hours_log = self.EC2.init_empty_all_instance_types()

        simulator = Simulator(self.job_flows, self.pool, self.EC2)
        simulator.attach_log_hours_observer(
            SimulationObserver(event_times, hours_log))
        simulator.run()

        return hours_log, event_times
Esempio n. 8
0
 def record_used_instances(self):
     """Run a fresh simulation with a pool-use observer attached, to
     capture which instances were in the 'used_pool' at every point of
     the simulation.

     Returns:
         A (used_instances_over_time, event_times) tuple holding the two
         containers that were given to the SimulationObserver before the
         simulation ran.
     """
     event_times = {}
     usage_log = self.EC2.init_empty_all_instance_types()

     simulator = Simulator(self.job_flows, self.pool, self.EC2)
     simulator.attach_pool_use_observer(
         SimulationObserver(event_times, usage_log))
     simulator.run()

     return usage_log, event_times
Esempio n. 9
0
    def optimize_reserve_pool(self, instance_type, pool):
        """The brute force approach will take a single instance type and
        optimize the instance pool for it, by using the job_flows in
        simulations.

        Each round simulates adding one instance of instance_type to every
        utilization class in turn and records the resulting cost. The
        cheapest addition is kept when it costs no more than the best cost
        seen so far. Rounds repeat while the cheapest cost of a round does
        not exceed the cost of the round before it.

        Args:
            instance_type: Key of the instance type whose reserve counts
                are optimized.
            pool: Reserve pool indexed as
                pool[utilization_class][instance_type]. On return it holds
                the best counts found for instance_type.

        Mutates: pool
        """
        simulator = Simulator(self.job_flows, pool, self.EC2)
        previous_cost = float('inf')
        current_min_instances = self.EC2.init_reserve_counts(
            pool, instance_type)

        # Calculate the default cost first.
        logged_hours = simulator.run()
        convert_to_yearly_estimated_hours(logged_hours,
                                          self.job_flows_interval)
        current_min_cost, _ = self.EC2.calculate_cost(logged_hours, pool)
        current_cost = current_min_cost
        delta_reserved_hours = self.delta_reserved_instance_hours_generator(
            instance_type, pool)

        while previous_cost >= current_cost:
            current_simulation_costs = self.EC2.init_reserve_costs(
                float('inf'))
            # NOTE: the argument is evaluated even when debug logging is
            # disabled, so the generator is advanced once per round.
            logging.debug("Simulation hours added %d",
                          next(delta_reserved_hours))
            # Add a single instance to each utilization type, and
            # record the costs. Choose the minimum cost utilization type.
            for utilization_class in pool:
                # Reset the min instances to the best values.
                for current_util in pool:
                    pool[current_util][instance_type] = (
                        current_min_instances[current_util])

                pool[utilization_class][instance_type] = (
                    current_min_instances[utilization_class] + 1)
                logged_hours = simulator.run()

                convert_to_yearly_estimated_hours(logged_hours,
                                                  self.job_flows_interval)
                cost, _ = self.EC2.calculate_cost(logged_hours, pool)
                current_simulation_costs[utilization_class] = cost
            previous_cost = current_cost
            current_cost = min(current_simulation_costs.values())
            # On ties the last utilization class with the minimum cost wins.
            min_util_level = None
            for utilization_class in current_simulation_costs:
                if current_simulation_costs[utilization_class] == current_cost:
                    min_util_level = utilization_class

            # Keep the extra instance only when it is no more expensive
            # than the best cost so far. Otherwise the counts stay as they
            # are and the while condition ends the search.
            if current_cost <= current_min_cost:
                current_min_cost = current_cost
                current_min_instances[min_util_level] += 1
            # Reset to best instance pool. Each utilization class gets its
            # own best count.
            for current_util in pool:
                pool[current_util][instance_type] = (
                    current_min_instances[current_util])
            logging.debug("Current best minimum cost for %s: %d",
                          instance_type, current_min_cost)
        for utilization_class in current_min_instances:
            pool[utilization_class][instance_type] = (
                current_min_instances[utilization_class])
Esempio n. 10
0
    def optimize_reserve_pool(self, instance_type, pool):
        """The brute force approach will take a single instance type and
        optimize the instance pool for it, by using the job_flows in
        simulations.

        Each round simulates adding one instance of instance_type to every
        utilization class in turn and records the resulting cost. The
        cheapest addition is kept when it costs no more than the best cost
        seen so far. Rounds repeat while the cheapest cost of a round does
        not exceed the cost of the round before it.

        Args:
            instance_type: Key of the instance type whose reserve counts
                are optimized.
            pool: Reserve pool indexed as
                pool[utilization_class][instance_type]. On return it holds
                the best counts found for instance_type.

        Mutates: pool
        """
        simulator = Simulator(self.job_flows, pool, self.EC2)
        previous_cost = float('inf')
        current_min_instances = self.EC2.init_reserve_counts(
            pool, instance_type)

        # Calculate the default cost first.
        logged_hours = simulator.run()
        convert_to_yearly_estimated_hours(logged_hours,
                                          self.job_flows_interval)
        current_min_cost, _ = self.EC2.calculate_cost(logged_hours, pool)
        # Lazy %-formatting: the message is only built when debug logging
        # is enabled.
        logging.debug('Current min cost: %s', current_min_cost)
        current_cost = current_min_cost
        delta_reserved_hours = self.delta_reserved_instance_hours_generator(
            instance_type, pool)

        while previous_cost >= current_cost:
            current_simulation_costs = self.EC2.init_reserve_costs(
                float('inf'))
            # NOTE: the argument is evaluated even when debug logging is
            # disabled, so the generator is advanced once per round.
            logging.debug("Simulation hours added %d",
                          next(delta_reserved_hours))
            # Add a single instance to each utilization type, and
            # record the costs. Choose the minimum cost utilization type.
            for utilization_class in pool:
                # Reset the min instances to the best values.
                for current_util in pool:
                    pool[current_util][instance_type] = (
                        current_min_instances[current_util])

                pool[utilization_class][instance_type] = (
                    current_min_instances[utilization_class] + 1)
                logged_hours = simulator.run()

                convert_to_yearly_estimated_hours(logged_hours,
                                                  self.job_flows_interval)
                cost, _ = self.EC2.calculate_cost(logged_hours, pool)
                current_simulation_costs[utilization_class] = cost
            previous_cost = current_cost
            current_cost = min(current_simulation_costs.values())
            # On ties the last utilization class with the minimum cost wins.
            min_util_level = None
            for utilization_class in current_simulation_costs:
                if current_simulation_costs[utilization_class] == current_cost:
                    min_util_level = utilization_class

            # Keep the extra instance only when it is no more expensive
            # than the best cost so far. Otherwise the counts stay as they
            # are and the while condition ends the search.
            if current_cost <= current_min_cost:
                current_min_cost = current_cost
                current_min_instances[min_util_level] += 1
            # Reset to best instance pool. Each utilization class gets its
            # own best count.
            for current_util in pool:
                pool[current_util][instance_type] = (
                    current_min_instances[current_util])
            logging.debug("Current best minimum cost for %s: %d",
                          instance_type, current_min_cost)
        for utilization_class in current_min_instances:
            pool[utilization_class][instance_type] = (
                current_min_instances[utilization_class])