def graph_over_time(self, info_over_time, hours_line,
                    xlabel='Time job ran (in hours)',
                    ylabel='Instances run'):
    """Draw one filled time-series figure per instance type.

    Args:
        info_over_time: nested mapping indexed as
            info_over_time[utilization_class][instance_type], yielding the
            sequence of y-values to plot for that class and type.
        hours_line: mapping of instance_type -> sequence of date values,
            converted with ``self.mdates.date2num`` to form the x-axis.
        xlabel: label for the x-axis.
        ylabel: label for the y-axis.

    The x-axis runs from the earliest 'startdatetime' to the latest
    'enddatetime' found in ``self.job_flows``.
    """
    # Local import so the block does not rely on a file-level import.
    from datetime import timedelta

    begin_time = min(job.get('startdatetime') for job in self.job_flows)
    end_time = max(job.get('enddatetime') for job in self.job_flows)

    # If end time is during the day, round to the next day so the graph
    # looks pretty. Adding a timedelta (rather than replace(day=day + 1))
    # rolls over month and year boundaries instead of raising ValueError
    # on the last day of a month.
    if end_time.hour != 0:
        end_time = end_time.replace(hour=0) + timedelta(days=1)

    # Reversed copy so demand is graphed first (per the original comment it
    # should be the largest); later classes are filled on top of it. The
    # shared priority list itself is left untouched.
    utilization_classes = list(reversed(self.EC2.ALL_UTILIZATION_PRIORITIES))

    for instance_type in instance_types_in_pool(info_over_time):
        # Locators / formatters to pretty up the graph. Created per figure
        # so each axis gets its own locator objects.
        hours = self.mdates.HourLocator(byhour=None, interval=1)
        days = self.mdates.DayLocator(bymonthday=None, interval=1)
        formatter = self.mdates.DateFormatter("%m/%d ")

        fig = self.plt.figure()
        fig.suptitle(instance_type)
        ax = fig.add_subplot(111)
        date_list = self.mdates.date2num(hours_line[instance_type])

        for utilization_class in utilization_classes:
            values = info_over_time[utilization_class][instance_type]
            colour = self.colors[utilization_class]
            # Black outline of the series.
            ax.plot(date_list, values, color='#000000')
            # Single-point plot in the class colour; it carries the label
            # that ax.legend() picks up.
            ax.plot(date_list[0], values[0], color=colour,
                    label=utilization_class)
            ax.fill_between(date_list, values, color=colour, alpha=1.0)

        ax.xaxis.set_major_locator(days)
        ax.xaxis.set_major_formatter(formatter)
        ax.xaxis.set_minor_locator(hours)
        ax.set_xlabel(xlabel)
        ax.set_ylabel(ylabel)
        ax.set_xlim(begin_time, end_time)
        ax.grid(True)
        ax.legend()
        self.plt.xticks(rotation='vertical')
def graph_over_time(self, info_over_time, hours_line,
                    xlabel='Time job ran (in hours)',
                    ylabel='Instances run'):
    """Draw one filled time-series figure per instance type.

    Args:
        info_over_time: nested mapping indexed as
            info_over_time[utilization_class][instance_type], yielding the
            sequence of y-values to plot for that class and type.
        hours_line: mapping of instance_type -> sequence of date values,
            converted with ``self.mdates.date2num`` to form the x-axis.
        xlabel: label for the x-axis.
        ylabel: label for the y-axis.

    The x-axis runs from the earliest 'startdatetime' to the latest
    'enddatetime' found in ``self.job_flows``.
    """
    # Local import so the block does not rely on a file-level import.
    from datetime import timedelta

    begin_time = min(job.get('startdatetime') for job in self.job_flows)
    end_time = max(job.get('enddatetime') for job in self.job_flows)

    # If end time is during the day, round to the next day so the graph
    # looks pretty. Adding a timedelta (rather than replace(day=day + 1))
    # rolls over month and year boundaries instead of raising ValueError
    # on the last day of a month.
    if end_time.hour != 0:
        end_time = end_time.replace(hour=0) + timedelta(days=1)

    # Reversed copy so demand is graphed first (per the original comment it
    # should be the largest); later classes are filled on top of it. The
    # shared priority list itself is left untouched.
    utilization_classes = list(reversed(self.EC2.ALL_UTILIZATION_PRIORITIES))

    for instance_type in instance_types_in_pool(info_over_time):
        # Locators / formatters to pretty up the graph. Created per figure
        # so each axis gets its own locator objects.
        hours = self.mdates.HourLocator(byhour=None, interval=1)
        days = self.mdates.DayLocator(bymonthday=None, interval=1)
        formatter = self.mdates.DateFormatter("%m/%d ")

        fig = self.plt.figure()
        fig.suptitle(instance_type)
        ax = fig.add_subplot(111)
        date_list = self.mdates.date2num(hours_line[instance_type])

        for utilization_class in utilization_classes:
            values = info_over_time[utilization_class][instance_type]
            colour = self.colors[utilization_class]
            # Black outline of the series.
            ax.plot(date_list, values, color='#000000')
            # Single-point plot in the class colour; it carries the label
            # that ax.legend() picks up.
            ax.plot(date_list[0], values[0], color=colour,
                    label=utilization_class)
            ax.fill_between(date_list, values, color=colour, alpha=1.0)

        ax.xaxis.set_major_locator(days)
        ax.xaxis.set_major_formatter(formatter)
        ax.xaxis.set_minor_locator(hours)
        ax.set_xlabel(xlabel)
        ax.set_ylabel(ylabel)
        ax.set_xlim(begin_time, end_time)
        ax.grid(True)
        ax.legend()
        self.plt.xticks(rotation='vertical')
def output_statistics(log, pool, demand_log, EC2):
    """Print the reserved-instance table, hours used, and cost summary.

    Args:
        log: usage log; costed together with ``pool`` via
            ``EC2.calculate_cost``.
        pool: nested mapping pool[utilization_class][instance_type] giving
            the optimal number of reserved instances.
        demand_log: nested mapping
            demand_log[utilization_class][instance_type] of hours used;
            costed against an empty reserve pool to get the on-demand cost.
        EC2: object providing ``init_empty_reserve_pool``,
            ``calculate_cost`` and ``RESERVE_PRIORITIES``.

    Output goes to stdout; nothing is returned.
    """
    # NOTE(review): the original indentation was lost; the blank separator
    # lines are assumed to sit between sections, not inside the loops.
    # Every print uses the single-argument parenthesised form, which behaves
    # the same as a Python 2 print statement and a Python 3 print call.
    empty_pool = EC2.init_empty_reserve_pool()
    optimized_cost, optimized_upfront_cost = EC2.calculate_cost(log, pool)
    demand_cost, _ = EC2.calculate_cost(demand_log, empty_pool)

    owned_reserved_instances = get_owned_reserved_instances(EC2)
    buy_instances = calculate_instances_to_buy(
        owned_reserved_instances, pool, EC2)

    # set.union returns a new set; the original discarded it, so instance
    # types that are owned but absent from the optimal pool were never
    # listed. Keep the result, and sort it so row order is stable.
    all_instances = sorted(
        instance_types_in_pool(pool).union(
            instance_types_in_pool(owned_reserved_instances)))

    print("%20s %15s %15s %15s" % ('', 'Optimal', 'Owned', 'To Purchase'))
    for utilization_class in EC2.RESERVE_PRIORITIES:
        print("%-20s" % (utilization_class))
        optimal = pool[utilization_class]
        owned = owned_reserved_instances[utilization_class]
        to_buy = buy_instances[utilization_class]
        for machine in all_instances:
            # A type may exist in only one of the pools now that the union
            # is honoured; a missing count is reported as 0.
            print("%20s %15d %15d %15d" % (
                machine,
                optimal.get(machine, 0),
                owned.get(machine, 0),
                to_buy.get(machine, 0)))

    print("")
    print(" Hours Used By Instance type **************")
    for utilization_class in demand_log:
        for machine in demand_log[utilization_class]:
            print("\t%s: %s" % (
                machine,
                intWithCommas(int(demand_log[utilization_class][machine]))))

    optimized_cost_fmt = intWithCommas(int(optimized_cost))
    optimized_upfront_cost_fmt = intWithCommas(int(optimized_upfront_cost))
    demand_cost_fmt = intWithCommas(int(demand_cost))
    difference_cost = intWithCommas(int(demand_cost - optimized_cost))

    print("")
    print("Cost difference:")
    print("Hourly cost for all instance: $%s" % optimized_cost_fmt)
    print("Upfront Cost for all instances: $%s" % optimized_upfront_cost_fmt)
    print("Cost for all On-Demand: $%s" % demand_cost_fmt)
    print("Money Saved: $%s" % difference_cost)
def output_statistics(log, pool, demand_log, EC2):
    """Print the reserved-instance table, hours used, and cost summary.

    Args:
        log: usage log; costed together with ``pool`` via
            ``EC2.calculate_cost``.
        pool: nested mapping pool[utilization_class][instance_type] giving
            the optimal number of reserved instances.
        demand_log: nested mapping
            demand_log[utilization_class][instance_type] of hours used;
            costed against an empty reserve pool to get the on-demand cost.
        EC2: object providing ``init_empty_reserve_pool``,
            ``calculate_cost`` and ``RESERVE_PRIORITIES``.

    Output goes to stdout; nothing is returned.
    """
    # NOTE(review): the original indentation was lost; the blank separator
    # lines are assumed to sit between sections, not inside the loops.
    # Every print uses the single-argument parenthesised form, which behaves
    # the same as a Python 2 print statement and a Python 3 print call.
    empty_pool = EC2.init_empty_reserve_pool()
    optimized_cost, optimized_upfront_cost = EC2.calculate_cost(log, pool)
    demand_cost, _ = EC2.calculate_cost(demand_log, empty_pool)

    owned_reserved_instances = get_owned_reserved_instances(EC2)
    buy_instances = calculate_instances_to_buy(
        owned_reserved_instances, pool, EC2)

    # set.union returns a new set; the original discarded it, so instance
    # types that are owned but absent from the optimal pool were never
    # listed. Keep the result, and sort it so row order is stable.
    all_instances = sorted(
        instance_types_in_pool(pool).union(
            instance_types_in_pool(owned_reserved_instances)))

    print("%20s %15s %15s %15s" % ("", "Optimal", "Owned", "To Purchase"))
    for utilization_class in EC2.RESERVE_PRIORITIES:
        print("%-20s" % (utilization_class))
        optimal = pool[utilization_class]
        owned = owned_reserved_instances[utilization_class]
        to_buy = buy_instances[utilization_class]
        for machine in all_instances:
            # A type may exist in only one of the pools now that the union
            # is honoured; a missing count is reported as 0.
            print("%20s %15d %15d %15d" % (
                machine,
                optimal.get(machine, 0),
                owned.get(machine, 0),
                to_buy.get(machine, 0),
            ))

    print("")
    print(" Hours Used By Instance type **************")
    for utilization_class in demand_log:
        for machine in demand_log[utilization_class]:
            print("\t%s: %s" % (
                machine,
                intWithCommas(int(demand_log[utilization_class][machine]))))

    optimized_cost_fmt = intWithCommas(int(optimized_cost))
    optimized_upfront_cost_fmt = intWithCommas(int(optimized_upfront_cost))
    demand_cost_fmt = intWithCommas(int(demand_cost))
    difference_cost = intWithCommas(int(demand_cost - optimized_cost))

    print("")
    print("Cost difference:")
    print("Cost for Reserved Instance: $%s" % optimized_cost_fmt)
    print("Upfront Cost for all instances: $%s" % optimized_upfront_cost_fmt)
    print("Cost for all On-Demand: $%s" % demand_cost_fmt)
    print("Money Saved: $%s" % difference_cost)
def run(self, pre_existing_pool=None):
    """Optimize the reserve pool one instance type at a time.

    Args:
        pre_existing_pool: optional pool to start from. When supplied it is
            used directly (not copied), so it is updated in place; when
            omitted, a fresh pool from ``self.EC2.init_empty_reserve_pool()``
            is used.

    Returns:
        The pool after ``self.optimize_reserve_pool`` has been applied to
        every instance type found in it.
    """
    if pre_existing_pool is not None:
        best_pool = pre_existing_pool
    else:
        best_pool = self.EC2.init_empty_reserve_pool()

    # Seed the pool from the job flows first so that (per the original
    # comment) it already knows every instance type the job flows use.
    fill_instance_types(self.job_flows, best_pool)

    for instance_type in instance_types_in_pool(best_pool):
        logging.debug("Finding optimal instances for %s", instance_type)
        self.optimize_reserve_pool(instance_type, best_pool)

    return best_pool