def main():
    """Parse CLI arguments, launch an EMR cluster if needed, and submit steps.

    When ``args.cluster_id`` is not supplied, a new cluster is started through
    ``client.run_job_flow`` using the configuration built by
    ``cluster.emr_config`` from the parsed arguments. If ``args.dynamic_pricing``
    is set, ``pricing.get_bid_price`` is consulted first and its price is stored
    under ``bid_price`` in the argument dictionary. The job steps produced by
    ``steps.setup_steps`` are then added to the (new or existing) cluster.

    Raises:
        ValueError: if ``args.log_level`` does not name a numeric level of the
            ``logging`` module.
    """
    parser = create_parser()
    args = parser.parse_args()
    print("Args: ", args)

    # Resolve the level name up front so that a bad value fails with a clear
    # message rather than an opaque TypeError from logger.setLevel(None).
    numeric_level = getattr(logging, args.log_level, None)
    if not isinstance(numeric_level, int):
        raise ValueError("Invalid log level: %s" % args.log_level)
    logging.basicConfig(format=LOGFORMAT)
    logger.setLevel(numeric_level)

    client = boto3.client('emr', region_name=args.aws_region)
    s3 = boto3.resource('s3')

    cluster_id = args.cluster_id
    if cluster_id is None:
        logger.info("Launching cluster...")
        # NOTE: vars() returns the namespace's own __dict__, so writing
        # 'bid_price' below is also visible as args.bid_price.
        args_dict = vars(args)
        if args.dynamic_pricing:
            ec2 = boto3.client('ec2', region_name=args.aws_region)
            # args.slave is presumably the worker instance type -- confirm
            # against create_parser().
            bid_px, is_spot = pricing.get_bid_price(ec2, args.slave)
            args_dict['bid_price'] = str(bid_px)
            # Prices are fractional dollar amounts; %d would truncate them
            # (e.g. 0.27 -> "0"), so log with two decimals instead.
            if is_spot:
                logger.info("Using spot pricing with bid price $%.2f", bid_px)
            else:
                logger.info("Spot price too high. Using on-demand %.2f",
                            bid_px)
        cluster_config = cluster.emr_config(**args_dict)
        response = client.run_job_flow(**cluster_config)
        cluster_id = response['JobFlowId']
        logger.info("Cluster ID: %s", cluster_id)

    emr_steps = steps.setup_steps(s3,
                                  args.s3_bucket,
                                  args.app,
                                  args.submit_args,
                                  args.app_args,
                                  args.uploads,
                                  args.s3_dist_cp)
    client.add_job_flow_steps(JobFlowId=cluster_id, Steps=emr_steps)
def determine_prices(args, ec2, pricing_client):
    """
    Work out bid prices for the EMR instance groups that request dynamic
    pricing.

    `args` is inspected for the keys `dynamic_pricing_master`,
    `dynamic_pricing_core` and `dynamic_pricing_task`. If none of them is
    present, `args` itself is returned untouched. Otherwise a copy (made with
    `args.copy()`) is returned; for every group whose flag is truthy and for
    which `pricing.get_bid_price` reports spot pricing, the copy gains a
    `bid_price_<group>` entry holding the bid price as a string.

    When `args` carries an `ec2_subnet_id`, the availability zone of that
    subnet is looked up first and handed to `pricing.get_bid_price`.
    """
    instance_groups = ('master', 'core', 'task')

    # Instance groups for which a dynamic pricing flag was supplied at all.
    requested = [group for group in instance_groups
                 if 'dynamic_pricing_' + group in args]
    if not requested:
        # Nothing to do.
        return args

    # Spot prices are looked up per availability zone, so resolve the zone
    # that belongs to the provided EC2 subnet (if any).
    zone = None
    subnet = args.get('ec2_subnet_id')
    if subnet:
        zone = pricing.get_availability_zone(ec2, subnet)
        if not zone:
            logger.info(
                "Could not determine availability zone for subnet '%s'",
                subnet)

    # Work on a copy so the caller's mapping is left alone.
    updated = args.copy()

    for group in requested:
        if not updated['dynamic_pricing_' + group]:
            # Flag present but switched off for this group.
            continue

        instance_type = updated['instance_type_' + group]

        # TODO (rikheijdens): optimize by caching instance prices
        # between instance groups?
        bid_price, is_spot = pricing.get_bid_price(
            ec2, pricing_client, instance_type, zone)

        if not is_spot:
            logger.info(
                "Spot price for %s in the %s instance group too high."
                " Using on-demand price of $%.2f",
                instance_type, group, bid_price)
            continue

        logger.info(
            "Using spot pricing with a bid price of $%.2f"
            " for %s instances in the %s instance group.",
            bid_price, instance_type, group)
        updated['bid_price_' + group] = str(bid_price)

    return updated
def determine_prices(args, ec2, pricing_client):
    """
    Decide, per EMR instance group, whether spot pricing is used and at which
    bid price.

    The keys `dynamic_pricing_master`, `dynamic_pricing_core` and
    `dynamic_pricing_task` in `args` switch dynamic pricing on for the
    respective group. If none of these keys exists, `args` is returned as is.
    Otherwise a copy (made with `args.copy()`) is returned in which
    `bid_price_<group>` is set, as a string, for each enabled group where
    `pricing.get_bid_price` reports that spot pricing applies.
    """
    groups = ('master', 'core', 'task')

    # Bail out early when no pricing flag was supplied at all.
    if all('dynamic_pricing_' + group not in args for group in groups):
        return args

    # Never modify the caller's mapping; fill in a copy instead.
    priced = args.copy()

    for group in groups:
        flag_key = 'dynamic_pricing_' + group
        if flag_key not in priced or not priced[flag_key]:
            continue

        instance_type = priced['instance_type_' + group]

        # TODO (rikheijdens): optimize by caching instance prices
        # between instance groups?
        bid_price, is_spot = pricing.get_bid_price(
            ec2, pricing_client, instance_type)

        if is_spot:
            logger.info("Using spot pricing with a bid price of $%.2f"
                        " for %s instances in the %s instance group.",
                        bid_price, instance_type, group)
            priced['bid_price_' + group] = str(bid_price)
            continue

        logger.info("Spot price for %s in the %s instance group too high."
                    " Using on-demand price of $%.2f",
                    instance_type, group, bid_price)

    return priced
def test_get_bid_price(self, ec2, pricing_client):
    """Check the price returned by `get_bid_price` for an m4.large instance.

    A spot result must carry a positive bid price; otherwise the returned
    price must equal what `get_demand_price` gives for the same instance
    type in us-east-1.
    """
    instance_type = 'm4.large'
    price, spot = get_bid_price(ec2, pricing_client, instance_type)

    if not spot:
        # Fallback case: the price is expected to be the on-demand price.
        assert price == get_demand_price('us-east-1', instance_type)
        return

    assert price > 0.