Exemple #1
0
def main():
    """
    Command line entry point: optionally launch an EMR cluster and submit
    the job steps to it.

    The arguments come from the parser returned by `create_parser()`.
    When `args.cluster_id` is None a new cluster is started with
    `run_job_flow`; if `args.dynamic_pricing` is set, a bid price is
    requested from `pricing.get_bid_price` and passed to the cluster
    configuration as the string `bid_price`. The steps built by
    `steps.setup_steps` are then added to the (new or existing) cluster.

    Raises:
        ValueError: if `args.log_level` does not name an integer level
            constant of the `logging` module.
    """
    parser = create_parser()
    args = parser.parse_args()
    print("Args: ", args)

    # Resolve the level name to the numeric constant and fail with a clear
    # message instead of handing None to setLevel().
    numeric_level = getattr(logging, args.log_level, None)
    if not isinstance(numeric_level, int):
        raise ValueError('Invalid log level: %s' % args.log_level)
    logging.basicConfig(format=LOGFORMAT)
    logger.setLevel(numeric_level)

    client = boto3.client('emr', region_name=args.aws_region)
    s3 = boto3.resource('s3')

    cluster_id = args.cluster_id
    if cluster_id is None:
        logger.info("Launching cluster...")
        # vars() exposes the namespace's own dict, so the bid price added
        # below travels along with the other cluster settings.
        args_dict = vars(args)
        if args.dynamic_pricing:
            ec2 = boto3.client('ec2', region_name=args.aws_region)
            bid_px, is_spot = pricing.get_bid_price(ec2, args.slave)
            args_dict['bid_price'] = str(bid_px)
            # Log prices with two decimals: '%d' would truncate a
            # fractional dollar amount (e.g. 0.12 -> 0).
            if is_spot:
                logger.info("Using spot pricing with bid price $%.2f", bid_px)
            else:
                logger.info("Spot price too high. Using on-demand $%.2f",
                            bid_px)
        cluster_config = cluster.emr_config(**args_dict)
        response = client.run_job_flow(**cluster_config)
        cluster_id = response['JobFlowId']
        logger.info("Cluster ID: %s", cluster_id)

    emr_steps = steps.setup_steps(s3, args.s3_bucket, args.app,
                                  args.submit_args, args.app_args,
                                  args.uploads, args.s3_dist_cp)

    client.add_job_flow_steps(JobFlowId=cluster_id, Steps=emr_steps)
Exemple #2
0
def determine_prices(args, ec2, pricing_client):
    """
    Fill in bid prices for the instance groups that requested dynamic
    pricing.

    `args` is inspected for the `dynamic_pricing_master`,
    `dynamic_pricing_core` and `dynamic_pricing_task` entries. When none
    of them is present `args` itself is returned unchanged. Otherwise a
    copy is returned; for every entry that is truthy and for which
    `pricing.get_bid_price` reports spot pricing, the copy gains the
    matching `bid_price_<group>` entry holding the bid price as a string.
    """
    pricing_properties = ('dynamic_pricing_master', 'dynamic_pricing_core',
                          'dynamic_pricing_task')
    # Bail out early when no dynamic pricing entry was supplied at all.
    if not any(flag in args for flag in pricing_properties):
        return args

    # The AZ of the provided EC2 subnet (if any) is needed in order to
    # look up spot prices in the correct region.
    subnet_id = args.get('ec2_subnet_id')
    availability_zone = None
    if subnet_id:
        availability_zone = pricing.get_availability_zone(ec2, subnet_id)
        if not availability_zone:
            logger.info(
                "Could not determine availability zone for subnet '%s'",
                subnet_id)

    # Work on a copy so the caller's mapping is left untouched.
    args = args.copy()

    for flag in pricing_properties:
        # Skip groups whose entry is missing or switched off.
        if flag not in args or not args[flag]:
            continue

        group = flag.replace('dynamic_pricing_', '')
        instance_type = args[flag.replace('dynamic_pricing', 'instance_type')]
        # TODO (rikheijdens): optimize by caching instance prices
        # between instance groups?
        bid_price, is_spot = pricing.get_bid_price(
            ec2, pricing_client, instance_type, availability_zone)

        if not is_spot:
            logger.info(
                "Spot price for %s in the %s instance group too high."
                " Using on-demand price of $%.2f",
                instance_type, group, bid_price)
            continue

        logger.info(
            "Using spot pricing with a bid price of $%.2f"
            " for %s instances in the %s instance group.",
            bid_price, instance_type, group)
        # Only spot-priced groups get a bid price entry.
        args[flag.replace('dynamic_pricing', 'bid_price')] = str(bid_price)

    return args
Exemple #3
0
def determine_prices(args, ec2, pricing_client):
    """
    Work out bid prices for the instance groups that opted into dynamic
    pricing.

    Looks for the `dynamic_pricing_master`, `dynamic_pricing_core` and
    `dynamic_pricing_task` entries in `args`. If none is present, `args`
    is returned as is. Otherwise a copy of `args` is returned in which
    each truthy entry that `pricing.get_bid_price` resolves to spot
    pricing has a corresponding `bid_price_<group>` entry (a string).
    """
    pricing_properties = (
        'dynamic_pricing_master', 'dynamic_pricing_core', 'dynamic_pricing_task')
    requested = [prop for prop in pricing_properties if prop in args]
    # No dynamic pricing entries at all: nothing to do.
    if not requested:
        return args

    # Mutate a copy of args, never the caller's mapping.
    args = args.copy()

    for prop in requested:
        # An entry that is present but falsy disables dynamic pricing
        # for that group.
        if not args[prop]:
            continue

        type_key = prop.replace('dynamic_pricing', 'instance_type')
        instance_type = args[type_key]
        group_name = prop.replace('dynamic_pricing_', '')
        # TODO (rikheijdens): optimize by caching instance prices
        # between instance groups?
        price_info = pricing.get_bid_price(ec2, pricing_client, instance_type)
        bid_price, is_spot = price_info

        if is_spot:
            logger.info("Using spot pricing with a bid price of $%.2f"
                        " for %s instances in the %s instance group.",
                        bid_price, instance_type, group_name)
            # Record the bid under the matching bid_price_<group> key.
            args[prop.replace('dynamic_pricing', 'bid_price')] = str(bid_price)
        else:
            logger.info("Spot price for %s in the %s instance group too high."
                        " Using on-demand price of $%.2f",
                        instance_type, group_name, bid_price)

    return args
 def test_get_bid_price(self, ec2, pricing_client):
     """
     Check the price returned by `get_bid_price` for an m4.large instance.

     A spot result must carry a positive bid price; otherwise the price
     must equal the on-demand price for m4.large in us-east-1.
     """
     price, spot = get_bid_price(ec2, pricing_client, 'm4.large')
     if not spot:
         # Fallback path: the on-demand price is expected verbatim.
         assert price == get_demand_price('us-east-1', 'm4.large')
     else:
         assert price > 0.