Example no. 1
0
def main():
    """Entry point: configure logging, launch (or reuse) an EMR cluster,
    and submit the Spark steps described by the CLI arguments.

    Raises:
        ValueError: if the ``log_level`` argument is not a valid logging
            level name (e.g. ``INFO``, ``DEBUG``).
    """
    args_dict = parse_cli_args(create_parser())
    print("Args: ", args_dict)

    # Resolve the textual level (e.g. "INFO") to its numeric constant.
    numeric_level = getattr(logging, args_dict['log_level'], None)
    if not isinstance(numeric_level, int):
        # BUG FIX: previously an invalid level left numeric_level as None
        # and logger.setLevel(None) raised an opaque TypeError. Fail fast
        # with a clear message instead.
        raise ValueError('Invalid log level: {}'.format(args_dict['log_level']))
    logging.basicConfig(format=LOGFORMAT)
    logger.setLevel(numeric_level)

    client = boto3.client('emr', region_name=args_dict['aws_region'])
    s3 = boto3.resource('s3')

    cluster_id = args_dict.get('cluster_id')
    if cluster_id is None:
        # No existing cluster supplied: price the instances and launch one.
        logger.info("Launching cluster...")
        ec2_client = boto3.client('ec2', region_name=args_dict['aws_region'])
        pricing_client = boto3.client('pricing',
                                      region_name=args_dict['aws_region'])
        args_dict = determine_prices(args_dict, ec2_client, pricing_client)
        cluster_config = cluster.emr_config(**args_dict)
        response = client.run_job_flow(**cluster_config)
        cluster_id = response['JobFlowId']
        logger.info("Cluster ID: %s", cluster_id)

    emr_steps = steps.setup_steps(s3, args_dict['s3_bucket'], args_dict['app'],
                                  args_dict['submit_args'],
                                  args_dict['app_args'], args_dict['uploads'],
                                  args_dict['s3_dist_cp'])

    response = client.add_job_flow_steps(JobFlowId=cluster_id, Steps=emr_steps)

    # A response without 'StepIds' means no steps were registered.
    try:
        step_ids = json.dumps(response['StepIds'])
    except KeyError:
        step_ids = 'Invalid response'
    logger.info("Step IDs: %s", step_ids)
Example no. 2
0
def main():
    """Entry point: parse CLI args, optionally launch an EMR cluster
    (with dynamic spot pricing), and submit the Spark steps."""
    parser = create_parser()
    args = parser.parse_args()
    print("Args: ", args)

    numeric_level = getattr(logging, args.log_level, None)
    logging.basicConfig(format=LOGFORMAT)
    logger.setLevel(numeric_level)

    client = boto3.client('emr', region_name=args.aws_region)
    s3 = boto3.resource('s3')

    cluster_id = args.cluster_id
    if cluster_id is None:
        logger.info("Launching cluster...")
        args_dict = vars(args)
        if args.dynamic_pricing:
            ec2 = boto3.client('ec2', region_name=args.aws_region)
            # bid_px is a dollar amount — presumably a float such as 0.25
            # (TODO confirm against pricing.get_bid_price).
            bid_px, is_spot = pricing.get_bid_price(ec2, args.slave)
            args_dict['bid_price'] = str(bid_px)
            # BUG FIX: %d truncated fractional prices (0.25 logged as $0);
            # %s reports the actual value.
            if is_spot:
                logger.info("Using spot pricing with bid price $%s", bid_px)
            else:
                logger.info("Spot price too high. Using on-demand %s", bid_px)
        cluster_config = cluster.emr_config(**args_dict)
        response = client.run_job_flow(**cluster_config)
        cluster_id = response['JobFlowId']
        logger.info("Cluster ID: %s", cluster_id)

    emr_steps = steps.setup_steps(s3, args.s3_bucket, args.app,
                                  args.submit_args, args.app_args,
                                  args.uploads, args.s3_dist_cp)

    client.add_job_flow_steps(JobFlowId=cluster_id, Steps=emr_steps)
def test_emr_cluster_config_with_defaults():
    """emr_config should translate the `defaults` list (alternating
    classification names and key=value settings) into an EMR
    'Configurations' list alongside the standard cluster layout."""
    config = emr_config('emr-5.2.0',
                        instance_type_master='m4.large',
                        keep_alive=False,
                        instance_type_core='m4.2xlarge',
                        instance_type_task='m4.2xlarge',
                        num_core=1,
                        num_task=1,
                        bid_price_task='0.1',
                        name="Test SparkSteps",
                        defaults=['spark-defaults', 'spark.speculation=false',
                                  'yarn-site', 'yarn.nodemanager.vmem-check-enabled=true'])
    print(config['Configurations'])
    # Only the task group carries a bid price, so it alone is SPOT;
    # master/core stay ON_DEMAND.
    assert config == {'Instances':
                          {'InstanceGroups': [{'InstanceCount': 1,  # NOQA: E127
                                               'InstanceRole': 'MASTER',
                                               'InstanceType': 'm4.large',
                                               'Market': 'ON_DEMAND',
                                               'Name': 'Master Node'},
                                              {'InstanceCount': 1,
                                               'InstanceRole': 'CORE',
                                               'InstanceType': 'm4.2xlarge',
                                               'Market': 'ON_DEMAND',
                                               'Name': 'Core Nodes'},
                                              {'BidPrice': '0.1',
                                               'InstanceCount': 1,
                                               'InstanceRole': 'TASK',
                                               'InstanceType': 'm4.2xlarge',
                                               'Market': 'SPOT',
                                               'Name': 'Task Nodes'}],
                           'KeepJobFlowAliveWhenNoSteps': False,
                           'TerminationProtected': False
                           },
                      'Applications': [{'Name': 'Hadoop'}, {'Name': 'Spark'}],
                      'Configurations': [
                          {
                              'Classification': 'spark-defaults',
                              'Properties': {
                                  'spark.speculation': 'false'
                              }
                          },
                          {
                              'Classification': 'yarn-site',
                              'Properties': {
                                  'yarn.nodemanager.vmem-check-enabled': 'true'
                              }
                          }
                      ],
                      'Name': 'Test SparkSteps',
                      'JobFlowRole': 'EMR_EC2_DefaultRole',
                      'ReleaseLabel': 'emr-5.2.0',
                      'VisibleToAllUsers': True,
                      'ServiceRole': 'EMR_DefaultRole'}

    # Smoke test: the EMR client accepts the generated config.
    # NOTE(review): assumes AWS calls are mocked in this suite — confirm fixtures.
    client = boto3.client('emr', region_name=AWS_REGION_NAME)
    client.run_job_flow(**config)
Example no. 4
0
def test_emr_cluster_config():
    """emr_config with the legacy master/slave arguments yields the
    expected run_job_flow payload (on-demand master/core, spot task)."""
    config = emr_config('emr-5.2.0',
                        master='m4.large',
                        keep_alive=False,
                        slave='m4.2xlarge',
                        num_core=1,
                        num_task=1,
                        bid_price='0.1',
                        name="Test SparkSteps")

    master_group = {
        'InstanceCount': 1,
        'InstanceRole': 'MASTER',
        'InstanceType': 'm4.large',
        'Market': 'ON_DEMAND',
        'Name': 'Master Node',
    }
    core_group = {
        'InstanceCount': 1,
        'InstanceRole': 'CORE',
        'InstanceType': 'm4.2xlarge',
        'Market': 'ON_DEMAND',
        'Name': 'Core Nodes',
    }
    task_group = {
        'BidPrice': '0.1',
        'InstanceCount': 1,
        'InstanceRole': 'TASK',
        'InstanceType': 'm4.2xlarge',
        'Market': 'SPOT',
        'Name': 'Task Nodes',
    }
    expected = {
        'Instances': {
            'InstanceGroups': [master_group, core_group, task_group],
            'KeepJobFlowAliveWhenNoSteps': False,
            'TerminationProtected': False,
        },
        'Applications': [{'Name': 'Hadoop'}, {'Name': 'Spark'}],
        'Name': 'Test SparkSteps',
        'JobFlowRole': 'EMR_EC2_DefaultRole',
        'ReleaseLabel': 'emr-5.2.0',
        'VisibleToAllUsers': True,
        'ServiceRole': 'EMR_DefaultRole',
    }
    assert config == expected

    # Smoke test: the EMR client accepts the generated config.
    client = boto3.client('emr', region_name=AWS_REGION_NAME)
    client.run_job_flow(**config)
Example no. 5
0
def test_is_step_complete(emr_client, s3_client):
    """
    Ensure is_step_complete returns expected boolean value
    """
    cluster_config = emr_config('emr-5.2.0',
                                instance_type_master='m4.large',
                                jobflow_role='MyCustomRole',
                                keep_alive=False,
                                instance_type_core='m4.2xlarge',
                                instance_type_task='m4.2xlarge',
                                num_core=1,
                                num_task=1,
                                bid_price_task='0.1',
                                maximize_resource_allocation=True,
                                name='Test SparkSteps',
                                app_list=['hadoop', 'hive', 'spark'])
    response = emr_client.run_job_flow(**cluster_config)
    cluster_id = response['JobFlowId']

    # Minimal step so the cluster has something to report status for.
    test_step = {
        'Name': 'test-step',
        'ActionOnFailure': 'CANCEL_AND_WAIT',
        'HadoopJarStep': {
            'Jar': 'command-runner.jar',
            'Args': ['state-pusher-script']
        }
    }
    response = emr_client.add_job_flow_steps(JobFlowId=cluster_id,
                                             Steps=[test_step])
    last_step_id = response['StepIds'][-1]

    # while the step state is non-terminal is_step_complete should return False
    for state in ['PENDING', 'RUNNING', 'CONTINUE', 'CANCEL_PENDING']:
        set_step_state(last_step_id, cluster_id, state)
        assert not is_step_complete(emr_client, cluster_id, last_step_id), \
            'Expected last step to not be complete when step state is {}'.format(state)

    # when last step is in a terminal state (completed), is_step_complete should return True
    set_step_state(last_step_id, cluster_id, 'COMPLETED')
    assert is_step_complete(emr_client, cluster_id, last_step_id), \
        'Expected last step to be complete when last step state is {}'.format('COMPLETED')

    # when last step is in a failed state, is_step_complete should raise a helpful exception
    for state in ['CANCELLED', 'FAILED', 'INTERRUPTED']:
        set_step_state(last_step_id, cluster_id, state)
        try:
            is_step_complete(emr_client, cluster_id, last_step_id)
        except Exception as e:
            assert 'EMR job failed' == str(e), 'Exception message not as expected'
        else:
            # BUG FIX: the "no exception raised" assert previously sat inside
            # the try block, so its AssertionError was swallowed by the broad
            # `except Exception` and misreported as a wrong-message failure.
            assert False, \
                'Expected an exception to be raised when the last step is in {} state'.format(state)
Example no. 6
0
def main():
    """Entry point: parse CLI args, launch (or reuse) an EMR cluster,
    submit the Spark steps, and optionally poll until the last step
    completes (when a ``wait`` interval is given)."""
    args_dict = parse_cli_args(create_parser())
    print("Args: ", args_dict)

    numeric_level = getattr(logging, args_dict['log_level'], None)
    logging.basicConfig(format=LOGFORMAT)
    # Raise the level on the whole 'sparksteps' package logger.
    logging.getLogger('sparksteps').setLevel(numeric_level)

    client = boto3.client('emr', region_name=args_dict['aws_region'])
    s3 = boto3.resource('s3')

    cluster_id = args_dict.get('cluster_id')
    if cluster_id is None:
        # No existing cluster supplied: price the instances and launch one.
        logger.info("Launching cluster...")
        ec2_client = boto3.client('ec2', region_name=args_dict['aws_region'])
        pricing_client = boto3.client('pricing',
                                      region_name=args_dict['aws_region'])
        args_dict = determine_prices(args_dict, ec2_client, pricing_client)
        cluster_config = cluster.emr_config(**args_dict)
        response = client.run_job_flow(**cluster_config)
        cluster_id = response['JobFlowId']
        logger.info("Cluster ID: %s", cluster_id)

    emr_steps = steps.setup_steps(s3, args_dict['s3_bucket'],
                                  args_dict['s3_path'], args_dict['app'],
                                  args_dict['submit_args'],
                                  args_dict['app_args'], args_dict['uploads'],
                                  args_dict['s3_dist_cp'])

    response = client.add_job_flow_steps(JobFlowId=cluster_id, Steps=emr_steps)

    try:
        step_ids = json.dumps(response['StepIds'])
    except KeyError:
        # Without step IDs there is nothing to poll, so disable waiting.
        step_ids = 'Invalid response'
        args_dict['wait'] = False
    logger.info("Step IDs: %s", step_ids)

    sleep_interval = args_dict.get('wait')
    if sleep_interval:
        last_step_id = response['StepIds'][-1]
        # Lazy %-args: formatting is deferred until the record is emitted
        # (same output as the previous eager .format() call).
        logger.info(
            'Polling until step %s is complete using a sleep interval of %s seconds...',
            last_step_id, sleep_interval)
        wait_for_step_complete(client,
                               cluster_id,
                               last_step_id,
                               sleep_interval_s=int(sleep_interval))
def test_emr_cluster_config():
    """emr_config should honour custom jobflow_role/service_role, the
    app_list selection, and maximize_resource_allocation (which adds a
    'spark' Configurations entry)."""
    config = emr_config('emr-5.2.0',
                        instance_type_master='m4.large',
                        jobflow_role='MyCustomRole',
                        service_role='MyServiceRole',
                        keep_alive=False,
                        instance_type_core='m4.2xlarge',
                        instance_type_task='m4.2xlarge',
                        num_core=1,
                        num_task=1,
                        bid_price_task='0.1',
                        maximize_resource_allocation=True,
                        name="Test SparkSteps",
                        app_list=['hadoop', 'hive', 'spark'])
    # Only the task group carries a bid price, so it alone is SPOT.
    assert config == {'Instances':
                          {'InstanceGroups': [{'InstanceCount': 1,  # NOQA: E127
                                               'InstanceRole': 'MASTER',
                                               'InstanceType': 'm4.large',
                                               'Market': 'ON_DEMAND',
                                               'Name': 'Master Node'},
                                              {'InstanceCount': 1,
                                               'InstanceRole': 'CORE',
                                               'InstanceType': 'm4.2xlarge',
                                               'Market': 'ON_DEMAND',
                                               'Name': 'Core Nodes'},
                                              {'BidPrice': '0.1',
                                               'InstanceCount': 1,
                                               'InstanceRole': 'TASK',
                                               'InstanceType': 'm4.2xlarge',
                                               'Market': 'SPOT',
                                               'Name': 'Task Nodes'}],
                           'KeepJobFlowAliveWhenNoSteps': False,
                           'TerminationProtected': False
                           },
                      'Applications': [{'Name': 'Hadoop'}, {'Name': 'Hive'}, {'Name': 'Spark'}],
                      'Name': 'Test SparkSteps',
                      'JobFlowRole': 'MyCustomRole',
                      'ServiceRole': 'MyServiceRole',
                      'ReleaseLabel': 'emr-5.2.0',
                      'VisibleToAllUsers': True,
                      'Configurations': [{'Classification': 'spark',
                                          'Properties': {'maximizeResourceAllocation': 'true'}}]
                      }

    # Smoke test: the EMR client accepts the generated config.
    # NOTE(review): assumes AWS calls are mocked in this suite — confirm fixtures.
    client = boto3.client('emr', region_name=AWS_REGION_NAME)
    client.run_job_flow(**config)
Example no. 8
0
def test_emr_cluster_config():
    """emr_config with maximize_resource_allocation=True should emit a
    'spark' Configurations entry with maximizeResourceAllocation='true'
    alongside the default role/application settings."""
    config = emr_config('emr-5.2.0',
                        instance_type_master='m4.large',
                        keep_alive=False,
                        instance_type_core='m4.2xlarge',
                        instance_type_task='m4.2xlarge',
                        num_core=1,
                        num_task=1,
                        bid_price_task='0.1',
                        maximize_resource_allocation=True,
                        name="Test SparkSteps")
    # Only the task group carries a bid price, so it alone is SPOT.
    assert config == {'Instances':
                          {'InstanceGroups': [{'InstanceCount': 1,  # NOQA: E127
                                               'InstanceRole': 'MASTER',
                                               'InstanceType': 'm4.large',
                                               'Market': 'ON_DEMAND',
                                               'Name': 'Master Node'},
                                              {'InstanceCount': 1,
                                               'InstanceRole': 'CORE',
                                               'InstanceType': 'm4.2xlarge',
                                               'Market': 'ON_DEMAND',
                                               'Name': 'Core Nodes'},
                                              {'BidPrice': '0.1',
                                               'InstanceCount': 1,
                                               'InstanceRole': 'TASK',
                                               'InstanceType': 'm4.2xlarge',
                                               'Market': 'SPOT',
                                               'Name': 'Task Nodes'}],
                           'KeepJobFlowAliveWhenNoSteps': False,
                           'TerminationProtected': False
                           },
                      'Applications': [{'Name': 'Hadoop'}, {'Name': 'Spark'}],
                      'Name': 'Test SparkSteps',
                      'JobFlowRole': 'EMR_EC2_DefaultRole',
                      'ReleaseLabel': 'emr-5.2.0',
                      'VisibleToAllUsers': True,
                      'ServiceRole': 'EMR_DefaultRole',
                      'Configurations': [{'Classification': 'spark',
                                          'Properties': {'maximizeResourceAllocation': 'true'}}]
                      }

    # Smoke test: the EMR client accepts the generated config.
    # NOTE(review): assumes AWS calls are mocked in this suite — confirm fixtures.
    client = boto3.client('emr', region_name=AWS_REGION_NAME)
    client.run_job_flow(**config)
Example no. 9
0
def test_emr_spot_cluster():
    """When every role gets a bid price, all three instance groups become
    SPOT; a bootstrap_script adds a BootstrapActions entry."""
    config = emr_config('emr-5.2.0',
                        instance_type_master='m4.large',
                        keep_alive=False,
                        instance_type_core='c3.8xlarge',
                        instance_type_task='c3.8xlarge',
                        num_core=2,
                        num_task=4,
                        bid_price_master='0.05',
                        bid_price_core='0.25',
                        bid_price_task='0.1',
                        name="Test SparkSteps",
                        bootstrap_script='s3://bucket/bootstrap-actions.sh')

    expected_groups = [
        {'InstanceCount': 1,
         'InstanceRole': 'MASTER',
         'InstanceType': 'm4.large',
         'Market': 'SPOT',
         'BidPrice': '0.05',
         'Name': 'Master Node'},
        {'BidPrice': '0.25',
         'InstanceCount': 2,
         'InstanceRole': 'CORE',
         'InstanceType': 'c3.8xlarge',
         'Market': 'SPOT',
         'Name': 'Core Nodes'},
        {'BidPrice': '0.1',
         'InstanceCount': 4,
         'InstanceRole': 'TASK',
         'InstanceType': 'c3.8xlarge',
         'Market': 'SPOT',
         'Name': 'Task Nodes'},
    ]
    expected = {
        'Instances': {'InstanceGroups': expected_groups,
                      'KeepJobFlowAliveWhenNoSteps': False,
                      'TerminationProtected': False},
        'Applications': [{'Name': 'Hadoop'}, {'Name': 'Spark'}],
        'BootstrapActions': [
            {'Name': 'bootstrap',
             'ScriptBootstrapAction': {'Path': 's3://bucket/bootstrap-actions.sh'}}],
        'Name': 'Test SparkSteps',
        'JobFlowRole': 'EMR_EC2_DefaultRole',
        'ReleaseLabel': 'emr-5.2.0',
        'VisibleToAllUsers': True,
        'ServiceRole': 'EMR_DefaultRole',
    }
    assert config == expected
Example no. 10
0
def test_emr_spot_cluster():
    """Giving a bid price for master, core and task should mark every
    instance group as SPOT; bootstrap_script maps to BootstrapActions."""
    config = emr_config('emr-5.2.0',
                        instance_type_master='m4.large',
                        keep_alive=False,
                        instance_type_core='c3.8xlarge',
                        instance_type_task='c3.8xlarge',
                        num_core=2,
                        num_task=4,
                        bid_price_master='0.05',
                        bid_price_core='0.25',
                        bid_price_task='0.1',
                        name="Test SparkSteps",
                        bootstrap_script='s3://bucket/bootstrap-actions.sh')

    master = {'InstanceCount': 1,
              'InstanceRole': 'MASTER',
              'InstanceType': 'm4.large',
              'Market': 'SPOT',
              'BidPrice': '0.05',
              'Name': 'Master Node'}
    core = {'BidPrice': '0.25',
            'InstanceCount': 2,
            'InstanceRole': 'CORE',
            'InstanceType': 'c3.8xlarge',
            'Market': 'SPOT',
            'Name': 'Core Nodes'}
    task = {'BidPrice': '0.1',
            'InstanceCount': 4,
            'InstanceRole': 'TASK',
            'InstanceType': 'c3.8xlarge',
            'Market': 'SPOT',
            'Name': 'Task Nodes'}
    assert config == {
        'Instances': {'InstanceGroups': [master, core, task],
                      'KeepJobFlowAliveWhenNoSteps': False,
                      'TerminationProtected': False},
        'Applications': [{'Name': 'Hadoop'}, {'Name': 'Spark'}],
        'BootstrapActions': [
            {'Name': 'bootstrap',
             'ScriptBootstrapAction': {'Path': 's3://bucket/bootstrap-actions.sh'}}],
        'Name': 'Test SparkSteps',
        'JobFlowRole': 'EMR_EC2_DefaultRole',
        'ReleaseLabel': 'emr-5.2.0',
        'VisibleToAllUsers': True,
        'ServiceRole': 'EMR_DefaultRole',
    }
Example no. 11
0
def test_emr_cluster_config_with_bootstrap():
    """bootstrap_script should be exposed as a single BootstrapActions
    entry named 'bootstrap' pointing at the given S3 path."""
    config = emr_config('emr-5.2.0',
                        instance_type_master='m4.large',
                        keep_alive=False,
                        instance_type_core='m4.2xlarge',
                        instance_type_task='m4.2xlarge',
                        num_core=1,
                        num_task=1,
                        bid_price_task='0.1',
                        name="Test SparkSteps",
                        bootstrap_script='s3://bucket/bootstrap-actions.sh')
    # Only the task group carries a bid price, so it alone is SPOT.
    assert config == {'Instances':
                          {'InstanceGroups': [{'InstanceCount': 1,  # NOQA: E127
                                               'InstanceRole': 'MASTER',
                                               'InstanceType': 'm4.large',
                                               'Market': 'ON_DEMAND',
                                               'Name': 'Master Node'},
                                              {'InstanceCount': 1,
                                               'InstanceRole': 'CORE',
                                               'InstanceType': 'm4.2xlarge',
                                               'Market': 'ON_DEMAND',
                                               'Name': 'Core Nodes'},
                                              {'BidPrice': '0.1',
                                               'InstanceCount': 1,
                                               'InstanceRole': 'TASK',
                                               'InstanceType': 'm4.2xlarge',
                                               'Market': 'SPOT',
                                               'Name': 'Task Nodes'}],
                           'KeepJobFlowAliveWhenNoSteps': False,
                           'TerminationProtected': False
                           },
                      'Applications': [{'Name': 'Hadoop'}, {'Name': 'Spark'}],
                      'BootstrapActions': [{'Name': 'bootstrap',
                                            'ScriptBootstrapAction': {'Path': 's3://bucket/bootstrap-actions.sh'}}],
                      'Name': 'Test SparkSteps',
                      'JobFlowRole': 'EMR_EC2_DefaultRole',
                      'ReleaseLabel': 'emr-5.2.0',
                      'VisibleToAllUsers': True,
                      'ServiceRole': 'EMR_DefaultRole'}

    # Smoke test: the EMR client accepts the generated config.
    # NOTE(review): assumes AWS calls are mocked in this suite — confirm fixtures.
    client = boto3.client('emr', region_name=AWS_REGION_NAME)
    client.run_job_flow(**config)
Example no. 12
0
def main():
    """Parse CLI arguments, reuse or launch an EMR cluster, and submit
    the configured Spark steps to it."""
    args_dict = parse_cli_args(create_parser())
    print("Args: ", args_dict)

    logging.basicConfig(format=LOGFORMAT)
    logger.setLevel(getattr(logging, args_dict['log_level'], None))

    region = args_dict['aws_region']
    emr = boto3.client('emr', region_name=region)
    s3 = boto3.resource('s3')

    cluster_id = args_dict.get('cluster_id')
    if cluster_id is None:
        # No cluster given on the command line: price instances and launch.
        logger.info("Launching cluster...")
        ec2 = boto3.client('ec2', region_name=region)
        pricing_client = boto3.client('pricing', region_name=region)
        args_dict = determine_prices(args_dict, ec2, pricing_client)
        launch_response = emr.run_job_flow(**cluster.emr_config(**args_dict))
        cluster_id = launch_response['JobFlowId']
        logger.info("Cluster ID: %s", cluster_id)

    emr_steps = steps.setup_steps(s3,
                                  args_dict['s3_bucket'],
                                  args_dict['app'],
                                  args_dict['submit_args'],
                                  args_dict['app_args'],
                                  args_dict['uploads'],
                                  args_dict['s3_dist_cp'])

    response = emr.add_job_flow_steps(JobFlowId=cluster_id, Steps=emr_steps)

    # A response missing 'StepIds' is reported rather than crashing.
    try:
        step_ids = json.dumps(response['StepIds'])
    except KeyError:
        step_ids = 'Invalid response'
    logger.info("Step IDs: %s", step_ids)
Example no. 13
0
def test_emr_ebs_storage():
    """The ebs_* keyword arguments should map onto EbsConfiguration blocks
    for the core and task groups. Per the expected payload: EbsOptimized
    defaults to False when not passed (core), and VolumesPerInstance
    defaults to 1 when not passed (task)."""
    config = emr_config('emr-5.2.0',
                        instance_type_master='m4.large',
                        keep_alive=False,
                        instance_type_core='c3.8xlarge',
                        instance_type_task='c3.8xlarge',
                        ebs_volume_size_core=100,
                        ebs_volume_type_core='gp2',
                        ebs_volumes_per_core=2,
                        ebs_volume_size_task=10,
                        ebs_volume_type_task='io1',
                        ebs_optimized_task=True,
                        num_core=2,
                        num_task=4,
                        bid_price_master='0.05',
                        bid_price_core='0.25',
                        bid_price_task='0.1',
                        name="Test SparkSteps",
                        bootstrap_script='s3://bucket/bootstrap-actions.sh')
    # All roles have bid prices, so every group is SPOT.
    assert config == {'Instances':
                          {'InstanceGroups': [{'InstanceCount': 1,  # NOQA: E127
                                               'InstanceRole': 'MASTER',
                                               'InstanceType': 'm4.large',
                                               'Market': 'SPOT',
                                               'BidPrice': '0.05',
                                               'Name': 'Master Node'},
                                              {'BidPrice': '0.25',
                                               'InstanceCount': 2,
                                               'InstanceRole': 'CORE',
                                               'InstanceType': 'c3.8xlarge',
                                               'Market': 'SPOT',
                                               'Name': 'Core Nodes',
                                               'EbsConfiguration': {
                                               'EbsBlockDeviceConfigs': [{
                                                    'VolumeSpecification': {
                                                        'VolumeType': 'gp2',
                                                        'SizeInGB': 100
                                                    },
                                                    'VolumesPerInstance': 2
                                                }],
                                                'EbsOptimized': False
                                               }},
                                              {'BidPrice': '0.1',
                                               'InstanceCount': 4,
                                               'InstanceRole': 'TASK',
                                               'InstanceType': 'c3.8xlarge',
                                               'Market': 'SPOT',
                                               'Name': 'Task Nodes',
                                               'EbsConfiguration': {
                                               'EbsBlockDeviceConfigs': [{
                                                    'VolumeSpecification': {
                                                        'VolumeType': 'io1',
                                                        'SizeInGB': 10
                                                    },
                                                    'VolumesPerInstance': 1
                                                }],
                                                'EbsOptimized': True
                                               }}],
                           'KeepJobFlowAliveWhenNoSteps': False,
                           'TerminationProtected': False
                           },
                      'Applications': [{'Name': 'Hadoop'}, {'Name': 'Spark'}],
                      'BootstrapActions': [{'Name': 'bootstrap',
                                            'ScriptBootstrapAction': {'Path': 's3://bucket/bootstrap-actions.sh'}}],
                      'Name': 'Test SparkSteps',
                      'JobFlowRole': 'EMR_EC2_DefaultRole',
                      'ReleaseLabel': 'emr-5.2.0',
                      'VisibleToAllUsers': True,
                      'ServiceRole': 'EMR_DefaultRole'}
Example no. 14
0
def test_emr_ebs_storage():
    """emr_config should translate ebs_volume_size/type/volumes-per/optimized
    arguments into per-group EbsConfiguration blocks. Per the expected
    payload: EbsOptimized defaults to False when not passed (core) and
    VolumesPerInstance defaults to 1 when not passed (task)."""
    config = emr_config('emr-5.2.0',
                        instance_type_master='m4.large',
                        keep_alive=False,
                        instance_type_core='c3.8xlarge',
                        instance_type_task='c3.8xlarge',
                        ebs_volume_size_core=100,
                        ebs_volume_type_core='gp2',
                        ebs_volumes_per_core=2,
                        ebs_volume_size_task=10,
                        ebs_volume_type_task='io1',
                        ebs_optimized_task=True,
                        num_core=2,
                        num_task=4,
                        bid_price_master='0.05',
                        bid_price_core='0.25',
                        bid_price_task='0.1',
                        name="Test SparkSteps",
                        bootstrap_script='s3://bucket/bootstrap-actions.sh')
    # Every role has a bid price, so all groups are SPOT.
    assert config == {'Instances':
                          {'InstanceGroups': [{'InstanceCount': 1,  # NOQA: E127
                                               'InstanceRole': 'MASTER',
                                               'InstanceType': 'm4.large',
                                               'Market': 'SPOT',
                                               'BidPrice': '0.05',
                                               'Name': 'Master Node'},
                                              {'BidPrice': '0.25',
                                               'InstanceCount': 2,
                                               'InstanceRole': 'CORE',
                                               'InstanceType': 'c3.8xlarge',
                                               'Market': 'SPOT',
                                               'Name': 'Core Nodes',
                                               'EbsConfiguration': {
                                               'EbsBlockDeviceConfigs': [{
                                                    'VolumeSpecification': {
                                                        'VolumeType': 'gp2',
                                                        'SizeInGB': 100
                                                    },
                                                    'VolumesPerInstance': 2
                                                }],
                                                'EbsOptimized': False
                                               }},
                                              {'BidPrice': '0.1',
                                               'InstanceCount': 4,
                                               'InstanceRole': 'TASK',
                                               'InstanceType': 'c3.8xlarge',
                                               'Market': 'SPOT',
                                               'Name': 'Task Nodes',
                                               'EbsConfiguration': {
                                               'EbsBlockDeviceConfigs': [{
                                                    'VolumeSpecification': {
                                                        'VolumeType': 'io1',
                                                        'SizeInGB': 10
                                                    },
                                                    'VolumesPerInstance': 1
                                                }],
                                                'EbsOptimized': True
                                               }}],
                           'KeepJobFlowAliveWhenNoSteps': False,
                           'TerminationProtected': False
                           },
                      'Applications': [{'Name': 'Hadoop'}, {'Name': 'Spark'}],
                      'BootstrapActions': [{'Name': 'bootstrap',
                                            'ScriptBootstrapAction': {'Path': 's3://bucket/bootstrap-actions.sh'}}],
                      'Name': 'Test SparkSteps',
                      'JobFlowRole': 'EMR_EC2_DefaultRole',
                      'ReleaseLabel': 'emr-5.2.0',
                      'VisibleToAllUsers': True,
                      'ServiceRole': 'EMR_DefaultRole'}