def test_polls_until_job_state_timeout(config, step_info, aws_api, time_sleep,
                                       fixed_datetime, shell_function):
    config['poll_cluster'] = True
    job_response = copy.deepcopy(step_info[0])
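    # creation time far in the past; fixed_datetime (assumed to pin 'now')
    # should make the poll loop treat this step as past its timeout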
    job_response['Status']['Timeline']['CreationDateTime'] = \
        datetime(2017, 12, 31, 0, 0, 0, 0).replace(tzinfo=pytz.utc)

    # return two RUNNING states, then the stale step that exceeds the timeout
    aws_api.return_value.list_cluster_steps.side_effect = \
        [step_info, step_info, [job_response]]

    with pytest.raises(ValueError) as excinfo:
        handle_job_request(config)

    assert str(excinfo.value) == 'Job exceeded timeout 60'

    # expected time.sleep calls while polling job
    expected_calls = [call(60) for _ in range(3)]
    assert time_sleep.call_count == 3
    time_sleep.assert_has_calls(expected_calls)

    # expected interactions to list EMR steps
    expected_calls = [call('cl-359', 'WordCount', False) for _ in range(3)]
    assert aws_api.return_value.list_cluster_steps.call_count == 3
    aws_api.return_value.list_cluster_steps.assert_has_calls(expected_calls)


def test_invalid_job_runtime_throws_error(config, aws_api, shell_function):
    # 'javascript' is not a supported job runtime
    config['job_runtime'] = 'javascript'

    with pytest.raises(ValueError) as excinfo:
        handle_job_request(config)

    assert str(excinfo.value) == \
        "--job-runtime should be in ['scala', 'java', 'python']"


def test_missing_output_resource_id_raises_error(config, aws_api, step_info,
                                                 time_sleep, fixed_datetime,
                                                 shell_function):
    # terminal output without StepIds cannot be parsed for the new step id
    shell_function.return_value = 'unexpected aws cli output'

    with pytest.raises(ValueError) as excinfo:
        handle_job_request(config)

    assert str(excinfo.value) == 'StepIds not found in terminal output'


def test_cluster_job_not_added_conditions(config, step_info, aws_api,
                                          time_sleep, shell_function):
    # job should not be added if artifact_path is empty
    config['artifact_path'] = ''
    handle_job_request(config)
    assert not shell_function.called

    # job should be added under these parameters
    config['artifact_path'] = \
        's3://us-east-1.elasticmapreduce/samples/wordcount/'
    handle_job_request(config)
    assert shell_function.call_count == 1


def test_terminates_cluster_when_enabled(config, step_info, aws_api,
                                         time_sleep, fixed_datetime,
                                         shell_function):
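    # the step reports COMPLETED, so polling finishes and terminate can run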
    step_info[0]['Status']['State'] = 'COMPLETED'
    aws_api.return_value.list_cluster_steps.return_value = step_info

    # set terminate to true
    config['poll_cluster'] = True
    config['terminate'] = True
    handle_job_request(config)

    assert aws_api.return_value.terminate_clusters.call_count == 1


def test_adds_expected_python_job(config, aws_api, shell_function):
    expected_output = \
        """
        aws emr add-steps --profile qa --cluster-id cl-359
         --steps Type=Spark,Name=WordCount,ActionOnFailure=CONTINUE,
        Args=[--deploy-mode,cluster,--master,yarn,--conf,
        'spark.app.name=WordCount',
        --conf,'spark.yarn.appMasterEnv.ENVIRONMENT=qa',--py-files,
        s3://us-east-1.elasticmapreduce/samples/wordcount/application.zip,
        s3://us-east-1.elasticmapreduce/samples/wordcount/main.py]
        """

    handle_job_request(config)

    shell_function.assert_called_once_with(
        textwrap.dedent(expected_output).replace('\n', ''))


def test_adds_expected_java_job(config, aws_api, shell_function):
    expected_output = \
        """
        aws emr add-steps --profile qa --cluster-id cl-359
         --steps Type=Spark,Name=WordCount,ActionOnFailure=CONTINUE,
        Args=[--deploy-mode,cluster,--master,yarn,--conf,
        'spark.app.name=WordCount',--class,org.apache.spark.examples.WordCount,
        --conf,'spark.driver.extraJavaOptions=-DenvironmentKey=qa',
        --conf,'spark.executor.extraJavaOptions=-DenvironmentKey=qa',
        s3://us-east-1.elasticmapreduce/samples/wordcount.jar,
        hdfs:///text-input/]
        """

    config['job_runtime'] = 'Java'
    config['artifact_path'] = \
        's3://us-east-1.elasticmapreduce/samples/wordcount.jar'
    config['job_args'] = 'hdfs:///text-input/'
    handle_job_request(config)

    shell_function.assert_called_once_with(
        textwrap.dedent(expected_output).replace('\n', ''))


def test_polls_until_job_state_completed(config, step_info, aws_api,
                                         time_sleep, fixed_datetime,
                                         shell_function):
    config['poll_cluster'] = True
    job_response = copy.deepcopy(step_info[0])
    job_response['Status']['State'] = 'COMPLETED'

    # return two RUNNING states and then a COMPLETED state when called
    aws_api.return_value.list_cluster_steps.side_effect = \
        [step_info, step_info, [job_response]]

    handle_job_request(config)

    # expected time.sleep calls while polling job
    expected_calls = [call(60) for _ in range(3)]
    assert time_sleep.call_count == 3
    time_sleep.assert_has_calls(expected_calls)

    # expected interactions to list EMR steps
    expected_calls = [call('cl-359', 'WordCount', False) for _ in range(3)]
    assert aws_api.return_value.list_cluster_steps.call_count == 3
    aws_api.return_value.list_cluster_steps.assert_has_calls(expected_calls)