コード例 #1
0
ファイル: emr_test.py プロジェクト: hblanks/mrjob
    def test_local_bootstrap_action(self):
        # Make sure that local bootstrap action scripts get uploaded to S3
        # and are listed on the job flow ahead of the master bootstrap script.
        action_path = os.path.join(self.tmp_dir, 'apt-install.sh')
        with open(action_path, 'w') as f:
            # Valid sh: the loop variable is declared without a leading '$',
            # and "$@" is quoted so every argument stays a single word.
            f.write('for pkg in "$@"; do sudo apt-get install "$pkg"; done\n')

        # everything after the script path is passed to the action as args
        bootstrap_actions = [
            action_path + ' python-scipy mysql-server']

        # NOTE(review): conf_path=False presumably skips loading a config
        # file, and the tiny s3_sync_wait_time presumably keeps the test
        # fast -- confirm against EMRJobRunner.
        runner = EMRJobRunner(conf_path=False,
                              bootstrap_actions=bootstrap_actions,
                              s3_sync_wait_time=0.01)

        job_flow_id = runner.make_persistent_job_flow()

        emr_conn = runner.make_emr_conn()
        job_flow = emr_conn.describe_jobflow(job_flow_id)
        actions = job_flow.bootstrapactions

        # one user-supplied action plus the master bootstrap script
        assert_equal(len(actions), 2)

        # the local script should now be referenced by its S3 location,
        # keeping its file name and its arguments
        assert actions[0].path.startswith('s3://mrjob-')
        assert actions[0].path.endswith('/apt-install.sh')
        assert_equal(actions[0].name, 'apt-install.sh')
        assert_equal(actions[0].args, ['python-scipy', 'mysql-server'])

        # check for master bootstrap script
        assert actions[1].path.startswith('s3://mrjob-')
        assert actions[1].path.endswith('b.py')
        assert_equal(actions[1].args, [])
        assert_equal(actions[1].name, 'master')

        # make sure master bootstrap script is on S3
        assert runner.path_exists(actions[1].path)
コード例 #2
0
ファイル: emr_test.py プロジェクト: hblanks/mrjob
    def test_bootstrap_actions_get_added(self):
        # Bootstrap actions given as S3 URIs should show up on the job flow
        # with their path, args and name split out, followed by the master
        # bootstrap script.
        configure_hadoop_spec = 's3://elasticmapreduce/bootstrap-actions/configure-hadoop -m,mapred.tasktracker.map.tasks.maximum=1'
        # the '#xyzzy' suffix gives the script an alternate name
        renamed_spec = 's3://foo/bar#xyzzy'

        runner = EMRJobRunner(
            conf_path=False,
            bootstrap_actions=[configure_hadoop_spec, renamed_spec],
            s3_sync_wait_time=0.01)

        flow_id = runner.make_persistent_job_flow()
        flow = runner.make_emr_conn().describe_jobflow(flow_id)
        actions = flow.bootstrapactions

        # two user-supplied actions plus the master bootstrap script
        assert_equal(len(actions), 3)

        # first action: path and argument are separated, and the name is
        # taken from the last path component
        hadoop_action = actions[0]
        assert_equal(
            hadoop_action.path,
            's3://elasticmapreduce/bootstrap-actions/configure-hadoop')
        assert_equal(
            hadoop_action.args,
            ['-m,mapred.tasktracker.map.tasks.maximum=1'])
        assert_equal(hadoop_action.name, 'configure-hadoop')

        # second action: no args, name comes from the '#' suffix
        renamed_action = actions[1]
        assert_equal(renamed_action.path, 's3://foo/bar')
        assert_equal(renamed_action.args, [])
        assert_equal(renamed_action.name, 'xyzzy')

        # last action: the master bootstrap script
        master_action = actions[2]
        assert master_action.path.startswith('s3://mrjob-')
        assert master_action.path.endswith('b.py')
        assert_equal(master_action.args, [])
        assert_equal(master_action.name, 'master')

        # the master bootstrap script must actually exist on S3
        assert runner.path_exists(master_action.path)