コード例 #1
0
 def step(jar='/home/hadoop/contrib/streaming/hadoop-streaming.jar',
          args=[
              '-mapper', 'my_job.py --mapper', '-reducer',
              'my_job.py --reducer'
          ],
          state='COMPLETE',
          create_hours_ago=None,
          start_hours_ago=None,
          end_hours_ago=None,
          name='Streaming Step',
          action_on_failure='TERMINATE_CLUSTER',
          **kwargs):
     """Build a mock step as a MockEmrObject.

     Each truthy ``*_hours_ago`` value is turned into a timestamp that
     many hours before ``self.now`` (via to_iso8601()) and stored under
     the matching ``*datetime`` keyword. Every entry of *args* is wrapped
     in a MockEmrObject exposing it as ``value``. Any extra keyword
     arguments are passed straight through to the returned object.
     """
     hour_offsets = (
         ('creationdatetime', create_hours_ago),
         ('startdatetime', start_hours_ago),
         ('enddatetime', end_hours_ago),
     )
     for field, hours in hour_offsets:
         # falsy offsets (None, 0) leave the timestamp unset
         if hours:
             kwargs[field] = to_iso8601(self.now - timedelta(hours=hours))

     kwargs['args'] = [MockEmrObject(value=arg) for arg in args]

     return MockEmrObject(
         jar=jar, state=state, name=name,
         action_on_failure=action_on_failure, **kwargs)
コード例 #2
0
ファイル: test_pool.py プロジェクト: DanisHack/mrjob
    def test_clock_skew(self):
        # "now" is one second earlier than the recorded start time;
        # make sure the estimate is still something reasonable
        created_at = to_iso8601(datetime(2010, 6, 6, 4))
        started_at = to_iso8601(datetime(2010, 6, 6, 4, 26))
        job_flow = MockEmrObject(creationdatetime=created_at,
                                 startdatetime=started_at)

        just_before_start = datetime(2010, 6, 6, 4, 25, 59)
        remaining = est_time_to_hour(job_flow, now=just_before_start)

        self.assertEqual(remaining, timedelta(seconds=1))
コード例 #3
0
ファイル: test_pool.py プロジェクト: yukatherin/mrjob
    def test_clock_skew(self):
        # the job flow claims to have started at 4:26, but we ask for
        # the estimate as of 4:25:59; the result should still be sane
        timestamps = dict(
            creationdatetime=to_iso8601(datetime(2010, 6, 6, 4)),
            startdatetime=to_iso8601(datetime(2010, 6, 6, 4, 26)),
        )
        skewed_flow = MockEmrObject(**timestamps)

        estimate = est_time_to_hour(skewed_flow,
                                    now=datetime(2010, 6, 6, 4, 25, 59))

        self.assertEqual(estimate, timedelta(seconds=1))
コード例 #4
0
ファイル: test_pool.py プロジェクト: yukatherin/mrjob
    def test_started(self):
        # job flow created at 4:00 and started at 4:26
        flow = MockEmrObject(
            creationdatetime=to_iso8601(datetime(2010, 6, 6, 4)),
            startdatetime=to_iso8601(datetime(2010, 6, 6, 4, 26)))

        # (value passed as now, expected estimate), checked in order
        cases = [
            (datetime(2010, 6, 6, 4, 35), timedelta(minutes=51)),
            (datetime(2010, 6, 6, 5, 20), timedelta(minutes=6)),
            (datetime(2010, 6, 6, 6, 26), timedelta(minutes=60)),
        ]
        for current_time, expected in cases:
            self.assertEqual(est_time_to_hour(flow, now=current_time),
                             expected)
コード例 #5
0
ファイル: test_pool.py プロジェクト: yukatherin/mrjob
    def test_now_is_automatically_set(self):
        # bounds shared by both checks: more than 59 minutes, at most 60
        upper = timedelta(minutes=60)
        lower = timedelta(minutes=59)

        # only a creation time, and no explicit now= argument
        just_created = MockEmrObject(
            creationdatetime=to_iso8601(datetime.utcnow()))

        estimate = est_time_to_hour(just_created)

        self.assertLessEqual(estimate, upper)
        self.assertGreater(estimate, lower)

        # created a minute ago, started just now
        created_stamp = to_iso8601(datetime.utcnow() - timedelta(minutes=1))
        started_stamp = to_iso8601(datetime.utcnow())
        just_started = MockEmrObject(creationdatetime=created_stamp,
                                     startdatetime=started_stamp)

        estimate = est_time_to_hour(just_started)

        self.assertLessEqual(estimate, upper)
        self.assertGreater(estimate, lower)
コード例 #6
0
ファイル: test_pool.py プロジェクト: DanisHack/mrjob
    def test_started(self):
        # job flow created at 4:00 and started at 4:26
        created_at = to_iso8601(datetime(2010, 6, 6, 4))
        started_at = to_iso8601(datetime(2010, 6, 6, 4, 26))
        job_flow = MockEmrObject(creationdatetime=created_at,
                                 startdatetime=started_at)

        # nine minutes after the start
        at_4_35 = est_time_to_hour(job_flow, now=datetime(2010, 6, 6, 4, 35))
        self.assertEqual(at_4_35, timedelta(minutes=51))

        # fifty-four minutes after the start
        at_5_20 = est_time_to_hour(job_flow, now=datetime(2010, 6, 6, 5, 20))
        self.assertEqual(at_5_20, timedelta(minutes=6))

        # exactly two hours after the start: a full hour is reported
        at_6_26 = est_time_to_hour(job_flow, now=datetime(2010, 6, 6, 6, 26))
        self.assertEqual(at_6_26, timedelta(minutes=60))
コード例 #7
0
    def test_started(self):
        # cluster summary whose timeline says: created 4:26, ready 4:30
        timeline = MockEmrObject(
            creationdatetime=to_iso8601(datetime(2010, 6, 6, 4, 26)),
            readydatetime=to_iso8601(datetime(2010, 6, 6, 4, 30)))
        status = MockEmrObject(timeline=timeline)
        cluster = MockEmrObject(status=status)

        # (value passed as now, expected estimate), checked in order
        cases = (
            (datetime(2010, 6, 6, 4, 35), timedelta(minutes=51)),
            (datetime(2010, 6, 6, 5, 20), timedelta(minutes=6)),
            (datetime(2010, 6, 6, 6, 26), timedelta(minutes=60)),
        )
        for moment, expected in cases:
            self.assertEqual(_est_time_to_hour(cluster, now=moment), expected)
コード例 #8
0
    def test_now_is_automatically_set(self):
        # cluster summary created "right now"; no now= argument is passed
        created_stamp = to_iso8601(datetime.utcnow())
        timeline = MockEmrObject(creationdatetime=created_stamp)
        cluster = MockEmrObject(status=MockEmrObject(timeline=timeline))

        estimate = _est_time_to_hour(cluster)

        # more than 59 minutes, at most 60
        self.assertLessEqual(estimate, timedelta(minutes=60))
        self.assertGreater(estimate, timedelta(minutes=59))
コード例 #9
0
ファイル: test_pool.py プロジェクト: DanisHack/mrjob
    def test_now_is_automatically_set(self):
        # first case: only a creation time, no explicit now= argument
        created_only = MockEmrObject(
            creationdatetime=to_iso8601(datetime.utcnow()))

        time_left = est_time_to_hour(created_only)

        self.assertLessEqual(time_left, timedelta(minutes=60))
        self.assertGreater(time_left, timedelta(minutes=59))

        # second case: created a minute ago and started just now
        a_minute_ago = datetime.utcnow() - timedelta(minutes=1)
        flow_times = dict(
            creationdatetime=to_iso8601(a_minute_ago),
            startdatetime=to_iso8601(datetime.utcnow()),
        )
        created_and_started = MockEmrObject(**flow_times)

        time_left = est_time_to_hour(created_and_started)

        self.assertLessEqual(time_left, timedelta(minutes=60))
        self.assertGreater(time_left, timedelta(minutes=59))
コード例 #10
0
ファイル: test_pool.py プロジェクト: Milkigit/mrjob
    def test_started(self):
        # build the nested summary inside-out: timeline -> status -> cluster
        created_stamp = to_iso8601(datetime(2010, 6, 6, 4, 26))
        ready_stamp = to_iso8601(datetime(2010, 6, 6, 4, 30))
        timeline = MockEmrObject(creationdatetime=created_stamp,
                                 readydatetime=ready_stamp)
        summary = MockEmrObject(status=MockEmrObject(timeline=timeline))

        first = _est_time_to_hour(summary, now=datetime(2010, 6, 6, 4, 35))
        self.assertEqual(first, timedelta(minutes=51))

        second = _est_time_to_hour(summary, now=datetime(2010, 6, 6, 5, 20))
        self.assertEqual(second, timedelta(minutes=6))

        third = _est_time_to_hour(summary, now=datetime(2010, 6, 6, 6, 26))
        self.assertEqual(third, timedelta(minutes=60))
コード例 #11
0
ファイル: test_pool.py プロジェクト: Milkigit/mrjob
    def test_now_is_automatically_set(self):
        # the summary's timeline carries only a creation time of "now"
        timeline = MockEmrObject(
            creationdatetime=to_iso8601(datetime.utcnow()))
        status = MockEmrObject(timeline=timeline)
        summary = MockEmrObject(status=status)

        # no now= argument: the function has to pick the time itself
        remaining = _est_time_to_hour(summary)

        self.assertLessEqual(remaining, timedelta(minutes=60))
        self.assertGreater(remaining, timedelta(minutes=59))
コード例 #12
0
 def step(
     jar="/home/hadoop/contrib/streaming/hadoop-streaming.jar",
     args=["-mapper", "my_job.py --mapper", "-reducer", "my_job.py --reducer"],
     state="COMPLETE",
     create_hours_ago=None,
     start_hours_ago=None,
     end_hours_ago=None,
     name="Streaming Step",
     action_on_failure="TERMINATE_JOB_FLOW",
     **kwargs
 ):
     """Return a MockEmrObject describing a step.

     Truthy ``create_hours_ago`` / ``start_hours_ago`` / ``end_hours_ago``
     values become ``creationdatetime`` / ``startdatetime`` /
     ``enddatetime`` keywords, each computed relative to ``self.now``.
     Each item of *args* is wrapped in a MockEmrObject as ``value``.
     """

     def stamp(hours):
         # timestamp for the moment *hours* hours before self.now
         return to_iso8601(self.now - timedelta(hours=hours))

     if create_hours_ago:
         kwargs["creationdatetime"] = stamp(create_hours_ago)
     if start_hours_ago:
         kwargs["startdatetime"] = stamp(start_hours_ago)
     if end_hours_ago:
         kwargs["enddatetime"] = stamp(end_hours_ago)

     wrapped_args = [MockEmrObject(value=item) for item in args]
     kwargs["args"] = wrapped_args

     return MockEmrObject(
         jar=jar,
         state=state,
         name=name,
         action_on_failure=action_on_failure,
         **kwargs
     )
コード例 #13
0
 def step(jar='/home/hadoop/contrib/streaming/hadoop-streaming.jar',
          args=['-mapper', 'my_job.py --mapper',
                '-reducer', 'my_job.py --reducer'],
          state='COMPLETE',
          start_time_back=None,
          end_time_back=None,
          name='Streaming Step',
          action_on_failure='TERMINATE_JOB_FLOW',
          **kwargs):
     """Return a MockEmrObject describing a step.

     A truthy ``start_time_back`` / ``end_time_back`` is interpreted as a
     number of hours before ``self.now`` and stored as ``startdatetime`` /
     ``enddatetime``. Each item of *args* is wrapped in a MockEmrObject
     as ``value``.
     """
     hours_back_by_field = (
         ('startdatetime', start_time_back),
         ('enddatetime', end_time_back),
     )
     for field, hours_back in hours_back_by_field:
         # falsy values (None, 0) leave the timestamp unset
         if hours_back:
             moment = self.now - timedelta(hours=hours_back)
             kwargs[field] = to_iso8601(moment)

     kwargs['args'] = [MockEmrObject(value=arg) for arg in args]

     return MockEmrObject(jar=jar,
                          state=state,
                          name=name,
                          action_on_failure=action_on_failure,
                          **kwargs)
コード例 #14
0
    def test_can_get_all_job_flows(self):
        now = datetime.datetime.utcnow()

        # create more job flows than the default cap on returned job flows
        total = 2222
        assert_gt(total, DEFAULT_MAX_JOB_FLOWS_RETURNED)

        # IDs are zero-padded, so creation order is also sorted order
        expected_ids = ['j-%04d' % n for n in range(total)]
        for minutes_old, flow_id in enumerate(expected_ids):
            created = to_iso8601(
                now - datetime.timedelta(minutes=minutes_old))
            self.mock_emr_job_flows[flow_id] = MockEmrObject(
                creationdatetime=created, jobflowid=flow_id)

        emr_conn = EMRJobRunner().make_emr_conn()

        # a plain describe_jobflows() call stops at the cap
        capped = emr_conn.describe_jobflows()
        assert_equal(len(capped), DEFAULT_MAX_JOB_FLOWS_RETURNED)

        # describe_all_job_flows() should return every one of them
        everything = describe_all_job_flows(emr_conn)
        assert_equal(len(everything), total)
        assert_equal(sorted(flow.jobflowid for flow in everything),
                     expected_ids)
コード例 #15
0
ファイル: emr_test.py プロジェクト: boursier/mrjob
    def test_can_get_all_job_flows(self):
        now = datetime.datetime.utcnow()

        # deliberately exceed the default cap on returned job flows
        flow_count = 2222
        assert_gt(flow_count, DEFAULT_MAX_JOB_FLOWS_RETURNED)

        for index in range(flow_count):
            flow_id = 'j-%04d' % index
            age = datetime.timedelta(minutes=index)
            mock_flow = MockEmrObject(
                creationdatetime=to_iso8601(now - age),
                jobflowid=flow_id)
            self.mock_emr_job_flows[flow_id] = mock_flow

        emr_conn = EMRJobRunner().make_emr_conn()

        # ordinary describe_jobflows() hits the limit on number of job flows
        limited_flows = emr_conn.describe_jobflows()
        assert_equal(len(limited_flows), DEFAULT_MAX_JOB_FLOWS_RETURNED)

        # the helper under test must return all of them
        all_flows = describe_all_job_flows(emr_conn)
        assert_equal(len(all_flows), flow_count)

        returned_ids = sorted(flow.jobflowid for flow in all_flows)
        wanted_ids = [('j-%04d' % index) for index in range(flow_count)]
        assert_equal(returned_ids, wanted_ids)
コード例 #16
0
    def create_fake_job_flows(self):
        """Populate ``self.mock_emr_job_flows`` with mock job flows.

        Sets ``self.now`` to the current UTC time (microseconds dropped)
        and registers one mock job flow per scenario below. All timestamps
        are expressed relative to ``self.now``. Finally, every registered
        job flow gets a ``jobflowid`` equal to its key and a ``name``
        derived from that key.
        """
        self.now = datetime.utcnow().replace(microsecond=0)

        # empty job
        self.mock_emr_job_flows['j-EMPTY'] = MockEmrObject(
            state='WAITING',
            creationdatetime=to_iso8601(self.now - timedelta(hours=10)),
            steps=[],
        )

        # Build a step object easily
        # also make it respond to .args()
        def step(jar='/home/hadoop/contrib/streaming/hadoop-streaming.jar',
                 args=['-mapper', 'my_job.py --mapper',
                       '-reducer', 'my_job.py --reducer'],
                 state='COMPLETE',
                 start_time_back=None,
                 end_time_back=None,
                 name='Streaming Step',
                 action_on_failure='TERMINATE_JOB_FLOW',
                 **kwargs):
            # start_time_back / end_time_back are hours before self.now;
            # falsy values leave the corresponding timestamp unset
            if start_time_back:
                kwargs['startdatetime'] = to_iso8601(
                    self.now - timedelta(hours=start_time_back))
            if end_time_back:
                kwargs['enddatetime'] = to_iso8601(
                    self.now - timedelta(hours=end_time_back))
            kwargs['args'] = [MockEmrObject(value=a) for a in args]
            return MockEmrObject(
                jar=jar, state=state, name=name,
                action_on_failure=action_on_failure, **kwargs)

        # currently running job
        self.mock_emr_job_flows['j-CURRENTLY_RUNNING'] = MockEmrObject(
            state='RUNNING',
            creationdatetime=to_iso8601(self.now - timedelta(hours=6)),
            startdatetime=to_iso8601(self.now - timedelta(hours=5)),
            steps=[step(start_time_back=4, state='RUNNING')],
        )

        # finished job flow
        self.mock_emr_job_flows['j-DONE'] = MockEmrObject(
            state='COMPLETE',
            creationdatetime=to_iso8601(self.now - timedelta(hours=10)),
            startdatetime=to_iso8601(self.now - timedelta(hours=9)),
            enddatetime=to_iso8601(self.now - timedelta(hours=5)),
            steps=[step(start_time_back=8, end_time_back=6)],
        )

        # idle job flow
        self.mock_emr_job_flows['j-DONE_AND_IDLE'] = MockEmrObject(
            state='WAITING',
            creationdatetime=to_iso8601(self.now - timedelta(hours=6)),
            startdatetime=to_iso8601(self.now - timedelta(hours=5)),
            steps=[step(start_time_back=4, end_time_back=2)],
        )

        # hive job flow (looks completed but isn't)
        self.mock_emr_job_flows['j-HIVE'] = MockEmrObject(
            state='WAITING',
            creationdatetime=to_iso8601(self.now - timedelta(hours=6)),
            startdatetime=to_iso8601(self.now - timedelta(hours=5)),
            steps=[step(
                start_time_back=4,
                end_time_back=4,
                jar='s3://us-east-1.elasticmapreduce/libs/script-runner/script-runner.jar',
                args=[],
            )],
        )

        # custom hadoop streaming jar
        self.mock_emr_job_flows['j-CUSTOM_DONE_AND_IDLE'] = MockEmrObject(
            state='WAITING',
            creationdatetime=to_iso8601(self.now - timedelta(hours=6)),
            startdatetime=to_iso8601(self.now - timedelta(hours=5)),
            steps=[step(
                start_time_back=4,
                end_time_back=4,
                jar='s3://my_bucket/tmp/somejob/files/oddjob-0.0.3-SNAPSHOT-standalone.jar',
                args=[],
            )],
        )

        mock_conn = MockEmrConnection()

        # hadoop debugging without any other steps
        jobflow_id = mock_conn.run_jobflow(name='j-DEBUG_ONLY',
                                           log_uri='',
                                           enable_debugging=True)
        jf = mock_conn.describe_jobflow(jobflow_id)
        self.mock_emr_job_flows['j-DEBUG_ONLY'] = jf
        jf.state = 'WAITING'
        jf.startdatetime = to_iso8601(self.now - timedelta(hours=2))
        jf.steps[0].enddatetime = to_iso8601(self.now - timedelta(hours=2))

        # hadoop debugging + actual job
        # same jar as hive but with different args
        jobflow_id = mock_conn.run_jobflow(name='j-HADOOP_DEBUGGING',
                                           log_uri='',
                                           enable_debugging=True,
                                           steps=[step()])
        jf = mock_conn.describe_jobflow(jobflow_id)
        self.mock_emr_job_flows['j-HADOOP_DEBUGGING'] = jf
        jf.state = 'WAITING'
        jf.creationdatetime = to_iso8601(self.now - timedelta(hours=6))
        jf.startdatetime = to_iso8601(self.now - timedelta(hours=5))
        # Need to reset times manually because mockboto resets them
        jf.steps[0].enddatetime = to_iso8601(self.now - timedelta(hours=5))
        jf.steps[1].startdatetime = to_iso8601(self.now - timedelta(hours=4))
        jf.steps[1].enddatetime = to_iso8601(self.now - timedelta(hours=2))

        # skip cancelled steps
        self.mock_emr_job_flows['j-IDLE_AND_FAILED'] = MockEmrObject(
            state='WAITING',
            creationdatetime=to_iso8601(self.now - timedelta(hours=6)),
            startdatetime=to_iso8601(self.now - timedelta(hours=5)),
            steps=[
                step(start_time_back=4, end_time_back=3, state='FAILED'),
                step(
                    state='CANCELLED',
                )
            ],
        )

        # add job flow IDs and fake names to the mock job flows
        # (.items() rather than .iteritems() so this also runs on Python 3)
        for jfid, jf in self.mock_emr_job_flows.items():
            jf.jobflowid = jfid
            jf.name = jfid[2:].replace('_', ' ').title() + ' Job Flow'
コード例 #17
0
 def ago(**kwargs):
     """Return the timestamp for ``self.now`` minus ``timedelta(**kwargs)``.

     Returns None instead when any of the keyword values is None.
     """
     for amount in kwargs.values():
         if amount is None:
             return None

     moment = self.now - timedelta(**kwargs)
     return to_iso8601(moment)
コード例 #18
0
 def ago(**kwargs):
     """Format the moment ``timedelta(**kwargs)`` before ``self.now``.

     If any keyword value is None, None is returned instead.
     """
     has_missing_value = None in [
         None for value in kwargs.values() if value is None]
     if not has_missing_value:
         return to_iso8601(self.now - timedelta(**kwargs))
     return None
コード例 #19
0
    def create_fake_job_flows(self):
        """Populate ``self.mock_emr_job_flows`` with mock job flows.

        Sets ``self.now`` to the current UTC time (microseconds dropped),
        adds an empty ``my_bucket`` to the mock S3 data, and registers one
        mock job flow per scenario below, with timestamps expressed
        relative to ``self.now``. Two scenarios also add mock S3 data under
        ``locks/<job flow id>/2`` keys. Finally, every registered job flow
        gets a ``jobflowid`` equal to its key and a ``name`` derived from
        that key.
        """
        self.now = datetime.utcnow().replace(microsecond=0)
        self.add_mock_s3_data({'my_bucket': {}})

        # Build a step object easily
        # also make it respond to .args()
        def step(jar='/home/hadoop/contrib/streaming/hadoop-streaming.jar',
                 args=['-mapper', 'my_job.py --mapper',
                       '-reducer', 'my_job.py --reducer'],
                 state='COMPLETE',
                 create_hours_ago=None,
                 start_hours_ago=None,
                 end_hours_ago=None,
                 name='Streaming Step',
                 action_on_failure='TERMINATE_CLUSTER',
                 **kwargs):
            """Return a MockEmrObject describing a step.

            Each truthy ``*_hours_ago`` value becomes the matching
            ``*datetime`` keyword, computed relative to ``self.now``;
            falsy values (None, 0) leave that timestamp unset.
            """
            if create_hours_ago:
                kwargs['creationdatetime'] = to_iso8601(
                    self.now - timedelta(hours=create_hours_ago))
            if start_hours_ago:
                kwargs['startdatetime'] = to_iso8601(
                    self.now - timedelta(hours=start_hours_ago))
            if end_hours_ago:
                kwargs['enddatetime'] = to_iso8601(
                    self.now - timedelta(hours=end_hours_ago))
            # wrap each argument so it is exposed as .value
            kwargs['args'] = [MockEmrObject(value=a) for a in args]
            return MockEmrObject(
                jar=jar, state=state, name=name,
                action_on_failure=action_on_failure, **kwargs)

        # empty job
        self.mock_emr_job_flows['j-EMPTY'] = MockEmrObject(
            creationdatetime=to_iso8601(self.now - timedelta(hours=10)),
            state='STARTING',
            steps=[],
        )

        # job that's bootstrapping
        self.mock_emr_job_flows['j-BOOTSTRAPPING'] = MockEmrObject(
            creationdatetime=to_iso8601(self.now - timedelta(hours=10)),
            startdatetime=to_iso8601(
                self.now - timedelta(hours=9, minutes=55)),
            state='BOOTSTRAPPING',
            steps=[step(create_hours_ago=10, state='PENDING')],
        )

        # currently running job
        self.mock_emr_job_flows['j-CURRENTLY_RUNNING'] = MockEmrObject(
            creationdatetime=to_iso8601(self.now - timedelta(hours=6)),
            readydatetime=to_iso8601(self.now - timedelta(hours=4,
                                                          minutes=10)),
            startdatetime=to_iso8601(self.now - timedelta(hours=4,
                                                          minutes=15)),
            state='RUNNING',
            steps=[step(start_hours_ago=4, state='RUNNING')],
        )

        # finished job flow
        self.mock_emr_job_flows['j-DONE'] = MockEmrObject(
            creationdatetime=to_iso8601(self.now - timedelta(hours=10)),
            enddatetime=to_iso8601(self.now - timedelta(hours=5)),
            readydatetime=to_iso8601(self.now - timedelta(hours=8)),
            startdatetime=to_iso8601(self.now - timedelta(hours=9)),
            state='COMPLETE',
            steps=[step(start_hours_ago=8, end_hours_ago=6)],
        )

        # idle job flow
        self.mock_emr_job_flows['j-DONE_AND_IDLE'] = MockEmrObject(
            creationdatetime=to_iso8601(self.now - timedelta(hours=6)),
            readydatetime=to_iso8601(self.now - timedelta(hours=5, minutes=5)),
            startdatetime=to_iso8601(self.now - timedelta(hours=5)),
            state='WAITING',
            steps=[step(start_hours_ago=4, end_hours_ago=2)],
        )

        # idle job flow with an active lock
        # (the lock's mock S3 entry is stamped with the current time)
        self.mock_emr_job_flows['j-IDLE_AND_LOCKED'] = MockEmrObject(
            creationdatetime=to_iso8601(self.now - timedelta(hours=6)),
            readydatetime=to_iso8601(self.now - timedelta(hours=5, minutes=5)),
            startdatetime=to_iso8601(self.now - timedelta(hours=5)),
            state='WAITING',
            steps=[step(start_hours_ago=4, end_hours_ago=2)],
        )
        self.add_mock_s3_data({
            'my_bucket': {
                'locks/j-IDLE_AND_LOCKED/2': b'not_you',
            },
        }, time_modified=datetime.utcnow())

        # idle job flow with an expired lock
        # (the lock's mock S3 entry is stamped five minutes in the past)
        self.mock_emr_job_flows['j-IDLE_AND_EXPIRED'] = MockEmrObject(
            creationdatetime=to_iso8601(self.now - timedelta(hours=6)),
            readydatetime=to_iso8601(self.now - timedelta(hours=5, minutes=5)),
            startdatetime=to_iso8601(self.now - timedelta(hours=5)),
            state='WAITING',
            steps=[step(start_hours_ago=4, end_hours_ago=2)],
        )
        self.add_mock_s3_data({
            'my_bucket': {
                'locks/j-IDLE_AND_EXPIRED/2': b'not_you',
            },
        }, time_modified=datetime.utcnow()-timedelta(minutes=5))

        # idle job flow whose only step has a start time but no end time
        self.mock_emr_job_flows['j-IDLE_BUT_INCOMPLETE_STEPS'] = MockEmrObject(
            creationdatetime=to_iso8601(self.now - timedelta(hours=6)),
            readydatetime=to_iso8601(self.now - timedelta(hours=5, minutes=5)),
            startdatetime=to_iso8601(self.now - timedelta(hours=5)),
            state='WAITING',
            steps=[step(start_hours_ago=4, end_hours_ago=None)],
        )

        # hive job flow (looks completed but isn't)
        self.mock_emr_job_flows['j-HIVE'] = MockEmrObject(
            creationdatetime=to_iso8601(self.now - timedelta(hours=6)),
            readydatetime=to_iso8601(self.now - timedelta(hours=5, minutes=5)),
            startdatetime=to_iso8601(self.now - timedelta(hours=5)),
            state='WAITING',
            steps=[step(
                start_hours_ago=4,
                end_hours_ago=4,
                jar=('s3://us-east-1.elasticmapreduce/libs/script-runner/'
                     'script-runner.jar'),
                args=[],
            )],
        )

        # custom hadoop streaming jar
        self.mock_emr_job_flows['j-CUSTOM_DONE_AND_IDLE'] = MockEmrObject(
            creationdatetime=to_iso8601(self.now - timedelta(hours=6)),
            readydatetime=to_iso8601(self.now - timedelta(hours=5, minutes=5)),
            startdatetime=to_iso8601(self.now - timedelta(hours=5)),
            state='WAITING',
            steps=[step(
                start_hours_ago=4,
                end_hours_ago=4,
                jar=('s3://my_bucket/tmp/somejob/files/'
                     'oddjob-0.0.3-SNAPSHOT-standalone.jar'),
                args=[],
            )],
        )

        # the next two job flows are created through a mock connection
        # and then adjusted by hand
        mock_conn = MockEmrConnection()

        # hadoop debugging without any other steps
        jobflow_id = mock_conn.run_jobflow(name='j-DEBUG_ONLY',
                                           log_uri='',
                                           enable_debugging=True,
                                           now=self.now -
                                               timedelta(hours=3, minutes=5))
        jf = mock_conn.describe_jobflow(jobflow_id)
        self.mock_emr_job_flows['j-DEBUG_ONLY'] = jf
        jf.state = 'WAITING'
        jf.startdatetime = to_iso8601(
            self.now - timedelta(hours=3))
        jf.readydatetime = to_iso8601(
            self.now - timedelta(hours=2, minutes=55))
        jf.steps[0].enddatetime = to_iso8601(self.now - timedelta(hours=2))

        # hadoop debugging + actual job
        # same jar as hive but with different args
        jobflow_id = mock_conn.run_jobflow(name='j-HADOOP_DEBUGGING',
                                           log_uri='',
                                           enable_debugging=True,
                                           now=self.now -
                                           timedelta(hours=6))
        jf = mock_conn.describe_jobflow(jobflow_id)
        self.mock_emr_job_flows['j-HADOOP_DEBUGGING'] = jf
        # append the actual job's step after whatever run_jobflow() created
        jf.steps.append(step())
        jf.state = 'WAITING'
        jf.startdatetime = to_iso8601(self.now - timedelta(hours=5))
        jf.readydatetime = to_iso8601(
            self.now - timedelta(hours=4, minutes=55))
        # Need to reset times manually because mockboto resets them
        jf.steps[0].enddatetime = to_iso8601(self.now - timedelta(hours=5))
        jf.steps[1].startdatetime = to_iso8601(self.now - timedelta(hours=4))
        jf.steps[1].enddatetime = to_iso8601(self.now - timedelta(hours=2))

        # should skip cancelled steps
        self.mock_emr_job_flows['j-IDLE_AND_FAILED'] = MockEmrObject(
            state='WAITING',
            creationdatetime=to_iso8601(self.now - timedelta(hours=6)),
            readydatetime=to_iso8601(self.now - timedelta(hours=5, minutes=5)),
            startdatetime=to_iso8601(self.now - timedelta(hours=5)),
            steps=[
                step(start_hours_ago=4, end_hours_ago=3, state='FAILED'),
                step(
                    state='CANCELLED',
                )
            ],
        )

        # pooled job flow reaching end of full hour
        self.mock_emr_job_flows['j-POOLED'] = MockEmrObject(
            bootstrapactions=[
                MockEmrObject(args=[], name='action 0'),
                MockEmrObject(args=[
                    MockEmrObject(
                        value='pool-0123456789abcdef0123456789abcdef'),
                    MockEmrObject(value='reflecting'),
                ], name='master'),
            ],
            creationdatetime=to_iso8601(self.now - timedelta(hours=1)),
            readydatetime=to_iso8601(self.now - timedelta(minutes=50)),
            startdatetime=to_iso8601(self.now - timedelta(minutes=55)),
            state='WAITING',
            steps=[],
        )

        # job flow that has had pending jobs but hasn't run them
        self.mock_emr_job_flows['j-PENDING_BUT_IDLE'] = MockEmrObject(
            creationdatetime=to_iso8601(self.now - timedelta(hours=3)),
            readydatetime=to_iso8601(
                self.now - timedelta(hours=2, minutes=50)),
            startdatetime=to_iso8601(
                self.now - timedelta(hours=2, minutes=55)),
            state='RUNNING',
            steps=[step(create_hours_ago=3, state='PENDING')],
        )

        # add job flow IDs and fake names to the mock job flows
        # e.g. 'j-DONE_AND_IDLE' is named 'Done And Idle Job Flow'
        for jfid, jf in self.mock_emr_job_flows.items():
            jf.jobflowid = jfid
            jf.name = jfid[2:].replace('_', ' ').title() + ' Job Flow'
コード例 #20
0
    def create_fake_job_flows(self):
        self.now = datetime.utcnow().replace(microsecond=0)

        # Build a step object easily
        # also make it respond to .args()
        def step(
            jar="/home/hadoop/contrib/streaming/hadoop-streaming.jar",
            args=["-mapper", "my_job.py --mapper", "-reducer", "my_job.py --reducer"],
            state="COMPLETE",
            create_hours_ago=None,
            start_hours_ago=None,
            end_hours_ago=None,
            name="Streaming Step",
            action_on_failure="TERMINATE_JOB_FLOW",
            **kwargs
        ):
            if create_hours_ago:
                kwargs["creationdatetime"] = to_iso8601(self.now - timedelta(hours=create_hours_ago))
            if start_hours_ago:
                kwargs["startdatetime"] = to_iso8601(self.now - timedelta(hours=start_hours_ago))
            if end_hours_ago:
                kwargs["enddatetime"] = to_iso8601(self.now - timedelta(hours=end_hours_ago))
            kwargs["args"] = [MockEmrObject(value=a) for a in args]
            return MockEmrObject(jar=jar, state=state, name=name, action_on_failure=action_on_failure, **kwargs)

        # empty job
        self.mock_emr_job_flows["j-EMPTY"] = MockEmrObject(
            creationdatetime=to_iso8601(self.now - timedelta(hours=10)), state="STARTING"
        )

        # job that's bootstrapping
        self.mock_emr_job_flows["j-BOOTSTRAPPING"] = MockEmrObject(
            creationdatetime=to_iso8601(self.now - timedelta(hours=10)),
            startdatetime=to_iso8601(self.now - timedelta(hours=9, minutes=55)),
            state="BOOTSTRAPPING",
            steps=[step(create_hours_ago=10, state="PENDING")],
        )

        # currently running job
        self.mock_emr_job_flows["j-CURRENTLY_RUNNING"] = MockEmrObject(
            creationdatetime=to_iso8601(self.now - timedelta(hours=6)),
            readydatetime=to_iso8601(self.now - timedelta(hours=4, minutes=10)),
            startdatetime=to_iso8601(self.now - timedelta(hours=4, minutes=15)),
            state="RUNNING",
            steps=[step(start_hours_ago=4, state="RUNNING")],
        )

        # finished job flow
        self.mock_emr_job_flows["j-DONE"] = MockEmrObject(
            creationdatetime=to_iso8601(self.now - timedelta(hours=10)),
            enddatetime=to_iso8601(self.now - timedelta(hours=5)),
            readydatetime=to_iso8601(self.now - timedelta(hours=8)),
            startdatetime=to_iso8601(self.now - timedelta(hours=9)),
            state="COMPLETE",
            steps=[step(start_hours_ago=8, end_hours_ago=6)],
        )

        # idle job flow
        self.mock_emr_job_flows["j-DONE_AND_IDLE"] = MockEmrObject(
            creationdatetime=to_iso8601(self.now - timedelta(hours=6)),
            readydatetime=to_iso8601(self.now - timedelta(hours=5, minutes=5)),
            startdatetime=to_iso8601(self.now - timedelta(hours=5)),
            state="WAITING",
            steps=[step(start_hours_ago=4, end_hours_ago=2)],
        )

        # hive job flow (looks completed but isn't)
        self.mock_emr_job_flows["j-HIVE"] = MockEmrObject(
            creationdatetime=to_iso8601(self.now - timedelta(hours=6)),
            readydatetime=to_iso8601(self.now - timedelta(hours=5, minutes=5)),
            startdatetime=to_iso8601(self.now - timedelta(hours=5)),
            state="WAITING",
            steps=[
                step(
                    start_hours_ago=4,
                    end_hours_ago=4,
                    jar=("s3://us-east-1.elasticmapreduce/libs/script-runner/" "script-runner.jar"),
                    args=[],
                )
            ],
        )

        # custom hadoop streaming jar
        self.mock_emr_job_flows["j-CUSTOM_DONE_AND_IDLE"] = MockEmrObject(
            creationdatetime=to_iso8601(self.now - timedelta(hours=6)),
            readydatetime=to_iso8601(self.now - timedelta(hours=5, minutes=5)),
            startdatetime=to_iso8601(self.now - timedelta(hours=5)),
            state="WAITING",
            steps=[
                step(
                    start_hours_ago=4,
                    end_hours_ago=4,
                    jar=("s3://my_bucket/tmp/somejob/files/" "oddjob-0.0.3-SNAPSHOT-standalone.jar"),
                    args=[],
                )
            ],
        )

        mock_conn = MockEmrConnection()

        # hadoop debugging without any other steps
        jobflow_id = mock_conn.run_jobflow(
            name="j-DEBUG_ONLY", log_uri="", enable_debugging=True, now=self.now - timedelta(hours=3, minutes=5)
        )
        jf = mock_conn.describe_jobflow(jobflow_id)
        self.mock_emr_job_flows["j-DEBUG_ONLY"] = jf
        jf.state = "WAITING"
        jf.startdatetime = to_iso8601(self.now - timedelta(hours=3))
        jf.readydatetime = to_iso8601(self.now - timedelta(hours=2, minutes=55))
        jf.steps[0].enddatetime = to_iso8601(self.now - timedelta(hours=2))

        # hadoop debugging + actual job
        # same jar as hive but with different args
        jobflow_id = mock_conn.run_jobflow(
            name="j-HADOOP_DEBUGGING",
            log_uri="",
            enable_debugging=True,
            steps=[step()],
            now=self.now - timedelta(hours=6),
        )
        jf = mock_conn.describe_jobflow(jobflow_id)
        self.mock_emr_job_flows["j-HADOOP_DEBUGGING"] = jf
        jf.state = "WAITING"
        jf.startdatetime = to_iso8601(self.now - timedelta(hours=5))
        jf.readydatetime = to_iso8601(self.now - timedelta(hours=4, minutes=55))
        # Need to reset times manually because mockboto resets them
        jf.steps[0].enddatetime = to_iso8601(self.now - timedelta(hours=5))
        jf.steps[1].startdatetime = to_iso8601(self.now - timedelta(hours=4))
        jf.steps[1].enddatetime = to_iso8601(self.now - timedelta(hours=2))

        # should skip cancelled steps
        self.mock_emr_job_flows["j-IDLE_AND_FAILED"] = MockEmrObject(
            state="WAITING",
            creationdatetime=to_iso8601(self.now - timedelta(hours=6)),
            readydatetime=to_iso8601(self.now - timedelta(hours=5, minutes=5)),
            startdatetime=to_iso8601(self.now - timedelta(hours=5)),
            steps=[step(start_hours_ago=4, end_hours_ago=3, state="FAILED"), step(state="CANCELLED")],
        )

        # pooled job flow reaching end of full hour
        self.mock_emr_job_flows["j-POOLED"] = MockEmrObject(
            bootstrapactions=[
                MockEmrObject(args=[]),
                MockEmrObject(
                    args=[
                        MockEmrObject(value="pool-0123456789abcdef0123456789abcdef"),
                        MockEmrObject(value="reflecting"),
                    ]
                ),
            ],
            creationdatetime=to_iso8601(self.now - timedelta(hours=1)),
            readydatetime=to_iso8601(self.now - timedelta(minutes=50)),
            startdatetime=to_iso8601(self.now - timedelta(minutes=55)),
            state="WAITING",
            steps=[],
        )

        # job flow that has had pending jobs but hasn't run them
        self.mock_emr_job_flows["j-PENDING_BUT_IDLE"] = MockEmrObject(
            creationdatetime=to_iso8601(self.now - timedelta(hours=3)),
            readydatetime=to_iso8601(self.now - timedelta(hours=2, minutes=50)),
            startdatetime=to_iso8601(self.now - timedelta(hours=2, minutes=55)),
            state="RUNNING",
            steps=[step(create_hours_ago=3, state="PENDING")],
        )

        # add job flow IDs and fake names to the mock job flows
        for jfid, jf in self.mock_emr_job_flows.iteritems():
            jf.jobflowid = jfid
            jf.name = jfid[2:].replace("_", " ").title() + " Job Flow"
コード例 #21
0
    def create_fake_job_flows(self):
        """Fill ``self.mock_emr_job_flows`` with a fixed set of fake job flows.

        ``self.now`` is set to the current UTC time with microseconds
        dropped, and every timestamp on the fake job flows and their steps
        is expressed as a whole number of hours before it. After all job
        flows are registered, each one is given a ``jobflowid`` (its
        dictionary key) and a readable ``name`` derived from that key.
        """
        self.now = datetime.utcnow().replace(microsecond=0)

        streaming_jar = (
            '/home/hadoop/contrib/streaming/hadoop-0.18-streaming.jar')
        script_runner_jar = (
            's3://us-east-1.elasticmapreduce/libs/script-runner/'
            'script-runner.jar')

        def hours_ago(hours):
            """Return the ISO 8601 timestamp *hours* hours before self.now."""
            return to_iso8601(self.now - timedelta(hours=hours))

        # empty job
        self.mock_emr_job_flows['j-EMPTY'] = MockEmrObject(
            state='WAITING',
            creationdatetime=hours_ago(10),
            steps=[],
        )

        # currently running job
        self.mock_emr_job_flows['j-CURRENTLY_RUNNING'] = MockEmrObject(
            state='RUNNING',
            creationdatetime=hours_ago(6),
            startdatetime=hours_ago(5),
            steps=[
                MockEmrObject(
                    startdatetime=hours_ago(4),
                    jar=streaming_jar,
                    state='RUNNING',
                ),
            ],
        )

        # finished job flow
        self.mock_emr_job_flows['j-DONE'] = MockEmrObject(
            state='COMPLETE',
            creationdatetime=hours_ago(10),
            startdatetime=hours_ago(9),
            enddatetime=hours_ago(5),
            steps=[
                MockEmrObject(
                    startdatetime=hours_ago(8),
                    enddatetime=hours_ago(6),
                    jar=streaming_jar,
                    state='COMPLETE',
                ),
            ],
        )

        # idle job flow
        self.mock_emr_job_flows['j-DONE_AND_IDLE'] = MockEmrObject(
            state='WAITING',
            creationdatetime=hours_ago(6),
            startdatetime=hours_ago(5),
            steps=[
                MockEmrObject(
                    startdatetime=hours_ago(4),
                    enddatetime=hours_ago(2),
                    jar=streaming_jar,
                    state='COMPLETE',
                ),
            ],
        )

        # hive job flow (looks completed but isn't)
        self.mock_emr_job_flows['j-HIVE'] = MockEmrObject(
            state='WAITING',
            creationdatetime=hours_ago(6),
            startdatetime=hours_ago(5),
            steps=[
                MockEmrObject(
                    startdatetime=hours_ago(4),
                    enddatetime=hours_ago(4),
                    jar=script_runner_jar,
                    state='COMPLETE',
                ),
            ],
        )

        # hadoop debugging + actual job
        # hadoop debugging looks the same to us as Hive (they use the same
        # jar). The difference is that there's also a streaming step.
        self.mock_emr_job_flows['j-HADOOP_DEBUGGING'] = MockEmrObject(
            state='WAITING',
            creationdatetime=hours_ago(6),
            startdatetime=hours_ago(5),
            steps=[
                MockEmrObject(
                    startdatetime=hours_ago(5),
                    enddatetime=hours_ago(5),
                    jar=script_runner_jar,
                    state='COMPLETE',
                ),
                MockEmrObject(
                    startdatetime=hours_ago(4),
                    enddatetime=hours_ago(2),
                    jar=streaming_jar,
                    state='COMPLETE',
                ),
            ],
        )

        # skip cancelled steps
        self.mock_emr_job_flows['j-IDLE_AND_FAILED'] = MockEmrObject(
            state='WAITING',
            creationdatetime=hours_ago(6),
            startdatetime=hours_ago(5),
            steps=[
                MockEmrObject(
                    startdatetime=hours_ago(4),
                    enddatetime=hours_ago(3),
                    jar=streaming_jar,
                    state='FAILED',
                ),
                # the cancelled step never ran, so it carries no timestamps
                MockEmrObject(
                    jar=streaming_jar,
                    state='CANCELLED',
                ),
            ],
        )

        # add job flow IDs and fake names to the mock job flows
        # (.items() rather than the Python-2-only .iteritems())
        for jfid, jf in self.mock_emr_job_flows.items():
            jf.jobflowid = jfid
            jf.name = jfid[2:].replace('_', ' ').title() + ' Job Flow'
コード例 #22
0
    def create_fake_job_flows(self):
        """Fill ``self.mock_emr_job_flows`` with a fixed set of fake job flows.

        ``self.now`` is set to the current UTC time with microseconds
        dropped, and every timestamp on the fake job flows and their steps
        is expressed as an offset before it. Most job flows are built
        directly as ``MockEmrObject`` instances; the two hadoop-debugging
        job flows are created through ``MockEmrConnection.run_jobflow`` and
        then patched by hand. After all job flows are registered, each one
        is given a ``jobflowid`` (its dictionary key) and a readable
        ``name`` derived from that key.
        """
        self.now = datetime.utcnow().replace(microsecond=0)

        def ago(**delta):
            """Return the ISO 8601 timestamp ``timedelta(**delta)`` before
            self.now."""
            return to_iso8601(self.now - timedelta(**delta))

        # empty job
        self.mock_emr_job_flows['j-EMPTY'] = MockEmrObject(
            creationdatetime=ago(hours=10),
            state='WAITING',
        )

        # Build a step object easily
        # also make it respond to .args()
        def step(jar='/home/hadoop/contrib/streaming/hadoop-streaming.jar',
                 args=('-mapper', 'my_job.py --mapper',
                       '-reducer', 'my_job.py --reducer'),
                 state='COMPLETE',
                 start_time_back=None,
                 end_time_back=None,
                 name='Streaming Step',
                 action_on_failure='TERMINATE_JOB_FLOW',
                 **kwargs):
            """Return a MockEmrObject describing a single step.

            *start_time_back* / *end_time_back* are hours before self.now;
            when left as None the corresponding timestamp attribute is not
            set at all. Each entry of *args* is wrapped in a MockEmrObject
            with a ``value`` attribute. Extra keyword arguments are passed
            through to MockEmrObject.
            """
            # compare against None so that an explicit 0 ("right now")
            # still produces a timestamp
            if start_time_back is not None:
                kwargs['startdatetime'] = ago(hours=start_time_back)
            if end_time_back is not None:
                kwargs['enddatetime'] = ago(hours=end_time_back)
            kwargs['args'] = [MockEmrObject(value=a) for a in args]
            return MockEmrObject(jar=jar,
                                 state=state,
                                 name=name,
                                 action_on_failure=action_on_failure,
                                 **kwargs)

        # currently running job
        self.mock_emr_job_flows['j-CURRENTLY_RUNNING'] = MockEmrObject(
            creationdatetime=ago(hours=6),
            startdatetime=ago(hours=4, minutes=15),
            state='RUNNING',
            steps=[step(start_time_back=4, state='RUNNING')],
        )

        # finished job flow
        self.mock_emr_job_flows['j-DONE'] = MockEmrObject(
            creationdatetime=ago(hours=10),
            enddatetime=ago(hours=5),
            startdatetime=ago(hours=9),
            state='COMPLETE',
            steps=[step(start_time_back=8, end_time_back=6)],
        )

        # idle job flow
        self.mock_emr_job_flows['j-DONE_AND_IDLE'] = MockEmrObject(
            creationdatetime=ago(hours=6),
            startdatetime=ago(hours=5),
            state='WAITING',
            steps=[step(start_time_back=4, end_time_back=2)],
        )

        # hive job flow (looks completed but isn't)
        self.mock_emr_job_flows['j-HIVE'] = MockEmrObject(
            creationdatetime=ago(hours=6),
            startdatetime=ago(hours=5),
            state='WAITING',
            steps=[
                step(
                    start_time_back=4,
                    end_time_back=4,
                    jar=('s3://us-east-1.elasticmapreduce/libs/script-runner/'
                         'script-runner.jar'),
                    args=[],
                ),
            ],
        )

        # custom hadoop streaming jar
        self.mock_emr_job_flows['j-CUSTOM_DONE_AND_IDLE'] = MockEmrObject(
            creationdatetime=ago(hours=6),
            startdatetime=ago(hours=5),
            state='WAITING',
            steps=[
                step(
                    start_time_back=4,
                    end_time_back=4,
                    jar=('s3://my_bucket/tmp/somejob/files/'
                         'oddjob-0.0.3-SNAPSHOT-standalone.jar'),
                    args=[],
                ),
            ],
        )

        mock_conn = MockEmrConnection()

        # hadoop debugging without any other steps
        jobflow_id = mock_conn.run_jobflow(name='j-DEBUG_ONLY',
                                           log_uri='',
                                           enable_debugging=True)
        jf = mock_conn.describe_jobflow(jobflow_id)
        self.mock_emr_job_flows['j-DEBUG_ONLY'] = jf
        jf.state = 'WAITING'
        jf.startdatetime = ago(hours=2)
        jf.steps[0].enddatetime = ago(hours=2)

        # hadoop debugging + actual job
        # same jar as hive but with different args
        jobflow_id = mock_conn.run_jobflow(name='j-HADOOP_DEBUGGING',
                                           log_uri='',
                                           enable_debugging=True,
                                           steps=[step()])
        jf = mock_conn.describe_jobflow(jobflow_id)
        self.mock_emr_job_flows['j-HADOOP_DEBUGGING'] = jf
        jf.state = 'WAITING'
        jf.creationdatetime = ago(hours=6)
        jf.startdatetime = ago(hours=5)
        # Need to reset times manually because mockboto resets them
        jf.steps[0].enddatetime = ago(hours=5)
        jf.steps[1].startdatetime = ago(hours=4)
        jf.steps[1].enddatetime = ago(hours=2)

        # skip cancelled steps
        self.mock_emr_job_flows['j-IDLE_AND_FAILED'] = MockEmrObject(
            state='WAITING',
            creationdatetime=ago(hours=6),
            startdatetime=ago(hours=5),
            steps=[
                step(start_time_back=4, end_time_back=3, state='FAILED'),
                step(state='CANCELLED'),
            ],
        )

        # pooled job flow reaching end of full hour
        self.mock_emr_job_flows['j-POOLED'] = MockEmrObject(
            bootstrapactions=[
                MockEmrObject(args=[]),
                MockEmrObject(args=[
                    MockEmrObject(
                        value='pool-0123456789abcdef0123456789abcdef'),
                    MockEmrObject(value='reflecting'),
                ]),
            ],
            creationdatetime=ago(hours=1),
            startdatetime=ago(minutes=55),
            state='WAITING',
            steps=[],
        )

        # add job flow IDs and fake names to the mock job flows
        # (.items() rather than the Python-2-only .iteritems())
        for jfid, jf in self.mock_emr_job_flows.items():
            jf.jobflowid = jfid
            jf.name = jfid[2:].replace('_', ' ').title() + ' Job Flow'
コード例 #23
0
    def create_fake_job_flows(self):
        """Fill ``self.mock_emr_job_flows`` with a fixed set of fake job flows.

        ``self.now`` is set to the current UTC time with microseconds
        dropped, and every timestamp on the fake job flows and their steps
        is expressed as a whole number of hours before it. After all job
        flows are registered, each one is given a ``jobflowid`` (its
        dictionary key) and a readable ``name`` derived from that key.
        """
        self.now = datetime.utcnow().replace(microsecond=0)

        streaming_jar = (
            '/home/hadoop/contrib/streaming/hadoop-0.20-streaming.jar')
        script_runner_jar = (
            's3://us-east-1.elasticmapreduce/libs/script-runner/'
            'script-runner.jar')

        def hours_ago(hours):
            """Return the ISO 8601 timestamp *hours* hours before self.now."""
            return to_iso8601(self.now - timedelta(hours=hours))

        # empty job
        self.mock_emr_job_flows['j-EMPTY'] = MockEmrObject(
            state='WAITING',
            creationdatetime=hours_ago(10),
            steps=[],
        )

        # currently running job
        self.mock_emr_job_flows['j-CURRENTLY_RUNNING'] = MockEmrObject(
            state='RUNNING',
            creationdatetime=hours_ago(6),
            startdatetime=hours_ago(5),
            steps=[
                MockEmrObject(
                    startdatetime=hours_ago(4),
                    jar=streaming_jar,
                    state='RUNNING',
                ),
            ],
        )

        # finished job flow
        self.mock_emr_job_flows['j-DONE'] = MockEmrObject(
            state='COMPLETE',
            creationdatetime=hours_ago(10),
            startdatetime=hours_ago(9),
            enddatetime=hours_ago(5),
            steps=[
                MockEmrObject(
                    startdatetime=hours_ago(8),
                    enddatetime=hours_ago(6),
                    jar=streaming_jar,
                    state='COMPLETE',
                ),
            ],
        )

        # idle job flow
        self.mock_emr_job_flows['j-DONE_AND_IDLE'] = MockEmrObject(
            state='WAITING',
            creationdatetime=hours_ago(6),
            startdatetime=hours_ago(5),
            steps=[
                MockEmrObject(
                    startdatetime=hours_ago(4),
                    enddatetime=hours_ago(2),
                    jar=streaming_jar,
                    state='COMPLETE',
                ),
            ],
        )

        # hive job flow (looks completed but isn't)
        self.mock_emr_job_flows['j-HIVE'] = MockEmrObject(
            state='WAITING',
            creationdatetime=hours_ago(6),
            startdatetime=hours_ago(5),
            steps=[
                MockEmrObject(
                    startdatetime=hours_ago(4),
                    enddatetime=hours_ago(4),
                    jar=script_runner_jar,
                    state='COMPLETE',
                ),
            ],
        )

        # hadoop debugging + actual job
        # hadoop debugging looks the same to us as Hive (they use the same
        # jar). The difference is that there's also a streaming step.
        self.mock_emr_job_flows['j-HADOOP_DEBUGGING'] = MockEmrObject(
            state='WAITING',
            creationdatetime=hours_ago(6),
            startdatetime=hours_ago(5),
            steps=[
                MockEmrObject(
                    startdatetime=hours_ago(5),
                    enddatetime=hours_ago(5),
                    jar=script_runner_jar,
                    state='COMPLETE',
                ),
                MockEmrObject(
                    startdatetime=hours_ago(4),
                    enddatetime=hours_ago(2),
                    jar=streaming_jar,
                    state='COMPLETE',
                ),
            ],
        )

        # skip cancelled steps
        self.mock_emr_job_flows['j-IDLE_AND_FAILED'] = MockEmrObject(
            state='WAITING',
            creationdatetime=hours_ago(6),
            startdatetime=hours_ago(5),
            steps=[
                MockEmrObject(
                    startdatetime=hours_ago(4),
                    enddatetime=hours_ago(3),
                    jar=streaming_jar,
                    state='FAILED',
                ),
                # the cancelled step never ran, so it carries no timestamps
                MockEmrObject(
                    jar=streaming_jar,
                    state='CANCELLED',
                ),
            ],
        )

        # add job flow IDs and fake names to the mock job flows
        # (.items() rather than the Python-2-only .iteritems())
        for jfid, jf in self.mock_emr_job_flows.items():
            jf.jobflowid = jfid
            jf.name = jfid[2:].replace('_', ' ').title() + ' Job Flow'