예제 #1
0
    def execute(self, context: 'Context') -> List[str]:
        emr_hook = EmrHook(aws_conn_id=self.aws_conn_id)

        emr = emr_hook.get_conn()

        job_flow_id = self.job_flow_id or emr_hook.get_cluster_id_by_name(
            str(self.job_flow_name), self.cluster_states)

        if not job_flow_id:
            raise AirflowException(
                f'No cluster found for name: {self.job_flow_name}')

        if self.do_xcom_push:
            context['ti'].xcom_push(key='job_flow_id', value=job_flow_id)

        self.log.info('Adding steps to %s', job_flow_id)

        # steps may arrive as a string representing a list
        # e.g. if we used XCom or a file then: steps="[{ step1 }, { step2 }]"
        steps = self.steps
        if isinstance(steps, str):
            steps = ast.literal_eval(steps)

        response = emr.add_job_flow_steps(JobFlowId=job_flow_id, Steps=steps)

        if not response['ResponseMetadata']['HTTPStatusCode'] == 200:
            raise AirflowException(f'Adding steps failed: {response}')
        else:
            self.log.info('Steps %s added to JobFlow', response['StepIds'])
            return response['StepIds']
예제 #2
0
    def test_get_cluster_id_by_name(self):
        """
        Test that we can resolve cluster id by cluster name.
        """
        hook = EmrHook(aws_conn_id='aws_default', emr_conn_id='emr_default')

        job_flow = hook.create_job_flow({'Name': 'test_cluster',
                                         'Instances': {'KeepJobFlowAliveWhenNoSteps': True}})

        job_flow_id = job_flow['JobFlowId']

        matching_cluster = hook.get_cluster_id_by_name('test_cluster', ['RUNNING', 'WAITING'])

        self.assertEqual(matching_cluster, job_flow_id)

        no_match = hook.get_cluster_id_by_name('foo', ['RUNNING', 'WAITING', 'BOOTSTRAPPING'])

        self.assertIsNone(no_match)
예제 #3
0
    def execute(self, context):
        emr_hook = EmrHook(aws_conn_id=self.aws_conn_id)

        emr = emr_hook.get_conn()

        job_flow_id = self.job_flow_id or emr_hook.get_cluster_id_by_name(
            self.job_flow_name, self.cluster_states)
        if not job_flow_id:
            raise AirflowException(
                f'No cluster found for name: {self.job_flow_name}')

        if self.do_xcom_push:
            context['ti'].xcom_push(key='job_flow_id', value=job_flow_id)

        self.log.info('Adding steps to %s', job_flow_id)
        response = emr.add_job_flow_steps(JobFlowId=job_flow_id,
                                          Steps=self.steps)

        if not response['ResponseMetadata']['HTTPStatusCode'] == 200:
            raise AirflowException('Adding steps failed: %s' % response)
        else:
            self.log.info('Steps %s added to JobFlow', response['StepIds'])
            return response['StepIds']