Beispiel #1
0
 def describe_step(self, clusterid: str, stepid: str) -> dict:
     """
     Return the transform job info associated with the name
     :param clusterid: EMR Cluster ID
     :type stepid: str: StepID
     :return: A dict contains all the transform job info
     """
     emr_hook = EmrHook(aws_conn_id=self.aws_conn_id)
     emr = emr_hook.get_conn()
     return emr.describe_step(ClusterId=clusterid, StepId=stepid)
Beispiel #2
0
    def execute(self, context):
        emr_hook = EmrHook(aws_conn_id=self.aws_conn_id)

        emr = emr_hook.get_conn()

        job_flow_id = self.job_flow_id or emr_hook.get_cluster_id_by_name(self.job_flow_name,
                                                                          self.cluster_states)
        if not job_flow_id:
            raise AirflowException(f'No cluster found for name: {self.job_flow_name}')

        if self.do_xcom_push:
            context['ti'].xcom_push(key='job_flow_id', value=job_flow_id)

        self.log.info('Adding steps to %s', job_flow_id)
        response = emr.add_job_flow_steps(JobFlowId=job_flow_id, Steps=self.steps)

        if not response['ResponseMetadata']['HTTPStatusCode'] == 200:
            raise AirflowException('Adding steps failed: %s' % response)
        else:
            self.log.info('Steps %s added to JobFlow', response['StepIds'])
            return response['StepIds']
Beispiel #3
0
    def execute(self, context: Dict[str, Any]) -> List[str]:
        emr_hook = EmrHook(aws_conn_id=self.aws_conn_id)

        emr = emr_hook.get_conn()

        job_flow_id = self.job_flow_id or emr_hook.get_cluster_id_by_name(
            str(self.job_flow_name), self.cluster_states
        )

        if not job_flow_id:
            raise AirflowException(f"No cluster found for name: {self.job_flow_name}")

        if self.do_xcom_push:
            context["ti"].xcom_push(key="job_flow_id", value=job_flow_id)

        self.log.info("Adding steps to %s", job_flow_id)

        # steps may arrive as a string representing a list
        # e.g. if we used XCom or a file then: steps="[{ step1 }, { step2 }]"
        steps = self.steps
        if isinstance(steps, str):
            steps = ast.literal_eval(steps)

        response = emr.add_job_flow_steps(JobFlowId=job_flow_id, Steps=steps)

        if not response["ResponseMetadata"]["HTTPStatusCode"] == 200:
            raise AirflowException("Adding steps failed: %s" % response)
        else:
            # Assumption : ONly a single step is submitted each time.
            step_ids = response["StepIds"]
            step_id = step_ids[0]
            if self.wait_for_completion:
                self.check_status(
                    job_flow_id,
                    step_id,
                    self.describe_step,
                    self.check_interval,
                )
            self.log.info("Steps %s added to JobFlow", response["StepIds"])
            return response["StepIds"]
 def test_get_conn_returns_a_boto3_connection(self):
     hook = EmrHook(aws_conn_id='aws_default')
     self.assertIsNotNone(hook.get_conn().list_clusters())