def describe_step(self, clusterid: str, stepid: str) -> dict:
    """
    Fetch the description of a single step running on an EMR cluster.

    :param clusterid: EMR cluster ID, forwarded as ``ClusterId``
    :param stepid: EMR step ID, forwarded as ``StepId``
    :return: the response returned by the EMR client's ``describe_step`` call
    """
    # A fresh hook/client is built per call from the configured AWS connection.
    client = EmrHook(aws_conn_id=self.aws_conn_id).get_conn()
    return client.describe_step(ClusterId=clusterid, StepId=stepid)
def execute(self, context):
    """
    Add the configured steps to an EMR job flow.

    The job flow is ``self.job_flow_id`` when set; otherwise it is looked up
    by ``self.job_flow_name`` restricted to ``self.cluster_states``.

    :param context: task context; ``context['ti']`` is used to push the
        resolved job flow id to XCom when ``self.do_xcom_push`` is truthy
    :return: the ``StepIds`` entry of the ``add_job_flow_steps`` response
    :raises AirflowException: if no job flow id could be resolved, or if the
        response's HTTP status code is not 200
    """
    hook = EmrHook(aws_conn_id=self.aws_conn_id)
    client = hook.get_conn()

    # Prefer the explicit id; fall back to a lookup by cluster name.
    job_flow_id = self.job_flow_id
    if not job_flow_id:
        job_flow_id = hook.get_cluster_id_by_name(self.job_flow_name, self.cluster_states)
    if not job_flow_id:
        raise AirflowException(f'No cluster found for name: {self.job_flow_name}')

    if self.do_xcom_push:
        context['ti'].xcom_push(key='job_flow_id', value=job_flow_id)

    self.log.info('Adding steps to %s', job_flow_id)
    response = client.add_job_flow_steps(JobFlowId=job_flow_id, Steps=self.steps)

    status_code = response['ResponseMetadata']['HTTPStatusCode']
    if status_code != 200:
        raise AirflowException('Adding steps failed: %s' % response)

    added_step_ids = response['StepIds']
    self.log.info('Steps %s added to JobFlow', added_step_ids)
    return added_step_ids
def execute(self, context: Dict[str, Any]) -> List[str]:
    """
    Add the configured steps to an EMR job flow and optionally wait on them.

    The job flow is ``self.job_flow_id`` when set; otherwise it is looked up
    by ``self.job_flow_name`` restricted to ``self.cluster_states``.
    ``self.steps`` may be a list or a string holding a Python literal list;
    a string is parsed with ``ast.literal_eval`` before submission.

    When ``self.wait_for_completion`` is truthy, ``self.check_status`` is
    called for the FIRST returned step id only; a warning is logged if more
    than one step was submitted.

    :param context: task context; ``context["ti"]`` is used to push the
        resolved job flow id to XCom when ``self.do_xcom_push`` is truthy
    :return: the ``StepIds`` entry of the ``add_job_flow_steps`` response
    :raises AirflowException: if no job flow id could be resolved, or if the
        response's HTTP status code is not 200
    """
    emr_hook = EmrHook(aws_conn_id=self.aws_conn_id)
    emr = emr_hook.get_conn()

    job_flow_id = self.job_flow_id or emr_hook.get_cluster_id_by_name(
        str(self.job_flow_name), self.cluster_states
    )
    if not job_flow_id:
        raise AirflowException(f"No cluster found for name: {self.job_flow_name}")

    if self.do_xcom_push:
        context["ti"].xcom_push(key="job_flow_id", value=job_flow_id)

    self.log.info("Adding steps to %s", job_flow_id)

    # Steps may arrive as a string representing a list, e.g. when they come
    # from XCom or a file: steps="[{ step1 }, { step2 }]"
    steps = self.steps
    if isinstance(steps, str):
        steps = ast.literal_eval(steps)

    response = emr.add_job_flow_steps(JobFlowId=job_flow_id, Steps=steps)
    if response["ResponseMetadata"]["HTTPStatusCode"] != 200:
        raise AirflowException(f"Adding steps failed: {response}")

    step_ids = response["StepIds"]
    # Report the submission before any wait so the log reflects when the
    # steps were actually accepted.
    self.log.info("Steps %s added to JobFlow", step_ids)

    # Only index into step_ids when a wait was requested and there is
    # something to wait on; an empty list must not raise IndexError.
    if self.wait_for_completion and step_ids:
        if len(step_ids) > 1:
            # Only the first step is tracked; make that limitation visible
            # instead of silently ignoring the remaining steps.
            self.log.warning(
                "wait_for_completion only tracks the first step (%s); "
                "%d additional step(s) will not be waited on",
                step_ids[0],
                len(step_ids) - 1,
            )
        self.check_status(
            job_flow_id,
            step_ids[0],
            self.describe_step,
            self.check_interval,
        )

    return step_ids
def test_get_conn_returns_a_boto3_connection(self):
    """The client from ``EmrHook.get_conn()`` returns a non-None ``list_clusters()`` result."""
    connection = EmrHook(aws_conn_id='aws_default').get_conn()
    clusters = connection.list_clusters()
    self.assertIsNotNone(clusters)