def execute(self, context):
    emr = EmrHook(aws_conn_id=self.aws_conn_id).get_conn()

    self.log.info('Adding steps to %s', self.job_flow_id)
    response = emr.add_job_flow_steps(JobFlowId=self.job_flow_id, Steps=self.steps)

    if response['ResponseMetadata']['HTTPStatusCode'] != 200:
        raise AirflowException('Adding steps failed: %s' % response)

    self.log.info('Steps %s added to JobFlow', response['StepIds'])
    return response['StepIds']
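# For reference, a minimal sketch of the payload shape that boto3's
# add_job_flow_steps expects in Steps (and that self.steps should follow).
# The step name, jar, and arguments below are illustrative placeholders, not
# values taken from the operator above.
EXAMPLE_STEPS = [
    {
        'Name': 'example-step',
        'ActionOnFailure': 'CONTINUE',
        'HadoopJarStep': {
            'Jar': 'command-runner.jar',
            'Args': ['spark-submit', '--deploy-mode', 'cluster', 's3://example-bucket/job.py'],
        },
    },
]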
def execute(self, context):
    emr = EmrHook(aws_conn_id=self.aws_conn_id).get_conn()

    logging.info("Adding steps to %s", self.job_flow_id)
    response = emr.add_job_flow_steps(JobFlowId=self.job_flow_id, Steps=self.steps)

    if response["ResponseMetadata"]["HTTPStatusCode"] != 200:
        raise AirflowException("Adding steps failed: %s" % response)
    else:
        logging.info("Steps %s added to JobFlow", response["StepIds"])
        return response["StepIds"]
def execute(self, context):
    emr = EmrHook(aws_conn_id=self.aws_conn_id).get_conn()

    self.log.info('Adding steps to %s', self.job_flow_id)
    response = emr.add_job_flow_steps(JobFlowId=self.job_flow_id, Steps=self.steps)

    if response['ResponseMetadata']['HTTPStatusCode'] != 200:
        raise AirflowException('Adding steps failed: %s' % response)
    else:
        self.log.info('Steps %s added to JobFlow', response['StepIds'])
        return response['StepIds']
def execute(self, context):
    attempt = context['ti'].try_number
    self.log.info('Attempt: %s', attempt)

    emr = EmrHook(aws_conn_id=self.aws_conn_id).get_conn()

    # Resolve the cluster id by name if it was not given explicitly.
    job_flow_id = self.job_flow_id
    if not job_flow_id:
        job_flow_id = emr.get_cluster_id_by_name(self.job_flow_name, self.cluster_states)

    if self.do_xcom_push:
        context['ti'].xcom_push(key='job_flow_id', value=job_flow_id)

    # Make the step name unique per retry; on every third attempt, escalate the
    # failure action to terminating the job flow.
    step_name = self.step_name if attempt == 1 else '{} (attempt {})'.format(self.step_name, attempt)
    action_on_failure = self.action_on_failure
    if attempt % 3 == 0:
        action_on_failure = 'TERMINATE_JOB_FLOW'

    spark_conf = self.get_spark_params_config(self.spark_params, self.spark_conf)
    steps = self.generate_spark_step(step_name, self.main_class, self.app_name, spark_conf,
                                     self.application_args, self.jar_path, action_on_failure)
    self.log.info('spark_params: %s', steps)

    self.log.info('Adding steps to %s', job_flow_id)
    response = emr.add_job_flow_steps(JobFlowId=job_flow_id, Steps=steps)
    self.log.info('Running Spark job %s with JobFlow ID %s', self.task_id, job_flow_id)

    # Poll until the step finishes. Possible step states:
    # PENDING | CANCEL_PENDING | RUNNING | COMPLETED | CANCELLED | FAILED | INTERRUPTED
    step_id = response['StepIds'][0]
    self.log.info('Step id - %s', step_id)
    while True:
        result = self.describe_step(emr, response)
        step_status = result['Step']['Status']['State']
        self.log.info('Step status - %s', step_status)

        if step_status == 'COMPLETED':
            break
        if step_status not in ('PENDING', 'RUNNING'):
            raise AirflowException('Spark job {} has failed'.format(self.task_id))

        self.log.info("Spark job '%s' status is %s", self.task_id, step_status)
        time.sleep(30)  # poll interval (assumed value); requires `import time` at module level
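# The variant above calls helpers that are not shown here (describe_step,
# get_spark_params_config, generate_spark_step). Below is a hedged sketch of
# two of them, inferred only from how they are called; the names, argument
# handling, and exact step layout are assumptions, not the original code.
def describe_step(self, emr, response):
    # Look up the submitted step via the boto3 EMR client.
    return emr.describe_step(
        ClusterId=self.job_flow_id,  # assumes the cluster id is set on the operator
        StepId=response['StepIds'][0],
    )

def generate_spark_step(self, step_name, main_class, app_name, spark_conf,
                        application_args, jar_path, action_on_failure):
    # Assemble a single spark-submit step in the structure add_job_flow_steps expects.
    args = ['spark-submit', '--name', app_name, '--class', main_class]
    args += list(spark_conf) + [jar_path] + list(application_args)
    return [{
        'Name': step_name,
        'ActionOnFailure': action_on_failure,
        'HadoopJarStep': {'Jar': 'command-runner.jar', 'Args': args},
    }]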
def execute(self, context):
    emr = EmrHook(aws_conn_id=self.aws_conn_id).get_conn()

    job_flow_id = self.job_flow_id
    if not job_flow_id:
        job_flow_id = emr.get_cluster_id_by_name(self.job_flow_name, self.cluster_states)

    if self.do_xcom_push:
        context['ti'].xcom_push(key='job_flow_id', value=job_flow_id)

    self.log.info('Adding steps to %s', job_flow_id)
    response = emr.add_job_flow_steps(JobFlowId=job_flow_id, Steps=self.steps)

    if response['ResponseMetadata']['HTTPStatusCode'] != 200:
        raise AirflowException('Adding steps failed: %s' % response)
    else:
        self.log.info('Steps %s added to JobFlow', response['StepIds'])
        return response['StepIds']
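# A hedged sketch of the operator skeleton that the execute() variants above
# assume. Only the attributes read by the simpler variants are shown; the
# constructor signature and defaults are assumptions (the Spark variant would
# additionally need step_name, spark_params, jar_path, and related fields).
from airflow.models import BaseOperator


class ExampleEmrAddStepsOperator(BaseOperator):
    def __init__(self, aws_conn_id='aws_default', job_flow_id=None,
                 job_flow_name=None, cluster_states=None, steps=None,
                 do_xcom_push=True, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.aws_conn_id = aws_conn_id
        self.job_flow_id = job_flow_id
        self.job_flow_name = job_flow_name
        self.cluster_states = cluster_states or []
        self.steps = steps or []
        self.do_xcom_push = do_xcom_push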