Python EmrHook Beispiele, airflow.contrib.hooks.emr_hook.EmrHook Python Beispiele

Beispiel #1

0

Datei anzeigen

Datei: emr_step_sensor.py Projekt: RogerThomas/incubator-airflow

    def get_emr_response(self):
        """Fetch the description of the step this object is watching.

        Obtains a connection from ``EmrHook`` using ``self.aws_conn_id``, logs
        which step/cluster is being poked, and calls ``describe_step``.

        :return: whatever ``describe_step`` returns for ``self.job_flow_id``
            and ``self.step_id``
        """
        client = EmrHook(aws_conn_id=self.aws_conn_id).get_conn()

        message = 'Poking step {0} on cluster {1}'.format(
            self.step_id, self.job_flow_id)
        _log.info(message)

        return client.describe_step(
            ClusterId=self.job_flow_id,
            StepId=self.step_id,
        )

Beispiel #2

0

Datei anzeigen

    def get_emr_response(self):
        """Ask EMR for the current state of the monitored step.

        :return: the ``describe_step`` response for the step
            ``self.step_id`` on cluster ``self.job_flow_id``
        """
        hook = EmrHook(aws_conn_id=self.aws_conn_id)
        emr_client = hook.get_conn()

        self.log.info(
            'Poking step %s on cluster %s', self.step_id, self.job_flow_id
        )

        step_query = {'ClusterId': self.job_flow_id, 'StepId': self.step_id}
        return emr_client.describe_step(**step_query)

Beispiel #3

0

Datei anzeigen

    def test_create_job_flow_uses_the_emr_config_to_create_a_cluster(self):
        """The id returned by ``create_job_flow`` matches the first listed cluster."""
        emr_client = boto3.client('emr', region_name='us-east-1')
        emr_hook = EmrHook(aws_conn_id='aws_default', emr_conn_id='emr_default')

        created = emr_hook.create_job_flow({'Name': 'test_cluster'})

        # Compare the first cluster boto3 lists with the job flow just created.
        listed_clusters = emr_client.list_clusters()['Clusters']
        first_listed_id = listed_clusters[0]['Id']
        self.assertEqual(first_listed_id, created['JobFlowId'])

Beispiel #4

0

Datei anzeigen

Datei: test_emr_hook.py Projekt: Fokko/incubator-airflow

    def test_create_job_flow_uses_the_emr_config_to_create_a_cluster(self):
        """Check that a job flow created via the hook shows up in ``list_clusters``."""
        boto_client = boto3.client('emr', region_name='us-east-1')

        job_flow = EmrHook(
            aws_conn_id='aws_default',
            emr_conn_id='emr_default',
        ).create_job_flow({'Name': 'test_cluster'})

        listing = boto_client.list_clusters()
        self.assertEqual(
            listing['Clusters'][0]['Id'],
            job_flow['JobFlowId'],
        )

Beispiel #5

0

Datei anzeigen

 def describe_step(self, clusterid: str, stepid: str) -> dict:
     """
     Return the description of a single step on an EMR cluster.

     :param clusterid: EMR cluster id, forwarded as ``ClusterId``
     :param stepid: EMR step id, forwarded as ``StepId``
     :return: the response of the connection's ``describe_step`` call
     """
     client = EmrHook(aws_conn_id=self.aws_conn_id).get_conn()
     return client.describe_step(ClusterId=clusterid, StepId=stepid)

Beispiel #6

0

Datei anzeigen

Datei: emr_terminate_job_flow_operator.py Projekt: AndreiDev/incubator-airflow

    def execute(self, context):
        """Terminate the job flow ``self.job_flow_id``.

        :param context: task context (unused by this method)
        :raises AirflowException: if the response's HTTP status code is not 200
        """
        client = EmrHook(aws_conn_id=self.aws_conn_id).get_conn()

        logging.info('Terminating JobFlow %s', self.job_flow_id)
        response = client.terminate_job_flows(JobFlowIds=[self.job_flow_id])

        # Guard clause: anything other than HTTP 200 is treated as a failure.
        status_code = response['ResponseMetadata']['HTTPStatusCode']
        if status_code != 200:
            raise AirflowException('JobFlow termination failed: %s' % response)

        logging.info('JobFlow with id %s terminated', self.job_flow_id)

Beispiel #7

0

Datei anzeigen

Datei: emr_terminate_job_flow_operator.py Projekt: iamtouchskyer/Titan

    def execute(self, context):
        """Request termination of the EMR job flow ``self.job_flow_id``.

        :param context: task context (not used here)
        :raises AirflowException: when the response's HTTP status code is not 200
        """
        hook = EmrHook(aws_conn_id=self.aws_conn_id)
        client = hook.get_conn()

        self.log.info('Terminating JobFlow %s', self.job_flow_id)
        response = client.terminate_job_flows(JobFlowIds=[self.job_flow_id])

        succeeded = response['ResponseMetadata']['HTTPStatusCode'] == 200
        if succeeded:
            self.log.info('JobFlow with id %s terminated', self.job_flow_id)
            return
        raise AirflowException('JobFlow termination failed: %s' % response)

Beispiel #8

0

Datei anzeigen

Datei: emr_add_steps_operator.py Projekt: sourcery-ai-bot/incubator-airflow

    def execute(self, context):
        """Add ``self.steps`` to the job flow ``self.job_flow_id``.

        :param context: task context (unused by this method)
        :return: the ``StepIds`` entry of the response
        :raises AirflowException: if the response's HTTP status code is not 200
        """
        client = EmrHook(aws_conn_id=self.aws_conn_id).get_conn()

        self.log.info('Adding steps to %s', self.job_flow_id)
        response = client.add_job_flow_steps(
            JobFlowId=self.job_flow_id,
            Steps=self.steps,
        )

        http_status = response['ResponseMetadata']['HTTPStatusCode']
        if http_status == 200:
            step_ids = response['StepIds']
            self.log.info('Steps %s added to JobFlow', step_ids)
            return step_ids
        raise AirflowException('Adding steps failed: %s' % response)

Beispiel #9

0

Datei anzeigen

Datei: emr_create_job_flow_operator.py Projekt: AndreiDev/incubator-airflow

    def execute(self, context):
        """Create a job flow from ``self.job_flow_overrides``.

        :param context: task context (unused by this method)
        :return: the ``JobFlowId`` entry of the response
        :raises AirflowException: if the response's HTTP status code is not 200
        """
        hook = EmrHook(
            aws_conn_id=self.aws_conn_id,
            emr_conn_id=self.emr_conn_id,
        )

        logging.info('Creating JobFlow')
        response = hook.create_job_flow(self.job_flow_overrides)

        # Fail fast on any non-200 answer.
        if response['ResponseMetadata']['HTTPStatusCode'] != 200:
            raise AirflowException('JobFlow creation failed: %s' % response)

        job_flow_id = response['JobFlowId']
        logging.info('JobFlow with id %s created', job_flow_id)
        return job_flow_id

Beispiel #10

0

Datei anzeigen

Datei: emr_add_steps_operator.py Projekt: 7digital/incubator-airflow

    def execute(self, context):
        """Submit ``self.steps`` to the job flow ``self.job_flow_id``.

        :param context: task context (not used here)
        :return: the ``StepIds`` entry of the response
        :raises AirflowException: when the response's HTTP status code is not 200
        """
        emr_client = EmrHook(aws_conn_id=self.aws_conn_id).get_conn()

        self.log.info('Adding steps to %s', self.job_flow_id)
        response = emr_client.add_job_flow_steps(JobFlowId=self.job_flow_id, Steps=self.steps)

        # Early exit on failure keeps the success path unindented.
        status = response['ResponseMetadata']['HTTPStatusCode']
        if status != 200:
            raise AirflowException('Adding steps failed: %s' % response)

        added_step_ids = response['StepIds']
        self.log.info('Steps %s added to JobFlow', added_step_ids)
        return added_step_ids

Beispiel #11

0

Datei anzeigen

Datei: emr_add_steps_operator.py Projekt: seancron/airflow

    def execute(self, context):
        """Add ``self.steps`` to ``self.job_flow_id`` and return the new step ids.

        :param context: task context (unused by this method)
        :return: the ``StepIds`` entry of the response
        :raises AirflowException: if the response's HTTP status code is not 200
        """
        hook = EmrHook(aws_conn_id=self.aws_conn_id)
        client = hook.get_conn()

        logging.info("Adding steps to %s", self.job_flow_id)
        request = {"JobFlowId": self.job_flow_id, "Steps": self.steps}
        response = client.add_job_flow_steps(**request)

        request_ok = response["ResponseMetadata"]["HTTPStatusCode"] == 200
        if not request_ok:
            raise AirflowException("Adding steps failed: %s" % response)

        logging.info("Steps %s added to JobFlow", response["StepIds"])
        return response["StepIds"]

Beispiel #12

0

Datei anzeigen

    def execute(self, context):
        """Create an EMR job flow using ``self.job_flow_overrides``.

        :param context: task context (not used here)
        :return: the ``JobFlowId`` entry of the response
        :raises AirflowException: when the response's HTTP status code is not 200
        """
        emr_hook = EmrHook(aws_conn_id=self.aws_conn_id, emr_conn_id=self.emr_conn_id)

        _log.info('Creating JobFlow')
        response = emr_hook.create_job_flow(self.job_flow_overrides)

        created = response['ResponseMetadata']['HTTPStatusCode'] == 200
        if created:
            _log.info('JobFlow with id %s created', response['JobFlowId'])
            return response['JobFlowId']
        raise AirflowException('JobFlow creation failed: %s' % response)

Beispiel #13

0

Datei anzeigen

Datei: emr_add_spark_step_operator.py Projekt: orpeleg/airflow-examples

    def execute(self, context):
        """Submit a Spark step to an EMR cluster and block until it finishes.

        The target cluster is ``self.job_flow_id`` or, when that is unset, the
        cluster resolved from ``self.job_flow_name`` / ``self.cluster_states``.
        On every third attempt the step's failure action is escalated to
        ``TERMINATE_JOB_FLOW``.

        :param context: task context; ``context['ti']`` supplies ``try_number``
            and receives the XCom push of the resolved job flow id
        :raises AirflowException: if no cluster can be resolved, or if the step
            leaves the PENDING/RUNNING states without reaching COMPLETED
        """
        # Local import: the file's import block is not visible from this method.
        import time

        attempt = context['ti'].try_number
        logging.info('attempt: %s', attempt)

        # Keep a reference to the hook: get_cluster_id_by_name() lives on the
        # hook, not on the connection object returned by get_conn().
        emr_hook = EmrHook(aws_conn_id=self.aws_conn_id)
        emr = emr_hook.get_conn()

        job_flow_id = self.job_flow_id
        if not job_flow_id:
            job_flow_id = emr_hook.get_cluster_id_by_name(self.job_flow_name,
                                                          self.cluster_states)
        if not job_flow_id:
            raise AirflowException('No cluster found for name: {}'.format(
                self.job_flow_name))

        if self.do_xcom_push:
            context['ti'].xcom_push(key='job_flow_id', value=job_flow_id)

        # Retries get a distinguishable step name.
        step_name = self.step_name if attempt == 1 else "{} (attempt {})".format(
            self.step_name, attempt)

        # Every third attempt tears the cluster down on failure.
        action_on_failure = self.action_on_failure
        if attempt % 3 == 0:
            action_on_failure = 'TERMINATE_JOB_FLOW'

        spark_conf = self.get_spark_params_config(self.spark_params,
                                                  self.spark_conf)

        steps = self.generate_spark_step(step_name, self.main_class,
                                         self.app_name, spark_conf,
                                         self.application_args, self.jar_path,
                                         action_on_failure)
        logging.info("spark_params: " + str(steps))

        self.log.info('Adding steps to %s', job_flow_id)
        response = emr.add_job_flow_steps(JobFlowId=job_flow_id, Steps=steps)

        # Log the id actually used; self.job_flow_id may be empty when the
        # cluster was looked up by name.
        logging.info('Running Spark Job %s with JobFlow ID %s',
                     self.task_id, job_flow_id)

        # The submitted step does not change while polling, so read it once.
        step_id = response['StepIds'][0]
        logging.info('step id - %s', step_id)

        # Pause between polls instead of calling describe_step in a tight loop.
        # NOTE(review): 'poll_interval' is an optional attribute introduced
        # here with a 30 s fallback -- confirm the desired name and default.
        poll_interval = getattr(self, 'poll_interval', 30)

        while True:
            result = self.describe_step(emr, response)
            step_status = result['Step']['Status']['State']
            logging.info('step status - %s', step_status)
            # step state can be 'PENDING'|'CANCEL_PENDING'|'RUNNING'|'COMPLETED'|'CANCELLED'|'FAILED'|'INTERRUPTED'
            if step_status == 'COMPLETED':
                break
            if step_status not in ('PENDING', 'RUNNING'):
                raise AirflowException('Spark job {} has failed'.format(
                    self.task_id))

            logging.info("Spark Job '%s' status is %s",
                         self.task_id, step_status)
            time.sleep(poll_interval)

Beispiel #14

0

Datei anzeigen

Datei: test_emr_hook.py Projekt: IcyWillow/airflow

    def test_get_cluster_id_by_name(self):
        """
        A cluster id is resolved from its name, and an unknown name yields None.
        """
        emr_hook = EmrHook(aws_conn_id='aws_default', emr_conn_id='emr_default')

        overrides = {
            'Name': 'test_cluster',
            'Instances': {'KeepJobFlowAliveWhenNoSteps': True},
        }
        expected_id = emr_hook.create_job_flow(overrides)['JobFlowId']

        # Known name: the lookup must return the id of the job flow just created.
        found_id = emr_hook.get_cluster_id_by_name('test_cluster', ['RUNNING', 'WAITING'])
        self.assertEqual(found_id, expected_id)

        # Unknown name: the lookup must return None.
        missing = emr_hook.get_cluster_id_by_name('foo', ['RUNNING', 'WAITING', 'BOOTSTRAPPING'])
        self.assertIsNone(missing)

Beispiel #15

0

Datei anzeigen

    def execute(self, context):
        """Add ``self.steps`` to a job flow given by id or resolved by name.

        The job flow is ``self.job_flow_id`` or, when that is unset, the
        cluster found for ``self.job_flow_name`` in ``self.cluster_states``.

        :param context: task context; ``context['ti']`` receives the XCom push
            of the resolved job flow id when ``self.do_xcom_push`` is set
        :return: the ``StepIds`` entry of the response
        :raises AirflowException: if no cluster is found for the name, or if
            the response's HTTP status code is not 200
        """
        # Keep the hook itself: get_cluster_id_by_name() is a hook method and
        # is not available on the connection object returned by get_conn().
        emr_hook = EmrHook(aws_conn_id=self.aws_conn_id)
        emr = emr_hook.get_conn()

        job_flow_id = self.job_flow_id

        if not job_flow_id:
            job_flow_id = emr_hook.get_cluster_id_by_name(self.job_flow_name, self.cluster_states)

        # Fail with a clear message rather than sending an empty id to EMR.
        if not job_flow_id:
            raise AirflowException('No cluster found for name: %s' % self.job_flow_name)

        if self.do_xcom_push:
            context['ti'].xcom_push(key='job_flow_id', value=job_flow_id)

        self.log.info('Adding steps to %s', job_flow_id)
        response = emr.add_job_flow_steps(JobFlowId=job_flow_id, Steps=self.steps)

        if response['ResponseMetadata']['HTTPStatusCode'] != 200:
            raise AirflowException('Adding steps failed: %s' % response)

        self.log.info('Steps %s added to JobFlow', response['StepIds'])
        return response['StepIds']

Beispiel #16

0

Datei anzeigen

    def test_create_job_flow_extra_args(self):
        """
        Extra arguments given to ``create_job_flow`` reach the launch call.

        This matters when AWS adds new options (for example
        "SecurityConfiguration"), so the code does not need to change.
        """
        emr_client = boto3.client('emr', region_name='us-east-1')
        emr_hook = EmrHook(aws_conn_id='aws_default', emr_conn_id='emr_default')

        # AmiVersion is very old and hardly used any more, but it is one of the
        # "optional" request parameters that moto supports; moto's coverage of
        # EMR is not complete.
        overrides = {
            'Name': 'test_cluster',
            'ReleaseLabel': '',
            'AmiVersion': '3.2',
        }
        job_flow = emr_hook.create_job_flow(overrides)

        description = emr_client.describe_cluster(ClusterId=job_flow['JobFlowId'])
        cluster_info = description['Cluster']

        # The AmiVersion comes back as {Requested,Running}AmiVersion fields.
        self.assertEqual(cluster_info['RequestedAmiVersion'], '3.2')

Beispiel #17

0

Datei anzeigen

    def execute(self, context: Dict[str, Any]) -> List[str]:
        """Add steps to a job flow and optionally wait for the first one.

        The job flow is ``self.job_flow_id`` or, when that is falsy, the
        cluster looked up from ``self.job_flow_name`` and
        ``self.cluster_states``.

        :param context: task context; ``context["ti"]`` receives the XCom push
            of the job flow id when ``self.do_xcom_push`` is set
        :return: the ``StepIds`` entry of the response
        :raises AirflowException: if no cluster is found, or if the response's
            HTTP status code is not 200
        """
        emr_hook = EmrHook(aws_conn_id=self.aws_conn_id)
        client = emr_hook.get_conn()

        job_flow_id = self.job_flow_id
        if not job_flow_id:
            job_flow_id = emr_hook.get_cluster_id_by_name(
                str(self.job_flow_name), self.cluster_states
            )
        if not job_flow_id:
            raise AirflowException(f"No cluster found for name: {self.job_flow_name}")

        if self.do_xcom_push:
            context["ti"].xcom_push(key="job_flow_id", value=job_flow_id)

        self.log.info("Adding steps to %s", job_flow_id)

        # Steps may arrive as a string holding a list literal, e.g. from XCom
        # or a file: steps="[{ step1 }, { step2 }]".
        raw_steps = self.steps
        steps = ast.literal_eval(raw_steps) if isinstance(raw_steps, str) else raw_steps

        response = client.add_job_flow_steps(JobFlowId=job_flow_id, Steps=steps)

        if response["ResponseMetadata"]["HTTPStatusCode"] != 200:
            raise AirflowException("Adding steps failed: %s" % response)

        # Assumption: only a single step is submitted each time, so only the
        # first step id is tracked.
        step_ids = response["StepIds"]
        first_step_id = step_ids[0]
        if self.wait_for_completion:
            self.check_status(
                job_flow_id,
                first_step_id,
                self.describe_step,
                self.check_interval,
            )

        self.log.info("Steps %s added to JobFlow", step_ids)
        return step_ids

Beispiel #18

0

Datei anzeigen

    def ensure_cluster_exists(**kwargs):
        """Choose the next task id depending on whether the cluster exists.

        Lists clusters in the STARTING/RUNNING/WAITING states and looks for
        one named ``CLUSTER_NAME``. When found, its id is pushed to XCom under
        the key ``clusterid``.

        :param kwargs: must contain ``ti``, used for the XCom push
        :return: ``'parse_id'`` when a matching cluster exists, otherwise
            ``'cluster_creator'`` (also returned when any error occurs)
        """
        try:
            emr_client = EmrHook().get_conn()
            response = emr_client.list_clusters(
                ClusterStates=['STARTING', 'RUNNING', 'WAITING'])

            matching_clusters = [
                cluster for cluster in response['Clusters']
                if cluster['Name'] == CLUSTER_NAME
            ]
            if not matching_clusters:
                print('cluster does not exist!')
                return 'cluster_creator'

            print('cluster is created already!')
            cluster_id = matching_clusters[0]['Id']
            kwargs['ti'].xcom_push(key='clusterid', value=cluster_id)
            return 'parse_id'

        except Exception as e:
            # Best effort: any failure falls back to creating the cluster.
            print('Error:', e)
            return 'cluster_creator'

Beispiel #19

0

Datei anzeigen

Datei: emr_job_flow_sensor.py Projekt: wing1124/incubator-airflow

    def get_emr_response(self):
        """Fetch the description of the cluster ``self.job_flow_id``.

        :return: the ``describe_cluster`` response for that cluster
        """
        hook = EmrHook(aws_conn_id=self.aws_conn_id)
        client = hook.get_conn()

        cluster_id = self.job_flow_id
        self.logger.info('Poking cluster %s', cluster_id)
        return client.describe_cluster(ClusterId=self.job_flow_id)

Beispiel #20

0

Datei anzeigen

def client(clientID):
    """Look up a connection by id and store it in the module-global ``emr``.

    :param clientID: identifier passed to ``EmrHook.get_connection``
    """
    global emr
    connection = EmrHook.get_connection(clientID)
    emr = connection

Beispiel #21

0

Datei anzeigen

Datei: test_emr_hook.py Projekt: zjucypher/incubator-airflow

 def test_get_conn_returns_a_boto3_connection(self):
     """``get_conn()`` returns an object whose ``list_clusters()`` result is not None."""
     connection = EmrHook(aws_conn_id='aws_default').get_conn()
     clusters_response = connection.list_clusters()
     self.assertIsNotNone(clusters_response)

Beispiel #22

0

Datei anzeigen

Datei: emr_step_sensor.py Projekt: doordash/incubator-airflow

    def get_emr_response(self):
        """Describe the step ``self.step_id`` on cluster ``self.job_flow_id``.

        :return: the ``describe_step`` response
        """
        client = EmrHook(aws_conn_id=self.aws_conn_id).get_conn()

        self.log.info(
            'Poking step %s on cluster %s',
            self.step_id,
            self.job_flow_id,
        )
        return client.describe_step(
            ClusterId=self.job_flow_id,
            StepId=self.step_id,
        )

Beispiel #23

0

Datei anzeigen

Datei: emr_step_sensor.py Projekt: owlabs/incubator-airflow

    def get_emr_response(self):
        """Query EMR for the step being watched.

        :return: the ``describe_step`` response for ``self.step_id`` on
            ``self.job_flow_id``
        """
        hook = EmrHook(aws_conn_id=self.aws_conn_id)
        emr_client = hook.get_conn()

        poke_message = 'Poking step {0} on cluster {1}'.format(self.step_id, self.job_flow_id)
        _log.info(poke_message)

        return emr_client.describe_step(ClusterId=self.job_flow_id, StepId=self.step_id)

Beispiel #24

0

Datei anzeigen

Datei: emr_job_flow_sensor.py Projekt: AndreiDev/incubator-airflow

    def get_emr_response(self):
        """Query EMR for the cluster ``self.job_flow_id``.

        :return: the ``describe_cluster`` response for that cluster
        """
        client = EmrHook(aws_conn_id=self.aws_conn_id).get_conn()

        # The message is formatted before it is handed to the logger.
        poke_message = 'Poking cluster %s' % self.job_flow_id
        logging.info(poke_message)

        return client.describe_cluster(ClusterId=self.job_flow_id)

Beispiel #25

0

Datei anzeigen

Datei: simple_dag.py Projekt: rliuamzn/emr-studio-samples

 def get_emr_response(self):
     """Describe the notebook execution ``self.notebook_execution_id``.

     :return: the ``describe_notebook_execution`` response
     """
     hook = EmrHook(aws_conn_id=self.aws_conn_id)
     client = hook.get_conn()

     execution_id = self.notebook_execution_id
     self.log.info('Poking notebook execution %s', execution_id)
     return client.describe_notebook_execution(
         NotebookExecutionId=self.notebook_execution_id,
     )