コード例 #1
0
def add_step_to_emr(cluster_create_task, task_identifier, step_params,
                    cluster_remover, task_create_cluster, aws_connection, dag):
    """
    Attach one EMR step, and a sensor that watches it, to the DAG.

    Meant to be called once per step when several steps are added to the
    same cluster. Nothing is returned; the new tasks are only wired in.

    cluster_create_task: task object that the add-step task is placed
        downstream of
    task_identifier: task_id given to the add-step task; the sensor gets
        task_identifier + '_watch_step'
    step_params: value passed as `steps` to EmrAddStepsOperator
    cluster_remover: task object placed downstream of the step sensor
    task_create_cluster: task_id (string) whose XCom 'return_value' is
        pulled to obtain the job flow id
    aws_connection: value passed as `aws_conn_id` to both new tasks
    dag: DAG the two new tasks are attached to
    """
    # Template string pulling the job flow id from the XCom of the task
    # named by task_create_cluster; shared by the operator and the sensor.
    cluster_id_template = (
        "{{ task_instance.xcom_pull('" + task_create_cluster
        + "', key='return_value') }}"
    )

    submit_step = EmrAddStepsOperator(
        task_id=task_identifier,
        job_flow_id=cluster_id_template,
        aws_conn_id=aws_connection,
        steps=step_params,
        dag=dag,
    )

    watcher_task_id = task_identifier + '_watch_step'
    # First element of the add-step task's XCom 'return_value'.
    step_id_template = (
        "{{ task_instance.xcom_pull('" + task_identifier
        + "', key='return_value')[0] }}"
    )

    wait_for_step = EmrStepSensor(
        task_id=watcher_task_id,
        job_flow_id=cluster_id_template,
        step_id=step_id_template,
        aws_conn_id=aws_connection,
        dag=dag,
    )

    # upstream task -> add step -> watch step -> cluster_remover
    cluster_create_task.set_downstream(submit_step)
    submit_step.set_downstream(wait_for_step)
    wait_for_step.set_downstream(cluster_remover)
コード例 #2
0
ファイル: emr.py プロジェクト: naturalett/incubator-liminal
    def apply_task_to_dag(self, **kwargs):
        """
        Add an EMR step for a task, and a sensor watching that step, to the task's DAG.

        The EMR cluster is assumed to exist already; this method only submits
        a step to it and waits for that step.

        :param kwargs:
            task (required): the task to run; its ``task_id``, ``dag``,
                ``parent`` and ``get_runnable_command()`` are used.
            parent (optional): task to place the add-step task downstream of.
                Defaults to ``task.parent``.
        :return: the ``EmrStepSensor`` task, so callers can chain further
            tasks after the step completes.
        :raises KeyError: if ``task`` is not supplied.
        """
        task = kwargs['task']
        parent = kwargs.get('parent', task.parent)

        self._validate_task_type(task)

        # assuming emr already exists
        add_step = EmrAddStepsOperator(
            task_id=f'{task.task_id}_add_step',
            job_flow_id=self.job_flow_id,
            job_flow_name=self.job_flow_name,
            aws_conn_id=self.aws_conn_id,
            steps=self.__generate_emr_step(
                task.task_id, [str(x) for x in task.get_runnable_command()]),
            cluster_states=self.cluster_states,
            dag=task.dag)

        # Gate on the resolved parent, not task.parent: an explicitly passed
        # parent must be honoured even when task.parent is unset, and an
        # explicit parent=None must not be dereferenced.
        if parent:
            parent.set_downstream(add_step)

        # The sensor reads both ids from the add-step task's XCom.
        # NOTE(review): presumably EmrAddStepsOperator pushes the
        # 'job_flow_id' key itself - confirm against the operator.
        emr_sensor_step = EmrStepSensor(
            task_id=f'{task.task_id}_watch_step',
            job_flow_id="{{ task_instance.xcom_pull('" + add_step.task_id +
            "', key='job_flow_id') }}",
            step_id="{{ task_instance.xcom_pull('" + add_step.task_id +
            "', key='return_value')[0] }}",
            aws_conn_id=self.aws_conn_id,
            dag=task.dag)

        add_step.set_downstream(emr_sensor_step)

        return emr_sensor_step
コード例 #3
0
    aws_conn_id='aws_default',
    emr_conn_id='emr_default',
    dag=dag)

# Template string pulling the XCom 'return_value' of the 'create_job_flow'
# task; every task below uses it as its job flow id.
_JOB_FLOW_ID_TEMPLATE = (
    "{{ task_instance.xcom_pull('create_job_flow', key='return_value') }}"
)

# Submits SPARK_TEST_STEPS to the job flow.
step_adder = EmrAddStepsOperator(
    task_id='add_steps',
    job_flow_id=_JOB_FLOW_ID_TEMPLATE,
    aws_conn_id='aws_default',
    steps=SPARK_TEST_STEPS,
    dag=dag,
)

# Watches the first step id found in the 'add_steps' XCom return value.
step_checker = EmrStepSensor(
    task_id='watch_step',
    job_flow_id=_JOB_FLOW_ID_TEMPLATE,
    step_id="{{ task_instance.xcom_pull('add_steps', key='return_value')[0] }}",
    aws_conn_id='aws_default',
    dag=dag,
)

# Terminates the job flow.
cluster_remover = EmrTerminateJobFlowOperator(
    task_id='remove_cluster',
    job_flow_id=_JOB_FLOW_ID_TEMPLATE,
    aws_conn_id='aws_default',
    dag=dag,
)

# Linear pipeline: create cluster -> add steps -> watch step -> remove cluster.
cluster_creator >> step_adder >> step_checker >> cluster_remover
コード例 #4
0
    emr_conn_id='emr_default',
    dag=dag
)

# Template string pulling the XCom 'return_value' of the 'create_job_flow'
# task; every task below uses it as its job flow id.
_JOB_FLOW_ID_TEMPLATE = (
    "{{ task_instance.xcom_pull('create_job_flow', key='return_value') }}"
)

# Submits SPARK_TEST_STEPS to the job flow.
step_adder = EmrAddStepsOperator(
    task_id='add_steps',
    job_flow_id=_JOB_FLOW_ID_TEMPLATE,
    aws_conn_id='aws_default',
    steps=SPARK_TEST_STEPS,
    dag=dag,
)

# Watches the first step id found in the 'add_steps' XCom return value.
step_checker = EmrStepSensor(
    task_id='watch_step',
    job_flow_id=_JOB_FLOW_ID_TEMPLATE,
    step_id="{{ task_instance.xcom_pull('add_steps', key='return_value')[0] }}",
    aws_conn_id='aws_default',
    dag=dag,
)

# Terminates the job flow.
cluster_remover = EmrTerminateJobFlowOperator(
    task_id='remove_cluster',
    job_flow_id=_JOB_FLOW_ID_TEMPLATE,
    aws_conn_id='aws_default',
    dag=dag,
)

# Linear pipeline: create cluster -> add steps -> watch step -> remove cluster.
cluster_creator >> step_adder >> step_checker >> cluster_remover