Example #1

    def test_workflow(self):
        # Patch the Dataproc hook so no real API call is made.
        with patch(HOOK) as MockHook:
            hook = MockHook()
            hook.get_conn.return_value = self.mock_conn
            hook.wait.return_value = None

            dataproc_task = DataprocWorkflowTemplateInstantiateOperator(
                task_id=TASK_ID,
                project_id=PROJECT_ID,
                region=REGION,
                template_id=TEMPLATE_ID,
                dag=self.dag)

            dataproc_task.execute(None)

            # The operator must resolve the fully qualified template name
            # and then wait for the resulting operation to finish.
            template_name = ('projects/test-project-id/regions/test-region/'
                             'workflowTemplates/template-id')
            self.mock_workflows.instantiate.assert_called_once_with(
                name=template_name, body=mock.ANY)
            hook.wait.assert_called_once_with(self.operation)
Example #2

    def test_workflow(self):
        with patch(HOOK) as MockHook:
            hook = MockHook()
            hook.get_conn.return_value = self.mock_conn
            hook.wait.return_value = None

            dataproc_task = DataprocWorkflowTemplateInstantiateOperator(
                task_id=TASK_ID,
                project_id=GCP_PROJECT_ID,
                region=GCP_REGION,
                template_id=TEMPLATE_ID,
                dag=self.dag
            )

            dataproc_task.execute(None)
            template_name = (
                'projects/test-project-id/regions/test-region/'
                'workflowTemplates/template-id')
            self.mock_workflows.instantiate.assert_called_once_with(
                name=template_name,
                body=mock.ANY)
            hook.wait.assert_called_once_with(self.operation)
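The two test snippets above assume scaffolding from the surrounding test module: the ID constants, the patch target HOOK, and a mocked client connection. A minimal sketch of that setup, with hypothetical names and values:

import unittest
from datetime import datetime
from unittest import mock
from unittest.mock import patch

from airflow import DAG

# Hypothetical constants; the real test module defines its own values.
TASK_ID = 'task-id'
PROJECT_ID = 'test-project-id'
REGION = 'test-region'
TEMPLATE_ID = 'template-id'
GCP_PROJECT_ID = PROJECT_ID  # aliases used by the second snippet
GCP_REGION = REGION
# Assumed patch target: the hook as imported by the operator module.
HOOK = 'airflow.contrib.operators.dataproc_operator.DataProcHook'


class WorkflowTemplateInstantiateTest(unittest.TestCase):
    def setUp(self):
        self.dag = DAG('test_dag', start_date=datetime(2019, 1, 1))
        self.operation = {'name': 'operations/test-operation', 'done': True}
        # Mirror the client call chain the operator walks:
        #   conn.projects().regions().workflowTemplates().instantiate().execute()
        self.mock_workflows = mock.Mock()
        self.mock_workflows.instantiate.return_value.execute.return_value = (
            self.operation)
        self.mock_conn = mock.Mock()
        regions = self.mock_conn.projects.return_value.regions.return_value
        regions.workflowTemplates.return_value = self.mock_workflows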
Example #3

from airflow import DAG
from airflow.contrib.operators.dataproc_operator import DataprocWorkflowTemplateInstantiateOperator

from datetime import datetime, timedelta


default_args = {
    'start_date': datetime(2020, 5, 12),
}

dag = DAG(
    'workflow_template', default_args=default_args, schedule_interval="@once")

t1 = DataprocWorkflowTemplateInstantiateOperator(
    task_id="execute",
    gcp_conn_id='google_cloud_default',
    project_id='sincere-bongo-264115',
    region='southamerica-east1',
    template_id='example',
    dag=dag)
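For reference, the operator expands project_id, region, and template_id into the fully qualified resource name it passes to workflowTemplates.instantiate, the same scheme asserted in the tests above. For this DAG the resolved name would be:

template_name = ('projects/sincere-bongo-264115/regions/southamerica-east1/'
                 'workflowTemplates/example')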
Example #4

from datetime import datetime, timedelta

from airflow import DAG
from airflow.contrib.operators.dataproc_operator import DataprocWorkflowTemplateInstantiateOperator
from airflow.operators.dummy_operator import DummyOperator

# Placeholder values; substitute your own project, region, and template.
PROJECT_ID = 'your-project-id'
REGION_ID = 'your-region'
TEMPLATE_ID = 'your-template-id'

start_date = datetime(2019, 1, 1)

default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': start_date,
    'email': ['*****@*****.**'],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=5)
}

with DAG(dag_id='simple_dataproc_workflow',
         default_args=default_args,
         start_date=start_date,
         schedule_interval=None) as dag:

    run_hive = DataprocWorkflowTemplateInstantiateOperator(
        task_id='RunHiveWorkflow',
        project_id=PROJECT_ID,
        region=REGION_ID,
        template_id=TEMPLATE_ID
    )

    load_data = DummyOperator(
        task_id='DummyTask'
    )

    load_data >> run_hive
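The final line uses Airflow's bit-shift dependency syntax; the same ordering can be declared with the explicit BaseOperator methods:

# Equivalent to load_data >> run_hive: DummyTask finishes before RunHiveWorkflow starts.
run_hive.set_upstream(load_data)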
Example #5

from datetime import datetime, timedelta

from airflow import models
from airflow.contrib.operators.dataproc_operator import DataprocWorkflowTemplateInstantiateOperator

# Placeholder values; substitute your own project, region, and template.
PROJECT_ID = 'your-project-id'
REGION = 'your-region'
TEMPLATE_ID = 'your-template-id'
SCHEDULE_INTERVAL = timedelta(minutes=60)
START_DATE = datetime.now() - SCHEDULE_INTERVAL

# [START composer_simple_define_dag_airflow_1]
default_dag_args = {
    'start_date': START_DATE,
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=5),
    'project_id': PROJECT_ID
}

# Define a DAG (directed acyclic graph) of tasks.
with models.DAG(
    # The ID shown on the Airflow DAGs page
    'dataproc_workflow_clouddq',
    schedule_interval=SCHEDULE_INTERVAL,
    default_args=default_dag_args
) as dag:
    start_template_job = DataprocWorkflowTemplateInstantiateOperator(
        # The task id of your job
        task_id="dataproc_workflow_clouddq",
        # The template id of your workflow
        template_id=TEMPLATE_ID,
        project_id=PROJECT_ID,
        # The region in which the workflow template was created.
        # For the regions where Dataproc is available see:
        # https://cloud.google.com/dataproc/docs/concepts/regional-endpoints
        region=REGION,
    )
# [END composer_simple_define_dag_airflow_1]
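The contrib import path used throughout these examples was removed in Airflow 2; a minimal sketch of the equivalent task using the Google provider package (assuming apache-airflow-providers-google is installed):

from airflow.providers.google.cloud.operators.dataproc import (
    DataprocInstantiateWorkflowTemplateOperator,
)

start_template_job = DataprocInstantiateWorkflowTemplateOperator(
    task_id='dataproc_workflow_clouddq',
    template_id=TEMPLATE_ID,
    project_id=PROJECT_ID,
    region=REGION,
)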