Example #1
def setUp(self):
    self.dataflow = DataflowTemplatedJobStartOperator(
        task_id=TASK_ID,
        template=TEMPLATE,
        job_name=JOB_NAME,
        parameters=PARAMETERS,
        dataflow_default_options=DEFAULT_OPTIONS_TEMPLATE,
        poll_sleep=POLL_SLEEP)
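This setUp presumably lives in a unittest.TestCase, and the snippet assumes an import plus module-level constants defined elsewhere in the test file. A minimal sketch of that context (all values here are hypothetical, inferred from the snippet):

from airflow.providers.google.cloud.operators.dataflow import DataflowTemplatedJobStartOperator

# Hypothetical test constants; the original file defines its own values.
TASK_ID = "start-template-job"
TEMPLATE = "gs://dataflow-templates/latest/Word_Count"
JOB_NAME = "test-dataflow-job"
PARAMETERS = {"inputFile": "gs://test-bucket/input.txt", "output": "gs://test-bucket/output"}
DEFAULT_OPTIONS_TEMPLATE = {"project": "test-project", "tempLocation": "gs://test-bucket/tmp"}
POLL_SLEEP = 10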
Example #2
def setUp(self):
    self.dataflow = DataflowTemplatedJobStartOperator(
        task_id=TASK_ID,
        template=TEMPLATE,
        job_name=JOB_NAME,
        parameters=PARAMETERS,
        options=DEFAULT_OPTIONS_TEMPLATE,
        dataflow_default_options={"EXTRA_OPTION": "TEST_A"},
        poll_sleep=POLL_SLEEP,
        location=TEST_LOCATION,
        environment={"maxWorkers": 2},
    )
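A minimal sketch of how such a fixture is typically exercised, assuming unittest.mock and that the operator delegates to DataflowHook (the patch target and hook method mirror the provider's own tests of that era; treat both as assumptions for your provider version):

from unittest import mock

@mock.patch("airflow.providers.google.cloud.operators.dataflow.DataflowHook")
def test_execute(self, dataflow_hook_mock):
    # Executing the operator should hand the templated job off to the hook.
    self.dataflow.execute(None)
    assert dataflow_hook_mock.return_value.start_template_dataflow.called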
Example #3
    wait_for_python_job_async_autoscaling_event = DataflowJobAutoScalingEventsSensor(
        task_id="wait-for-python-job-async-autoscaling-event",
        job_id="{{task_instance.xcom_pull('start-python-job-async')['job_id']}}",
        location='europe-west3',
        callback=check_autoscaling_event,
    )
    # [END howto_sensor_wait_for_job_autoscaling_event]
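The callback referenced above, check_autoscaling_event, is defined earlier in the example file; a sketch consistent with the Airflow example (the exact message text matched is an assumption):

def check_autoscaling_event(autoscaling_events: list) -> bool:
    """Return True once a 'Worker pool started.' autoscaling event appears."""
    for event in autoscaling_events:
        if "Worker pool started." in event.get("description", {}).get("messageText", ""):
            return True
    return False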

    start_python_job_async >> wait_for_python_job_async_done
    start_python_job_async >> wait_for_python_job_async_metric
    start_python_job_async >> wait_for_python_job_async_message
    start_python_job_async >> wait_for_python_job_async_autoscaling_event

with models.DAG(
        "example_gcp_dataflow_template",
        default_args=default_args,
        start_date=days_ago(1),
        schedule_interval=None,  # Override to match your needs
        tags=['example'],
) as dag_template:
    # [START howto_operator_start_template_job]
    start_template_job = DataflowTemplatedJobStartOperator(
        task_id="start-template-job",
        template='gs://dataflow-templates/latest/Word_Count',
        parameters={
            'inputFile': "gs://dataflow-samples/shakespeare/kinglear.txt",
            'output': GCS_OUTPUT
        },
        location='europe-west3',
    )
    # [END howto_operator_start_template_job]
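The template DAG references GCS_OUTPUT and default_args defined near the top of the example file; a plausible sketch of those definitions (bucket names are placeholders, not the originals):

import os

GCS_OUTPUT = os.environ.get('GCP_DATAFLOW_GCS_OUTPUT', 'gs://example-bucket/output')
default_args = {
    'dataflow_default_options': {
        'tempLocation': 'gs://example-bucket/tmp/',
        'stagingLocation': 'gs://example-bucket/staging/',
    }
}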
Example #4
with models.DAG(
        # The id you will see in the DAG airflow page
        "composer_dataflow_dag",
        default_args=default_args,
        # The interval with which to schedule the DAG
        schedule_interval=datetime.timedelta(days=1),  # Override to match your needs
) as dag:

    start_template_job = DataflowTemplatedJobStartOperator(
        # The task id of your job
        task_id="dataflow_operator_transform_csv_to_bq",
        # The name of the template that you're using.
        # Below is a list of all the templates you can use.
        # For versions in non-production environments, use the subfolder 'latest'
        # https://cloud.google.com/dataflow/docs/guides/templates/provided-batch#gcstexttobigquery
        template="gs://dataflow-templates/latest/GCS_Text_to_BigQuery",
        # Use the link above to specify the correct parameters for your template.
        parameters={
            "javascriptTextTransformFunctionName": "transformCSVtoJSON",
            "JSONPath": bucket_path + "/jsonSchema.json",
            "javascriptTextTransformGcsPath":
            bucket_path + "/transformCSVtoJSON.js",
            "inputFilePattern": bucket_path + "/inputFile.txt",
            "outputTable": project_id + ":average_weather.average_weather",
            "bigQueryLoadingTemporaryDirectory": bucket_path + "/tmp/",
        },
    )

# [END composer_dataflow_dag]
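This DAG relies on bucket_path, project_id, and default_args set earlier in the file; a sketch based on the standard Cloud Composer tutorial, with the variable lookups as assumptions:

import datetime

from airflow import models
from airflow.providers.google.cloud.operators.dataflow import DataflowTemplatedJobStartOperator
from airflow.utils.dates import days_ago

# Pulled from Airflow variables configured in the environment (assumed names).
bucket_path = "{{var.value.bucket_path}}"
project_id = "{{var.value.project_id}}"

default_args = {
    # Start one day ago so the DAG runs as soon as it is uploaded.
    "start_date": days_ago(1),
    "dataflow_default_options": {
        "project": project_id,
        "tempLocation": bucket_path + "/tmp/",
    },
}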
Example #5
with models.DAG(
        'data_pipeline_dag',  # hypothetical DAG id; the snippet begins mid-call
        default_args=default_args,
        schedule_interval=None) as dag:
    start = dummy.DummyOperator(task_id='start', trigger_rule='all_success')

    end = dummy.DummyOperator(task_id='end', trigger_rule='all_success')

    # BigQuery tables are created automatically for demo purposes.
    # Consider a dedicated pipeline or tool for a real-life scenario.
    customers_import = DataflowTemplatedJobStartOperator(
        task_id="dataflow_customers_import",
        template="gs://dataflow-templates/latest/GCS_Text_to_BigQuery",
        project_id=LOD_PRJ,
        location=DF_REGION,
        parameters={
            "javascriptTextTransformFunctionName": "transform",
            "JSONPath": ORC_GCS + "/customers_schema.json",
            "javascriptTextTransformGcsPath": ORC_GCS + "/customers_udf.js",
            "inputFilePattern": DRP_GCS + "/customers.csv",
            "outputTable":
            DWH_LAND_PRJ + ":" + DWH_LAND_BQ_DATASET + ".customers",
            "bigQueryLoadingTemporaryDirectory": LOD_GCS_STAGING + "/tmp/bq/",
        },
    )

    purchases_import = DataflowTemplatedJobStartOperator(
        task_id="dataflow_purchases_import",
        template="gs://dataflow-templates/latest/GCS_Text_to_BigQuery",
        project_id=LOD_PRJ,
        location=DF_REGION,
        parameters={
            "javascriptTextTransformFunctionName": "transform",