Example #1
0
 def test_execute(self, mock_hook):
     """Verify execute() creates the hook once and forwards every
     cluster-update argument to DataprocHook.update_cluster unchanged."""
     decommission_timeout = {"graceful_decommission_timeout": "600s"}
     operator = DataprocUpdateClusterOperator(
         task_id=TASK_ID,
         location=GCP_LOCATION,
         cluster_name=CLUSTER_NAME,
         cluster=CLUSTER,
         update_mask=UPDATE_MASK,
         request_id=REQUEST_ID,
         graceful_decommission_timeout=decommission_timeout,
         project_id=GCP_PROJECT,
         gcp_conn_id=GCP_CONN_ID,
         retry=RETRY,
         timeout=TIMEOUT,
         metadata=METADATA,
         impersonation_chain=IMPERSONATION_CHAIN,
     )
     operator.execute(context={})
     # The hook must be constructed exactly once, with the connection /
     # impersonation settings given to the operator.
     mock_hook.assert_called_once_with(
         gcp_conn_id=GCP_CONN_ID,
         impersonation_chain=IMPERSONATION_CHAIN,
     )
     # All remaining operator arguments are passed straight through to the
     # update_cluster API call.
     mock_hook.return_value.update_cluster.assert_called_once_with(
         location=GCP_LOCATION,
         project_id=GCP_PROJECT,
         cluster_name=CLUSTER_NAME,
         cluster=CLUSTER,
         update_mask=UPDATE_MASK,
         graceful_decommission_timeout=decommission_timeout,
         request_id=REQUEST_ID,
         retry=RETRY,
         timeout=TIMEOUT,
         metadata=METADATA,
     )
# Example DAG wiring Dataproc tasks: create a cluster, scale it, then submit
# jobs to it. start_date=days_ago(1) makes the DAG eligible to run
# immediately; schedule_interval=None means it only runs when triggered
# manually.
with models.DAG(
        "example_gcp_dataproc",
        default_args={"start_date": days_ago(1)},
        schedule_interval=None,
) as dag:
    # Provision the Dataproc cluster described by CLUSTER in REGION.
    create_cluster = DataprocCreateClusterOperator(task_id="create_cluster",
                                                   project_id=PROJECT_ID,
                                                   cluster=CLUSTER,
                                                   region=REGION)

    # Apply CLUSTER_UPDATE to the running cluster, limited to the fields
    # named in UPDATE_MASK.
    # NOTE(review): graceful_decommission_timeout is given TIMEOUT, a name
    # that conventionally holds an API-call timeout; the test fragment above
    # in this file passes a Duration mapping like
    # {"graceful_decommission_timeout": "600s"} for this parameter instead.
    # Confirm TIMEOUT's definition matches what the operator expects.
    scale_cluster = DataprocUpdateClusterOperator(
        task_id="scale_cluster",
        cluster_name=CLUSTER_NAME,
        cluster=CLUSTER_UPDATE,
        update_mask=UPDATE_MASK,
        graceful_decommission_timeout=TIMEOUT,
        project_id=PROJECT_ID,
        location=REGION,
    )

    # Submit the Pig job defined by PIG_JOB to the cluster.
    pig_task = DataprocSubmitJobOperator(task_id="pig_task",
                                         job=PIG_JOB,
                                         location=REGION,
                                         project_id=PROJECT_ID)

    spark_sql_task = DataprocSubmitJobOperator(
        task_id="spark_sql_task",
        job=SPARK_SQL_JOB,
        location=REGION,
        project_id=PROJECT_ID,