コード例 #1
0
    def test_execute(self, mock_hook):
        # Build the operator under test from the module's shared test constants.
        task = BigQueryCreateEmptyDatasetOperator(
            task_id=TASK_ID,
            dataset_id=TEST_DATASET,
            project_id=TEST_GCP_PROJECT_ID,
        )

        # Run it; None is passed as the sole argument to execute().
        task.execute(None)

        # Follow the hook -> connection -> cursor chain on the mock and check
        # that the dataset-creation call happened exactly once, with an empty
        # dataset_reference alongside the dataset and project identifiers.
        cursor = mock_hook.return_value.get_conn().cursor()
        cursor.create_empty_dataset.assert_called_once_with(
            dataset_id=TEST_DATASET,
            project_id=TEST_GCP_PROJECT_ID,
            dataset_reference={},
        )
# PySpark task: submits the script at ``cleaning_job_code_path`` to the
# cluster ``cluster_name`` in ``region`` under the job name
# 'football_dataset_cleaner'. Two GCS paths are passed as job arguments
# (presumably the CSV input and the Parquet output -- confirm against the
# cleaning script).
submit_cleaning_spark_job = DataProcPySparkOperator(
    task_id="submit_cleaning_spark_job",
    main=cleaning_job_code_path,
    cluster_name=cluster_name,
    job_name="football_dataset_cleaner",
    region=region,
    arguments=[
        "gs://int_football_bucket/data_lake/football/results.csv",
        "gs://int_football_bucket/staging/football/results.parquet",
    ],
    gcp_conn_id=gcp_conn,
    dag=dag,
)

# Task that instantiates BigQueryCreateEmptyDatasetOperator for the dataset
# ``dataset_id`` in ``project_id``, using the ``gcp_conn`` connection id and
# attached to ``dag``.
create_football_matches_dataset = BigQueryCreateEmptyDatasetOperator(
    task_id="create_football_matches_dataset",
    project_id=project_id,
    dataset_id=dataset_id,
    bigquery_conn_id=gcp_conn,
    dag=dag,
)

create_games_table = BigQueryCreateEmptyTableOperator(
    task_id="create_games_table",
    project_id=project_id,
    dataset_id=dataset_id,
    bigquery_conn_id=gcp_conn,
    table_id="games",
    schema_fields=[{"name": "date", "type": "TIMESTAMP", "mode": "REQUIRED"},
                   {"name": "team_1", "type": "STRING", "mode": "REQUIRED"},
                   {"name": "team_2", "type": "STRING", "mode": "REQUIRED"},
                   {"name": "team_1_score", "type": "INTEGER", "mode": "REQUIRED"},
                   {"name": "team_2_score", "type": "INTEGER", "mode": "REQUIRED"},