Example #1
0
    def test_execute(self, mock_hook):
        """execute() should delegate deletion to the hook's delete_table,
        mapping ignore_if_missing onto the hook's not_found_ok flag."""
        skip_missing = True
        target_table = f'{TEST_DATASET}.{TEST_TABLE_ID}'

        op = BigQueryDeleteTableOperator(
            task_id=TASK_ID,
            deletion_dataset_table=target_table,
            ignore_if_missing=skip_missing)
        op.execute(None)

        mock_hook.return_value.delete_table.assert_called_once_with(
            table_id=target_table, not_found_ok=skip_missing)
Example #2
0
    def test_execute(self, mock_hook):
        """execute() should forward the table reference and the
        missing-table flag verbatim to the hook's run_table_delete."""
        ignore_if_missing = True
        # f-string instead of the dated str.format, consistent with the
        # companion test that builds the same '<dataset>.<table>' reference.
        deletion_dataset_table = f'{TEST_DATASET}.{TEST_TABLE_ID}'

        operator = BigQueryDeleteTableOperator(
            task_id=TASK_ID,
            deletion_dataset_table=deletion_dataset_table,
            ignore_if_missing=ignore_if_missing)

        operator.execute(None)
        mock_hook.return_value \
            .run_table_delete \
            .assert_called_once_with(
                deletion_dataset_table=deletion_dataset_table,
                ignore_if_missing=ignore_if_missing
            )
# Main DAG
# --------------------------------------------------------------------------------

with models.DAG('delete_tables_dag',
                default_args=default_args,
                schedule_interval=None) as dag:
    start = dummy.DummyOperator(task_id='start', trigger_rule='all_success')

    end = dummy.DummyOperator(task_id='end', trigger_rule='all_success')

    # BigQuery tables are deleted here for demo purposes only.
    # Consider a dedicated pipeline or tool for a real-life scenario.
    # NOTE(review): 'delte_table' is a typo for 'delete_table'; kept as-is in
    # case the name is referenced further down the file.
    with TaskGroup('delete_table') as delte_table:
        delete_table_customers = BigQueryDeleteTableOperator(
            task_id="delete_table_customers",
            deletion_dataset_table=(
                f"{DWH_LAND_PRJ}.{DWH_LAND_BQ_DATASET}.customers"),
            impersonation_chain=[TRF_SA_DF])

        delete_table_purchases = BigQueryDeleteTableOperator(
            task_id="delete_table_purchases",
            deletion_dataset_table=(
                f"{DWH_LAND_PRJ}.{DWH_LAND_BQ_DATASET}.purchases"),
            impersonation_chain=[TRF_SA_DF])

        delete_table_customer_purchase_curated = BigQueryDeleteTableOperator(
            task_id="delete_table_customer_purchase_curated",
            deletion_dataset_table=(
                f"{DWH_CURATED_PRJ}.{DWH_CURATED_BQ_DATASET}.customer_purchase"),
            impersonation_chain=[TRF_SA_DF])
Example #4
0
        task_id="create_view",
        dataset_id=LOCATION_DATASET_NAME,
        table_id="test_view",
        view={
            "query": "SELECT * FROM `{}.test_table`".format(DATASET_NAME),
            "useLegacySql": False
        })

    # List the dataset's tables before and after the objects are created.
    get_empty_dataset_tables = BigQueryGetDatasetTablesOperator(
        task_id="get_empty_dataset_tables", dataset_id=DATASET_NAME)

    get_dataset_tables = BigQueryGetDatasetTablesOperator(
        task_id="get_dataset_tables", dataset_id=DATASET_NAME)

    delete_view = BigQueryDeleteTableOperator(
        task_id="delete_view",
        deletion_dataset_table=f"{DATASET_NAME}.test_view")

    delete_table = BigQueryDeleteTableOperator(
        task_id="delete_table",
        deletion_dataset_table=f"{DATASET_NAME}.test_table")

    # NOTE: this task_id deliberately uses a hyphen ('get-dataset') and is
    # referenced verbatim by the xcom_pull below -- keep the two in sync.
    get_dataset = BigQueryGetDatasetOperator(
        task_id="get-dataset", dataset_id=DATASET_NAME)

    get_dataset_result = BashOperator(
        task_id="get_dataset_result",
        bash_command="echo \"{{ task_instance.xcom_pull('get-dataset')['id'] }}\"",
    )
        schema_fields_updates=[
            {
                "name": "emp_name",
                "description": "Name of employee"
            },
            {
                "name": "salary",
                "description": "Monthly salary in USD"
            },
        ],
    )
    # [END howto_operator_bigquery_update_table_schema]

    # The [START]/[END] markers below look like documentation-extraction
    # anchors (Airflow howto snippets) -- presumably the code between each
    # pair is rendered verbatim in the docs, so keep it minimal and
    # self-contained; verify against the docs build before restructuring.

    # Drop the fully-qualified '<project>.<dataset>.test_table'.
    # [START howto_operator_bigquery_delete_table]
    delete_table = BigQueryDeleteTableOperator(
        task_id="delete_table",
        deletion_dataset_table=f"{PROJECT_ID}.{DATASET_NAME}.test_table",
    )
    # [END howto_operator_bigquery_delete_table]

    # Create a logical view over test_table using standard (non-legacy) SQL.
    # [START howto_operator_bigquery_create_view]
    create_view = BigQueryCreateEmptyTableOperator(
        task_id="create_view",
        dataset_id=DATASET_NAME,
        table_id="test_view",
        view={
            "query": f"SELECT * FROM `{PROJECT_ID}.{DATASET_NAME}.test_table`",
            "useLegacySql": False,
        },
    )
    # [END howto_operator_bigquery_create_view]
Example #6
0
         "ORDER BY avg_rating DESC"),
    write_disposition="WRITE_TRUNCATE",
    create_disposition="CREATE_IF_NEEDED",
    bigquery_conn_id="gcp",
    dag=dag,
)

# Export this run's rating results to GCS. The Jinja template parts
# ('{{ ds_nodash }}') are kept as plain adjacent string literals so they
# are not interpreted as f-string fields.
extract_top_ratings = BigQueryToGCSOperator(
    task_id="extract_top_ratings",
    source_project_dataset_table=(
        f"{os.environ['GCP_PROJECT']}:{os.environ['BIGQUERY_DATASET']}"
        ".rating_results_{{ ds_nodash }}"),
    destination_cloud_storage_uris=[
        f"gs://{os.environ['RESULT_BUCKET']}" "/{{ ds_nodash }}.csv"
    ],
    export_format="CSV",
    bigquery_conn_id="gcp",
    dag=dag,
)

# Clean up the per-run results table once the export has finished.
delete_result_table = BigQueryDeleteTableOperator(
    task_id="delete_result_table",
    deletion_dataset_table=(
        f"{os.environ['GCP_PROJECT']}:{os.environ['BIGQUERY_DATASET']}"
        ".rating_results_{{ ds_nodash }}"),
    bigquery_conn_id="gcp",
    dag=dag,
)

fetch_ratings >> import_in_bigquery >> query_top_ratings >> extract_top_ratings >> delete_result_table
Example #7
0
        project_id=os.environ['GCP_PROJECT_ID'],
        region=os.environ['GCP_REGION'],
        parameters={
            'PATH_TO_ETL_FILE':
            f"gs://{os.environ['GCP_GCS_BUCKET_ROUTINE']}/etl/etl.py",
            'CLUSTER_NAME': 'goodreads-etl',
            'ARG_SOURCE_BUCKET': os.environ['GCP_GCS_BUCKET_WORKING'],
            'ARG_DESTINATION_BUCKET': os.environ['GCP_GCS_BUCKET_PROCESSED'],
            'ARG_OBJECT_PREFIX': '{{ run_id }}',
        },
        gcp_conn_id='gr_dataproc_conn',
    )

    # Drop the staging tables; ignore_if_missing=True maps onto the hook's
    # not_found_ok flag, so a never-created table does not fail the run.
    drop_stage_user_data = BigQueryDeleteTableOperator(
        task_id="drop_stage_user_data",
        deletion_dataset_table=(
            f"{GCP_PROJECT_ID}.{GCP_BQ_DATASET_STAGE}.{GCP_BQ_TABLE_USERS}"),
        gcp_conn_id='gr_bigquery_conn',
        ignore_if_missing=True,
    )

    drop_stage_authors_data = BigQueryDeleteTableOperator(
        task_id="drop_stage_authors_data",
        deletion_dataset_table=(
            f"{GCP_PROJECT_ID}.{GCP_BQ_DATASET_STAGE}.{GCP_BQ_TABLE_AUTHORS}"),
        gcp_conn_id='gr_bigquery_conn',
        ignore_if_missing=True,
    )

    drop_stage_books_data = BigQueryDeleteTableOperator(
        task_id="drop_stage_books_data",
        deletion_dataset_table=
        f"{GCP_PROJECT_ID}.{GCP_BQ_DATASET_STAGE}.{GCP_BQ_TABLE_BOOKS}",
        gcp_conn_id='gr_bigquery_conn',