Example #1
    def test_execute(self, mock_hook):
        operator = BigQueryDeleteDatasetOperator(
            task_id=TASK_ID,
            dataset_id=TEST_DATASET,
            project_id=TEST_GCP_PROJECT_ID,
            delete_contents=TEST_DELETE_CONTENTS,
        )

        operator.execute(None)
        mock_hook.return_value.delete_dataset.assert_called_once_with(
            dataset_id=TEST_DATASET, project_id=TEST_GCP_PROJECT_ID, delete_contents=TEST_DELETE_CONTENTS
        )
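The test above is shown without its surrounding context. A minimal sketch of what it likely sits inside (the patch target, class name, and constant values are assumptions for illustration, not taken from the original file):

from unittest import mock

from airflow.providers.google.cloud.operators.bigquery import BigQueryDeleteDatasetOperator

# Assumed test constants; the real test module defines its own values.
TASK_ID = "test-bq-delete-dataset-operator"
TEST_DATASET = "test-dataset"
TEST_GCP_PROJECT_ID = "test-project"
TEST_DELETE_CONTENTS = True


class TestBigQueryDeleteDatasetOperator:
    # Patching the hook keeps the test offline; the patched class is injected
    # into the test method as mock_hook.
    @mock.patch("airflow.providers.google.cloud.operators.bigquery.BigQueryHook")
    def test_execute(self, mock_hook):
        ...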
Example #2
        body={
            "outputUriPrefix": EXPORT_DESTINATION_URL,
            "collectionIds": [EXPORT_COLLECTION_ID]
        },
    )
    # [END howto_operator_export_database_to_gcs]

    create_dataset = BigQueryCreateEmptyDatasetOperator(
        task_id="create_dataset",
        dataset_id=DATASET_NAME,
        location=DATASET_LOCATION,
        project_id=GCP_PROJECT_ID,
    )

    delete_dataset = BigQueryDeleteDatasetOperator(
        task_id="delete_dataset",
        dataset_id=DATASET_NAME,
        project_id=GCP_PROJECT_ID,
        delete_contents=True,
    )

    # [START howto_operator_create_external_table_multiple_types]
    create_external_table_multiple_types = BigQueryCreateExternalTableOperator(
        task_id="create_external_table",
        bucket=BUCKET_NAME,
        table_resource={
            "tableReference": {
                "projectId": GCP_PROJECT_ID,
                "datasetId": DATASET_NAME,
                "tableId": "firestore_data",
            },
            "schema": {
                "fields": [
                    {
Example #3
    patch_dataset = BigQueryPatchDatasetOperator(
        task_id="patch_dataset",
        dataset_id=DATASET_NAME,
        dataset_resource={
            "friendlyName": "Patched Dataset",
            "description": "Patched dataset"
        },
    )

    update_dataset = BigQueryUpdateDatasetOperator(
        task_id="update_dataset",
        dataset_id=DATASET_NAME,
        dataset_resource={"description": "Updated dataset"},
    )

    delete_dataset = BigQueryDeleteDatasetOperator(
        task_id="delete_dataset",
        dataset_id=DATASET_NAME,
        delete_contents=True,
    )

    delete_dataset_with_location = BigQueryDeleteDatasetOperator(
        task_id="delete_dataset_with_location",
        dataset_id=LOCATION_DATASET_NAME,
        delete_contents=True,
    )

    create_dataset >> execute_query_save >> delete_dataset
    create_dataset >> get_empty_dataset_tables >> create_table >> get_dataset_tables >> delete_dataset
    create_dataset >> get_dataset >> delete_dataset
    create_dataset >> patch_dataset >> update_dataset >> delete_dataset
    execute_query_save >> get_data >> get_dataset_result
    get_data >> delete_dataset
    create_dataset >> create_external_table >> execute_query_external_table >> \
        copy_from_selected_data >> delete_dataset
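The backslash-continued dependency on the last two lines can also be expressed with Airflow's chain helper; a hedged equivalent, not part of the original example:

    from airflow.models.baseoperator import chain  # normally imported at the top of the file

    chain(
        create_dataset,
        create_external_table,
        execute_query_external_table,
        copy_from_selected_data,
        delete_dataset,
    )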
Example #4
            schema_fields=SCHEMA,
            location=location,
        )

        create_table_2 = BigQueryCreateEmptyTableOperator(
            task_id="create_table_2",
            dataset_id=DATASET_NAME,
            table_id=TABLE_2,
            schema_fields=SCHEMA,
            location=location,
        )

        create_dataset >> [create_table_1, create_table_2]

        delete_dataset = BigQueryDeleteDatasetOperator(
            task_id="delete_dataset", dataset_id=DATASET_NAME, delete_contents=True
        )

        # [START howto_operator_bigquery_insert_job]
        insert_query_job = BigQueryInsertJobOperator(
            task_id="insert_query_job",
            configuration={
                "query": {
                    "query": INSERT_ROWS_QUERY,
                    "useLegacySql": "False",
                }
            },
            location=location,
        )
        # [END howto_operator_bigquery_insert_job]
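A common refinement that is not shown in the snippet is to give the cleanup task a trigger rule so the dataset is removed even when an upstream task fails; a hedged sketch using the same names as above:

        from airflow.utils.trigger_rule import TriggerRule  # normally imported at the top of the file

        delete_dataset = BigQueryDeleteDatasetOperator(
            task_id="delete_dataset",
            dataset_id=DATASET_NAME,
            delete_contents=True,  # also drop any tables still in the dataset
            trigger_rule=TriggerRule.ALL_DONE,  # run cleanup even if upstream tasks failed
        )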
Example #5
# [START howto_operator_gcs_to_bigquery]
load_csv = GCSToBigQueryOperator(
    task_id='gcs_to_bigquery_example',
    bucket='cloud-samples-data',
    source_objects=['bigquery/us-states/us-states.csv'],
    destination_project_dataset_table=f"{DATASET_NAME}.{TABLE_NAME}",
    schema_fields=[
        {'name': 'name', 'type': 'STRING', 'mode': 'NULLABLE'},
        {'name': 'post_abbr', 'type': 'STRING', 'mode': 'NULLABLE'},
    ],
    write_disposition='WRITE_TRUNCATE',
    dag=dag,
)
# [END howto_operator_gcs_to_bigquery]

delete_test_dataset = BigQueryDeleteDatasetOperator(
    task_id='delete_airflow_test_dataset',
    dataset_id=DATASET_NAME,
    delete_contents=True,
    dag=dag,
)

create_test_dataset >> load_csv >> delete_test_dataset
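The create_test_dataset task referenced in the last line is cut off from the snippet; a minimal sketch of how it could be defined (the task_id here is an assumption):

# BigQueryCreateEmptyDatasetOperator also lives in airflow.providers.google.cloud.operators.bigquery
create_test_dataset = BigQueryCreateEmptyDatasetOperator(
    task_id='create_airflow_test_dataset',
    dataset_id=DATASET_NAME,
    dag=dag,
)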
Example #6
IMPERSONATION_CHAIN = f"impersonated_account@{PROJECT_ID}.iam.gserviceaccount.com"

DATA_SAMPLE_GCS_URL_PARTS = urlparse(DATA_SAMPLE_GCS_URL)
DATA_SAMPLE_GCS_BUCKET_NAME = DATA_SAMPLE_GCS_URL_PARTS.netloc
DATA_SAMPLE_GCS_OBJECT_NAME = DATA_SAMPLE_GCS_URL_PARTS.path[1:]


with models.DAG(
    "example_bigquery_operations",
    schedule_interval=None,  # Override to match your needs
    start_date=days_ago(1),
    tags=["example"],
) as dag:
    # [START howto_operator_bigquery_create_dataset]
    create_dataset = BigQueryCreateEmptyDatasetOperator(
        task_id="create-dataset",
        dataset_id=DATASET_NAME,
        impersonation_chain=IMPERSONATION_CHAIN,
    )
    # [END howto_operator_bigquery_create_dataset]


    # [START howto_operator_bigquery_delete_dataset]
    delete_dataset = BigQueryDeleteDatasetOperator(
        task_id="delete_dataset",
        dataset_id=DATASET_NAME,
        delete_contents=True,
        impersonation_chain=IMPERSONATION_CHAIN,
    )
    # [END howto_operator_bigquery_delete_dataset]
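The snippet ends before any task ordering is declared; assuming the dataset should be created before it is deleted, a minimal closing line would be:

    create_dataset >> delete_dataset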