Code example #1
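    # Excerpt from the provider test suite: mock_hook is a mocked BigQueryHook
    # injected by a mock.patch decorator that this snippet omits.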
    def test_create_clustered_empty_table(self, mock_hook):

        schema_fields = [
            {"name": "emp_name", "type": "STRING", "mode": "REQUIRED"},
            {"name": "date_hired", "type": "DATE", "mode": "REQUIRED"},
            {"name": "date_birth", "type": "DATE", "mode": "NULLABLE"},
        ]
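        # Day-partition on the hire date and cluster on the birth date; the
        # operator is expected to forward both settings to the hook unchanged.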
        time_partitioning = {"type": "DAY", "field": "date_hired"}
        cluster_fields = ["date_birth"]
        operator = BigQueryCreateEmptyTableOperator(
            task_id=TASK_ID,
            dataset_id=TEST_DATASET,
            project_id=TEST_GCP_PROJECT_ID,
            table_id=TEST_TABLE_ID,
            schema_fields=schema_fields,
            time_partitioning=time_partitioning,
            cluster_fields=cluster_fields,
        )

        operator.execute(None)
        mock_hook.return_value.create_empty_table.assert_called_once_with(
            dataset_id=TEST_DATASET,
            project_id=TEST_GCP_PROJECT_ID,
            table_id=TEST_TABLE_ID,
            schema_fields=schema_fields,
            time_partitioning=time_partitioning,
            cluster_fields=cluster_fields,
            labels=None,
            view=None,
            encryption_configuration=None,
            table_resource=None,
            exists_ok=False,
        )
Code example #2
import json

# Import path for recent Google provider releases; older releases exposed this
# operator via airflow.contrib.operators.bigquery_operator.
from airflow.providers.google.cloud.operators.bigquery import (
    BigQueryCreateEmptyTableOperator,
)


def bq_create(tablename, bqdataset, **kwargs):
    # "tablename" arrives as "<schema>.<table>"; only the table part becomes
    # the BigQuery table_id.
    table_schema, table_name = tablename.split('.', 1)
    ti = kwargs['ti']
    target_dataset = bqdataset
    # The upstream "getschema_<tablename>" task pushes the schema to XCom as a
    # JSON string.
    schema = json.loads(
        str(ti.xcom_pull(task_ids='getschema_{}'.format(tablename))))
    print(schema)
    create = BigQueryCreateEmptyTableOperator(
        task_id='create_bq_{}'.format(tablename),
        project_id=gcp_project,  # module-level setting, defined elsewhere
        dataset_id=target_dataset,
        table_id=table_name,
        schema_fields=schema,
        bigquery_conn_id=bq_conn)  # deprecated alias for gcp_conn_id
    create.execute(context=kwargs)
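For context, a minimal sketch of how a helper like this could be wired into a DAG with PythonOperator. Everything below is illustrative: fetch_schema, the 'sales.orders' table name, and the 'analytics' dataset are assumptions, and the module-level gcp_project and bq_conn from the original are still required.

from airflow.operators.python import PythonOperator

def fetch_schema(**kwargs):
    # Hypothetical stand-in for the real schema-discovery task; its return
    # value is pushed to XCom as a JSON string for bq_create to json.loads().
    return '[{"name": "id", "type": "STRING", "mode": "NULLABLE"}]'

# Inside a `with DAG(...)` block, as in the surrounding examples.
get_schema = PythonOperator(
    task_id='getschema_sales.orders',  # must match the xcom_pull lookup above
    python_callable=fetch_schema,
)

create_table = PythonOperator(
    task_id='create_sales.orders',
    python_callable=bq_create,
    op_args=['sales.orders', 'analytics'],
)

get_schema >> create_table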
Code example #3
    def test_execute(self, mock_hook):
        operator = BigQueryCreateEmptyTableOperator(
            task_id=TASK_ID, dataset_id=TEST_DATASET, project_id=TEST_GCP_PROJECT_ID, table_id=TEST_TABLE_ID
        )

        operator.execute(None)
        mock_hook.return_value.create_empty_table.assert_called_once_with(
            dataset_id=TEST_DATASET,
            project_id=TEST_GCP_PROJECT_ID,
            table_id=TEST_TABLE_ID,
            schema_fields=None,
            time_partitioning={},
            cluster_fields=None,
            labels=None,
            view=None,
            encryption_configuration=None,
            table_resource=None,
            exists_ok=False,
        )
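Distilled from the two unit tests, a self-contained sketch of the operator inside a real DAG. The import path assumes apache-airflow-providers-google on Airflow 2.x; the project and dataset ids are placeholders.

from airflow import models
from airflow.providers.google.cloud.operators.bigquery import (
    BigQueryCreateEmptyTableOperator,
)
from airflow.utils.dates import days_ago

with models.DAG(
    "example_create_empty_table",
    schedule_interval=None,  # trigger manually
    start_date=days_ago(1),
) as dag:
    create_table = BigQueryCreateEmptyTableOperator(
        task_id="create_table",
        project_id="my-project",    # placeholder
        dataset_id="my_dataset",    # placeholder; the dataset must already exist
        table_id="test_table",
        schema_fields=[
            {"name": "emp_name", "type": "STRING", "mode": "REQUIRED"},
            {"name": "salary", "type": "INTEGER", "mode": "NULLABLE"},
        ],
    )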
Code example #4
        task_id="create_bucket",
        bucket_name=GCS_BUCKET,
        project_id=GCP_PROJECT_ID,
    )

    create_dataset = BigQueryCreateEmptyDatasetOperator(
        task_id="create_dataset",
        dataset_id=DATASET_NAME,
    )

    create_table = BigQueryCreateEmptyTableOperator(
        task_id="create_table",
        dataset_id=DATASET_NAME,
        table_id=TABLE_NAME,
        schema_fields=[
            {'name': 'campaign_name', 'type': 'STRING', 'mode': 'NULLABLE'},
            {'name': 'campaign_id', 'type': 'STRING', 'mode': 'NULLABLE'},
            {'name': 'ad_id', 'type': 'STRING', 'mode': 'NULLABLE'},
            {'name': 'clicks', 'type': 'STRING', 'mode': 'NULLABLE'},
            {'name': 'impressions', 'type': 'STRING', 'mode': 'NULLABLE'},
        ],
    )

    # [START howto_operator_facebook_ads_to_gcs]
    run_operator = FacebookAdsReportToGcsOperator(
        task_id='run_fetch_data',
        owner='airflow',
        bucket_name=GCS_BUCKET,
        parameters=PARAMETERS,
        fields=FIELDS,
        gcp_conn_id=GCS_CONN_ID,
        object_name=GCS_OBJ_PATH,
    )
Code example #5
File: example_bigquery.py  Project: stu12345/airflow
        task_id="create-dataset", dataset_id=DATASET_NAME)

    create_dataset_with_location = BigQueryCreateEmptyDatasetOperator(
        task_id="create_dataset_with_location",
        dataset_id=LOCATION_DATASET_NAME,
        location=BQ_LOCATION)

    create_table = BigQueryCreateEmptyTableOperator(
        task_id="create_table",
        dataset_id=DATASET_NAME,
        table_id="test_table",
        schema_fields=[
            {
                "name": "emp_name",
                "type": "STRING",
                "mode": "REQUIRED"
            },
            {
                "name": "salary",
                "type": "INTEGER",
                "mode": "NULLABLE"
            },
        ],
    )

    create_table_with_location = BigQueryCreateEmptyTableOperator(
        task_id="create_table_with_location",
        dataset_id=LOCATION_DATASET_NAME,
        table_id="test_table",
        schema_fields=[
            {
                "name": "emp_name",
                "type": "STRING",
                "mode": "REQUIRED"
            },
            {
                "name": "salary",
                "type": "INTEGER",
                "mode": "NULLABLE"
            },
        ],  # same schema as create_table above
    )
Code example #6
    with models.DAG(
        dag_id,
        schedule_interval=None,  # Override to match your needs
        start_date=days_ago(1),
        tags=["example"],
        user_defined_macros={"DATASET": DATASET_NAME, "TABLE": TABLE_1},
    ) as dag_with_locations:
        create_dataset = BigQueryCreateEmptyDatasetOperator(
            task_id="create-dataset",
            dataset_id=DATASET_NAME,
            location=location,
        )

        create_table_1 = BigQueryCreateEmptyTableOperator(
            task_id="create_table_1",
            dataset_id=DATASET_NAME,
            table_id=TABLE_1,
            schema_fields=SCHEMA,
            location=location,
        )

        create_table_2 = BigQueryCreateEmptyTableOperator(
            task_id="create_table_2",
            dataset_id=DATASET_NAME,
            table_id=TABLE_2,
            schema_fields=SCHEMA,
            location=location,
        )

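        # Both tables depend only on the dataset, so they are created in parallel.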
        create_dataset >> [create_table_1, create_table_2]

        delete_dataset = BigQueryDeleteDatasetOperator(
            task_id="delete_dataset",
            dataset_id=DATASET_NAME,
            delete_contents=True,
        )
Code example #7
        user_defined_macros={
            "DATASET": DATASET_NAME,
            "TABLE": TABLE_NAME
        },
        default_args={"project_id": PROJECT_ID},
) as dag_with_locations:
    create_dataset = BigQueryCreateEmptyDatasetOperator(
        task_id="create-dataset",
        dataset_id=DATASET_NAME,
        project_id=PROJECT_ID)

    create_table = BigQueryCreateEmptyTableOperator(
        task_id="create_table",
        dataset_id=DATASET_NAME,
        table_id=TABLE_NAME,
        schema_fields=SCHEMA,
        time_partitioning={
            "type": "DAY",
            "field": "ds",
        },
    )
    # [START howto_sensor_bigquery_table]
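    # Block downstream tasks until the table actually exists in BigQuery.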
    check_table_exists = BigQueryTableExistenceSensor(
        task_id="check_table_exists",
        project_id=PROJECT_ID,
        dataset_id=DATASET_NAME,
        table_id=TABLE_NAME)
    # [END howto_sensor_bigquery_table]

    execute_insert_query = BigQueryExecuteQueryOperator(
        task_id="execute_insert_query",
        sql=INSERT_ROWS_QUERY,
        use_legacy_sql=False,  # DML INSERT requires standard SQL
    )
Code example #8
        ],
    )

    create_dataset = BigQueryCreateEmptyDatasetOperator(
        task_id="create_dataset", dataset_id=DATASET_NAME)

    for table in [ORIGIN, TARGET]:
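        # Same empty schema on both sides of the copy: one table for ORIGIN,
        # one for TARGET.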
        create_table = BigQueryCreateEmptyTableOperator(
            task_id=f"create_{table}_table",
            dataset_id=DATASET_NAME,
            table_id=table,
            schema_fields=[
                {
                    "name": "emp_name",
                    "type": "STRING",
                    "mode": "REQUIRED"
                },
                {
                    "name": "salary",
                    "type": "INTEGER",
                    "mode": "NULLABLE"
                },
            ],
        )
        create_dataset >> create_table >> [copy_selected_data, bigquery_to_gcs]

    delete_dataset = BigQueryDeleteDatasetOperator(task_id="delete_dataset",
                                                   dataset_id=DATASET_NAME,
                                                   delete_contents=True)

    [copy_selected_data, bigquery_to_gcs] >> delete_dataset
Code example #9
with models.DAG(
        "example_bigquery_operations",
        schedule_interval='@once',  # Override to match your needs
        start_date=days_ago(1),
        tags=["example"],
) as dag:
    # [START howto_operator_bigquery_create_table]
    create_table = BigQueryCreateEmptyTableOperator(
        task_id="create_table",
        dataset_id=DATASET_NAME,
        table_id="test_table",
        schema_fields=[
            {
                "name": "emp_name",
                "type": "STRING",
                "mode": "REQUIRED"
            },
            {
                "name": "salary",
                "type": "INTEGER",
                "mode": "NULLABLE"
            },
        ],
    )
    # [END howto_operator_bigquery_create_table]

    # [START howto_operator_bigquery_update_table_schema]
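    # Patches only the fields listed in schema_fields_updates; columns that
    # are not mentioned keep their current definitions.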
    update_table_schema = BigQueryUpdateTableSchemaOperator(
        task_id="update_table_schema",
        dataset_id=DATASET_NAME,
        table_id="test_table",
Code example #10
        dag=dag,
    )
    # [END howto_operator_salesforce_to_gcs]

    create_dataset = BigQueryCreateEmptyDatasetOperator(
        task_id="create_dataset", dataset_id=DATASET_NAME, project_id=GCP_PROJECT_ID, gcp_conn_id=GCS_CONN_ID
    )

    create_table = BigQueryCreateEmptyTableOperator(
        task_id="create_table",
        dataset_id=DATASET_NAME,
        table_id=TABLE_NAME,
        schema_fields=[
            {'name': 'id', 'type': 'STRING', 'mode': 'NULLABLE'},
            {'name': 'name', 'type': 'STRING', 'mode': 'NULLABLE'},
            {'name': 'company', 'type': 'STRING', 'mode': 'NULLABLE'},
            {'name': 'phone', 'type': 'STRING', 'mode': 'NULLABLE'},
            {'name': 'email', 'type': 'STRING', 'mode': 'NULLABLE'},
            {'name': 'createddate', 'type': 'STRING', 'mode': 'NULLABLE'},
            {'name': 'lastmodifieddate', 'type': 'STRING', 'mode': 'NULLABLE'},
            {'name': 'isdeleted', 'type': 'BOOL', 'mode': 'NULLABLE'},
        ],
    )

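    # Load the exported Salesforce report from GCS into the table created
    # above; WRITE_TRUNCATE replaces any rows from previous runs.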
    load_csv = GCSToBigQueryOperator(
        task_id='gcs_to_bq',
        bucket=GCS_BUCKET,
        source_objects=[GCS_OBJ_PATH],
        destination_project_dataset_table=f"{DATASET_NAME}.{TABLE_NAME}",
        write_disposition='WRITE_TRUNCATE',
    )
Code example #11
        gcp_conn_id='gr_bigquery_conn',
        ignore_if_missing=True)

    drop_stage_reviews_data = BigQueryDeleteTableOperator(
        task_id="drop_stage_reviews_data",
        deletion_dataset_table=f"{GCP_PROJECT_ID}.{GCP_BQ_DATASET_STAGE}.{GCP_BQ_TABLE_REVIEWS}",
        gcp_conn_id='gr_bigquery_conn',
        ignore_if_missing=True)

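    # No-op marker task; the actual staging tables are created by the
    # per-table operators below.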
    create_stage_table = DummyOperator(task_id='create_stage_table')

    create_stage_users_table = BigQueryCreateEmptyTableOperator(
        task_id='create_stage_users_table',
        dataset_id=GCP_BQ_DATASET_STAGE,
        table_id=GCP_BQ_TABLE_USERS,
        project_id=GCP_PROJECT_ID,
        bigquery_conn_id='gr_bigquery_conn',
        schema_fields=stage.users_schema)

    create_stage_authors_table = BigQueryCreateEmptyTableOperator(
        task_id='create_stage_authors_table',
        dataset_id=GCP_BQ_DATASET_STAGE,
        table_id=GCP_BQ_TABLE_AUTHORS,
        project_id=GCP_PROJECT_ID,
        bigquery_conn_id='gr_bigquery_conn',
        schema_fields=stage.authors_schema)

    create_stage_books_table = BigQueryCreateEmptyTableOperator(
        task_id='create_stage_books_table',
        dataset_id=GCP_BQ_DATASET_STAGE,
        table_id=GCP_BQ_TABLE_BOOKS,
        project_id=GCP_PROJECT_ID,
        bigquery_conn_id='gr_bigquery_conn',
        schema_fields=stage.books_schema)