FROM
      `{GCP_PROJECT}.{PROJECT_DATASET}.github_agg`
      WHERE _PARTITIONTIME BETWEEN TIMESTAMP("{r"{{ yesterday_ds }}"}") AND TIMESTAMP("{r"{{ yesterday_ds }}"}")
      ) as b
    ON a.url = b.url
    ''',
    destination_dataset_table=f'{GCP_PROJECT}.{PROJECT_DATASET}.hackernews_github_agg${r"{{ yesterday_ds_nodash }}"}',
    bigquery_conn_id=BQ_CONN,
    dag=dag)

# Task 7: Check for data in the final table
# To test: docker-compose run --rm webserver airflow test bigquery_github_trends check_hackernews_github_join 2020-01-01
t7 = BigQueryCheckOperator(
    task_id='check_hackernews_github_join',
    sql=f'''
    #legacySQL
    SELECT
    partition_id
    FROM
    [{GCP_PROJECT}:{PROJECT_DATASET}.hackernews_github_agg$__PARTITIONS_SUMMARY__]
    WHERE partition_id = "{r"{{ yesterday_ds_nodash }}"}"
    ''',
    bigquery_conn_id=BQ_CONN,
    dag=dag)
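# Aside (a sketch, not part of the original DAG): with standard SQL the same
# partition-existence check can be written against INFORMATION_SCHEMA.PARTITIONS
# instead of the legacy $__PARTITIONS_SUMMARY__ decorator, roughly:
#   SELECT partition_id
#   FROM `{GCP_PROJECT}.{PROJECT_DATASET}.INFORMATION_SCHEMA.PARTITIONS`
#   WHERE table_name = 'hackernews_github_agg'
#     AND partition_id = '{{ yesterday_ds_nodash }}'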

t1.set_downstream(t3)
t3.set_downstream(t4)
t2.set_downstream(t5)
t4.set_downstream(t6)
t5.set_downstream(t6)
t6.set_downstream(t7)
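
For reference, the same dependency graph can also be wired with Airflow's bitshift composition operators; this is an equivalent sketch, not part of the original example:

t1 >> t3 >> t4
t2 >> t5
[t4, t5] >> t6
t6 >> t7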
Example #2
t1 = BigQueryOperator(
        # NOTE: the start of this operator call is truncated in the source;
        # the variable name matches the t1 referenced below and the task_id is assumed.
        task_id='bq_operator1',
        sql='''
        SELECT
          x1.*,
          (LAG(sum_per_day) OVER (PARTITION BY region_name ORDER BY date) - sum_per_day) * -1 AS new_confirmed_cases
        FROM (
          SELECT date, region_name, SUM(confirmed_cases) AS sum_per_day
          FROM `bigquery-public-data.covid19_italy.data_by_province`
          GROUP BY date, region_name
        ) x1
        ''',
        destination_dataset_table='{0}.{1}.datamart_covid_italy1'.format(
            BQ_PROJECT, BQ_DATASET),
        create_disposition='CREATE_IF_NEEDED',
        write_disposition='WRITE_TRUNCATE',
        allow_large_results=True,
        use_legacy_sql=False,
        bigquery_conn_id="bigquery_default",
        dag=dag)

t2 = BigQueryOperator(
        task_id='bq_operator2',
        sql='SELECT * FROM `bigquery-public-data.covid19_italy.data_by_region` LIMIT 1000',
        destination_dataset_table='{0}.{1}.datamart_covid_italy2'.format(
            BQ_PROJECT, BQ_DATASET),
        create_disposition='CREATE_IF_NEEDED',
        write_disposition='WRITE_TRUNCATE',
        allow_large_results=True,
        use_legacy_sql=False,
        bigquery_conn_id="bigquery_default",
        dag=dag)

# set dependencies
t1.set_downstream(t2)
        
# conn_id: see the connection configured in the Airflow settings / bigquery_default ???
#destination_project_dataset_table (str) – The dotted (<project>.|<project>:)<dataset>.<table> BigQuery table to load data into. 
#If <project> is not included, project will be the project defined in the connection json. (templated)
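
For illustration, the accepted forms look roughly like this (the project, dataset, and table names below are placeholders, not from the example above):

# Fully qualified: the project is read from the string itself
destination_dataset_table='my-gcp-project.my_dataset.my_table'
# Dataset.table only: the project falls back to the one defined in the connection
destination_dataset_table='my_dataset.my_table'
# The field is templated, so partition decorators and macros work too
destination_dataset_table='my_dataset.my_table${{ ds_nodash }}'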