FROM `{GCP_PROJECT}.{PROJECT_DATASET}.github_agg` WHERE _PARTITIONTIME BETWEEN TIMESTAMP("{r"{{ yesterday_ds }}"}") AND TIMESTAMP("{r"{{ yesterday_ds }}"}") ) as b ON a.url = b.url ''', destination_dataset_table=f'{GCP_PROJECT}.{PROJECT_DATASET}.hackernews_github_agg${r"{{ yesterday_ds_nodash }}"}', bigquery_conn_id= BQ_CONN, dag=dag) # Task 7: Check for data in the final table # To test: docker-compose run --rm webserver airflow test bigquery_github_trends check_hackernews_github_join 2020-01-01 t7 = BigQueryCheckOperator( task_id='check_hackernews_github_join', sql=f''' #legacySql SELECT partition_id FROM [{GCP_PROJECT}:{PROJECT_DATASET}.hackernews_github_agg$__PARTITIONS_SUMMARY__] WHERE partition_id = "{r"{{ yesterday_ds_nodash }}"}" ''', bigquery_conn_id= BQ_CONN, dag=dag) t1.set_downstream(t3) t3.set_downstream(t4) t2.set_downstream(t5) t4.set_downstream(t6) t5.set_downstream(t6) t6.set_downstream(t7)
sql = 'select x1.* , (LAG(sum_per_day) OVER(PARTITION BY region_name ORDER BY date) - sum_per_day)* -1 as new_confirmed_cases from (SELECT date, region_name, sum(confirmed_cases) as sum_per_day FROM `bigquery-public-data.covid19_italy.data_by_province` group by date, region_name) x1', destination_dataset_table='{0}.{1}.datamart_covid_italy1'.format( BQ_PROJECT, BQ_DATASET), create_disposition='CREATE_IF_NEEDED', write_disposition='WRITE_TRUNCATE', allow_large_results=True, use_legacy_sql=False, bigquery_conn_id="bigquery_default", dag=dag) t2 = BigQueryOperator( task_id = 'bq_operator2', sql = 'select * from `bigquery-public-data.covid19_italy.data_by_region` LIMIT 1000', destination_dataset_table='{0}.{1}.datamart_covid_italy2'.format( BQ_PROJECT, BQ_DATASET), create_disposition='CREATE_IF_NEEDED', write_disposition='WRITE_TRUNCATE', allow_large_results=True, use_legacy_sql=False, bigquery_conn_id="bigquery_default", dag=dag) ) #set dependencies t1.set_downstream(t2) #conn_id liat di settingan airflow nya / bigquery_default ??? #destination_project_dataset_table (str) – The dotted (<project>.|<project>:)<dataset>.<table> BigQuery table to load data into. #If <project> is not included, project will be the project defined in the connection json. (templated)