destination_project_dataset_table='{}:firearm_seizures.seizures_temp'. format(os.environ['GCP_PROJECT']), bucket='{}_firearm_seizures'.format(os.environ['GCS_PREFIX']), source_objects=[ "avro_output/{}/{}/{}/*.avro".format(dt.strftime('%Y'), dt.strftime('%m').lower(), dt.strftime("%Y-%m-%d")) ], write_disposition='WRITE_APPEND', source_format='AVRO', time_partitioning={'type': 'DAY'}, dag=dag) bq_geo_join = BigQueryOperator( task_id='firearms_bq_geojoin', sql=build_revgeo_query('firearm_seizures', 'seizures_temp'), use_legacy_sql=False, destination_dataset_table='{}:firearm_seizures.seizures'.format( os.environ['GCP_PROJECT']), write_disposition='WRITE_APPEND', time_partitioning={'type': 'DAY'}, dag=dag) bq_drop_temp = BigQueryOperator( task_id='firearms_bq_drop_temp', sql='DROP TABLE `{}.firearm_seizures.seizures_temp`'.format( os.environ['GCP_PROJECT']), use_legacy_sql=False, dag=dag) gcs_load >> dataflow_task >> (bq_insert_temp,
dag=dag) accela_permits_bq_merge = BigQueryOperator( task_id='accela_permits_bq_merge', sql='SELECT * FROM {}.accela.permits_temp'.format( os.environ['GCLOUD_PROJECT']), use_legacy_sql=False, destination_dataset_table='{}:accela.permits_raw'.format( os.environ['GCLOUD_PROJECT']), write_disposition='WRITE_APPEND', time_partitioning={'type': 'DAY'}, dag=dag) accela_permits_geojoin = BigQueryOperator( task_id='accela_permits_geojoin', sql=build_revgeo_query('accela', 'permits_raw', 'id'), use_legacy_sql=False, destination_dataset_table='{}:accela.permits'.format( os.environ['GCLOUD_PROJECT']), write_disposition='WRITE_APPEND', time_partitioning={'type': 'DAY'}, dag=dag) accela_bq_drop_temp = BigQueryOperator( task_id='accela_bq_drop_temp', sql='DROP TABLE `{}.accela.permits_temp`'.format( os.environ['GCLOUD_PROJECT']), use_legacy_sql=False, dag=dag) accela_permits_beam_cleanup = BashOperator(
# Load the Dataflow avro output for DOMI permits into the raw BigQuery table.
# The source path is Jinja-templated on the execution date (ds).
computronix_domi_permits_bq = GoogleCloudStorageToBigQueryOperator(
    task_id='computronix_domi_permits_bq',
    destination_project_dataset_table='{}:computronix.domi_permits_raw'.format(
        os.environ['GCLOUD_PROJECT']),
    bucket='{}_computronix'.format(os.environ['GCS_PREFIX']),
    source_objects=[
        "domi_permits/avro_output/{{ ds|get_ds_year }}/{{ ds|get_ds_month }}/{{ ds }}/*.avro"
    ],
    write_disposition='WRITE_TRUNCATE',
    create_disposition='CREATE_IF_NEEDED',
    time_partitioning={'type': 'DAY'},
    source_format='AVRO',
    autodetect=True,
    dag=dag)

# Reverse-geocode the raw permits (joined on 'JOBID') into the published table.
computronix_domi_permits_geojoin = BigQueryOperator(
    task_id='computronix_domi_permits_geojoin',
    sql=build_revgeo_query('computronix', 'domi_permits_raw', 'JOBID'),
    use_legacy_sql=False,
    destination_dataset_table='{}:computronix.domi_permits'.format(
        os.environ['GCLOUD_PROJECT']),
    write_disposition='WRITE_TRUNCATE',
    time_partitioning={'type': 'DAY'},
    dag=dag)

# Remove leftover Dataflow staging files from the computronix bucket.
computronix_domi_permits_beam_cleanup = BashOperator(
    task_id='computronix_domi_permits_beam_cleanup',
    bash_command=airflow_utils.beam_cleanup_statement(
        '{}_computronix'.format(os.environ['GCS_PREFIX'])),
    dag=dag)

# gcs pull -> dataflow transform -> BQ load, then geojoin and cleanup fan out.
computronix_domi_permits_gcs >> computronix_domi_permits_dataflow >> \
    computronix_domi_permits_bq >> (computronix_domi_permits_geojoin,
                                    computronix_domi_permits_beam_cleanup)
dag=dag ) qalert_requests_bq_merge = BigQueryOperator( task_id='qalert_requests_bq_merge', sql='SELECT * FROM {}.qalert.requests_temp'.format(os.environ['GCLOUD_PROJECT']), use_legacy_sql=False, destination_dataset_table='{}:qalert.requests_raw'.format(os.environ['GCLOUD_PROJECT']), write_disposition='WRITE_APPEND', time_partitioning={'type': 'DAY'}, dag=dag ) qalert_requests_geojoin = BigQueryOperator( task_id='qalert_geojoin', sql=build_revgeo_query('qalert', 'requests_raw', 'id'), use_legacy_sql=False, destination_dataset_table='{}:qalert.requests'.format(os.environ['GCLOUD_PROJECT']), write_disposition='WRITE_TRUNCATE', time_partitioning={'type': 'DAY'}, dag=dag ) qalert_bq_drop_temp = BigQueryOperator( task_id='qalert_bq_drop_temp', sql='DROP TABLE `{}.qalert.requests_temp`'.format(os.environ['GCLOUD_PROJECT']), use_legacy_sql=False, dag=dag ) qalert_beam_cleanup = BashOperator(
task_id='firearms_bq_insert', destination_project_dataset_table='{}:firearm_seizures.seizures_raw'. format(os.environ['GCLOUD_PROJECT']), bucket='{}_firearm_seizures'.format(os.environ['GCS_PREFIX']), source_objects=[ "avro_output/{{ ds|get_ds_year }}/{{ ds|get_ds_month }}/{{ ds }}/*.avro" ], write_disposition='WRITE_APPEND', source_format='AVRO', autodetect=True, time_partitioning={'type': 'DAY'}, dag=dag) bq_geojoin = BigQueryOperator( task_id='qalert_geojoin', sql=build_revgeo_query('firearm_seizures', 'seizures_raw', 'address'), use_legacy_sql=False, destination_dataset_table='{}:firearm_seizures.seizures'.format( os.environ['GCLOUD_PROJECT']), write_disposition='WRITE_APPEND', time_partitioning={'type': 'DAY'}, dag=dag) beam_cleanup = BashOperator(task_id='firearms_beam_cleanup', bash_command=airflow_utils.beam_cleanup_statement( '{}_firearm_seizures'.format( os.environ['GCS_PREFIX'])), dag=dag) gcs_load >> dataflow_task >> (bq_insert, beam_cleanup) >> bq_geojoin
time_partitioning={'type': 'DAY'}, dag=dag) wprdc_dedup_fire = BigQueryOperator( task_id='wprdc_dedup_fire', sql=dedup_table('ems_fire_calls', 'fire_raw'), use_legacy_sql=False, destination_dataset_table='{}:ems_fire_calls.fire_raw'.format( os.environ['GCLOUD_PROJECT']), write_disposition='WRITE_TRUNCATE', time_partitioning={'type': 'DAY'}, dag=dag) wprdc_ems_geojoin = BigQueryOperator( task_id='wprdc_ems_geojoin', sql=build_revgeo_query('ems_fire_calls', 'ems_raw', 'call_id_hash'), use_legacy_sql=False, destination_dataset_table='{}:ems_fire_calls.ems_calls'.format( os.environ['GCLOUD_PROJECT']), write_disposition='WRITE_TRUNCATE', time_partitioning={'type': 'DAY'}, dag=dag) wprdc_fire_geojoin = BigQueryOperator( task_id='wprdc_fire_geojoin', sql=build_revgeo_query('ems_fire_calls', 'fire_raw', 'call_id_hash'), use_legacy_sql=False, destination_dataset_table='{}:ems_fire_calls.fire_calls'.format( os.environ['GCLOUD_PROJECT']), write_disposition='WRITE_TRUNCATE', time_partitioning={'type': 'DAY'},
os.environ['GCP_PROJECT']), bucket='{}_311'.format(os.environ['GCS_PREFIX']), source_objects=[ "requests/avro_output/{}/{}/{}/*.avro".format( dt.strftime('%Y'), dt.strftime('%m').lower(), dt.strftime("%Y-%m-%d")) ], write_disposition='WRITE_APPEND', create_disposition='CREATE_IF_NEEDED', source_format='AVRO', time_partitioning={'type': 'DAY'}, dag=dag) qalert_requests_geo_join = BigQueryOperator( task_id='qalert_requests_bq_geojoin', sql=build_revgeo_query('311', 'requests_temp'), use_legacy_sql=False, destination_dataset_table='{}:311.requests_geo_temp'.format( os.environ['GCP_PROJECT']), write_disposition='WRITE_APPEND', time_partitioning={'type': 'DAY'}, dag=dag) qalert_requests_bq_filter = BigQueryOperator( task_id='qalert_requests_bq_filter', sql=filter_old_values('311', 'requests_geo_temp', 'requests', 'id'), use_legacy_sql=False, dag=dag) qalert_requests_bq_merge = BigQueryOperator( task_id='qalert_requests_bq_merge',
destination_project_dataset_table='{}:parking_transactions.meters_raw'. format(os.environ['GCLOUD_PROJECT']), bucket='{}_parking'.format(os.environ['GCS_PREFIX']), source_objects=[ "meters/avro_output/{{ ds|get_ds_year }}/{{ ds|get_ds_month }}/{{ ds }}/*.avro" ], write_disposition='WRITE_TRUNCATE', create_disposition='CREATE_IF_NEEDED', source_format='AVRO', autodetect=True, time_partitioning={'type': 'DAY'}, dag=dag) parking_meters_geojoin = BigQueryOperator( task_id='parking_meters_geojoin', sql=build_revgeo_query('parking_transactions', 'meters_raw', 'id'), use_legacy_sql=False, destination_dataset_table='{}:parking_transactions.meters'.format( os.environ['GCLOUD_PROJECT']), write_disposition='WRITE_TRUNCATE', time_partitioning={'type': 'DAY'}, dag=dag) parking_beam_cleanup = BashOperator( task_id='parking_beam_cleanup', bash_command=airflow_utils.beam_cleanup_statement('{}_parking'.format( os.environ['GCS_PREFIX'])), dag=dag) parking_transactions_dataflow = BashOperator( task_id='parking_transactions_dataflow',