#: Clean and geocode claims data
clean_geocode = PythonOperator(
    task_id='clean_geocode_claims',
    python_callable=clean_geocode_claims,
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    dag=dag)

#: Upload prod claims file to S3
upload_claimstat_clean = S3FileTransferOperator(
    task_id='upload_claimstat_clean',
    source_base_path=conf['prod_data_dir'],
    source_key='claim_stat_datasd.csv',
    dest_s3_conn_id=conf['default_s3_conn_id'],
    dest_s3_bucket=conf['dest_s3_bucket'],
    dest_s3_key='risk/claims_clean_datasd_v1.csv',
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    replace=True,
    dag=dag)

#: Upload claims address book to S3
upload_addresses_to_S3 = S3FileTransferOperator(
    task_id='upload_claims_address_book',
    source_base_path=conf['temp_data_dir'],
    source_key='claims_address_book.csv',
    dest_s3_conn_id=conf['default_s3_conn_id'],
    dest_s3_bucket=conf['ref_s3_bucket'],
    dest_s3_key='claims_address_book.csv',
    on_failure_callback=notify,
    on_retry_callback=notify,
#: Inventory Doc To CSV
inventory_to_csv = PythonOperator(
    task_id='inventory_to_csv',
    python_callable=inventory_to_csv,
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    dag=dag)

#: Upload Inventory CSV to S3
upload_inventory = S3FileTransferOperator(
    task_id='upload_inventory',
    source_base_path=conf['prod_data_dir'],
    source_key='inventory_datasd_v1.csv',
    dest_s3_conn_id=conf['default_s3_conn_id'],
    dest_s3_bucket=conf['dest_s3_bucket'],
    dest_s3_key='inventory/inventory_datasd_v1.csv',
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    replace=True,
    dag=dag)

#: Update portal modified date
update_inventory_md = get_seaboard_update_dag('data-inventory.md', dag)

#: Execution Rules

#: Latest only for inventory to csv
inventory_to_csv.set_upstream(inv_latest_only)

#: Inventory csv gets created before it is uploaded
upload_inventory.set_upstream(inventory_to_csv)
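#: For illustration only: the same execution rules could equivalently be
#: declared with Airflow's bitshift composition (assuming Airflow 1.8+),
#: which reads left to right in execution order:
#:
#:     inv_latest_only >> inventory_to_csv >> upload_inventory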
#: Get fire_department data from DB
get_fd_data = PythonOperator(
    task_id='get_fd_data',
    python_callable=get_fd_data,
    provide_context=True,
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    dag=dag)

#: Upload prod fire_department_SD.csv file to S3
upload_fd_data = S3FileTransferOperator(
    task_id='upload_fd_data',
    source_base_path=conf['prod_data_dir'],
    source_key='fd_incidents_{0}_datasd_v1.csv'.format(cur_yr),
    dest_s3_conn_id=conf['default_s3_conn_id'],
    dest_s3_bucket=conf['dest_s3_bucket'],
    dest_s3_key='fd_cad/' + 'fd_incidents_{0}_datasd_v1.csv'.format(cur_yr),
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    replace=True,
    dag=dag)

#: Update data inventory json
update_json_date = PythonOperator(
    task_id='update_json_date',
    python_callable=update_json_date,
    provide_context=True,
    op_kwargs={'ds_fname': 'fire_ems_incidents'},
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    dag=dag)
#: Combine all transactions
combine_schedules = PythonOperator(
    task_id='combine_all_schedules',
    python_callable=combine_all_schedules,
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    dag=dag)

#: Upload prod transactions file to S3
upload_fin_support = S3FileTransferOperator(
    task_id='upload_financial_support',
    source_base_path=conf['prod_data_dir'],
    source_key='financial_support_' + str(cur_yr) + '_datasd_v1.csv',
    dest_s3_conn_id=conf['default_s3_conn_id'],
    dest_s3_bucket=conf['dest_s3_bucket'],
    dest_s3_key='campaign_fin/financial_support_' + str(cur_yr) + '_datasd_v1.csv',
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    replace=True,
    dag=dag)

#: Update data inventory json
update_json_date = PythonOperator(
    task_id='update_json_date',
    python_callable=update_json_date,
    provide_context=True,
    op_kwargs={'ds_fname': 'financial_trans_election_comms'},
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    dag=dag)

event_files = ["pkin", "pkout"]

for file in event_files:
    file_time = datetime.now().strftime('%Y_%m_%d_')
    file_name = f'{file_time}{file}.json'

    #: Create a separate upload task for each event file
    s3_upload = S3FileTransferOperator(
        task_id=f'upload_{file}',
        source_base_path=conf['prod_data_dir'],
        source_key=file_name,
        dest_s3_conn_id=conf['default_s3_conn_id'],
        dest_s3_bucket=conf['dest_s3_bucket'],
        dest_s3_key=f'cityiq/{file_name}',
        on_failure_callback=notify,
        on_retry_callback=notify,
        on_success_callback=notify,
        replace=True,
        dag=dag)

    #: Upload after getting events
    s3_upload.set_upstream(get_parking_bbox)

#: Execution Rules

#: Must get token after latest only operator
get_token_response.set_upstream(cityiq_latest_only)

#: Get events after getting token
    task_id='process_imcat',
    python_callable=process_paving_data,
    op_kwargs={'mode': 'imcat'},
    provide_context=True,
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    dag=dag)

#: Upload imcat streets file to S3
upload_imcat_data = S3FileTransferOperator(
    task_id='upload_streets_data_imcat',
    source_base_path=conf['prod_data_dir'],
    source_key='sd_paving_imcat_datasd_v1.csv',
    dest_s3_conn_id=conf['default_s3_conn_id'],
    dest_s3_bucket=conf['dest_s3_bucket'],
    dest_s3_key='tsw/sd_paving_imcat_datasd_v1.csv',
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    replace=True,
    dag=dag)

#: Upload sdif streets file to S3
upload_sdif_data = S3FileTransferOperator(
    task_id='upload_streets_data_sdif',
    source_base_path=conf['prod_data_dir'],
    source_key='sd_paving_datasd_v1.csv',
    dest_s3_conn_id=conf['default_s3_conn_id'],
    dest_s3_bucket=conf['dest_s3_bucket'],
    dest_s3_key='tsw/sd_paving_datasd_v1.csv',
    on_failure_callback=notify,
prod_files = budget_files + actuals_files
categories = ['_'.join(x.split('_')[0:2]) for x in prod_files]

for index, f in enumerate(prod_files):

    cat = categories[index]

    #: Upload budget files to S3
    upload_task = S3FileTransferOperator(
        task_id='upload_' + f[0:-11],
        source_base_path=conf['prod_data_dir'],
        source_key=f,
        dest_s3_conn_id=conf['default_s3_conn_id'],
        dest_s3_bucket=conf['dest_s3_bucket'],
        dest_s3_key='budget/' + f,
        on_failure_callback=notify,
        on_retry_callback=notify,
        on_success_callback=notify,
        replace=True,
        dag=dag)

    pos = [i for i, e in enumerate(categories) if e == cat]

    if cat == "budget_reference":
        ds_task = get_refs
        md = 'budget-reference-' + f.split('_')[2]

        #: Update portal modified date
        update_budget_md = get_seaboard_update_dag(md + '.md', dag)
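#: For illustration only: the task ids, categories, and Seaboard md names above
#: are derived purely from filename slices. Tracing one hypothetical entry of
#: prod_files (the real names come from budget_files and actuals_files):
#:
#:     f = 'budget_reference_depts_datasd.csv'        # hypothetical filename
#:     '_'.join(f.split('_')[0:2])                    # -> 'budget_reference' (category)
#:     f[0:-11]                                       # -> 'budget_reference_depts'
#:                                                    #    (strips the 11-char '_datasd.csv',
#:                                                    #    so task_id = 'upload_budget_reference_depts')
#:     'budget-reference-' + f.split('_')[2]          # -> 'budget-reference-depts' (md page name)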
#: Join BIDs to permits
join_bids = PythonOperator(
    task_id='join_bids',
    python_callable=join_bids,
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    dag=dag)

#: Upload data to S3
upload_dsd_permits = S3FileTransferOperator(
    task_id='upload_dsd_permits',
    source_base_path=conf['prod_data_dir'],
    source_key='dsd_permits_{}_datasd_v1.csv'.format(year),
    dest_s3_bucket=conf['dest_s3_bucket'],
    dest_s3_conn_id=conf['default_s3_conn_id'],
    dest_s3_key='dsd/' + 'dsd_permits_{}_datasd_v1.csv'.format(year),
    replace=True,
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    dag=dag)

#: Update permits.md file
update_permits_md = get_seaboard_update_dag('permits-dsd.md', dag)

#: Execution rules

#: dsd_permits_latest_only must run before get_permits_files
get_permits_files.set_upstream(dsd_permits_latest_only)
    dag=dag)

#: Geocode new entries and update production file
geocode_data = PythonOperator(
    task_id='geocode_data',
    python_callable=geocode_data,
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    dag=dag)

#: Upload TTCS address book to S3
addresses_to_S3 = S3FileTransferOperator(
    task_id='upload_address_book',
    source_base_path=conf['prod_data_dir'],
    source_key='ttcs_address_book.csv',
    dest_s3_conn_id=conf['default_s3_conn_id'],
    dest_s3_bucket=conf['ref_s3_bucket'],
    dest_s3_key='ttcs_address_book.csv',
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    replace=True,
    dag=dag)

#: Spatially join BIDs data
join_bids = PythonOperator(
    task_id='join_bids',
    python_callable=join_bids,
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    dag=dag)

#: Create subsets
#: Compress shapefile components to zip
shape_zip = PythonOperator(
    task_id='shape_to_zip',
    python_callable=shp_to_zip,
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    dag=dag)

#: Upload shp GIS file to S3
upload_shp_file = S3FileTransferOperator(
    task_id='tree_canopy_shp_to_S3',
    source_base_path=conf['prod_data_dir'],
    source_key='tree_canopy_datasd.zip',
    dest_s3_conn_id=conf['default_s3_conn_id'],
    dest_s3_bucket=conf['dest_s3_bucket'],
    dest_s3_key='sde/tree_canopy_datasd.zip',
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    replace=True,
    dag=dag)

#: Upload geojson GIS file to S3
upload_geojson_file = S3FileTransferOperator(
    task_id='tree_canopy_geojson_to_S3',
    source_base_path=conf['prod_data_dir'],
    source_key='tree_canopy_datasd.geojson',
    dest_s3_conn_id=conf['default_s3_conn_id'],
    dest_s3_bucket=conf['dest_s3_bucket'],
    dest_s3_key='sde/tree_canopy_datasd.geojson',
    on_failure_callback=notify,
#: Get latest bacteria tests for any given point
get_latest_bac_tests = PythonOperator(
    task_id='get_latest_bac_tests',
    python_callable=get_latest_bac_tests,
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    dag=dag)

#: Upload the full indicator bacteria tests result to S3
upload_indicator_bac_tests = S3FileTransferOperator(
    task_id='upload_indicator_bac_tests',
    source_base_path=conf['prod_data_dir'],
    source_key='indicator_bacteria_tests_datasd_v1.csv',
    dest_s3_bucket=conf['dest_s3_bucket'],
    dest_s3_conn_id=conf['default_s3_conn_id'],
    dest_s3_key='water_testing/indicator_bacteria_tests_datasd_v1.csv',
    replace=True,
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    dag=dag)

#: Upload the latest indicator bacteria tests to S3
upload_latest_indicator_bac_tests = S3FileTransferOperator(
    task_id='upload_latest_indicator_bac_tests',
    source_base_path=conf['prod_data_dir'],
    source_key='latest_indicator_bac_tests_datasd_v1.csv',
    dest_s3_bucket=conf['dest_s3_bucket'],
    dest_s3_conn_id=conf['default_s3_conn_id'],
    dest_s3_key='water_testing/latest_indicator_bac_tests_datasd_v1.csv',
    on_failure_callback=notify,
#: Process and geocode raw special events file
process_special_events = PythonOperator(
    task_id='process_special_events',
    python_callable=process_special_events,
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    dag=dag)

#: Upload events address book to S3
addresses_to_S3 = S3FileTransferOperator(
    task_id='upload_address_book',
    source_base_path=conf['prod_data_dir'],
    source_key='events_address_book.csv',
    dest_s3_conn_id=conf['default_s3_conn_id'],
    dest_s3_bucket=conf['ref_s3_bucket'],
    dest_s3_key='events_address_book.csv',
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    replace=True,
    dag=dag)

#: Upload prod SE file to S3
upload_special_events_web = S3FileTransferOperator(
    task_id='upload_special_events_web',
    source_base_path=conf['prod_data_dir'],
    source_key='special_events_list_datasd.csv',
    dest_s3_conn_id=conf['default_s3_conn_id'],
    dest_s3_bucket=conf['dest_s3_bucket'],
    dest_s3_key='special_events/special_events_list_datasd.csv',
    on_failure_callback=notify,
name_parts = file_name.split('_')

if 'v1' in name_parts:
    name_parts.remove('datasd')
    name_parts.remove('v1')

task_name = '_'.join(name_parts[3:-1])
md_name = '-'.join(name_parts[3:-1])

#: Upload prod gid file to S3
upload_task = S3FileTransferOperator(
    task_id='upload_' + task_name,
    source_base_path=conf['prod_data_dir'],
    source_key='get_it_done_{}_requests_datasd_v1.csv'.format(task_name),
    dest_s3_conn_id=conf['default_s3_conn_id'],
    dest_s3_bucket=conf['dest_s3_bucket'],
    dest_s3_key='get_it_done_311/get_it_done_{}_requests_datasd_v1.csv'.format(task_name),
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    replace=True,
    dag=dag)

if task_name in services:
    for service_index, service in enumerate(services):
        if task_name == service:
            #: Github .md update
            service_update_task = get_seaboard_update_dag(
                'gid-' + md_name + '.md', dag)

            #: Upload must run after the corresponding service get task
            upload_task.set_upstream(service_tasks[service_index])
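#: For illustration only: tracing the name parsing above for a hypothetical
#: Get It Done file (the real file list is built elsewhere in this DAG file):
#:
#:     file_name = 'get_it_done_illegal_dumping_requests_datasd_v1'   # hypothetical
#:     name_parts after removals -> ['get', 'it', 'done', 'illegal', 'dumping', 'requests']
#:     task_name = 'illegal_dumping'   -> task_id 'upload_illegal_dumping'
#:     md_name   = 'illegal-dumping'   -> Seaboard page 'gid-illegal-dumping.md'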
#: Process billing data
process_billing = PythonOperator(
    task_id='process_billing',
    python_callable=process_billing,
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    dag=dag)

#: Upload billing data to S3
billing_to_S3 = S3FileTransferOperator(
    task_id='billing_to_S3',
    source_base_path=conf['prod_data_dir'],
    source_key=datasd[0],
    dest_s3_bucket=conf['dest_s3_bucket'],
    dest_s3_conn_id=conf['default_s3_conn_id'],
    dest_s3_key='read/' + datasd[0],
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    dag=dag)

#: Retrieve READ leases data from FTP
get_leases = BashOperator(
    task_id='get_leases',
    bash_command=get_leases(),
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    dag=dag)

#: Process leases data
#: Compress shapefile components to zip
shape_zip = PythonOperator(
    task_id='sidewalks_shape_to_zip',
    python_callable=shp_to_zip,
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    dag=dag)

#: Upload OCI file to S3
upload_oci_file = S3FileTransferOperator(
    task_id='upload_oci',
    source_base_path=conf['prod_data_dir'],
    source_key='sidewalk_cond_datasd_v1.csv',
    dest_s3_conn_id=conf['default_s3_conn_id'],
    dest_s3_bucket=conf['dest_s3_bucket'],
    dest_s3_key='tsw/sidewalk_cond_datasd_v1.csv',
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    replace=True,
    dag=dag)

#: Upload shp GIS file to S3
upload_shp_file = S3FileTransferOperator(
    task_id='sidewalks_shp_to_S3',
    source_base_path=conf['prod_data_dir'],
    source_key='sidewalks.zip',
    dest_s3_conn_id=conf['default_s3_conn_id'],
    dest_s3_bucket=conf['dest_s3_bucket'],
    dest_s3_key='tsw/sidewalks.zip',
    on_failure_callback=notify,
    task_id='build_prod_file',
    python_callable=build_prod_file,
    provide_context=True,
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    dag=dag)

#: Uploads the generated production file
upload_prod_file = S3FileTransferOperator(
    task_id='upload_parking_full',
    source_base_path=conf['prod_data_dir'],
    source_key=flist['full'],
    dest_s3_bucket=conf['dest_s3_bucket'],
    dest_s3_conn_id=conf['default_s3_conn_id'],
    dest_s3_key='parking_meters/' + flist['full'],
    replace=True,
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    dag=dag)

#: Builds by month aggregation
build_by_month_aggregation = PythonOperator(
    task_id='build_by_month_agg',
    python_callable=build_aggregation,
    op_kwargs={'agg_type': 'pole_by_month'},
    provide_context=True,
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    dag=dag)

#: Divide doc table into latest resolutions and ordinances
div_doc_table = PythonOperator(
    task_id='divide_doc_table',
    python_callable=latest_res_ords,
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    dag=dag)

#: Upload latest resolutions and ordinances to S3
upload_reso_ord = S3FileTransferOperator(
    task_id='upload_documentum_reso_ordinance_latest',
    source_base_path=conf['prod_data_dir'],
    source_key='documentum_scs_council_reso_ordinance_v_2016_current.csv',
    dest_s3_conn_id=conf['default_s3_conn_id'],
    dest_s3_bucket=conf['dest_s3_bucket'],
    dest_s3_key='city_docs/documentum_scs_council_reso_ordinance_v_2016_current.csv',
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    replace=True,
    dag=dag)

#: Execution rules

#: documentum_docs_latest_only must run before get_doc_tables
get_doc_tables.set_upstream(documentum_docs_latest_only)

#: get_doc_tables must run before div_doc_table
div_doc_table.set_upstream(get_doc_tables)

#: div_doc_table must run before upload_reso_ord
upload_reso_ord.set_upstream(div_doc_table)
    task_id='clean_files',
    provide_context=True,
    python_callable=clean_files,
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    dag=dag)

#: Uploads the generated production file
upload_prod_file = S3FileTransferOperator(
    task_id='upload_meter_locs',
    source_base_path=conf['prod_data_dir'],
    source_key='treas_parking_meters_loc_datasd_v1.csv',
    dest_s3_bucket=conf['dest_s3_bucket'],
    dest_s3_conn_id=conf['default_s3_conn_id'],
    dest_s3_key='parking_meters/treas_parking_meters_loc_datasd_v1.csv',
    replace=True,
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    dag=dag)

#: Update data inventory json
update_json_date = PythonOperator(
    task_id='update_json_date',
    python_callable=update_json_date,
    provide_context=True,
    op_kwargs={'ds_fname': 'parking_meters_locations'},
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    dag=dag)

#: Process hate crimes data and save result to prod folder
process_hc_data = PythonOperator(
    task_id='process_data',
    python_callable=process_data,
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    dag=dag)

#: Upload prod file to S3
hc_to_S3 = S3FileTransferOperator(
    task_id='prod_file_to_S3',
    source_base_path=conf['prod_data_dir'],
    source_key='hate_crimes_datasd.csv',
    dest_s3_bucket=conf['dest_s3_bucket'],
    dest_s3_conn_id=conf['default_s3_conn_id'],
    dest_s3_key='pd/hate_crimes_datasd.csv',
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    dag=dag)

#: Update data inventory json
update_hc_date = PythonOperator(
    task_id='update_json_date',
    python_callable=update_json_date,
    provide_context=True,
    op_kwargs={'ds_fname': 'hate_crimes'},
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    dag=dag)
    dag=dag)

#: Process public art data
process_public_art = PythonOperator(
    task_id='process_public_art',
    python_callable=process_public_art,
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    dag=dag)

#: Upload prod art file to S3
upload_public_art = S3FileTransferOperator(
    task_id='upload_public_art',
    source_base_path=conf['prod_data_dir'],
    source_key='public_art_locations_datasd_v1.csv',
    dest_s3_conn_id=conf['default_s3_conn_id'],
    dest_s3_bucket=conf['dest_s3_bucket'],
    dest_s3_key='public_art/public_art_locations_datasd_v1.csv',
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    replace=True,
    dag=dag)

#: Update data inventory json
update_json_date = PythonOperator(
    task_id='update_json_date',
    python_callable=update_json_date,
    provide_context=True,
    op_kwargs={'ds_fname': 'civic_art_collection'},
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
#: Combine violations, process, and output file
combine_sw_violations = PythonOperator(
    task_id='combine_sw_violations',
    python_callable=combine_violations,
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    dag=dag)

#: Upload prod csv file to S3
violations_csv_to_s3 = S3FileTransferOperator(
    task_id='violations_csv_to_s3',
    source_base_path=conf['prod_data_dir'],
    source_key='stormwater_violations_merged.csv',
    dest_s3_conn_id=conf['default_s3_conn_id'],
    dest_s3_bucket=conf['dest_s3_bucket'],
    dest_s3_key='tsw_int/stormwater_violations_merged.csv',
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    replace=True,
    dag=dag)

#: Upload prod csv with null geos to S3
violations_csv_null_geos_to_s3 = S3FileTransferOperator(
    task_id='violations_csv_w_null_geos_to_s3',
    source_base_path=conf['prod_data_dir'],
    source_key='stormwater_violations_merged_null_geos.csv',
    dest_s3_conn_id=conf['default_s3_conn_id'],
    dest_s3_bucket=conf['dest_s3_bucket'],
    dest_s3_key='tsw_int/stormwater_violations_merged_null_geos.csv',
    on_failure_callback=notify,
#: Builds the prod file
build_traffic_counts = PythonOperator(
    task_id='build_traffic_counts',
    python_callable=build_traffic_counts,
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    dag=dag)

#: Uploads the generated production file
upload_traffic_counts = S3FileTransferOperator(
    task_id='upload_traffic_counts',
    source_base_path=conf['prod_data_dir'],
    source_key='traffic_counts_datasd_v1.csv',
    dest_s3_bucket=conf['dest_s3_bucket'],
    dest_s3_conn_id=conf['default_s3_conn_id'],
    dest_s3_key='traffic_counts/traffic_counts_datasd_v1.csv',
    replace=True,
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    dag=dag)

update_json_date = PythonOperator(
    task_id='update_json_date',
    python_callable=update_json_date,
    provide_context=True,
    op_kwargs={'ds_fname': 'traffic_volumes'},
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    dag=dag)
#: Get onbase tables
get_doc_tables = PythonOperator(
    task_id='get_onbase_tables',
    python_callable=get_onbase,
    on_failure_callback=notify,
    on_retry_callback=notify,
    on_success_callback=notify,
    dag=dag)

files = [f for f in os.listdir(conf['prod_data_dir'])]

for f in files:
    file_name = f.split('.')[0]
    name_parts = file_name.split('_')

    if name_parts[0] == "onbase":
        #: Upload onbase prod files to S3
        upload_doc_tables = S3FileTransferOperator(
            task_id='upload_' + file_name,
            source_base_path=conf['prod_data_dir'],
            source_key='{}.csv'.format(file_name),
            dest_s3_conn_id=conf['default_s3_conn_id'],
            dest_s3_bucket=conf['dest_s3_bucket'],
            dest_s3_key='city_docs/{}.csv'.format(file_name),
            on_failure_callback=notify,
            on_retry_callback=notify,
            on_success_callback=notify,
            replace=True,
            dag=dag)

        #: get_doc_tables must run before upload_doc_tables
        upload_doc_tables.set_upstream(get_doc_tables)
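#: For illustration only: the loop above scans the prod data directory at DAG
#: parse time and creates one upload task per onbase CSV. For a hypothetical
#: file 'onbase_test_gen_legacy.csv' sitting in conf['prod_data_dir']:
#:
#:     file_name -> 'onbase_test_gen_legacy'
#:     task_id   -> 'upload_onbase_test_gen_legacy'
#:     S3 key    -> 'city_docs/onbase_test_gen_legacy.csv'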
def create_sde_tasks(dag,
                     folder,
                     layer,
                     datasd_name,
                     md,
                     path_to_file,
                     sde_to_shp):
    """Dynamically create SDE Airflow tasks.

    dag: DAG defined in _dags file.
    folder: subfolder in the sde folder on S3.
    layer: layer name.
    datasd_name: layer name + _datasd.
    md: name of md file on Seaboard.
    path_to_file: poseidon path + datasd_name.
    sde_to_shp: _jobs specific sde_to_shp function.
    """
    #: Latest Only Operator for sde layer
    sde_latest_only = LatestOnlyOperator(
        task_id='{layer}_latest_only'.format(layer=layer),
        dag=dag)

    #: Convert sde table to shapefile format
    to_shp = PythonOperator(
        task_id='{layer}_to_shp'.format(layer=layer),
        python_callable=sde_to_shp,
        on_failure_callback=notify,
        on_retry_callback=notify,
        on_success_callback=notify,
        dag=dag)

    #: Convert shapefile to GeoJSON format
    to_geojson = BashOperator(
        task_id='{layer}_to_geojson'.format(layer=layer),
        bash_command=shp_to_geojson(path_to_file),
        on_failure_callback=notify,
        on_retry_callback=notify,
        on_success_callback=notify,
        dag=dag)

    #: Convert shapefile to TopoJSON format
    to_topojson = BashOperator(
        task_id='{layer}_to_topojson'.format(layer=layer),
        bash_command=shp_to_topojson(path_to_file),
        on_failure_callback=notify,
        on_retry_callback=notify,
        on_success_callback=notify,
        dag=dag)

    #: Compress shapefile components
    to_zip = PythonOperator(
        task_id='{layer}_shp_to_zip'.format(layer=layer),
        python_callable=shp_to_zip,
        op_kwargs={'datasd_name': datasd_name},
        on_failure_callback=notify,
        on_retry_callback=notify,
        on_success_callback=notify,
        dag=dag)

    #: Upload shapefile to S3
    shp_to_S3 = S3FileTransferOperator(
        task_id='{layer}_shp_to_S3'.format(layer=layer),
        source_base_path=conf['prod_data_dir'],
        source_key='{datasd_name}.zip'.format(datasd_name=datasd_name),
        dest_s3_conn_id=conf['default_s3_conn_id'],
        dest_s3_bucket=conf['dest_s3_bucket'],
        dest_s3_key='sde/{folder}/{datasd_name}.zip'
                    .format(folder=folder, datasd_name=datasd_name),
        on_failure_callback=notify,
        on_retry_callback=notify,
        on_success_callback=notify,
        replace=True,
        dag=dag)

    #: Upload geojson to S3
    geojson_to_S3 = S3FileTransferOperator(
        task_id='{layer}_geojson_to_S3'.format(layer=layer),
        source_base_path=conf['prod_data_dir'],
        source_key='{datasd_name}.geojson'.format(datasd_name=datasd_name),
        dest_s3_conn_id=conf['default_s3_conn_id'],
        dest_s3_bucket=conf['dest_s3_bucket'],
        dest_s3_key='sde/{folder}/{datasd_name}.geojson'
                    .format(folder=folder, datasd_name=datasd_name),
        on_failure_callback=notify,
        on_retry_callback=notify,
        on_success_callback=notify,
        replace=True,
        dag=dag)

    #: Upload topojson to S3
    topojson_to_S3 = S3FileTransferOperator(
        task_id='{layer}_topojson_to_S3'.format(layer=layer),
        source_base_path=conf['prod_data_dir'],
        source_key='{datasd_name}.topojson'.format(datasd_name=datasd_name),
        dest_s3_conn_id=conf['default_s3_conn_id'],
        dest_s3_bucket=conf['dest_s3_bucket'],
        dest_s3_key='sde/{folder}/{datasd_name}.topojson'
                    .format(folder=folder, datasd_name=datasd_name),
        on_failure_callback=notify,
        on_retry_callback=notify,
        on_success_callback=notify,
        replace=True,
        dag=dag)

    #: Update portal modified date
    update_md = get_seaboard_update_dag('{md}.md'.format(md=md), dag)

    if layer not in no_pbf:
        #: Convert GeoJSON to Geobuf format
        to_geobuf = PythonOperator(
            task_id='{layer}_to_geobuf'.format(layer=layer),
            python_callable=geojson_to_geobuf,
            op_kwargs={'path_to_file': path_to_file},
            on_failure_callback=notify,
            on_retry_callback=notify,
            on_success_callback=notify,
            dag=dag)

        #: Convert geobuf to gzipped geobuf
        to_gzip = PythonOperator(
            task_id='{layer}_geobuf_to_gzip'.format(layer=layer),
            python_callable=geobuf_to_gzip,
            op_kwargs={'datasd_name': datasd_name},
            on_failure_callback=notify,
            on_retry_callback=notify,
            on_success_callback=notify,
            dag=dag)

        #: Upload geobuf to S3
        geobuf_to_S3 = S3FileTransferOperator(
            task_id='{layer}_geobuf_to_S3'.format(layer=layer),
            source_base_path=conf['prod_data_dir'],
            source_key='{datasd_name}.pbf'.format(datasd_name=datasd_name),
            dest_s3_conn_id=conf['default_s3_conn_id'],
            dest_s3_bucket=conf['dest_s3_bucket'],
            dest_s3_key='sde/{folder}/{datasd_name}.pbf'
                        .format(folder=folder, datasd_name=datasd_name),
            on_failure_callback=notify,
            on_retry_callback=notify,
            on_success_callback=notify,
            replace=True,
            use_gzip=True,
            dag=dag)

        #: Conversion to geobuf is triggered after conversion to geojson.
        to_geobuf.set_upstream(to_geojson)

        #: Compression to gzip is triggered after conversion to geobuf.
        to_gzip.set_upstream(to_geobuf)

        #: Geobuf upload to S3 is triggered after compression to gzipped geobuf.
        geobuf_to_S3.set_upstream(to_gzip)

        #: Github update depends on geobuf S3 upload success.
        update_md.set_upstream(geobuf_to_S3)

    #: Execution rules:

    #: sde_latest_only must run before shp conversion.
    to_shp.set_upstream(sde_latest_only)

    #: Conversion to geojson is triggered after conversion to shapefile.
    to_geojson.set_upstream(to_shp)

    #: Conversion to topojson is triggered after conversion to shapefile.
    to_topojson.set_upstream(to_shp)

    #: Compression to zip is triggered after conversion to geojson and topojson.
    to_zip.set_upstream(to_geojson)
    to_zip.set_upstream(to_topojson)

    #: Shapefile upload to S3 is triggered after compression to zip.
    shp_to_S3.set_upstream(to_zip)

    #: Geojson upload to S3 is triggered after conversion to geojson.
    geojson_to_S3.set_upstream(to_geojson)

    #: Topojson upload to S3 is triggered after conversion to topojson.
    topojson_to_S3.set_upstream(to_topojson)

    #: Github update depends on shapefile, geojson, and topojson S3 upload success.
    update_md.set_upstream(shp_to_S3)
    update_md.set_upstream(geojson_to_S3)
    update_md.set_upstream(topojson_to_S3)
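

#: A minimal usage sketch of create_sde_tasks. The layer name, S3 subfolder,
#: md page, file path, and the imported sde_to_shp job below are hypothetical
#: assumptions for illustration; in the repo the real values come from the
#: individual SDE *_dags and *_jobs modules.
from dags.sde.parks_jobs import sde_to_shp  # hypothetical _jobs module

datasd_name = 'parks_datasd'  # hypothetical layer name + _datasd
path_to_file = conf['prod_data_dir'] + '/' + datasd_name  # assumed path join

create_sde_tasks(
    dag=dag,
    folder='parks',          # hypothetical sde/ subfolder on S3
    layer='parks',           # hypothetical layer name
    datasd_name=datasd_name,
    md='parks',              # hypothetical Seaboard md page
    path_to_file=path_to_file,
    sde_to_shp=sde_to_shp)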