def ETL(table_name):
    """Run ``tmp/<table_name>.csv`` through load -> joined_lower -> dump_to_s3.

    Args:
        table_name: Used as the CSV file stem inside the ``tmp`` directory
            next to this module and as the resource name for every step.

    Returns:
        None; the flow is executed only for its side effects.
    """
    dump_params = dict(base_path=create_base_path(__file__))
    csv_path = Path(__file__).parent / 'tmp' / f'{table_name}.csv'
    steps = (
        load(str(csv_path), name=table_name, format='csv', force_strings=True),
        joined_lower(resources=table_name),
        dump_to_s3(resources=table_name, params=dump_params),
    )
    Flow(*steps).process()
def ETL(data):
    """Run *data* through rename -> joined_lower -> dump_to_s3.

    NOTE(review): ``table_name`` is never assigned inside this function, so it
    must be resolvable from the surrounding module scope when the function is
    called -- confirm a module-level ``table_name`` exists.

    Args:
        data: Passed to ``Flow`` as its first step; its expected shape is not
            visible from this function.

    Returns:
        None; the flow is executed only for its side effects.
    """
    dump_params = dict(base_path=create_base_path(__file__))
    pipeline = Flow(
        data,
        # First give the incoming resource its name, then address it by that
        # name to set its output path.
        update_resource(None, name=table_name),
        update_resource(resources=table_name, path=table_name + '.csv'),
        joined_lower(resources=table_name),
        dump_to_s3(resources=table_name, params=dump_params),
    )
    pipeline.process()
def ETL():
    """Run the ``usnps_parks`` pipeline from the CSV bundled beside this module.

    The file ``nps_boundry/usnps_parks.csv`` (relative to this module's
    directory) is loaded as resource ``usnps_parks`` with
    ``force_strings=True``, passed through ``joined_lower`` and handed to
    ``dump_to_s3`` with the base path from ``create_base_path(__file__)``.

    Returns:
        None; the flow is executed only for its side effects.
    """
    resource = 'usnps_parks'
    dump_params = dict(base_path=create_base_path(__file__))
    # NOTE(review): the directory is spelled 'nps_boundry'; presumably this
    # matches the name on disk -- confirm before "correcting" it.
    csv_path = Path(__file__).parent / 'nps_boundry' / 'usnps_parks.csv'
    steps = [
        load(str(csv_path), name=resource, format='csv', force_strings=True),
        joined_lower(resources=resource),
        dump_to_s3(resources=resource, params=dump_params),
    ]
    Flow(*steps).process()
def ETL():
    """Run the ``doe_bluebook`` pipeline.

    Loads the CSV export from the data.cityofnewyork.us URL below as resource
    ``doe_bluebook`` (``force_strings=True``), applies ``joined_lower`` and
    hands the resource to ``dump_to_s3`` with the base path from
    ``create_base_path(__file__)``.

    Returns:
        None; the flow is executed only for its side effects.
    """
    resource = 'doe_bluebook'
    source_url = 'https://data.cityofnewyork.us/api/views/8b9a-pywy/rows.csv?accessType=DOWNLOAD'
    dump_params = dict(base_path=create_base_path(__file__))
    steps = (
        load(source_url, name=resource, format='csv', force_strings=True),
        joined_lower(resources=resource),
        dump_to_s3(resources=resource, params=dump_params),
    )
    Flow(*steps).process()
def ETL():
    """Run the ``dsny_mtsgaragemaintenance`` pipeline from a local CSV.

    Reads ``tmp/dsny_mtsgaragemaintenance.csv`` (relative to this module's
    directory) as resource ``dsny_mtsgaragemaintenance`` with
    ``force_strings=True``, applies ``joined_lower`` and hands the resource to
    ``dump_to_s3`` with the base path from ``create_base_path(__file__)``.

    Returns:
        None; the flow is executed only for its side effects.
    """
    resource = 'dsny_mtsgaragemaintenance'
    dump_params = dict(base_path=create_base_path(__file__))
    csv_path = Path(__file__).parent / 'tmp' / 'dsny_mtsgaragemaintenance.csv'
    pipeline = Flow(
        load(str(csv_path), name=resource, format='csv', force_strings=True),
        joined_lower(resources=resource),
        dump_to_s3(resources=resource, params=dump_params),
    )
    pipeline.process()
def ETL():
    """Run the ``housing_input_removals`` pipeline.

    Loads the CSV from the raw.githubusercontent.com URL below as resource
    ``housing_input_removals``. Note that this load passes
    ``force_strings=False``. After ``joined_lower``, a field named ``b`` of
    type ``string`` with default ``''`` is added via ``add_field`` before the
    resource is handed to ``dump_to_s3``.

    Returns:
        None; the flow is executed only for its side effects.
    """
    resource = 'housing_input_removals'
    source_url = 'https://raw.githubusercontent.com/NYCPlanning/db-developments/master/developments_build/data/housing_input_removals.csv'
    dump_params = dict(base_path=create_base_path(__file__))
    steps = [
        load(source_url, name=resource, format='csv', force_strings=False),
        joined_lower(resources=resource),
        # NOTE(review): the purpose of the extra empty 'b' column is not
        # visible here -- confirm with the consumer of this table.
        add_field('b', 'string', ''),
        dump_to_s3(resources=resource, params=dump_params),
    ]
    Flow(*steps).process()
def ETL():
    """Run the ``bic_tradewaste`` pipeline.

    Loads the CSV export from the data.cityofnewyork.us URL below as resource
    ``bic_tradewaste`` (``force_strings=True``), applies ``joined_lower`` and
    hands the resource to ``dump_to_s3`` with the base path from
    ``create_base_path(__file__)``.

    Returns:
        None; the flow is executed only for its side effects.
    """
    resource = 'bic_tradewaste'
    source_url = 'https://data.cityofnewyork.us/api/views/hsjb-p5ky/rows.csv'
    dump_params = dict(base_path=create_base_path(__file__))
    pipeline = Flow(
        load(source_url, name=resource, format='csv', force_strings=True),
        joined_lower(resources=resource),
        dump_to_s3(resources=resource, params=dump_params),
    )
    pipeline.process()
def ETL():
    """Run the ``hpd_hny_units_by_building`` pipeline.

    Loads the CSV export from the data.cityofnewyork.us URL below as resource
    ``hpd_hny_units_by_building`` (``force_strings=True``), applies
    ``joined_lower`` and hands the resource to ``dump_to_s3`` with the base
    path from ``create_base_path(__file__)``.

    Returns:
        None; the flow is executed only for its side effects.
    """
    resource = 'hpd_hny_units_by_building'
    source_url = 'https://data.cityofnewyork.us/api/views/hg8x-zxpr/rows.csv?accessType=DOWNLOAD'
    dump_params = dict(base_path=create_base_path(__file__))
    steps = (
        load(source_url, name=resource, format='csv', force_strings=True),
        joined_lower(resources=resource),
        dump_to_s3(resources=resource, params=dump_params),
    )
    Flow(*steps).process()
def ETL():
    """Run the ``doitt_buildingfootprints`` pipeline.

    Loads the CSV export from the data.cityofnewyork.us URL below as resource
    ``doitt_buildingfootprints`` (``force_strings=True``) and hands it
    directly to ``dump_to_s3``; no intermediate processing step is applied
    between the load and the dump.

    Returns:
        None; the flow is executed only for its side effects.
    """
    resource = 'doitt_buildingfootprints'
    source_url = 'https://data.cityofnewyork.us/api/views/pkvt-jviv/rows.csv?accessType=DOWNLOAD'
    dump_params = dict(base_path=create_base_path(__file__))
    pipeline = Flow(
        load(source_url, name=resource, format='csv', force_strings=True),
        dump_to_s3(resources=resource, params=dump_params),
    )
    pipeline.process()
def ETL():
    """Run the ``nysed_nonpublicenrollment`` pipeline from an XLSX workbook.

    Loads the workbook at the p12.nysed.gov URL below with ``format='xlsx'``
    as resource ``nysed_nonpublicenrollment`` (``force_strings=True``),
    applies ``joined_lower``, sets the resource path to
    ``nysed_nonpublicenrollment.csv`` and hands the resource to
    ``dump_to_s3``.

    Returns:
        None; the flow is executed only for its side effects.
    """
    resource = 'nysed_nonpublicenrollment'
    source_url = 'http://www.p12.nysed.gov/irs/statistics/nonpublic/2018-19_NonPub_EnrollmentbyGrade.xlsx'
    dump_params = dict(base_path=create_base_path(__file__))
    steps = [
        load(source_url, name=resource, format='xlsx', force_strings=True),
        joined_lower(resources=resource),
        # The source is .xlsx; the resource path is explicitly set to .csv.
        update_resource(resources=resource, path=resource + '.csv'),
        dump_to_s3(resources=resource, params=dump_params),
    ]
    Flow(*steps).process()
def ETL():
    """Run the ``nysdoh_nursinghomes`` pipeline.

    Loads the CSV export from the health.data.ny.gov URL below as resource
    ``nysdoh_nursinghomes`` (``force_strings=True``), applies
    ``joined_lower``, sets the resource path to ``nysdoh_nursinghomes.csv``
    and hands the resource to ``dump_to_s3``.

    Returns:
        None; the flow is executed only for its side effects.
    """
    resource = 'nysdoh_nursinghomes'
    source_url = 'https://health.data.ny.gov/api/views/izta-vnpq/rows.csv?accessType=DOWNLOAD'
    dump_params = dict(base_path=create_base_path(__file__))
    pipeline = Flow(
        load(source_url, name=resource, format='csv', force_strings=True),
        joined_lower(resources=resource),
        update_resource(resources=resource, path=resource + '.csv'),
        dump_to_s3(resources=resource, params=dump_params),
    )
    pipeline.process()
def ETL():
    """Run the ``nycha_communitycenters`` pipeline.

    Loads the CSV export from the data.cityofnewyork.us URL below as resource
    ``nycha_communitycenters`` (``force_strings=True``), applies
    ``joined_lower`` and hands the resource to ``dump_to_s3`` with the base
    path from ``create_base_path(__file__)``.

    Returns:
        None; the flow is executed only for its side effects.
    """
    resource = 'nycha_communitycenters'
    source_url = 'https://data.cityofnewyork.us/api/views/crns-fw6u/rows.csv?accessType=DOWNLOAD'
    dump_params = dict(base_path=create_base_path(__file__))
    steps = (
        load(source_url, name=resource, format='csv', force_strings=True),
        joined_lower(resources=resource),
        dump_to_s3(resources=resource, params=dump_params),
    )
    Flow(*steps).process()
def ETL():
    """Run the ``nysdoh_healthfacilities`` pipeline.

    Loads the CSV export from the health.data.ny.gov URL below as resource
    ``nysdoh_healthfacilities`` (``force_strings=True``), applies
    ``joined_lower`` and hands the resource to ``dump_to_s3`` with the base
    path from ``create_base_path(__file__)``.

    Returns:
        None; the flow is executed only for its side effects.
    """
    resource = 'nysdoh_healthfacilities'
    source_url = 'https://health.data.ny.gov/api/views/vn5v-hh5r/rows.csv?accessType=DOWNLOAD'
    dump_params = dict(base_path=create_base_path(__file__))
    pipeline = Flow(
        load(source_url, name=resource, format='csv', force_strings=True),
        joined_lower(resources=resource),
        dump_to_s3(resources=resource, params=dump_params),
    )
    pipeline.process()
def ETL():
    """Run the ``dcla_culturalinstitutions`` pipeline.

    Loads the CSV export from the data.cityofnewyork.us URL below as resource
    ``dcla_culturalinstitutions`` (``force_strings=True``), applies
    ``joined_lower`` and hands the resource to ``dump_to_s3`` with the base
    path from ``create_base_path(__file__)``.

    Returns:
        None; the flow is executed only for its side effects.
    """
    resource = 'dcla_culturalinstitutions'
    source_url = 'https://data.cityofnewyork.us/api/views/u35m-9t32/rows.csv?accessType=DOWNLOAD'
    dump_params = dict(base_path=create_base_path(__file__))
    steps = [
        load(source_url, name=resource, format='csv', force_strings=True),
        joined_lower(resources=resource),
        dump_to_s3(resources=resource, params=dump_params),
    ]
    Flow(*steps).process()
def ETL():
    """Run the ``nysdec_solidwaste`` pipeline.

    Loads the CSV export from the data.ny.gov URL below as resource
    ``nysdec_solidwaste`` (``force_strings=True``), applies ``joined_lower``
    and hands the resource to ``dump_to_s3`` with the base path from
    ``create_base_path(__file__)``.

    Returns:
        None; the flow is executed only for its side effects.
    """
    resource = 'nysdec_solidwaste'
    source_url = 'https://data.ny.gov/api/views/2fni-raj8/rows.csv?accessType=DOWNLOAD'
    dump_params = dict(base_path=create_base_path(__file__))
    pipeline = Flow(
        load(source_url, name=resource, format='csv', force_strings=True),
        joined_lower(resources=resource),
        dump_to_s3(resources=resource, params=dump_params),
    )
    pipeline.process()
def ETL(data):
    """Run *data* through the ``nypl_libraries`` pipeline.

    The ``lon`` and ``lat`` fields are set to type ``string``, the resource is
    named ``nypl_libraries`` with path ``nypl_libraries.csv``, then
    ``joined_lower`` is applied and the resource is handed to ``dump_to_s3``.

    Args:
        data: Passed to ``Flow`` as its first step; its expected shape is not
            visible from this function (presumably rows carrying ``lon`` and
            ``lat`` -- confirm against the caller).

    Returns:
        None; the flow is executed only for its side effects.
    """
    resource = 'nypl_libraries'
    dump_params = dict(base_path=create_base_path(__file__))
    steps = [
        data,
        set_type('lon', type='string'),
        set_type('lat', type='string'),
        # Name the incoming resource first so later steps can address it.
        update_resource(None, name=resource),
        update_resource(resources=resource, path=resource + '.csv'),
        joined_lower(resources=resource),
        dump_to_s3(resources=resource, params=dump_params),
    ]
    Flow(*steps).process()
def ETL(data):
    """Run the ``foodbankny_foodbanks`` pipeline from the CSV beside this module.

    The flow loads ``foodbankny_foodbanks.csv`` from this module's directory,
    names the resource ``foodbankny_foodbanks`` with path
    ``foodbankny_foodbanks.csv``, applies ``joined_lower`` and hands the
    resource to ``dump_to_s3``.

    Args:
        data: Accepted for interface compatibility but not used; the flow
            reads the bundled CSV instead of this argument.

    Returns:
        None; the flow is executed only for its side effects.
    """
    resource = 'foodbankny_foodbanks'
    dump_params = dict(base_path=create_base_path(__file__))
    csv_location = f'{str(Path(__file__).parent)}/foodbankny_foodbanks.csv'
    steps = (
        load(csv_location),
        # The load above passes no name, so the resource is named here.
        update_resource(None, name=resource),
        update_resource(resources=resource, path=resource + '.csv'),
        joined_lower(resources=resource),
        dump_to_s3(resources=resource, params=dump_params),
    )
    Flow(*steps).process()
def ETL():
    """Run the ``nysopwdd_providers`` pipeline.

    Loads the CSV export from the data.ny.gov URL below as resource
    ``nysopwdd_providers`` (``force_strings=True``), applies ``joined_lower``,
    sets the resource path to ``nysopwdd_providers.csv`` and hands the
    resource to ``dump_to_s3``.

    Returns:
        None; the flow is executed only for its side effects.
    """
    resource = 'nysopwdd_providers'
    source_url = 'https://data.ny.gov/api/views/ieqx-cqyk/rows.csv?accessType=DOWNLOAD'
    dump_params = dict(base_path=create_base_path(__file__))
    steps = [
        load(source_url, name=resource, format='csv', force_strings=True),
        joined_lower(resources=resource),
        update_resource(resources=resource, path=resource + '.csv'),
        dump_to_s3(resources=resource, params=dump_params),
    ]
    Flow(*steps).process()
def ETL():
    """Run the ``qpl_libraries`` pipeline.

    Loads the CSV export from the data.cityofnewyork.us URL below as resource
    ``qpl_libraries`` (``force_strings=True``), sets the resource path to
    ``qpl_libraries.csv``, applies ``joined_lower`` and hands the resource to
    ``dump_to_s3``.

    Returns:
        None; the flow is executed only for its side effects.
    """
    resource = 'qpl_libraries'
    source_url = 'https://data.cityofnewyork.us/api/views/kh3d-xhq7/rows.csv?accessType=DOWNLOAD'
    dump_params = dict(base_path=create_base_path(__file__))
    pipeline = Flow(
        load(source_url, name=resource, format='csv', force_strings=True),
        # In this flow the path is set before joined_lower runs.
        update_resource(resources=resource, path=resource + '.csv'),
        joined_lower(resources=resource),
        dump_to_s3(resources=resource, params=dump_params),
    )
    pipeline.process()
def ETL():
    """Run the ``facilities_classification`` pipeline.

    Loads the CSV from the raw.githubusercontent.com URL below as resource
    ``facilities_classification`` (``force_strings=True``), applies
    ``joined_lower`` and hands the resource to ``dump_to_s3`` with the base
    path from ``create_base_path(__file__)``.

    Returns:
        None; the flow is executed only for its side effects.
    """
    resource = 'facilities_classification'
    source_url = 'https://raw.githubusercontent.com/NYCPlanning/db-facilities-tmp/dev/referencetables/classification.csv'
    dump_params = dict(base_path=create_base_path(__file__))
    steps = (
        load(source_url, name=resource, format='csv', force_strings=True),
        joined_lower(resources=resource),
        dump_to_s3(resources=resource, params=dump_params),
    )
    Flow(*steps).process()
def ETL(table_name):
    """Load the first CSV found under ``tmp`` and push it through the dump flow.

    The ``tmp`` directory beside this module is searched recursively. The
    first path (in ``Path.glob`` iteration order) whose suffix is exactly
    ``.csv`` is loaded as resource ``table_name`` with ``force_strings=True``.
    The flow then applies ``joined_lower``, sets the resource name and the
    path ``<table_name>.csv``, and hands the resource to ``dump_to_s3``.

    Args:
        table_name: Resource name used by every step; also the stem of the
            resource path ``<table_name>.csv``.

    Returns:
        None; the flow is executed only for its side effects.

    Raises:
        IndexError: If no ``.csv`` file exists anywhere under ``tmp``.
    """
    base_path = create_base_path(__file__)
    tmp_dir = Path(__file__).parent / 'tmp'

    # Stop at the first match rather than materializing every path under tmp.
    # NOTE(review): glob order is not sorted here, so with several CSVs under
    # tmp the chosen file depends on directory iteration order -- confirm
    # that only one CSV is ever expected.
    file_path = next(
        (
            candidate
            for candidate in tmp_dir.glob('**/*')
            if candidate.suffix == '.csv'
        ),
        None,
    )
    if file_path is None:
        # IndexError is what indexing an empty match list raises; keeping the
        # type lets existing handlers work while the message now says what is
        # actually missing.
        raise IndexError(f'no .csv file found under {tmp_dir}')

    Flow(
        load(str(file_path), name=table_name, format='csv', force_strings=True),
        joined_lower(resources=table_name),
        update_resource(None, name=table_name),
        update_resource(resources=table_name, path=table_name + '.csv'),
        dump_to_s3(resources=table_name, params=dict(base_path=base_path)),
    ).process()
def ETL():
    """Run the ``usdot_airports`` pipeline from a local CSV with row filters.

    Reads ``tmp/usdot_airports.csv`` (relative to this module's directory) as
    resource ``usdot_airports`` with ``force_strings=True`` and applies
    ``joined_lower``. Two ``filter_rows`` steps follow: one with an ``equals``
    condition on ``state_name == 'NEW YORK'``, and one with ``equals``
    conditions on ``county`` for NEW YORK, BRONX, KINGS, QUEENS and RICHMOND.
    The resource is then handed to ``dump_to_s3``.

    Returns:
        None; the flow is executed only for its side effects.
    """
    resource = 'usdot_airports'
    dump_params = dict(base_path=create_base_path(__file__))
    csv_path = Path(__file__).parent / 'tmp' / 'usdot_airports.csv'

    county_names = ('NEW YORK', 'BRONX', 'KINGS', 'QUEENS', 'RICHMOND')
    # One single-key condition per county, in the order listed above.
    county_conditions = [dict(county=name) for name in county_names]

    steps = [
        load(str(csv_path), name=resource, format='csv', force_strings=True),
        joined_lower(resources=resource),
        filter_rows(equals=[dict(state_name='NEW YORK')]),
        filter_rows(equals=county_conditions),
        dump_to_s3(resources=resource, params=dump_params),
    ]
    Flow(*steps).process()
def ETL():
    """Run the ``nysed_activeinstitutions`` pipeline from the bundled CSV.

    Reads ``nysed_activeinstitutions.csv`` from this module's directory as
    resource ``nysed_activeinstitutions`` with ``force_strings=True`` and
    applies ``joined_lower``. Four fields whose names contain parentheses are
    then renamed to parenthesis-free names before the resource is handed to
    ``dump_to_s3``.

    Returns:
        None; the flow is executed only for its side effects.
    """
    resource = 'nysed_activeinstitutions'
    dump_params = dict(base_path=create_base_path(__file__))
    csv_path = Path(__file__).parent / 'nysed_activeinstitutions.csv'

    # (current field name, replacement name), applied in this order.
    renames = (
        ('gis_longitute_(x)', 'gis_longitute_x'),
        ('gis_latitude_(y)', 'gis_latitude_y'),
        (
            'federal_information_processing_standard_(fips)_state_code',
            'federal_information_processing_standard_fips_state_code',
        ),
        (
            'federal_information_processing_standard_(fips)_county_code',
            'federal_information_processing_standard_fips_county_code',
        ),
    )

    steps = [
        load(str(csv_path), name=resource, format='csv', force_strings=True),
        joined_lower(resources=resource),
        *[rename_field(old_name, new_name) for old_name, new_name in renames],
        dump_to_s3(resources=resource, params=dump_params),
    ]
    Flow(*steps).process()