Exemple #1
0
def ETL(table_name):
    """Run the CSV flow for ``table_name`` and finish with ``dump_to_s3``.

    The input file is ``tmp/<table_name>.csv`` inside the directory that
    contains this module.

    Args:
        table_name: Resource name; also the stem of the CSV file under
            ``tmp``.
    """
    base_path = create_base_path(__file__)
    csv_file = Path(__file__).parent.joinpath('tmp', f'{table_name}.csv')

    # Steps are created in the same order in which the flow runs them.
    source = load(str(csv_file), name=table_name, format='csv',
                  force_strings=True)
    lower_step = joined_lower(resources=table_name)
    sink = dump_to_s3(resources=table_name, params={'base_path': base_path})

    Flow(source, lower_step, sink).process()
Exemple #2
0
def ETL(data):
    """Feed ``data`` through the flow and finish with ``dump_to_s3``.

    NOTE(review): ``table_name`` is never assigned inside this function, so
    it has to come from an enclosing or module scope - confirm it exists.

    Args:
        data: First step handed to ``Flow``.
    """
    base_path = create_base_path(__file__)

    steps = [
        data,
        # Name the incoming resource, then give it a ``.csv`` path.
        update_resource(None, name=table_name),
        update_resource(resources=table_name, path=table_name + '.csv'),
        joined_lower(resources=table_name),
        dump_to_s3(resources=table_name, params={'base_path': base_path}),
    ]
    Flow(*steps).process()
Exemple #3
0
def ETL():
    """Process ``nps_boundry/usnps_parks.csv`` as resource ``usnps_parks``.

    The CSV is looked up relative to this module's directory, passed through
    ``joined_lower`` and handed to ``dump_to_s3``.
    """
    table_name = 'usnps_parks'
    base_path = create_base_path(__file__)
    csv_file = Path(__file__).parent.joinpath('nps_boundry', 'usnps_parks.csv')

    pipeline = Flow(
        load(str(csv_file), name=table_name, format='csv',
             force_strings=True),
        joined_lower(resources=table_name),
        dump_to_s3(resources=table_name, params={'base_path': base_path}),
    )
    pipeline.process()
Exemple #4
0
def ETL():
    """Load the ``doe_bluebook`` CSV from its URL and run the dump flow."""
    table_name = 'doe_bluebook'
    url = 'https://data.cityofnewyork.us/api/views/8b9a-pywy/rows.csv?accessType=DOWNLOAD'
    base_path = create_base_path(__file__)

    steps = [
        load(url, name=table_name, format='csv', force_strings=True),
        joined_lower(resources=table_name),
        dump_to_s3(resources=table_name, params={'base_path': base_path}),
    ]
    Flow(*steps).process()
Exemple #5
0
def ETL():
    """Process ``tmp/dsny_mtsgaragemaintenance.csv`` and dump the result.

    The CSV path is resolved relative to the directory of this module.
    """
    table_name = 'dsny_mtsgaragemaintenance'
    base_path = create_base_path(__file__)
    csv_file = Path(__file__).parent.joinpath(
        'tmp', 'dsny_mtsgaragemaintenance.csv')

    source = load(str(csv_file), name=table_name, format='csv',
                  force_strings=True)
    lower_step = joined_lower(resources=table_name)
    sink = dump_to_s3(resources=table_name, params={'base_path': base_path})

    Flow(source, lower_step, sink).process()
Exemple #6
0
def ETL():
    """Load ``housing_input_removals`` from GitHub and run the dump flow.

    This flow loads with ``force_strings=False`` and adds a string field
    named ``b`` (third argument ``''``) before dumping.
    """
    table_name = 'housing_input_removals'
    url = 'https://raw.githubusercontent.com/NYCPlanning/db-developments/master/developments_build/data/housing_input_removals.csv'
    base_path = create_base_path(__file__)

    source = load(url, name=table_name, format='csv', force_strings=False)
    lower_step = joined_lower(resources=table_name)
    extra_column = add_field('b', 'string', '')
    sink = dump_to_s3(resources=table_name, params={'base_path': base_path})

    Flow(source, lower_step, extra_column, sink).process()
Exemple #7
0
def ETL():
    """Load the ``bic_tradewaste`` CSV from its URL and run the dump flow."""
    table_name = 'bic_tradewaste'
    url = 'https://data.cityofnewyork.us/api/views/hsjb-p5ky/rows.csv'
    base_path = create_base_path(__file__)

    source = load(url, name=table_name, format='csv', force_strings=True)
    lower_step = joined_lower(resources=table_name)
    sink = dump_to_s3(resources=table_name, params={'base_path': base_path})

    Flow(source, lower_step, sink).process()
Exemple #8
0
def ETL():
    """Load ``hpd_hny_units_by_building`` from its URL and run the dump flow."""
    table_name = 'hpd_hny_units_by_building'
    url = 'https://data.cityofnewyork.us/api/views/hg8x-zxpr/rows.csv?accessType=DOWNLOAD'
    base_path = create_base_path(__file__)

    pipeline = Flow(
        load(url, name=table_name, format='csv', force_strings=True),
        joined_lower(resources=table_name),
        dump_to_s3(resources=table_name, params={'base_path': base_path}),
    )
    pipeline.process()
Exemple #9
0
def ETL():
    """Load ``doitt_buildingfootprints`` from its URL and dump it directly.

    This flow consists only of the load and dump steps.
    """
    table_name = 'doitt_buildingfootprints'
    url = 'https://data.cityofnewyork.us/api/views/pkvt-jviv/rows.csv?accessType=DOWNLOAD'
    base_path = create_base_path(__file__)

    source = load(url, name=table_name, format='csv', force_strings=True)
    sink = dump_to_s3(resources=table_name, params={'base_path': base_path})

    Flow(source, sink).process()
Exemple #10
0
def ETL():
    """Load the ``nysed_nonpublicenrollment`` XLSX workbook and dump it.

    The source is read with ``format='xlsx'``; the resource path is then set
    to ``nysed_nonpublicenrollment.csv`` before the dump step.
    """
    table_name = 'nysed_nonpublicenrollment'
    url = 'http://www.p12.nysed.gov/irs/statistics/nonpublic/2018-19_NonPub_EnrollmentbyGrade.xlsx'
    base_path = create_base_path(__file__)

    steps = [
        load(url, name=table_name, format='xlsx', force_strings=True),
        joined_lower(resources=table_name),
        update_resource(resources=table_name, path=f'{table_name}.csv'),
        dump_to_s3(resources=table_name, params={'base_path': base_path}),
    ]
    Flow(*steps).process()
Exemple #11
0
def ETL():
    """Load ``nysdoh_nursinghomes`` from its URL and run the dump flow.

    The resource path is set to ``nysdoh_nursinghomes.csv`` before dumping.
    """
    table_name = 'nysdoh_nursinghomes'
    url = 'https://health.data.ny.gov/api/views/izta-vnpq/rows.csv?accessType=DOWNLOAD'
    base_path = create_base_path(__file__)

    source = load(url, name=table_name, format='csv', force_strings=True)
    lower_step = joined_lower(resources=table_name)
    set_path = update_resource(resources=table_name,
                               path=f'{table_name}.csv')
    sink = dump_to_s3(resources=table_name, params={'base_path': base_path})

    Flow(source, lower_step, set_path, sink).process()
Exemple #12
0
def ETL():
    """Load ``nycha_communitycenters`` from its URL and run the dump flow."""
    table_name = 'nycha_communitycenters'
    url = 'https://data.cityofnewyork.us/api/views/crns-fw6u/rows.csv?accessType=DOWNLOAD'
    base_path = create_base_path(__file__)

    steps = [
        load(url, name=table_name, format='csv', force_strings=True),
        joined_lower(resources=table_name),
        dump_to_s3(resources=table_name, params={'base_path': base_path}),
    ]
    Flow(*steps).process()
Exemple #13
0
def ETL():
    """Load ``nysdoh_healthfacilities`` from its URL and run the dump flow."""
    table_name = 'nysdoh_healthfacilities'
    url = 'https://health.data.ny.gov/api/views/vn5v-hh5r/rows.csv?accessType=DOWNLOAD'
    base_path = create_base_path(__file__)

    source = load(url, name=table_name, format='csv', force_strings=True)
    lower_step = joined_lower(resources=table_name)
    sink = dump_to_s3(resources=table_name, params={'base_path': base_path})

    Flow(source, lower_step, sink).process()
Exemple #14
0
def ETL():
    """Load ``dcla_culturalinstitutions`` from its URL and run the dump flow."""
    table_name = 'dcla_culturalinstitutions'
    url = 'https://data.cityofnewyork.us/api/views/u35m-9t32/rows.csv?accessType=DOWNLOAD'
    base_path = create_base_path(__file__)

    steps = [
        load(url, name=table_name, format='csv', force_strings=True),
        joined_lower(resources=table_name),
        dump_to_s3(resources=table_name, params={'base_path': base_path}),
    ]
    Flow(*steps).process()
Exemple #15
0
def ETL():
    """Load the ``nysdec_solidwaste`` CSV from its URL and run the dump flow."""
    table_name = 'nysdec_solidwaste'
    url = 'https://data.ny.gov/api/views/2fni-raj8/rows.csv?accessType=DOWNLOAD'
    base_path = create_base_path(__file__)

    pipeline = Flow(
        load(url, name=table_name, format='csv', force_strings=True),
        joined_lower(resources=table_name),
        dump_to_s3(resources=table_name, params={'base_path': base_path}),
    )
    pipeline.process()
Exemple #16
0
def ETL(data):
    """Feed ``data`` through the ``nypl_libraries`` flow and dump it.

    The ``lon`` and ``lat`` fields are typed as ``string``; the resource is
    then named ``nypl_libraries`` and given the path ``nypl_libraries.csv``.

    Args:
        data: First step handed to ``Flow``.
    """
    table_name = 'nypl_libraries'
    base_path = create_base_path(__file__)

    # Type both coordinate fields as strings, in the original lon/lat order.
    coordinate_types = [
        set_type(field, type='string') for field in ('lon', 'lat')
    ]
    rename = update_resource(None, name=table_name)
    set_path = update_resource(resources=table_name,
                               path=f'{table_name}.csv')
    lower_step = joined_lower(resources=table_name)
    sink = dump_to_s3(resources=table_name, params={'base_path': base_path})

    Flow(data, *coordinate_types, rename, set_path, lower_step,
         sink).process()
Exemple #17
0
def ETL(data):
    """Run the ``foodbankny_foodbanks`` flow from the CSV beside this module.

    NOTE: ``data`` is accepted but not used; the flow reads
    ``foodbankny_foodbanks.csv`` from this module's directory instead.

    Args:
        data: Unused by the current flow.
    """
    table_name = 'foodbankny_foodbanks'
    base_path = create_base_path(__file__)
    csv_location = f'{str(Path(__file__).parent)}/foodbankny_foodbanks.csv'

    steps = [
        load(csv_location),
        # Name the loaded resource, then give it a ``.csv`` path.
        update_resource(None, name=table_name),
        update_resource(resources=table_name, path=f'{table_name}.csv'),
        joined_lower(resources=table_name),
        dump_to_s3(resources=table_name, params={'base_path': base_path}),
    ]
    Flow(*steps).process()
Exemple #18
0
def ETL():
    """Load ``nysopwdd_providers`` from its URL and run the dump flow.

    The resource path is set to ``nysopwdd_providers.csv`` before dumping.
    """
    table_name = 'nysopwdd_providers'
    url = 'https://data.ny.gov/api/views/ieqx-cqyk/rows.csv?accessType=DOWNLOAD'
    base_path = create_base_path(__file__)

    steps = [
        load(url, name=table_name, format='csv', force_strings=True),
        joined_lower(resources=table_name),
        update_resource(resources=table_name, path=f'{table_name}.csv'),
        dump_to_s3(resources=table_name, params={'base_path': base_path}),
    ]
    Flow(*steps).process()
Exemple #19
0
def ETL():
    """Load the ``qpl_libraries`` CSV from its URL and run the dump flow.

    Here the resource path is set to ``qpl_libraries.csv`` before the
    ``joined_lower`` step runs.
    """
    table_name = 'qpl_libraries'
    url = 'https://data.cityofnewyork.us/api/views/kh3d-xhq7/rows.csv?accessType=DOWNLOAD'
    base_path = create_base_path(__file__)

    source = load(url, name=table_name, format='csv', force_strings=True)
    set_path = update_resource(resources=table_name,
                               path=f'{table_name}.csv')
    lower_step = joined_lower(resources=table_name)
    sink = dump_to_s3(resources=table_name, params={'base_path': base_path})

    Flow(source, set_path, lower_step, sink).process()
Exemple #20
0
def ETL():
    """Load ``facilities_classification`` from GitHub and run the dump flow."""
    table_name = 'facilities_classification'
    url = 'https://raw.githubusercontent.com/NYCPlanning/db-facilities-tmp/dev/referencetables/classification.csv'
    base_path = create_base_path(__file__)

    source = load(url, name=table_name, format='csv', force_strings=True)
    lower_step = joined_lower(resources=table_name)
    sink = dump_to_s3(resources=table_name, params={'base_path': base_path})

    Flow(source, lower_step, sink).process()
Exemple #21
0
def ETL(table_name):
    """Run the flow on the first CSV found under this module's ``tmp`` folder.

    The ``tmp`` directory is searched recursively for entries whose suffix is
    ``.csv`` and the first match is loaded.

    Args:
        table_name: Resource name; the resource path becomes
            ``table_name + '.csv'``.

    Raises:
        IndexError: If no ``.csv`` entry exists under ``tmp``.
    """
    base_path = create_base_path(__file__)
    module_dir = Path(__file__).parent

    csv_candidates = []
    for candidate in Path(module_dir / 'tmp').glob('**/*'):
        if candidate.suffix == '.csv':
            csv_candidates.append(candidate)
    # NOTE(review): the first glob hit is used as-is; which file that is
    # when several CSVs exist depends on glob ordering - confirm intent.
    csv_file = csv_candidates[0]

    steps = [
        load(str(csv_file), name=table_name, format='csv',
             force_strings=True),
        joined_lower(resources=table_name),
        update_resource(None, name=table_name),
        update_resource(resources=table_name, path=table_name + '.csv'),
        dump_to_s3(resources=table_name, params={'base_path': base_path}),
    ]
    Flow(*steps).process()
Exemple #22
0
def ETL():
    """Process ``tmp/usdot_airports.csv`` with state and county filters.

    Two ``filter_rows`` steps are applied before dumping: one with
    ``state_name='NEW YORK'`` and one listing five ``county`` values.
    """
    table_name = 'usdot_airports'
    base_path = create_base_path(__file__)
    csv_file = Path(__file__).parent.joinpath('tmp', 'usdot_airports.csv')

    counties = ('NEW YORK', 'BRONX', 'KINGS', 'QUEENS', 'RICHMOND')

    source = load(str(csv_file), name=table_name, format='csv',
                  force_strings=True)
    lower_step = joined_lower(resources=table_name)
    state_filter = filter_rows(equals=[{'state_name': 'NEW YORK'}])
    # One ``{'county': ...}`` entry per county, in the listed order.
    county_filter = filter_rows(
        equals=[{'county': county} for county in counties])
    sink = dump_to_s3(resources=table_name, params={'base_path': base_path})

    Flow(source, lower_step, state_filter, county_filter, sink).process()
Exemple #23
0
def ETL():
    """Process ``nysed_activeinstitutions.csv`` and rename four fields.

    The CSV sits beside this module. After ``joined_lower``, four fields
    whose names contain parentheses are renamed to names without them, and
    the result is handed to ``dump_to_s3``.
    """
    table_name = 'nysed_activeinstitutions'
    base_path = create_base_path(__file__)
    csv_file = Path(__file__).parent.joinpath('nysed_activeinstitutions.csv')

    # (current name, new name) pairs, applied in this order.
    field_renames = (
        ('gis_longitute_(x)', 'gis_longitute_x'),
        ('gis_latitude_(y)', 'gis_latitude_y'),
        ('federal_information_processing_standard_(fips)_state_code',
         'federal_information_processing_standard_fips_state_code'),
        ('federal_information_processing_standard_(fips)_county_code',
         'federal_information_processing_standard_fips_county_code'),
    )

    source = load(str(csv_file), name=table_name, format='csv',
                  force_strings=True)
    lower_step = joined_lower(resources=table_name)
    rename_steps = [rename_field(old, new) for old, new in field_renames]
    sink = dump_to_s3(resources=table_name, params={'base_path': base_path})

    Flow(source, lower_step, *rename_steps, sink).process()