コード例 #1
0
ファイル: build.py プロジェクト: NYCPlanning/db-data-recipes
def ETL(data):
    """Run `data` through the flow below and hand the result to dump_to_s3.

    `table_name` is not assigned inside this function, so it has to be
    supplied by the enclosing module scope.
    """
    base_path = create_base_path(__file__)
    steps = [
        data,
        # Name the resource first, then give it a "<table_name>.csv" path.
        update_resource(None, name=table_name),
        update_resource(resources=table_name, path=table_name + '.csv'),
        joined_lower(resources=table_name),
        dump_to_s3(resources=table_name, params={'base_path': base_path}),
    ]
    Flow(*steps).process()
コード例 #2
0
ファイル: build.py プロジェクト: NYCPlanning/db-data-recipes
def ETL(table_name):
    """Load tmp/<table_name>.csv (beside this file) and pass it to dump_to_s3.

    Args:
        table_name: Resource name; also the stem of the CSV file under tmp/.
    """
    base_path = create_base_path(__file__)
    csv_file = Path(__file__).parent / 'tmp' / f'{table_name}.csv'

    source = load(
        str(csv_file),
        name=table_name,
        format='csv',
        force_strings=True,
    )
    normalise = joined_lower(resources=table_name)
    sink = dump_to_s3(resources=table_name, params={'base_path': base_path})

    Flow(source, normalise, sink).process()
コード例 #3
0
def ETL():
    """Fetch the doe_bluebook CSV from its URL and pass it to dump_to_s3."""
    table_name = 'doe_bluebook'
    url = 'https://data.cityofnewyork.us/api/views/8b9a-pywy/rows.csv?accessType=DOWNLOAD'
    base_path = create_base_path(__file__)

    steps = [
        load(url, name=table_name, format='csv', force_strings=True),
        joined_lower(resources=table_name),
        dump_to_s3(resources=table_name, params={'base_path': base_path}),
    ]
    Flow(*steps).process()
コード例 #4
0
ファイル: build.py プロジェクト: NYCPlanning/db-data-recipes
def ETL():
    """Fetch the doitt_buildingfootprints CSV and pass it to dump_to_s3.

    This flow has no joined_lower step: the loaded resource goes straight
    to the dump step.
    """
    table_name = 'doitt_buildingfootprints'
    url = 'https://data.cityofnewyork.us/api/views/pkvt-jviv/rows.csv?accessType=DOWNLOAD'
    base_path = create_base_path(__file__)

    source = load(url, name=table_name, format='csv', force_strings=True)
    sink = dump_to_s3(resources=table_name, params={'base_path': base_path})
    Flow(source, sink).process()
コード例 #5
0
ファイル: build.py プロジェクト: NYCPlanning/db-data-recipes
def ETL():
    """Load nps_boundry/usnps_parks.csv (beside this file) and dump it via dump_to_s3."""
    table_name = 'usnps_parks'
    base_path = create_base_path(__file__)
    csv_file = Path(__file__).parent / 'nps_boundry' / 'usnps_parks.csv'

    steps = [
        load(str(csv_file), name=table_name, format='csv', force_strings=True),
        joined_lower(resources=table_name),
        dump_to_s3(resources=table_name, params={'base_path': base_path}),
    ]
    Flow(*steps).process()
コード例 #6
0
ファイル: build.py プロジェクト: NYCPlanning/db-data-recipes
def ETL():
    """Fetch the hpd_hny_units_by_building CSV and pass it to dump_to_s3."""
    table_name = 'hpd_hny_units_by_building'
    url = 'https://data.cityofnewyork.us/api/views/hg8x-zxpr/rows.csv?accessType=DOWNLOAD'
    base_path = create_base_path(__file__)

    source = load(url, name=table_name, format='csv', force_strings=True)
    normalise = joined_lower(resources=table_name)
    sink = dump_to_s3(resources=table_name, params={'base_path': base_path})

    Flow(source, normalise, sink).process()
コード例 #7
0
ファイル: build.py プロジェクト: NYCPlanning/db-data-recipes
def ETL():
    """Fetch housing_input_removals.csv from GitHub and pass it to dump_to_s3.

    This flow loads with force_strings=False and adds a string field 'b'
    (with '' as its third argument) before the dump step.
    """
    table_name = 'housing_input_removals'
    url = 'https://raw.githubusercontent.com/NYCPlanning/db-developments/master/developments_build/data/housing_input_removals.csv'
    base_path = create_base_path(__file__)

    steps = [
        load(url, name=table_name, format='csv', force_strings=False),
        joined_lower(resources=table_name),
        add_field('b', 'string', ''),
        dump_to_s3(resources=table_name, params={'base_path': base_path}),
    ]
    Flow(*steps).process()
コード例 #8
0
def ETL():
    """Fetch the bic_tradewaste CSV from its URL and pass it to dump_to_s3."""
    table_name = 'bic_tradewaste'
    url = 'https://data.cityofnewyork.us/api/views/hsjb-p5ky/rows.csv'
    base_path = create_base_path(__file__)

    source = load(url, name=table_name, format='csv', force_strings=True)
    normalise = joined_lower(resources=table_name)
    sink = dump_to_s3(resources=table_name, params={'base_path': base_path})

    pipeline = Flow(source, normalise, sink)
    pipeline.process()
コード例 #9
0
ファイル: build.py プロジェクト: NYCPlanning/db-data-recipes
def ETL():
    """Load tmp/dsny_mtsgaragemaintenance.csv (beside this file) and dump it via dump_to_s3."""
    table_name = 'dsny_mtsgaragemaintenance'
    base_path = create_base_path(__file__)
    csv_file = Path(__file__).parent / 'tmp' / 'dsny_mtsgaragemaintenance.csv'

    steps = [
        load(str(csv_file), name=table_name, format='csv', force_strings=True),
        joined_lower(resources=table_name),
        dump_to_s3(resources=table_name, params={'base_path': base_path}),
    ]
    Flow(*steps).process()
コード例 #10
0
ファイル: build.py プロジェクト: NYCPlanning/db-data-recipes
def ETL():
    """Fetch the nycha_communitycenters CSV and pass it to dump_to_s3."""
    table_name = 'nycha_communitycenters'
    url = 'https://data.cityofnewyork.us/api/views/crns-fw6u/rows.csv?accessType=DOWNLOAD'
    base_path = create_base_path(__file__)

    source = load(url, name=table_name, format='csv', force_strings=True)
    normalise = joined_lower(resources=table_name)
    sink = dump_to_s3(resources=table_name, params={'base_path': base_path})

    Flow(source, normalise, sink).process()
コード例 #11
0
ファイル: build.py プロジェクト: NYCPlanning/db-data-recipes
def ETL():
    """Fetch the nysed_nonpublicenrollment workbook and pass it to dump_to_s3.

    The source is loaded with format='xlsx'; the resource path is then set
    to "<table_name>.csv" before the dump step.
    """
    table_name = 'nysed_nonpublicenrollment'
    url = 'http://www.p12.nysed.gov/irs/statistics/nonpublic/2018-19_NonPub_EnrollmentbyGrade.xlsx'
    base_path = create_base_path(__file__)

    steps = [
        load(url, name=table_name, format='xlsx', force_strings=True),
        joined_lower(resources=table_name),
        update_resource(resources=table_name, path=f'{table_name}.csv'),
        dump_to_s3(resources=table_name, params={'base_path': base_path}),
    ]
    Flow(*steps).process()
コード例 #12
0
ファイル: build.py プロジェクト: NYCPlanning/db-data-recipes
def ETL():
    """Fetch the nysdoh_nursinghomes CSV and pass it to dump_to_s3.

    The resource path is set to "<table_name>.csv" before the dump step.
    """
    table_name = 'nysdoh_nursinghomes'
    url = 'https://health.data.ny.gov/api/views/izta-vnpq/rows.csv?accessType=DOWNLOAD'
    base_path = create_base_path(__file__)

    source = load(url, name=table_name, format='csv', force_strings=True)
    normalise = joined_lower(resources=table_name)
    set_path = update_resource(resources=table_name, path=f'{table_name}.csv')
    sink = dump_to_s3(resources=table_name, params={'base_path': base_path})

    Flow(source, normalise, set_path, sink).process()
コード例 #13
0
ファイル: build.py プロジェクト: NYCPlanning/db-data-recipes
def ETL():
    """Fetch the dcla_culturalinstitutions CSV and pass it to dump_to_s3."""
    table_name = 'dcla_culturalinstitutions'
    url = 'https://data.cityofnewyork.us/api/views/u35m-9t32/rows.csv?accessType=DOWNLOAD'
    base_path = create_base_path(__file__)

    steps = [
        load(url, name=table_name, format='csv', force_strings=True),
        joined_lower(resources=table_name),
        dump_to_s3(resources=table_name, params={'base_path': base_path}),
    ]
    Flow(*steps).process()
コード例 #14
0
ファイル: build.py プロジェクト: NYCPlanning/db-data-recipes
def ETL():
    """Fetch the nysdec_solidwaste CSV and pass it to dump_to_s3."""
    table_name = 'nysdec_solidwaste'
    url = 'https://data.ny.gov/api/views/2fni-raj8/rows.csv?accessType=DOWNLOAD'
    base_path = create_base_path(__file__)

    source = load(url, name=table_name, format='csv', force_strings=True)
    normalise = joined_lower(resources=table_name)
    sink = dump_to_s3(resources=table_name, params={'base_path': base_path})

    Flow(source, normalise, sink).process()
コード例 #15
0
ファイル: build.py プロジェクト: NYCPlanning/db-data-recipes
def ETL(data):
    """Run `data` through the nypl_libraries flow and pass it to dump_to_s3.

    Args:
        data: First step handed to Flow; the 'lon' and 'lat' fields are set
            to type 'string' before the resource is named and dumped.
    """
    table_name = 'nypl_libraries'
    base_path = create_base_path(__file__)

    steps = [
        data,
        set_type('lon', type='string'),
        set_type('lat', type='string'),
        # Name the resource first, then give it a "<table_name>.csv" path.
        update_resource(None, name=table_name),
        update_resource(resources=table_name, path=f'{table_name}.csv'),
        joined_lower(resources=table_name),
        dump_to_s3(resources=table_name, params={'base_path': base_path}),
    ]
    Flow(*steps).process()
コード例 #16
0
def ETL():
    """Fetch the nysdoh_healthfacilities CSV and pass it to dump_to_s3."""
    table_name = 'nysdoh_healthfacilities'
    url = 'https://health.data.ny.gov/api/views/vn5v-hh5r/rows.csv?accessType=DOWNLOAD'
    base_path = create_base_path(__file__)

    steps = [
        load(url, name=table_name, format='csv', force_strings=True),
        joined_lower(resources=table_name),
        dump_to_s3(resources=table_name, params={'base_path': base_path}),
    ]
    Flow(*steps).process()
コード例 #17
0
ファイル: build.py プロジェクト: NYCPlanning/db-data-recipes
def ETL():
    """Fetch the nysopwdd_providers CSV and pass it to dump_to_s3.

    The resource path is set to "<table_name>.csv" after joined_lower and
    before the dump step.
    """
    table_name = 'nysopwdd_providers'
    url = 'https://data.ny.gov/api/views/ieqx-cqyk/rows.csv?accessType=DOWNLOAD'
    base_path = create_base_path(__file__)

    source = load(url, name=table_name, format='csv', force_strings=True)
    normalise = joined_lower(resources=table_name)
    set_path = update_resource(resources=table_name, path=f'{table_name}.csv')
    sink = dump_to_s3(resources=table_name, params={'base_path': base_path})

    Flow(source, normalise, set_path, sink).process()
コード例 #18
0
ファイル: build.py プロジェクト: NYCPlanning/db-data-recipes
def ETL():
    """Fetch the qpl_libraries CSV and pass it to dump_to_s3.

    Here the resource path is set to "<table_name>.csv" before the
    joined_lower step.
    """
    table_name = 'qpl_libraries'
    url = 'https://data.cityofnewyork.us/api/views/kh3d-xhq7/rows.csv?accessType=DOWNLOAD'
    base_path = create_base_path(__file__)

    steps = [
        load(url, name=table_name, format='csv', force_strings=True),
        update_resource(resources=table_name, path=f'{table_name}.csv'),
        joined_lower(resources=table_name),
        dump_to_s3(resources=table_name, params={'base_path': base_path}),
    ]
    Flow(*steps).process()
コード例 #19
0
def ETL(data):
    """Load foodbankny_foodbanks.csv from this file's directory and dump it via dump_to_s3.

    Args:
        data: Accepted but not used; the flow reads the CSV file instead.
    """
    table_name = 'foodbankny_foodbanks'
    base_path = create_base_path(__file__)
    source_csv = f'{Path(__file__).parent}/foodbankny_foodbanks.csv'

    steps = [
        load(source_csv),
        # Name the resource first, then give it a "<table_name>.csv" path.
        update_resource(None, name=table_name),
        update_resource(resources=table_name, path=f'{table_name}.csv'),
        joined_lower(resources=table_name),
        dump_to_s3(resources=table_name, params={'base_path': base_path}),
    ]
    Flow(*steps).process()
コード例 #20
0
ファイル: build.py プロジェクト: NYCPlanning/db-data-recipes
def ETL():
    """Fetch the facilities classification CSV from GitHub and pass it to dump_to_s3."""
    table_name = 'facilities_classification'
    url = 'https://raw.githubusercontent.com/NYCPlanning/db-facilities-tmp/dev/referencetables/classification.csv'
    base_path = create_base_path(__file__)

    source = load(url, name=table_name, format='csv', force_strings=True)
    normalise = joined_lower(resources=table_name)
    sink = dump_to_s3(resources=table_name, params={'base_path': base_path})

    Flow(source, normalise, sink).process()
コード例 #21
0
ファイル: build.py プロジェクト: NYCPlanning/db-data-recipes
def ETL(table_name):
    """Load the first .csv found anywhere under tmp/ and dump it via dump_to_s3.

    Args:
        table_name: Name given to the loaded resource; the resource path is
            set to "<table_name>.csv".

    Raises:
        IndexError: If no entry with a '.csv' suffix exists under tmp/.
    """
    base_path = create_base_path(__file__)
    source_dir = Path(__file__).parent

    # Recursive search; the first match in glob order is the one used.
    csv_candidates = []
    for entry in Path(source_dir / 'tmp').glob('**/*'):
        if entry.suffix == '.csv':
            csv_candidates.append(entry)
    file_path = csv_candidates[0]

    steps = [
        load(str(file_path), name=table_name, format='csv', force_strings=True),
        joined_lower(resources=table_name),
        update_resource(None, name=table_name),
        update_resource(resources=table_name, path=table_name + '.csv'),
        dump_to_s3(resources=table_name, params={'base_path': base_path}),
    ]
    Flow(*steps).process()
コード例 #22
0
def ETL():
    """Load the 'latest' version of each housing input dataset and dump via dump_2_s3.

    Each dataset name below becomes one ``Load(name, 'latest')`` step, in
    the listed order, followed by a single dump_2_s3 step.
    """
    base_path = create_base_path(__file__)

    dataset_names = (
        'housing_input_hny_job_manual',
        'housing_input_removals',
        'hpd_hny_units_by_building',
        'hpd_hny_units_by_project',
    )
    # Currently disabled inputs: housing_input_dcpattributes,
    # housing_input_lookup_occupancy, housing_input_lookup_status,
    # dob_cofos, dob_jobapplications, dob_permitissuance.

    loaders = [Load(name, 'latest') for name in dataset_names]
    Flow(*loaders, dump_2_s3(params={'base_path': base_path})).process()
コード例 #23
0
ファイル: build.py プロジェクト: NYCPlanning/db-data-recipes
def ETL():
    """Load tmp/usdot_airports.csv, apply two filter_rows steps, and dump via dump_to_s3.

    The first filter is given state_name 'NEW YORK'; the second is given
    the five county values listed in `counties`.
    """
    table_name = 'usdot_airports'
    base_path = create_base_path(__file__)
    csv_file = Path(__file__).parent / 'tmp' / 'usdot_airports.csv'
    counties = ('NEW YORK', 'BRONX', 'KINGS', 'QUEENS', 'RICHMOND')

    steps = [
        load(str(csv_file), name=table_name, format='csv', force_strings=True),
        joined_lower(resources=table_name),
        filter_rows(equals=[{'state_name': 'NEW YORK'}]),
        filter_rows(equals=[{'county': county} for county in counties]),
        dump_to_s3(resources=table_name, params={'base_path': base_path}),
    ]
    Flow(*steps).process()
コード例 #24
0
ファイル: build.py プロジェクト: NYCPlanning/db-data-recipes
def ETL():
    """Load nysed_activeinstitutions.csv, rename four fields, and dump via dump_to_s3.

    Each rename drops the parentheses from a field name, e.g.
    'gis_latitude_(y)' becomes 'gis_latitude_y'.
    """
    table_name = 'nysed_activeinstitutions'
    base_path = create_base_path(__file__)
    csv_file = Path(__file__).parent / 'nysed_activeinstitutions.csv'

    # (old name, new name) pairs, applied in this order.
    renames = [
        ('gis_longitute_(x)', 'gis_longitute_x'),
        ('gis_latitude_(y)', 'gis_latitude_y'),
        ('federal_information_processing_standard_(fips)_state_code',
         'federal_information_processing_standard_fips_state_code'),
        ('federal_information_processing_standard_(fips)_county_code',
         'federal_information_processing_standard_fips_county_code'),
    ]

    steps = [
        load(str(csv_file), name=table_name, format='csv', force_strings=True),
        joined_lower(resources=table_name),
    ]
    for old_name, new_name in renames:
        steps.append(rename_field(old_name, new_name))
    steps.append(
        dump_to_s3(resources=table_name, params={'base_path': base_path}))

    Flow(*steps).process()
コード例 #25
0
ファイル: build.py プロジェクト: NYCPlanning/db-data-recipes
def ETL():
    """Load the 'latest' version of each zoning-related dataset and dump via dump_2_s3.

    Each dataset name below becomes one ``Load(name, 'latest')`` step, in
    the listed order, followed by a single dump_2_s3 step.
    """
    base_path = create_base_path(__file__)

    dataset_names = (
        'dcp_commercialoverlay',
        'dcp_limitedheight',
        'dcp_mih',
        'dof_dtm',
        'dcp_specialpurpose',
        'dcp_specialpurposesubdistricts',
        'dcp_zoningdistricts',
        'dcp_zoningmapamendments',
        'dcp_zoningtaxlots',
        'dcp_zoningmapindex',
    )

    loaders = [Load(name, 'latest') for name in dataset_names]
    Flow(*loaders, dump_2_s3(params={'base_path': base_path})).process()
コード例 #26
0
# Size in bytes used for both the multipart threshold and the chunk size.
MULTIPART_SIZE_BYTES = 100 * 1024 ** 2  # 100 MiB


def ETL(table_name):
    """Upload tmp/<table_name>.csv with client.upload_file and print the elapsed time.

    The object key is "<create_base_path(__file__)>/<table_name>.csv", the
    bucket name is read from the BUCKET environment variable, and the
    object is uploaded with a 'public-read' ACL.

    Args:
        table_name: Stem of the CSV file under tmp/ and of the object key.
    """
    key = str(Path(create_base_path(__file__)) / f'{table_name}.csv')
    file_path = str(Path(__file__).parent / 'tmp' / f'{table_name}.csv')
    # Fall back to text/plain when the type cannot be guessed from the key.
    content_type, _ = mimetypes.guess_type(key)
    client = make_client()
    bucket = os.environ.get('BUCKET')

    # The sizes must be computed with `**`: `^` is bitwise XOR and binds
    # more loosely than `*`, so `1024^2*100` evaluates to 1224, not 100 MiB.
    config = TransferConfig(
        multipart_threshold=MULTIPART_SIZE_BYTES,
        max_concurrency=10,
        multipart_chunksize=MULTIPART_SIZE_BYTES,
        use_threads=True,
    )

    beg_ts = time.time()
    client.upload_file(
        Filename=file_path,
        Bucket=bucket,
        Config=config,
        ExtraArgs={
            'ACL': 'public-read',
            'ContentType': content_type or 'text/plain',
        },
        Key=key,
    )
    end_ts = time.time()
    print(f'dumped to {key}, elapsed time: {end_ts - beg_ts}')
コード例 #27
0
def ETL():
    """Load the 'latest' version of each facilities dataset and dump via dump_2_s3.

    Each dataset name below becomes one ``Load(name, 'latest')`` step, in
    the listed order, followed by a single dump_2_s3 step. Every dataset
    is listed exactly once.
    """
    base_path = create_base_path(__file__)

    dataset_names = (
        'doe_universalprek',
        'hhc_hospitals',
        'dcla_culturalinstitutions',
        'nycha_policeservice',
        'dohmh_daycare',
        'dpr_parksproperties',
        'doe_busroutesgarages',
        'dcp_pops',
        'dcas_colp',
        'dfta_contracts',
        'dycd_afterschoolprograms',
        'sbs_workforce1',
        'nysdec_solidwaste',
        'nysomh_mentalhealth',
        'nysdoh_healthfacilities',
        'nysopwdd_providers',
        'usnps_parks',
        'dca_operatingbusinesses',
        'dep_wwtc',
        'foodbankny_foodbanks',
        'bpl_libraries',
        'qpl_libraries',
        'dsny_mtsgaragemaintenance',
        'doe_lcgms',
        'nysdoh_nursinghomes',
        'nysed_activeinstitutions',
        'usdot_airports',
        'nypl_libraries',
        'usdot_ports',
        'dot_mannedfacilities',
        'dot_bridgehouses',
        'dot_ferryterminals',
        'dot_publicparking',
        'hra_centers',
        'nysdec_lands',
        'nycha_communitycenters',
        'moeo_socialservicesiteloactions',
        'fbop_corrections',
        'nysparks_historicplaces',
        'uscourts_courts',
        'nysocfs_offices',
        'nysoasas_programs',
        'nysdoccs_corrections',
        'nycdoc_corrections',
        'dot_pedplazas',
        'nycourts_courts',
        'dcp_sfpsd',
        'nysparks_parks',
        'doe_bluebook',
        'acs_daycareheadstart',
        'fdny_firehouses',
    )
    # Geo boundary datasets, currently disabled:
    #   doitt_buildingcentroids, dcp_boroboundaries_wi, dcp_cdboundaries,
    #   dcp_censustracts, dcp_councildistricts, dcp_ntaboundaries,
    #   dcp_policeprecincts, dcp_school_districts, doitt_zipcodeboundaries

    loaders = [Load(name, 'latest') for name in dataset_names]
    Flow(*loaders, dump_2_s3(params={'base_path': base_path})).process()