def parse_file_to_sql(): # parse NASS Data df = parse_file() df.rename({'CV (%)': 'CV_pct'}, axis='columns', inplace=True) # to sql types = { 'Program': TEXT, 'year': INTEGER, 'Period': TEXT, 'Geo Level': TEXT, 'region': TEXT, 'State ANSI': INTEGER, 'watershed_code': INTEGER, 'Commodity': TEXT, 'Data Item': TEXT, 'Domain': TEXT, 'naics': INTEGER, 'value': FLOAT, 'CV_pct': TEXT, 'units': TEXT } df.to_sql('nass', con=engine, if_exists='replace', dtype=types) tbl_desc = 'USDA NASS -- https://quickstats.nass.usda.gov/#777837D3-E71B-323A-8137-D10646E77104' comment = text( "COMMENT ON TABLE nass IS '{}';".format(tbl_desc)).execution_options( autocommit=True) engine.execute(comment)
def join_all_usatrade_to_sql(): # parse USA Trade Data df = join_all_usatrade() # drop annoying partial year data df.drop(df[df['year'] == '2017 through November'].index, inplace=True) df.reset_index(drop=True, inplace=True) # to sql types = {'flow': TEXT, 'region': TEXT, 'Country': TEXT, 'year': TEXT, 'value': FLOAT, 'NAICS': INTEGER, 'Commodity_desc': TEXT, 'units': TEXT, 'Commodity': TEXT} df.to_sql('usatrade', con=engine, if_exists='replace', dtype=types) tbl_desc = 'USA Trade -- https://usatrade.census.gov/' comment = text("COMMENT ON TABLE usatrade IS '{}';".format( tbl_desc)).execution_options(autocommit=True) engine.execute(comment)
def parse_file_to_sql(): # parse EIA Crude Oil Data (BlueNOTE) Data df = parse_file() # to sql types = {'year': INTEGER, 'price': FLOAT, 'units': TEXT, 'notes': TEXT} df.to_sql('eia_crude_oil', con=engine, if_exists='replace', dtype=types) tbl_desc = 'EIA Annual Crude Oil Prices -- https://www.eia.gov/dnav/pet/pet_pri_rac2_dcu_nus_a.htm' comment = text("COMMENT ON TABLE eia_crude_oil IS '{}';".format( tbl_desc)).execution_options(autocommit=True) engine.execute(comment)
def join_all_emissions_to_sql(): # parse EIA SEDS emissions df = join_all_emissions() types = { 'source': TEXT, 'region': TEXT, 'year': INTEGER, 'emissions': FLOAT, 'units': TEXT } df.to_sql('emissions', con=engine, if_exists='replace', dtype=types) tbl_desc = 'EIA State Energy Data System -- https://www.eia.gov/environment/emissions/state/' comment = text("COMMENT ON TABLE emissions IS '{}';".format( tbl_desc)).execution_options(autocommit=True) engine.execute(comment)
def file_parser_to_sql(): df = file_parser() # to sql types = { 'from_industry': TEXT, 'to_industry': TEXT, 'from_industry_desc': TEXT, 'to_industry_desc': TEXT, 'year': INTEGER, 'value': FLOAT, 'units': TEXT } df.to_sql('bea_use_detailed', con=engine, if_exists='replace', dtype=types) tbl_desc = 'BEA IO Use Table (Detailed) -- https://www.bea.gov/industry/input-output-accounts-data' comment = text("COMMENT ON TABLE bea_use_detailed IS '{}';".format( tbl_desc)).execution_options(autocommit=True) engine.execute(comment)
def parse_file_to_sql(): df = parse_file() types = { 'Data_Status': TEXT, 'MSN': TEXT, 'region': TEXT, 'year': INTEGER, 'value': FLOAT, 'desc': TEXT, 'units': TEXT } df.to_sql('seds', con=engine, if_exists='replace', dtype=types) tbl_desc = 'EIA State Energy Data System -- https://www.eia.gov/state/seds/seds-data-fuel.php?sid=US#DataFiles' comment = text( "COMMENT ON TABLE seds IS '{}';".format(tbl_desc)).execution_options( autocommit=True) engine.execute(comment)
def join_all_sgf_to_sql(): # parse SGF df = join_all_sgf() # to sql types = { 'label': TEXT, 'geographic_region': TEXT, 'year': INTEGER, 'value': FLOAT, 'units': TEXT } df.to_sql('sgf', con=engine, if_exists='replace', dtype=types) tbl_desc = 'Annual Survey of State Government Finances -- https://www.census.gov/programs-surveys/state/data/tables.html' comment = text( "COMMENT ON TABLE sgf IS '{}';".format(tbl_desc)).execution_options( autocommit=True) engine.execute(comment)
def file_parser_to_sql(): # parse BEA Use Table df = file_parser() # to sql types = { 'input': TEXT, 'input_code': TEXT, 'output': TEXT, 'output_code': TEXT, 'year': INTEGER, 'value': BIGINT, 'units': TEXT } df.to_sql('bea_use', con=engine, if_exists='replace', dtype=types) tbl_desc = 'BEA IO Use Table (71 Industry) -- https://www.bea.gov/industry/input-output-accounts-data' comment = text("COMMENT ON TABLE bea_use IS '{}';".format( tbl_desc)).execution_options(autocommit=True) engine.execute(comment)
def join_all_pce_to_sql(): # personal consumption expenditures df = join_all_pce() df['Line'] = to_numeric(df['Line'], errors='coerce') df['Line'] = df['Line'].map(int) df['year'] = df['year'].map(int) types = {'GeoFIPS': TEXT, 'GeoName': TEXT, 'Region': TEXT, 'TableName': TEXT, 'ComponentName': TEXT, 'units': TEXT, 'Line': INTEGER, 'Description': TEXT, 'year': INTEGER, 'value': FLOAT} df.to_sql('pce', con=engine, if_exists='replace', dtype=types) tbl_desc = 'BEA Personal Consumer Expenditures -- https://apps.bea.gov/itable/iTable.cfm?ReqID=70&step=1' comment = text("COMMENT ON TABLE pce IS '{}';".format( tbl_desc)).execution_options(autocommit=True) engine.execute(comment)
def file_parser_to_sql(): # parse Commodity Flow Survey df, apdx1, apdx2, apdx3, apdx4 = file_parser() types = {'SHIPMT_ID': INTEGER, 'ORIG_STATE': TEXT, 'ORIG_MA': TEXT, 'ORIG_CFS_AREA': TEXT, 'DEST_STATE': TEXT, 'DEST_MA': TEXT, 'DEST_CFS_AREA': TEXT, 'NAICS': TEXT, 'QUARTER': INTEGER, 'SCTG': TEXT, 'SCTG_desc': TEXT, 'MODE': INTEGER, 'SHIPMT_VALUE': FLOAT, 'SHIPMT_VALUE_units': TEXT, 'SHIPMT_WGHT': FLOAT, 'SHIPMT_WGHT_units': TEXT, 'SHIPMT_DIST_GC': INTEGER, 'SHIPMT_DIST_GC_units': TEXT, 'SHIPMT_DIST_ROUTED': INTEGER, 'SHIPMT_DIST_ROUTED_units': TEXT, 'TEMP_CNTL_YN': TEXT, 'EXPORT_YN': TEXT, 'EXPORT_CNTRY': TEXT, 'HAZMAT': TEXT, 'WGT_FACTOR': FLOAT} df.to_sql('cfs_2012', con=engine, if_exists='replace', dtype=types) tbl_desc = '2012 Commodity Flow Survey -- https://www.census.gov/programs-surveys/cfs.html' comment = text("COMMENT ON TABLE cfs_2012 IS '{}';".format( tbl_desc)).execution_options(autocommit=True) engine.execute(comment) types = {'from': TEXT, 'to': TEXT} apdx1.to_sql('cfs_2012_orig_dest_cfs_area_map', con=engine, if_exists='replace', dtype=types) tbl_desc = '2012 Commodity Flow Survey -- Descriptions from Appendix 1' comment = text("COMMENT ON TABLE cfs_2012_orig_dest_cfs_area_map IS '{}';".format( tbl_desc)).execution_options(autocommit=True) engine.execute(comment) types = {'from': INTEGER, 'to': TEXT} apdx2['from'] = apdx2['from'].map(int) apdx2.to_sql('cfs_2012_naics_map', con=engine, if_exists='replace', dtype=types) tbl_desc = '2012 Commodity Flow Survey -- Descriptions from Appendix 2' comment = text("COMMENT ON TABLE cfs_2012_naics_map IS '{}';".format( tbl_desc)).execution_options(autocommit=True) engine.execute(comment) types = {'from': TEXT, 'to': TEXT} apdx3.to_sql('cfs_2012_sctg_map', con=engine, if_exists='replace') tbl_desc = '2012 Commodity Flow Survey -- Descriptions from Appendix 3' comment = text("COMMENT ON TABLE cfs_2012_sctg_map IS '{}';".format( tbl_desc)).execution_options(autocommit=True) engine.execute(comment) types = {'from': INTEGER, 'to': TEXT} apdx4['from'] = apdx4['from'].map(int) apdx4.to_sql('cfs_2012_mode_map', con=engine, if_exists='replace') tbl_desc = '2012 Commodity Flow Survey -- Descriptions from Appendix 4' comment = text("COMMENT ON TABLE cfs_2012_mode_map IS '{}';".format( tbl_desc)).execution_options(autocommit=True) engine.execute(comment)