def assign_naics(df):
    """
    Attach NAICS 2012 sector codes to the activities in a dataframe.

    The BEA crosswalk is reduced to unique (BEA 2012 detail code, NAICS 2012
    code) pairs whose NAICS code is exactly six characters long, and is then
    joined to df by matching df['Activity'] against the BEA detail code. The
    join is pandas' default inner merge, so rows of df whose activity has no
    six-character NAICS match are not returned.

    :param df: df, must contain an 'Activity' column
    :return: df with an added 'Sector' column (the NAICS code) and a
        'SectorSourceName' column set to 'NAICS_2012_Code'
    """
    crosswalk = load_bea_crosswalk()
    bea_to_naics = crosswalk[['BEA_2012_Detail_Code', 'NAICS_2012_Code']]
    bea_to_naics = bea_to_naics.drop_duplicates().reset_index(drop=True)

    # keep only the rows whose NAICS code is exactly 6 characters long
    is_six_char = bea_to_naics['NAICS_2012_Code'].apply(
        lambda code: len(str(code)) == 6)
    bea_to_naics = bea_to_naics[is_six_char].reset_index(drop=True)

    merged = pd.merge(df, bea_to_naics,
                      left_on='Activity',
                      right_on='BEA_2012_Detail_Code')
    # the BEA key repeats 'Activity' after the join, so it is dropped
    merged = merged.drop(columns=["BEA_2012_Detail_Code"]).rename(
        columns={"NAICS_2012_Code": "Sector"})
    merged['SectorSourceName'] = 'NAICS_2012_Code'

    return merged
def assign_naics(df):
    """
    Attach NAICS 2012 sector codes to the activities in a dataframe.

    Unique (BEA 2012 detail code, NAICS 2012 code) pairs are taken from the
    BEA crosswalk, restricted to NAICS codes that are exactly six characters
    long, sorted, and inner-merged onto df by matching df['Activity'] against
    the BEA detail code. Rows of df without a match are not returned.

    :param df: df, must contain an 'Activity' column
    :return: df with an added 'Sector' column (the NAICS code) and a
        'SectorSourceName' column set to 'NAICS_2012_Code'
    """
    crosswalk = load_bea_crosswalk()
    pairs = crosswalk[['BEA_2012_Detail_Code', 'NAICS_2012_Code']]
    pairs = pairs.drop_duplicates().reset_index(drop=True)

    # keep only the rows whose NAICS code is exactly 6 characters long
    six_char_mask = pairs['NAICS_2012_Code'].apply(
        lambda code: len(str(code)) == 6)
    pairs = pairs[six_char_mask].reset_index(drop=True)

    # order the crosswalk by BEA code, then by NAICS code, before merging
    pairs = pairs.sort_values(['BEA_2012_Detail_Code', 'NAICS_2012_Code'])

    out = pd.merge(df, pairs,
                   left_on='Activity',
                   right_on='BEA_2012_Detail_Code')
    out = out.drop(columns=["BEA_2012_Detail_Code"])
    out = out.rename(columns={"NAICS_2012_Code": "Sector"})
    out['SectorSourceName'] = 'NAICS_2012_Code'

    return out
def assign_naics(df):
    """
    Function to assign NAICS codes to each dataframe activity

    Only crosswalk rows whose NAICS 2012 code is exactly six characters long
    are used. The crosswalk is joined with an inner merge on
    df['Activity'] == 'BEA_2012_Detail_Code', so activities without such a
    match do not appear in the result.

    :param df: df, a FlowByActivity subset that contains unique
        activity names
    :return: df with assigned Sector columns ('Sector' and
        'SectorSourceName')
    """
    key_col = 'BEA_2012_Detail_Code'
    naics_col = 'NAICS_2012_Code'

    crosswalk_full = load_bea_crosswalk()
    crosswalk = crosswalk_full[[key_col, naics_col]].drop_duplicates()
    crosswalk = crosswalk.reset_index(drop=True)

    # keep only the rows whose NAICS code is exactly 6 characters long
    keep = crosswalk[naics_col].apply(lambda code: len(str(code)) == 6)
    crosswalk = crosswalk[keep].reset_index(drop=True)
    crosswalk = crosswalk.sort_values([key_col, naics_col])

    with_sector = pd.merge(df, crosswalk,
                           left_on='Activity', right_on=key_col)
    # the BEA key repeats 'Activity' after the join, so it is dropped
    with_sector = with_sector.drop(columns=["BEA_2012_Detail_Code"])
    with_sector = with_sector.rename(columns={"NAICS_2012_Code": "Sector"})
    with_sector['SectorSourceName'] = 'NAICS_2012_Code'

    return with_sector
# write_Crosswalk_BEA_2012_Detail.py (scripts)
# !/usr/bin/env python3
# coding=utf-8

"""
Create a crosswalk linking BEA to NAICS for 2012 Detail
"""
from flowsa.common import datapath, load_bea_crosswalk

if __name__ == '__main__':

    cw_load = load_bea_crosswalk()
    cw = cw_load[['BEA_2012_Detail_Code', 'NAICS_2012_Code']]
    cw = cw.drop_duplicates().reset_index(drop=True)

    # keep only the rows whose NAICS code is exactly 6 characters long
    is_six_char = cw['NAICS_2012_Code'].apply(
        lambda code: len(str(code)) == 6)
    cw = cw[is_six_char].reset_index(drop=True)

    # rename to the crosswalk column names and record where each came from
    df = cw.rename(columns={"NAICS_2012_Code": "Sector",
                            "BEA_2012_Detail_Code": "Activity"})
    df['SectorSourceName'] = 'NAICS_2012_Code'
    df['ActivitySourceName'] = 'BEA_2012_Detail_Code'
    df = df.dropna(subset=["Sector"])

    # assign sector type
    df['SectorType'] = None

    # sort by sector and renumber the rows
    df = df.sort_values('Sector').reset_index(drop=True)
def convert_statcan_data_to_US_water_use(df, attr):
    """
    Use Canadian GDP data to convert 3 digit canadian water use to us
    water use:
    - canadian gdp
    - us gdp

    Steps, as implemented below:
    1. Divide each FlowAmount by the 'StatCan_GDP' value of the row's
       ActivityConsumedBy, after dividing that GDP value by the
       Canadian/USD exchange rate; the unit is set to 'Mgal/USD'. Rows
       with a missing or zero Canadian GDP value are dropped.
    2. Set Location to US_FIPS and reassign the location system.
    3. Multiply by 'BEA_GDP_GrossOutput', mapped through the BEA crosswalk
       to NAICS codes that are three characters long and passed through
       `aggregator`; the unit is set to 'Mgal'.

    :param df: df, FBA format
    :param attr: dictionary, attribute data from method yaml for activity
        set; 'allocation_source_year' is read from it
    :return: df, FBA format, flowamounts converted
    """
    source_year = attr['allocation_source_year']

    # load Canadian GDP data, leaving out the '31-33' activity
    can_gdp = load_fba_w_standardized_units(datasource='StatCan_GDP',
                                            year=source_year,
                                            flowclass='Money')
    can_gdp = can_gdp[can_gdp['ActivityProducedBy'] != '31-33']
    can_gdp = can_gdp.rename(columns={"FlowAmount": "CanDollar"})

    # check units before merge
    compare_df_units(df, can_gdp)

    # attach the Canadian GDP of each consuming activity
    water = pd.merge(df, can_gdp[['CanDollar', 'ActivityProducedBy']],
                     how='left',
                     left_on='ActivityConsumedBy',
                     right_on='ActivityProducedBy')
    # both inputs carry 'ActivityProducedBy'; keep the one that came from df
    water = water.drop(columns=["ActivityProducedBy_y"]).rename(
        columns={"ActivityProducedBy_x": "ActivityProducedBy"})
    # discard rows with a missing or zero Canadian GDP value
    water['CanDollar'] = water['CanDollar'].fillna(0)
    water = water[water['CanDollar'] != 0]

    fx_rate = float(get_Canadian_to_USD_exchange_rate(str(source_year)))

    # convert to mgal/USD
    gdp_converted = water['CanDollar'] / fx_rate
    water.loc[:, 'FlowAmount'] = water['FlowAmount'] / gdp_converted
    water.loc[:, 'Unit'] = 'Mgal/USD'
    water = water.drop(columns=["CanDollar"])

    # convert Location to US
    water.loc[:, 'Location'] = US_FIPS
    water = assign_fips_location_system(water, str(source_year))

    # load US GDP (BEA gross output)
    bea_output = load_fba_w_standardized_units(
        datasource='BEA_GDP_GrossOutput',
        year=source_year,
        flowclass='Money')

    # load bea crosswalk, keeping NAICS codes that are 3 characters long
    crosswalk = load_bea_crosswalk()
    bea_to_naics = crosswalk[
        ['BEA_2012_Detail_Code', 'NAICS_2012_Code']].drop_duplicates()
    is_three_char = bea_to_naics['NAICS_2012_Code'].apply(
        lambda code: len(str(code)) == 3)
    bea_to_naics = bea_to_naics[is_three_char].drop_duplicates()
    bea_to_naics = bea_to_naics.reset_index(drop=True)

    # replace the BEA activity with its NAICS code
    us_gdp = pd.merge(bea_output, bea_to_naics,
                      how='left',
                      left_on='ActivityProducedBy',
                      right_on='BEA_2012_Detail_Code')
    us_gdp = us_gdp.drop(
        columns=['ActivityProducedBy', 'BEA_2012_Detail_Code'])
    us_gdp = us_gdp.rename(columns={'NAICS_2012_Code': 'ActivityProducedBy'})

    # agg by naics
    us_gdp = aggregator(us_gdp, fba_default_grouping_fields)
    us_gdp = us_gdp.rename(columns={'FlowAmount': 'us_gdp'})

    # determine annual us water use; this is a left merge, so rows with no
    # US GDP match end up with a NaN FlowAmount
    us_water = pd.merge(water, us_gdp[['ActivityProducedBy', 'us_gdp']],
                        how='left',
                        left_on='ActivityConsumedBy',
                        right_on='ActivityProducedBy')
    us_water.loc[:, 'FlowAmount'] = \
        us_water['FlowAmount'] * us_water['us_gdp']
    us_water.loc[:, 'Unit'] = 'Mgal'
    us_water = us_water.rename(
        columns={'ActivityProducedBy_x': 'ActivityProducedBy'})
    us_water = us_water.drop(columns=['ActivityProducedBy_y', 'us_gdp'])

    return us_water
def convert_statcan_data_to_US_water_use(df, attr):
    """
    Use Canadian GDP data to convert 3 digit canadian water use to us
    water use:
    - canadian gdp
    - us gdp

    FlowAmount is first divided by the 'StatCan_GDP' value of the row's
    ActivityConsumedBy (itself divided by the Canadian/USD exchange rate),
    with the unit set to 'Mgal/USD'. It is then multiplied by
    'BEA_GDP_GrossOutput_IO', mapped through the BEA crosswalk to NAICS
    codes that are three characters long, with the unit set to 'Mgal'.
    Rows with a missing or zero Canadian GDP value are dropped.

    :param df: df, must contain the 'ActivityConsumedBy',
        'ActivityProducedBy' and 'FlowAmount' columns
    :param attr: dictionary, 'allocation_source_year' is read from it
    :return: df, with converted FlowAmount, Unit and Location
    """
    import flowsa
    from flowsa.values_from_literature import get_Canadian_to_USD_exchange_rate
    from flowsa.flowbyfunctions import assign_fips_location_system, aggregator, fba_default_grouping_fields
    from flowsa.common import US_FIPS, load_bea_crosswalk

    source_year = attr['allocation_source_year']

    # load Canadian GDP data, leaving out the '31-33' activity
    can_gdp = flowsa.getFlowByActivity(flowclass=['Money'],
                                       datasource='StatCan_GDP',
                                       years=[source_year])
    can_gdp = can_gdp[can_gdp['ActivityProducedBy'] != '31-33']
    can_gdp = can_gdp.rename(columns={"FlowAmount": "CanDollar"})

    # attach the Canadian GDP of each consuming activity
    water = pd.merge(df, can_gdp[['CanDollar', 'ActivityProducedBy']],
                     how='left',
                     left_on='ActivityConsumedBy',
                     right_on='ActivityProducedBy')
    # both inputs carry 'ActivityProducedBy'; keep the one that came from df
    water = water.drop(columns=["ActivityProducedBy_y"]).rename(
        columns={"ActivityProducedBy_x": "ActivityProducedBy"})
    # discard rows with a missing or zero Canadian GDP value
    water['CanDollar'] = water['CanDollar'].fillna(0)
    water = water[water['CanDollar'] != 0]

    fx_rate = float(get_Canadian_to_USD_exchange_rate(str(source_year)))

    # convert to mgal/USD
    gdp_converted = water['CanDollar'] / fx_rate
    water.loc[:, 'FlowAmount'] = water['FlowAmount'] / gdp_converted
    water.loc[:, 'Unit'] = 'Mgal/USD'
    water = water.drop(columns=["CanDollar"])

    # convert Location to US
    water.loc[:, 'Location'] = US_FIPS
    water = assign_fips_location_system(water, str(source_year))

    # load US GDP (BEA gross output)
    bea_output = flowsa.getFlowByActivity(
        flowclass=['Money'],
        datasource='BEA_GDP_GrossOutput_IO',
        years=[source_year])

    # load bea crosswalk, keeping NAICS codes that are 3 characters long
    crosswalk = load_bea_crosswalk()
    bea_to_naics = crosswalk[
        ['BEA_2012_Detail_Code', 'NAICS_2012_Code']].drop_duplicates()
    is_three_char = bea_to_naics['NAICS_2012_Code'].apply(
        lambda code: len(str(code)) == 3)
    bea_to_naics = bea_to_naics[is_three_char].drop_duplicates()
    bea_to_naics = bea_to_naics.reset_index(drop=True)

    # replace the BEA activity with its NAICS code
    us_gdp = pd.merge(bea_output, bea_to_naics,
                      how='left',
                      left_on='ActivityProducedBy',
                      right_on='BEA_2012_Detail_Code')
    us_gdp = us_gdp.drop(
        columns=['ActivityProducedBy', 'BEA_2012_Detail_Code'])
    us_gdp = us_gdp.rename(columns={'NAICS_2012_Code': 'ActivityProducedBy'})

    # agg by naics
    us_gdp = aggregator(us_gdp, fba_default_grouping_fields)
    us_gdp = us_gdp.rename(columns={'FlowAmount': 'us_gdp'})

    # determine annual us water use; this is a left merge, so rows with no
    # US GDP match end up with a NaN FlowAmount
    us_water = pd.merge(water, us_gdp[['ActivityProducedBy', 'us_gdp']],
                        how='left',
                        left_on='ActivityConsumedBy',
                        right_on='ActivityProducedBy')
    us_water.loc[:, 'FlowAmount'] = \
        us_water['FlowAmount'] * us_water['us_gdp']
    us_water.loc[:, 'Unit'] = 'Mgal'
    us_water = us_water.rename(
        columns={'ActivityProducedBy_x': 'ActivityProducedBy'})
    us_water = us_water.drop(columns=['ActivityProducedBy_y', 'us_gdp'])

    return us_water