Example #1
0
def assign_naics(df):
    """
    Add a NAICS sector to each row of df by matching its 'Activity' value
    against the BEA 2012 detail codes in the BEA crosswalk.

    Only crosswalk rows whose NAICS code is exactly six characters long are
    used. The merge uses the pandas default (inner) join, so rows of df
    whose 'Activity' has no such crosswalk entry are not returned.
    :param df: df with an 'Activity' column to match on
    :return: df with 'Sector' and 'SectorSourceName' columns added
    """

    def _is_six_characters(code):
        return len(str(code)) == 6

    crosswalk = load_bea_crosswalk()
    # unique BEA detail code / NAICS code pairs
    bea_to_naics = crosswalk[['BEA_2012_Detail_Code', 'NAICS_2012_Code']]
    bea_to_naics = bea_to_naics.drop_duplicates().reset_index(drop=True)
    # keep only the pairs whose NAICS code is six characters long
    six_digit_rows = bea_to_naics['NAICS_2012_Code'].apply(_is_six_characters)
    bea_to_naics = bea_to_naics[six_digit_rows].reset_index(drop=True)

    merged = pd.merge(df,
                      bea_to_naics,
                      left_on='Activity',
                      right_on='BEA_2012_Detail_Code')
    # the BEA code duplicates 'Activity'; the NAICS code becomes the sector
    merged = merged.drop(columns=["BEA_2012_Detail_Code"]).rename(
        columns={"NAICS_2012_Code": "Sector"})
    merged['SectorSourceName'] = 'NAICS_2012_Code'

    return merged
Example #2
0
def assign_naics(df):
    """
    Assign a NAICS 2012 sector to every row of df whose 'Activity' value
    appears as a BEA 2012 detail code in the BEA crosswalk.

    The crosswalk is reduced to unique code pairs with a six-character
    NAICS code and sorted by BEA code, then NAICS code, before the merge.
    :param df: df with an 'Activity' column to match on
    :return: df with 'Sector' and 'SectorSourceName' columns added
    """
    bea_col = 'BEA_2012_Detail_Code'
    naics_col = 'NAICS_2012_Code'

    pairs = load_bea_crosswalk()[[bea_col, naics_col]]
    pairs = pairs.drop_duplicates().reset_index(drop=True)
    # retain only the rows whose NAICS code is six characters long
    has_six_chars = pairs[naics_col].apply(lambda code: len(str(code)) == 6)
    pairs = pairs[has_six_chars].reset_index(drop=True)

    pairs = pairs.sort_values([bea_col, naics_col])

    # default (inner) merge: activities without a crosswalk entry drop out
    result = pd.merge(df, pairs, left_on='Activity', right_on=bea_col)
    result = result.drop(columns=[bea_col])
    result = result.rename(columns={naics_col: "Sector"})
    result['SectorSourceName'] = 'NAICS_2012_Code'

    return result
Example #3
0
def assign_naics(df):
    """
    Function to assign NAICS codes to each dataframe activity, using the
    BEA 2012 detail code to NAICS 2012 code pairs of the BEA crosswalk
    :param df: df, a FlowByActivity subset that contains unique activity
        names in an 'Activity' column
    :return: df with assigned 'Sector' and 'SectorSourceName' columns;
        activities without a matching crosswalk row are not returned
        because the merge uses the pandas default (inner) join
    """

    def _six_characters_long(naics):
        return len(str(naics)) == 6

    full_crosswalk = load_bea_crosswalk()
    code_pairs = full_crosswalk[['BEA_2012_Detail_Code', 'NAICS_2012_Code']]
    code_pairs = code_pairs.drop_duplicates().reset_index(drop=True)

    # only crosswalk rows with a six-character NAICS code are used
    row_filter = code_pairs['NAICS_2012_Code'].apply(_six_characters_long)
    code_pairs = code_pairs[row_filter].reset_index(drop=True)
    code_pairs = code_pairs.sort_values(
        ['BEA_2012_Detail_Code', 'NAICS_2012_Code'])

    with_sector = pd.merge(df,
                           code_pairs,
                           left_on='Activity',
                           right_on='BEA_2012_Detail_Code')
    # drop the join key copied from the crosswalk and relabel the NAICS code
    with_sector = with_sector.drop(columns=["BEA_2012_Detail_Code"])
    with_sector = with_sector.rename(columns={"NAICS_2012_Code": "Sector"})
    with_sector['SectorSourceName'] = 'NAICS_2012_Code'

    return with_sector
Example #4
0
# write_Crosswalk_BEA_2012_Detail.py (scripts)
# !/usr/bin/env python3
# coding=utf-8
"""
Create a crosswalk linking BEA to NAICS for 2012 Detail

"""
from flowsa.common import datapath, load_bea_crosswalk

if __name__ == '__main__':

    # unique BEA detail code / NAICS code pairs from the BEA crosswalk
    cw_load = load_bea_crosswalk()
    cw = cw_load[['BEA_2012_Detail_Code', 'NAICS_2012_Code']]
    cw = cw.drop_duplicates().reset_index(drop=True)
    # keep only the rows whose NAICS code is six characters long
    six_digit_rows = cw['NAICS_2012_Code'].apply(
        lambda code: len(str(code)) == 6)
    cw = cw[six_digit_rows].reset_index(drop=True)

    # relabel to the Activity / Sector column names
    column_names = {
        "NAICS_2012_Code": "Sector",
        "BEA_2012_Detail_Code": "Activity"
    }
    df = cw.rename(columns=column_names)
    # record which classification each column came from
    df['SectorSourceName'] = 'NAICS_2012_Code'
    df['ActivitySourceName'] = 'BEA_2012_Detail_Code'
    # discard rows that have no sector
    df.dropna(subset=["Sector"], inplace=True)
    # sector type is left unassigned
    df['SectorType'] = None
    # order by sector and renumber the rows
    df = df.sort_values('Sector').reset_index(drop=True)
Example #5
0
def convert_statcan_data_to_US_water_use(df, attr):
    """
    Use Canadian GDP data to convert 3 digit canadian water use to us water
    use:
    - canadian gdp
    - us gdp

    The flow amounts in df are divided by the Canadian GDP of the matching
    activity (after applying the Canadian/USD exchange rate) and then
    multiplied by the US gross output of the same 3 digit NAICS.
    :param df: df, FBA format
    :param attr: dictionary, attribute data from method yaml for activity
        set; 'allocation_source_year' is read from it
    :return: df, FBA format, flowamounts converted. Rows without a
        non-zero Canadian GDP match are dropped; rows without a US gross
        output match are kept with a NaN FlowAmount (left merge)
    """

    # load Canadian GDP data
    gdp = load_fba_w_standardized_units(datasource='StatCan_GDP',
                                        year=attr['allocation_source_year'],
                                        flowclass='Money')

    # drop 31-33
    gdp = gdp[gdp['ActivityProducedBy'] != '31-33']
    gdp = gdp.rename(columns={"FlowAmount": "CanDollar"})

    # check units before merge
    compare_df_units(df, gdp)
    # merge df: attach the Canadian GDP of each consuming activity
    df_m = pd.merge(df,
                    gdp[['CanDollar', 'ActivityProducedBy']],
                    how='left',
                    left_on='ActivityConsumedBy',
                    right_on='ActivityProducedBy')
    df_m['CanDollar'] = df_m['CanDollar'].fillna(0)
    # both frames carry 'ActivityProducedBy'; keep the one that came from df
    df_m = df_m.drop(columns=["ActivityProducedBy_y"])
    df_m = df_m.rename(columns={"ActivityProducedBy_x": "ActivityProducedBy"})
    # drop rows with missing or zero GDP so the division below is defined;
    # copy so the assignments below act on an independent frame rather than
    # on a slice of the merge result
    df_m = df_m[df_m['CanDollar'] != 0].copy()

    exchange_rate = get_Canadian_to_USD_exchange_rate(
        str(attr['allocation_source_year']))
    exchange_rate = float(exchange_rate)
    # convert to mgal/USD
    # NOTE(review): assumes CanDollar / exchange_rate yields USD - confirm
    # the direction of the rate returned by the helper
    df_m.loc[:, 'FlowAmount'] = df_m['FlowAmount'] / (df_m['CanDollar'] /
                                                      exchange_rate)
    df_m.loc[:, 'Unit'] = 'Mgal/USD'

    df_m = df_m.drop(columns=["CanDollar"])

    # convert Location to US
    df_m.loc[:, 'Location'] = US_FIPS
    df_m = assign_fips_location_system(df_m,
                                       str(attr['allocation_source_year']))

    # load us gdp (BEA gross output)
    us_gdp_load = load_fba_w_standardized_units(
        datasource='BEA_GDP_GrossOutput',
        year=attr['allocation_source_year'],
        flowclass='Money')

    # load bea crosswalk
    cw_load = load_bea_crosswalk()
    cw = cw_load[['BEA_2012_Detail_Code', 'NAICS_2012_Code']].drop_duplicates()
    # a missing NAICS code stringifies to 'nan', which is also 3 characters
    # long, so missing codes are removed before the length filter
    cw = cw.dropna(subset=['NAICS_2012_Code'])
    # keep only the 3 digit NAICS rows
    cw = cw[cw['NAICS_2012_Code'].astype(str).str.len() == 3]
    cw = cw.drop_duplicates().reset_index(drop=True)

    # merge: replace each BEA detail code with its 3 digit NAICS
    us_gdp = pd.merge(us_gdp_load,
                      cw,
                      how='left',
                      left_on='ActivityProducedBy',
                      right_on='BEA_2012_Detail_Code')
    us_gdp = us_gdp.drop(
        columns=['ActivityProducedBy', 'BEA_2012_Detail_Code'])
    # rename columns
    us_gdp = us_gdp.rename(columns={'NAICS_2012_Code': 'ActivityProducedBy'})
    # agg by naics
    us_gdp = aggregator(us_gdp, fba_default_grouping_fields)
    us_gdp = us_gdp.rename(columns={'FlowAmount': 'us_gdp'})

    # determine annual us water use
    df_m2 = pd.merge(df_m,
                     us_gdp[['ActivityProducedBy', 'us_gdp']],
                     how='left',
                     left_on='ActivityConsumedBy',
                     right_on='ActivityProducedBy')

    # Mgal/USD times USD of gross output gives Mgal
    df_m2.loc[:, 'FlowAmount'] = df_m2['FlowAmount'] * (df_m2['us_gdp'])
    df_m2.loc[:, 'Unit'] = 'Mgal'
    # keep the 'ActivityProducedBy' that came from df_m, drop the merge key
    df_m2 = df_m2.rename(
        columns={'ActivityProducedBy_x': 'ActivityProducedBy'})
    df_m2 = df_m2.drop(columns=['ActivityProducedBy_y', 'us_gdp'])

    return df_m2
Example #6
0
def convert_statcan_data_to_US_water_use(df, attr):
    """
    Use Canadian GDP data to convert 3 digit canadian water use to us water
    use:
    - canadian gdp
    - us gdp

    The flow amounts in df are divided by the Canadian GDP of the matching
    activity (after applying the Canadian/USD exchange rate) and then
    multiplied by the US gross output of the same 3 digit NAICS.
    :param df: df with 'FlowAmount', 'ActivityConsumedBy' and
        'ActivityProducedBy' columns
    :param attr: dictionary, 'allocation_source_year' is read from it
    :return: df with FlowAmount converted and Unit set to 'Mgal'. Rows
        without a non-zero Canadian GDP match are dropped; rows without a
        US gross output match are kept with a NaN FlowAmount (left merge)
    """
    import flowsa
    from flowsa.values_from_literature import get_Canadian_to_USD_exchange_rate
    from flowsa.flowbyfunctions import assign_fips_location_system, aggregator, fba_default_grouping_fields
    from flowsa.common import US_FIPS, load_bea_crosswalk

    # load Canadian GDP data
    gdp = flowsa.getFlowByActivity(flowclass=['Money'],
                                   datasource='StatCan_GDP',
                                   years=[attr['allocation_source_year']])
    # drop 31-33
    gdp = gdp[gdp['ActivityProducedBy'] != '31-33']
    gdp = gdp.rename(columns={"FlowAmount": "CanDollar"})

    # merge df: attach the Canadian GDP of each consuming activity
    df_m = pd.merge(df,
                    gdp[['CanDollar', 'ActivityProducedBy']],
                    how='left',
                    left_on='ActivityConsumedBy',
                    right_on='ActivityProducedBy')
    df_m['CanDollar'] = df_m['CanDollar'].fillna(0)
    # both frames carry 'ActivityProducedBy'; keep the one that came from df
    df_m = df_m.drop(columns=["ActivityProducedBy_y"])
    df_m = df_m.rename(columns={"ActivityProducedBy_x": "ActivityProducedBy"})
    # drop rows with missing or zero GDP so the division below is defined;
    # copy so the assignments below act on an independent frame rather than
    # on a slice of the merge result
    df_m = df_m[df_m['CanDollar'] != 0].copy()

    exchange_rate = get_Canadian_to_USD_exchange_rate(
        str(attr['allocation_source_year']))
    exchange_rate = float(exchange_rate)
    # convert to mgal/USD
    # NOTE(review): assumes CanDollar / exchange_rate yields USD - confirm
    # the direction of the rate returned by the helper
    df_m.loc[:, 'FlowAmount'] = df_m['FlowAmount'] / (df_m['CanDollar'] /
                                                      exchange_rate)
    df_m.loc[:, 'Unit'] = 'Mgal/USD'

    df_m = df_m.drop(columns=["CanDollar"])

    # convert Location to US
    df_m.loc[:, 'Location'] = US_FIPS
    df_m = assign_fips_location_system(df_m,
                                       str(attr['allocation_source_year']))

    # load us gdp (BEA gross output)
    us_gdp_load = flowsa.getFlowByActivity(
        flowclass=['Money'],
        datasource='BEA_GDP_GrossOutput_IO',
        years=[attr['allocation_source_year']])
    # load bea crosswalk
    cw_load = load_bea_crosswalk()
    cw = cw_load[['BEA_2012_Detail_Code', 'NAICS_2012_Code']].drop_duplicates()
    # a missing NAICS code stringifies to 'nan', which is also 3 characters
    # long, so missing codes are removed before the length filter
    cw = cw.dropna(subset=['NAICS_2012_Code'])
    # keep only the 3 digit NAICS rows
    cw = cw[cw['NAICS_2012_Code'].astype(str).str.len() == 3]
    cw = cw.drop_duplicates().reset_index(drop=True)

    # merge: replace each BEA detail code with its 3 digit NAICS
    us_gdp = pd.merge(us_gdp_load,
                      cw,
                      how='left',
                      left_on='ActivityProducedBy',
                      right_on='BEA_2012_Detail_Code')
    us_gdp = us_gdp.drop(
        columns=['ActivityProducedBy', 'BEA_2012_Detail_Code'])
    # rename columns
    us_gdp = us_gdp.rename(columns={'NAICS_2012_Code': 'ActivityProducedBy'})
    # agg by naics
    us_gdp = aggregator(us_gdp, fba_default_grouping_fields)
    us_gdp = us_gdp.rename(columns={'FlowAmount': 'us_gdp'})

    # determine annual us water use
    df_m2 = pd.merge(df_m,
                     us_gdp[['ActivityProducedBy', 'us_gdp']],
                     how='left',
                     left_on='ActivityConsumedBy',
                     right_on='ActivityProducedBy')

    # Mgal/USD times USD of gross output gives Mgal
    df_m2.loc[:, 'FlowAmount'] = df_m2['FlowAmount'] * (df_m2['us_gdp'])
    df_m2.loc[:, 'Unit'] = 'Mgal'
    # keep the 'ActivityProducedBy' that came from df_m, drop the merge key
    df_m2 = df_m2.rename(
        columns={'ActivityProducedBy_x': 'ActivityProducedBy'})
    df_m2 = df_m2.drop(columns=['ActivityProducedBy_y', 'us_gdp'])

    return df_m2