예제 #1
0
def assign_naics(df):
    """
    Attach NAICS 2012 sector codes to a dataframe of BEA activities.

    The crosswalk returned by load_bea_crosswalk() is reduced to unique
    (BEA detail code, NAICS code) pairs, restricted to NAICS codes whose
    string form is exactly six characters long, and joined to `df` by
    matching the 'Activity' column against the BEA detail code.

    :param df: dataframe with an 'Activity' column
    :return: dataframe with added 'Sector' and 'SectorSourceName' columns;
        the merge is pandas' default inner join, so rows of `df` whose
        activity has no six-character NAICS match are not returned
    """
    bea_col = 'BEA_2012_Detail_Code'
    naics_col = 'NAICS_2012_Code'

    # unique BEA detail code / NAICS code pairs
    code_pairs = load_bea_crosswalk()[[bea_col, naics_col]]
    code_pairs = code_pairs.drop_duplicates().reset_index(drop=True)

    # keep only the rows whose NAICS code is exactly six characters long
    six_char_mask = code_pairs[naics_col].apply(
        lambda code: len(str(code)) == 6)
    code_pairs = code_pairs[six_char_mask].reset_index(drop=True)

    # match activities to BEA detail codes, then keep only the NAICS code,
    # relabelled as the sector
    mapped = pd.merge(df, code_pairs, left_on='Activity', right_on=bea_col)
    mapped = mapped.drop(columns=[bea_col])
    mapped = mapped.rename(columns={naics_col: "Sector"})
    mapped['SectorSourceName'] = 'NAICS_2012_Code'

    return mapped


if __name__ == '__main__':
    # years of data from which unique activity names are pulled
    years = ['2002']
    # gather the unique BEA Make Table activity names and attach their
    # NAICS 2012 assignments in one step
    df = assign_naics(unique_activity_names('BEA_Make_Table', years))
    # remove rows with a missing sector (level of detail not needed, or
    # dropped to prevent double counting)
    df.dropna(subset=["Sector"], inplace=True)
    # sector type column is created but left unset
    df['SectorType'] = None
    # put the crosswalk rows/columns into order
    df = order_crosswalk(df)
    # write the crosswalk to csv in the activity-to-sector mapping folder
    output_csv = (datapath + "activitytosectormapping/" +
                  "Crosswalk_BEA_Make_Table_toNAICS.csv")
    df.to_csv(output_csv, index=False)
예제 #2
0
    return df


if __name__ == '__main__':
    # Build the activity-to-NAICS crosswalk for the Blackhurst_IO datasource
    # and write it to csv.

    # select year to pull unique activity names
    year = '2002'
    # datasource
    datasource = 'Blackhurst_IO'
    # df of unique activity names for the datasource
    df = unique_activity_names(datasource, year)
    # add manual naics 2012 assignments
    df = assign_naics(df)
    # drop any rows where naics12 is 'nan'
    # (because level of detail not needed or to prevent double counting)
    df.dropna(subset=["Sector"], inplace=True)
    # assign sector type
    df['SectorType'] = None
    # subset to just the sectors used in water allocation m2. Must reexamine
    # crosswalk for additional BEA activities to ensure no data loss and
    # accurate mapping
    sector_list = ['21', '54136']
    df2 = df.loc[df['Sector'].str.startswith(
        tuple(sector_list))].reset_index(drop=True)
    # sort df
    df2 = order_crosswalk(df2)
    # save as csv; write the subset and sorted crosswalk (df2) rather than
    # the unfiltered df, otherwise the two steps above have no effect
    df2.to_csv(datapath + "activitytosectormapping/" + "NAICS_Crosswalk_" +
               datasource + ".csv",
               index=False)