def assign_naics(df):
    """
    Attach 6-character NAICS 2012 codes to the activities in a dataframe.

    :param df: df with an 'Activity' column; its values are matched against
        the 'BEA_2012_Detail_Code' column of the BEA crosswalk
    :return: df holding only the activities found in the crosswalk, with the
        matched code in a 'Sector' column and a constant 'SectorSourceName'
        column
    """
    bea_to_naics = (
        load_bea_crosswalk()[['BEA_2012_Detail_Code', 'NAICS_2012_Code']]
        .drop_duplicates()
        .reset_index(drop=True)
    )
    # keep only the rows whose NAICS code is exactly 6 characters long
    is_six_char = bea_to_naics['NAICS_2012_Code'].apply(
        lambda code: len(str(code)) == 6)
    bea_to_naics = bea_to_naics[is_six_char].reset_index(drop=True)

    # default (inner) merge: activities without a crosswalk match are dropped
    mapped = pd.merge(left=df, right=bea_to_naics,
                      left_on='Activity', right_on='BEA_2012_Detail_Code')
    mapped = mapped.drop(columns=["BEA_2012_Detail_Code"])
    mapped = mapped.rename(columns={"NAICS_2012_Code": "Sector"})
    mapped['SectorSourceName'] = 'NAICS_2012_Code'

    return mapped


if __name__ == '__main__':
    # years used to pull the unique activity names
    years = ['2002']
    # unique activity names of the BEA Make Table
    activities = unique_activity_names('BEA_Make_Table', years)
    # attach the NAICS 2012 codes from the BEA crosswalk
    crosswalk = assign_naics(activities)
    # drop any rows without a sector (level of detail not needed, or to
    # prevent double counting)
    crosswalk.dropna(subset=["Sector"], inplace=True)
    # sector type is left unassigned
    crosswalk['SectorType'] = None
    # sort df
    crosswalk = order_crosswalk(crosswalk)
    # save as csv
    output_file = (datapath + "activitytosectormapping/" +
                   "Crosswalk_BEA_Make_Table_toNAICS.csv")
    crosswalk.to_csv(output_file, index=False)
return df if __name__ == '__main__': # select years to pull unique activity names year = '2002' # datasource datasource = 'Blackhurst_IO' # df of unique ers activity names df = unique_activity_names(datasource, year) # add manual naics 2012 assignments df = assign_naics(df) # drop any rows where naics12 is 'nan' # (because level of detail not needed or to prevent double counting) df.dropna(subset=["Sector"], inplace=True) # assign sector type df['SectorType'] = None # subset to just the sectors used in water allocation m2. Must reexamine # crosswalk for additional BEA activities to ensure no data loss and # accurate mapping sector_list = ['21', '54136'] df2 = df.loc[df['Sector'].str.startswith( tuple(sector_list))].reset_index(drop=True) # sort df df2 = order_crosswalk(df2) # save as csv df.to_csv(datapath + "activitytosectormapping/" + "NAICS_Crosswalk_" + datasource + ".csv", index=False)