def extract_facility_data(inventory_dict):
    """
    Returns df of facilities from each inventory in inventory_dict, after
    passing the combined table through apply_county_FIPS.

    :param inventory_dict: a dictionary of inventory types and years
        (e.g., {'NEI': '2017'}); years must be strings because they are
        concatenated into the inventory name used for logging
    :return: df holding the 'FacilityID', 'State', 'County' and 'NAICS'
        columns of every inventory (duplicate FacilityIDs within one
        inventory reduced to the first listed), as returned by
        apply_county_FIPS
    """
    import stewi

    facility_frames = []
    # load facility data from stewi output directory, keeping only the
    # facility IDs, and geographic information
    for database, year in inventory_dict.items():
        # define inventory name as inventory type + inventory year
        # (e.g., NEI_2017)
        inventory_name = database + '_' + year
        facilities = stewi.getInventoryFacilities(database, year)
        facilities = facilities[['FacilityID', 'State', 'County', 'NAICS']]
        if facilities.duplicated(subset='FacilityID').any():
            log.info('Duplicate facilities in ' + inventory_name +
                     ' - keeping first listed')
            # reassign instead of inplace=True: `facilities` is a column
            # slice, and mutating it in place triggers SettingWithCopyWarning
            facilities = facilities.drop_duplicates(subset='FacilityID',
                                                    keep='first')
        facility_frames.append(facilities)

    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    # pd.concat rejects an empty list, so keep the empty-frame result for an
    # empty inventory_dict.
    if facility_frames:
        facility_mapping = pd.concat(facility_frames)
    else:
        facility_mapping = pd.DataFrame()

    # Apply FIPS to facility locations
    facility_mapping = apply_county_FIPS(facility_mapping)
    return facility_mapping
def get_reported_releases(CASlist):
    """
    Retrieves release info from stewi for a list of CAS

    For every inventory/year pair in the module-level
    ``inventories_of_interest`` the inventory is loaded from stewi, reduced
    to the flows listed for that inventory in the module-level
    ``chems_stewi_matches`` (compared case-insensitively), tagged with its
    source and year, and joined to the inventory's facility table.

    :param CASlist: list, a list of CAS in standard CAS format.
        NOTE(review): the body does not read this argument; the flows kept
        are determined by ``chems_stewi_matches`` - confirm with callers.
    :return: a pandas DataFrame with records for each release with context
        and facility information (empty when there are no inventories of
        interest)
    """
    release_frames = []
    for k, v in inventories_of_interest.items():
        inv = stewi.getInventory(k, v)
        # filter by chems of interest; flow names are lower-cased on both
        # sides so the match ignores case
        inv['FlowName'] = inv['FlowName'].str.lower()
        # keep only real, non-empty names: unmatched chemicals may be stored
        # as None/NaN/'' and would otherwise break the .lower() call
        flows_of_interest = {
            name.lower()
            for name in chems_stewi_matches[k].values
            if isinstance(name, str) and name
        }
        # .copy() so the column assignments below act on an independent
        # frame rather than a slice of the full inventory
        inv = inv[inv["FlowName"].isin(flows_of_interest)].copy()
        inv["Source"] = k
        inv["Year"] = v
        # Join with facility data to get location
        fac = stewi.getInventoryFacilities(k, v)
        # Only facilities that actually report one of the flows are needed
        fac = fac[fac["FacilityID"].isin(inv['FacilityID'].unique())]
        inv = pd.merge(inv, fac, on=['FacilityID'])
        release_frames.append(inv)

    # Concatenate once at the end instead of growing a frame per iteration
    if not release_frames:
        return pd.DataFrame()
    return pd.concat(release_frames, sort=False)
def extract_facility_data(inventory_dict):
    """
    Returns df of facilities from each inventory in inventory_dict,
    including FIPS code

    :param inventory_dict: a dictionary of inventory types and years
        (e.g., {'NEI':'2017', 'TRI':'2017'}); years must be strings because
        they are concatenated into the inventory name used for logging
    :return: df holding the 'FacilityID', 'State', 'County' and 'NAICS'
        columns of every inventory (duplicate FacilityIDs within one
        inventory reduced to the first listed), as returned by
        apply_county_FIPS
    """
    import stewi

    facility_frames = []
    # load facility data from stewi output directory, keeping only the
    # facility IDs, and geographic information
    for database, year in inventory_dict.items():
        # define inventory name as inventory type + inventory year
        # (e.g., NEI_2017)
        inventory_name = database + '_' + year
        facilities = stewi.getInventoryFacilities(database, year)
        facilities = facilities[['FacilityID', 'State', 'County', 'NAICS']]
        if facilities.duplicated(subset='FacilityID').any():
            log.debug('Duplicate facilities in %s - keeping first listed',
                      inventory_name)
            # reassign instead of inplace=True: `facilities` is a column
            # slice, and mutating it in place triggers SettingWithCopyWarning
            facilities = facilities.drop_duplicates(subset='FacilityID',
                                                    keep='first')
        facility_frames.append(facilities)

    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    # pd.concat rejects an empty list, so keep the empty-frame result for an
    # empty inventory_dict.
    if facility_frames:
        facility_mapping = pd.concat(facility_frames)
    else:
        facility_mapping = pd.DataFrame()

    # Apply FIPS to facility locations
    facility_mapping = apply_county_FIPS(facility_mapping)
    return facility_mapping
def assign_nonpoint_dqi(args):
    '''
    Compares facility coverage data between NEI point and Census to estimate
    facility coverage in NEI nonpoint

    :param args: mapping read for its 'year' key; that value is passed to
        stewi as the NEI inventory year and to flowsa as the Census_CBP year

    NOTE(review): the visible body only builds the two count series
    (nei_count and census_count); no comparison or return statement is
    visible here - confirm against the full function.
    '''
    import stewi
    import flowsa
    # Count of NEI facility IDs per NAICS code for the requested year
    nei_facility_list = stewi.getInventoryFacilities('NEI', args['year'])
    nei_count = nei_facility_list.groupby('NAICS')['FacilityID'].count()
    # Census_CBP flow-by-activity data for the same year, reduced to the
    # 'Number of establishments' flow
    census = flowsa.getFlowByActivity(flowclass=['Other'],
                                      years=[args['year']],
                                      datasource="Census_CBP")
    census = census[census['FlowName'] == 'Number of establishments']
    # Total establishments per producing activity
    # (presumably a NAICS code, to line up with nei_count - TODO confirm)
    census_count = census.groupby('ActivityProducedBy')['FlowAmount'].sum()
import pandas as pd
import stewi
from os.path import join
from electricitylci.globals import data_dir
from electricitylci.model_config import model_specs

# Get the eGRID facility file from stewi for the configured eGRID year
egrid_facilities = stewi.getInventoryFacilities("eGRID", model_specs.egrid_year)
# Shorten the verbose eGRID column headers to the names used below
egrid_facilities.rename(columns={'Plant primary coal/oil/gas/ other fossil fuel category': 'FuelCategory',
                                 'Plant primary fuel': 'PrimaryFuel',
                                 'eGRID subregion acronym': 'Subregion',
                                 'NERC region acronym': 'NERC'},
                        inplace=True)

# Remove NERC from the original eGRID output in stewi because there are
# mismatches in the original data with more than 1 NERC per eGRID subregion
egrid_facilities = egrid_facilities.drop(columns='NERC')

# Bring in the eGRID subregion-NERC mapping and attach it with a left join so
# facilities without a mapped subregion are kept
egrid_nerc = pd.read_csv(join(data_dir, 'egrid_subregion_to_NERC.csv'), low_memory=False)
egrid_facilities = pd.merge(egrid_facilities, egrid_nerc, on='Subregion', how='left')

# Bare expression: has no effect when run as a module (interactive row-count
# check; the trailing note records the value seen for 2016)
len(egrid_facilities) # 2016:9709

# Unique subregion values present in the facility table
egrid_subregions = list(pd.unique(egrid_facilities['Subregion']))
# Remove nan if present
egrid_subregions = [x for x in egrid_subregions if str(x) != 'nan']
# Bare expression: interactive check of the subregion count
len(egrid_subregions) # 2016: 26
# egrid_subregions = ['AZNM']

# Sorted list of the non-null primary fuel categories
egrid_primary_fuel_categories = sorted(pd.unique(egrid_facilities['FuelCategory'].dropna()))

# correspondence between fuel category and percent_gen
fuel_cat_to_per_gen = {'BIOMASS': 'Plant biomass generation percent (resource mix)',
import pandas as pd
import stewi
from os.path import join
from electricitylci.globals import data_dir
from electricitylci.model_config import (
    egrid_year,
    min_plant_percent_generation_from_primary_fuel_category,
)

# Load the eGRID facility table from stewi for the configured year and give
# the verbose eGRID column headers the short names used by the rest of the
# module.
egrid_facilities = stewi.getInventoryFacilities("eGRID", egrid_year)
egrid_facilities.rename(
    columns={
        'Plant primary coal/oil/gas/ other fossil fuel category':
            'FuelCategory',
        'Plant primary fuel':
            'PrimaryFuel',
        'eGRID subregion acronym':
            'Subregion',
        'NERC region acronym':
            'NERC',
    },
    inplace=True,
)

# The NERC column that ships with the stewi eGRID output is discarded because
# the original data can list more than one NERC per eGRID subregion.
egrid_facilities = egrid_facilities.drop(labels='NERC', axis='columns')

# NERC is re-attached from the subregion-to-NERC lookup file; the left join
# keeps every facility row.
egrid_nerc = pd.read_csv(join(data_dir, 'egrid_subregion_to_NERC.csv'))
egrid_facilities = egrid_facilities.merge(
    egrid_nerc,
    how='left',
    on='Subregion',
)

# Bare expression kept from the original: it only displays the row count when
# run interactively.
len(egrid_facilities)
# Example walk-through of the stewi and stewicombo entry points.
# The bare `.head(n)` expressions only display output when run interactively
# (e.g. in a notebook or console); they have no effect in a plain script.
# NOTE(review): `stewi` and `stewicombo` must already be imported; the imports
# are not visible in this excerpt.

# Inventory acronym and year to load
inventory='TRI'
year = '2016'

# Get this inventory
tri2016 = stewi.getInventory(inventory,year)
# See the first 50 rows
tri2016.head(50)

# Look at all the unique flows in this inventory
tri2016flows = stewi.getInventoryFlows(inventory,year)
# See the first 50 rows
tri2016flows.head(50)

# Look at all the unique facilities in this inventory
tri2016facilities = stewi.getInventoryFacilities(inventory,year)
# See the first 50 rows
tri2016facilities.head(50)

# Now combine with other inventories, based on the facilities in this one.
# Enter the inventories that you would like to combine in the
# "Inventory_acronym":"year" format, enclosed in "{}"
inventories_to_get = {"TRI":"2016","NEI":"2016","RCRAInfo":"2015","eGRID":"2016"}
# The inventory loaded above serves as the base for the combination
base_inventory = inventory
combinedinventories = stewicombo.combineInventoriesforFacilitiesinOneInventory(base_inventory, inventories_to_get)
# See the first 50 rows
combinedinventories.head(50)

# See a summary of the combined inventories by facility and flow
pivotofinventories = stewicombo.pivotCombinedInventories(combinedinventories)
# See the first 200 rows
pivotofinventories.head(200)