def map_to_fedefl(df):
    """Map TRI flow names in *df* to Federal Elementary Flow List names.

    Requires the optional fedelemflowlist package; logs a warning and
    returns None when it is not installed. Unmapped 'trade secret' and
    'mixture' rows are dropped because they fail downstream validation.
    """
    try:
        import fedelemflowlist
    except ImportError:
        log.warning('requires installation of fedelemflowlist, data will not '
                    'validate correctly')
        return None
    # TRI source -> target name pairs, de-duplicated before the join
    tri_map = fedelemflowlist.get_flowmapping('TRI')
    tri_map = tri_map[['SourceFlowName', 'TargetFlowName']].drop_duplicates()
    mapped = df.merge(tri_map, how='left',
                      left_on='FlowName', right_on='SourceFlowName')
    # duplicate source names in the mapping would inflate the totals
    if mapped['FlowAmount'].sum() != df['FlowAmount'].sum():
        log.warning('Error on mapping, data loss')
    # validation throws errors when mixture and trade secret chemicals are
    # maintained, so drop them while they remain unmapped
    lowered = mapped['FlowName'].str.lower()
    drop_mask = (mapped['TargetFlowName'].isna()
                 & (lowered.str.contains('trade secret')
                    | lowered.str.contains('mixture')))
    mapped = mapped[~drop_mask].reset_index(drop=True)
    # report (at debug level) any names that found no target
    unmapped = mapped.loc[mapped['TargetFlowName'].isna(), 'FlowName']
    unmapped = unmapped.drop_duplicates().sort_values()
    if not unmapped.empty:
        log.debug('flows from reference df missing in mapping file')
    # adopt the target name wherever a mapping was found
    has_target = mapped['TargetFlowName'].notna()
    mapped.loc[has_target, 'FlowName'] = mapped['TargetFlowName']
    return mapped.drop(columns=['SourceFlowName', 'TargetFlowName'])
def map_elementary_flows(fba, from_fba_source, keep_unmapped_rows=False):
    """
    Applies mapping from fedelemflowlist to convert flows to
    fedelemflowlist flows
    :param fba: df flow-by-activity or flow-by-sector with 'Flowable',
        'Context', and 'Unit' fields
    :param from_fba_source: str Source name of fba list to look for mappings
    :param keep_unmapped_rows: False if want unmapped rows dropped,
        True if want to retain
    :return: df with flows mapped to the federal elementary flow list
    """
    from fedelemflowlist import get_flowmapping

    # rename columns to match FBS formatting
    fba = fba.rename(columns={"FlowName": 'Flowable',
                              "Compartment": "Context"})

    flowmapping = get_flowmapping(from_fba_source)
    mapping_fields = ["SourceListName", "SourceFlowName", "SourceFlowContext",
                      "SourceUnit", "ConversionFactor", "TargetFlowName",
                      "TargetFlowContext", "TargetUnit"]
    if flowmapping.empty:
        log.warning("No mapping file in fedelemflowlist found for " +
                    from_fba_source)
        # return the original df but with columns renamed so can continue
        # working on the FBS
        return fba.copy()

    # an inner merge drops unmapped rows; a left merge retains them
    merge_type = 'inner' if keep_unmapped_rows is False else 'left'
    fba_mapped_df = pd.merge(
        fba, flowmapping[mapping_fields],
        left_on=["Flowable", "Context"],
        right_on=["SourceFlowName", "SourceFlowContext"],
        how=merge_type)

    # where a target flow was found, overwrite name/context/unit...
    matched = fba_mapped_df["TargetFlowName"].notnull()
    for dest, src in (("Flowable", "TargetFlowName"),
                      ("Context", "TargetFlowContext"),
                      ("Unit", "TargetUnit")):
        fba_mapped_df.loc[matched, dest] = fba_mapped_df[src]
    # ...and convert the amount into the target unit
    fba_mapped_df.loc[matched, "FlowAmount"] = \
        fba_mapped_df["FlowAmount"] * fba_mapped_df["ConversionFactor"]

    # drop the mapping bookkeeping columns before returning
    return fba_mapped_df.drop(columns=mapping_fields)
def __init__(self, df: pandas.DataFrame, system=None, mapping=None,
             preserve_unmapped=False, case_insensitive=False):
    """Create a mapper for *df* against a flow mapping.

    :param df: the data frame whose flows will be mapped
    :param system: source-list name used to fetch a mapping when none is given
    :param mapping: optional pre-loaded mapping frame; loaded from
        fed.elem.flows for *system* when None
    :param preserve_unmapped: keep rows that find no mapping match
    :param case_insensitive: compare SourceFlowName case-insensitively
    """
    self.__df = df
    self.__system = system
    self.__case_insensitive = case_insensitive
    if mapping is None:
        # bug fix: the original message contained a %s placeholder with no
        # argument, so the literal "v=%s" was logged; pass the system name
        log.info("load flow mapping for source=%s from fed.elem.flows",
                 system)
        mapping = flowlist.get_flowmapping(source=system)
    if self.__case_insensitive:
        # copy first so a caller-supplied mapping frame is not mutated
        mapping = mapping.copy()
        mapping['SourceFlowName'] = mapping['SourceFlowName'].str.lower()
    self.__mapping = mapping  # type: pandas.DataFrame
    self.__preserve_unmapped = preserve_unmapped
def __init__(self, version="0.1", flow_list: pd.DataFrame = None,
             flow_mapping: pd.DataFrame = None):
    """Hold a flow list and flow mapping for one list version.

    When *flow_list* or *flow_mapping* is not supplied, the packaged
    data for *version* is loaded via fedfl.
    """
    self.version = version
    if flow_list is not None:
        self.flow_list = flow_list
    else:
        # fall back to the packaged flow list for this version
        self.flow_list = fedfl.get_flowlist(version)  # type: pd.DataFrame
    if flow_mapping is not None:
        self.flow_mapping = flow_mapping
    else:
        # fall back to the packaged mapping for this version
        self.flow_mapping = fedfl.get_flowmapping(
            version)  # type: pd.DataFrame
def map_elementary_flows(fba, from_fba_source):
    """
    Applies mapping from fedelemflowlist to convert flows to
    fedelemflowlist flows
    :param fba: df flow-by-activity or flow-by-sector with 'Flowable',
        'Context', and 'Unit' fields
    :param from_fba_source: str Source name of fba list to look for mappings
    :return: df with flows mapped to the federal elementary flow list
    """
    from fedelemflowlist import get_flowmapping

    # rename flow name to flowable - remove this once elementary flows are mapped
    fba = fba.rename(columns={"FlowName": 'Flowable',
                              "Compartment": "Context"})

    flowmapping = get_flowmapping(from_fba_source)
    mapping_fields = ["SourceListName", "SourceFlowName",
                      "SourceFlowContext", "SourceUnit",
                      "ConversionFactor", "TargetFlowName",
                      "TargetFlowContext", "TargetUnit"]
    if flowmapping.empty:
        log.warning("No mapping file in fedelemflowlist found for " +
                    from_fba_source)
        # return the original df but with columns renamed so can continue
        # working on the FBS
        return fba.copy()

    # left merge keeps rows that find no mapping match
    merged = pd.merge(fba, flowmapping[mapping_fields],
                      left_on=["Flowable", "Context"],
                      right_on=["SourceFlowName", "SourceFlowContext"],
                      how="left")

    # adopt target name/context/unit on matched rows...
    hit = merged["TargetFlowName"].notnull()
    for dest, src in (("Flowable", "TargetFlowName"),
                      ("Context", "TargetFlowContext"),
                      ("Unit", "TargetUnit")):
        merged.loc[hit, dest] = merged[src]
    # ...and convert amounts into the target unit
    merged.loc[hit, "FlowAmount"] = \
        merged["FlowAmount"] * merged["ConversionFactor"]

    # drop mapping bookkeeping columns
    return merged.drop(columns=mapping_fields)
def map_elementary_flows(fba, from_fba_source):
    """
    Applies mapping from fedelemflowlist to convert flows to
    fedelemflowlist flows
    :param fba: df flow-by-activity or flow-by-sector with 'Flowable',
        'Context', and 'Unit' fields
    :param from_fba_source: str Source name of fba list to look for mappings
    :return: df with flows mapped to the federal elementary flow list
    """
    from fedelemflowlist import get_flowmapping
    flowmapping = get_flowmapping(from_fba_source)
    mapping_fields = [
        "SourceListName", "SourceFlowName", "SourceFlowContext", "SourceUnit",
        "ConversionFactor", "TargetFlowName", "TargetFlowContext",
        "TargetUnit"
    ]
    if flowmapping.empty:
        # bug fix: logging.Logger has no ERROR() method — the original
        # log.ERROR(...) raised AttributeError instead of logging
        log.error("No mapping file in fedelemflowlist found for " +
                  from_fba_source)
    flowmapping = flowmapping[mapping_fields]
    # merge fba with flows; a left merge retains unmapped rows
    fba_mapped_df = pd.merge(fba, flowmapping,
                             left_on=["Flowable", "Context"],
                             right_on=["SourceFlowName", "SourceFlowContext"],
                             how="left")
    # where a target flow was found, overwrite name/context/unit
    fba_mapped_df.loc[fba_mapped_df["TargetFlowName"].notnull(),
                      "Flowable"] = fba_mapped_df["TargetFlowName"]
    fba_mapped_df.loc[fba_mapped_df["TargetFlowName"].notnull(),
                      "Context"] = fba_mapped_df["TargetFlowContext"]
    fba_mapped_df.loc[fba_mapped_df["TargetFlowName"].notnull(),
                      "Unit"] = fba_mapped_df["TargetUnit"]
    # convert amounts into the target unit
    fba_mapped_df.loc[fba_mapped_df["TargetFlowName"].notnull(),
                      "FlowAmount"] = \
        fba_mapped_df["FlowAmount"] * fba_mapped_df["ConversionFactor"]
    # drop mapping bookkeeping columns
    fba_mapped_df = fba_mapped_df.drop(columns=mapping_fields)
    return fba_mapped_df
import pandas as pd
import fedelemflowlist
# NOTE(review): inventories / fedelemflowlist_version are not used in this
# visible chunk — presumably used elsewhere in the module; confirm
from electricitylci.model_config import inventories, fedelemflowlist_version

# flowlist = fedelemflowlist.get_flowlist()

# Module-level: load the full federal flow mapping once and keep only the
# source/target identification columns needed for the merge below.
mapping_to_fedelemflows = fedelemflowlist.get_flowmapping()
mapping_to_fedelemflows = mapping_to_fedelemflows[[
    "SourceListName",
    "SourceFlowName",
    "SourceFlowContext",
    "SourceUnit",
    "TargetFlowName",
    "TargetFlowUUID",
    "TargetFlowContext",
    "TargetUnit",
]]


def map_emissions_to_fedelemflows(df_with_flows_compartments):
    """Left-join the federal flow mapping onto the given frame by
    (FlowName, Compartment) and overwrite FlowName where a match exists.

    NOTE(review): no return statement and no Compartment/Unit update are
    visible here — the remainder of this function appears truncated in
    this view; confirm against the full file.
    """
    mapped_df = pd.merge(
        df_with_flows_compartments,
        # de-duplicate on the merge keys so the join cannot multiply rows
        mapping_to_fedelemflows.drop_duplicates(
            subset=["SourceFlowName", "SourceFlowContext"]),
        left_on=["FlowName", "Compartment"],
        right_on=["SourceFlowName", "SourceFlowContext"],
        how="left",
    )
    # If a NewName is present there was a match, replace FlowName and Compartment with new names
    mapped_df.loc[mapped_df["TargetFlowName"].notnull(),
                  "FlowName"] = mapped_df["TargetFlowName"]
""" Determines FEDEFL flowables not used in a mapping and exports those flowables to csv output: csv with columns 'Class','Flowable','CAS No','Formula','Synonyms' """ import fedelemflowlist import pandas as pd from fedelemflowlist.globals import outputpath, flow_list_fields #Set name of mapping file. More than one mapping file can be used mapping_to_use = ['TRACI2.1'] if __name__ == '__main__': mapping = fedelemflowlist.get_flowmapping(mapping_to_use) # Get Flow UUIDs for flows used in selected mapping mapping_flow_uuids = pd.DataFrame(pd.unique(mapping['TargetFlowUUID']), columns=["Flow UUID"]) # Get all flows all_flows = fedelemflowlist.get_flows() all_UUIDs = all_flows['Flow UUID'] # Subset all flows to get just those used in selected mapping flows_used_in_mapping = pd.merge(all_flows, mapping_flow_uuids) flows_used_UUIDs = flows_used_in_mapping['Flow UUID'] # Flows not in mappings flows_notused_UUIDs = set(all_UUIDs) - set(flows_used_UUIDs) len(flows_notused_UUIDs) flows_notused = all_flows[all_flows['Flow UUID'].isin(flows_notused_UUIDs)]
""" Combines all mapping files. To help maintain consistency in future future mappings output: xlsx in the mapping format. """ import fedelemflowlist from fedelemflowlist.globals import outputpath if __name__ == '__main__': mapping = fedelemflowlist.get_flowmapping() #the following line sets "=" so it has a space in front so it displays properly mapping.loc[mapping['MatchCondition'] == "=", 'MatchCondition'] = " =" mapping.to_excel(outputpath + 'All_Mappings.xlsx', index=False)
def apply_flow_mapping(df, source, flow_type, keep_unmapped_rows=False,
                       field_dict=None, ignore_source_name=False):
    """ Maps a dataframe using a flow mapping file from fedelemflowlist or
    materialflowlist.
    :param df: dataframe to be mapped
    :param source: list or str, name of mapping file(s)
    :param flow_type: str either 'ELEMENTARY_FLOW', 'TECHNOSPHERE_FLOW',
        or 'WASTE_FLOW'
    :param keep_unmapped_rows: bool, False if want unmapped rows dropped,
        True if want to retain
    :param field_dict: dictionary of field names in df containing the
        following keys: 'SourceName', 'FlowableName', 'FlowableUnit',
        'FlowableContext', 'FlowableQuantity', 'UUID'. If None, uses the
        default fields of 'SourceName','Flowable', 'Unit','Context',
        'FlowAmount','FlowUUID'
    :param ignore_source_name: bool, False if flows should be mapped based on
        SourceName. (E.g., should be False when mapping across multiple
        datasets)
    :return: mapped dataframe, or None when the mapping package is missing
        or no mapping is found
    """
    if field_dict is None:
        # Default field dictionary for mapping
        field_dict = {'SourceName': 'SourceName',
                      'FlowableName': 'Flowable',
                      'FlowableUnit': 'Unit',
                      'FlowableContext': 'Context',
                      'FlowableQuantity': 'FlowAmount',
                      'UUID': 'FlowUUID'}

    mapping_fields = ["SourceListName",
                      "SourceFlowName",
                      "SourceFlowContext",
                      "SourceUnit",
                      "ConversionFactor",
                      "TargetFlowName",
                      "TargetFlowContext",
                      "TargetUnit",
                      "TargetFlowUUID"]

    if flow_type == 'ELEMENTARY_FLOW':
        try:
            import fedelemflowlist as fedefl
            mapping = fedefl.get_flowmapping(source)
        except ImportError:
            log.warning(
                'Error importing fedelemflowlist, install fedelemflowlist '
                'to apply flow mapping to elementary flows: '
                'https://github.com/USEPA/Federal-LCA-Commons-Elementary-Flow-List/wiki/GitHub-Contributors#install-for-users'
            )
            return None
    else:
        try:
            import materialflowlist as mfl
            mapping = mfl.get_flowmapping(source)
        except ImportError:
            log.warning(
                'Error importing materialflowlist, install materialflowlist '
                'to apply flow mapping to waste and technosphere flows: '
                'https://github.com/USEPA/materialflowlist/wiki')
            return None
    if len(mapping) == 0:
        # mapping not found
        return None

    mapping = mapping[mapping_fields]
    mapping[['ConversionFactor']] = mapping[['ConversionFactor'
                                             ]].fillna(value=1)
    # an inner merge drops unmapped rows; a left merge retains them
    if keep_unmapped_rows is False:
        merge_type = 'inner'
    else:
        merge_type = 'left'

    map_to = [field_dict['SourceName'],
              field_dict['FlowableName'],
              field_dict['FlowableContext'],
              field_dict['FlowableUnit']]
    map_from = ["SourceListName",
                "SourceFlowName",
                "SourceFlowContext",
                "SourceUnit"]
    if ignore_source_name:
        map_to.remove(field_dict['SourceName'])
        map_from.remove('SourceListName')

    # Fill merge-key NaNs so they can match empty strings in the mapping.
    # bug fix: the original used .fillna(inplace=True) on a column selection
    # (chained assignment), which is deprecated in pandas and may silently
    # operate on a copy; assign the column directly instead.
    for field in map_to:
        df[field] = df[field].fillna('')
    for field in map_from:
        mapping[field] = mapping[field].fillna('')

    # merge df with flows
    mapped_df = pd.merge(df, mapping,
                         left_on=map_to,
                         right_on=map_from,
                         how=merge_type)

    # where a target flow was found, overwrite name/context/unit/UUID and
    # convert the quantity into the target unit
    criteria = mapped_df['TargetFlowName'].notnull()
    mapped_df.loc[criteria,
                  field_dict['FlowableName']] = mapped_df["TargetFlowName"]
    mapped_df.loc[criteria,
                  field_dict['FlowableContext']] = mapped_df["TargetFlowContext"]
    mapped_df.loc[criteria,
                  field_dict['FlowableUnit']] = mapped_df["TargetUnit"]
    mapped_df.loc[criteria, field_dict["FlowableQuantity"]] = \
        mapped_df[field_dict["FlowableQuantity"]] * mapped_df["ConversionFactor"]
    mapped_df.loc[criteria, field_dict['UUID']] = mapped_df["TargetFlowUUID"]

    # drop mapping fields
    mapped_df = mapped_df.drop(columns=mapping_fields)
    return mapped_df
def setUp(self):
    """Get the flow mappings and flow list used for all tests."""
    self.flowmappings = fedelemflowlist.get_flowmapping()
    # bug fix: original had a duplicated chained assignment
    # (self.flowlist = self.flowlist = ...)
    self.flowlist = fedelemflowlist.get_flows()
Requires target unit to be the primary unit. Existing conversion factor must be set to 1 to avoid replacing manual conversion factors. Mapping file must already conform to mapping format.
"""
import pandas as pd
import fedelemflowlist
from fedelemflowlist.globals import flowmappingpath, flowmapping_fields, log

#Add source name here. The .csv mapping file with this name must be in the flowmapping directory
#None can be used to add conversions in all mapping files
source = 'ReCiPe2016'

if __name__ == '__main__':
    # Pull in mapping file
    mapping = fedelemflowlist.get_flowmapping(source)
    conversions = fedelemflowlist.get_alt_conversion()
    # merge in conversion factors where source unit = alternate unit
    mapping_w_conversion = pd.merge(
        mapping, conversions, how='left',
        left_on=['TargetFlowName', 'SourceUnit', 'TargetUnit'],
        right_on=['Flowable', 'AltUnit', 'Unit'])
    # update conversion factor where current conversion is 1 and the updated
    # conversion exists
    converted1 = mapping_w_conversion['InverseConversionFactor'].notnull()
    converted2 = mapping_w_conversion['ConversionFactor'] == 1
    mapping_w_conversion['Convert'] = converted1 & converted2
    mapping_w_conversion.loc[
        (mapping_w_conversion['Convert'] == True),
        'ConversionFactor'] = mapping_w_conversion['InverseConversionFactor']
    # NOTE(review): the updated mapping is not written back out in this
    # visible chunk — confirm the save step follows in the full file
def supported_mapping_systems() -> list:
    """Return the distinct source-list systems present in the flow mapping.

    Reads the first column of the mapping frame (assumed to be the
    source-list name — TODO confirm column order against flowlist).
    """
    fmap = flowlist.get_flowmapping()  # type: pd.DataFrame
    # idiom: collect unique values of column 0 directly instead of an
    # index-based iat loop
    return list(set(fmap.iloc[:, 0]))
import pandas as pd
import fedelemflowlist
from electricitylci.model_config import inventories, fedelemflowlist_version

#flowlist = fedelemflowlist.get_flowlist()

# Module-level: load the mapping for the configured version/inventories once.
# This chunk uses an older fedelemflowlist API (Original*/New* column names).
mapping_to_fedelemflows = fedelemflowlist.get_flowmapping(
    version=fedelemflowlist_version, source_list=inventories)
mapping_to_fedelemflows = mapping_to_fedelemflows[[
    'Source', 'OriginalName', 'OriginalCategory', 'OriginalProperty',
    'NewName', 'NewCategory', 'NewSubCategory', 'NewUnit', 'UUID'
]]


def map_emissions_to_fedelemflows(df_with_flows_compartments):
    """Left-join the flow mapping by (Source, FlowName, Compartment); where
    a NewName matched, overwrite FlowName/Compartment/Unit with the mapped
    values, then rename UUID to FlowUUID.

    NOTE(review): the function ends on a comment with no return statement —
    the remainder appears truncated in this view; confirm against the
    full file.
    """
    mapped_df = pd.merge(
        df_with_flows_compartments,
        mapping_to_fedelemflows,
        left_on=['Source', 'FlowName', 'Compartment'],
        right_on=['Source', 'OriginalName', 'OriginalCategory'],
        how='left')
    #If a NewName is present there was a match, replace FlowName and Compartment with new names
    mapped_df.loc[mapped_df['NewName'].notnull(),
                  'FlowName'] = mapped_df['NewName']
    mapped_df.loc[mapped_df['NewName'].notnull(),
                  'Compartment'] = mapped_df['NewCategory']
    mapped_df.loc[mapped_df['NewName'].notnull(),
                  'Unit'] = mapped_df['NewUnit']
    mapped_df = mapped_df.rename(columns={'UUID': 'FlowUUID'})
    #If air, soil, or water assigned it directionality of emission. Others will get assigned later as needed
def concat_map_upstream_databases(eia_gen_year, *arg, **kwargs):
    """
    Concatenates all of the databases given as args. Then all of the
    emissions in the combined database are mapped to the federal
    elementary flows list based on the mapping file 'eLCI' in preparation
    for being turned into openLCA processes and combined with the
    generation emissions.
    Parameters
    ----------
    *arg : dataframes
        The dataframes to be combined, generated by the upstream modules
        or renewables modules (electricitylci.nuclear_upstream,
        .petroleum_upstream, .solar_upstream, etc.)
    Returns
    -------
    datafame
        if kwarg group_name is used then the function will return a tuple
        containing the mapped dataframe and lists of tuples for the
        unique mapped and unmapped flows.
    """
    # NOTE(review): the docstring above originally followed this import, so
    # it was not the function's actual docstring; it has been moved up.
    import fedelemflowlist as fedefl

    # mapping output columns -> names used by the rest of the pipeline
    mapped_column_dict = {
        "TargetFlowName": "FlowName",
        "TargetFlowUUID": "FlowUUID",
        "TargetFlowContext": "Compartment",
        "TargetUnit": "Unit",
    }
    # plain compartment label -> full context path used by the mapping file
    compartment_mapping = {
        "air": "emission/air",
        "water": "emission/water",
        "ground": "emission/ground",
        "soil": "emission/ground",
        "resource": "resource",
        "NETL database/emissions": "NETL database/emissions",
        "NETL database/resources": "NETL database/resources",
    }
    print(f"Concatenating and flow-mapping {len(arg)} upstream databases.")
    upstream_df_list = list()
    # Collect only DataFrame args, ensuring each has a Compartment_path.
    # NOTE(review): the fillna(inplace=True) below mutates the caller's
    # dataframes in place.
    for df in arg:
        if isinstance(df, pd.DataFrame):
            if "Compartment_path" not in df.columns:
                df["Compartment_path"] = float("nan")
            df["Compartment_path"].fillna(
                df["Compartment"].map(compartment_mapping), inplace=True)
            upstream_df_list.append(df)
    upstream_df = pd.concat(upstream_df_list, ignore_index=True, sort=False)
    module_logger.info("Creating flow mapping database")
    flow_mapping = fedefl.get_flowmapping('eLCI')
    # lower-case source names to match the normalization applied below
    flow_mapping["SourceFlowName"] = flow_mapping["SourceFlowName"].str.lower()
    module_logger.info("Preparing upstream df for merge")
    # keep originals so matched/unmatched flows can be reported later
    upstream_df["FlowName_orig"] = upstream_df["FlowName"]
    upstream_df["Compartment_orig"] = upstream_df["Compartment"]
    upstream_df["Compartment_path_orig"] = upstream_df["Compartment_path"]
    upstream_df["Unit_orig"] = upstream_df["Unit"]
    # normalize merge keys: lower-case, strip trailing whitespace
    upstream_df["FlowName"] = upstream_df["FlowName"].str.lower().str.rstrip()
    upstream_df["Compartment"] = (
        upstream_df["Compartment"].str.lower().str.rstrip())
    upstream_df["Compartment_path"] = (
        upstream_df["Compartment_path"].str.lower().str.rstrip())
    upstream_columns = upstream_df.columns
    groupby_cols = [
        "fuel_type",
        "stage_code",
        "FlowName",
        "Compartment",
        "input",
        "plant_id",
        "Compartment_path",
        "Unit",
        "FlowName_orig",
        "Compartment_path_orig",
        "Unit_orig",
    ]
    # NaN units would silently drop rows from the groupby keys
    upstream_df["Unit"].fillna("<blank>", inplace=True)
    module_logger.info("Grouping upstream database")
    if "Electricity" in upstream_df.columns:
        upstream_df_grp = upstream_df.groupby(groupby_cols,
                                              as_index=False).agg({
            "FlowAmount": "sum",
            "quantity": "mean",
            "Electricity": "mean"
        })
    else:
        upstream_df_grp = upstream_df.groupby(groupby_cols,
                                              as_index=False).agg({
            "FlowAmount": "sum",
            "quantity": "mean"
        })
    # keep only the columns needed for the matched/unmatched report below
    upstream_df = upstream_df[[
        "FlowName_orig", "Compartment_path_orig", "stage_code"
    ]]
    module_logger.info("Merging upstream database and flow mapping")
    upstream_mapped_df = pd.merge(
        left=upstream_df_grp,
        right=flow_mapping,
        left_on=["FlowName", "Compartment_path"],
        right_on=["SourceFlowName", "SourceFlowContext"],
        how="left",
    )
    del (upstream_df_grp, flow_mapping)
    # drop the normalized key columns; the renamed Target* columns replace them
    # NOTE(review): a set is passed where a list is conventional for columns=
    upstream_mapped_df.drop(columns={"FlowName", "Compartment", "Unit"},
                            inplace=True)
    upstream_mapped_df = upstream_mapped_df.rename(columns=mapped_column_dict,
                                                   copy=False)
    upstream_mapped_df.drop_duplicates(
        subset=["plant_id", "FlowName", "Compartment_path", "FlowAmount"],
        inplace=True,
    )
    # rows with no mapping match have no target FlowName — drop them
    upstream_mapped_df.dropna(subset=["FlowName"], inplace=True)
    # upstream_mapped_df.to_csv(f"{output_dir}/upstream_mapped_df.csv")
    module_logger.info("Applying conversion factors")
    upstream_mapped_df["FlowAmount"] = (upstream_mapped_df["FlowAmount"]
                                        * upstream_mapped_df["ConversionFactor"])
    upstream_mapped_df.rename(columns={"fuel_type": "FuelCategory"},
                              inplace=True)
    upstream_mapped_df["FuelCategory"] = upstream_mapped_df[
        "FuelCategory"].str.upper()
    # everything defaults to an emission unless its compartment says resource
    upstream_mapped_df["ElementaryFlowPrimeContext"] = "emission"
    upstream_mapped_df.loc[
        upstream_mapped_df["Compartment"].str.contains("resource"),
        "ElementaryFlowPrimeContext",
    ] = "resource"
    upstream_mapped_df["Source"] = "netl"
    upstream_mapped_df["Year"] = eia_gen_year
    final_columns = [
        "plant_id",
        "FuelCategory",
        "stage_code",
        "FlowName",
        "Compartment",
        "Compartment_path",
        "FlowUUID",
        "Unit",
        "ElementaryFlowPrimeContext",
        "FlowAmount",
        "quantity",
        # "Electricity",
        "Source",
        "Year",
    ]
    if "Electricity" in upstream_columns:
        final_columns = final_columns + ["Electricity"]
    if "input" in upstream_columns:
        final_columns = final_columns + ["input"]
    # I added the section below to help generate lists of matched and unmatched
    # flows. Because of the groupby, it's expensive enough not to run everytime.
    # I didn't want to get rid of it in case it comes in handy later.
    if kwargs != {}:
        if "group_name" in kwargs:
            module_logger.info("kwarg group_name used: generating flows lists")
            unique_orig = upstream_df.groupby(
                by=["FlowName_orig", "Compartment_path_orig"]).groups
            unique_mapped = upstream_mapped_df.groupby(by=[
                "FlowName_orig",
                "Compartment_path_orig",
                "Unit_orig",
                "FlowName",
                "Compartment",
                "Unit",
            ]).groups
            unique_mapped_set = set(unique_mapped.keys())
            unique_orig_set = set(unique_orig.keys())
            # original keys with no counterpart in the mapped output
            unmatched_list = sorted(list(unique_orig_set - unique_mapped_set))
            matched_list = sorted(list(unique_mapped_set))
            fname_append = f"_{kwargs['group_name']}"
            with open(f"{output_dir}/flowmapping_lists{fname_append}.txt",
                      "w") as f:
                f.write("Unmatched flows\n")
                if kwargs is not None:
                    if kwargs["group_name"] is not None:
                        f.write(f"From the group: {kwargs['group_name']}\n")
                for x in unmatched_list:
                    f.write(f"{x}\n")
                f.write("\nMatched flows\n")
                for x in matched_list:
                    f.write(f"{x}\n")
                # NOTE(review): redundant — the with-block already closes f
                f.close()
            upstream_mapped_df = upstream_mapped_df[final_columns]
            return upstream_mapped_df, unmatched_list, matched_list
    upstream_mapped_df = upstream_mapped_df[final_columns]
    return upstream_mapped_df