Example #1
0
def map_to_fedefl(df):
    """Map TRI flow names in ``df`` onto FEDEFL target flow names.

    Returns the mapped dataframe, or None when fedelemflowlist is not
    installed. Unmapped trade-secret and mixture flows are dropped.
    """
    try:
        import fedelemflowlist
    except ImportError:
        log.warning('requires installation of fedelemflowlist, data will not '
                    'validate correctly')
        return None
    # Only the source/target name pair from the TRI mapping is needed.
    name_map = fedelemflowlist.get_flowmapping('TRI')
    name_map = name_map[['SourceFlowName', 'TargetFlowName']].drop_duplicates()
    result = df.merge(name_map, how='left',
                      left_on='FlowName', right_on='SourceFlowName')
    # Duplicate source names in the mapping would duplicate rows and
    # inflate the total; flag that as data loss.
    if result['FlowAmount'].sum() != df['FlowAmount'].sum():
        log.warning('Error on mapping, data loss')
    # validation throws errors when mixture and trade secret chemicals are
    # maintained so drop them while they remain unmapped
    lowered = result['FlowName'].str.lower()
    drop_mask = (result['TargetFlowName'].isna()
                 & (lowered.str.contains('trade secret')
                    | lowered.str.contains('mixture')))
    result = result[~drop_mask].reset_index(drop=True)
    unmapped = result.loc[result['TargetFlowName'].isna(), 'FlowName']
    unmapped = unmapped.drop_duplicates().sort_values()
    if len(unmapped) > 0:
        log.debug('flows from reference df missing in mapping file')
    has_target = ~result['TargetFlowName'].isna()
    result.loc[has_target, 'FlowName'] = result['TargetFlowName']
    return result.drop(columns=['SourceFlowName', 'TargetFlowName'])
Example #2
0
def map_elementary_flows(fba, from_fba_source, keep_unmapped_rows=False):
    """
    Applies mapping from fedelemflowlist to convert flows to fedelemflowlist flows
    :param fba: df flow-by-activity or flow-by-sector with 'Flowable', 'Context', and 'Unit' fields
    :param from_fba_source: str Source name of fba list to look for mappings
    :param keep_unmapped_rows: False if want unmapped rows dropped, True if want to retain
    :return:
    """
    from fedelemflowlist import get_flowmapping

    # rename columns to match FBS formatting
    fba = fba.rename(columns={"FlowName": 'Flowable',
                              "Compartment": "Context"})

    mapping = get_flowmapping(from_fba_source)
    map_cols = [
        "SourceListName", "SourceFlowName", "SourceFlowContext", "SourceUnit",
        "ConversionFactor", "TargetFlowName", "TargetFlowContext", "TargetUnit"
    ]
    if mapping.empty:
        log.warning("No mapping file in fedelemflowlist found for " +
                    from_fba_source)
        # return the original df but with columns renamed so can continue working on the FBS
        return fba.copy()

    # inner merge drops unmapped rows; left merge retains them
    how = 'inner' if keep_unmapped_rows is False else 'left'

    merged = pd.merge(fba, mapping[map_cols],
                      left_on=["Flowable", "Context"],
                      right_on=["SourceFlowName", "SourceFlowContext"],
                      how=how)
    # Overwrite flow metadata only on rows that actually matched.
    matched = merged["TargetFlowName"].notnull()
    merged.loc[matched, "Flowable"] = merged["TargetFlowName"]
    merged.loc[matched, "Context"] = merged["TargetFlowContext"]
    merged.loc[matched, "Unit"] = merged["TargetUnit"]
    merged.loc[matched, "FlowAmount"] = \
        merged["FlowAmount"] * merged["ConversionFactor"]

    # drop the mapping bookkeeping columns before returning
    return merged.drop(columns=map_cols)
Example #3
0
 def __init__(self, df: pandas.DataFrame, system=None,
              mapping=None, preserve_unmapped=False, case_insensitive=False):
     """Hold a dataframe and the flow mapping used to translate its flows.

     :param df: dataframe whose flows will be mapped
     :param system: source system name passed to flowlist.get_flowmapping
     :param mapping: pre-built mapping dataframe; fetched from
         fed.elem.flows when None
     :param preserve_unmapped: keep rows that find no mapping entry
     :param case_insensitive: lower-case source flow names before matching
         (only applied when the mapping is fetched here, not to a
         caller-supplied mapping)
     """
     self.__df = df
     self.__system = system
     self.__case_insensitive = case_insensitive
     if mapping is None:
         # bug fix: the original message contained a bare '%s' placeholder
         # with no argument, so the literal '%s' was logged; pass the
         # source system lazily instead.
         log.info("load flow mapping for source=%s from fed.elem.flows",
                  system)
         mapping = flowlist.get_flowmapping(source=system)
         if self.__case_insensitive:
             mapping['SourceFlowName'] = mapping['SourceFlowName'].str.lower()
     self.__mapping = mapping  # type: pandas.DataFrame
     self.__preserve_unmapped = preserve_unmapped
Example #4
0
    def __init__(self,
                 version="0.1",
                 flow_list: pd.DataFrame = None,
                 flow_mapping: pd.DataFrame = None):
        """Store the flow list and flow mapping for the given version.

        When either dataframe is not supplied, it is loaded from the
        fedelemflowlist package for ``version``.
        """
        self.version = version
        # Fall back to the packaged data when none is supplied.
        self.flow_list = (fedfl.get_flowlist(version)
                          if flow_list is None
                          else flow_list)  # type: pd.DataFrame
        self.flow_mapping = (fedfl.get_flowmapping(version)
                             if flow_mapping is None
                             else flow_mapping)  # type: pd.DataFrame
Example #5
0
def map_elementary_flows(fba, from_fba_source):
    """
    Applies mapping from fedelemflowlist to convert flows to fedelemflowlist flows
    :param fba: df flow-by-activity or flow-by-sector with 'Flowable', 'Context', and 'Unit' fields
    :param from_fba_source: str Source name of fba list to look for mappings
    :return:
    """
    from fedelemflowlist import get_flowmapping

    # rename flow name to flowable - remove this once elementary flows are mapped
    fba = fba.rename(columns={"FlowName": 'Flowable', "Compartment": "Context"})

    mapping = get_flowmapping(from_fba_source)
    map_cols = ["SourceListName", "SourceFlowName", "SourceFlowContext",
                "SourceUnit", "ConversionFactor", "TargetFlowName",
                "TargetFlowContext", "TargetUnit"]
    if mapping.empty:
        log.warning("No mapping file in fedelemflowlist found for " + from_fba_source)
        # return the original df but with columns renamed so can continue working on the FBS
        return fba.copy()

    # merge fba with flows; left join keeps unmapped rows
    merged = pd.merge(fba, mapping[map_cols],
                      left_on=["Flowable", "Context"],
                      right_on=["SourceFlowName", "SourceFlowContext"],
                      how="left")
    # Overwrite flow metadata only where a mapping entry matched.
    matched = merged["TargetFlowName"].notnull()
    merged.loc[matched, "Flowable"] = merged["TargetFlowName"]
    merged.loc[matched, "Context"] = merged["TargetFlowContext"]
    merged.loc[matched, "Unit"] = merged["TargetUnit"]
    merged.loc[matched, "FlowAmount"] = \
        merged["FlowAmount"] * merged["ConversionFactor"]

    # drop the mapping bookkeeping columns before returning
    return merged.drop(columns=map_cols)
Example #6
0
def map_elementary_flows(fba, from_fba_source):
    """
    Applies mapping from fedelemflowlist to convert flows to fedelemflowlist flows
    :param fba: df flow-by-activity or flow-by-sector with 'Flowable', 'Context', and 'Unit' fields
    :param from_fba_source: str Source name of fba list to look for mappings
    :return: mapped dataframe with the mapping bookkeeping columns removed
    """
    from fedelemflowlist import get_flowmapping

    flowmapping = get_flowmapping(from_fba_source)
    mapping_fields = [
        "SourceListName", "SourceFlowName", "SourceFlowContext", "SourceUnit",
        "ConversionFactor", "TargetFlowName", "TargetFlowContext", "TargetUnit"
    ]
    if flowmapping.empty:
        # bug fix: logging.Logger has no ERROR() method; the original
        # 'log.ERROR(...)' raised AttributeError whenever the mapping was
        # empty. The left merge below then simply leaves all rows unmapped.
        log.error("No mapping file in fedelemflowlist found for " +
                  from_fba_source)
    flowmapping = flowmapping[mapping_fields]
    # merge fba with flows; left join keeps unmapped rows
    fba_mapped_df = pd.merge(fba,
                             flowmapping,
                             left_on=["Flowable", "Context"],
                             right_on=["SourceFlowName", "SourceFlowContext"],
                             how="left")
    # Overwrite flow metadata only where a mapping entry matched.
    fba_mapped_df.loc[fba_mapped_df["TargetFlowName"].notnull(),
                      "Flowable"] = fba_mapped_df["TargetFlowName"]
    fba_mapped_df.loc[fba_mapped_df["TargetFlowName"].notnull(),
                      "Context"] = fba_mapped_df["TargetFlowContext"]
    fba_mapped_df.loc[fba_mapped_df["TargetFlowName"].notnull(),
                      "Unit"] = fba_mapped_df["TargetUnit"]
    fba_mapped_df.loc[fba_mapped_df["TargetFlowName"].notnull(), "FlowAmount"] = \
        fba_mapped_df["FlowAmount"] * fba_mapped_df["ConversionFactor"]

    # drop the mapping bookkeeping columns
    fba_mapped_df = fba_mapped_df.drop(columns=mapping_fields)
    return fba_mapped_df
Example #7
0
import pandas as pd
import fedelemflowlist
from electricitylci.model_config import inventories, fedelemflowlist_version

# flowlist = fedelemflowlist.get_flowlist()
# Module-level cache: load the full flow mapping once at import time and
# keep only the columns needed for the merge in map_emissions_to_fedelemflows.
mapping_to_fedelemflows = fedelemflowlist.get_flowmapping()
mapping_to_fedelemflows = mapping_to_fedelemflows[[
    "SourceListName",
    "SourceFlowName",
    "SourceFlowContext",
    "SourceUnit",
    "TargetFlowName",
    "TargetFlowUUID",
    "TargetFlowContext",
    "TargetUnit",
]]


def map_emissions_to_fedelemflows(df_with_flows_compartments):
    """Left-merge (FlowName, Compartment) pairs against the module-level
    FEDEFL mapping table and overwrite FlowName where a match was found.

    NOTE(review): this excerpt appears truncated — no return statement is
    visible and only FlowName is overwritten; Compartment/Unit handling is
    presumably below the cut. Confirm against the full source.
    """

    # Duplicates on the source key are dropped so the merge cannot
    # multiply rows of the input dataframe.
    mapped_df = pd.merge(
        df_with_flows_compartments,
        mapping_to_fedelemflows.drop_duplicates(
            subset=["SourceFlowName", "SourceFlowContext"]),
        left_on=["FlowName", "Compartment"],
        right_on=["SourceFlowName", "SourceFlowContext"],
        how="left",
    )
    # If a NewName is present there was a match, replace FlowName and Compartment with new names
    mapped_df.loc[mapped_df["TargetFlowName"].notnull(),
                  "FlowName"] = mapped_df["TargetFlowName"]
"""
Determines FEDEFL flowables not used in a mapping and exports those flowables to csv
output: csv with columns 'Class','Flowable','CAS No','Formula','Synonyms'
"""

import fedelemflowlist
import pandas as pd
from fedelemflowlist.globals import outputpath, flow_list_fields

#Set name of mapping file. More than one mapping file can be used
mapping_to_use = ['TRACI2.1']

if __name__ == '__main__':
    mapping = fedelemflowlist.get_flowmapping(mapping_to_use)
    # Get Flow UUIDs for flows used in selected mapping
    mapping_flow_uuids = pd.DataFrame(pd.unique(mapping['TargetFlowUUID']),
                                      columns=["Flow UUID"])

    # Get all flows
    all_flows = fedelemflowlist.get_flows()
    all_UUIDs = all_flows['Flow UUID']
    # Subset all flows to get just those used in selected mapping
    flows_used_in_mapping = pd.merge(all_flows, mapping_flow_uuids)

    flows_used_UUIDs = flows_used_in_mapping['Flow UUID']

    # Flows not in mappings
    flows_notused_UUIDs = set(all_UUIDs) - set(flows_used_UUIDs)
    # NOTE(review): bare len() call below has no effect — likely notebook
    # residue; the csv export promised by the module docstring is not
    # visible in this excerpt, so the script appears truncated here.
    len(flows_notused_UUIDs)

    flows_notused = all_flows[all_flows['Flow UUID'].isin(flows_notused_UUIDs)]
Example #9
0
"""
Combines all mapping files.

To help maintain consistency in future mappings
output: xlsx in the mapping format.
"""

import fedelemflowlist
from fedelemflowlist.globals import outputpath

if __name__ == '__main__':
    # Combine every mapping file into one table.
    all_mappings = fedelemflowlist.get_flowmapping()
    # Prefix "=" with a space so spreadsheet software does not interpret
    # the cell as a formula when the xlsx is opened.
    is_equals = all_mappings['MatchCondition'] == "="
    all_mappings.loc[is_equals, 'MatchCondition'] = " ="
    all_mappings.to_excel(outputpath + 'All_Mappings.xlsx', index=False)
Example #10
0
def apply_flow_mapping(df,
                       source,
                       flow_type,
                       keep_unmapped_rows=False,
                       field_dict=None,
                       ignore_source_name=False):
    """
    Maps a dataframe using a flow mapping file from fedelemflowlist or
    materialflowlist.

    :param df: dataframe to be mapped (not modified; a copy is used)
    :param source: list or str, name of mapping file(s)
    :param flow_type: str either 'ELEMENTARY_FLOW', 'TECHNOSPHERE_FLOW',
        or 'WASTE_FLOW'
    :param keep_unmapped_rows: bool, False if want unmapped rows
        dropped, True if want to retain
    :param field_dict: dictionary of field names in df containing the following keys:
        'SourceName',
        'FlowableName',
        'FlowableUnit',
        'FlowableContext',
        'FlowableQuantity',
        'UUID'.
        If None, uses the default fields of 'SourceName','Flowable',
        'Unit','Context','FlowAmount','FlowUUID'
    :param ignore_source_name: bool, False if flows should be mapped based on
        SourceName. (E.g., should be False when mapping across multiple datasets)
    :return: mapped dataframe, or None when the required mapping package is
        not installed or no mapping is found
    """
    if field_dict is None:
        # Default field dictionary for mapping
        field_dict = {
            'SourceName': 'SourceName',
            'FlowableName': 'Flowable',
            'FlowableUnit': 'Unit',
            'FlowableContext': 'Context',
            'FlowableQuantity': 'FlowAmount',
            'UUID': 'FlowUUID'
        }

    mapping_fields = [
        "SourceListName", "SourceFlowName", "SourceFlowContext", "SourceUnit",
        "ConversionFactor", "TargetFlowName", "TargetFlowContext",
        "TargetUnit", "TargetFlowUUID"
    ]

    # Elementary flows come from fedelemflowlist; waste/technosphere flows
    # come from materialflowlist. Either package is an optional dependency.
    if flow_type == 'ELEMENTARY_FLOW':
        try:
            import fedelemflowlist as fedefl
            mapping = fedefl.get_flowmapping(source)
        except ImportError:
            log.warning(
                'Error importing fedelemflowlist, install fedelemflowlist '
                'to apply flow mapping to elementary flows: '
                'https://github.com/USEPA/Federal-LCA-Commons-Elementary-Flow-List/wiki/GitHub-Contributors#install-for-users'
            )
            return None
    else:
        try:
            import materialflowlist as mfl
            mapping = mfl.get_flowmapping(source)
        except ImportError:
            log.warning(
                'Error importing materialflowlist, install materialflowlist '
                'to apply flow mapping to waste and technosphere flows: '
                'https://github.com/USEPA/materialflowlist/wiki')
            return None
    if len(mapping) == 0:
        # mapping not found
        return None

    # Copy so assignments below do not hit a slice of the package's frame.
    mapping = mapping[mapping_fields].copy()
    # A missing conversion factor means "no unit change".
    mapping['ConversionFactor'] = mapping['ConversionFactor'].fillna(1)
    # inner merge drops unmapped rows; left merge retains them
    if keep_unmapped_rows is False:
        merge_type = 'inner'
    else:
        merge_type = 'left'

    map_to = [
        field_dict['SourceName'], field_dict['FlowableName'],
        field_dict['FlowableContext'], field_dict['FlowableUnit']
    ]

    map_from = [
        "SourceListName", "SourceFlowName", "SourceFlowContext", "SourceUnit"
    ]

    if ignore_source_name:
        map_to.remove(field_dict['SourceName'])
        map_from.remove('SourceListName')

    # bug fix: operate on a copy so the caller's dataframe is not mutated;
    # the original used df[field].fillna(..., inplace=True), which mutates
    # the input and is deprecated chained-inplace under pandas Copy-on-Write.
    df = df.copy()
    for field in map_to:
        df[field] = df[field].fillna('')
    for field in map_from:
        mapping[field] = mapping[field].fillna('')

    # merge df with flows
    mapped_df = pd.merge(df,
                         mapping,
                         left_on=map_to,
                         right_on=map_from,
                         how=merge_type)

    # Overwrite flow metadata only on rows that actually matched.
    criteria = mapped_df['TargetFlowName'].notnull()

    mapped_df.loc[criteria,
                  field_dict['FlowableName']] = mapped_df["TargetFlowName"]
    mapped_df.loc[
        criteria,
        field_dict['FlowableContext']] = mapped_df["TargetFlowContext"]
    mapped_df.loc[criteria,
                  field_dict['FlowableUnit']] = mapped_df["TargetUnit"]
    mapped_df.loc[criteria, field_dict["FlowableQuantity"]] = \
        mapped_df[field_dict["FlowableQuantity"]] * mapped_df["ConversionFactor"]
    mapped_df.loc[criteria, field_dict['UUID']] = mapped_df["TargetFlowUUID"]

    # drop mapping fields
    mapped_df = mapped_df.drop(columns=mapping_fields)

    return mapped_df
 def setUp(self):
     """Load the flow mapping and flow list shared by all tests."""
     self.flowmappings = fedelemflowlist.get_flowmapping()
     # bug fix: the original line was 'self.flowlist = self.flowlist = ...',
     # a duplicated assignment target.
     self.flowlist = fedelemflowlist.get_flows()
Example #12
0
Requires target unit to be the primary unit. Existing conversion factor must be
set to 1 to avoid replacing manual conversion factors. Mapping file must already
conform to mapping format.
"""
import pandas as pd
import fedelemflowlist
from fedelemflowlist.globals import flowmappingpath, flowmapping_fields, log

#Add source name here. The .csv mapping file with this name must be in the flowmapping directory
#None can be used to add conversions in all mapping files
source = 'ReCiPe2016'

if __name__ == '__main__':
    # Pull in mapping file
    mapping = fedelemflowlist.get_flowmapping(source)
    # Alternate-unit conversion factors shipped with fedelemflowlist.
    conversions = fedelemflowlist.get_alt_conversion()
    # merge in conversion factors where source unit = alternate unit
    mapping_w_conversion = pd.merge(
        mapping,
        conversions,
        how='left',
        left_on=['TargetFlowName', 'SourceUnit', 'TargetUnit'],
        right_on=['Flowable', 'AltUnit', 'Unit'])
    # update conversion factor where current conversion is 1 and the updated conversion exists
    converted1 = mapping_w_conversion['InverseConversionFactor'].notnull()
    converted2 = mapping_w_conversion['ConversionFactor'] == 1
    mapping_w_conversion['Convert'] = converted1 & converted2
    # NOTE(review): '== True' on a boolean column is redundant; also this
    # excerpt appears truncated — writing the updated mapping back out is
    # not visible here.
    mapping_w_conversion.loc[
        (mapping_w_conversion['Convert'] == True),
        'ConversionFactor'] = mapping_w_conversion['InverseConversionFactor']
Example #13
0
def supported_mapping_systems() -> list:
    """Return the distinct source-list names present in the flow mapping.

    The first column of the mapping dataframe holds the source list name;
    duplicates are removed via a set, so ordering is unspecified (as in the
    original implementation).
    """
    fmap = flowlist.get_flowmapping()  # type: pd.DataFrame
    # idiom fix: replaces a manual range(len(...))/iat indexing loop.
    return list(set(fmap.iloc[:, 0]))
Example #14
0
import pandas as pd
import fedelemflowlist
from electricitylci.model_config import inventories, fedelemflowlist_version

#flowlist = fedelemflowlist.get_flowlist()
# Module-level cache: load the inventory-to-FEDEFL mapping once at import
# time, restricted to the columns used by map_emissions_to_fedelemflows.
mapping_to_fedelemflows = fedelemflowlist.get_flowmapping(
    version=fedelemflowlist_version, source_list=inventories)
mapping_to_fedelemflows = mapping_to_fedelemflows[[
    'Source', 'OriginalName', 'OriginalCategory', 'OriginalProperty',
    'NewName', 'NewCategory', 'NewSubCategory', 'NewUnit', 'UUID'
]]


def map_emissions_to_fedelemflows(df_with_flows_compartments):
    """Left-merge (Source, FlowName, Compartment) against the module-level
    mapping table and replace name/category/unit where a match was found.

    NOTE(review): this excerpt appears truncated — it ends on a comment with
    no return statement visible; confirm against the full source.
    """
    mapped_df = pd.merge(
        df_with_flows_compartments,
        mapping_to_fedelemflows,
        left_on=['Source', 'FlowName', 'Compartment'],
        right_on=['Source', 'OriginalName', 'OriginalCategory'],
        how='left')
    #If a NewName is present there was a match, replace FlowName and Compartment with new names
    mapped_df.loc[mapped_df['NewName'].notnull(),
                  'FlowName'] = mapped_df['NewName']
    mapped_df.loc[mapped_df['NewName'].notnull(),
                  'Compartment'] = mapped_df['NewCategory']
    mapped_df.loc[mapped_df['NewName'].notnull(),
                  'Unit'] = mapped_df['NewUnit']

    # Align the mapping's UUID column name with downstream expectations.
    mapped_df = mapped_df.rename(columns={'UUID': 'FlowUUID'})

    #If air, soil, or water assigned it directionality of emission. Others will get assigned later as needed
Example #15
0
def concat_map_upstream_databases(eia_gen_year, *arg, **kwargs):
    """
    Concatenates all of the databases given as args. Then all of the
    emissions in the combined database are mapped to the federal elementary
    flows list based on the mapping file 'eLCI' in preparation for being
    turned into openLCA processes and combined with the generation emissions.

    Parameters
    ----------
    eia_gen_year : int
        Year assigned to the 'Year' column of the result.
    *arg : dataframes
        The dataframes to be combined, generated by the upstream modules or
        renewables modules (electricitylci.nuclear_upstream, .petroleum_upstream,
        .solar_upstream, etc.)

    Returns
    -------
    dataframe

    if kwarg group_name is used then the function will return a tuple containing
    the mapped dataframe and lists of tuples for the unique mapped and unmapped flows.
    """
    # bug fix: the docstring originally appeared AFTER this import, making it
    # a dead string expression rather than the function's __doc__.
    import fedelemflowlist as fedefl

    # Rename the mapping's Target* columns back to the plain names used
    # throughout the rest of the pipeline.
    mapped_column_dict = {
        "TargetFlowName": "FlowName",
        "TargetFlowUUID": "FlowUUID",
        "TargetFlowContext": "Compartment",
        "TargetUnit": "Unit",
    }
    # Fallback Compartment -> Compartment_path translation for input
    # dataframes that do not already carry a Compartment_path column.
    compartment_mapping = {
        "air": "emission/air",
        "water": "emission/water",
        "ground": "emission/ground",
        "soil": "emission/ground",
        "resource": "resource",
        "NETL database/emissions": "NETL database/emissions",
        "NETL database/resources": "NETL database/resources",
    }
    print(f"Concatenating and flow-mapping {len(arg)} upstream databases.")
    upstream_df_list = list()
    for df in arg:
        # Non-dataframe args (e.g. None from a skipped module) are ignored.
        if isinstance(df, pd.DataFrame):
            if "Compartment_path" not in df.columns:
                df["Compartment_path"] = float("nan")
                df["Compartment_path"].fillna(
                    df["Compartment"].map(compartment_mapping), inplace=True)
            upstream_df_list.append(df)
    upstream_df = pd.concat(upstream_df_list, ignore_index=True, sort=False)
    module_logger.info("Creating flow mapping database")
    flow_mapping = fedefl.get_flowmapping('eLCI')
    # Flow names are matched case-insensitively on both sides of the merge.
    flow_mapping["SourceFlowName"] = flow_mapping["SourceFlowName"].str.lower()

    module_logger.info("Preparing upstream df for merge")
    # Keep the original spellings so matched/unmatched flows can be reported.
    upstream_df["FlowName_orig"] = upstream_df["FlowName"]
    upstream_df["Compartment_orig"] = upstream_df["Compartment"]
    upstream_df["Compartment_path_orig"] = upstream_df["Compartment_path"]
    upstream_df["Unit_orig"] = upstream_df["Unit"]
    # Normalize for the merge: lower-case and strip trailing whitespace.
    upstream_df["FlowName"] = upstream_df["FlowName"].str.lower().str.rstrip()
    upstream_df["Compartment"] = (
        upstream_df["Compartment"].str.lower().str.rstrip())
    upstream_df["Compartment_path"] = (
        upstream_df["Compartment_path"].str.lower().str.rstrip())
    upstream_columns = upstream_df.columns
    groupby_cols = [
        "fuel_type",
        "stage_code",
        "FlowName",
        "Compartment",
        "input",
        "plant_id",
        "Compartment_path",
        "Unit",
        "FlowName_orig",
        "Compartment_path_orig",
        "Unit_orig",
    ]
    # NaN units would silently drop rows from the groupby; use a placeholder.
    upstream_df["Unit"].fillna("<blank>", inplace=True)
    module_logger.info("Grouping upstream database")
    if "Electricity" in upstream_df.columns:
        upstream_df_grp = upstream_df.groupby(groupby_cols,
                                              as_index=False).agg({
                                                  "FlowAmount":
                                                  "sum",
                                                  "quantity":
                                                  "mean",
                                                  "Electricity":
                                                  "mean"
                                              })
    else:
        upstream_df_grp = upstream_df.groupby(groupby_cols,
                                              as_index=False).agg({
                                                  "FlowAmount":
                                                  "sum",
                                                  "quantity":
                                                  "mean"
                                              })
    # Only these columns are still needed from the ungrouped frame (for the
    # matched/unmatched report in the group_name branch below).
    upstream_df = upstream_df[[
        "FlowName_orig", "Compartment_path_orig", "stage_code"
    ]]
    module_logger.info("Merging upstream database and flow mapping")
    upstream_mapped_df = pd.merge(
        left=upstream_df_grp,
        right=flow_mapping,
        left_on=["FlowName", "Compartment_path"],
        right_on=["SourceFlowName", "SourceFlowContext"],
        how="left",
    )
    # Release the two largest intermediates before the next allocations.
    del upstream_df_grp, flow_mapping
    upstream_mapped_df.drop(columns={"FlowName", "Compartment", "Unit"},
                            inplace=True)
    upstream_mapped_df = upstream_mapped_df.rename(columns=mapped_column_dict,
                                                   copy=False)
    upstream_mapped_df.drop_duplicates(
        subset=["plant_id", "FlowName", "Compartment_path", "FlowAmount"],
        inplace=True,
    )
    # Rows whose flow found no mapping entry are dropped here.
    upstream_mapped_df.dropna(subset=["FlowName"], inplace=True)

    module_logger.info("Applying conversion factors")
    upstream_mapped_df["FlowAmount"] = (upstream_mapped_df["FlowAmount"] *
                                        upstream_mapped_df["ConversionFactor"])

    upstream_mapped_df.rename(columns={"fuel_type": "FuelCategory"},
                              inplace=True)
    upstream_mapped_df["FuelCategory"] = upstream_mapped_df[
        "FuelCategory"].str.upper()
    # Everything is an emission unless its compartment mentions 'resource'.
    upstream_mapped_df["ElementaryFlowPrimeContext"] = "emission"
    upstream_mapped_df.loc[
        upstream_mapped_df["Compartment"].str.contains("resource"),
        "ElementaryFlowPrimeContext", ] = "resource"
    upstream_mapped_df["Source"] = "netl"
    upstream_mapped_df["Year"] = eia_gen_year
    final_columns = [
        "plant_id",
        "FuelCategory",
        "stage_code",
        "FlowName",
        "Compartment",
        "Compartment_path",
        "FlowUUID",
        "Unit",
        "ElementaryFlowPrimeContext",
        "FlowAmount",
        "quantity",
        "Source",
        "Year",
    ]
    if "Electricity" in upstream_columns:
        final_columns = final_columns + ["Electricity"]
    if "input" in upstream_columns:
        final_columns = final_columns + ["input"]

    # I added the section below to help generate lists of matched and unmatched
    # flows. Because of the groupby, it's expensive enough not to run everytime.
    # I didn't want to get rid of it in case it comes in handy later.
    # (Simplified: the original 'if kwargs != {}' wrapper and the always-true
    # 'if kwargs is not None' check inside the with-block were redundant.)
    if "group_name" in kwargs:
        module_logger.info("kwarg group_name used: generating flows lists")
        unique_orig = upstream_df.groupby(
            by=["FlowName_orig", "Compartment_path_orig"]).groups
        unique_mapped = upstream_mapped_df.groupby(by=[
            "FlowName_orig",
            "Compartment_path_orig",
            "Unit_orig",
            "FlowName",
            "Compartment",
            "Unit",
        ]).groups
        unique_mapped_set = set(unique_mapped.keys())
        unique_orig_set = set(unique_orig.keys())
        unmatched_list = sorted(list(unique_orig_set - unique_mapped_set))
        matched_list = sorted(list(unique_mapped_set))
        fname_append = f"_{kwargs['group_name']}"
        # The with-statement closes the file; the original's explicit
        # f.close() inside the block was redundant and has been removed.
        with open(f"{output_dir}/flowmapping_lists{fname_append}.txt",
                  "w") as f:
            f.write("Unmatched flows\n")
            if kwargs["group_name"] is not None:
                f.write(f"From the group: {kwargs['group_name']}\n")
            for x in unmatched_list:
                f.write(f"{x}\n")
            f.write("\nMatched flows\n")
            for x in matched_list:
                f.write(f"{x}\n")
        upstream_mapped_df = upstream_mapped_df[final_columns]
        return upstream_mapped_df, unmatched_list, matched_list
    upstream_mapped_df = upstream_mapped_df[final_columns]
    return upstream_mapped_df