Python data_frame Examples, lciafmt.df.data_frame Python Examples

Example #1

0

Show file

File: recipe.py Project: USEPA/LCIAformatter

def _read(file: str) -> pd.DataFrame:
    log.info("read ReCiPe 2016 from file %s", file)
    wb = xlrd.open_workbook(file)
    records = []
    for name in wb.sheet_names():
        if _eqstr(name, "Version") or _eqstr(name,
                                             "Midpoint to endpoint factors"):
            continue
        _read_mid_points(wb.sheet_by_name(name), records)

    return dfutil.data_frame(records)

Example #2

0

Show file

File: fedefl_inventory.py Project: sarahcashmanz/LCIAformatter

def get(subset=None) -> pd.DataFrame:
    """
    Returns a dataframe of inventory based methods.
    :param subset: a list of dictionary keys from available inventories, if 
    none selected all availabile inventories will be generated
    :return: df in standard LCIAmethod format
    """
    method = df.data_frame(list())
    method['Characterization Factor'] = pd.to_numeric(
        method['Characterization Factor'])

    if subset == None:
        list_of_inventories = subsets.get_subsets()
    else:
        list_of_inventories = subset

    alt_units = flowlist.get_alt_conversion()

    for inventory in list_of_inventories:
        flows = flowlist.get_flows(subset=inventory)
        flows.drop([
            'Formula', 'Synonyms', 'Class', 'External Reference', 'Preferred',
            'AltUnit', 'AltUnitConversionFactor'
        ],
                   axis=1,
                   inplace=True)
        flows['Indicator'] = inventory
        flows['Indicator unit'] = subsets.get_inventory_unit(inventory)
        flows['Characterization Factor'] = 1

        # Apply unit conversions where flow unit differs from indicator unit
        flows_w_conversion = pd.merge(
            flows,
            alt_units,
            how='left',
            left_on=['Flowable', 'Indicator unit', 'Unit'],
            right_on=['Flowable', 'AltUnit', 'Unit'])
        flows_w_conversion.loc[(flows_w_conversion['AltUnit'] ==
                                flows_w_conversion['Indicator unit']),
                               'Characterization Factor'] = flows_w_conversion[
                                   'AltUnitConversionFactor']
        flows_w_conversion.drop(
            ['AltUnit', 'AltUnitConversionFactor', 'InverseConversionFactor'],
            axis=1,
            inplace=True)

        method = pd.concat([method, flows_w_conversion], ignore_index=True)

    method['Method'] = 'Inventory'
    return method

Example #3

0

Show file

def _read(xls_file: str) -> pd.DataFrame:
    """Read the data from the Excel file with the given path into a Pandas
       data frame."""

    log.info("read Traci 2.1 from file %s", xls_file)
    wb = xlrd.open_workbook(xls_file)
    sheet = wb.sheet_by_name("Substances")

    categories = {}
    for col in range(3, sheet.ncols):
        name = xls.cell_str(sheet, 0, col)
        if name == "":
            break
        cat_info = _category_info(name)
        if cat_info is not None:
            categories[col] = cat_info

    records = []
    for row in range(1, sheet.nrows):
        flow = xls.cell_str(sheet, row, 2)
        if flow == "":
            break
        cas = format_cas(xls.cell_val(sheet, row, 1))
        for col in range(3, sheet.ncols):
            cat_info = categories.get(col)
            if cat_info is None:
                continue
            factor = xls.cell_f64(sheet, row, col)
            if factor == 0.0:
                continue
            dataframe.record(records,
                             method="TRACI 2.1",
                             indicator=cat_info[0],
                             indicator_unit=cat_info[1],
                             flow=flow,
                             flow_category=cat_info[2],
                             flow_unit=cat_info[3],
                             cas_number=cas,
                             factor=factor)

    return dataframe.data_frame(records)

Example #4

0

Show file

    def run(self) -> pd.DataFrame:
        map_idx = self._build_map_index()
        log.info("applying flow mapping...")
        mapped = 0
        preserved = 0
        df = self.__df
        records = []
        for row in range(0, self.__df.shape[0]):
            key = Mapper._flow_key(
                name=df.iat[row, 5],
                category=df.iat[row, 7],
                unit=df.iat[row, 8],
            )
            targets = map_idx.get(key)  # type: List[_FlowInfo]

            if targets is None:
                log.debug("could not map flow %s", key)
                if self.__preserve_unmapped:
                    records.append(dfutil.as_list(df, row=row))
                    preserved += 1
                continue

            rec = dfutil.as_list(df, row=row)
            for target in targets:
                r = rec.copy()
                r[5] = target.name
                r[6] = target.uuid
                r[7] = target.category
                r[8] = target.unit
                r[12] = r[12] / float(target.conversionfactor)
                records.append(r)
                mapped += 1
        log.info(
            "created %i factors for mapped flows; " +
            "preserved %i factors for unmapped flows", mapped, preserved)
        return dfutil.data_frame(records)

Example #5

0

Show file

File: iw.py Project: USEPA/LCIAformatter

def _read(access_file: str) -> pd.DataFrame:
    """Read the data from the Access database with the given path into a
    Pandas data frame."""

    log.info("read ImpactWorld+ from file %s", access_file)

    path = cache.get_path(access_file)

    connStr = (
        r'DRIVER={Microsoft Access Driver (*.mdb, *.accdb)};'
        r'DBQ=' + path + ";")

    cnxn = pyodbc.connect(connStr)
    crsr = cnxn.cursor()
    records = []

    # Extract non regionalized data from "CF - not regionalized - All other impact categories"
    crsr.execute("SELECT * FROM [CF - not regionalized - All other impact categories]")
    rows = crsr.fetchall()
    for row in rows:
        dfutil.record(records,
                      method="ImpactWorld+",
                      indicator = row[1],
                      indicator_unit=row[2],
                      flow=row[5],
                      flow_category=row[3] + "/" + row[4],
                      flow_unit=row[8],
                      cas_number=format_cas(row[6]).lstrip("0"),
                      factor=row[7])

    """List relevant sheets in Impact World Access file. Second item in tuple
    tells the source of compartment information. Compartment for water
    categories are not included in access file, defined below. Elementary flow
    names are used to define the compartment for land transformation and
    occupation. Compartment and Subcompartment data is available in the Access
    file for other categories."""
    regional_sheets = [("CF - regionalized - WaterScarc - aggregated", "Raw/in water"),
                       ("CF - regionalized - WaterAvailab_HH - aggregated", "Raw/in water"),
                       ("CF - regionalized - LandTrans - aggregated", "Elementary Flow"),
                       ("CF - regionalized - LandOcc - aggregated", "Elementary Flow"),
                       ("CF - regionalized - EutroMar - aggregated", "Compartment"),
                       ("CF - regionalized - PartMatterForm - aggregated","Compartment"),
                       ("CF - regionalized - AcidFW - aggregated", "Compartment"),
                       ("CF - regionalized - AcidTerr - aggregated", "Compartment"),
                       ("CF - regionalized - EutroFW - aggregated", "Compartment"),
                       ]

    for x in regional_sheets:
        if x[0] == "CF - regionalized - PartMatterForm - aggregated":
            # Extract global flows from the particulate matter Access sheet
            # Structure of this sheet is
            sql = "SELECT * FROM [" + x[0] + "] WHERE (([" + x[0] + "].Region In('World')))"
            crsr.execute(sql)
            rows = crsr.fetchall()

            for row in rows:
                dfutil.record(records,
                              method="ImpactWorld+",
                              indicator=row.ImpCat,
                              indicator_unit=row.Unit.strip('[]').split('/')[0],
                              flow=row.__getattribute__('Elem flow'),
                              flow_category="Air/" + row.__getattribute__("Archetype 1"),
                              flow_unit=row.Unit.strip('[]').split('/')[1],
                              cas_number="",
                              factor=row.CFvalue)

        else:
            sql = "SELECT * FROM [" + x[0] + "] WHERE (([" + x[0] + "].Resolution In('Global', 'Not regionalized')))"
            crsr.execute(sql)
            rows = crsr.fetchall()

            # extract column headers from Access sheet for exception testing
            cols = [column[0] for column in crsr.description]

            for row in rows:
                #Add water to detailed context information available in Access file
                if x[0] in ['CF - regionalized - WaterScarc - aggregated',
                            'CF - regionalized - WaterAvailab_HH - aggregated']:
                    flow_stmt = 'Water, ' + row.__getattribute__('Elem flow')
                else:
                    flow_stmt = row.__getattribute__('Elem flow')

                # Define context/compartment for flow based on impact category.
                if {'Compartment', 'Subcompartment'}.issubset(cols):
                    category_stmt = row.Compartment + "/" + row.Subcompartment
                elif x[0] in ['CF - regionalized - LandTrans - aggregated',
                              'CF - regionalized - LandOcc - aggregated',
                              'CF - regionalized - WaterScarc - aggregated',
                              'CF - regionalized - WaterAvailab_HH - aggregated']:
                    category_stmt = flow_stmt
                else:
                    category_stmt = x[1]

                dfutil.record(records,
                              method="ImpactWorld+",
                              indicator = row.ImpCat,
                              indicator_unit=row.Unit.strip('[]').split('/')[0],
                              flow=flow_stmt,
                              flow_category=category_stmt,
                              flow_unit=row.Unit.strip('[]').split('/')[1],
                              cas_number="",
                              factor=row.__getattribute__('Weighted Average'))

    return dfutil.data_frame(records)