Example 1
def _read(xls_file: str) -> pd.DataFrame:
    """Read the data from the Excel file with the given path into a Pandas
       data frame."""

    log.info("read Traci 2.1 from file %s", xls_file)
    wb = xlrd.open_workbook(xls_file)
    sheet = wb.sheet_by_name("Substances")

    # map each value column to the category meta data parsed from the header
    # row: (indicator, indicator unit, flow category, flow unit)
    categories = {}
    for col in range(3, sheet.ncols):
        name = xls.cell_str(sheet, 0, col)
        if name == "":
            break
        cat_info = _category_info(name)
        if cat_info is not None:
            categories[col] = cat_info

    # collect one record per flow and impact category with a non-zero factor
    records = []
    for row in range(1, sheet.nrows):
        flow = xls.cell_str(sheet, row, 2)
        if flow == "":
            break
        cas = format_cas(xls.cell_val(sheet, row, 1))
        for col in range(3, sheet.ncols):
            cat_info = categories.get(col)
            if cat_info is None:
                continue
            factor = xls.cell_f64(sheet, row, col)
            if factor == 0.0:
                continue
            dataframe.record(records,
                             method="TRACI 2.1",
                             indicator=cat_info[0],
                             indicator_unit=cat_info[1],
                             flow=flow,
                             flow_category=cat_info[2],
                             flow_unit=cat_info[3],
                             cas_number=cas,
                             factor=factor)

    return dataframe.data_frame(records)
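
The snippet relies on helper modules (xls, dataframe, format_cas) that are not shown here. Below is a minimal sketch of what the record-collecting helpers are assumed to do; the function names mirror the calls above, but the bodies are guesses rather than the source project's implementation.

import pandas as pd


def cell_str(sheet, row: int, col: int) -> str:
    # assumed behavior of xls.cell_str: the cell value as a stripped string
    return str(sheet.cell_value(row, col)).strip()


def record(records: list, **fields):
    # assumed behavior of dataframe.record: collect one factor as a plain dict
    records.append(fields)


def data_frame(records: list) -> pd.DataFrame:
    # assumed behavior of dataframe.data_frame: build the final data frame
    return pd.DataFrame(records)
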
Example 2
def _read_mid_points(sheet: xlrd.sheet.Sheet, records: list):
    """Read the ReCiPe 2016 midpoint factors from the given sheet into the
    list of records."""
    log.debug("try to read midpoint factors from sheet %s", sheet.name)

    start_row, data_col, with_perspectives = _find_data_start(sheet)
    if start_row < 0:
        log.debug("could not find a value column in sheet %s", sheet.name)
        return

    flow_col = _find_flow_column(sheet)
    if flow_col < 0:
        return

    cas_col = _find_cas_column(sheet)
    indicator_unit, flow_unit, unit_col = _determine_units(sheet)
    compartment, compartment_col = _determine_compartments(sheet)

    # ReCiPe cultural perspectives: Individualist, Hierarchist, Egalitarian
    perspectives = ["I", "H", "E"]
    factor_count = 0
    for row in range(start_row, sheet.nrows):
        if compartment_col > -1:
            compartment = xls.cell_str(sheet, row, compartment_col)
        if compartment in contexts:
            compartment = contexts[compartment]
        if unit_col > -1:
            flow_unit = xls.cell_str(sheet, row, unit_col)
            # unit is given as "indicator unit/flow unit"; keep the flow unit
            if "/" in flow_unit:
                flow_unit = flow_unit.split("/")[1].strip()
        cas = ""
        if cas_col > -1:
            cas = format_cas(xls.cell_f64(sheet, row, cas_col))

        if with_perspectives:
            for i in range(0, 3):
                val = xls.cell_f64(sheet, row, data_col + i)
                if val == 0.0:
                    continue
                dfutil.record(records,
                              method="ReCiPe 2016 - Midpoint/" +
                              perspectives[i],
                              indicator=sheet.name,
                              indicator_unit=indicator_unit,
                              flow=xls.cell_str(sheet, row, flow_col),
                              flow_category=compartment,
                              flow_unit=flow_unit,
                              cas_number=cas,
                              factor=val)
                factor_count += 1
        else:
            val = xls.cell_f64(sheet, row, data_col)
            if val == 0.0:
                continue
            for p in perspectives:
                dfutil.record(records,
                              method="ReCiPe 2016 - Midpoint/" + p,
                              indicator=sheet.name,
                              indicator_unit=indicator_unit,
                              flow=xls.cell_str(sheet, row, flow_col),
                              flow_category=compartment,
                              flow_unit=flow_unit,
                              cas_number=cas,
                              factor=val)
                factor_count += 1
    log.debug("extracted %i factors", factor_count)
Example 3
def _read(access_file: str) -> pd.DataFrame:
    """Read the data from the Access database with the given path into a
    Pandas data frame."""

    log.info("read ImpactWorld+ from file %s", access_file)

    path = cache.get_path(access_file)

    connStr = (
        r'DRIVER={Microsoft Access Driver (*.mdb, *.accdb)};'
        r'DBQ=' + path + ";")

    cnxn = pyodbc.connect(connStr)
    crsr = cnxn.cursor()
    records = []

    # Extract non-regionalized data from "CF - not regionalized - All other impact categories"
    crsr.execute("SELECT * FROM [CF - not regionalized - All other impact categories]")
    rows = crsr.fetchall()
    for row in rows:
        dfutil.record(records,
                      method="ImpactWorld+",
                      indicator=row[1],
                      indicator_unit=row[2],
                      flow=row[5],
                      flow_category=row[3] + "/" + row[4],
                      flow_unit=row[8],
                      cas_number=format_cas(row[6]).lstrip("0"),
                      factor=row[7])

    """List relevant sheets in Impact World Access file. Second item in tuple
    tells the source of compartment information. Compartment for water
    categories are not included in access file, defined below. Elementary flow
    names are used to define the compartment for land transformation and
    occupation. Compartment and Subcompartment data is available in the Access
    file for other categories."""
    regional_sheets = [("CF - regionalized - WaterScarc - aggregated", "Raw/in water"),
                       ("CF - regionalized - WaterAvailab_HH - aggregated", "Raw/in water"),
                       ("CF - regionalized - LandTrans - aggregated", "Elementary Flow"),
                       ("CF - regionalized - LandOcc - aggregated", "Elementary Flow"),
                       ("CF - regionalized - EutroMar - aggregated", "Compartment"),
                       ("CF - regionalized - PartMatterForm - aggregated","Compartment"),
                       ("CF - regionalized - AcidFW - aggregated", "Compartment"),
                       ("CF - regionalized - AcidTerr - aggregated", "Compartment"),
                       ("CF - regionalized - EutroFW - aggregated", "Compartment"),
                       ]

    for x in regional_sheets:
        if x[0] == "CF - regionalized - PartMatterForm - aggregated":
            # Extract global flows from the particulate matter Access sheet;
            # its structure differs from the other regionalized sheets, so it
            # is filtered by Region and the context comes from the Archetype
            # column.
            sql = "SELECT * FROM [" + x[0] + "] WHERE (([" + x[0] + "].Region In('World')))"
            crsr.execute(sql)
            rows = crsr.fetchall()

            for row in rows:
                dfutil.record(records,
                              method="ImpactWorld+",
                              indicator=row.ImpCat,
                              indicator_unit=row.Unit.strip('[]').split('/')[0],
                              flow=row.__getattribute__('Elem flow'),
                              flow_category="Air/" + row.__getattribute__("Archetype 1"),
                              flow_unit=row.Unit.strip('[]').split('/')[1],
                              cas_number="",
                              factor=row.CFvalue)

        else:
            sql = "SELECT * FROM [" + x[0] + "] WHERE (([" + x[0] + "].Resolution In('Global', 'Not regionalized')))"
            crsr.execute(sql)
            rows = crsr.fetchall()

            # extract column headers from Access sheet for exception testing
            cols = [column[0] for column in crsr.description]

            for row in rows:
                # Prepend "Water, " to the detailed context information available in the Access file
                if x[0] in ['CF - regionalized - WaterScarc - aggregated',
                            'CF - regionalized - WaterAvailab_HH - aggregated']:
                    flow_stmt = 'Water, ' + row.__getattribute__('Elem flow')
                else:
                    flow_stmt = row.__getattribute__('Elem flow')

                # Define context/compartment for flow based on impact category.
                if {'Compartment', 'Subcompartment'}.issubset(cols):
                    category_stmt = row.Compartment + "/" + row.Subcompartment
                elif x[0] in ['CF - regionalized - LandTrans - aggregated',
                              'CF - regionalized - LandOcc - aggregated',
                              'CF - regionalized - WaterScarc - aggregated',
                              'CF - regionalized - WaterAvailab_HH - aggregated']:
                    category_stmt = flow_stmt
                else:
                    category_stmt = x[1]

                dfutil.record(records,
                              method="ImpactWorld+",
                              indicator=row.ImpCat,
                              indicator_unit=row.Unit.strip('[]').split('/')[0],
                              flow=flow_stmt,
                              flow_category=category_stmt,
                              flow_unit=row.Unit.strip('[]').split('/')[1],
                              cas_number="",
                              factor=row.__getattribute__('Weighted Average'))

    return dfutil.data_frame(records)
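
Two details of this reader are easy to miss: pyodbc Row attributes whose column names contain spaces ('Elem flow', 'Weighted Average') have to be fetched via __getattribute__ or getattr, and the Unit column packs the indicator unit and the flow unit into a single string. The sketch below mirrors the unit parsing, assuming values shaped like "[DALY/kg]"; the example string is hypothetical.

def split_unit(unit: str):
    # mirror the row.Unit.strip('[]').split('/') calls above: the part before
    # the slash is the indicator unit, the part after it is the flow unit
    parts = unit.strip("[]").split("/")
    return parts[0], parts[1]


print(split_unit("[DALY/kg]"))  # ('DALY', 'kg') -- hypothetical example value
# columns with spaces can equivalently be read with getattr(row, 'Elem flow')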