Beispiel #1
0
def _determine_units(sheet: xlrd.book.sheet) -> (str, str, int):
    """Work out the indicator unit, the flow unit and the unit column.

    The cell two rows above the data start (in the data column) is read as
    a unit header: ``"a/b"`` is split into indicator unit ``a`` and flow
    unit ``b``; any other non-empty text is taken as the indicator unit.
    Independently, the first rows (row index <= 5) are scanned for a cell
    equal to "Unit" (as judged by ``_eqstr``) to locate a unit column.
    Units that are still unknown are guessed from the sheet name.

    Returns a tuple ``(indicator_unit, flow_unit, unit_col)``; ``unit_col``
    is -1 when no "Unit" cell was found, and ``indicator_unit`` stays "?"
    when it could not be determined.
    """
    indicator_unit = flow_unit = "?"

    data_row, data_col, _ = _find_data_start(sheet)
    header_row = data_row - 2
    if header_row > 0:
        header = xls.cell_str(sheet, header_row, data_col)
        if header is not None and header != "":
            if "/" not in header:
                indicator_unit = header.strip()
            else:
                # e.g. "(a / b)": surrounding blanks and brackets are dropped
                parts = header.strip(" ()").split("/")
                indicator_unit = parts[0].strip()
                flow_unit = parts[1].strip()

    # look for a "Unit" header cell in the top rows only
    unit_col = -1
    for r, c in xls.iter_cells(sheet):
        if r > 5:
            break
        if _eqstr(xls.cell_str(sheet, r, c), "Unit"):
            unit_col = c
            break

    # sheet-name keywords -> (unit, indicator log message, flow log message)
    sheet_defaults = (
        (("land", "transformation"), "m2",
         "unknown indicator unit; assuming it is m2",
         "unknown flow unit; assume it is m2"),
        (("land", "occupation"), "m2*a",
         "unknown indicator unit; assuming it is m2*a",
         "unknown flow unit; assuming it is m2*a"),
        (("water", "consumption"), "m3",
         "unknown indicator unit; assuming it is m3",
         "unknown flow unit; assuming it is m3"),
    )

    if indicator_unit != "?":
        log.debug("determined indicator unit: %s", indicator_unit)
    else:
        for words, unit, indicator_msg, _ in sheet_defaults:
            if _containstr(sheet.name, *words):
                log.debug(indicator_msg)
                indicator_unit = unit
                break
        else:
            log.debug("unknown indicator unit")

    # normalise anything mentioning kg to plain "kg"
    if _containstr(flow_unit, "kg"):
        flow_unit = "kg"

    if unit_col > -1:
        log.debug("take units from column %i", unit_col)
    elif flow_unit != "?":
        log.debug("determined flow unit: %s", flow_unit)
    else:
        for words, unit, _, flow_msg in sheet_defaults:
            if _containstr(sheet.name, *words):
                log.debug(flow_msg)
                flow_unit = unit
                break
        else:
            log.debug("unknown flow unit; assuming it is 'kg'")
            flow_unit = "kg"

    return indicator_unit, flow_unit, unit_col
Beispiel #2
0
def _read_endpoints(file: str) -> (pandas.DataFrame, pandas.DataFrame):
    """Read midpoint-to-endpoint conversion factors from an Excel workbook.

    Every sheet whose name matches "Midpoint to endpoint factors" (as judged
    by ``_eqstr``) is scanned from its data start row. Column 0 is read as
    the indicator name, column 1 as its unit, and three consecutive value
    columns starting at the data column as the factors for the perspectives
    I, H and E. Factors equal to 0.0 are skipped.

    The units are then normalised ("DALY", "species-year", "USD2013") and
    the table is left-joined with ``ReCiPe2016_endpoint_to_midpoint.csv``
    on ``EndpointIndicator``.

    :param file: path of the workbook to read.
    :return: a pair ``(endpoint, endpoint_by_flow)``: the rows whose
        ``FlowFlag`` is missing, and the rows whose ``FlowFlag`` equals 1
        (with ``EndpointIndicator`` renamed to ``Flowable``). The
        ``FlowFlag`` column is dropped from both.
    """
    log.info("reading endpoint factors from file")
    wb = xlrd.open_workbook(file)
    endpoint_cols = ['Method', 'EndpointMethod', 'EndpointIndicator',
                     'EndpointUnit', 'EndpointConversion']
    perspectives = ["I", "H", "E"]

    # Rows are collected in a plain list and turned into a frame once:
    # DataFrame.append copied the whole frame on every call and no longer
    # exists in pandas >= 2.0.
    rows = []
    for name in wb.sheet_names():
        if not _eqstr(name, "Midpoint to endpoint factors"):
            continue
        sheet = wb.sheet_by_name(name)
        # Only the row and column are needed here; slicing ignores the
        # perspective flag and works whatever the length of the result.
        start_row, data_col = _find_data_start(sheet)[:2]
        if start_row < 0:
            # same handling as for midpoint sheets: nothing to read
            log.debug("could not find a value column in sheet %s", name)
            continue

        # impact categories in column 1 (index 0), their units next to it
        flow_col = 0
        endpoint_factor_count = 0
        for row in range(start_row, sheet.nrows):
            indicator = xls.cell_str(sheet, row, flow_col)
            indicator_unit = xls.cell_str(sheet, row, flow_col + 1)
            for i, perspective in enumerate(perspectives):
                val = xls.cell_f64(sheet, row, data_col + i)
                if val == 0.0:
                    continue
                rows.append((
                    "ReCiPe 2016 - Midpoint/" + perspective,
                    "ReCiPe 2016 - Endpoint/" + perspective,
                    indicator,
                    indicator_unit,
                    val,
                ))
                endpoint_factor_count += 1
        log.debug("extracted %i endpoint factors", endpoint_factor_count)

    endpoint = pandas.DataFrame(rows, columns=endpoint_cols)

    log.info("processing endpoint factors")
    unit_names = (('daly', 'DALY'),
                  ('species', 'species-year'),
                  ('USD', 'USD2013'))
    for pattern, unit in unit_names:
        matches = endpoint['EndpointUnit'].str.contains(pattern, case=False)
        endpoint.loc[matches, 'EndpointUnit'] = unit

    log.info("reading endpoint map from csv")
    endpoint_map = pandas.read_csv(
        util.datapath + 'ReCiPe2016_endpoint_to_midpoint.csv')
    endpoint = endpoint.merge(endpoint_map, how="left",
                              on='EndpointIndicator')

    # split into two dataframes
    endpoint_by_flow = endpoint[endpoint['FlowFlag'] == 1]
    endpoint_by_flow = endpoint_by_flow.drop(columns='FlowFlag')
    endpoint_by_flow = endpoint_by_flow.rename(
        columns={'EndpointIndicator': 'Flowable'})
    endpoint = endpoint[endpoint['FlowFlag'].isna()]
    endpoint = endpoint.drop(columns='FlowFlag')
    # return endpoint and endpoint by flow
    return endpoint, endpoint_by_flow
Beispiel #3
0
def _find_cas_column(sheet: xlrd.book.sheet) -> int:
    """Return the column of the first cell that matches "cas".

    Cells are visited in the order produced by ``xls.iter_cells`` and
    compared with ``_eqstr``. Returns -1 when no cell matches.
    """
    for row, col in xls.iter_cells(sheet):
        header = xls.cell_str(sheet, row, col)
        if not _eqstr(header, "cas"):
            continue
        log.debug("identified column %i %s for CAS numbers", col, header)
        return col
    return -1
Beispiel #4
0
def _find_data_start(sheet: xlrd.book.sheet) -> (int, int, bool):
    """Locate the first data cell of a factor sheet.

    The sheet is scanned with ``xls.iter_cells`` for a header cell that is
    either "I" / contains "Individualist" (the sheet then has one value
    column per perspective) or equals "all perspectives" (a single value
    column shared by all perspectives). String comparison is delegated to
    ``_eqstr`` and ``_containstr``.

    :return: ``(row, col, with_perspectives)`` where ``row`` is the row
        directly below the header cell and ``col`` the header's column.
        When no such header exists, ``(-1, -1, False)`` is returned.
    """
    for row, col in xls.iter_cells(sheet):
        s = xls.cell_str(sheet, row, col)
        if s is None or s == "":
            continue
        if _eqstr(s, "I") or _containstr(s, "Individualist"):
            return row + 1, col, True
        if _eqstr(s, "all perspectives"):
            return row + 1, col, False
    # Always return three values: callers unpack the result into three
    # names before they check for a negative row, so a shorter tuple would
    # raise ValueError instead of signalling "not found".
    return -1, -1, False
Beispiel #5
0
def _read(xls_file: str) -> pd.DataFrame:
    """Load the "Substances" sheet of the given workbook into a data frame.

    Row 0 holds the category headers, starting at column 3; reading the
    headers stops at the first empty one, and only headers for which
    ``_category_info`` returns a value are used. Each following row is one
    flow (name in column 2, CAS number in column 1); reading stops at the
    first row with an empty flow name. One record is created per non-zero
    factor.
    """
    log.info("read Traci 2.1 from file %s", xls_file)
    sheet = xlrd.open_workbook(xls_file).sheet_by_name("Substances")

    # factor column -> category description, in ascending column order
    columns = {}
    for col in range(3, sheet.ncols):
        header = xls.cell_str(sheet, 0, col)
        if header == "":
            break
        info = _category_info(header)
        if info is not None:
            columns[col] = info

    rows = []
    for row in range(1, sheet.nrows):
        flow_name = xls.cell_str(sheet, row, 2)
        if flow_name == "":
            break
        cas_number = format_cas(xls.cell_val(sheet, row, 1))
        # only the columns with a known category carry factors of interest
        for col, info in columns.items():
            value = xls.cell_f64(sheet, row, col)
            if value == 0.0:
                continue
            dataframe.record(
                rows,
                method="TRACI 2.1",
                indicator=info[0],
                indicator_unit=info[1],
                flow=flow_name,
                flow_category=info[2],
                flow_unit=info[3],
                cas_number=cas_number,
                factor=value,
            )

    return dataframe.data_frame(rows)
Beispiel #6
0
def _find_flow_column(sheet: xlrd.book.sheet) -> int:
    """Return the column that holds the flow names of the sheet.

    Sheets whose name matches "land" and "occupation" always use column 1.
    Otherwise the column of the first cell matching "name" or "substance"
    (via ``_containstr``) is returned, and column 0 is used when there is
    no such cell.
    """
    if _containstr(sheet.name, "land", "occupation"):
        return 1

    for row, col in xls.iter_cells(sheet):
        label = xls.cell_str(sheet, row, col)
        if _containstr(label, "name") or _containstr(label, "substance"):
            log.debug("identified column %i %s for flow names", col, label)
            return col

    log.debug("no 'name' column in %s, take col=0 for that", sheet.name)
    return 0
Beispiel #7
0
def _determine_compartments(sheet: xlrd.book.sheet) -> (str, int):
    """Determine where the compartment of the flows of a sheet comes from.

    The top rows (row index <= 5) are searched for a cell matching
    "compartment" or "name" + "in" + "ReCiPe" (via ``_containstr``). If one
    is found, ``("", column)`` is returned. Otherwise a default compartment
    is chosen from the sheet name and returned as ``(compartment, -1)``;
    the compartment is "" when no rule applies.
    """
    for row, col in xls.iter_cells(sheet):
        if row > 5:
            break
        text = xls.cell_str(sheet, row, col)
        if (_containstr(text, "compartment")
                or _containstr(text, "name", "in", "ReCiPe")):
            log.debug("found compartment column %i", col)
            return "", col

    # (alternative keyword sets, default compartment, log message);
    # rules and alternatives are tried in this order
    by_sheet_name = (
        ((("global", "warming"), ("ozone",), ("particulate",),
          ("acidification",)),
         "air",
         "no compartment column; assuming 'air'"),
        ((("mineral", "resource", "scarcity"),),
         "resource/ground",
         "no compartment column; assuming 'resource/ground'"),
        ((("fossil", "resource", "scarcity"),),
         "resource",
         "no compartment column; assuming 'resource'"),
        ((("water", "consumption"),),
         "resource/fresh water",
         "no compartment column; assuming 'resource/fresh water'"),
    )
    for alternatives, default, message in by_sheet_name:
        if any(_containstr(sheet.name, *words) for words in alternatives):
            log.debug(message)
            return default, -1

    log.debug("no compartment column")
    return "", -1
Beispiel #8
0
def _read_mid_points(sheet: xlrd.book.sheet, records: list):
    """Append the midpoint factors found in the given sheet to ``records``.

    The layout of the sheet (data start, flow-name column, CAS column,
    units, compartments) is detected with the helper functions of this
    module. For each data row one record per perspective (I, H, E) is
    added through ``dfutil.record``; the sheet name is used as indicator
    name. When the sheet has a single value column, its value is used for
    all three perspectives. Factors equal to 0.0 are skipped. Nothing is
    added when no data start can be found.
    """
    log.debug("try to read midpoint factors from sheet %s", sheet.name)

    first_row, value_col, per_perspective = _find_data_start(sheet)
    if first_row < 0:
        log.debug("could not find a value column in sheet %s", sheet.name)
        return

    name_col = _find_flow_column(sheet)
    if name_col < 0:
        return

    cas_col = _find_cas_column(sheet)
    indicator_unit, flow_unit, unit_col = _determine_units(sheet)
    compartment, compartment_col = _determine_compartments(sheet)

    perspectives = ["I", "H", "E"]
    n_factors = 0
    for row in range(first_row, sheet.nrows):
        # row-level compartment and unit take precedence when the sheet
        # has dedicated columns for them
        if compartment_col > -1:
            compartment = xls.cell_str(sheet, row, compartment_col)
        if compartment in contexts:
            compartment = contexts[compartment]
        if unit_col > -1:
            flow_unit = xls.cell_str(sheet, row, unit_col)
            if "/" in flow_unit:
                # keep the part after the slash
                flow_unit = flow_unit.split("/")[1].strip()
        cas = (format_cas(xls.cell_f64(sheet, row, cas_col))
               if cas_col > -1 else "")

        if per_perspective:
            # one value column per perspective, read lazily in order
            factors = (
                (name, xls.cell_f64(sheet, row, value_col + offset))
                for offset, name in enumerate(perspectives)
            )
        else:
            # a single value shared by all perspectives
            shared = xls.cell_f64(sheet, row, value_col)
            factors = ((name, shared) for name in perspectives)

        for perspective, factor in factors:
            if factor == 0.0:
                continue
            dfutil.record(records,
                          method="ReCiPe 2016 - Midpoint/" + perspective,
                          indicator=sheet.name,
                          indicator_unit=indicator_unit,
                          flow=xls.cell_str(sheet, row, name_col),
                          flow_category=compartment,
                          flow_unit=flow_unit,
                          cas_number=cas,
                          factor=factor)
            n_factors += 1
    log.debug("extracted %i factors", n_factors)