def _determine_units(sheet: xlrd.book.sheet) -> (str, str, int):
    """Work out the indicator unit, the flow unit and the unit column of a
    sheet.

    Returns a tuple ``(indicator_unit, flow_unit, unit_col)``. ``unit_col``
    is the column of a cell labelled "Unit" within the first rows of the
    sheet, or -1 when there is none. When a unit cannot be read from the
    sheet it is guessed from the sheet name; the flow unit finally defaults
    to "kg".
    """
    unknown = "?"
    indicator_unit = unknown
    flow_unit = unknown

    # The unit label is expected two rows above the first data row, in the
    # first value column; it is either "<indicator unit>/<flow unit>" or
    # just the indicator unit.
    label_row, label_col, _ = _find_data_start(sheet)
    label_row -= 2
    if label_row > 0:
        label = xls.cell_str(sheet, label_row, label_col)
        if label is not None and label != "":
            if "/" in label:
                pieces = label.strip(" ()").split("/")
                indicator_unit = pieces[0].strip()
                flow_unit = pieces[1].strip()
            else:
                indicator_unit = label.strip()

    # Only the top rows (0..5) are searched for a "Unit" header.
    unit_col = -1
    for r, c in xls.iter_cells(sheet):
        if r > 5:
            break
        if _eqstr(xls.cell_str(sheet, r, c), "Unit"):
            unit_col = c
            break

    # (sheet name keywords, assumed unit, log message), checked in order
    indicator_fallbacks = (
        (("land", "transformation"), "m2",
         "unknown indicator unit; assuming it is m2"),
        (("land", "occupation"), "m2*a",
         "unknown indicator unit; assuming it is m2*a"),
        (("water", "consumption"), "m3",
         "unknown indicator unit; assuming it is m3"),
    )
    flow_fallbacks = (
        (("land", "transformation"), "m2",
         "unknown flow unit; assume it is m2"),
        (("land", "occupation"), "m2*a",
         "unknown flow unit; assuming it is m2*a"),
        (("water", "consumption"), "m3",
         "unknown flow unit; assuming it is m3"),
    )

    if indicator_unit != unknown:
        log.debug("determined indicator unit: %s", indicator_unit)
    else:
        for keywords, unit, message in indicator_fallbacks:
            if _containstr(sheet.name, *keywords):
                log.debug(message)
                indicator_unit = unit
                break
        else:
            log.debug("unknown indicator unit")

    # Any flow unit that mentions kg is reduced to plain "kg".
    if _containstr(flow_unit, "kg"):
        flow_unit = "kg"

    if unit_col > -1:
        # a unit column takes precedence; no fallback is applied here
        log.debug("take units from column %i", unit_col)
    elif flow_unit != unknown:
        log.debug("determined flow unit: %s", flow_unit)
    else:
        for keywords, unit, message in flow_fallbacks:
            if _containstr(sheet.name, *keywords):
                log.debug(message)
                flow_unit = unit
                break
        else:
            log.debug("unknown flow unit; assuming it is 'kg'")
            flow_unit = "kg"

    return indicator_unit, flow_unit, unit_col
def _read_endpoints(file: str) -> (pandas.DataFrame, pandas.DataFrame):
    """Read the midpoint-to-endpoint conversion factors from the workbook
    at the given path.

    Every sheet named "Midpoint to endpoint factors" is scanned; each
    non-zero factor becomes one row with the columns Method, EndpointMethod,
    EndpointIndicator, EndpointUnit and EndpointConversion. The units are
    normalized and the rows are joined with the mapping in
    ``ReCiPe2016_endpoint_to_midpoint.csv`` on EndpointIndicator.

    Returns a pair ``(endpoint, endpoint_by_flow)``: the rows whose
    ``FlowFlag`` is missing, and the rows whose ``FlowFlag`` is 1 with the
    EndpointIndicator column renamed to Flowable. The FlowFlag column is
    dropped from both frames.
    """
    log.info("reading endpoint factors from file")
    wb = xlrd.open_workbook(file)
    endpoint_cols = ['Method', 'EndpointMethod', 'EndpointIndicator',
                     'EndpointUnit', 'EndpointConversion']
    perspectives = ["I", "H", "E"]

    # Rows are gathered in a plain list and converted once at the end:
    # DataFrame.append no longer exists in pandas >= 2.0 and copied the
    # whole frame on every call.
    rows = []
    for name in wb.sheet_names():
        if not _eqstr(name, "Midpoint to endpoint factors"):
            continue
        sheet = wb.sheet_by_name(name)
        start_row, data_col, _ = _find_data_start(sheet)
        if start_row < 0:
            # a negative start row would make range() begin at row -1
            log.debug("could not find a value column in sheet %s", name)
            continue

        # impact categories in column 1, their units in the next column
        flow_col = 0
        endpoint_factor_count = 0
        for row in range(start_row, sheet.nrows):
            indicator = xls.cell_str(sheet, row, flow_col)
            indicator_unit = xls.cell_str(sheet, row, flow_col + 1)
            # one value column per perspective, in the order I, H, E
            for i, perspective in enumerate(perspectives):
                val = xls.cell_f64(sheet, row, data_col + i)
                if val == 0.0:
                    continue
                rows.append([
                    "ReCiPe 2016 - Midpoint/" + perspective,
                    "ReCiPe 2016 - Endpoint/" + perspective,
                    indicator,
                    indicator_unit,
                    val,
                ])
                endpoint_factor_count += 1
        log.debug("extracted %i endpoint factors", endpoint_factor_count)

    endpoint = pandas.DataFrame(rows, columns=endpoint_cols)

    log.info("processing endpoint factors")
    # Applied in this order; na=False keeps rows without a unit out of the
    # mask instead of producing NaN entries that cannot be used for indexing.
    for pattern, unit in (('daly', 'DALY'),
                          ('species', 'species-year'),
                          ('USD', 'USD2013')):
        matches = endpoint['EndpointUnit'].str.contains(
            pattern, case=False, na=False)
        endpoint.loc[matches, 'EndpointUnit'] = unit

    log.info("reading endpoint map from csv")
    endpoint_map = pandas.read_csv(
        util.datapath+'ReCiPe2016_endpoint_to_midpoint.csv')
    endpoint = endpoint.merge(endpoint_map, how="left",
                              on='EndpointIndicator')

    # split into two dataframes
    endpoint_by_flow = (
        endpoint[endpoint['FlowFlag'] == 1]
        .drop(columns='FlowFlag')
        .rename(columns={'EndpointIndicator': 'Flowable'})
    )
    endpoint = endpoint[endpoint['FlowFlag'].isna()].drop(columns='FlowFlag')

    # return endpoint and endpoint by flow
    return endpoint, endpoint_by_flow
def _find_cas_column(sheet: xlrd.book.sheet) -> int:
    """Return the column of the first cell that matches "cas" (as decided
    by ``_eqstr``), or -1 when the sheet has no such cell."""
    for r, c in xls.iter_cells(sheet):
        header = xls.cell_str(sheet, r, c)
        if not _eqstr(header, "cas"):
            continue
        log.debug("identified column %i %s for CAS numbers", c, header)
        return c
    return -1
def _find_data_start(sheet: xlrd.book.sheet) -> (int, int, bool):
    """Locate the first data row and the first value column of a sheet.

    The cells are scanned in the order given by ``xls.iter_cells`` for a
    header that is either "I" / contains "Individualist" (one value column
    per perspective) or "all perspectives" (a single value column).

    Returns ``(row, col, with_perspectives)`` where ``row`` is the row
    directly below the header cell and ``col`` is the header's column.
    When no such header exists ``(-1, -1, False)`` is returned.
    """
    for row, col in xls.iter_cells(sheet):
        s = xls.cell_str(sheet, row, col)
        if s is None or s == "":
            continue
        if _eqstr(s, "I") or _containstr(s, "Individualist"):
            return row + 1, col, True
        if _eqstr(s, "all perspectives"):
            return row + 1, col, False
    # Always a 3-tuple: every caller unpacks three values, so a shorter
    # tuple would raise ValueError before the caller can test for row < 0.
    return -1, -1, False
def _read(xls_file: str) -> pd.DataFrame:
    """Load the TRACI 2.1 factors from the "Substances" sheet of the given
    Excel file and return them as a Pandas data frame."""
    log.info("read Traci 2.1 from file %s", xls_file)
    workbook = xlrd.open_workbook(xls_file)
    sheet = workbook.sheet_by_name("Substances")

    # Map each category column (from column 3 on) to its category info;
    # the header scan stops at the first empty header cell.
    categories = {}
    for col in range(3, sheet.ncols):
        header = xls.cell_str(sheet, 0, col)
        if header == "":
            break
        info = _category_info(header)
        if info is None:
            continue
        categories[col] = info

    records = []
    for row in range(1, sheet.nrows):
        # an empty flow name marks the end of the data
        flow = xls.cell_str(sheet, row, 2)
        if flow == "":
            break
        cas = format_cas(xls.cell_val(sheet, row, 1))
        # the mapping was filled in ascending column order, so this visits
        # the recognized category columns from left to right
        for col, info in categories.items():
            factor = xls.cell_f64(sheet, row, col)
            if factor == 0.0:
                continue
            dataframe.record(records,
                             method="TRACI 2.1",
                             indicator=info[0],
                             indicator_unit=info[1],
                             flow=flow,
                             flow_category=info[2],
                             flow_unit=info[3],
                             cas_number=cas,
                             factor=factor)

    return dataframe.data_frame(records)
def _find_flow_column(sheet: xlrd.book.sheet) -> int:
    """Return the column that holds the flow names of the sheet.

    Land occupation sheets always use column 1. Otherwise the column of the
    first cell that mentions "name" or "substance" is taken; column 0 is
    the fallback when no such cell exists.
    """
    if _containstr(sheet.name, "land", "occupation"):
        return 1

    for r, c in xls.iter_cells(sheet):
        header = xls.cell_str(sheet, r, c)
        if _containstr(header, "name") or _containstr(header, "substance"):
            log.debug("identified column %i %s for flow names", c, header)
            return c

    log.debug("no 'name' column in %s, take col=0 for that", sheet.name)
    return 0
def _determine_compartments(sheet: xlrd.book.sheet) -> (str, int):
    """Determine where the compartment of the flows of a sheet comes from.

    Returns ``(compartment, compartment_col)``. When a compartment column
    is found in the first rows of the sheet the result is ``("", col)``.
    Otherwise the column is -1 and the compartment is a default derived
    from the sheet name, or "" when no default applies.
    """
    # Only the top rows (0..5) are searched for the column header.
    for r, c in xls.iter_cells(sheet):
        if r > 5:
            break
        header = xls.cell_str(sheet, r, c)
        if (_containstr(header, "compartment")
                or _containstr(header, "name", "in", "ReCiPe")):
            log.debug("found compartment column %i", c)
            return "", c

    # (alternative keyword groups for the sheet name, default compartment,
    #  log message), checked in order
    defaults = (
        ((("global", "warming"), ("ozone",), ("particulate",),
          ("acidification",)),
         "air",
         "no compartment column; assuming 'air'"),
        ((("mineral", "resource", "scarcity"),),
         "resource/ground",
         "no compartment column; assuming 'resource/ground'"),
        ((("fossil", "resource", "scarcity"),),
         "resource",
         "no compartment column; assuming 'resource'"),
        ((("water", "consumption"),),
         "resource/fresh water",
         "no compartment column; assuming 'resource/fresh water'"),
    )
    for keyword_groups, compartment, message in defaults:
        if any(_containstr(sheet.name, *group) for group in keyword_groups):
            log.debug(message)
            return compartment, -1

    log.debug("no compartment column")
    return "", -1
def _read_mid_points(sheet: xlrd.book.sheet, records: list):
    """Append the midpoint factors of the given sheet to ``records``.

    One record is written per perspective (I, H, E) and non-zero factor.
    Sheets with a single "all perspectives" value column contribute the
    same factor to all three perspectives. Nothing is written when no value
    column can be located. The sheet name is used as the indicator name.
    """
    log.debug("try to read midpoint factors from sheet %s", sheet.name)

    start_row, data_col, with_perspectives = _find_data_start(sheet)
    if start_row < 0:
        log.debug("could not find a value column in sheet %s", sheet.name)
        return

    flow_col = _find_flow_column(sheet)
    if flow_col < 0:
        return

    cas_col = _find_cas_column(sheet)
    indicator_unit, flow_unit, unit_col = _determine_units(sheet)
    compartment, compartment_col = _determine_compartments(sheet)

    perspectives = ["I", "H", "E"]
    factor_count = 0
    for row in range(start_row, sheet.nrows):
        # Per-row values override the sheet-wide defaults when the sheet
        # has the respective column.
        if compartment_col > -1:
            compartment = xls.cell_str(sheet, row, compartment_col)
        if compartment in contexts:
            compartment = contexts[compartment]

        if unit_col > -1:
            flow_unit = xls.cell_str(sheet, row, unit_col)
            if "/" in flow_unit:
                # keep only the part after the first slash
                flow_unit = flow_unit.split("/")[1].strip()

        if cas_col > -1:
            cas = format_cas(xls.cell_f64(sheet, row, cas_col))
        else:
            cas = ""

        # Pair every perspective with its factor: one value column per
        # perspective, or the single shared value for all of them.
        if with_perspectives:
            factors = [
                (name, xls.cell_f64(sheet, row, data_col + offset))
                for offset, name in enumerate(perspectives)
            ]
        else:
            shared = xls.cell_f64(sheet, row, data_col)
            factors = [(name, shared) for name in perspectives]

        for perspective, value in factors:
            if value == 0.0:
                continue
            dfutil.record(records,
                          method="ReCiPe 2016 - Midpoint/" + perspective,
                          indicator=sheet.name,
                          indicator_unit=indicator_unit,
                          flow=xls.cell_str(sheet, row, flow_col),
                          flow_category=compartment,
                          flow_unit=flow_unit,
                          cas_number=cas,
                          factor=value)
            factor_count += 1

    log.debug("extracted %i factors", factor_count)