def _read(xls_file: str) -> pd.DataFrame:
    """Read the data from the Excel file with the given path into a
    Pandas data frame."""
    log.info("read Traci 2.1 from file %s", xls_file)
    wb = xlrd.open_workbook(xls_file)
    sheet = wb.sheet_by_name("Substances")

    categories = {}
    for col in range(3, sheet.ncols):
        name = xls.cell_str(sheet, 0, col)
        if name == "":
            break
        cat_info = _category_info(name)
        if cat_info is not None:
            categories[col] = cat_info

    records = []
    for row in range(1, sheet.nrows):
        flow = xls.cell_str(sheet, row, 2)
        if flow == "":
            break
        cas = format_cas(xls.cell_val(sheet, row, 1))
        for col in range(3, sheet.ncols):
            cat_info = categories.get(col)
            if cat_info is None:
                continue
            factor = xls.cell_f64(sheet, row, col)
            if factor == 0.0:
                continue
            dataframe.record(
                records,
                method="TRACI 2.1",
                indicator=cat_info[0],
                indicator_unit=cat_info[1],
                flow=flow,
                flow_category=cat_info[2],
                flow_unit=cat_info[3],
                cas_number=cas,
                factor=factor)

    return dataframe.data_frame(records)
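# The reader above relies on _category_info to resolve each TRACI column
# header into a 4-tuple of (indicator, indicator unit, flow category,
# flow unit), which is what cat_info[0]..cat_info[3] unpack. The sketch
# below only illustrates that expected shape; the function name, header
# string, and returned values are illustrative assumptions, not the
# module's actual mapping.
def _example_category_info(name: str):
    examples = {
        "Global Warming Air (kg CO2 eq / kg substance)":
            ("Global warming", "kg CO2 eq", "air", "kg"),
    }
    return examples.get(name)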
def _read_mid_points(sheet: xlrd.sheet.Sheet, records: list):
    """Read the midpoint characterization factors from the given ReCiPe
    sheet and append them to the list of records."""
    log.debug("try to read midpoint factors from sheet %s", sheet.name)

    start_row, data_col, with_perspectives = _find_data_start(sheet)
    if start_row < 0:
        log.debug("could not find a value column in sheet %s", sheet.name)
        return

    flow_col = _find_flow_column(sheet)
    if flow_col < 0:
        return

    cas_col = _find_cas_column(sheet)
    indicator_unit, flow_unit, unit_col = _determine_units(sheet)
    compartment, compartment_col = _determine_compartments(sheet)

    perspectives = ["I", "H", "E"]
    factor_count = 0
    for row in range(start_row, sheet.nrows):
        if compartment_col > -1:
            compartment = xls.cell_str(sheet, row, compartment_col)
            if compartment in contexts:
                compartment = contexts[compartment]
        if unit_col > -1:
            flow_unit = xls.cell_str(sheet, row, unit_col)
            if "/" in flow_unit:
                flow_unit = flow_unit.split("/")[1].strip()
        cas = ""
        if cas_col > -1:
            cas = format_cas(xls.cell_f64(sheet, row, cas_col))
        if with_perspectives:
            for i in range(0, 3):
                val = xls.cell_f64(sheet, row, data_col + i)
                if val == 0.0:
                    continue
                dfutil.record(
                    records,
                    method="ReCiPe 2016 - Midpoint/" + perspectives[i],
                    indicator=sheet.name,
                    indicator_unit=indicator_unit,
                    flow=xls.cell_str(sheet, row, flow_col),
                    flow_category=compartment,
                    flow_unit=flow_unit,
                    cas_number=cas,
                    factor=val)
                factor_count += 1
        else:
            val = xls.cell_f64(sheet, row, data_col)
            if val == 0.0:
                continue
            for p in perspectives:
                dfutil.record(
                    records,
                    method="ReCiPe 2016 - Midpoint/" + p,
                    indicator=sheet.name,
                    indicator_unit=indicator_unit,
                    flow=xls.cell_str(sheet, row, flow_col),
                    flow_category=compartment,
                    flow_unit=flow_unit,
                    cas_number=cas,
                    factor=val)
                factor_count += 1
    log.debug("extracted %i factors", factor_count)
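# Illustrative sketch of the perspective handling above (the helper name is
# an assumption, not part of the source): when a sheet carries a single
# characterization value instead of one column per perspective, the same
# value is recorded once for each of the ReCiPe perspectives I, H, and E,
# yielding three method variants.
def _example_midpoint_methods(value: float) -> dict:
    perspectives = ["I", "H", "E"]
    return {"ReCiPe 2016 - Midpoint/" + p: value for p in perspectives}

# _example_midpoint_methods(2.5) ->
# {'ReCiPe 2016 - Midpoint/I': 2.5,
#  'ReCiPe 2016 - Midpoint/H': 2.5,
#  'ReCiPe 2016 - Midpoint/E': 2.5}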
def _read(access_file: str) -> pd.DataFrame:
    """Read the data from the Access database with the given path into a
    Pandas data frame."""
    log.info("read ImpactWorld+ from file %s", access_file)
    path = cache.get_path(access_file)

    connStr = (
        r'DRIVER={Microsoft Access Driver (*.mdb, *.accdb)};'
        r'DBQ=' + path + ";")
    cnxn = pyodbc.connect(connStr)
    crsr = cnxn.cursor()
    records = []

    # Extract non-regionalized data from
    # "CF - not regionalized - All other impact categories".
    crsr.execute("SELECT * FROM [CF - not regionalized - All other impact categories]")
    rows = crsr.fetchall()
    for row in rows:
        dfutil.record(
            records,
            method="ImpactWorld+",
            indicator=row[1],
            indicator_unit=row[2],
            flow=row[5],
            flow_category=row[3] + "/" + row[4],
            flow_unit=row[8],
            cas_number=format_cas(row[6]).lstrip("0"),
            factor=row[7])

    """List of the relevant sheets in the ImpactWorld+ Access file. The second
    item in each tuple tells the source of the compartment information:
    compartments for the water categories are not included in the Access file
    and are defined below; elementary flow names are used to define the
    compartment for land transformation and occupation; compartment and
    subcompartment data is available in the Access file for the other
    categories."""
    regional_sheets = [
        ("CF - regionalized - WaterScarc - aggregated", "Raw/in water"),
        ("CF - regionalized - WaterAvailab_HH - aggregated", "Raw/in water"),
        ("CF - regionalized - LandTrans - aggregated", "Elementary Flow"),
        ("CF - regionalized - LandOcc - aggregated", "Elementary Flow"),
        ("CF - regionalized - EutroMar - aggregated", "Compartment"),
        ("CF - regionalized - PartMatterForm - aggregated", "Compartment"),
        ("CF - regionalized - AcidFW - aggregated", "Compartment"),
        ("CF - regionalized - AcidTerr - aggregated", "Compartment"),
        ("CF - regionalized - EutroFW - aggregated", "Compartment"),
    ]

    for x in regional_sheets:
        if x[0] == "CF - regionalized - PartMatterForm - aggregated":
            # Extract global flows from the particulate matter Access sheet.
            # The structure of this sheet differs from the other regionalized
            # sheets: rows are filtered by Region and the air compartment is
            # taken from the archetype column.
            sql = "SELECT * FROM [" + x[0] + "] WHERE (([" + x[0] + "].Region In('World')))"
            crsr.execute(sql)
            rows = crsr.fetchall()
            for row in rows:
                dfutil.record(
                    records,
                    method="ImpactWorld+",
                    indicator=row.ImpCat,
                    indicator_unit=row.Unit.strip('[]').split('/')[0],
                    flow=row.__getattribute__('Elem flow'),
                    flow_category="Air/" + row.__getattribute__("Archetype 1"),
                    flow_unit=row.Unit.strip('[]').split('/')[1],
                    cas_number="",
                    factor=row.CFvalue)
        else:
            sql = ("SELECT * FROM [" + x[0] + "] WHERE (([" + x[0]
                   + "].Resolution In('Global', 'Not regionalized')))")
            crsr.execute(sql)
            rows = crsr.fetchall()
            # Extract column headers from the Access sheet for exception testing.
            cols = [column[0] for column in crsr.description]
            for row in rows:
                # Add water to the detailed context information available in
                # the Access file.
                if x[0] in ['CF - regionalized - WaterScarc - aggregated',
                            'CF - regionalized - WaterAvailab_HH - aggregated']:
                    flow_stmt = 'Water, ' + row.__getattribute__('Elem flow')
                else:
                    flow_stmt = row.__getattribute__('Elem flow')

                # Define the context/compartment for the flow based on the
                # impact category.
                if {'Compartment', 'Subcompartment'}.issubset(cols):
                    category_stmt = row.Compartment + "/" + row.Subcompartment
                elif x[0] in ['CF - regionalized - LandTrans - aggregated',
                              'CF - regionalized - LandOcc - aggregated',
                              'CF - regionalized - WaterScarc - aggregated',
                              'CF - regionalized - WaterAvailab_HH - aggregated']:
                    category_stmt = flow_stmt
                else:
                    category_stmt = x[1]

                dfutil.record(
                    records,
                    method="ImpactWorld+",
                    indicator=row.ImpCat,
                    indicator_unit=row.Unit.strip('[]').split('/')[0],
                    flow=flow_stmt,
                    flow_category=category_stmt,
                    flow_unit=row.Unit.strip('[]').split('/')[1],
                    cas_number="",
                    factor=row.__getattribute__('Weighted Average'))

    return dfutil.data_frame(records)
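# Illustrative sketch of the unit handling above (the helper name and the
# sample string are assumptions, not part of the source): the Access "Unit"
# field combines the indicator unit and the flow unit as
# "[indicator unit/flow unit]", so the reader strips the brackets and splits
# on "/" to recover both parts.
def _example_split_unit(unit: str) -> tuple:
    parts = unit.strip('[]').split('/')
    return parts[0], parts[1]

# _example_split_unit("[kg CO2 eq/kg]") -> ("kg CO2 eq", "kg")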