def _read(file: str) -> pd.DataFrame: log.info("read ReCiPe 2016 from file %s", file) wb = xlrd.open_workbook(file) records = [] for name in wb.sheet_names(): if _eqstr(name, "Version") or _eqstr(name, "Midpoint to endpoint factors"): continue _read_mid_points(wb.sheet_by_name(name), records) return dfutil.data_frame(records)
def get(subset=None) -> pd.DataFrame: """ Returns a dataframe of inventory based methods. :param subset: a list of dictionary keys from available inventories, if none selected all availabile inventories will be generated :return: df in standard LCIAmethod format """ method = df.data_frame(list()) method['Characterization Factor'] = pd.to_numeric( method['Characterization Factor']) if subset == None: list_of_inventories = subsets.get_subsets() else: list_of_inventories = subset alt_units = flowlist.get_alt_conversion() for inventory in list_of_inventories: flows = flowlist.get_flows(subset=inventory) flows.drop([ 'Formula', 'Synonyms', 'Class', 'External Reference', 'Preferred', 'AltUnit', 'AltUnitConversionFactor' ], axis=1, inplace=True) flows['Indicator'] = inventory flows['Indicator unit'] = subsets.get_inventory_unit(inventory) flows['Characterization Factor'] = 1 # Apply unit conversions where flow unit differs from indicator unit flows_w_conversion = pd.merge( flows, alt_units, how='left', left_on=['Flowable', 'Indicator unit', 'Unit'], right_on=['Flowable', 'AltUnit', 'Unit']) flows_w_conversion.loc[(flows_w_conversion['AltUnit'] == flows_w_conversion['Indicator unit']), 'Characterization Factor'] = flows_w_conversion[ 'AltUnitConversionFactor'] flows_w_conversion.drop( ['AltUnit', 'AltUnitConversionFactor', 'InverseConversionFactor'], axis=1, inplace=True) method = pd.concat([method, flows_w_conversion], ignore_index=True) method['Method'] = 'Inventory' return method
def _read(xls_file: str) -> pd.DataFrame: """Read the data from the Excel file with the given path into a Pandas data frame.""" log.info("read Traci 2.1 from file %s", xls_file) wb = xlrd.open_workbook(xls_file) sheet = wb.sheet_by_name("Substances") categories = {} for col in range(3, sheet.ncols): name = xls.cell_str(sheet, 0, col) if name == "": break cat_info = _category_info(name) if cat_info is not None: categories[col] = cat_info records = [] for row in range(1, sheet.nrows): flow = xls.cell_str(sheet, row, 2) if flow == "": break cas = format_cas(xls.cell_val(sheet, row, 1)) for col in range(3, sheet.ncols): cat_info = categories.get(col) if cat_info is None: continue factor = xls.cell_f64(sheet, row, col) if factor == 0.0: continue dataframe.record(records, method="TRACI 2.1", indicator=cat_info[0], indicator_unit=cat_info[1], flow=flow, flow_category=cat_info[2], flow_unit=cat_info[3], cas_number=cas, factor=factor) return dataframe.data_frame(records)
def run(self) -> pd.DataFrame: map_idx = self._build_map_index() log.info("applying flow mapping...") mapped = 0 preserved = 0 df = self.__df records = [] for row in range(0, self.__df.shape[0]): key = Mapper._flow_key( name=df.iat[row, 5], category=df.iat[row, 7], unit=df.iat[row, 8], ) targets = map_idx.get(key) # type: List[_FlowInfo] if targets is None: log.debug("could not map flow %s", key) if self.__preserve_unmapped: records.append(dfutil.as_list(df, row=row)) preserved += 1 continue rec = dfutil.as_list(df, row=row) for target in targets: r = rec.copy() r[5] = target.name r[6] = target.uuid r[7] = target.category r[8] = target.unit r[12] = r[12] / float(target.conversionfactor) records.append(r) mapped += 1 log.info( "created %i factors for mapped flows; " + "preserved %i factors for unmapped flows", mapped, preserved) return dfutil.data_frame(records)
def _read(access_file: str) -> pd.DataFrame: """Read the data from the Access database with the given path into a Pandas data frame.""" log.info("read ImpactWorld+ from file %s", access_file) path = cache.get_path(access_file) connStr = ( r'DRIVER={Microsoft Access Driver (*.mdb, *.accdb)};' r'DBQ=' + path + ";") cnxn = pyodbc.connect(connStr) crsr = cnxn.cursor() records = [] # Extract non regionalized data from "CF - not regionalized - All other impact categories" crsr.execute("SELECT * FROM [CF - not regionalized - All other impact categories]") rows = crsr.fetchall() for row in rows: dfutil.record(records, method="ImpactWorld+", indicator = row[1], indicator_unit=row[2], flow=row[5], flow_category=row[3] + "/" + row[4], flow_unit=row[8], cas_number=format_cas(row[6]).lstrip("0"), factor=row[7]) """List relevant sheets in Impact World Access file. Second item in tuple tells the source of compartment information. Compartment for water categories are not included in access file, defined below. Elementary flow names are used to define the compartment for land transformation and occupation. Compartment and Subcompartment data is available in the Access file for other categories.""" regional_sheets = [("CF - regionalized - WaterScarc - aggregated", "Raw/in water"), ("CF - regionalized - WaterAvailab_HH - aggregated", "Raw/in water"), ("CF - regionalized - LandTrans - aggregated", "Elementary Flow"), ("CF - regionalized - LandOcc - aggregated", "Elementary Flow"), ("CF - regionalized - EutroMar - aggregated", "Compartment"), ("CF - regionalized - PartMatterForm - aggregated","Compartment"), ("CF - regionalized - AcidFW - aggregated", "Compartment"), ("CF - regionalized - AcidTerr - aggregated", "Compartment"), ("CF - regionalized - EutroFW - aggregated", "Compartment"), ] for x in regional_sheets: if x[0] == "CF - regionalized - PartMatterForm - aggregated": # Extract global flows from the particulate matter Access sheet # Structure of this sheet is sql = "SELECT * FROM [" + x[0] + "] WHERE (([" + x[0] + "].Region In('World')))" crsr.execute(sql) rows = crsr.fetchall() for row in rows: dfutil.record(records, method="ImpactWorld+", indicator=row.ImpCat, indicator_unit=row.Unit.strip('[]').split('/')[0], flow=row.__getattribute__('Elem flow'), flow_category="Air/" + row.__getattribute__("Archetype 1"), flow_unit=row.Unit.strip('[]').split('/')[1], cas_number="", factor=row.CFvalue) else: sql = "SELECT * FROM [" + x[0] + "] WHERE (([" + x[0] + "].Resolution In('Global', 'Not regionalized')))" crsr.execute(sql) rows = crsr.fetchall() # extract column headers from Access sheet for exception testing cols = [column[0] for column in crsr.description] for row in rows: #Add water to detailed context information available in Access file if x[0] in ['CF - regionalized - WaterScarc - aggregated', 'CF - regionalized - WaterAvailab_HH - aggregated']: flow_stmt = 'Water, ' + row.__getattribute__('Elem flow') else: flow_stmt = row.__getattribute__('Elem flow') # Define context/compartment for flow based on impact category. if {'Compartment', 'Subcompartment'}.issubset(cols): category_stmt = row.Compartment + "/" + row.Subcompartment elif x[0] in ['CF - regionalized - LandTrans - aggregated', 'CF - regionalized - LandOcc - aggregated', 'CF - regionalized - WaterScarc - aggregated', 'CF - regionalized - WaterAvailab_HH - aggregated']: category_stmt = flow_stmt else: category_stmt = x[1] dfutil.record(records, method="ImpactWorld+", indicator = row.ImpCat, indicator_unit=row.Unit.strip('[]').split('/')[0], flow=flow_stmt, flow_category=category_stmt, flow_unit=row.Unit.strip('[]').split('/')[1], cas_number="", factor=row.__getattribute__('Weighted Average')) return dfutil.data_frame(records)