def _convert_condition(self, condition): # ref_temp needs to be converted to Temperature type try: ref_temp, unit = re.findall(r'([0-9]+).+([CFK])', condition['ref_temp'])[0] ref_temp = sigfigs(ref_temp) condition['ref_temp'] = {'value': ref_temp, 'unit': unit} except (TypeError, IndexError): # probably encountered a None, which is fine. We will just leave # it alone and move to the next one. pass # unit needs to be converted to PyNUCOS unit/unit_type unit_map = { '% w/w': ('%', 'massfraction'), '%w/w': ('%', 'massfraction'), 'µg/g': ('µg/g', 'massfraction'), 'mg/g': ('mg/g', 'massfraction'), 'Pa.s': ('Pa.s', 'dynamicviscosity'), 'mPa.s': ('mPa.s', 'dynamicviscosity'), 'Pa': ('Pa', 'pressure'), 'kPa': ('kPa', 'pressure'), 'mN/m or dynes/cm': ('mN/m', 'interfacialtension'), 'g/cm2': ('g/cm^2', 'needleadhesion'), 'g/mL': ('g/mL', 'density'), '̊C': ('C', 'temperature'), } try: unit, unit_type = unit_map[condition['unit']] condition['unit'] = unit condition['unit_type'] = unit_type except KeyError: # print(f'unit key "{condition["unit"]}" not found') pass
def test_dist_cuts(self, samp_ind, cut_index, fraction, temp_f):
    """
    Check a single distillation cut of the mapped Exxon record.

    The cut is looked up by sub-sample index and cut index.  Its fraction
    must equal the expected fraction exactly.  Its vapor temperature must
    agree (relative tolerance 1e-4) with temp_f run through
    uc.convert("F", "C", ...) and then sigfigs(..., 5).
    """
    mapper = ExxonMapper(self.record)
    sub_sample = mapper.sub_samples[samp_ind]
    cut = sub_sample.distillation_data.cuts[cut_index]

    assert cut.fraction.value == fraction

    expected_temp = sigfigs(uc.convert("F", "C", temp_f), 5)
    assert isclose(cut.vapor_temp.value, expected_temp, rel_tol=1e-4)
def test_dist_end_point(self, sample_idx, expected):
    """
    Check the distillation end point of one sub-sample of the mapped
    Exxon record.

    When expected is None, the sub-sample must have no end point.
    Otherwise the end point value must agree (relative tolerance 1e-4)
    with expected run through uc.convert("F", "C", ...) and then
    sigfigs(..., 5), and the end point unit must be 'C'.
    """
    sub_samples = ExxonMapper(self.record).sub_samples

    if expected is not None:
        expected_c = sigfigs(uc.convert("F", "C", expected), 5)
        end_point = sub_samples[sample_idx].distillation_data.end_point

        assert isclose(end_point.value, expected_c, rel_tol=1e-4)
        assert end_point.unit == 'C'
        return

    assert sub_samples[sample_idx].distillation_data.end_point is None
def _get_record_raw_columns(self, name): """ Return the columns in the Excel sheet referenced by the name of an oil. Note: the Excel sheet columns object has no direct indexing, only a next(). This is why we are using walk method to get our indexed columns. Note: It has been decided that we will only keep 5 significant digits of any floating point values in the datasheet. """ return list( zip(*[[sigfigs(cell.value, 5) for cell in col] for i, col in enumerate(self.db_sheet.columns) if i in self.col_indexes[name]]))
def _get_conditions_columns(self): """ The April 2020 update of the Environment Canada datasheet contained a few extra columns that contained data concerning the testing conditions for the measurements. They are: - Unit of Measurement: Instead of annotating the category and/or field names with unit information, they put this information into a dedicated column - Temperature: Instead of annotating temperature information into the field names, they put this information into a dedicated column. - Conditions of analysis: Other significant information concerning the measurements taken seem to be entered here. Most significantly, non-newtonian shear rate for viscosities. This was not there before. """ return list( zip(*[[sigfigs(strip(cell.value), 5) for cell in col] for i, col in enumerate(self.db_sheet.columns) if i in [2, 3, 4]]))
def test_round_sigfigs(val, num_figs, expected):
    """
    util.sigfigs(val, num_figs) must produce the expected value.
    """
    rounded = util.sigfigs(val, num_figs)

    assert rounded == expected
def set_aggregate_subsample_props(self):
    """
    Set the identifying metadata of every sub-sample on the oil object.

    The properties commonly associated with a sub-sample are copied
    into every measurement, so they need to be reconciled.  Here the
    'Density_0' measurements are used: one is expected per sub-sample,
    and their ests_id values must match self.sample_ids.

    Sub-sample properties:

    - ests_id: One common value per sub-sample.  This could be numeric,
      so it is forced to a string.

    - weathering_fraction: One value per sub-sample.  These values look
      like some kind of code that EC uses.  Probably not useful to us,
      but it serves as the sample name when there is no weathering
      percent.

    - weathering_percent: One common value per sub-sample.  These values
      are mostly a string in the format 'N.N%'.  It is converted to a
      {'value': ..., 'unit': '%'} structure suitable for a Measurement
      type.  None and the string 'None' both mean "no value".

    - weathering_method: One common value per sub-sample.  This is
      information that might be good to save, but it doesn't fit into
      the Adios oil model.

    For each sub-sample, the sample_id, name, short_name and
    fraction_weathered metadata fields are written with self.deep_set().

    :raises ValueError: if the ests_id values of the 'Density_0'
                        measurements differ from self.sample_ids.
    """
    density_rows = [row for row in self.src_values
                    if row['property_id'] == 'Density_0']
    first_sample_ids = [row['ests_id'] for row in density_rows]

    ids_match = self.sample_ids == first_sample_ids
    if not ids_match:
        raise ValueError(f'duplicate sample_ids: {first_sample_ids}')

    for idx, row in enumerate(density_rows):
        sample_id = str(row['ests_id'])
        raw_percent = row['weathering_percent']

        if raw_percent is None or raw_percent == 'None':
            fraction_weathered = None
        else:
            fraction_weathered = {
                'value': sigfigs(raw_percent.rstrip('%'), sig=5),
                'unit': '%'
            }

        if fraction_weathered is None:
            # no percentage available, fall back to the fraction code
            name = f'{row["weathering_fraction"]}'
            short_name = f'{row["weathering_fraction"]}'[:12]
        elif isclose(fraction_weathered['value'], 0.0):
            name = 'Fresh Oil Sample'
            short_name = 'Fresh Oil'
        else:
            name = f'{fraction_weathered["value"]}% Evaporated'
            short_name = f'{fraction_weathered["value"]}% Evaporated'

        # written in this order: sample_id, name, short_name,
        # fraction_weathered
        metadata = (
            ('sample_id', sample_id),
            ('name', name),
            ('short_name', short_name),
            ('fraction_weathered', fraction_weathered),
        )

        for field, value in metadata:
            self.deep_set(self.oil_obj,
                          f'sub_samples.{idx}.metadata.{field}',
                          value)