def get_all_records(data_dir):
    """
    Generate all the records found in the JSON files under a directory

    The directory is searched recursively for ``*.json`` files, which are
    visited in sorted path order. Each file is parsed and used to build
    an Oil object.

    :param data_dir: the directory that holds the data

    :returns: a generator of ``(oil, path)`` pairs: the Oil object and the
              path of the file it was loaded from

    If a file can not be loaded as JSON, its name is printed and the
    exception is re-raised.

    Use as such::

        for oil, path in get_all_records(data_dir):
            work_with_the_record
    """
    json_paths = sorted(Path(data_dir).rglob("*.json"))

    for json_path in json_paths:
        with open(json_path, encoding='utf-8') as json_file:
            try:
                raw_record = json.load(json_file)
            except Exception:
                # report which file is the problem, then let the error out
                print("Something went wrong loading:", json_path)
                raise

            oil = Oil.from_py_json(raw_record)

        yield oil, json_path
def test_from_py_json_nothing(self):
    """
    An empty record is rejected with a TypeError:
    you must specify at least an oil_id
    """
    empty_record = {}

    with pytest.raises(TypeError):
        Oil.from_py_json(empty_record)
def get_obj_json(obj_path, collection_name):
    """
    Load the JSON stored in a file

    :param obj_path: path of the JSON file to read (read as utf-8)
    :param collection_name: name of the collection the object belongs to.
                            Objects in the 'oil' collection are passed
                            through an Oil object, with its validation
                            reset, before being returned.

    :returns: the Python JSON structure for the object
    """
    # a context manager makes sure the file gets closed, rather than
    # leaving the handle for the garbage collector to clean up
    with open(obj_path, 'r', encoding='utf-8') as obj_file:
        obj = json.load(obj_file)

    if collection_name == 'oil':
        oil = Oil.from_py_json(obj)
        oil.reset_validation()
        obj = oil.py_json()

    return obj
def py_json(self):
    """
    Return the Python JSON for this object

    The attributes named in ``self.oil_props`` are gathered into a dict,
    which is round-tripped through an Oil object to produce the result.
    """
    props = {name: getattr(self, name) for name in self.oil_props}

    return Oil.from_py_json(props).py_json()
def test_version_too_high():
    """
    Loading a record with a data model version of "2.0.0"
    should raise a VersionError
    """
    metadata = {'name': 'An oil name'}
    too_new_record = {
        'oil_id': 'AD00123',
        'adios_data_model_version': "2.0.0",
        'metadata': metadata,
    }

    with pytest.raises(VersionError):
        Oil.from_py_json(too_new_record)
def py_json(self):
    """
    Return the Python JSON for this object

    The record and its sub-samples are converted to dicts and combined,
    ``resolve_oil_api()`` is applied to the combined dict, and the result
    is round-tripped through an Oil object.
    """
    record_dict = self.record.dict()
    sample_dicts = [sample.dict() for sample in self.sub_samples]
    record_dict['sub_samples'] = sample_dicts

    # operates on the dict that was passed in -- the return value is unused
    self.resolve_oil_api(record_dict)

    oil = Oil.from_py_json(record_dict)

    return oil.py_json()
def test_subsamples(self):
    """
    Is it getting all the subsamples

    Checks the number of sub-samples in the big record, and the names
    of the first and the fourth.
    """
    oil = Oil.from_py_json(BIG_RECORD)
    print("working with:", oil.metadata.name)

    assert len(oil.sub_samples) == 5

    first_sample = oil.sub_samples[0]
    assert first_sample.metadata.name == "Fresh Oil Sample"

    fourth_sample = oil.sub_samples[3]
    assert fourth_sample.metadata.name == "25.3% Evaporated"
def test_from_py_json_minimal(self):
    """
    A record with only an oil_id can be loaded

    Note: It seems we are not only checking for existence,
          but specific values. Parametrize??
    """
    minimal_record = {"oil_id": OIL_ID}

    oil = Oil.from_py_json(minimal_record)

    assert oil.oil_id == OIL_ID
    assert oil.status == []
    assert oil.metadata.API is None
def test_to_open_file():
    """
    test saving an oil object to an already open file object
    """
    out_path = OUTPUT_DIR / "temp_to_file.json"
    oil = Oil.from_py_json(BIG_RECORD)

    # The context manager closes (and so flushes) the file before it is
    # read back, rather than leaving that to the garbage collector.
    with open(out_path, 'w', encoding="utf-8") as outfile:
        oil.to_file(outfile)

    # read it back as JSON
    with open(out_path, encoding="utf-8") as infile:
        data = json.load(infile)

    assert data["oil_id"] == 'EC02234'
def test_repr_full(self):
    """
    The repr should be reasonable

    This is a "full" record: the repr should start with "Oil("
    and include the oil_id.
    """
    text = repr(Oil.from_py_json(BIG_RECORD))

    assert text.startswith("Oil(")
    assert "oil_id='EC02234'" in text
def test_version_bad():
    """
    A version that is not a version string (here the float 1.2)
    should raise a ValueError when the record is loaded.
    """
    metadata = {'name': 'An oil name'}
    bad_version_record = {
        'oil_id': 'AD00123',
        'adios_data_model_version': 1.2,
        'metadata': metadata,
    }

    with pytest.raises(ValueError):
        Oil.from_py_json(bad_version_record)
def test_version_none():
    """
    If it doesn't have a version string, it should get the current one.
    """
    original = Oil('XXXXXX')
    original.metadata.name = 'An oil name'
    record = original.py_json()

    # remove the version (no error if it was not there):
    record.pop('adios_data_model_version', None)

    reloaded = Oil.from_py_json(record)

    assert reloaded.adios_data_model_version == ADIOS_DATA_MODEL_VERSION
def test_completeness_score(self, oil_json, expected):
    """
    Check the Dcheck score of a record, and how it changes when
    fraction_recovered is set on the first sub-sample's distillation data
    """
    oil = Oil.from_py_json(oil_json)

    assert self.Dcheck(oil) == expected

    distillation = oil.sub_samples[0].distillation_data

    # add fraction_recovered:
    #   less than one adds 1 point
    #   exactly one adds 2 points
    for recovered, extra_points in ((0.8, 1), (1.0, 2)):
        distillation.fraction_recovered = MassFraction(recovered,
                                                       unit="fraction")
        assert self.Dcheck(oil) == expected + extra_points
def export_to_file(base_path, collection_name, record):
    """
    Write a record to a JSON file under ``base_path/collection_name``

    :param base_path: the top level directory of the export
    :param collection_name: the name of the collection the record is from
    :param record: the record, as Python JSON

    Records of the 'oil' collection are round-tripped through an Oil
    object, named after their oil_id, and saved in a sub-folder named for
    the first two characters of that ID. Records from any other collection
    are named after their ``_id`` and saved directly in the collection
    folder.
    """
    if collection_name != 'oil':
        record_name = str(record['_id'])
        filename = os.path.join(base_path, collection_name,
                                f'{record_name}.json')
    else:
        record = Oil.from_py_json(record).py_json()
        record_name = str(record['oil_id'])

        # There could be a lot of oil records, so we want to break them up
        # by prefix
        prefix = record_name[:2]
        collection_dir = os.path.join(base_path, collection_name)
        add_folder(collection_dir, prefix)

        filename = os.path.join(collection_dir, prefix,
                                f'{record_name}.json')

    with open(filename, 'w', encoding="utf-8") as outfile:
        json.dump(record, outfile, indent=4,
                  default=json_handle_unparseable)
def test__id_ignored(self):
    """
    checks that an _id attribute of a dict will get ignored:
    it is neither set on the Oil object, nor present in its JSON
    """
    record = {
        'oil_id': "XX123456",
        '_id': 1234567,
        'metadata': {'name': "An oil name"},
    }
    oil = Oil.from_py_json(record)

    assert oil.oil_id == "XX123456"

    with pytest.raises(AttributeError):
        getattr(oil, '_id')

    assert oil.metadata.name == "An oil name"

    assert '_id' not in oil.py_json()
def test_sample_metadata(self):
    """
    Load a record with one sub-sample that only has a fraction_evaporated,
    and check the sub-sample's metadata: its name, short_name and
    fraction_evaporated.
    """
    fraction_evaporated = {'value': 11,
                           'unit': '%',
                           'unit_type': 'massfraction'}
    record = {
        'oil_id': "XX123456",
        'metadata': {'name': "An oil name"},
        'sub_samples': [
            {'metadata': {'fraction_evaporated': fraction_evaporated}},
        ],
    }

    oil = Oil.from_py_json(record)

    print(oil.sub_samples[0].metadata)
    print(oil.sub_samples[0].metadata.fraction_evaporated)

    assert len(oil.sub_samples) > 0

    sample_metadata = oil.sub_samples[0].metadata
    assert sample_metadata.name == 'Fresh Oil Sample'
    assert sample_metadata.short_name == 'Fresh Oil'
    assert sample_metadata.fraction_evaporated is not None
class TestFullRecordMetadata:
    """
    tests loading a full record (or pretty full) from JSON
    """
    # loaded once, when the class is defined, and shared by all the tests
    oil = Oil.from_py_json(BIG_RECORD)

    def test_oil_id(self):
        """
        The oil_id should be the one from the record
        """
        record = self.oil
        print(record.oil_id)

        assert record.oil_id == "EC02234"

    @pytest.mark.parametrize(
        "attr, value",
        [
            ("location", "Alberta, Canada"),
            ('name', 'Access West Blend Winter'),
            ('source_id', '2234'),
            ('sample_date', '2013-08-04'),
        ],
    )
    def test_location(self, attr, value):
        """
        Each of the metadata attributes should have the expected value
        """
        oil_metadata = self.oil.metadata
        print(vars(oil_metadata))

        actual = getattr(oil_metadata, attr)

        assert actual == value
def test_completeness_score(self, oil_json, expected):
    """
    The overall completeness should be the expected raw score, scaled by
    MAX_SCORE to a 0-10 range and rounded.
    """
    oil = Oil.from_py_json(oil_json)

    scaled_score = round(expected / MAX_SCORE * 10)

    assert completeness(oil) == scaled_score
}, "bulk_composition": [ { "name": "sulfur", "measurement": { "value": 0.0011, "unit": "fraction", "unit_type": "massfraction" } } ], "industry_properties": [ { "name": "Conradson Carbon Residue (CCR)", "measurement": { "value": 0.0054, "unit": "fraction", "unit_type": "massfraction" } } ] } ], "review_status": { "status": "Not Reviewed" } } """ # Round tripping through the Oil object to make sure it's consistent basic_noaa_fm = Oil.from_py_json(json.loads(basic_noaa_fm_json)).py_json()
def test_completeness_score(self, oil_json, expected):
    """
    The Vcheck score of the record should match the expected value
    """
    score = self.Vcheck(Oil.from_py_json(oil_json))

    assert score == expected
def test_make_oil_from_partial():
    """
    An Oil can be built from a partial record

    This one has stuff needed for the completeness tests,
    but not much else.
    """
    fresh_sample = {
        "metadata": {
            "name": "Fresh Oil Sample",
            "short_name": "Fresh Oil",
        },
        'physical_properties': {
            'densities': [
                {
                    'density': {'value': 1000,
                                'unit': 'kg/m^3',
                                'unit_type': 'density'},
                    'ref_temp': {'value': 10,
                                 'unit': 'C',
                                 'unit_type': 'temperature'},
                },
                {
                    'density': {'value': 950,
                                'unit': 'kg/m^3',
                                'unit_type': 'density'},
                    'ref_temp': {'value': 30,
                                 'unit': 'C',
                                 'unit_type': 'temperature'},
                },
            ],
            'dynamic_viscosities': [
                {
                    'viscosity': {'value': 1300,
                                  'unit': 'mPa.s',
                                  'unit_type': 'dynamicviscosity'},
                    'ref_temp': {'value': 0.0,
                                 'unit': 'C',
                                 'unit_type': 'temperature'},
                },
                {
                    'viscosity': {'value': 1500,
                                  'unit': 'mPa.s',
                                  'unit_type': 'dynamicviscosity'},
                    'ref_temp': {'value': 20.0,
                                 'unit': 'C',
                                 'unit_type': 'temperature'},
                },
            ],
        },
        'environmental_behavior': {
            'emulsions': [
                {'value': 10,
                 'unit': '%',
                 'unit_type': 'massfraction'},
            ],
        },
        'distillation_data': {
            'cuts': [
                {
                    'fraction': {'value': 0,
                                 'unit': '%',
                                 'unit_type': 'massfraction'},
                    'vapor_temp': {'value': 56,
                                   'unit': 'C',
                                   'unit_type': 'temperature'},
                },
                {
                    'fraction': {'value': 100,
                                 'unit': '%',
                                 'unit_type': 'massfraction'},
                    'vapor_temp': {'value': 84,
                                   'unit': 'C',
                                   'unit_type': 'temperature'},
                },
            ],
        },
    }

    evaporated_sample = {
        "metadata": {
            "name": "Evaporated Oil Sample",
            "short_name": "Evaporated Oil",
            "fraction_weathered": {'value': 15,
                                   'unit': '%',
                                   'unit_type': 'massfraction'},
        },
        'physical_properties': {
            'densities': [
                {
                    'density': {'value': 15,
                                'unit': 'kg/m^3',
                                'unit_type': 'density'},
                    'ref_temp': {'value': 10,
                                 'unit': 'C',
                                 'unit_type': 'temperature'},
                },
            ],
            'dynamic_viscosities': [
                {
                    'viscosity': {'value': 1500,
                                  'unit': 'mPa.s',
                                  'unit_type': 'dynamicviscosity'},
                    'ref_temp': {'value': 0.0,
                                 'unit': 'C',
                                 'unit_type': 'temperature'},
                },
            ],
        },
    }

    partial_json = {
        'oil_id': 'EC09999',
        'metadata': {'comments': 'A comment'},
        'sub_samples': [fresh_sample, evaporated_sample],
    }

    print(partial_json["sub_samples"][0]['metadata'])

    oil = Oil.from_py_json(partial_json)

    assert oil.oil_id == 'EC09999'
    assert oil.metadata.comments == "A comment"

    first_sample = oil.sub_samples[0]
    assert first_sample.physical_properties.densities[0].density.value == 1000
    assert first_sample.metadata.name == "Fresh Oil Sample"
def no_type_oil():
    """
    Return an Oil built from a record with only an oil_id and a name
    """
    minimal_record = {
        'oil_id': 'AD00123',
        'metadata': {'name': 'An oil name'},
    }

    return Oil.from_py_json(minimal_record)
def big_record():
    """
    Return a new Oil object built from BIG_RECORD
    """
    oil = Oil.from_py_json(BIG_RECORD)

    return oil
def py_json(self):
    """
    Return the Python JSON for this object, produced by round-tripping
    ``self.record`` through an Oil object
    """
    return Oil.from_py_json(self.record).py_json()