def test_adding_arbitrary_files(tmpdir):
    """
    Tests that adding arbitrary files works.
    """
    json_path = os.path.join(tmpdir.strpath, "temp.json")
    payload = {"a": 1, "b": 2}
    with open(json_path, "wt") as fh:
        json.dump(payload, fh, sort_keys=True)

    asdf_filename = os.path.join(tmpdir.strpath, "test.h5")
    ds = ASDFDataSet(asdf_filename)
    ds.add_auxiliary_data_file(
        json_path, tag="test_file", parameters={"1": 1})
    ds.__del__()
    del ds

    reopened = ASDFDataSet(asdf_filename)
    # Extraction works the same as always, but now has a special attribute,
    # that returns the data as a BytesIO.
    aux_data = reopened.auxiliary_data.File.test_file
    assert aux_data.parameters == {"1": 1}
    assert aux_data.tag == "test_file"

    # Decoding the stored bytes must give back the original dictionary.
    assert json.loads(aux_data.file.read().decode()) == payload

    # The raw bytes must match the on-disk file as well.
    aux_data.file.seek(0, 0)
    with open(json_path, "rb") as fh:
        assert fh.read() == aux_data.file.read()
def test_get_provenance_document_for_id(tmpdir):
    """
    Provenance documents can be looked up by the qualified ids they contain;
    unknown or unqualified ids raise.
    """
    asdf_filename = os.path.join(tmpdir.strpath, "test.h5")
    ds = ASDFDataSet(asdf_filename)
    doc = prov.read(
        os.path.join(data_dir, "example_schematic_processing_chain.xml"))
    ds.provenance["test_provenance"] = doc

    # Both ids live in the same stored document.
    expected = {"name": "test_provenance", "document": doc}
    for prov_id in (
            '{http://seisprov.org/seis_prov/0.1/#}sp002_dt_f87sf7sf78',
            '{http://seisprov.org/seis_prov/0.1/#}sp004_lp_f87sf7sf78'):
        assert ds.provenance.get_provenance_document_for_id(
            prov_id) == expected

    # Id not found.
    with pytest.raises(ASDFValueError) as err:
        ds.provenance.get_provenance_document_for_id(
            '{http://seisprov.org/seis_prov/0.1/#}bogus_id')
    assert err.value.args[0] == (
        "Document containing id "
        "'{http://seisprov.org/seis_prov/0.1/#}bogus_id'"
        " not found in the data set.")

    # Not a qualified id.
    with pytest.raises(ASDFValueError) as err:
        ds.provenance.get_provenance_document_for_id("bla")
    assert err.value.args[0] == ("Not a valid qualified name.")

    ds.__del__()
def test_waveform_accessor_printing(example_data_set):
    """
    Pretty printing of the waveform accessor proxy objects.
    """
    ds = ASDFDataSet(example_data_set.filename)
    expected = (
        "Contents of the data set for station AE.113A:\n"
        "    - Has a StationXML file\n"
        "    - 1 Waveform Tag(s):\n"
        "        raw_recording")
    assert ds.waveforms.AE_113A.__str__() == expected
    ds.__del__()
    del ds
def test_accessing_non_existent_tag_raises(example_data_set):
    """
    Accessing a non-existing station should raise.
    """
    ds = ASDFDataSet(example_data_set.filename)
    try:
        with pytest.raises(WaveformNotInFileException) as excinfo:
            ds.waveforms.AE_113A.asdfasdf
        assert excinfo.value.args[0] == (
            "Tag 'asdfasdf' not part of the data "
            "set for station 'AE.113A'.")
    finally:
        # Always release the file handle, even if the assertion fails.
        ds.__del__()
def test_empty_asdf_file_has_no_quakeml_dataset(tmpdir):
    """
    There is no reason an empty ASDF file should have a QuakeML group.
    """
    asdf_filename = os.path.join(tmpdir.strpath, "test.h5")
    data_set = ASDFDataSet(asdf_filename)
    data_set.__del__()

    # Open read-only and close deterministically: h5py.File() without an
    # explicit mode is deprecated (h5py >= 3 requires/defaults to 'r'), and
    # the original handle was never closed, which can block the re-open
    # below on platforms with strict HDF5 file locking.
    with h5py.File(asdf_filename, mode="r") as f:
        assert "QuakeML" not in f

    # It should still return an empty catalog object if the events are
    # requested.
    new_data_set = ASDFDataSet(asdf_filename)
    assert len(new_data_set.events) == 0
    new_data_set.__del__()
def test_adding_waveforms_with_provenance_id(tmpdir):
    """
    Waveforms written with a provenance id keep it across a file round trip.
    """
    asdf_filename = os.path.join(tmpdir.strpath, "test.h5")
    data_path = os.path.join(data_dir, "small_sample_data_set")

    ds = ASDFDataSet(asdf_filename)
    for mseed_file in glob.glob(os.path.join(data_path, "*.mseed")):
        ds.add_waveforms(mseed_file, tag="raw_recording",
                         provenance_id="{http://example.org}test")
    ds.__del__()
    del ds

    reopened = ASDFDataSet(asdf_filename)
    # Every trace of the tag must carry the id it was stored with.
    assert all(
        tr.stats.asdf.provenance_id == "{http://example.org}test"
        for tr in reopened.waveforms.TA_POKR.raw_recording)
    reopened.__del__()
    del reopened
def test_reading_and_writing_auxiliary_data_with_provenance_id(tmpdir):
    """
    Auxiliary data written with a provenance id returns it after re-opening.
    """
    asdf_filename = os.path.join(tmpdir.strpath, "test.h5")
    provenance_id = "{http://example.org}test"

    ds = ASDFDataSet(asdf_filename)
    ds.add_auxiliary_data(
        data=np.random.random((10, 10)),
        # The data type name must NOT start with a number.
        data_type="RandomArray",
        tag="test_data",
        parameters={"a": 1, "b": 2.0, "e": "hallo"},
        provenance_id=provenance_id)
    ds.__del__()
    del ds

    reopened = ASDFDataSet(asdf_filename)
    assert (reopened.auxiliary_data.RandomArray.test_data.provenance_id
            == provenance_id)
def test_adding_auxiliary_data_with_invalid_data_type_name_raises(tmpdir):
    """
    A data type name that starts with a number must be rejected.
    """
    ds = ASDFDataSet(os.path.join(tmpdir.strpath, "test.h5"))
    try:
        with pytest.raises(ASDFValueError) as err:
            # The data type must NOT start with a number.
            ds.add_auxiliary_data(
                data=np.random.random((10, 10)),
                data_type="2DRandomArray",
                tag="test_data",
                parameters={"a": 1, "b": 2.0, "e": "hallo"})
        assert err.value.args[0] == (
            "Data type name '2DRandomArray' is invalid. It must validate "
            "against the regular expression '^[A-Z][A-Za-z0-9]*$'.")
    finally:
        # Always release the file handle, even if the assertion fails.
        ds.__del__()
def test_trying_to_add_provenance_record_with_invalid_name_fails(tmpdir):
    """
    The name must be valid according to a particular regular expression.
    """
    ds = ASDFDataSet(os.path.join(tmpdir.strpath, "test.h5"))
    doc = prov.read(
        os.path.join(data_dir, "example_schematic_processing_chain.xml"),
        format="xml")

    # First try adding it as a prov document.
    with pytest.raises(ASDFValueError) as err:
        ds.add_provenance_document(doc, name="a-b-c")
    assert err.value.args[0] == (
        "Name 'a-b-c' is invalid. It must validate against the regular "
        "expression '^[0-9a-z][0-9a-z_]*[0-9a-z]$'.")

    # Must sometimes be called to get around some bugs.
    ds.__del__()
def test_adding_auxiliary_data_with_wrong_tag_name_raises(tmpdir):
    """
    Tag names containing dots must be rejected.
    """
    ds = ASDFDataSet(os.path.join(tmpdir.strpath, "test.h5"))

    # With provenance id.
    with pytest.raises(ASDFValueError) as err:
        ds.add_auxiliary_data(
            data=np.random.random((10, 10)),
            # The data type must NOT start with a number.
            data_type="RandomArray",
            tag="A.B.C",
            parameters={})
    assert err.value.args[0] == (
        "Tag name 'A.B.C' is invalid. It must validate "
        "against the regular expression "
        "'^[a-zA-Z0-9][a-zA-Z0-9_]*[a-zA-Z0-9]$'.")

    ds.__del__()
def test_provenance_dicionary_behaviour(tmpdir):
    """
    The provenance accessor must behave like a dictionary after a round trip.
    """
    asdf_filename = os.path.join(tmpdir.strpath, "test.h5")
    ds = ASDFDataSet(asdf_filename)
    # Add it as a document, setting via __setitem__.
    doc = prov.read(
        os.path.join(data_dir, "example_schematic_processing_chain.xml"),
        format="xml")
    ds.provenance["test_provenance"] = doc
    ds.__del__()
    del ds

    reopened = ASDFDataSet(asdf_filename)
    provenance = reopened.provenance
    assert provenance.list() == ["test_provenance"]
    assert provenance["test_provenance"] == doc
    assert getattr(provenance, "test_provenance") == doc
    assert list(provenance.keys()) == ["test_provenance"]
    assert list(provenance.values()) == [doc]
    assert list(provenance.items()) == [("test_provenance", doc)]