def test_load_with_archive_filepath_modified(self):
    # Regression test: an artifact loaded from a filepath must keep its own
    # data even after a different artifact is saved over that same filepath.
    #
    # `load` used to be lazy and only extracted data on demand (e.g. when
    # `save` or `view` was called). That was buggy: by then the file could
    # have been deleted or, worse, replaced by a different .qza, so the wrong
    # archive (or no archive at all) would be extracted. There is no easy
    # cross-platform fix for that, so Artifact.load now always extracts
    # immediately. Quick inspection without extracting/copying data, which
    # was the real motivation for lazy loading, is offered by Artifact.peek.
    archive_path = os.path.join(self.test_dir.name, 'artifact.qza')

    # Seed the filepath with the first artifact and load it back.
    Artifact.import_data(FourInts, [-1, 42, 0, 43]).save(archive_path)
    artifact1 = Artifact.load(archive_path)

    # Overwrite the very same filepath with a different artifact.
    replacement = Artifact.import_data(FourInts, [10, 11, 12, 13])
    replacement.save(archive_path)
    artifact2 = Artifact.load(archive_path)

    # Each loaded artifact must still produce the view of its own data.
    expected_first = [-1, 42, 0, 43]
    expected_second = [10, 11, 12, 13]
    self.assertEqual(artifact1.view(list), expected_first)
    self.assertEqual(artifact2.view(list), expected_second)
def get_artifacts(named=False):
    """Load the artifacts tracked by the Q2D3 context of the working directory.

    Every archive is loaded exactly once; the loaded object is reused both
    for the name lookup and as the returned value.

    Parameters
    ----------
    named : bool, optional
        If true, return ``(name, artifact)`` tuples, where the name is looked
        up in the context's ``names`` mapping by the artifact's uuid.
        Otherwise return the bare artifacts.

    Returns
    -------
    list
        Artifacts (or ``(name, artifact)`` tuples), sorted by the ``repr`` of
        each artifact's ``type``.
    """
    context_manager = Q2D3Context(os.getcwd())
    artifacts = [Artifact.load(fp) for fp in context_manager.data.values()]

    if not named:
        return sorted(artifacts, key=lambda artifact: repr(artifact.type))

    named_artifacts = [(context_manager.names[artifact.uuid], artifact)
                       for artifact in artifacts]
    return sorted(named_artifacts, key=lambda pair: repr(pair[1].type))
def test_load_visualization_as_artifact(self):
    # Saving a Visualization and then opening it through Artifact.load must
    # raise a TypeError whose message matches the pattern below.
    viz = Visualization._from_data_dir(self.data_dir, self.provenance)
    viz_path = os.path.join(self.test_dir.name, 'visualization.qzv')
    viz.save(viz_path)

    expected_message = 'Visualization.*Artifact.load.*Visualization.load'
    with self.assertRaisesRegex(TypeError, expected_message):
        Artifact.load(viz_path)
def __init__(self, data_dir, output_names=None):
    """Index the artifact archives found directly inside ``data_dir``.

    Files are matched with the glob ``*<self._file_extension>``. Each
    matching file is loaded once, and its uuid is used as the key of both
    mappings built here.

    Parameters
    ----------
    data_dir : str
        Directory that is searched for archive files.
    output_names : optional
        Stored unchanged on ``self.output_names``.
    """
    self._data_dir = data_dir
    pattern = os.path.join(self._data_dir, '*%s' % self._file_extension)

    data = {}
    names = {}
    for fp in glob.glob(pattern):
        # Load each archive a single time and reuse its uuid for both
        # mappings instead of loading the same file twice.
        artifact_uuid = Artifact.load(fp).uuid
        data[artifact_uuid] = fp
        names[artifact_uuid] = os.path.splitext(os.path.basename(fp))[0]

    # uuid to filepath
    self.data = data
    # uuid to file name with directory and extension stripped
    self.names = names
    self.output_names = output_names
def __init__(self, data_dir, output_names=None):
    """Build uuid-keyed lookups for the archives located in ``data_dir``.

    Candidate files are those matching ``*<self._file_extension>`` inside
    ``data_dir``. Every candidate is loaded exactly once to obtain its uuid.

    Parameters
    ----------
    data_dir : str
        Directory that is searched for archive files.
    output_names : optional
        Stored unchanged on ``self.output_names``.
    """
    self._data_dir = data_dir
    data_files = glob.glob(
        os.path.join(self._data_dir, '*%s' % self._file_extension))

    # A single load per file supplies the key for both dictionaries, so no
    # archive has to be loaded a second time.
    uuid_by_fp = [(Artifact.load(fp).uuid, fp) for fp in data_files]

    # uuid to filepath
    self.data = {artifact_uuid: fp for artifact_uuid, fp in uuid_by_fp}
    # uuid to file name with directory and extension stripped
    self.names = {
        artifact_uuid: os.path.splitext(os.path.basename(fp))[0]
        for artifact_uuid, fp in uuid_by_fp
    }
    self.output_names = output_names
def test_roundtrip(self):
    # save -> load -> save -> load, then check that the two loaded artifacts
    # agree on type, provenance, uuid and their list view.
    first_path = os.path.join(self.test_dir.name, 'artifact1.qza')
    second_path = os.path.join(self.test_dir.name, 'artifact2.qza')

    source = Artifact._from_view([-1, 42, 0, 43], FourInts, self.provenance)
    source.save(first_path)

    artifact1 = Artifact.load(first_path)
    artifact1.save(second_path)
    artifact2 = Artifact.load(second_path)

    self.assertEqual(artifact1.type, artifact2.type)
    self.assertEqual(artifact1.provenance, artifact2.provenance)
    self.assertEqual(artifact1.uuid, artifact2.uuid)

    # The view comparison is performed twice; presumably this confirms that
    # an artifact can be viewed more than once.
    for _ in range(2):
        self.assertEqual(artifact1.view(list), artifact2.view(list))
def test_load_and_save(self):
    # A loaded artifact can be saved back over its source file and also to
    # a brand-new file; both archives must contain the same members under a
    # root directory named after the artifact's uuid.
    source_path = os.path.join(self.test_dir.name, 'artifact1.qza')
    copy_path = os.path.join(self.test_dir.name, 'artifact2.qza')

    Artifact.import_data(FourInts, [-1, 42, 0, 43]).save(source_path)
    artifact = Artifact.load(source_path)

    # Overwriting its source file works.
    artifact.save(source_path)
    # Saving to a new file works.
    artifact.save(copy_path)

    for archive_path in (source_path, copy_path):
        root_dir = str(artifact.uuid)
        expected = {
            'VERSION',
            'metadata.yaml',
            'data/file1.txt',
            'data/file2.txt',
            'data/nested/file3.txt',
            'data/nested/file4.txt',
            'provenance/metadata.yaml',
            'provenance/VERSION',
            'provenance/action/action.yaml',
        }
        self.assertArchiveMembers(archive_path, root_dir, expected)
def test_load_different_type_with_multiple_view_types(self):
    # An IntSequence1 artifact survives a save/load cycle and can be viewed
    # both as a list and as a collections.Counter.
    archive_path = os.path.join(self.test_dir.name, 'artifact.qza')
    saved_artifact = Artifact.import_data(IntSequence1,
                                          [42, 42, 43, -999, 42])
    saved_artifact.save(archive_path)

    artifact = Artifact.load(archive_path)

    self.assertEqual(artifact.type, IntSequence1)
    self.assertEqual(artifact.uuid, saved_artifact.uuid)

    # Each view is requested twice; presumably this confirms that repeated
    # views of the same artifact keep working.
    for _ in range(2):
        self.assertEqual(artifact.view(list), [42, 42, 43, -999, 42])
    for _ in range(2):
        self.assertEqual(artifact.view(collections.Counter),
                         collections.Counter({42: 3, 43: 1, -999: 1}))
def test_eq_same_uuid(self):
    # An artifact and the copy loaded back from its saved archive must
    # compare equal.
    original = Artifact.import_data(FourInts, [-1, 42, 0, 43])
    archive_path = os.path.join(self.test_dir.name, 'artifact.qza')
    original.save(archive_path)

    reloaded = Artifact.load(archive_path)

    self.assertEqual(original, reloaded)
def test_roundtrip(self):
    # save -> load -> save -> load, then check that the two loaded artifacts
    # agree on type, format, uuid and their list view.
    first_path = os.path.join(self.test_dir.name, 'artifact1.qza')
    second_path = os.path.join(self.test_dir.name, 'artifact2.qza')

    Artifact.import_data(FourInts, [-1, 42, 0, 43]).save(first_path)

    artifact1 = Artifact.load(first_path)
    artifact1.save(second_path)
    artifact2 = Artifact.load(second_path)

    self.assertEqual(artifact1.type, artifact2.type)
    self.assertEqual(artifact1.format, artifact2.format)
    self.assertEqual(artifact1.uuid, artifact2.uuid)

    # double view to make sure multiple views can be taken
    for _ in range(2):
        self.assertEqual(artifact1.view(list), artifact2.view(list))
def test_load_without_provenance(self):
    # An artifact saved without provenance must load back with its type,
    # uuid and data intact, and with `provenance` being None.
    fp = os.path.join(self.test_dir.name, 'artifact.qza')
    self.artifact_without_provenance.save(fp)

    artifact = Artifact.load(fp)

    self.assertEqual(artifact.type, qiime.core.testing.TestType)
    # Identity check: an object that merely compares equal to None must not
    # satisfy this assertion.
    self.assertIsNone(artifact.provenance)
    self.assertEqual(artifact.uuid, self.artifact_without_provenance.uuid)
    self.assertEqual(artifact.view(list), [-1, 42, 0, 43, 43])
def test_load(self):
    # A FourInts artifact written to disk loads back with the same type,
    # uuid and list view.
    archive_path = os.path.join(self.test_dir.name, 'artifact.qza')
    saved_artifact = Artifact.import_data(FourInts, [-1, 42, 0, 43])
    saved_artifact.save(archive_path)

    artifact = Artifact.load(archive_path)

    self.assertEqual(artifact.type, FourInts)
    self.assertEqual(artifact.uuid, saved_artifact.uuid)

    # Viewed twice; presumably this confirms that repeated views of the same
    # artifact keep working.
    for _ in range(2):
        self.assertEqual(artifact.view(list), [-1, 42, 0, 43])
def test_load(self):
    # A FourInts artifact created without provenance loads back with the
    # same type, uuid and list view, and with `provenance` still None.
    archive_path = os.path.join(self.test_dir.name, 'artifact.qza')
    saved_artifact = Artifact._from_view([-1, 42, 0, 43], FourInts, None)
    saved_artifact.save(archive_path)

    artifact = Artifact.load(archive_path)

    self.assertEqual(artifact.type, FourInts)
    self.assertIsNone(artifact.provenance)
    self.assertEqual(artifact.uuid, saved_artifact.uuid)

    # Viewed twice; presumably this confirms that repeated views of the same
    # artifact keep working.
    for _ in range(2):
        self.assertEqual(artifact.view(list), [-1, 42, 0, 43])
def test_ne_subclass_same_uuid(self):
    # An instance of an Artifact subclass and a plain Artifact loaded from
    # the archive it saved must not compare equal, in either operand order.
    class ArtifactSubclass(Artifact):
        pass

    archive_path = os.path.join(self.test_dir.name, 'artifact.qza')
    subclass_artifact = ArtifactSubclass.import_data(FourInts,
                                                     [-1, 42, 0, 43])
    subclass_artifact.save(archive_path)

    plain_artifact = Artifact.load(archive_path)

    self.assertNotEqual(subclass_artifact, plain_artifact)
    self.assertNotEqual(plain_artifact, subclass_artifact)
def test_load_different_type_with_multiple_view_types(self):
    # An IntSequence1 artifact created without provenance survives a
    # save/load cycle and can be viewed both as a list and as a
    # collections.Counter.
    archive_path = os.path.join(self.test_dir.name, 'artifact.qza')
    saved_artifact = Artifact._from_view([42, 42, 43, -999, 42],
                                         IntSequence1, None)
    saved_artifact.save(archive_path)

    artifact = Artifact.load(archive_path)

    self.assertEqual(artifact.type, IntSequence1)
    self.assertIsNone(artifact.provenance)
    self.assertEqual(artifact.uuid, saved_artifact.uuid)

    # Each view is requested twice; presumably this confirms that repeated
    # views of the same artifact keep working.
    for _ in range(2):
        self.assertEqual(artifact.view(list), [42, 42, 43, -999, 42])
    for _ in range(2):
        self.assertEqual(artifact.view(collections.Counter),
                         collections.Counter({42: 3, 43: 1, -999: 1}))
def test_load_and_save(self):
    # A loaded artifact can be saved back over its source file and also to
    # a brand-new file. Each resulting ZIP is expected to hold the same
    # members under a root directory named after the archive's file stem.
    fp1 = os.path.join(self.test_dir.name, 'artifact1.qza')
    fp2 = os.path.join(self.test_dir.name, 'artifact2.qza')

    Artifact._from_view([-1, 42, 0, 43], FourInts, self.provenance).save(fp1)
    artifact = Artifact.load(fp1)

    # Overwriting its source file works.
    artifact.save(fp1)
    # Saving to a new file works.
    artifact.save(fp2)

    # Member paths relative to the archive's root directory.
    relative_members = (
        'VERSION',
        'metadata.yaml',
        'README.md',
        'data/file1.txt',
        'data/file2.txt',
        'data/nested/file3.txt',
        'data/nested/file4.txt',
    )

    for archive_path, root in ((fp1, 'artifact1'), (fp2, 'artifact2')):
        with zipfile.ZipFile(archive_path, mode='r') as zf:
            fps = set(zf.namelist())
        expected = {'%s/%s' % (root, member) for member in relative_members}
        self.assertEqual(fps, expected)
def load_artifacts(**kwargs):
    """Load one artifact per keyword argument.

    Each keyword's value is used as a key into ``ARTIFACTS``, and the entry
    found there is passed to ``Artifact.load``.

    Returns
    -------
    dict
        Maps every keyword name to the artifact loaded for it.
    """
    loaded = {}
    for key, artifact_key in kwargs.items():
        loaded[key] = Artifact.load(ARTIFACTS[artifact_key])
    return loaded
def test_load_from_externally_created_zipfile(self):
    # A user may unzip a .qza to inspect it and then rezip it with a ZIP
    # implementation other than Python's; loading, saving, etc. should still
    # work as expected. Python's ZIP writer does not store directories as
    # entries, whereas the `zip` command line utilities on Unix and OS X
    # store both directories and filepaths. Python's reader exposes those
    # directory entries, so extraction has to account for them.
    #
    # The following artifact was created with:
    #
    #     artifact = Artifact._from_view([-1, 42, 0, 43], FourInts,
    #                                    self.provenance)
    #     artifact.save('externally_created_zipfile.qza')
    #
    # Unzip and rezip using command line utility:
    #
    #     unzip externally_created_zipfile.qza
    #     rm externally_created_zipfile.qza
    #     zip -r externally_created_zipfile.qza externally_created_zipfile
    #
    external_path = pkg_resources.resource_filename(
        'qiime.sdk.tests', 'data/externally_created_zipfile.qza')

    with zipfile.ZipFile(external_path, mode='r') as zf:
        external_members = set(zf.namelist())
    self.assertEqual(external_members, {
        # These are extra directory entries included by `zip` command
        # line utility.
        'externally_created_zipfile/',
        'externally_created_zipfile/data/',
        'externally_created_zipfile/data/nested/',
        'externally_created_zipfile/VERSION',
        'externally_created_zipfile/metadata.yaml',
        'externally_created_zipfile/README.md',
        'externally_created_zipfile/data/file1.txt',
        'externally_created_zipfile/data/file2.txt',
        'externally_created_zipfile/data/nested/file3.txt',
        'externally_created_zipfile/data/nested/file4.txt',
    })

    artifact = Artifact.load(external_path)

    self.assertEqual(artifact.type, FourInts)
    self.assertEqual(artifact.provenance, self.provenance)
    self.assertIsInstance(artifact.uuid, uuid.UUID)
    # Viewed twice; presumably this confirms that repeated views of the same
    # artifact keep working.
    for _ in range(2):
        self.assertEqual(artifact.view(list), [-1, 42, 0, 43])

    # Re-save through Python's ZIP implementation and inspect the result.
    resaved_path = os.path.join(self.test_dir.name, 'artifact.qza')
    artifact.save(resaved_path)

    with zipfile.ZipFile(resaved_path, mode='r') as zf:
        resaved_members = set(zf.namelist())
    self.assertEqual(resaved_members, {
        # Directory entries should not be present.
        'artifact/VERSION',
        'artifact/metadata.yaml',
        'artifact/README.md',
        'artifact/data/file1.txt',
        'artifact/data/file2.txt',
        'artifact/data/nested/file3.txt',
        'artifact/data/nested/file4.txt',
    })