def test_artifact_validate_min(self):
    A = Artifact.import_data('IntSequence1', [1, 2, 3, 4])
    A.validate(level='min')
    self.assertTrue(True)  # Checkpoint assertion

    A = Artifact.import_data('Mapping', {'a': '1', 'b': '2'})
    A.validate(level='min')
    self.assertTrue(True)  # Checkpoint assertion
def test_extract(self):
    fp = os.path.join(self.test_dir.name, 'artifact.qza')
    artifact = Artifact.import_data(FourInts, [-1, 42, 0, 43])
    artifact.save(fp)

    root_dir = str(artifact.uuid)
    # pathlib normalizes away the `.`, it doesn't matter, but this is the
    # implementation we're using, so let's test against that assumption.
    output_dir = pathlib.Path(self.test_dir.name) / 'artifact-extract-test'
    result_dir = Artifact.extract(fp, output_dir=output_dir)
    self.assertEqual(result_dir, str(output_dir / root_dir))

    expected = {
        'VERSION',
        'metadata.yaml',
        'data/file1.txt',
        'data/file2.txt',
        'data/nested/file3.txt',
        'data/nested/file4.txt',
        'provenance/metadata.yaml',
        'provenance/VERSION',
        'provenance/citations.bib',
        'provenance/action/action.yaml'
    }

    self.assertExtractedArchiveMembers(output_dir, root_dir, expected)
def test_call_with_optional_artifacts(self):
    method = self.plugin.methods['optional_artifacts_method']

    ints1 = Artifact.import_data(IntSequence1, [0, 42, 43])
    ints2 = Artifact.import_data(IntSequence1, [99, -22])
    ints3 = Artifact.import_data(IntSequence2, [43, 43])

    # No optional artifacts provided.
    obs = method(ints1, 42).output
    self.assertEqual(obs.view(list), [0, 42, 43, 42])

    # One optional artifact provided.
    obs = method(ints1, 42, optional1=ints2).output
    self.assertEqual(obs.view(list), [0, 42, 43, 42, 99, -22])

    # All optional artifacts provided.
    obs = method(
        ints1, 42, optional1=ints2, optional2=ints3, num2=111).output
    self.assertEqual(obs.view(list),
                     [0, 42, 43, 42, 99, -22, 43, 43, 111])

    # Invalid type provided as optional artifact.
    with self.assertRaisesRegex(TypeError,
                                'type IntSequence2.*subtype IntSequence1'):
        method(ints1, 42, optional1=ints3)
def test_classify(self):
    # test read direction detection and parallel classification
    classify = feature_classifier.methods.classify_sklearn
    seq_path = self.get_data_path('se-dna-sequences.fasta')
    reads = Artifact.import_data('FeatureData[Sequence]', seq_path)
    raw_reads = skbio.io.read(
        seq_path, format='fasta', constructor=skbio.DNA)
    rev_path = os.path.join(self.temp_dir.name, 'rev-dna-sequences.fasta')
    skbio.io.write((s.reverse_complement() for s in raw_reads),
                   'fasta', rev_path)
    rev_reads = Artifact.import_data('FeatureData[Sequence]', rev_path)

    result = classify(reads, self.classifier)
    fc = result.classification.view(pd.Series).to_dict()
    result = classify(rev_reads, self.classifier)
    rc = result.classification.view(pd.Series).to_dict()

    for taxon in fc:
        self.assertEqual(fc[taxon], rc[taxon])

    result = classify(reads, self.classifier, read_orientation='same')
    fc = result.classification.view(pd.Series).to_dict()
    result = classify(rev_reads, self.classifier,
                      read_orientation='reverse-complement')
    rc = result.classification.view(pd.Series).to_dict()

    for taxon in fc:
        self.assertEqual(fc[taxon], rc[taxon])

    result = classify(reads, self.classifier, reads_per_batch=100,
                      n_jobs=2)
    cc = result.classification.view(pd.Series).to_dict()

    for taxon in fc:
        self.assertEqual(fc[taxon], cc[taxon])
def test_import_data_invalid_type(self):
    with self.assertRaisesRegex(TypeError,
                                'concrete semantic type.*Visualization'):
        Artifact.import_data(qiime2.core.type.Visualization, self.test_dir)

    with self.assertRaisesRegex(TypeError,
                                'concrete semantic type.*Visualization'):
        Artifact.import_data('Visualization', self.test_dir)
def test_import_data_with_filepath_multi_file_data_layout(self):
    fp = os.path.join(self.test_dir.name, 'test.txt')
    with open(fp, 'w') as fh:
        fh.write('42\n')

    with self.assertRaisesRegex(qiime2.plugin.ValidationError,
                                "FourIntsDirectoryFormat.*directory"):
        Artifact.import_data(FourInts, fp)
def test_eq_same_uuid(self):
    fp = os.path.join(self.test_dir.name, 'artifact.qza')
    artifact1 = Artifact.import_data(FourInts, [-1, 42, 0, 43])
    artifact1.save(fp)
    artifact2 = Artifact.load(fp)

    self.assertEqual(artifact1, artifact2)
def test_import_data_with_unreachable_path(self):
    with self.assertRaisesRegex(ValueError, "does not exist"):
        Artifact.import_data(IntSequence1,
                             os.path.join(self.test_dir.name, 'foo.txt'))

    with self.assertRaisesRegex(ValueError, "does not exist"):
        Artifact.import_data(FourInts,
                             os.path.join(self.test_dir.name, 'bar', ''))
def test_artifact_validate_max(self):
    A = Artifact.import_data('Mapping', {'a': '1', 'b': '2'})
    A.validate()
    self.assertTrue(True)  # Checkpoint assertion
    A.validate(level='max')
    self.assertTrue(True)  # Checkpoint assertion

    A = Artifact.import_data('IntSequence1', [1, 2, 3, 4, 5, 6, 7, 10])
    with self.assertRaisesRegex(ValidationError, '3 more'):
        A.validate('max')
def test_class_weight(self):
    # we should be able to input class_weight to fit_classifier
    weights = Artifact.import_data(
        'FeatureTable[RelativeFrequency]',
        self.get_data_path('class_weight.biom'),
        view_type='BIOMV100Format')
    reads = Artifact.import_data(
        'FeatureData[Sequence]',
        self.get_data_path('se-dna-sequences.fasta'))

    fitter = feature_classifier.methods.fit_classifier_naive_bayes
    classifier1 = fitter(reads, self.taxonomy, class_weight=weights)
    classifier1 = classifier1.classifier

    class_weight = weights.view(biom.Table)
    classes = class_weight.ids('observation')
    class_weights = []
    for wts in class_weight.iter_data():
        # materialize the pairs so they can be reused below
        class_weights.append(list(zip(classes, wts)))
    priors = json.dumps(list(zip(*sorted(class_weights[0])))[1])
    classifier2 = fitter(reads, self.taxonomy,
                         classify__class_prior=priors).classifier

    classify = feature_classifier.methods.classify_sklearn
    result1 = classify(reads, classifier1)
    result1 = result1.classification.view(pd.Series).to_dict()
    result2 = classify(reads, classifier2)
    result2 = result2.classification.view(pd.Series).to_dict()
    self.assertEqual(result1, result2)

    svc_spec = [['feat_ext',
                 {'__type__': 'feature_extraction.text.HashingVectorizer',
                  'analyzer': 'char_wb',
                  'n_features': 8192,
                  'ngram_range': [8, 8],
                  'alternate_sign': False}],
                ['classify',
                 {'__type__': 'linear_model.LogisticRegression'}]]
    classifier_spec = json.dumps(svc_spec)

    gen_fitter = feature_classifier.methods.fit_classifier_sklearn
    classifier1 = gen_fitter(reads, self.taxonomy, classifier_spec,
                             class_weight=weights).classifier
    svc_spec[1][1]['class_weight'] = dict(class_weights[0])
    classifier_spec = json.dumps(svc_spec)
    gen_fitter = feature_classifier.methods.fit_classifier_sklearn
    classifier2 = gen_fitter(reads, self.taxonomy,
                             classifier_spec).classifier

    result1 = classify(reads, classifier1)
    result1 = result1.classification.view(pd.Series).to_dict()
    result2 = classify(reads, classifier2)
    result2 = result2.classification.view(pd.Series).to_dict()
    self.assertEqual(set(result1.keys()), set(result2.keys()))
    for k in result1:
        self.assertEqual(result1[k], result2[k])
def test_load(self):
    saved_artifact = Artifact.import_data(FourInts, [-1, 42, 0, 43])
    fp = os.path.join(self.test_dir.name, 'artifact.qza')
    saved_artifact.save(fp)

    artifact = Artifact.load(fp)

    self.assertEqual(artifact.type, FourInts)
    self.assertEqual(artifact.uuid, saved_artifact.uuid)
    self.assertEqual(artifact.view(list), [-1, 42, 0, 43])
    self.assertEqual(artifact.view(list), [-1, 42, 0, 43])
def test_peek(self):
    artifact = Artifact.import_data(FourInts, [0, 0, 42, 1000])
    fp = os.path.join(self.test_dir.name, 'artifact.qza')
    artifact.save(fp)

    metadata = Artifact.peek(fp)

    self.assertIsInstance(metadata, ResultMetadata)
    self.assertEqual(metadata.type, 'FourInts')
    self.assertEqual(metadata.uuid, str(artifact.uuid))
    self.assertEqual(metadata.format, 'FourIntsDirectoryFormat')
def test_import_data_with_invalid_format_single_file(self):
    fp = os.path.join(self.test_dir.name, 'foo.txt')
    with open(fp, 'w') as fh:
        fh.write('42\n')
        fh.write('43\n')
        fh.write('abc\n')
        fh.write('123\n')

    error_regex = "foo.txt.*IntSequenceFormat.*\n\n.*Line 3"
    with self.assertRaisesRegex(ValidationError, error_regex):
        Artifact.import_data(IntSequence1, fp)
def setUp(self):
    super().setUp()
    self.taxonomy = Artifact.import_data(
        'FeatureData[Taxonomy]', self.get_data_path('taxonomy.tsv'))
    self.seq_path = self.get_data_path('se-dna-sequences.fasta')
    reads = Artifact.import_data('FeatureData[Sequence]', self.seq_path)
    fitter_name = _specific_fitters[0][0]
    fitter = getattr(feature_classifier.methods,
                     'fit_classifier_' + fitter_name)
    self.classifier = fitter(reads, self.taxonomy).classifier
def test_import_data_with_bad_validation_multi_files(self):
    data_dir = os.path.join(self.test_dir.name, 'test')
    os.mkdir(data_dir)
    with open(os.path.join(data_dir, 'file1.txt'), 'w') as fh:
        fh.write('1\n')
    with open(os.path.join(data_dir, 'file2.txt'), 'w') as fh:
        fh.write('2\n')

    error_regex = ("test.*RedundantSingleIntDirectoryFormat.*\n\n"
                   ".*does not match")
    with self.assertRaisesRegex(ValidationError, error_regex):
        Artifact.import_data(SingleInt, data_dir)
def test_asynchronous(self):
    mapping_viz = self.plugin.visualizers['mapping_viz']

    artifact1 = Artifact.import_data(Mapping, {'foo': 'abc', 'bar': 'def'})
    artifact2 = Artifact.import_data(
        Mapping, {'baz': 'abc', 'bazz': 'ghi'})

    future = mapping_viz.asynchronous(artifact1, artifact2, 'Key', 'Value')

    self.assertIsInstance(future, concurrent.futures.Future)

    result = future.result()

    # Test properties of the `Results` object.
    self.assertIsInstance(result, tuple)
    self.assertIsInstance(result, Results)
    self.assertEqual(len(result), 1)
    self.assertEqual(result.visualization, result[0])

    result = result[0]
    self.assertIsInstance(result, Visualization)
    self.assertEqual(result.type, qiime2.core.type.Visualization)
    self.assertIsInstance(result.uuid, uuid.UUID)

    # TODO qiime2.sdk.Visualization doesn't have an API to access its
    # contents yet. For now, save and assert the correct files are present.
    filepath = os.path.join(self.test_dir.name, 'visualization.qzv')
    result.save(filepath)

    root_dir = str(result.uuid)
    expected = {
        'VERSION',
        'checksums.md5',
        'metadata.yaml',
        'data/index.html',
        'data/css/style.css',
        'provenance/metadata.yaml',
        'provenance/VERSION',
        'provenance/citations.bib',
        'provenance/action/action.yaml',
        'provenance/artifacts/%s/metadata.yaml' % artifact1.uuid,
        'provenance/artifacts/%s/VERSION' % artifact1.uuid,
        'provenance/artifacts/%s/citations.bib' % artifact1.uuid,
        'provenance/artifacts/%s/action/action.yaml' % artifact1.uuid,
        'provenance/artifacts/%s/metadata.yaml' % artifact2.uuid,
        'provenance/artifacts/%s/VERSION' % artifact2.uuid,
        'provenance/artifacts/%s/citations.bib' % artifact2.uuid,
        'provenance/artifacts/%s/action/action.yaml' % artifact2.uuid
    }

    self.assertArchiveMembers(filepath, root_dir, expected)
def test_call_with_variadic_inputs(self):
    method = self.plugin.methods['variadic_input_method']

    ints = [Artifact.import_data(IntSequence1, [1, 2, 3]),
            Artifact.import_data(IntSequence2, [4, 5, 6])]
    int_set = {Artifact.import_data(SingleInt, 7),
               Artifact.import_data(SingleInt, 8)}
    nums = {9, 10}
    opt_nums = [11, 12, 13]

    result, = method(ints, int_set, nums, opt_nums)

    self.assertEqual(result.view(list), list(range(1, 14)))
def setUp(self):
    super().setUp()
    reads = Artifact.import_data(
        'FeatureData[Sequence]',
        self.get_data_path('se-dna-sequences.fasta'))
    taxonomy = Artifact.import_data(
        'FeatureData[Taxonomy]', self.get_data_path('taxonomy.tsv'))
    classifier = fit_classifier_naive_bayes(reads, taxonomy)
    pipeline = classifier.classifier.view(Pipeline)
    transformer = self.get_transformer(
        Pipeline, TaxonomicClassiferTemporaryPickleDirFmt)
    self._sklp = transformer(pipeline)
    sklearn_pipeline = self._sklp.sklearn_pipeline.view(PickleFormat)
    self.sklearn_pipeline = str(sklearn_pipeline)
def test_populate_class_weight(self):
    # should populate the class weight of a pipeline
    weights = Artifact.import_data(
        'FeatureTable[RelativeFrequency]',
        self.get_data_path('class_weight.biom'),
        view_type='BIOMV100Format')
    table = weights.view(biom.Table)

    svc_spec = [['feat_ext',
                 {'__type__': 'feature_extraction.text.HashingVectorizer',
                  'analyzer': 'char_wb',
                  'n_features': 8192,
                  'ngram_range': [8, 8],
                  'alternate_sign': False}],
                ['classify',
                 {'__type__': 'naive_bayes.GaussianNB'}]]
    pipeline1 = pipeline_from_spec(svc_spec)
    populate_class_weight(pipeline1, table)

    classes = table.ids('observation')
    class_weights = []
    for wts in table.iter_data():
        class_weights.append(zip(classes, wts))
    svc_spec[1][1]['priors'] = list(zip(*sorted(class_weights[0])))[1]
    pipeline2 = pipeline_from_spec(svc_spec)

    for a, b in zip(pipeline1.get_params()['classify__priors'],
                    pipeline2.get_params()['classify__priors']):
        self.assertAlmostEqual(a, b)
def test_visualizer_callable_output(self):
    artifact = Artifact.import_data(Mapping, {'foo': 'abc', 'bar': 'def'})

    # Callable returns a value from `return_vals`
    return_vals = (True, False, [], {}, '', 0, 0.0)
    for return_val in return_vals:
        def func(output_dir: str, foo: dict) -> None:
            return return_val

        self.plugin.visualizers.register_function(
            func, {'foo': Mapping}, {}, '', ''
        )
        visualizer = self.plugin.visualizers['func']

        with self.assertRaisesRegex(TypeError, "should not return"):
            visualizer(foo=artifact)

    # Callable returns None (default function return)
    def func(output_dir: str, foo: dict) -> None:
        return None

    self.plugin.visualizers.register_function(
        func, {'foo': Mapping}, {}, '', ''
    )
    visualizer = self.plugin.visualizers['func']

    # Should not raise an exception
    output = visualizer(foo=artifact)
    self.assertIsInstance(output, Results)
    self.assertIsInstance(output.visualization, Visualization)
def test_from_view_and_save(self):
    fp = os.path.join(self.test_dir.name, 'artifact.qza')
    # Using four-ints data layout because it has multiple files, some of
    # which are in a nested directory.
    artifact = Artifact._from_view(FourInts, [-1, 42, 0, 43], list,
                                   self.provenance_capture)

    artifact.save(fp)

    root_dir = str(artifact.uuid)
    expected = {
        'VERSION',
        'checksums.md5',
        'metadata.yaml',
        'data/file1.txt',
        'data/file2.txt',
        'data/nested/file3.txt',
        'data/nested/file4.txt',
        'provenance/metadata.yaml',
        'provenance/VERSION',
        'provenance/citations.bib',
        'provenance/action/action.yaml'
    }

    self.assertArchiveMembers(fp, root_dir, expected)
def setUp(self):
    super().setUp()
    seqs = skbio.io.read(self.get_data_path('dna-sequences.fasta'),
                         format='fasta', constructor=skbio.DNA)
    tmpseqs = os.path.join(self.temp_dir.name, 'temp-seqs.fasta')
    skbio.io.write((s for s in islice(seqs, 10)), 'fasta', tmpseqs)
    self.sequences = Artifact.import_data('FeatureData[Sequence]', tmpseqs)
def inspect_artifact(uuid):
    try:
        metadata = Artifact.peek(ARTIFACTS[uuid])
    except Exception:
        abort(404)

    return jsonify({'uuid': metadata.uuid, 'type': metadata.type})
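
# A minimal sketch of how the `ARTIFACTS` registry read by `inspect_artifact`
# above might be populated. The registry shape (uuid string -> .qza filepath)
# and the helper name are assumptions for illustration only; `Artifact.peek`
# and the ResultMetadata fields it returns are the pieces used above.
def _register_artifact_for_inspection(qza_path):
    metadata = Artifact.peek(qza_path)   # reads type/uuid/format without loading the data
    ARTIFACTS[metadata.uuid] = qza_path  # later served by `inspect_artifact`
    return metadata.uuid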
def test_mismatched_taxonomy(self):
    wrong_taxa_fp = self.get_data_path('another-ref-taxa.tsv')
    wrong_taxa = Artifact.import_data('FeatureData[Taxonomy]',
                                      wrong_taxa_fp)
    with self.assertRaisesRegex(ValueError,
                                'Not all OTUs.*1 feature.*\n.*879972'):
        self.action(self.input_sequences, self.tree, wrong_taxa)
def test_async_with_multiple_outputs(self):
    split_ints = self.plugin.methods['split_ints']
    artifact = Artifact.import_data(IntSequence1, [0, 42, -2, 43, 6])

    future = split_ints.asynchronous(artifact)

    self.assertIsInstance(future, concurrent.futures.Future)

    result = future.result()

    self.assertIsInstance(result, tuple)
    self.assertEqual(len(result), 2)

    for output_artifact in result:
        self.assertIsInstance(output_artifact, Artifact)
        self.assertEqual(output_artifact.type, IntSequence1)
        self.assertIsInstance(output_artifact.uuid, uuid.UUID)

    # Output artifacts have different UUIDs.
    self.assertNotEqual(result[0].uuid, result[1].uuid)

    # Index lookup.
    self.assertEqual(result[0].view(list), [0, 42])
    self.assertEqual(result[1].view(list), [-2, 43, 6])

    # Test properties of the `Results` object.
    self.assertIsInstance(result, Results)
    self.assertEqual(result.left.view(list), [0, 42])
    self.assertEqual(result.right.view(list), [-2, 43, 6])
def test_filter_features_nooverlap(self):
    # Just load up the reference tree instead of creating new test data
    wrong_tree_fp = self.get_data_path('ref-tree.nwk')
    wrong_tree = Artifact.import_data('Phylogeny[Rooted]', wrong_tree_fp)
    with self.assertRaisesRegex(ValueError,
                                'Not a single fragment.*empty'):
        self.action(self.table, wrong_tree)
def test_write_v1_archive(self):
    fp = os.path.join(self.temp_dir.name, 'artifact_v1.qza')
    with artifact_version(1):
        artifact = Artifact._from_view(FourInts, [-1, 42, 0, 43], list,
                                       self.provenance_capture)
        artifact.save(fp)

    root_dir = str(artifact.uuid)
    expected = {
        'VERSION',
        'metadata.yaml',
        'data/file1.txt',
        'data/file2.txt',
        'data/nested/file3.txt',
        'data/nested/file4.txt',
        'provenance/metadata.yaml',
        'provenance/VERSION',
        'provenance/action/action.yaml',
    }

    self.assertArchiveMembers(fp, root_dir, expected)

    with zipfile.ZipFile(fp, mode='r') as zf:
        version = zf.read(os.path.join(root_dir, 'VERSION'))
    self.assertRegex(str(version), '^.*archive: 1.*$')
def test_async_with_multiple_outputs_matched_types(self):
    split_ints = self.plugin.methods['split_ints']
    artifact = Artifact.import_data(IntSequence2, [0, 42, -2, 43, 6])

    future = split_ints.asynchronous(artifact)

    self.assertIsInstance(future, concurrent.futures.Future)

    result = future.result()

    self.assertIsInstance(result, tuple)
    self.assertEqual(len(result), 2)

    for output_artifact in result:
        self.assertIsInstance(output_artifact, Artifact)
        self.assertEqual(output_artifact.type, IntSequence2)
        self.assertIsInstance(output_artifact.uuid, uuid.UUID)

    # Output artifacts have different UUIDs.
    self.assertNotEqual(result[0].uuid, result[1].uuid)

    # Index lookup.
    self.assertEqual(result[0].view(list), [0, 42])
    self.assertEqual(result[1].view(list), [-2, 43, 6])

    # Test properties of the `Results` object.
    self.assertIsInstance(result, Results)
    self.assertEqual(result.left.view(list), [0, 42])
    self.assertEqual(result.right.view(list), [-2, 43, 6])
def test_low_memory_multinomial_nb(self):
    # results should not depend on chunk size
    fitter = feature_classifier.methods.fit_classifier_sklearn
    classify = feature_classifier.methods.classify_sklearn
    reads = Artifact.import_data(
        'FeatureData[Sequence]',
        self.get_data_path('se-dna-sequences.fasta'))

    spec = [['feat_ext',
             {'__type__': 'feature_extraction.text.HashingVectorizer',
              'analyzer': 'char',
              'n_features': 8192,
              'ngram_range': [8, 8],
              'alternate_sign': False}],
            ['classify',
             {'__type__': 'custom.LowMemoryMultinomialNB',
              'alpha': 0.01,
              'chunk_size': 20000}]]
    classifier_spec = json.dumps(spec)
    result = fitter(reads, self.taxonomy, classifier_spec)
    result = classify(reads, result.classifier)
    gc = result.classification.view(pd.Series).to_dict()

    spec[1][1]['chunk_size'] = 20
    classifier_spec = json.dumps(spec)
    result = fitter(reads, self.taxonomy, classifier_spec)
    result = classify(reads, result.classifier)
    sc = result.classification.view(pd.Series).to_dict()

    for taxon in gc:
        self.assertEqual(gc[taxon], sc[taxon])
def test_call_with_multiple_outputs(self):
    split_ints = self.plugin.methods['split_ints']
    artifact = Artifact.import_data(IntSequence1, [0, 42, -2, 43, 6])

    result = split_ints(artifact)

    self.assertIsInstance(result, tuple)
    self.assertEqual(len(result), 2)

    for output_artifact in result:
        self.assertIsInstance(output_artifact, Artifact)
        self.assertEqual(output_artifact.type, IntSequence1)
        self.assertIsInstance(output_artifact.uuid, uuid.UUID)

    # Output artifacts have different UUIDs.
    self.assertNotEqual(result[0].uuid, result[1].uuid)

    # Index lookup.
    self.assertEqual(result[0].view(list), [0, 42])
    self.assertEqual(result[1].view(list), [-2, 43, 6])

    # Test properties of the `Results` object.
    self.assertIsInstance(result, Results)
    self.assertEqual(result.left.view(list), [0, 42])
    self.assertEqual(result.right.view(list), [-2, 43, 6])
def test_reader_transformer(self):
    fp = pkg_resources.resource_filename(
        'q2_winnowing.tests', 'sample_data/test_in_dir')

    artifact = Artifact.import_data(Winnowed, fp)
    # `Artifact.view` invokes the transformer that handles the
    # `WinnowedFormat` -> `dataframe` transformation.
    featureOrdering_df, auc_df, permanova_df = artifact.view(list)[0]

    # Compare as strings and skip dtype checks: values read back from disk
    # are stored as objects, while the hard-coded expected frames are not
    # (e.g. bool(False) == object(False) is False in pandas even though the
    # values behave the same).
    pd.testing.assert_frame_equal(
        featureOrdering_df.astype(str), exp_featureOrdering.astype(str),
        check_dtype=False)
    pd.testing.assert_frame_equal(
        auc_df.astype(str), exp_auc.astype(str), check_dtype=False)
    pd.testing.assert_frame_equal(
        permanova_df.astype(str), exp_permanova.astype(str),
        check_dtype=False)
def test_writer_transformer(self):
    # `Artifact._from_view` invokes the transformer that handles the
    # `dataframe` -> `WinnowedFormat` transformation, because
    # `WinnowedDirectoryFormat` has been registered as the directory format
    # for the semantic type.
    artifact = Artifact._from_view(
        Winnowed, [(exp_featureOrdering, exp_auc, exp_permanova)], list,
        archive.ImportProvenanceCapture())

    # Test that the directory and file format can be read again.
    got_featureOrdering, got_auc, got_permanova = artifact.view(list)[0]

    # Compare as strings and skip dtype checks: values read back from disk
    # are stored as objects, while the hard-coded expected frames are not
    # (e.g. bool(False) == object(False) is False in pandas even though the
    # values behave the same).
    pd.testing.assert_frame_equal(
        got_featureOrdering.astype(str), exp_featureOrdering.astype(str),
        check_dtype=False)
    pd.testing.assert_frame_equal(
        got_auc.astype(str), exp_auc.astype(str), check_dtype=False)
    pd.testing.assert_frame_equal(
        got_permanova.astype(str), exp_permanova.astype(str),
        check_dtype=False)
def test_fit_classifier(self):
    # fit_classifier should generate a working taxonomic_classifier
    reads = Artifact.import_data(
        'FeatureData[Sequence]',
        self.get_data_path('se-dna-sequences.fasta'))

    classifier_specification = \
        [['feat_ext',
          {'__type__': 'feature_extraction.text.HashingVectorizer',
           'analyzer': 'char_wb',
           'n_features': 8192,
           'ngram_range': [8, 8],
           'non_negative': True}],
         ['classify',
          {'__type__': 'naive_bayes.MultinomialNB',
           'alpha': 0.01}]]
    classifier_specification = json.dumps(classifier_specification)
    fit_classifier = feature_classifier.methods.fit_classifier
    result = fit_classifier(reads, self.taxonomy,
                            classifier_specification)

    classify = feature_classifier.methods.classify
    result = classify(reads, result.classifier)

    ref = self.taxonomy.view(pd.Series).to_dict()
    cls = result.classification.view(pd.Series).to_dict()

    right = 0.
    for taxon in cls:
        right += ref[taxon].startswith(cls[taxon])
    self.assertGreater(right/len(cls), 0.5)
def test_write_v4_archive(self):
    fp = os.path.join(self.temp_dir.name, 'artifact_v1.qza')
    with artifact_version(4):
        artifact = Artifact._from_view(FourInts, [-1, 42, 0, 43], list,
                                       self.provenance_capture)
        artifact.save(fp)

    root_dir = str(artifact.uuid)
    expected = {
        'VERSION',
        'metadata.yaml',
        'data/file1.txt',
        'data/file2.txt',
        'data/nested/file3.txt',
        'data/nested/file4.txt',
        'provenance/metadata.yaml',
        'provenance/VERSION',
        'provenance/citations.bib',
        'provenance/action/action.yaml',
    }

    self.assertArchiveMembers(fp, root_dir, expected)

    with zipfile.ZipFile(fp, mode='r') as zf:
        version = zf.read(os.path.join(root_dir, 'VERSION'))
    self.assertRegex(str(version), '^.*archive: 4.*$')
def test_asynchronous(self):
    concatenate_ints = self.plugin.methods['concatenate_ints']

    artifact1 = Artifact.import_data(IntSequence1, [0, 42, 43])
    artifact2 = Artifact.import_data(IntSequence2, [99, -22])

    future = concatenate_ints.asynchronous(
        artifact1, artifact1, artifact2, 55, 1)

    self.assertIsInstance(future, concurrent.futures.Future)

    result = future.result()

    # Test properties of the `Results` object.
    self.assertIsInstance(result, tuple)
    self.assertIsInstance(result, Results)
    self.assertEqual(len(result), 1)
    self.assertEqual(result.concatenated_ints.view(list),
                     [0, 42, 43, 0, 42, 43, 99, -22, 55, 1])

    result = result[0]

    self.assertIsInstance(result, Artifact)
    self.assertEqual(result.type, IntSequence1)
    self.assertIsInstance(result.uuid, uuid.UUID)

    # Can retrieve multiple views of different type.
    exp_list_view = [0, 42, 43, 0, 42, 43, 99, -22, 55, 1]
    self.assertEqual(result.view(list), exp_list_view)
    self.assertEqual(result.view(list), exp_list_view)

    exp_counter_view = collections.Counter(
        {0: 2, 42: 2, 43: 2, 99: 1, -22: 1, 55: 1, 1: 1})
    self.assertEqual(result.view(collections.Counter), exp_counter_view)
    self.assertEqual(result.view(collections.Counter), exp_counter_view)

    # Accepts IntSequence1 | IntSequence2
    artifact3 = Artifact.import_data(IntSequence2, [10, 20])
    future = concatenate_ints.asynchronous(artifact3, artifact1, artifact2,
                                           55, 1)
    result, = future.result()

    self.assertEqual(result.type, IntSequence1)
    self.assertEqual(result.view(list),
                     [10, 20, 0, 42, 43, 99, -22, 55, 1])
def test_classify_otus_experimental(self):
    ar_tree = Artifact.load(self.get_data_path('sepp_tree_tiny.qza'))
    ar_repseq = Artifact.load(self.get_data_path('real_data.qza'))

    obs_classification = classify_otus_experimental(
        ar_repseq.view(DNASequencesDirectoryFormat),
        ar_tree.view(NewickFormat))
    exp_classification = pd.read_csv(
        self.get_data_path('taxonomy_real_data_tiny_otus.tsv'),
        index_col=0, sep="\t").fillna("")
    assert_frame_equal(obs_classification, exp_classification)

    ar_tree_small = Artifact.load(
        self.get_data_path('sepp_tree_small.qza'))
    obs_classification_small = classify_otus_experimental(
        ar_repseq.view(DNASequencesDirectoryFormat),
        ar_tree_small.view(NewickFormat))
    exp_classification_small = pd.read_csv(
        self.get_data_path('taxonomy_real_data_small_otus.tsv'),
        index_col=0, sep="\t").fillna("")
    assert_frame_equal(obs_classification_small, exp_classification_small)

    ar_refphylo_tiny = Artifact.load(
        self.get_data_path('reference_phylogeny_tiny.qza'))
    ref_phylo_tiny = ar_refphylo_tiny.view(NewickFormat)
    with self.assertRaises(ValueError):
        classify_otus_experimental(
            ar_repseq.view(DNASequencesDirectoryFormat), ref_phylo_tiny)

    # test that missing taxon mappings result in an error
    ar_taxonomy = Artifact.load(
        self.get_data_path('taxonomy_missingotus.qza'))
    # capture stderr message and check if its content is as expected
    captured_stderr = StringIO()
    with redirect_stderr(captured_stderr):
        with self.assertRaises(ValueError):
            classify_otus_experimental(
                ar_repseq.view(DNASequencesDirectoryFormat),
                ar_tree.view(NewickFormat),
                reference_taxonomy=ar_taxonomy.view(pd.DataFrame))
    self.assertIn('The taxonomy artifact you provided does not cont',
                  captured_stderr.getvalue())
    self.assertIn('539572', captured_stderr.getvalue())
def test_roundtrip(self):
    fp1 = os.path.join(self.test_dir.name, 'artifact1.qza')
    fp2 = os.path.join(self.test_dir.name, 'artifact2.qza')
    artifact = Artifact.import_data(FourInts, [-1, 42, 0, 43])
    artifact.save(fp1)

    artifact1 = Artifact.load(fp1)
    artifact1.save(fp2)
    artifact2 = Artifact.load(fp2)

    self.assertEqual(artifact1.type, artifact2.type)
    self.assertEqual(artifact1.format, artifact2.format)
    self.assertEqual(artifact1.uuid, artifact2.uuid)
    self.assertEqual(artifact1.view(list), artifact2.view(list))
    # double view to make sure multiple views can be taken
    self.assertEqual(artifact1.view(list), artifact2.view(list))
def test_import_data_with_invalid_format_multi_file(self):
    data_dir = os.path.join(self.test_dir.name, 'test')
    os.mkdir(data_dir)
    with open(os.path.join(data_dir, 'file1.txt'), 'w') as fh:
        fh.write('42\n')
    with open(os.path.join(data_dir, 'file2.txt'), 'w') as fh:
        fh.write('43\n')
    nested = os.path.join(data_dir, 'nested')
    os.mkdir(nested)
    with open(os.path.join(nested, 'file3.txt'), 'w') as fh:
        fh.write('44\n')
    with open(os.path.join(nested, 'file4.txt'), 'w') as fh:
        fh.write('foo\n')

    error_regex = "file4.txt.*SingleIntFormat.*\n\n.*integer"
    with self.assertRaisesRegex(ValidationError, error_regex):
        Artifact.import_data(FourInts, data_dir)
def test_call_with_artifacts_and_parameters(self):
    concatenate_ints = self.plugin.methods['concatenate_ints']

    artifact1 = Artifact.import_data(IntSequence1, [0, 42, 43])
    artifact2 = Artifact.import_data(IntSequence2, [99, -22])

    result = concatenate_ints(artifact1, artifact1, artifact2, 55, 1)

    # Test properties of the `Results` object.
    self.assertIsInstance(result, tuple)
    self.assertIsInstance(result, Results)
    self.assertEqual(len(result), 1)
    self.assertEqual(result.concatenated_ints.view(list),
                     [0, 42, 43, 0, 42, 43, 99, -22, 55, 1])

    result = result[0]

    self.assertIsInstance(result, Artifact)
    self.assertEqual(result.type, IntSequence1)
    self.assertIsInstance(result.uuid, uuid.UUID)

    # Can retrieve multiple views of different type.
    exp_list_view = [0, 42, 43, 0, 42, 43, 99, -22, 55, 1]
    self.assertEqual(result.view(list), exp_list_view)
    self.assertEqual(result.view(list), exp_list_view)

    exp_counter_view = collections.Counter({
        0: 2, 42: 2, 43: 2, 99: 1, -22: 1, 55: 1, 1: 1
    })
    self.assertEqual(result.view(collections.Counter), exp_counter_view)
    self.assertEqual(result.view(collections.Counter), exp_counter_view)

    # Accepts IntSequence1 | IntSequence2
    artifact3 = Artifact.import_data(IntSequence2, [10, 20])
    result, = concatenate_ints(artifact3, artifact1, artifact2, 55, 1)

    self.assertEqual(result.type, IntSequence1)
    self.assertEqual(result.view(list),
                     [10, 20, 0, 42, 43, 99, -22, 55, 1])
def test_import_data_with_unrecognized_files(self):
    data_dir = os.path.join(self.test_dir.name, 'test')
    os.mkdir(data_dir)
    with open(os.path.join(data_dir, 'file1.txt'), 'w') as fh:
        fh.write('42\n')
    with open(os.path.join(data_dir, 'file2.txt'), 'w') as fh:
        fh.write('43\n')
    nested = os.path.join(data_dir, 'nested')
    os.mkdir(nested)
    with open(os.path.join(nested, 'file3.txt'), 'w') as fh:
        fh.write('44\n')
    with open(os.path.join(nested, 'foo.txt'), 'w') as fh:
        fh.write('45\n')

    error_regex = "Unrecognized.*foo.txt.*FourIntsDirectoryFormat"
    with self.assertRaisesRegex(ValidationError, error_regex):
        Artifact.import_data(FourInts, data_dir)
def test_call_with_variadic_inputs(self):
    method = self.plugin.methods['variadic_input_method']

    ints = [
        Artifact.import_data(IntSequence1, [1, 2, 3]),
        Artifact.import_data(IntSequence2, [4, 5, 6])
    ]
    int_set = {
        Artifact.import_data(SingleInt, 7),
        Artifact.import_data(SingleInt, 8)
    }
    nums = {9, 10}
    opt_nums = [11, 12, 13]

    result, = method(ints, int_set, nums, opt_nums)

    self.assertEqual(result.view(list), list(range(1, 14)))
def test_validate_artifact_bad(self):
    artifact = Artifact.import_data('IntSequence1', [1, 2, 3, 4])
    with (artifact._archiver.root_dir / 'extra.file').open('w') as fh:
        fh.write('uh oh')

    with self.assertRaisesRegex(exceptions.ValidationError,
                                r'extra\.file'):
        artifact.validate()
def test_async(self):
    mapping_viz = self.plugin.visualizers['mapping_viz']

    artifact1 = Artifact.import_data(Mapping, {'foo': 'abc', 'bar': 'def'})
    artifact2 = Artifact.import_data(Mapping, {'baz': 'abc',
                                               'bazz': 'ghi'})

    future = mapping_viz.asynchronous(artifact1, artifact2, 'Key', 'Value')

    self.assertIsInstance(future, concurrent.futures.Future)

    result = future.result()

    # Test properties of the `Results` object.
    self.assertIsInstance(result, tuple)
    self.assertIsInstance(result, Results)
    self.assertEqual(len(result), 1)
    self.assertEqual(result.visualization, result[0])

    result = result[0]
    self.assertIsInstance(result, Visualization)
    self.assertEqual(result.type, qiime2.core.type.Visualization)
    self.assertIsInstance(result.uuid, uuid.UUID)

    # TODO qiime2.sdk.Visualization doesn't have an API to access its
    # contents yet. For now, save and assert the correct files are present.
    filepath = os.path.join(self.test_dir.name, 'visualization.qzv')
    result.save(filepath)

    root_dir = str(result.uuid)
    expected = {
        'VERSION',
        'metadata.yaml',
        'data/index.html',
        'data/css/style.css',
        'provenance/metadata.yaml',
        'provenance/VERSION',
        'provenance/action/action.yaml',
        'provenance/artifacts/%s/metadata.yaml' % artifact1.uuid,
        'provenance/artifacts/%s/VERSION' % artifact1.uuid,
        'provenance/artifacts/%s/action/action.yaml' % artifact1.uuid,
        'provenance/artifacts/%s/metadata.yaml' % artifact2.uuid,
        'provenance/artifacts/%s/VERSION' % artifact2.uuid,
        'provenance/artifacts/%s/action/action.yaml' % artifact2.uuid
    }

    self.assertArchiveMembers(filepath, root_dir, expected)
def test_load_and_save(self):
    fp1 = os.path.join(self.test_dir.name, 'artifact1.qza')
    fp2 = os.path.join(self.test_dir.name, 'artifact2.qza')
    artifact = Artifact.import_data(FourInts, [-1, 42, 0, 43])
    artifact.save(fp1)

    artifact = Artifact.load(fp1)
    # Overwriting its source file works.
    artifact.save(fp1)
    # Saving to a new file works.
    artifact.save(fp2)

    root_dir = str(artifact.uuid)
    expected = {
        'VERSION',
        'checksums.md5',
        'metadata.yaml',
        'data/file1.txt',
        'data/file2.txt',
        'data/nested/file3.txt',
        'data/nested/file4.txt',
        'provenance/metadata.yaml',
        'provenance/VERSION',
        'provenance/citations.bib',
        'provenance/action/action.yaml'
    }
    self.assertArchiveMembers(fp1, root_dir, expected)

    root_dir = str(artifact.uuid)
    expected = {
        'VERSION',
        'checksums.md5',
        'metadata.yaml',
        'data/file1.txt',
        'data/file2.txt',
        'data/nested/file3.txt',
        'data/nested/file4.txt',
        'provenance/metadata.yaml',
        'provenance/VERSION',
        'provenance/citations.bib',
        'provenance/action/action.yaml'
    }
    self.assertArchiveMembers(fp2, root_dir, expected)
def test_reader_transformer(self):
    fp = pkg_resources.resource_filename('q2_dummy_types.tests',
                                         'data/int-sequence.txt')

    for type in IntSequence1, IntSequence2:
        artifact = Artifact.import_data(type, fp)
        # `Artifact.view` invokes the transformer that handles
        # the `SingleIntFormat` -> `list` transformation.
        self.assertEqual(artifact.view(list),
                         [42, -1, 9, 10, 0, 999, 0])
def create_artifact():
    request_body = request.get_json()

    artifact = Artifact.import_data(request_body['type'],
                                    request_body['path'],
                                    request_body['source_format'])

    path = os.path.join(os.getcwd(), request_body['name'])
    if not path.endswith('.qza'):
        path += '.qza'
    artifact.save(path)
    return ''
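
# Hypothetical usage sketch for the `create_artifact` handler above, assuming
# it is registered on a Flask app reachable through `client` (a Flask test
# client) at POST /artifacts. The route and example values are illustrative
# only; the JSON keys ('type', 'path', 'source_format', 'name') are the ones
# the handler reads.
def _example_create_artifact(client):
    payload = {
        'type': 'FeatureData[Sequence]',        # semantic type to import as
        'path': '/tmp/se-dna-sequences.fasta',  # data to import
        'source_format': None,                  # let QIIME 2 infer the view type
        'name': 'sequences',                    # saved to ./sequences.qza
    }
    response = client.post('/artifacts', json=payload)
    assert response.status_code == 200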
def test_primitive_param_not_valid_choice(self):
    pipeline = self.plugin.pipelines['failing_pipeline']
    int_sequence = Artifact.import_data(IntSequence1, [0, 42, 43])
    break_from = "invalid choice"

    # test String not a valid choice
    with self.assertRaisesRegex(TypeError,
                                'break_from.*\'invalid choice\''):
        pipeline(int_sequence, break_from)
def setUp(self):
    super().setUp()
    taxonomy = Artifact.import_data('FeatureData[Taxonomy]',
                                    self.get_data_path('taxonomy.tsv'))
    self.taxonomy = taxonomy.view(pd.Series)
    # TODO: use `Artifact.import_data` here once we have a transformer
    # for DNASequencesDirectoryFormat -> DNAFASTAFormat
    self.reads_fp = self.get_data_path('se-dna-sequences.fasta')
    self.reads = DNAFASTAFormat(self.reads_fp, mode='r')
def test_extract(self):
    fp = os.path.join(self.test_dir.name, 'artifact.qza')
    artifact = Artifact.import_data(FourInts, [-1, 42, 0, 43])
    artifact.save(fp)

    root_dir = str(artifact.uuid)
    output_dir = os.path.join(self.test_dir.name, 'artifact-extract-test')
    result_dir = Artifact.extract(fp, output_dir=output_dir)
    self.assertEqual(result_dir, os.path.join(output_dir, root_dir))

    expected = {
        'VERSION',
        'metadata.yaml',
        'data/file1.txt',
        'data/file2.txt',
        'data/nested/file3.txt',
        'data/nested/file4.txt',
        'provenance/metadata.yaml',
        'provenance/VERSION',
        'provenance/action/action.yaml'
    }

    self.assertExtractedArchiveMembers(output_dir, root_dir, expected)
def test_exercise_classify_otus_experimental(self):
    obs_artifact, = self.action(self.input_sequences, self.tree,
                                self.taxonomy)
    obs = obs_artifact.view(pd.DataFrame)
    exp_artifact = Artifact.import_data(
        'FeatureData[Taxonomy]', self.get_data_path('sepp-results.tsv'))
    exp = exp_artifact.view(pd.DataFrame)
    assert_frame_equal(obs, exp)
def test_extract_reads_expected_reverse(self):
    reverse_sequences = Artifact.import_data(
        'FeatureData[Sequence]',
        self.get_data_path('dna-sequences-reverse.fasta'))
    results = extract_reads(
        reverse_sequences, f_primer=self.f_primer, r_primer=self.r_primer,
        min_length=4, read_orientation='reverse')
    self._test_results(results)
def test_import_data_with_good_validation_multi_files(self):
    data_dir = os.path.join(self.test_dir.name, 'test')
    os.mkdir(data_dir)
    with open(os.path.join(data_dir, 'file1.txt'), 'w') as fh:
        fh.write('1\n')
    with open(os.path.join(data_dir, 'file2.txt'), 'w') as fh:
        fh.write('1\n')

    a = Artifact.import_data(SingleInt, data_dir)
    self.assertEqual(1, a.view(int))