Beispiel #1
0
 def test_artifact_validate_min(self):
     """Check that ``validate(level='min')`` does not raise after import."""
     # (semantic type name, view data) pairs, checked in this order.
     cases = (
         ('IntSequence1', [1, 2, 3, 4]),
         ('Mapping', {'a': '1', 'b': '2'}),
     )
     for type_name, payload in cases:
         imported = Artifact.import_data(type_name, payload)
         imported.validate(level='min')
         # Reaching this line means validate() returned without raising.
         self.assertTrue(True)  # Checkpoint assertion
Beispiel #2
0
    def test_extract(self):
        """Save an artifact, extract it, and check the extracted members."""
        archive_fp = os.path.join(self.test_dir.name, 'artifact.qza')
        saved = Artifact.import_data(FourInts, [-1, 42, 0, 43])
        saved.save(archive_fp)

        # The extraction is expected to land in a directory named after the
        # artifact's UUID (asserted just below).
        uuid_dir = str(saved.uuid)
        # pathlib normalizes away the `.`, it doesn't matter, but this is the
        # implementation we're using, so let's test against that assumption.
        extract_to = pathlib.Path(self.test_dir.name) / 'artifact-extract-test'
        extracted_to = Artifact.extract(archive_fp, output_dir=extract_to)
        self.assertEqual(extracted_to, str(extract_to / uuid_dir))

        top_level = {'VERSION', 'metadata.yaml'}
        data_files = {
            'data/file1.txt',
            'data/file2.txt',
            'data/nested/file3.txt',
            'data/nested/file4.txt',
        }
        provenance_files = {
            'provenance/metadata.yaml',
            'provenance/VERSION',
            'provenance/citations.bib',
            'provenance/action/action.yaml',
        }

        self.assertExtractedArchiveMembers(
            extract_to, uuid_dir, top_level | data_files | provenance_files)
Beispiel #3
0
    def test_call_with_optional_artifacts(self):
        """Call ``optional_artifacts_method`` with 0, 1 and 2 optional inputs.

        Also checks that an optional input of the wrong semantic type is
        rejected with a ``TypeError``.
        """
        action = self.plugin.methods['optional_artifacts_method']

        base = Artifact.import_data(IntSequence1, [0, 42, 43])
        first_opt = Artifact.import_data(IntSequence1, [99, -22])
        second_opt = Artifact.import_data(IntSequence2, [43, 43])

        # No optional artifacts provided.
        result = action(base, 42)
        self.assertEqual(result.output.view(list), [0, 42, 43, 42])

        # One optional artifact provided.
        result = action(base, 42, optional1=first_opt)
        self.assertEqual(result.output.view(list),
                         [0, 42, 43, 42, 99, -22])

        # All optional artifacts provided.
        result = action(base, 42, optional1=first_opt,
                        optional2=second_opt, num2=111)
        self.assertEqual(result.output.view(list),
                         [0, 42, 43, 42, 99, -22, 43, 43, 111])

        # Invalid type provided as optional artifact.
        type_error_regex = 'type IntSequence2.*subtype IntSequence1'
        with self.assertRaisesRegex(TypeError, type_error_regex):
            action(base, 42, optional1=second_opt)
    def test_classify(self):
        """Compare classifications of forward and reverse-complemented reads.

        Runs ``classify_sklearn`` with default settings, with explicit
        ``read_orientation`` values, and with ``reads_per_batch``/``n_jobs``
        set, asserting the per-key results agree.
        """
        # test read direction detection and parallel classification
        classify = feature_classifier.methods.classify_sklearn

        def as_dict(result):
            # Plain dict built from the pd.Series view of the classification.
            return result.classification.view(pd.Series).to_dict()

        fasta_fp = self.get_data_path('se-dna-sequences.fasta')
        forward = Artifact.import_data('FeatureData[Sequence]', fasta_fp)
        parsed = skbio.io.read(
            fasta_fp, format='fasta', constructor=skbio.DNA)
        revcomp_fp = os.path.join(
            self.temp_dir.name, 'rev-dna-sequences.fasta')
        skbio.io.write((seq.reverse_complement() for seq in parsed),
                       'fasta', revcomp_fp)
        reverse = Artifact.import_data('FeatureData[Sequence]', revcomp_fp)

        # Default settings: orientation is left for the classifier to decide.
        same = as_dict(classify(forward, self.classifier))
        flipped = as_dict(classify(reverse, self.classifier))
        for key in same:
            self.assertEqual(same[key], flipped[key])

        # Orientation stated explicitly for each input.
        same = as_dict(
            classify(forward, self.classifier, read_orientation='same'))
        flipped = as_dict(
            classify(reverse, self.classifier,
                     read_orientation='reverse-complement'))
        for key in same:
            self.assertEqual(same[key], flipped[key])

        # Batched, two-job run is compared against the explicit 'same' run.
        batched = as_dict(
            classify(forward, self.classifier, reads_per_batch=100,
                     n_jobs=2))
        for key in same:
            self.assertEqual(same[key], batched[key])
Beispiel #5
0
    def test_import_data_invalid_type(self):
        """``Visualization`` is rejected as an import type, by object or name."""
        expected_msg = 'concrete semantic type.*Visualization'
        # The same failure is expected for the type object and its string name.
        for bad_type in (qiime2.core.type.Visualization, 'Visualization'):
            with self.assertRaisesRegex(TypeError, expected_msg):
                Artifact.import_data(bad_type, self.test_dir)
Beispiel #6
0
    def test_import_data_with_filepath_multi_file_data_layout(self):
        """Importing ``FourInts`` from a single file raises ValidationError."""
        single_file = os.path.join(self.test_dir.name, 'test.txt')
        with open(single_file, 'w') as handle:
            handle.write('42\n')

        expected_msg = "FourIntsDirectoryFormat.*directory"
        with self.assertRaisesRegex(qiime2.plugin.ValidationError,
                                    expected_msg):
            Artifact.import_data(FourInts, single_file)
Beispiel #7
0
    def test_eq_same_uuid(self):
        """An artifact compares equal to the copy loaded from its archive."""
        archive_fp = os.path.join(self.test_dir.name, 'artifact.qza')
        original = Artifact.import_data(FourInts, [-1, 42, 0, 43])
        original.save(archive_fp)

        reloaded = Artifact.load(archive_fp)

        self.assertEqual(original, reloaded)
Beispiel #8
0
    def test_import_data_with_unreachable_path(self):
        """Importing from an unreachable path raises ``ValueError``."""
        file_like = os.path.join(self.test_dir.name, 'foo.txt')
        # Joining with '' leaves a trailing separator on the path.
        dir_like = os.path.join(self.test_dir.name, 'bar', '')

        cases = ((IntSequence1, file_like), (FourInts, dir_like))
        for semantic_type, path in cases:
            with self.assertRaisesRegex(ValueError, "does not exist"):
                Artifact.import_data(semantic_type, path)
Beispiel #9
0
 def test_artifact_validate_max(self):
     """Exercise ``validate()`` with default and 'max' levels.

     The ``Mapping`` artifact must validate without raising; the
     ``IntSequence1`` artifact must raise ``ValidationError`` matching
     '3 more'.
     """
     mapping = Artifact.import_data('Mapping', {'a': '1', 'b': '2'})
     # First with no arguments, then with the level spelled out.
     for kwargs in ({}, {'level': 'max'}):
         mapping.validate(**kwargs)
         self.assertTrue(True)  # Checkpoint assertion

     ints = Artifact.import_data('IntSequence1', [1, 2, 3, 4, 5, 6, 7, 10])
     with self.assertRaisesRegex(ValidationError, '3 more'):
         ints.validate('max')
    def test_class_weight(self):
        """Fitting with a ``class_weight`` artifact matches manual weights.

        Two comparisons are made:

        1. ``fit_classifier_naive_bayes`` given ``class_weight=weights``
           versus the same fitter given ``classify__class_prior`` built by
           hand from the weights table.
        2. ``fit_classifier_sklearn`` given ``class_weight=weights`` versus
           the same fitter given a spec whose ``class_weight`` entry is
           built by hand from the weights table.

        In both cases the classifications of the training reads must agree.
        """
        # we should be able to input class_weight to fit_classifier
        weights = Artifact.import_data(
            'FeatureTable[RelativeFrequency]',
            self.get_data_path('class_weight.biom'),
            view_type='BIOMV100Format')
        reads = Artifact.import_data(
            'FeatureData[Sequence]',
            self.get_data_path('se-dna-sequences.fasta'))

        fitter = feature_classifier.methods.fit_classifier_naive_bayes
        classifier1 = fitter(reads, self.taxonomy, class_weight=weights)
        classifier1 = classifier1.classifier

        class_weight = weights.view(biom.Table)
        classes = class_weight.ids('observation')
        # Materialize each (class, weight) pairing as a list.  A bare zip()
        # is a one-shot iterator: sorting it for `priors` below would leave
        # nothing for the later dict() call, and the hand-built sklearn spec
        # would silently receive an empty class_weight mapping.
        class_weights = [list(zip(classes, wts))
                         for wts in class_weight.iter_data()]
        # Weights of the first row, ordered by sorted class label.
        priors = json.dumps(list(zip(*sorted(class_weights[0])))[1])
        classifier2 = fitter(reads, self.taxonomy,
                             classify__class_prior=priors).classifier

        classify = feature_classifier.methods.classify_sklearn
        result1 = classify(reads, classifier1)
        result1 = result1.classification.view(pd.Series).to_dict()
        result2 = classify(reads, classifier2)
        result2 = result2.classification.view(pd.Series).to_dict()
        self.assertEqual(result1, result2)

        svc_spec = [['feat_ext',
                     {'__type__': 'feature_extraction.text.HashingVectorizer',
                      'analyzer': 'char_wb',
                      'n_features': 8192,
                      'ngram_range': [8, 8],
                      'alternate_sign': False}],
                    ['classify',
                     {'__type__': 'linear_model.LogisticRegression'}]]
        classifier_spec = json.dumps(svc_spec)
        gen_fitter = feature_classifier.methods.fit_classifier_sklearn
        classifier1 = gen_fitter(reads, self.taxonomy, classifier_spec,
                                 class_weight=weights).classifier

        # Same spec, but with the weights written into it by hand.
        svc_spec[1][1]['class_weight'] = dict(class_weights[0])
        classifier_spec = json.dumps(svc_spec)
        classifier2 = gen_fitter(reads, self.taxonomy, classifier_spec
                                 ).classifier

        result1 = classify(reads, classifier1)
        result1 = result1.classification.view(pd.Series).to_dict()
        result2 = classify(reads, classifier2)
        result2 = result2.classification.view(pd.Series).to_dict()
        self.assertEqual(set(result1.keys()), set(result2.keys()))
        for k in result1:
            self.assertEqual(result1[k], result2[k])
Beispiel #11
0
    def test_load(self):
        """A saved artifact loads back with the same type, UUID and data."""
        archive_fp = os.path.join(self.test_dir.name, 'artifact.qza')
        original = Artifact.import_data(FourInts, [-1, 42, 0, 43])
        original.save(archive_fp)

        loaded = Artifact.load(archive_fp)

        self.assertEqual(loaded.type, FourInts)
        self.assertEqual(loaded.uuid, original.uuid)
        # NOTE(review): the view is asserted twice, presumably to check that
        # a repeated view yields the same data -- confirm before removing.
        for _ in range(2):
            self.assertEqual(loaded.view(list), [-1, 42, 0, 43])
Beispiel #12
0
    def test_peek(self):
        """``Artifact.peek`` reports type, UUID and format of a saved file."""
        archive_fp = os.path.join(self.test_dir.name, 'artifact.qza')
        saved = Artifact.import_data(FourInts, [0, 0, 42, 1000])
        saved.save(archive_fp)

        peeked = Artifact.peek(archive_fp)

        self.assertIsInstance(peeked, ResultMetadata)
        # The peeked fields are compared against plain strings.
        self.assertEqual(peeked.type, 'FourInts')
        self.assertEqual(peeked.uuid, str(saved.uuid))
        self.assertEqual(peeked.format, 'FourIntsDirectoryFormat')
Beispiel #13
0
    def test_import_data_with_invalid_format_single_file(self):
        """A non-integer line in the input file fails import validation."""
        bad_file = os.path.join(self.test_dir.name, 'foo.txt')
        # The third line ('abc') is the one the error is expected to name.
        with open(bad_file, 'w') as handle:
            for entry in ('42', '43', 'abc', '123'):
                handle.write(entry + '\n')

        expected_msg = "foo.txt.*IntSequenceFormat.*\n\n.*Line 3"
        with self.assertRaisesRegex(ValidationError, expected_msg):
            Artifact.import_data(IntSequence1, bad_file)
    def setUp(self):
        """Import the taxonomy and fit the classifier used by the tests.

        Sets ``self.taxonomy``, ``self.seq_path`` and ``self.classifier``.
        """
        super().setUp()
        taxonomy_fp = self.get_data_path('taxonomy.tsv')
        self.taxonomy = Artifact.import_data(
            'FeatureData[Taxonomy]', taxonomy_fp)

        self.seq_path = self.get_data_path('se-dna-sequences.fasta')
        training_reads = Artifact.import_data(
            'FeatureData[Sequence]', self.seq_path)
        # The fitter method is looked up by name, using the first element of
        # the first entry in `_specific_fitters`.
        method_name = 'fit_classifier_' + _specific_fitters[0][0]
        fit = getattr(feature_classifier.methods, method_name)
        self.classifier = fit(training_reads, self.taxonomy).classifier
Beispiel #15
0
    def test_import_data_with_bad_validation_multi_files(self):
        """Importing ``SingleInt`` from two differing files fails validation."""
        source_dir = os.path.join(self.test_dir.name, 'test')
        os.mkdir(source_dir)
        # Two files with different contents ('1' vs '2').
        for filename, content in (('file1.txt', '1\n'), ('file2.txt', '2\n')):
            with open(os.path.join(source_dir, filename), 'w') as handle:
                handle.write(content)

        expected_msg = ("test.*RedundantSingleIntDirectoryFormat.*\n\n"
                        ".*does not match")
        with self.assertRaisesRegex(ValidationError, expected_msg):
            Artifact.import_data(SingleInt, source_dir)
Beispiel #16
0
    def test_asynchronous(self):
        """Run ``mapping_viz`` asynchronously and inspect the saved result."""
        visualizer = self.plugin.visualizers['mapping_viz']

        artifact1 = Artifact.import_data(Mapping, {'foo': 'abc', 'bar': 'def'})
        artifact2 = Artifact.import_data(
            Mapping, {'baz': 'abc', 'bazz': 'ghi'})

        pending = visualizer.asynchronous(
            artifact1, artifact2, 'Key', 'Value')
        self.assertIsInstance(pending, concurrent.futures.Future)
        results = pending.result()

        # Test properties of the `Results` object.
        for expected_cls in (tuple, Results):
            self.assertIsInstance(results, expected_cls)
        self.assertEqual(len(results), 1)
        self.assertEqual(results.visualization, results[0])

        viz = results[0]

        self.assertIsInstance(viz, Visualization)
        self.assertEqual(viz.type, qiime2.core.type.Visualization)
        self.assertIsInstance(viz.uuid, uuid.UUID)

        # TODO qiime2.sdk.Visualization doesn't have an API to access its
        # contents yet. For now, save and assert the correct files are present.
        saved_fp = os.path.join(self.test_dir.name, 'visualization.qzv')
        viz.save(saved_fp)

        expected_members = {
            'VERSION',
            'checksums.md5',
            'metadata.yaml',
            'data/index.html',
            'data/css/style.css',
            'provenance/metadata.yaml',
            'provenance/VERSION',
            'provenance/citations.bib',
            'provenance/action/action.yaml',
        }
        # Each input artifact contributes the same four provenance files
        # under its own UUID.
        for parent in (artifact1, artifact2):
            expected_members |= {
                'provenance/artifacts/%s/metadata.yaml' % parent.uuid,
                'provenance/artifacts/%s/VERSION' % parent.uuid,
                'provenance/artifacts/%s/citations.bib' % parent.uuid,
                'provenance/artifacts/%s/action/action.yaml' % parent.uuid,
            }

        self.assertArchiveMembers(saved_fp, str(viz.uuid), expected_members)
Beispiel #17
0
    def test_call_with_variadic_inputs(self):
        """Call ``variadic_input_method`` with list and set collections."""
        action = self.plugin.methods['variadic_input_method']

        int_seqs = [
            Artifact.import_data(IntSequence1, [1, 2, 3]),
            Artifact.import_data(IntSequence2, [4, 5, 6]),
        ]
        single_ints = {
            Artifact.import_data(SingleInt, 7),
            Artifact.import_data(SingleInt, 8),
        }
        required_nums = {9, 10}
        optional_nums = [11, 12, 13]

        # Exactly one output is expected; the unpacking enforces that.
        (combined,) = action(int_seqs, single_ints, required_nums,
                             optional_nums)

        self.assertEqual(combined.view(list), list(range(1, 14)))
    def setUp(self):
        """Fit a naive Bayes classifier and store its pickled-pipeline path.

        Sets ``self._sklp`` (the transformed pipeline) and
        ``self.sklearn_pipeline`` (``str()`` of its ``PickleFormat`` view).
        """
        super().setUp()

        reads_fp = self.get_data_path('se-dna-sequences.fasta')
        reads = Artifact.import_data('FeatureData[Sequence]', reads_fp)
        taxonomy_fp = self.get_data_path('taxonomy.tsv')
        taxonomy = Artifact.import_data('FeatureData[Taxonomy]', taxonomy_fp)

        fitted = fit_classifier_naive_bayes(reads, taxonomy)
        pipeline = fitted.classifier.view(Pipeline)
        to_pickle_dir = self.get_transformer(
            Pipeline, TaxonomicClassiferTemporaryPickleDirFmt)
        # NOTE(review): the transformed object is kept on self, presumably so
        # the files behind `sklearn_pipeline` stay alive -- confirm.
        self._sklp = to_pickle_dir(pipeline)
        pickled = self._sklp.sklearn_pipeline.view(PickleFormat)
        self.sklearn_pipeline = str(pickled)
    def test_populate_class_weight(self):
        """``populate_class_weight`` sets the priors of a GaussianNB pipeline.

        The populated pipeline's ``classify__priors`` are compared, element
        by element, with priors written into the spec by hand.
        """
        # should populate the class weight of a pipeline
        weights = Artifact.import_data(
            'FeatureTable[RelativeFrequency]',
            self.get_data_path('class_weight.biom'),
            view_type='BIOMV100Format')
        table = weights.view(biom.Table)

        vectorizer_params = {
            '__type__': 'feature_extraction.text.HashingVectorizer',
            'analyzer': 'char_wb',
            'n_features': 8192,
            'ngram_range': [8, 8],
            'alternate_sign': False,
        }
        classifier_params = {'__type__': 'naive_bayes.GaussianNB'}
        svc_spec = [['feat_ext', vectorizer_params],
                    ['classify', classifier_params]]

        # Pipeline populated by the function under test.
        populated = pipeline_from_spec(svc_spec)
        populate_class_weight(populated, table)

        # Pipeline with priors taken from the first table row, ordered by
        # sorted class label.
        classes = table.ids('observation')
        class_weights = [zip(classes, wts) for wts in table.iter_data()]
        first_row_sorted = sorted(class_weights[0])
        svc_spec[1][1]['priors'] = list(zip(*first_row_sorted))[1]
        by_hand = pipeline_from_spec(svc_spec)

        observed = populated.get_params()['classify__priors']
        expected = by_hand.get_params()['classify__priors']
        for obs_prior, exp_prior in zip(observed, expected):
            self.assertAlmostEqual(obs_prior, exp_prior)
Beispiel #20
0
    def test_visualizer_callable_output(self):
        """A visualizer callable may only return ``None``.

        Any value from the list below must trigger a ``TypeError``; a
        callable returning ``None`` must produce a ``Results`` holding a
        ``Visualization``.
        """
        mapping = Artifact.import_data(Mapping, {'foo': 'abc', 'bar': 'def'})

        def register(callable_):
            # Register the callable, then fetch the visualizer stored under
            # the 'func' key.
            self.plugin.visualizers.register_function(
                callable_, {'foo': Mapping}, {}, '', ''
            )
            return self.plugin.visualizers['func']

        # Callable returns a value from this tuple
        for bad_value in (True, False, [], {}, '', 0, 0.0):
            def func(output_dir: str, foo: dict) -> None:
                return bad_value

            visualizer = register(func)

            with self.assertRaisesRegex(TypeError, "should not return"):
                visualizer(foo=mapping)

        # Callable returns None (default function return)
        def func(output_dir: str, foo: dict) -> None:
            return None

        visualizer = register(func)

        # Should not raise an exception
        results = visualizer(foo=mapping)
        self.assertIsInstance(results, Results)
        self.assertIsInstance(results.visualization, Visualization)
    def test_populate_class_weight(self):
        """``populate_class_weight`` sets the priors of a GaussianNB pipeline.

        The populated pipeline's ``classify__priors`` are compared, element
        by element, with priors written into the spec by hand.
        """
        # should populate the class weight of a pipeline
        weights = Artifact.import_data(
            'FeatureTable[RelativeFrequency]',
            self.get_data_path('class_weight.biom'),
            view_type='BIOMV100Format')
        table = weights.view(biom.Table)

        vectorizer_params = {
            '__type__': 'feature_extraction.text.HashingVectorizer',
            'analyzer': 'char_wb',
            'n_features': 8192,
            'ngram_range': [8, 8],
            'alternate_sign': False,
        }
        classifier_params = {'__type__': 'naive_bayes.GaussianNB'}
        svc_spec = [['feat_ext', vectorizer_params],
                    ['classify', classifier_params]]

        # Pipeline populated by the function under test.
        populated = pipeline_from_spec(svc_spec)
        populate_class_weight(populated, table)

        # Pipeline with priors taken from the first table row, ordered by
        # sorted class label.
        classes = table.ids('observation')
        class_weights = [zip(classes, wts) for wts in table.iter_data()]
        first_row_sorted = sorted(class_weights[0])
        svc_spec[1][1]['priors'] = list(zip(*first_row_sorted))[1]
        by_hand = pipeline_from_spec(svc_spec)

        observed = populated.get_params()['classify__priors']
        expected = by_hand.get_params()['classify__priors']
        for obs_prior, exp_prior in zip(observed, expected):
            self.assertAlmostEqual(obs_prior, exp_prior)
Beispiel #22
0
    def test_from_view_and_save(self):
        """An artifact built with ``_from_view`` saves the expected members."""
        archive_fp = os.path.join(self.test_dir.name, 'artifact.qza')
        # Using four-ints data layout because it has multiple files, some of
        # which are in a nested directory.
        built = Artifact._from_view(
            FourInts, [-1, 42, 0, 43], list, self.provenance_capture)

        built.save(archive_fp)

        top_level = {'VERSION', 'checksums.md5', 'metadata.yaml'}
        data_files = {
            'data/file1.txt',
            'data/file2.txt',
            'data/nested/file3.txt',
            'data/nested/file4.txt',
        }
        provenance_files = {
            'provenance/metadata.yaml',
            'provenance/VERSION',
            'provenance/citations.bib',
            'provenance/action/action.yaml',
        }

        self.assertArchiveMembers(
            archive_fp, str(built.uuid),
            top_level | data_files | provenance_files)
 def setUp(self):
     """Import the first ten sequences of the test FASTA as an artifact.

     Sets ``self.sequences``.
     """
     super().setUp()
     source_fp = self.get_data_path('dna-sequences.fasta')
     all_seqs = skbio.io.read(source_fp, format='fasta',
                              constructor=skbio.DNA)
     subset_fp = os.path.join(self.temp_dir.name, 'temp-seqs.fasta')
     # NOTE(review): the islice is wrapped in a generator expression,
     # presumably because skbio.io.write needs a real generator -- keep it.
     first_ten = (seq for seq in islice(all_seqs, 10))
     skbio.io.write(first_ten, 'fasta', subset_fp)
     self.sequences = Artifact.import_data('FeatureData[Sequence]', subset_fp)
Beispiel #24
0
def inspect_artifact(uuid):
    """Return a JSON response with the UUID and type of a known artifact.

    The artifact is looked up in ``ARTIFACTS`` by ``uuid`` and peeked. Any
    exception during lookup or peek results in ``abort(404)``.
    """
    try:
        peeked = Artifact.peek(ARTIFACTS[uuid])
    except Exception:
        # NOTE(review): assumes abort() raises, so `peeked` is always bound
        # below -- confirm against the web framework in use.
        abort(404)

    payload = {'uuid': peeked.uuid, 'type': peeked.type}
    return jsonify(payload)
 def test_mismatched_taxonomy(self):
     """The action raises ``ValueError`` for the alternate taxonomy file."""
     taxa_fp = self.get_data_path('another-ref-taxa.tsv')
     other_taxa = Artifact.import_data('FeatureData[Taxonomy]', taxa_fp)

     expected_msg = 'Not all OTUs.*1 feature.*\n.*879972'
     with self.assertRaisesRegex(ValueError, expected_msg):
         self.action(self.input_sequences, self.tree, other_taxa)
Beispiel #26
0
def inspect_artifact(uuid):
    """Return a JSON response with the UUID and type of a known artifact.

    The artifact is looked up in ``ARTIFACTS`` by ``uuid`` and peeked. Any
    exception during lookup or peek results in ``abort(404)``.
    """
    try:
        peeked = Artifact.peek(ARTIFACTS[uuid])
    except Exception:
        # NOTE(review): assumes abort() raises, so `peeked` is always bound
        # below -- confirm against the web framework in use.
        abort(404)

    payload = {'uuid': peeked.uuid, 'type': peeked.type}
    return jsonify(payload)
 def setUp(self):
     """Import the first ten sequences of the test FASTA as an artifact.

     Sets ``self.sequences``.
     """
     super().setUp()
     source_fp = self.get_data_path('dna-sequences.fasta')
     all_seqs = skbio.io.read(source_fp, format='fasta',
                              constructor=skbio.DNA)
     subset_fp = os.path.join(self.temp_dir.name, 'temp-seqs.fasta')
     # NOTE(review): the islice is wrapped in a generator expression,
     # presumably because skbio.io.write needs a real generator -- keep it.
     first_ten = (seq for seq in islice(all_seqs, 10))
     skbio.io.write(first_ten, 'fasta', subset_fp)
     self.sequences = Artifact.import_data('FeatureData[Sequence]', subset_fp)
Beispiel #28
0
    def test_visualizer_callable_output(self):
        """A visualizer callable may only return ``None``.

        Any value from the list below must trigger a ``TypeError``; a
        callable returning ``None`` must produce a ``Results`` holding a
        ``Visualization``.
        """
        mapping = Artifact.import_data(Mapping, {'foo': 'abc', 'bar': 'def'})

        def register(callable_):
            # Register the callable, then fetch the visualizer stored under
            # the 'func' key.
            self.plugin.visualizers.register_function(
                callable_, {'foo': Mapping}, {}, '', ''
            )
            return self.plugin.visualizers['func']

        # Callable returns a value from this tuple
        for bad_value in (True, False, [], {}, '', 0, 0.0):
            def func(output_dir: str, foo: dict) -> None:
                return bad_value

            visualizer = register(func)

            with self.assertRaisesRegex(TypeError, "should not return"):
                visualizer(foo=mapping)

        # Callable returns None (default function return)
        def func(output_dir: str, foo: dict) -> None:
            return None

        visualizer = register(func)

        # Should not raise an exception
        results = visualizer(foo=mapping)
        self.assertIsInstance(results, Results)
        self.assertIsInstance(results.visualization, Visualization)
Beispiel #29
0
    def test_async_with_multiple_outputs(self):
        """Run ``split_ints`` asynchronously and check both outputs."""
        action = self.plugin.methods['split_ints']
        source = Artifact.import_data(IntSequence1, [0, 42, -2, 43, 6])

        pending = action.asynchronous(source)
        self.assertIsInstance(pending, concurrent.futures.Future)
        outputs = pending.result()

        self.assertIsInstance(outputs, tuple)
        self.assertEqual(len(outputs), 2)

        for produced in outputs:
            self.assertIsInstance(produced, Artifact)
            self.assertEqual(produced.type, IntSequence1)
            self.assertIsInstance(produced.uuid, uuid.UUID)

        # Output artifacts have different UUIDs.
        first, second = outputs[0], outputs[1]
        self.assertNotEqual(first.uuid, second.uuid)

        expected_left = [0, 42]
        expected_right = [-2, 43, 6]

        # Index lookup.
        self.assertEqual(outputs[0].view(list), expected_left)
        self.assertEqual(outputs[1].view(list), expected_right)

        # Test properties of the `Results` object.
        self.assertIsInstance(outputs, Results)
        self.assertEqual(outputs.left.view(list), expected_left)
        self.assertEqual(outputs.right.view(list), expected_right)
 def test_filter_features_nooverlap(self):
     """The action raises ``ValueError`` when given the reference tree."""
     # Just load up the reference tree instead of creating new test data
     tree_fp = self.get_data_path('ref-tree.nwk')
     mismatched_tree = Artifact.import_data('Phylogeny[Rooted]', tree_fp)

     expected_msg = 'Not a single fragment.*empty'
     with self.assertRaisesRegex(ValueError, expected_msg):
         self.action(self.table, mismatched_tree)
Beispiel #31
0
    def test_from_view_and_save(self):
        """An artifact built with ``_from_view`` saves the expected members."""
        archive_fp = os.path.join(self.test_dir.name, 'artifact.qza')
        # Using four-ints data layout because it has multiple files, some of
        # which are in a nested directory.
        built = Artifact._from_view(
            FourInts, [-1, 42, 0, 43], list, self.provenance_capture)

        built.save(archive_fp)

        top_level = {'VERSION', 'checksums.md5', 'metadata.yaml'}
        data_files = {
            'data/file1.txt',
            'data/file2.txt',
            'data/nested/file3.txt',
            'data/nested/file4.txt',
        }
        provenance_files = {
            'provenance/metadata.yaml',
            'provenance/VERSION',
            'provenance/citations.bib',
            'provenance/action/action.yaml',
        }

        self.assertArchiveMembers(
            archive_fp, str(built.uuid),
            top_level | data_files | provenance_files)
Beispiel #32
0
    def test_write_v1_archive(self):
        """Saving under archive version 1 writes the v1 layout and VERSION.

        The artifact is created and saved inside ``artifact_version(1)``;
        the archive members and the contents of the ``VERSION`` member are
        then checked.
        """
        fp = os.path.join(self.temp_dir.name, 'artifact_v1.qza')

        with artifact_version(1):
            artifact = Artifact._from_view(FourInts, [-1, 42, 0, 43], list,
                                           self.provenance_capture)
            artifact.save(fp)

        root_dir = str(artifact.uuid)
        expected = {
            'VERSION',
            'metadata.yaml',
            'data/file1.txt',
            'data/file2.txt',
            'data/nested/file3.txt',
            'data/nested/file4.txt',
            'provenance/metadata.yaml',
            'provenance/VERSION',
            'provenance/action/action.yaml',
        }
        self.assertArchiveMembers(fp, root_dir, expected)

        with zipfile.ZipFile(fp, mode='r') as zf:
            # Zip member names always use '/' as the separator, so build the
            # name explicitly instead of with os.path.join (which would use
            # os.sep and miss the member on platforms where that differs).
            version = zf.read('%s/VERSION' % root_dir)
        # str() of the bytes payload is its repr, in which newlines appear
        # escaped; that is what lets this single-line pattern match.
        self.assertRegex(str(version), '^.*archive: 1.*$')
Beispiel #33
0
    def test_async_with_multiple_outputs_matched_types(self):
        """Run ``split_ints`` asynchronously on an ``IntSequence2`` input.

        Both outputs are expected to carry the input's ``IntSequence2`` type.
        """
        action = self.plugin.methods['split_ints']
        source = Artifact.import_data(IntSequence2, [0, 42, -2, 43, 6])

        pending = action.asynchronous(source)
        self.assertIsInstance(pending, concurrent.futures.Future)
        outputs = pending.result()

        self.assertIsInstance(outputs, tuple)
        self.assertEqual(len(outputs), 2)

        for produced in outputs:
            self.assertIsInstance(produced, Artifact)
            self.assertEqual(produced.type, IntSequence2)
            self.assertIsInstance(produced.uuid, uuid.UUID)

        # Output artifacts have different UUIDs.
        first, second = outputs[0], outputs[1]
        self.assertNotEqual(first.uuid, second.uuid)

        expected_left = [0, 42]
        expected_right = [-2, 43, 6]

        # Index lookup.
        self.assertEqual(outputs[0].view(list), expected_left)
        self.assertEqual(outputs[1].view(list), expected_right)

        # Test properties of the `Results` object.
        self.assertIsInstance(outputs, Results)
        self.assertEqual(outputs.left.view(list), expected_left)
        self.assertEqual(outputs.right.view(list), expected_right)
    def test_low_memory_multinomial_nb(self):
        """``LowMemoryMultinomialNB`` results are the same for two chunk sizes.

        The same spec is fitted and applied with ``chunk_size`` 20000 and
        then 20; every classification from the first run must match the
        second.
        """
        # results should not depend on chunk size
        fitter = feature_classifier.methods.fit_classifier_sklearn
        classify = feature_classifier.methods.classify_sklearn
        reads = Artifact.import_data(
            'FeatureData[Sequence]',
            self.get_data_path('se-dna-sequences.fasta'))

        def classify_with(pipeline_spec):
            # Fit from the JSON-encoded spec, classify the training reads,
            # and return the classification as a plain dict.
            fitted = fitter(reads, self.taxonomy, json.dumps(pipeline_spec))
            classified = classify(reads, fitted.classifier)
            return classified.classification.view(pd.Series).to_dict()

        vectorizer_params = {
            '__type__': 'feature_extraction.text.HashingVectorizer',
            'analyzer': 'char',
            'n_features': 8192,
            'ngram_range': [8, 8],
            'alternate_sign': False,
        }
        nb_params = {
            '__type__': 'custom.LowMemoryMultinomialNB',
            'alpha': 0.01,
            'chunk_size': 20000,
        }
        spec = [['feat_ext', vectorizer_params], ['classify', nb_params]]

        large_chunks = classify_with(spec)

        spec[1][1]['chunk_size'] = 20
        small_chunks = classify_with(spec)

        for key in large_chunks:
            self.assertEqual(large_chunks[key], small_chunks[key])
Beispiel #35
0
    def test_call_with_multiple_outputs(self):
        """Call ``split_ints`` synchronously and check both outputs."""
        action = self.plugin.methods['split_ints']
        source = Artifact.import_data(IntSequence1, [0, 42, -2, 43, 6])

        outputs = action(source)

        self.assertIsInstance(outputs, tuple)
        self.assertEqual(len(outputs), 2)

        for produced in outputs:
            self.assertIsInstance(produced, Artifact)
            self.assertEqual(produced.type, IntSequence1)
            self.assertIsInstance(produced.uuid, uuid.UUID)

        # Output artifacts have different UUIDs.
        first, second = outputs[0], outputs[1]
        self.assertNotEqual(first.uuid, second.uuid)

        expected_left = [0, 42]
        expected_right = [-2, 43, 6]

        # Index lookup.
        self.assertEqual(outputs[0].view(list), expected_left)
        self.assertEqual(outputs[1].view(list), expected_right)

        # Test properties of the `Results` object.
        self.assertIsInstance(outputs, Results)
        self.assertEqual(outputs.left.view(list), expected_left)
        self.assertEqual(outputs.right.view(list), expected_right)
    def test_reader_transformer(self):
        """Importing the sample directory yields the expected data frames."""
        sample_dir = pkg_resources.resource_filename(
            'q2_winnowing.tests', 'sample_data/test_in_dir')

        artifact = Artifact.import_data(Winnowed, sample_dir)
        # `Artifact.view` invokes the transformer that handles the
        # `WinnowedFormat` -> `dataframe` transformation.
        featureOrdering_df, auc_df, permanova_df = artifact.view(list)[0]

        # Frames are compared as strings with dtype checks off: values read
        # back from disk are stored as objects while the hard-coded expected
        # frames are not, e.g. bool(False) == Object(False) is False in
        # pandas although the values function the same.
        comparisons = (
            (featureOrdering_df, exp_featureOrdering),
            (auc_df, exp_auc),
            (permanova_df, exp_permanova),
        )
        for observed, expected in comparisons:
            pd.testing.assert_frame_equal(
                observed.astype(str),
                expected.astype(str),
                check_dtype=False
            )
    def test_writer_transformer(self):
        """Data frames written via ``_from_view`` read back unchanged."""
        # `Artifact._from_view` invokes transformer that handles `dataframe` ->
        # `WinnowedFormat` with all input, because the
        # `WinnowedDirectoryFormat` has been registered as the directory
        # format for the semantic type.
        frames_in = [(exp_featureOrdering, exp_auc, exp_permanova)]
        artifact = Artifact._from_view(
            Winnowed, frames_in, list, archive.ImportProvenanceCapture())

        # Test that the directory and file format can be read again.
        got_featureOrdering, got_auc, got_permanova = artifact.view(list)[0]

        # Frames are compared as strings with dtype checks off: values read
        # back from disk are stored as objects while the hard-coded expected
        # frames are not, e.g. bool(False) == Object(False) is False in
        # pandas although the values function the same.
        comparisons = (
            (got_featureOrdering, exp_featureOrdering),
            (got_auc, exp_auc),
            (got_permanova, exp_permanova),
        )
        for observed, expected in comparisons:
            pd.testing.assert_frame_equal(
                observed.astype(str),
                expected.astype(str),
                check_dtype=False
            )
    def test_fit_classifier(self):
        """A fitted classifier labels over half of its training reads well.

        A read counts as correct when its reference taxonomy string starts
        with the predicted one.
        """
        # fit_classifier should generate a working taxonomic_classifier
        reads = Artifact.import_data(
            'FeatureData[Sequence]',
            self.get_data_path('se-dna-sequences.fasta'))

        # NOTE(review): sibling specs in this file use 'alternate_sign'
        # where this one uses 'non_negative' -- confirm which the targeted
        # scikit-learn version expects.
        vectorizer_params = {
            '__type__': 'feature_extraction.text.HashingVectorizer',
            'analyzer': 'char_wb',
            'n_features': 8192,
            'ngram_range': [8, 8],
            'non_negative': True,
        }
        nb_params = {'__type__': 'naive_bayes.MultinomialNB', 'alpha': 0.01}
        spec_json = json.dumps(
            [['feat_ext', vectorizer_params], ['classify', nb_params]])

        fit_classifier = feature_classifier.methods.fit_classifier
        fitted = fit_classifier(reads, self.taxonomy, spec_json)

        classify = feature_classifier.methods.classify
        classified = classify(reads, fitted.classifier)

        reference = self.taxonomy.view(pd.Series).to_dict()
        predicted = classified.classification.view(pd.Series).to_dict()

        n_correct = sum(reference[key].startswith(predicted[key])
                        for key in predicted)
        self.assertGreater(n_correct / len(predicted), 0.5)
Beispiel #39
0
    def test_write_v4_archive(self):
        """Saving under `artifact_version(4)` writes a version-4 archive.

        Checks both the set of archive members and that the VERSION file
        inside the archive mentions `archive: 4`.
        """
        archive_path = os.path.join(self.temp_dir.name, 'artifact_v1.qza')

        with artifact_version(4):
            artifact = Artifact._from_view(
                FourInts, [-1, 42, 0, 43], list, self.provenance_capture)
            artifact.save(archive_path)

        uuid_dir = str(artifact.uuid)
        top_level = {'VERSION', 'metadata.yaml'}
        data_members = {
            'data/file1.txt',
            'data/file2.txt',
            'data/nested/file3.txt',
            'data/nested/file4.txt',
        }
        provenance_members = {
            'provenance/metadata.yaml',
            'provenance/VERSION',
            'provenance/citations.bib',
            'provenance/action/action.yaml',
        }
        expected_members = top_level | data_members | provenance_members
        self.assertArchiveMembers(archive_path, uuid_dir, expected_members)

        version_member = os.path.join(uuid_dir, 'VERSION')
        with zipfile.ZipFile(archive_path, mode='r') as archive_zip:
            raw_version = archive_zip.read(version_member)
        # `str()` of the raw member content is matched as a single line.
        self.assertRegex(str(raw_version), '^.*archive: 4.*$')
Beispiel #40
0
    def test_asynchronous(self):
        """`concatenate_ints.asynchronous` returns a future with the results.

        The resolved value must be a one-element `Results` tuple whose
        artifact can be viewed repeatedly as both `list` and `Counter`.
        """
        action = self.plugin.methods['concatenate_ints']

        seq_a = Artifact.import_data(IntSequence1, [0, 42, 43])
        seq_b = Artifact.import_data(IntSequence2, [99, -22])

        pending = action.asynchronous(seq_a, seq_a, seq_b, 55, 1)
        self.assertIsInstance(pending, concurrent.futures.Future)

        outcome = pending.result()
        concatenated = [0, 42, 43, 0, 42, 43, 99, -22, 55, 1]

        # Test properties of the `Results` object.
        self.assertIsInstance(outcome, tuple)
        self.assertIsInstance(outcome, Results)
        self.assertEqual(len(outcome), 1)
        self.assertEqual(outcome.concatenated_ints.view(list), concatenated)

        produced = outcome[0]
        self.assertIsInstance(produced, Artifact)
        self.assertEqual(produced.type, IntSequence1)
        self.assertIsInstance(produced.uuid, uuid.UUID)

        # Each view type is requested twice to show that views can be
        # retrieved repeatedly and in different types.
        for _ in range(2):
            self.assertEqual(produced.view(list), concatenated)

        tally = collections.Counter(
            {0: 2, 42: 2, 43: 2, 99: 1, -22: 1, 55: 1, 1: 1})
        for _ in range(2):
            self.assertEqual(produced.view(collections.Counter), tally)

        # Accepts IntSequence1 | IntSequence2
        seq_c = Artifact.import_data(IntSequence2, [10, 20])
        pending = action.asynchronous(seq_c, seq_a, seq_b, 55, 1)
        produced, = pending.result()

        self.assertEqual(produced.type, IntSequence1)
        self.assertEqual(produced.view(list),
                         [10, 20, 0, 42, 43, 99, -22, 55, 1])
Beispiel #41
0
    def test_classify_otus_experimental(self):
        """Check `classify_otus_experimental` against stored expectations.

        Covers the tiny and small trees (expected tables come from TSV
        files), a reference phylogeny that must raise `ValueError`, and a
        taxonomy with missing entries that must raise `ValueError` and
        report the problem on stderr.
        """
        def read_expected(filename):
            # Expected taxonomy tables are tab-separated; blanks become ''.
            return pd.read_csv(
                self.get_data_path(filename),
                index_col=0,
                sep="\t").fillna("")

        ar_tree = Artifact.load(self.get_data_path('sepp_tree_tiny.qza'))
        ar_repseq = Artifact.load(self.get_data_path('real_data.qza'))

        observed_tiny = classify_otus_experimental(
            ar_repseq.view(DNASequencesDirectoryFormat),
            ar_tree.view(NewickFormat))
        expected_tiny = read_expected('taxonomy_real_data_tiny_otus.tsv')
        assert_frame_equal(observed_tiny, expected_tiny)

        small_tree = Artifact.load(
            self.get_data_path('sepp_tree_small.qza'))
        observed_small = classify_otus_experimental(
            ar_repseq.view(DNASequencesDirectoryFormat),
            small_tree.view(NewickFormat))
        expected_small = read_expected('taxonomy_real_data_small_otus.tsv')
        assert_frame_equal(observed_small, expected_small)

        # This reference phylogeny is expected to be rejected.
        tiny_reference = Artifact.load(
            self.get_data_path('reference_phylogeny_tiny.qza'))
        tiny_reference_view = tiny_reference.view(NewickFormat)
        with self.assertRaises(ValueError):
            classify_otus_experimental(
                ar_repseq.view(DNASequencesDirectoryFormat),
                tiny_reference_view)

        # test that missing taxon mappings result in an error
        incomplete_taxonomy = Artifact.load(
            self.get_data_path('taxonomy_missingotus.qza'))

        # capture stderr message and check if its content is as expected
        stderr_buffer = StringIO()
        with redirect_stderr(stderr_buffer), \
                self.assertRaises(ValueError):
            classify_otus_experimental(
                ar_repseq.view(DNASequencesDirectoryFormat),
                ar_tree.view(NewickFormat),
                reference_taxonomy=incomplete_taxonomy.view(pd.DataFrame))

        self.assertIn('The taxonomy artifact you provided does not cont',
                      stderr_buffer.getvalue())
        self.assertIn('539572', stderr_buffer.getvalue())
Beispiel #42
0
    def test_roundtrip(self):
        """Saving, loading, re-saving and re-loading preserves the artifact.

        Type, format, UUID and the `list` view must agree between the
        artifact loaded from the first file and the one loaded from the
        second.
        """
        first_path = os.path.join(self.test_dir.name, 'artifact1.qza')
        second_path = os.path.join(self.test_dir.name, 'artifact2.qza')

        original = Artifact.import_data(FourInts, [-1, 42, 0, 43])
        original.save(first_path)

        first_load = Artifact.load(first_path)
        first_load.save(second_path)
        second_load = Artifact.load(second_path)

        self.assertEqual(first_load.type, second_load.type)
        self.assertEqual(first_load.format, second_load.format)
        self.assertEqual(first_load.uuid, second_load.uuid)
        # The view is compared twice to make sure multiple views can be taken.
        for _ in range(2):
            self.assertEqual(first_load.view(list), second_load.view(list))
Beispiel #43
0
    def test_import_data_with_invalid_format_multi_file(self):
        """Importing a directory with a non-integer file must fail validation.

        Four files are written (two top-level, two under `nested/`); the last
        one contains `foo` instead of an integer, and the raised
        `ValidationError` must match `error_regex`.
        """
        source_dir = os.path.join(self.test_dir.name, 'test')
        os.mkdir(source_dir)
        nested_dir = os.path.join(source_dir, 'nested')
        os.mkdir(nested_dir)

        # (parent directory, file name, file content)
        layout = (
            (source_dir, 'file1.txt', '42\n'),
            (source_dir, 'file2.txt', '43\n'),
            (nested_dir, 'file3.txt', '44\n'),
            (nested_dir, 'file4.txt', 'foo\n'),
        )
        for parent, filename, content in layout:
            with open(os.path.join(parent, filename), 'w') as handle:
                handle.write(content)

        error_regex = "file4.txt.*SingleIntFormat.*\n\n.*integer"
        with self.assertRaisesRegex(ValidationError, error_regex):
            Artifact.import_data(FourInts, source_dir)
Beispiel #44
0
    def test_call_with_artifacts_and_parameters(self):
        """Calling `concatenate_ints` directly returns a usable `Results`.

        The single output artifact must be viewable repeatedly as both
        `list` and `Counter`, and the action must accept either integer
        sequence type as its first input.
        """
        action = self.plugin.methods['concatenate_ints']

        seq_a = Artifact.import_data(IntSequence1, [0, 42, 43])
        seq_b = Artifact.import_data(IntSequence2, [99, -22])

        outcome = action(seq_a, seq_a, seq_b, 55, 1)
        concatenated = [0, 42, 43, 0, 42, 43, 99, -22, 55, 1]

        # Test properties of the `Results` object.
        self.assertIsInstance(outcome, tuple)
        self.assertIsInstance(outcome, Results)
        self.assertEqual(len(outcome), 1)
        self.assertEqual(outcome.concatenated_ints.view(list), concatenated)

        produced = outcome[0]
        self.assertIsInstance(produced, Artifact)
        self.assertEqual(produced.type, IntSequence1)
        self.assertIsInstance(produced.uuid, uuid.UUID)

        # Each view type is requested twice to show that views can be
        # retrieved repeatedly and in different types.
        for _ in range(2):
            self.assertEqual(produced.view(list), concatenated)

        tally = collections.Counter(
            {0: 2, 42: 2, 43: 2, 99: 1, -22: 1, 55: 1, 1: 1})
        for _ in range(2):
            self.assertEqual(produced.view(collections.Counter), tally)

        # Accepts IntSequence1 | IntSequence2
        seq_c = Artifact.import_data(IntSequence2, [10, 20])
        produced, = action(seq_c, seq_a, seq_b, 55, 1)

        self.assertEqual(produced.type, IntSequence1)
        self.assertEqual(produced.view(list),
                         [10, 20, 0, 42, 43, 99, -22, 55, 1])
Beispiel #45
0
    def test_import_data_with_unrecognized_files(self):
        """Importing a directory with an unexpected file must fail validation.

        `nested/foo.txt` is written in place of a fourth expected file, and
        the raised `ValidationError` must match `error_regex`.
        """
        source_dir = os.path.join(self.test_dir.name, 'test')
        os.mkdir(source_dir)
        nested_dir = os.path.join(source_dir, 'nested')
        os.mkdir(nested_dir)

        # (parent directory, file name, file content)
        layout = (
            (source_dir, 'file1.txt', '42\n'),
            (source_dir, 'file2.txt', '43\n'),
            (nested_dir, 'file3.txt', '44\n'),
            (nested_dir, 'foo.txt', '45\n'),
        )
        for parent, filename, content in layout:
            with open(os.path.join(parent, filename), 'w') as handle:
                handle.write(content)

        error_regex = "Unrecognized.*foo.txt.*FourIntsDirectoryFormat"
        with self.assertRaisesRegex(ValidationError, error_regex):
            Artifact.import_data(FourInts, source_dir)
Beispiel #46
0
    def test_call_with_variadic_inputs(self):
        """`variadic_input_method` accepts list/set collections as inputs.

        A list of sequence artifacts, a set of single-int artifacts, a set
        of ints and a list of ints are passed; the output viewed as a list
        must equal 1 through 13.
        """
        variadic = self.plugin.methods['variadic_input_method']

        first_sequence = Artifact.import_data(IntSequence1, [1, 2, 3])
        second_sequence = Artifact.import_data(IntSequence2, [4, 5, 6])
        sequence_list = [first_sequence, second_sequence]

        seven = Artifact.import_data(SingleInt, 7)
        eight = Artifact.import_data(SingleInt, 8)
        single_int_set = {seven, eight}

        required_numbers = {9, 10}
        optional_numbers = [11, 12, 13]

        output, = variadic(sequence_list, single_int_set,
                           required_numbers, optional_numbers)

        self.assertEqual(output.view(list), list(range(1, 14)))
Beispiel #47
0
    def test_validate_artifact_bad(self):
        """Validation must fail once a stray file is added to the archive.

        `extra.file` is written under the archiver's root directory and
        `validate()` must raise a `ValidationError` naming that file.
        """
        artifact = Artifact.import_data('IntSequence1', [1, 2, 3, 4])

        stray_file = artifact._archiver.root_dir / 'extra.file'
        with stray_file.open('w') as handle:
            handle.write('uh oh')

        expected_error = exceptions.ValidationError
        with self.assertRaisesRegex(expected_error, r'extra\.file'):
            artifact.validate()
Beispiel #48
0
    def test_async(self):
        """The visualizer's `async` entry point returns a future of results.

        The resolved value must be a one-element `Results` tuple holding a
        `Visualization`; the saved `.qzv` must contain the expected members,
        including provenance entries for both input artifacts.
        """
        mapping_viz = self.plugin.visualizers['mapping_viz']

        artifact1 = Artifact.import_data(Mapping, {'foo': 'abc', 'bar': 'def'})
        artifact2 = Artifact.import_data(Mapping, {
            'baz': 'abc',
            'bazz': 'ghi'
        })

        # `async` is a reserved keyword from Python 3.7 on, so writing
        # `mapping_viz.async(...)` no longer parses. Looking the attribute up
        # by name calls the same method and keeps this module importable.
        run_async = getattr(mapping_viz, 'async')
        future = run_async(artifact1, artifact2, 'Key', 'Value')

        self.assertIsInstance(future, concurrent.futures.Future)
        result = future.result()

        # Test properties of the `Results` object.
        self.assertIsInstance(result, tuple)
        self.assertIsInstance(result, Results)
        self.assertEqual(len(result), 1)
        self.assertEqual(result.visualization, result[0])

        result = result[0]

        self.assertIsInstance(result, Visualization)
        self.assertEqual(result.type, qiime2.core.type.Visualization)

        self.assertIsInstance(result.uuid, uuid.UUID)

        # TODO qiime2.sdk.Visualization doesn't have an API to access its
        # contents yet. For now, save and assert the correct files are present.
        filepath = os.path.join(self.test_dir.name, 'visualization.qzv')
        result.save(filepath)

        root_dir = str(result.uuid)
        expected = {
            'VERSION', 'metadata.yaml', 'data/index.html',
            'data/css/style.css', 'provenance/metadata.yaml',
            'provenance/VERSION', 'provenance/action/action.yaml',
            'provenance/artifacts/%s/metadata.yaml' % artifact1.uuid,
            'provenance/artifacts/%s/VERSION' % artifact1.uuid,
            'provenance/artifacts/%s/action/action.yaml' % artifact1.uuid,
            'provenance/artifacts/%s/metadata.yaml' % artifact2.uuid,
            'provenance/artifacts/%s/VERSION' % artifact2.uuid,
            'provenance/artifacts/%s/action/action.yaml' % artifact2.uuid
        }

        self.assertArchiveMembers(filepath, root_dir, expected)
Beispiel #49
0
    def test_import_data_with_invalid_format_multi_file(self):
        """A directory holding one non-integer file is rejected on import.

        Writes three integer files plus `nested/file4.txt` containing `foo`,
        then expects `Artifact.import_data` to raise a `ValidationError`
        matching `error_regex`.
        """
        def write_text(directory, filename, content):
            # Create (or truncate) `filename` inside `directory`.
            with open(os.path.join(directory, filename), 'w') as handle:
                handle.write(content)

        import_dir = os.path.join(self.test_dir.name, 'test')
        os.mkdir(import_dir)
        write_text(import_dir, 'file1.txt', '42\n')
        write_text(import_dir, 'file2.txt', '43\n')

        nested_dir = os.path.join(import_dir, 'nested')
        os.mkdir(nested_dir)
        write_text(nested_dir, 'file3.txt', '44\n')
        write_text(nested_dir, 'file4.txt', 'foo\n')

        error_regex = "file4.txt.*SingleIntFormat.*\n\n.*integer"
        with self.assertRaisesRegex(ValidationError, error_regex):
            Artifact.import_data(FourInts, import_dir)
Beispiel #50
0
    def test_import_data_with_unrecognized_files(self):
        """A directory holding an unexpected file name is rejected on import.

        Writes three expected files plus `nested/foo.txt`, then expects
        `Artifact.import_data` to raise a `ValidationError` matching
        `error_regex`.
        """
        def write_text(directory, filename, content):
            # Create (or truncate) `filename` inside `directory`.
            with open(os.path.join(directory, filename), 'w') as handle:
                handle.write(content)

        import_dir = os.path.join(self.test_dir.name, 'test')
        os.mkdir(import_dir)
        write_text(import_dir, 'file1.txt', '42\n')
        write_text(import_dir, 'file2.txt', '43\n')

        nested_dir = os.path.join(import_dir, 'nested')
        os.mkdir(nested_dir)
        write_text(nested_dir, 'file3.txt', '44\n')
        write_text(nested_dir, 'foo.txt', '45\n')

        error_regex = "Unrecognized.*foo.txt.*FourIntsDirectoryFormat"
        with self.assertRaisesRegex(ValidationError, error_regex):
            Artifact.import_data(FourInts, import_dir)
Beispiel #51
0
    def test_load_and_save(self):
        """A loaded artifact can overwrite its source file and save elsewhere.

        Both resulting archives must contain the same expected members under
        the artifact's UUID directory.
        """
        source_path = os.path.join(self.test_dir.name, 'artifact1.qza')
        copy_path = os.path.join(self.test_dir.name, 'artifact2.qza')

        artifact = Artifact.import_data(FourInts, [-1, 42, 0, 43])
        artifact.save(source_path)

        artifact = Artifact.load(source_path)
        # Overwriting its source file works.
        artifact.save(source_path)
        # Saving to a new file works.
        artifact.save(copy_path)

        member_names = (
            'VERSION',
            'checksums.md5',
            'metadata.yaml',
            'data/file1.txt',
            'data/file2.txt',
            'data/nested/file3.txt',
            'data/nested/file4.txt',
            'provenance/metadata.yaml',
            'provenance/VERSION',
            'provenance/citations.bib',
            'provenance/action/action.yaml',
        )

        # Each archive is checked against a freshly built set of members.
        for saved_path in (source_path, copy_path):
            uuid_dir = str(artifact.uuid)
            self.assertArchiveMembers(
                saved_path, uuid_dir, set(member_names))
Beispiel #52
0
    def test_load_and_save(self):
        """Saving a loaded artifact works for both old and new file paths.

        The archive at the original path (overwritten) and the archive at
        the new path must each list the expected members.
        """
        def expected_members():
            # Build a new set on every call so each check gets its own copy.
            top_level = {'VERSION', 'checksums.md5', 'metadata.yaml'}
            data = {
                'data/file1.txt',
                'data/file2.txt',
                'data/nested/file3.txt',
                'data/nested/file4.txt',
            }
            provenance = {
                'provenance/metadata.yaml',
                'provenance/VERSION',
                'provenance/citations.bib',
                'provenance/action/action.yaml',
            }
            return top_level | data | provenance

        original_path = os.path.join(self.test_dir.name, 'artifact1.qza')
        new_path = os.path.join(self.test_dir.name, 'artifact2.qza')

        artifact = Artifact.import_data(FourInts, [-1, 42, 0, 43])
        artifact.save(original_path)

        artifact = Artifact.load(original_path)
        # Overwriting its source file works.
        artifact.save(original_path)
        # Saving to a new file works.
        artifact.save(new_path)

        self.assertArchiveMembers(
            original_path, str(artifact.uuid), expected_members())
        self.assertArchiveMembers(
            new_path, str(artifact.uuid), expected_members())
    def test_reader_transformer(self):
        """Importing the packaged int-sequence file yields the expected list.

        The same file is imported once as `IntSequence1` and once as
        `IntSequence2`; both artifacts must view as the same list.
        """
        data_path = pkg_resources.resource_filename(
            'q2_dummy_types.tests', 'data/int-sequence.txt')
        expected_values = [42, -1, 9, 10, 0, 999, 0]

        for semantic_type in (IntSequence1, IntSequence2):
            imported = Artifact.import_data(semantic_type, data_path)
            # `Artifact.view` invokes the transformer from the stored format
            # to `list` (the original note names `SingleIntFormat` as the
            # source format -- TODO confirm).
            self.assertEqual(imported.view(list), expected_values)
Beispiel #54
0
def create_artifact():
    """Import data described by the request's JSON body and save it.

    Reads the `type`, `path`, `source_format` and `name` keys from the JSON
    body, imports the data as an artifact and saves it under the current
    working directory, appending `.qza` when the name lacks that suffix.

    Returns:
        An empty string.
    """
    # NOTE(review): `path` and `name` come straight from the request body and
    # are used as filesystem paths unchecked -- confirm callers are trusted.
    payload = request.get_json()
    artifact = Artifact.import_data(
        payload['type'], payload['path'], payload['source_format'])

    destination = os.path.join(os.getcwd(), payload['name'])
    if not destination.endswith('.qza'):
        destination = destination + '.qza'

    artifact.save(destination)
    return ''
Beispiel #55
0
    def test_primitive_param_not_valid_choice(self):
        """An unsupported `break_from` string makes the pipeline raise.

        `failing_pipeline` is called with the string "invalid choice" and
        must raise a `TypeError` whose message mentions both the parameter
        name and the quoted value.
        """
        failing_pipeline = self.plugin.pipelines['failing_pipeline']
        ints = Artifact.import_data(IntSequence1, [0, 42, 43])
        bad_choice = "invalid choice"

        # test String not a valid choice
        message_pattern = "break_from.*'invalid choice'"
        with self.assertRaisesRegex(TypeError, message_pattern):
            failing_pipeline(ints, bad_choice)
 def setUp(self):
     """Prepare the taxonomy series and the reads fixture for each test."""
     super().setUp()

     taxonomy_path = self.get_data_path('taxonomy.tsv')
     taxonomy_artifact = Artifact.import_data(
         'FeatureData[Taxonomy]', taxonomy_path)
     self.taxonomy = taxonomy_artifact.view(pd.Series)

     # TODO: use `Artifact.import_data` here once we have a transformer
     # for DNASequencesDirectoryFormat -> DNAFASTAFormat
     self.reads_fp = self.get_data_path('se-dna-sequences.fasta')
     self.reads = DNAFASTAFormat(self.reads_fp, mode='r')
Beispiel #57
0
    def test_extract(self):
        """`Artifact.extract` unpacks a saved archive under `output_dir`.

        The returned directory must be `<output_dir>/<uuid>` and the
        extracted tree must contain the expected members.
        """
        archive_path = os.path.join(self.test_dir.name, 'artifact.qza')
        artifact = Artifact.import_data(FourInts, [-1, 42, 0, 43])
        artifact.save(archive_path)

        uuid_dir = str(artifact.uuid)
        extract_to = os.path.join(self.test_dir.name, 'artifact-extract-test')
        extracted = Artifact.extract(archive_path, output_dir=extract_to)
        self.assertEqual(extracted, os.path.join(extract_to, uuid_dir))

        expected_members = {
            'VERSION',
            'metadata.yaml',
            'data/file1.txt',
            'data/file2.txt',
            'data/nested/file3.txt',
            'data/nested/file4.txt',
            'provenance/metadata.yaml',
            'provenance/VERSION',
            'provenance/action/action.yaml',
        }

        self.assertExtractedArchiveMembers(
            extract_to, uuid_dir, expected_members)
    def test_exercise_classify_otus_experimental(self):
        """Run the action end to end and compare with stored results.

        The action's single output, viewed as a data frame, must equal the
        frame obtained by importing `sepp-results.tsv` as taxonomy.
        """
        outputs = self.action(self.input_sequences, self.tree, self.taxonomy)
        observed_artifact, = outputs
        observed_frame = observed_artifact.view(pd.DataFrame)

        expected_path = self.get_data_path('sepp-results.tsv')
        expected_artifact = Artifact.import_data(
            'FeatureData[Taxonomy]', expected_path)
        expected_frame = expected_artifact.view(pd.DataFrame)

        assert_frame_equal(observed_frame, expected_frame)
Beispiel #59
0
    def test_extract_reads_expected_reverse(self):
        """`extract_reads` on reverse-oriented input gives the usual results.

        The reverse FASTA fixture is processed with
        `read_orientation='reverse'` and the output is checked by
        `_test_results`.
        """
        fasta_path = self.get_data_path('dna-sequences-reverse.fasta')
        reverse_sequences = Artifact.import_data(
            'FeatureData[Sequence]', fasta_path)

        extracted = extract_reads(
            reverse_sequences,
            f_primer=self.f_primer,
            r_primer=self.r_primer,
            min_length=4,
            read_orientation='reverse')

        self._test_results(extracted)
Beispiel #60
0
    def test_import_data_with_good_validation_multi_files(self):
        """A directory of two files, each holding `1`, imports as SingleInt.

        The imported artifact viewed as `int` must equal 1.
        """
        source_dir = os.path.join(self.test_dir.name, 'test')
        os.mkdir(source_dir)

        for filename in ('file1.txt', 'file2.txt'):
            with open(os.path.join(source_dir, filename), 'w') as handle:
                handle.write('1\n')

        imported = Artifact.import_data(SingleInt, source_dir)
        self.assertEqual(1, imported.view(int))