Ejemplo n.º 1
0
 def test_artifact_validate_min(self):
     # Run minimum-level validation on one artifact per (type, data) pair;
     # the test only fails if `import_data` or `validate` raises.
     cases = (
         ('IntSequence1', [1, 2, 3, 4]),
         ('Mapping', {'a': '1', 'b': '2'}),
     )
     for semantic_type, payload in cases:
         artifact = Artifact.import_data(semantic_type, payload)
         artifact.validate(level='min')
         self.assertTrue(True)  # Checkpoint assertion
Ejemplo n.º 2
0
    def test_call_with_optional_artifacts(self):
        # Call `optional_artifacts_method` with none, one and all of its
        # optional inputs, then with an optional input of the wrong type.
        action = self.plugin.methods['optional_artifacts_method']

        required = Artifact.import_data(IntSequence1, [0, 42, 43])
        extra_seq1 = Artifact.import_data(IntSequence1, [99, -22])
        extra_seq2 = Artifact.import_data(IntSequence2, [43, 43])

        # Required inputs only.
        observed = action(required, 42).output
        self.assertEqual(observed.view(list), [0, 42, 43, 42])

        # A single optional artifact.
        observed = action(required, 42, optional1=extra_seq1).output
        self.assertEqual(observed.view(list), [0, 42, 43, 42, 99, -22])

        # Every optional input supplied.
        observed = action(required, 42, optional1=extra_seq1,
                          optional2=extra_seq2, num2=111).output
        self.assertEqual(observed.view(list),
                         [0, 42, 43, 42, 99, -22, 43, 43, 111])

        # An IntSequence2 artifact passed as `optional1` must be rejected.
        with self.assertRaisesRegex(
                TypeError, 'type IntSequence2.*subtype IntSequence1'):
            action(required, 42, optional1=extra_seq2)
Ejemplo n.º 3
0
    def test_load_with_archive_filepath_modified(self):
        # Write an initial archive that is overwritten later in the test.
        archive_fp = os.path.join(self.test_dir.name, 'artifact.qza')
        Artifact.import_data(FourInts, [-1, 42, 0, 43]).save(archive_fp)

        # Load from the filepath, overwrite the file with a different
        # artifact, load again, and check that each loaded artifact still
        # yields its own data.
        #
        # Background: `load` was once lazy and only extracted data on demand
        # (e.g. on `save` or `view`). That was buggy because the file could
        # be deleted, or replaced by a different .qza, before extraction, so
        # the wrong archive (or none at all) could be read. With no easy
        # cross-platform fix, Artifact.load now extracts eagerly. Quick
        # inspection without extracting/copying data, which was the real
        # motivation for lazy loading, is provided by Artifact.peek.
        loaded_before = Artifact.load(archive_fp)
        Artifact.import_data(FourInts, [10, 11, 12, 13]).save(archive_fp)
        loaded_after = Artifact.load(archive_fp)

        self.assertEqual(loaded_before.view(list), [-1, 42, 0, 43])
        self.assertEqual(loaded_after.view(list), [10, 11, 12, 13])
    def test_classify(self):
        # Check read-direction detection and batched/parallel classification
        # of `classify_sklearn` against `self.classifier`.
        classify = feature_classifier.methods.classify_sklearn

        def predict(seqs, **options):
            # Classify `seqs` and return the classification as a plain dict.
            outcome = classify(seqs, self.classifier, **options)
            return outcome.classification.view(pd.Series).to_dict()

        seq_path = self.get_data_path('se-dna-sequences.fasta')
        reads = Artifact.import_data('FeatureData[Sequence]', seq_path)

        # Write the reverse complement of every read to a temporary FASTA
        # file and import that as a second set of reads.
        raw_reads = skbio.io.read(
            seq_path, format='fasta', constructor=skbio.DNA)
        rev_path = os.path.join(self.temp_dir.name, 'rev-dna-sequences.fasta')
        skbio.io.write((s.reverse_complement() for s in raw_reads),
                       'fasta', rev_path)
        rev_reads = Artifact.import_data('FeatureData[Sequence]', rev_path)

        # No `read_orientation` given: both orientations must agree.
        forward = predict(reads)
        reverse = predict(rev_reads)
        for taxon, label in forward.items():
            self.assertEqual(label, reverse[taxon])

        # Orientation stated explicitly for each set of reads.
        forward = predict(reads, read_orientation='same')
        reverse = predict(rev_reads, read_orientation='reverse-complement')
        for taxon, label in forward.items():
            self.assertEqual(label, reverse[taxon])

        # Batched classification with two jobs must match the result above.
        batched = predict(reads, reads_per_batch=100, n_jobs=2)
        for taxon, label in forward.items():
            self.assertEqual(label, batched[taxon])
Ejemplo n.º 5
0
    def test_import_data_with_unreachable_path(self):
        # Importing from a path that was never created must raise a
        # ValueError mentioning that the path does not exist.
        missing_file = os.path.join(self.test_dir.name, 'foo.txt')
        with self.assertRaisesRegex(ValueError, "does not exist"):
            Artifact.import_data(IntSequence1, missing_file)

        # Same check for a directory-style path (trailing separator).
        missing_dir = os.path.join(self.test_dir.name, 'bar', '')
        with self.assertRaisesRegex(ValueError, "does not exist"):
            Artifact.import_data(FourInts, missing_dir)
Ejemplo n.º 6
0
    def test_import_data_with_filepath_multi_file_data_layout(self):
        # Importing FourInts from a single file is expected to fail with a
        # ValidationError that names FourIntsDirectoryFormat and "directory".
        single_file = os.path.join(self.test_dir.name, 'test.txt')
        with open(single_file, 'w') as handle:
            handle.write('42\n')

        expected_error = "FourIntsDirectoryFormat.*directory"
        with self.assertRaisesRegex(qiime2.plugin.ValidationError,
                                    expected_error):
            Artifact.import_data(FourInts, single_file)
Ejemplo n.º 7
0
    def test_import_data_invalid_type(self):
        # `Visualization` must be rejected as an import type, whether it is
        # given as the type object or as its string name.
        rejected_types = (qiime2.core.type.Visualization, 'Visualization')
        for rejected in rejected_types:
            with self.assertRaisesRegex(
                    TypeError, 'concrete semantic type.*Visualization'):
                Artifact.import_data(rejected, self.test_dir)
Ejemplo n.º 8
0
 def test_artifact_validate_max(self):
     # The mapping artifact must validate at the default level and at
     # 'max'; the 8-element int sequence is expected to raise a
     # ValidationError matching '3 more' at 'max'.
     mapping = Artifact.import_data('Mapping', {'a': '1', 'b': '2'})
     for validate_kwargs in ({}, {'level': 'max'}):
         mapping.validate(**validate_kwargs)
         self.assertTrue(True)  # Checkpoint assertion
     int_seq = Artifact.import_data('IntSequence1',
                                    [1, 2, 3, 4, 5, 6, 7, 10])
     with self.assertRaisesRegex(ValidationError, '3 more'):
         int_seq.validate('max')
    def test_class_weight(self):
        # We should be able to pass `class_weight` to the classifier
        # fitters. Each half of the test fits one classifier from the
        # weights table and one from the same weights spelled out by hand,
        # then checks that both classify the reads identically.
        weights = Artifact.import_data(
            'FeatureTable[RelativeFrequency]',
            self.get_data_path('class_weight.biom'),
            view_type='BIOMV100Format')
        reads = Artifact.import_data(
            'FeatureData[Sequence]',
            self.get_data_path('se-dna-sequences.fasta'))

        fitter = feature_classifier.methods.fit_classifier_naive_bayes
        classifier1 = fitter(reads, self.taxonomy, class_weight=weights)
        classifier1 = classifier1.classifier

        class_weight = weights.view(biom.Table)
        classes = class_weight.ids('observation')
        # Materialise every (class, weight) pairing as a list. A bare `zip`
        # is a one-shot iterator: `sorted(...)` below would exhaust it and
        # the later `dict(class_weights[0])` would silently be empty.
        class_weights = [list(zip(classes, wts))
                         for wts in class_weight.iter_data()]
        priors = json.dumps(list(zip(*sorted(class_weights[0])))[1])
        classifier2 = fitter(reads, self.taxonomy,
                             classify__class_prior=priors).classifier

        classify = feature_classifier.methods.classify_sklearn
        result1 = classify(reads, classifier1)
        result1 = result1.classification.view(pd.Series).to_dict()
        result2 = classify(reads, classifier2)
        result2 = result2.classification.view(pd.Series).to_dict()
        self.assertEqual(result1, result2)

        # Second half: a generic sklearn pipeline whose final step takes a
        # `class_weight` parameter.
        svc_spec = [['feat_ext',
                     {'__type__': 'feature_extraction.text.HashingVectorizer',
                      'analyzer': 'char_wb',
                      'n_features': 8192,
                      'ngram_range': [8, 8],
                      'alternate_sign': False}],
                    ['classify',
                     {'__type__': 'linear_model.LogisticRegression'}]]
        classifier_spec = json.dumps(svc_spec)
        gen_fitter = feature_classifier.methods.fit_classifier_sklearn
        classifier1 = gen_fitter(reads, self.taxonomy, classifier_spec,
                                 class_weight=weights).classifier

        # Same pipeline, with the weights written into the spec itself.
        svc_spec[1][1]['class_weight'] = dict(class_weights[0])
        classifier_spec = json.dumps(svc_spec)
        classifier2 = gen_fitter(reads, self.taxonomy, classifier_spec
                                 ).classifier

        result1 = classify(reads, classifier1)
        result1 = result1.classification.view(pd.Series).to_dict()
        result2 = classify(reads, classifier2)
        result2 = result2.classification.view(pd.Series).to_dict()
        self.assertEqual(set(result1.keys()), set(result2.keys()))
        for k in result1:
            self.assertEqual(result1[k], result2[k])
Ejemplo n.º 10
0
    def test_import_data_with_invalid_format_single_file(self):
        # Import a file whose third line is not an integer and check that
        # the error names the file, the format and "Line 3".
        bad_file = os.path.join(self.test_dir.name, 'foo.txt')
        with open(bad_file, 'w') as handle:
            for line in ('42\n', '43\n', 'abc\n', '123\n'):
                handle.write(line)

        pattern = "foo.txt.*IntSequenceFormat.*\n\n.*Line 3"
        with self.assertRaisesRegex(ValidationError, pattern):
            Artifact.import_data(IntSequence1, bad_file)
    def setUp(self):
        # Shared fixtures: the reference taxonomy, the sequence path and a
        # classifier fitted with the first entry of `_specific_fitters`.
        super().setUp()
        taxonomy_fp = self.get_data_path('taxonomy.tsv')
        self.taxonomy = Artifact.import_data(
            'FeatureData[Taxonomy]', taxonomy_fp)

        self.seq_path = self.get_data_path('se-dna-sequences.fasta')
        training_reads = Artifact.import_data(
            'FeatureData[Sequence]', self.seq_path)
        fit_method_name = 'fit_classifier_' + _specific_fitters[0][0]
        fit = getattr(feature_classifier.methods, fit_method_name)
        self.classifier = fit(training_reads, self.taxonomy).classifier
Ejemplo n.º 12
0
    def test_import_data_with_bad_validation_multi_files(self):
        # Import SingleInt from a directory of two files holding different
        # integers and check for the "does not match" ValidationError.
        data_dir = os.path.join(self.test_dir.name, 'test')
        os.mkdir(data_dir)
        for filename, contents in (('file1.txt', '1\n'),
                                   ('file2.txt', '2\n')):
            with open(os.path.join(data_dir, filename), 'w') as handle:
                handle.write(contents)

        error_regex = ("test.*RedundantSingleIntDirectoryFormat.*\n\n"
                       ".*does not match")
        with self.assertRaisesRegex(ValidationError, error_regex):
            Artifact.import_data(SingleInt, data_dir)
Ejemplo n.º 13
0
    def test_asynchronous(self):
        # Run `mapping_viz` through its `asynchronous` entry point and check
        # the future, the `Results` wrapper and the saved archive's members.
        visualizer = self.plugin.visualizers['mapping_viz']

        artifact1 = Artifact.import_data(Mapping, {'foo': 'abc', 'bar': 'def'})
        artifact2 = Artifact.import_data(
            Mapping, {'baz': 'abc', 'bazz': 'ghi'})

        future = visualizer.asynchronous(artifact1, artifact2, 'Key', 'Value')
        self.assertIsInstance(future, concurrent.futures.Future)
        results = future.result()

        # Properties of the `Results` object.
        self.assertIsInstance(results, tuple)
        self.assertIsInstance(results, Results)
        self.assertEqual(len(results), 1)
        self.assertEqual(results.visualization, results[0])

        visualization = results[0]
        self.assertIsInstance(visualization, Visualization)
        self.assertEqual(visualization.type, qiime2.core.type.Visualization)
        self.assertIsInstance(visualization.uuid, uuid.UUID)

        # TODO qiime2.sdk.Visualization doesn't have an API to access its
        # contents yet. For now, save and assert the correct files are present.
        filepath = os.path.join(self.test_dir.name, 'visualization.qzv')
        visualization.save(filepath)

        expected = {
            'VERSION',
            'checksums.md5',
            'metadata.yaml',
            'data/index.html',
            'data/css/style.css',
            'provenance/metadata.yaml',
            'provenance/VERSION',
            'provenance/citations.bib',
            'provenance/action/action.yaml',
        }
        # Each input artifact contributes the same four provenance members
        # under its own UUID.
        per_artifact_members = ('metadata.yaml', 'VERSION', 'citations.bib',
                                'action/action.yaml')
        for parent in (artifact1, artifact2):
            for member in per_artifact_members:
                expected.add(
                    'provenance/artifacts/%s/%s' % (parent.uuid, member))

        self.assertArchiveMembers(filepath, str(visualization.uuid), expected)
Ejemplo n.º 14
0
    def test_call_with_variadic_inputs(self):
        # Call `variadic_input_method` with list/set collections of
        # artifacts and of plain numbers, then check the single output.
        variadic = self.plugin.methods['variadic_input_method']

        sequence_artifacts = [
            Artifact.import_data(IntSequence1, [1, 2, 3]),
            Artifact.import_data(IntSequence2, [4, 5, 6]),
        ]
        single_int_artifacts = {
            Artifact.import_data(SingleInt, 7),
            Artifact.import_data(SingleInt, 8),
        }
        number_set = {9, 10}
        optional_numbers = [11, 12, 13]

        (output,) = variadic(sequence_artifacts, single_int_artifacts,
                             number_set, optional_numbers)

        self.assertEqual(output.view(list), list(range(1, 14)))
    def setUp(self):
        # Fit a naive Bayes classifier, transform its sklearn Pipeline to
        # the temporary pickle directory format, and keep the path of the
        # pickled pipeline as a string.
        super().setUp()

        seqs_fp = self.get_data_path('se-dna-sequences.fasta')
        training_reads = Artifact.import_data(
            'FeatureData[Sequence]', seqs_fp)
        taxonomy_fp = self.get_data_path('taxonomy.tsv')
        reference_taxonomy = Artifact.import_data(
            'FeatureData[Taxonomy]', taxonomy_fp)

        fitted = fit_classifier_naive_bayes(training_reads,
                                            reference_taxonomy)
        sk_pipeline = fitted.classifier.view(Pipeline)
        to_pickle_dir = self.get_transformer(
            Pipeline, TaxonomicClassiferTemporaryPickleDirFmt)
        # Kept on `self`, as in the original fixture.
        self._sklp = to_pickle_dir(sk_pipeline)
        pickle_file = self._sklp.sklearn_pipeline.view(PickleFormat)
        self.sklearn_pipeline = str(pickle_file)
Ejemplo n.º 16
0
    def test_async(self):
        # Run `mapping_viz` through its `async` entry point and check the
        # future, the `Results` wrapper and the saved archive's members.
        mapping_viz = self.plugin.visualizers['mapping_viz']

        artifact1 = Artifact.import_data(Mapping, {'foo': 'abc', 'bar': 'def'})
        artifact2 = Artifact.import_data(
            Mapping, {'baz': 'abc', 'bazz': 'ghi'})

        # `async` is a reserved keyword from Python 3.7 on, so writing
        # `mapping_viz.async(...)` is a SyntaxError that stops the whole
        # module from importing. Fetch the attribute by name instead; the
        # method that gets called is unchanged.
        run_async = getattr(mapping_viz, 'async')
        future = run_async(artifact1, artifact2, 'Key', 'Value')

        self.assertIsInstance(future, concurrent.futures.Future)
        result = future.result()

        # Test properties of the `Results` object.
        self.assertIsInstance(result, tuple)
        self.assertIsInstance(result, Results)
        self.assertEqual(len(result), 1)
        self.assertEqual(result.visualization, result[0])

        result = result[0]

        self.assertIsInstance(result, Visualization)
        self.assertEqual(result.type, qiime2.core.type.Visualization)

        self.assertIsInstance(result.uuid, uuid.UUID)

        # TODO qiime2.sdk.Visualization doesn't have an API to access its
        # contents yet. For now, save and assert the correct files are present.
        filepath = os.path.join(self.test_dir.name, 'visualization.qzv')
        result.save(filepath)

        root_dir = str(result.uuid)
        expected = {
            'VERSION',
            'metadata.yaml',
            'data/index.html',
            'data/css/style.css',
            'provenance/metadata.yaml',
            'provenance/VERSION',
            'provenance/action/action.yaml',
            'provenance/artifacts/%s/metadata.yaml' % artifact1.uuid,
            'provenance/artifacts/%s/VERSION' % artifact1.uuid,
            'provenance/artifacts/%s/action/action.yaml' % artifact1.uuid,
            'provenance/artifacts/%s/metadata.yaml' % artifact2.uuid,
            'provenance/artifacts/%s/VERSION' % artifact2.uuid,
            'provenance/artifacts/%s/action/action.yaml' % artifact2.uuid
        }

        self.assertArchiveMembers(filepath, root_dir, expected)
Ejemplo n.º 17
0
    def test_load_different_type_with_multiple_view_types(self):
        # Save an IntSequence1 artifact, load it back, and view it as both
        # `list` and `collections.Counter`.
        expected_values = [42, 42, 43, -999, 42]
        source = Artifact.import_data(IntSequence1, list(expected_values))
        archive_fp = os.path.join(self.test_dir.name, 'artifact.qza')
        source.save(archive_fp)

        loaded = Artifact.load(archive_fp)

        self.assertEqual(loaded.type, IntSequence1)
        self.assertEqual(loaded.uuid, source.uuid)

        # Each view type is requested twice, as in the original test;
        # presumably to confirm repeated views give the same data.
        for _ in range(2):
            self.assertEqual(loaded.view(list), expected_values)

        for _ in range(2):
            self.assertEqual(
                loaded.view(collections.Counter),
                collections.Counter({42: 3, 43: 1, -999: 1}))
    def test_populate_class_weight(self):
        # `populate_class_weight` should set the priors of a pipeline to the
        # same values as a pipeline built with the priors written into its
        # spec.
        weights = Artifact.import_data(
            'FeatureTable[RelativeFrequency]',
            self.get_data_path('class_weight.biom'),
            view_type='BIOMV100Format')
        table = weights.view(biom.Table)

        vectorizer_params = {
            '__type__': 'feature_extraction.text.HashingVectorizer',
            'analyzer': 'char_wb',
            'n_features': 8192,
            'ngram_range': [8, 8],
            'alternate_sign': False,
        }
        classifier_params = {'__type__': 'naive_bayes.GaussianNB'}
        svc_spec = [['feat_ext', vectorizer_params],
                    ['classify', classifier_params]]

        populated = pipeline_from_spec(svc_spec)
        populate_class_weight(populated, table)

        # Build the expected priors by hand: pair each class with its
        # weight, sort the pairs, and keep the weights.
        classes = table.ids('observation')
        class_weights = [zip(classes, wts) for wts in table.iter_data()]
        classifier_params['priors'] = list(zip(*sorted(class_weights[0])))[1]
        handwritten = pipeline_from_spec(svc_spec)

        observed_priors = populated.get_params()['classify__priors']
        expected_priors = handwritten.get_params()['classify__priors']
        for observed, expected in zip(observed_priors, expected_priors):
            self.assertAlmostEqual(observed, expected)
Ejemplo n.º 19
0
    def test_reader_transformer(self):
        # Import the sample `Winnowed` directory and compare the three data
        # frames of its first `list` view entry with the expected frames.
        sample_dir = pkg_resources.resource_filename(
            'q2_winnowing.tests', 'sample_data/test_in_dir')

        artifact = Artifact.import_data(Winnowed, sample_dir)
        # `Artifact.view` invokes the transformer that handles the
        # `WinnowedFormat` -> `dataframe` transformation.
        featureOrdering_df, auc_df, permanova_df = artifact.view(list)[0]

        # Both sides are cast to `str` and dtype checks are disabled: frames
        # read from disk store values as objects while the hard-coded
        # expectations do not, e.g. bool(False) == Object(False) is False in
        # pandas although the values behave the same.
        comparisons = (
            (featureOrdering_df, exp_featureOrdering),
            (auc_df, exp_auc),
            (permanova_df, exp_permanova),
        )
        for observed, expected in comparisons:
            pd.testing.assert_frame_equal(
                observed.astype(str),
                expected.astype(str),
                check_dtype=False
            )
Ejemplo n.º 20
0
    def test_visualizer_callable_output(self):
        # A visualizer callable that returns anything other than None must
        # raise a TypeError; one that returns None must produce `Results`.
        mapping_artifact = Artifact.import_data(
            Mapping, {'foo': 'abc', 'bar': 'def'})

        def register(callable_):
            # Register `callable_` and return the visualizer stored under
            # the name 'func'.
            self.plugin.visualizers.register_function(
                callable_, {'foo': Mapping}, {}, '', ''
            )
            return self.plugin.visualizers['func']

        # Callable returns each of these values in turn.
        for return_val in (True, False, [], {}, '', 0, 0.0):
            def func(output_dir: str, foo: dict) -> None:
                return return_val

            visualizer = register(func)

            with self.assertRaisesRegex(TypeError, "should not return"):
                visualizer(foo=mapping_artifact)

        # Callable returns None (default function return)
        def func(output_dir: str, foo: dict) -> None:
            return None

        visualizer = register(func)

        # Should not raise an exception
        output = visualizer(foo=mapping_artifact)
        self.assertIsInstance(output, Results)
        self.assertIsInstance(output.visualization, Visualization)
Ejemplo n.º 21
0
    def test_load_and_save(self):
        # Save an artifact, load it back, then save the loaded artifact both
        # over its source file and to a new file; both archives must hold
        # the same members.
        source_fp = os.path.join(self.test_dir.name, 'artifact1.qza')
        copy_fp = os.path.join(self.test_dir.name, 'artifact2.qza')
        artifact = Artifact.import_data(FourInts, [-1, 42, 0, 43])
        artifact.save(source_fp)

        artifact = Artifact.load(source_fp)
        # Overwriting its source file works.
        artifact.save(source_fp)
        # Saving to a new file works.
        artifact.save(copy_fp)

        for archive_fp in (source_fp, copy_fp):
            root_dir = str(artifact.uuid)
            expected = {
                'VERSION',
                'metadata.yaml',
                'data/file1.txt',
                'data/file2.txt',
                'data/nested/file3.txt',
                'data/nested/file4.txt',
                'provenance/metadata.yaml',
                'provenance/VERSION',
                'provenance/action/action.yaml',
            }
            self.assertArchiveMembers(archive_fp, root_dir, expected)
    def test_populate_class_weight(self):
        # `populate_class_weight` should set the priors of a pipeline to the
        # same values as a pipeline built with the priors written into its
        # spec.
        weights = Artifact.import_data(
            'FeatureTable[RelativeFrequency]',
            self.get_data_path('class_weight.biom'),
            view_type='BIOMV100Format')
        table = weights.view(biom.Table)

        vectorizer_params = {
            '__type__': 'feature_extraction.text.HashingVectorizer',
            'analyzer': 'char_wb',
            'n_features': 8192,
            'ngram_range': [8, 8],
            'alternate_sign': False,
        }
        classifier_params = {'__type__': 'naive_bayes.GaussianNB'}
        svc_spec = [['feat_ext', vectorizer_params],
                    ['classify', classifier_params]]

        populated = pipeline_from_spec(svc_spec)
        populate_class_weight(populated, table)

        # Build the expected priors by hand: pair each class with its
        # weight, sort the pairs, and keep the weights.
        classes = table.ids('observation')
        class_weights = [zip(classes, wts) for wts in table.iter_data()]
        classifier_params['priors'] = list(zip(*sorted(class_weights[0])))[1]
        handwritten = pipeline_from_spec(svc_spec)

        observed_priors = populated.get_params()['classify__priors']
        expected_priors = handwritten.get_params()['classify__priors']
        for observed, expected in zip(observed_priors, expected_priors):
            self.assertAlmostEqual(observed, expected)
 def test_filter_features_nooverlap(self):
     # Reuse the reference tree as the mismatching tree instead of
     # creating dedicated test data, and expect the action to raise.
     reference_tree_fp = self.get_data_path('ref-tree.nwk')
     mismatched_tree = Artifact.import_data('Phylogeny[Rooted]',
                                            reference_tree_fp)
     expected_error = 'Not a single fragment.*empty'
     with self.assertRaisesRegex(ValueError, expected_error):
         self.action(self.table, mismatched_tree)
Ejemplo n.º 24
0
    def test_low_memory_multinomial_nb(self):
        # Classification results should not depend on `chunk_size`.
        fitter = feature_classifier.methods.fit_classifier_sklearn
        classify = feature_classifier.methods.classify_sklearn
        reads = Artifact.import_data(
            'FeatureData[Sequence]',
            self.get_data_path('se-dna-sequences.fasta'))

        def fit_and_classify(pipeline_spec):
            # Fit on `reads` with the given spec, classify the same reads,
            # and return the classification as a plain dict.
            fitted = fitter(reads, self.taxonomy, json.dumps(pipeline_spec))
            outcome = classify(reads, fitted.classifier)
            return outcome.classification.view(pd.Series).to_dict()

        nb_params = {'__type__': 'custom.LowMemoryMultinomialNB',
                     'alpha': 0.01,
                     'chunk_size': 20000}
        spec = [['feat_ext',
                 {'__type__': 'feature_extraction.text.HashingVectorizer',
                  'analyzer': 'char',
                  'n_features': 8192,
                  'ngram_range': [8, 8],
                  'alternate_sign': False}],
                ['classify', nb_params]]

        large_chunks = fit_and_classify(spec)

        nb_params['chunk_size'] = 20
        small_chunks = fit_and_classify(spec)

        for taxon, label in large_chunks.items():
            self.assertEqual(label, small_chunks[taxon])
Ejemplo n.º 25
0
    def test_extract(self):
        # Save an artifact, extract the archive into a fresh directory, and
        # check the returned path and the extracted members.
        archive_fp = os.path.join(self.test_dir.name, 'artifact.qza')
        saved = Artifact.import_data(FourInts, [-1, 42, 0, 43])
        saved.save(archive_fp)

        uuid_dir = str(saved.uuid)
        # pathlib normalizes away the `.`, it doesn't matter, but this is the
        # implementation we're using, so let's test against that assumption.
        output_dir = pathlib.Path(self.test_dir.name) / 'artifact-extract-test'
        extracted_to = Artifact.extract(archive_fp, output_dir=output_dir)
        self.assertEqual(extracted_to, str(output_dir / uuid_dir))

        expected_members = {
            'VERSION', 'metadata.yaml',
            'data/file1.txt', 'data/file2.txt',
            'data/nested/file3.txt', 'data/nested/file4.txt',
            'provenance/metadata.yaml', 'provenance/VERSION',
            'provenance/citations.bib', 'provenance/action/action.yaml',
        }

        self.assertExtractedArchiveMembers(output_dir, uuid_dir,
                                           expected_members)
 def test_mismatched_taxonomy(self):
     # Run the action with a different reference taxonomy and expect a
     # ValueError matching the "Not all OTUs" pattern below.
     other_taxa_fp = self.get_data_path('another-ref-taxa.tsv')
     other_taxa = Artifact.import_data('FeatureData[Taxonomy]',
                                       other_taxa_fp)
     expected_error = 'Not all OTUs.*1 feature.*\n.*879972'
     with self.assertRaisesRegex(ValueError, expected_error):
         self.action(self.input_sequences, self.tree, other_taxa)
Ejemplo n.º 27
0
    def test_visualizer_callable_output(self):
        # A visualizer callable that returns anything other than None must
        # raise a TypeError; one that returns None must produce `Results`.
        mapping_artifact = Artifact.import_data(
            Mapping, {'foo': 'abc', 'bar': 'def'})

        def register(callable_):
            # Register `callable_` and return the visualizer stored under
            # the name 'func'.
            self.plugin.visualizers.register_function(
                callable_, {'foo': Mapping}, {}, '', '')
            return self.plugin.visualizers['func']

        # Callable returns each of these values in turn.
        for return_val in (True, False, [], {}, '', 0, 0.0):

            def func(output_dir: str, foo: dict) -> None:
                return return_val

            visualizer = register(func)

            with self.assertRaisesRegex(TypeError, "should not return"):
                visualizer(foo=mapping_artifact)

        # Callable returns None (default function return)
        def func(output_dir: str, foo: dict) -> None:
            return None

        visualizer = register(func)

        # Should not raise an exception
        output = visualizer(foo=mapping_artifact)
        self.assertIsInstance(output, Results)
        self.assertIsInstance(output.visualization, Visualization)
Ejemplo n.º 28
0
 def setUp(self):
     # Copy the first ten records of the bundled FASTA file into a
     # temporary file and import that as `self.sequences`.
     super().setUp()
     source_fp = self.get_data_path('dna-sequences.fasta')
     all_records = skbio.io.read(source_fp, format='fasta',
                                 constructor=skbio.DNA)
     subset_fp = os.path.join(self.temp_dir.name, 'temp-seqs.fasta')
     # NOTE(review): the generator expression is kept as written;
     # presumably skbio's writer needs a generator rather than a bare
     # `islice` object - confirm before simplifying.
     first_ten = (s for s in islice(all_records, 10))
     skbio.io.write(first_ten, 'fasta', subset_fp)
     self.sequences = Artifact.import_data('FeatureData[Sequence]',
                                           subset_fp)
    def test_fit_classifier(self):
        # `fit_classifier` should produce a working taxonomic classifier:
        # for more than half of the reads, the reference taxonomy must start
        # with the predicted taxonomy.
        reads = Artifact.import_data(
            'FeatureData[Sequence]',
            self.get_data_path('se-dna-sequences.fasta'))

        vectorizer_step = [
            'feat_ext',
            {'__type__': 'feature_extraction.text.HashingVectorizer',
             'analyzer': 'char_wb',
             'n_features': 8192,
             'ngram_range': [8, 8],
             'non_negative': True}]
        classifier_step = [
            'classify',
            {'__type__': 'naive_bayes.MultinomialNB',
             'alpha': 0.01}]
        spec_json = json.dumps([vectorizer_step, classifier_step])

        fit_classifier = feature_classifier.methods.fit_classifier
        fitted = fit_classifier(reads, self.taxonomy, spec_json)

        classify = feature_classifier.methods.classify
        outcome = classify(reads, fitted.classifier)

        reference = self.taxonomy.view(pd.Series).to_dict()
        predicted = outcome.classification.view(pd.Series).to_dict()

        # Float start value keeps the tally a float, as in a `0.` counter.
        matches = sum(
            (reference[taxon].startswith(label)
             for taxon, label in predicted.items()),
            0.)
        self.assertGreater(matches/len(predicted), 0.5)
Ejemplo n.º 30
0
    def test_async_with_multiple_outputs(self):
        # Run `split_ints` asynchronously on an IntSequence1 artifact and
        # check both outputs by position and by name.
        splitter = self.plugin.methods['split_ints']

        source = Artifact.import_data(IntSequence1, [0, 42, -2, 43, 6])

        future = splitter.asynchronous(source)
        self.assertIsInstance(future, concurrent.futures.Future)
        outputs = future.result()

        self.assertIsInstance(outputs, tuple)
        self.assertEqual(len(outputs), 2)

        for produced in outputs:
            self.assertIsInstance(produced, Artifact)
            self.assertEqual(produced.type, IntSequence1)
            self.assertIsInstance(produced.uuid, uuid.UUID)

        # Output artifacts have different UUIDs.
        first, second = outputs[0], outputs[1]
        self.assertNotEqual(first.uuid, second.uuid)

        # Index lookup.
        self.assertEqual(outputs[0].view(list), [0, 42])
        self.assertEqual(outputs[1].view(list), [-2, 43, 6])

        # Test properties of the `Results` object.
        self.assertIsInstance(outputs, Results)
        self.assertEqual(outputs.left.view(list), [0, 42])
        self.assertEqual(outputs.right.view(list), [-2, 43, 6])
Ejemplo n.º 31
0
    def test_call_with_multiple_outputs(self):
        # Call `split_ints` directly on an IntSequence1 artifact and check
        # both outputs by position and by name.
        splitter = self.plugin.methods['split_ints']

        source = Artifact.import_data(IntSequence1, [0, 42, -2, 43, 6])

        outputs = splitter(source)

        self.assertIsInstance(outputs, tuple)
        self.assertEqual(len(outputs), 2)

        for produced in outputs:
            self.assertIsInstance(produced, Artifact)
            self.assertEqual(produced.type, IntSequence1)
            self.assertIsInstance(produced.uuid, uuid.UUID)

        # Output artifacts have different UUIDs.
        first, second = outputs[0], outputs[1]
        self.assertNotEqual(first.uuid, second.uuid)

        # Index lookup.
        self.assertEqual(outputs[0].view(list), [0, 42])
        self.assertEqual(outputs[1].view(list), [-2, 43, 6])

        # Test properties of the `Results` object.
        self.assertIsInstance(outputs, Results)
        self.assertEqual(outputs.left.view(list), [0, 42])
        self.assertEqual(outputs.right.view(list), [-2, 43, 6])
Ejemplo n.º 32
0
    def test_async_with_multiple_outputs_matched_types(self):
        # Run `split_ints` asynchronously on an IntSequence2 artifact; both
        # outputs are expected to be IntSequence2 as well.
        splitter = self.plugin.methods['split_ints']

        source = Artifact.import_data(IntSequence2, [0, 42, -2, 43, 6])

        future = splitter.asynchronous(source)
        self.assertIsInstance(future, concurrent.futures.Future)
        outputs = future.result()

        self.assertIsInstance(outputs, tuple)
        self.assertEqual(len(outputs), 2)

        for produced in outputs:
            self.assertIsInstance(produced, Artifact)
            self.assertEqual(produced.type, IntSequence2)
            self.assertIsInstance(produced.uuid, uuid.UUID)

        # Output artifacts have different UUIDs.
        first, second = outputs[0], outputs[1]
        self.assertNotEqual(first.uuid, second.uuid)

        # Index lookup.
        self.assertEqual(outputs[0].view(list), [0, 42])
        self.assertEqual(outputs[1].view(list), [-2, 43, 6])

        # Test properties of the `Results` object.
        self.assertIsInstance(outputs, Results)
        self.assertEqual(outputs.left.view(list), [0, 42])
        self.assertEqual(outputs.right.view(list), [-2, 43, 6])
Ejemplo n.º 33
0
    def test_extract(self):
        """`Artifact.extract` unpacks a saved archive beneath `output_dir`.

        The returned directory must be `output_dir/<artifact uuid>` and must
        contain the expected archive members.
        """
        archive_fp = os.path.join(self.test_dir.name, 'artifact.qza')
        artifact = Artifact.import_data(FourInts, [-1, 42, 0, 43])
        artifact.save(archive_fp)

        uuid_dirname = str(artifact.uuid)
        # pathlib drops any `.` component while normalizing. That is
        # harmless, but since pathlib is what the implementation uses, the
        # expectation is built with pathlib as well.
        extract_root = pathlib.Path(self.test_dir.name) / 'artifact-extract-test'
        extracted_to = Artifact.extract(archive_fp, output_dir=extract_root)
        self.assertEqual(extracted_to, str(extract_root / uuid_dirname))

        top_level_members = {'VERSION', 'checksums.md5', 'metadata.yaml'}
        data_members = {
            'data/file1.txt',
            'data/file2.txt',
            'data/nested/file3.txt',
            'data/nested/file4.txt',
        }
        provenance_members = {
            'provenance/metadata.yaml',
            'provenance/VERSION',
            'provenance/citations.bib',
            'provenance/action/action.yaml',
        }
        expected_members = (
            top_level_members | data_members | provenance_members)

        self.assertExtractedArchiveMembers(
            extract_root, uuid_dirname, expected_members)
 def setUp(self):
     """Build a `FeatureData[Sequence]` artifact from a 10-record subset.

     Reads `dna-sequences.fasta`, writes at most its first ten records to
     a temporary FASTA file and imports that file as `self.sequences`.
     """
     super().setUp()
     source_fp = self.get_data_path('dna-sequences.fasta')
     subset_fp = os.path.join(self.temp_dir.name, 'temp-seqs.fasta')

     all_records = skbio.io.read(source_fp, format='fasta',
                                 constructor=skbio.DNA)
     # NOTE(review): the generator expression around `islice` is kept on
     # purpose -- presumably `skbio.io.write` needs a real generator object
     # here; confirm before simplifying.
     first_records = (record for record in islice(all_records, 10))
     skbio.io.write(first_records, 'fasta', subset_fp)

     self.sequences = Artifact.import_data('FeatureData[Sequence]', subset_fp)
Ejemplo n.º 35
0
    def test_asynchronous(self):
        """Run `concatenate_ints` asynchronously and inspect its result.

        Covers the returned future, the `Results` container, the output
        artifact's type/UUID, repeated views, and a second call whose first
        input is an `IntSequence2` artifact.
        """
        method = self.plugin.methods['concatenate_ints']

        seq1 = Artifact.import_data(IntSequence1, [0, 42, 43])
        seq2 = Artifact.import_data(IntSequence2, [99, -22])

        pending = method.asynchronous(seq1, seq1, seq2, 55, 1)
        self.assertIsInstance(pending, concurrent.futures.Future)
        results = pending.result()

        # The container is both a plain tuple and a `Results` object.
        self.assertIsInstance(results, tuple)
        self.assertIsInstance(results, Results)
        self.assertEqual(len(results), 1)
        self.assertEqual(results.concatenated_ints.view(list),
                         [0, 42, 43, 0, 42, 43, 99, -22, 55, 1])

        output = results[0]

        self.assertIsInstance(output, Artifact)
        self.assertEqual(output.type, IntSequence1)
        self.assertIsInstance(output.uuid, uuid.UUID)

        # Every view type can be requested more than once.
        expected_list = [0, 42, 43, 0, 42, 43, 99, -22, 55, 1]
        for _ in range(2):
            self.assertEqual(output.view(list), expected_list)

        expected_counter = collections.Counter(
            {0: 2, 42: 2, 43: 2, 99: 1, -22: 1, 55: 1, 1: 1})
        for _ in range(2):
            self.assertEqual(output.view(collections.Counter),
                             expected_counter)

        # The first input accepts IntSequence1 | IntSequence2.
        seq3 = Artifact.import_data(IntSequence2, [10, 20])
        pending = method.asynchronous(seq3, seq1, seq2, 55, 1)
        output, = pending.result()

        self.assertEqual(output.type, IntSequence1)
        self.assertEqual(output.view(list),
                         [10, 20, 0, 42, 43, 99, -22, 55, 1])
Ejemplo n.º 36
0
    def test_eq_same_uuid(self):
        """An artifact compares equal to a copy loaded from its saved file."""
        archive_fp = os.path.join(self.test_dir.name, 'artifact.qza')

        original = Artifact.import_data(FourInts, [-1, 42, 0, 43])
        original.save(archive_fp)
        reloaded = Artifact.load(archive_fp)

        self.assertEqual(original, reloaded)
Ejemplo n.º 37
0
    def test_call_with_artifacts_and_parameters(self):
        """Call `concatenate_ints` with artifacts plus primitive parameters.

        Covers the `Results` container, the output artifact's type/UUID,
        repeated views, and a second call whose first input is an
        `IntSequence2` artifact.
        """
        method = self.plugin.methods['concatenate_ints']

        seq1 = Artifact.import_data(IntSequence1, [0, 42, 43])
        seq2 = Artifact.import_data(IntSequence2, [99, -22])

        results = method(seq1, seq1, seq2, 55, 1)

        # The container is both a plain tuple and a `Results` object.
        self.assertIsInstance(results, tuple)
        self.assertIsInstance(results, Results)
        self.assertEqual(len(results), 1)
        self.assertEqual(results.concatenated_ints.view(list),
                         [0, 42, 43, 0, 42, 43, 99, -22, 55, 1])

        output = results[0]

        self.assertIsInstance(output, Artifact)
        self.assertEqual(output.type, IntSequence1)
        self.assertIsInstance(output.uuid, uuid.UUID)

        # Every view type can be requested more than once.
        expected_list = [0, 42, 43, 0, 42, 43, 99, -22, 55, 1]
        for _ in range(2):
            self.assertEqual(output.view(list), expected_list)

        expected_counter = collections.Counter(
            {0: 2, 42: 2, 43: 2, 99: 1, -22: 1, 55: 1, 1: 1})
        for _ in range(2):
            self.assertEqual(output.view(collections.Counter),
                             expected_counter)

        # The first input accepts IntSequence1 | IntSequence2.
        seq3 = Artifact.import_data(IntSequence2, [10, 20])
        output, = method(seq3, seq1, seq2, 55, 1)

        self.assertEqual(output.type, IntSequence1)
        self.assertEqual(output.view(list),
                         [10, 20, 0, 42, 43, 99, -22, 55, 1])
Ejemplo n.º 38
0
    def test_import_data_with_invalid_format_multi_file(self):
        """Importing a directory with one non-integer file must fail.

        `nested/file4.txt` holds `foo` instead of an integer, so importing
        the directory as `FourInts` is expected to raise `ValidationError`.
        """
        data_dir = os.path.join(self.test_dir.name, 'test')
        nested = os.path.join(data_dir, 'nested')
        os.mkdir(data_dir)
        os.mkdir(nested)

        file_contents = [
            (os.path.join(data_dir, 'file1.txt'), '42\n'),
            (os.path.join(data_dir, 'file2.txt'), '43\n'),
            (os.path.join(nested, 'file3.txt'), '44\n'),
            # The only file that is not a valid single integer.
            (os.path.join(nested, 'file4.txt'), 'foo\n'),
        ]
        for filepath, text in file_contents:
            with open(filepath, 'w') as fh:
                fh.write(text)

        error_regex = "file4.txt.*SingleIntFormat.*\n\n.*integer"
        with self.assertRaisesRegex(ValidationError, error_regex):
            Artifact.import_data(FourInts, data_dir)
Ejemplo n.º 39
0
    def test_call_with_variadic_inputs(self):
        """Call `variadic_input_method` with list- and set-valued arguments.

        The inputs together cover the integers 1 through 13, and the single
        output must view as exactly that range.
        """
        variadic = self.plugin.methods['variadic_input_method']

        int_sequences = [
            Artifact.import_data(IntSequence1, [1, 2, 3]),
            Artifact.import_data(IntSequence2, [4, 5, 6]),
        ]
        single_ints = {
            Artifact.import_data(SingleInt, 7),
            Artifact.import_data(SingleInt, 8),
        }
        required_nums = {9, 10}
        optional_nums = [11, 12, 13]

        output, = variadic(
            int_sequences, single_ints, required_nums, optional_nums)

        self.assertEqual(output.view(list), list(range(1, 14)))
Ejemplo n.º 40
0
    def test_import_data_with_unrecognized_files(self):
        """Importing a directory with an unexpected file name must fail.

        `nested/foo.txt` takes the place of `nested/file4.txt`, so importing
        the directory as `FourInts` is expected to raise `ValidationError`.
        """
        data_dir = os.path.join(self.test_dir.name, 'test')
        nested = os.path.join(data_dir, 'nested')
        os.mkdir(data_dir)
        os.mkdir(nested)

        file_contents = [
            (os.path.join(data_dir, 'file1.txt'), '42\n'),
            (os.path.join(data_dir, 'file2.txt'), '43\n'),
            (os.path.join(nested, 'file3.txt'), '44\n'),
            # Valid content, but under a name the format does not know.
            (os.path.join(nested, 'foo.txt'), '45\n'),
        ]
        for filepath, text in file_contents:
            with open(filepath, 'w') as fh:
                fh.write(text)

        error_regex = ("Unrecognized.*foo.txt.*FourIntsDirectoryFormat")
        with self.assertRaisesRegex(ValidationError, error_regex):
            Artifact.import_data(FourInts, data_dir)
Ejemplo n.º 41
0
    def test_import_data_with_invalid_format_multi_file(self):
        """A non-integer file inside the directory makes the import fail.

        All files hold an integer except `nested/file4.txt` (`foo`); the
        `FourInts` import is expected to raise `ValidationError`.
        """
        data_dir = os.path.join(self.test_dir.name, 'test')
        nested = os.path.join(data_dir, 'nested')
        for directory in (data_dir, nested):
            os.mkdir(directory)

        fixtures = (
            (data_dir, 'file1.txt', '42\n'),
            (data_dir, 'file2.txt', '43\n'),
            (nested, 'file3.txt', '44\n'),
            # Deliberately not an integer.
            (nested, 'file4.txt', 'foo\n'),
        )
        for parent, filename, text in fixtures:
            with open(os.path.join(parent, filename), 'w') as fh:
                fh.write(text)

        error_regex = "file4.txt.*SingleIntFormat.*\n\n.*integer"
        with self.assertRaisesRegex(ValidationError, error_regex):
            Artifact.import_data(FourInts, data_dir)
Ejemplo n.º 42
0
    def test_eq_same_uuid(self):
        """Saving then loading an artifact yields an equal artifact."""
        saved_fp = os.path.join(self.test_dir.name, 'artifact.qza')

        in_memory = Artifact.import_data(FourInts, [-1, 42, 0, 43])
        in_memory.save(saved_fp)
        from_disk = Artifact.load(saved_fp)

        self.assertEqual(in_memory, from_disk)
Ejemplo n.º 43
0
    def test_validate_artifact_bad(self):
        """`validate` must reject an archive holding an unexpected file."""
        artifact = Artifact.import_data('IntSequence1', [1, 2, 3, 4])

        # Drop a stray file into the archive's root directory.
        stray_file = artifact._archiver.root_dir / 'extra.file'
        with stray_file.open('w') as fh:
            fh.write('uh oh')

        # The error message is expected to name the offending file.
        with self.assertRaisesRegex(exceptions.ValidationError,
                                    r'extra\.file'):
            artifact.validate()
Ejemplo n.º 44
0
    def test_import_data_with_unrecognized_files(self):
        """A file with an unknown name makes the `FourInts` import fail.

        The directory holds `nested/foo.txt` where `nested/file4.txt` would
        normally be; a `ValidationError` is expected.
        """
        data_dir = os.path.join(self.test_dir.name, 'test')
        nested = os.path.join(data_dir, 'nested')
        for directory in (data_dir, nested):
            os.mkdir(directory)

        fixtures = (
            (data_dir, 'file1.txt', '42\n'),
            (data_dir, 'file2.txt', '43\n'),
            (nested, 'file3.txt', '44\n'),
            # Valid integer content under an unrecognized file name.
            (nested, 'foo.txt', '45\n'),
        )
        for parent, filename, text in fixtures:
            with open(os.path.join(parent, filename), 'w') as fh:
                fh.write(text)

        error_regex = ("Unrecognized.*foo.txt.*FourIntsDirectoryFormat")
        with self.assertRaisesRegex(ValidationError, error_regex):
            Artifact.import_data(FourInts, data_dir)
    def test_reader_transformer(self):
        """Import `int-sequence.txt` under both types and view it as a list.

        Each semantic type runs in its own `subTest`, so a failure reports
        which type broke and the remaining type is still exercised.
        """
        fp = pkg_resources.resource_filename('q2_dummy_types.tests',
                                             'data/int-sequence.txt')

        # Named `semantic_type` so the builtin `type` is not shadowed.
        for semantic_type in IntSequence1, IntSequence2:
            with self.subTest(semantic_type=semantic_type):
                artifact = Artifact.import_data(semantic_type, fp)
                # `Artifact.view(list)` presumably goes through the
                # registered file-format -> `list` transformer.
                self.assertEqual(artifact.view(list),
                                 [42, -1, 9, 10, 0, 999, 0])
Ejemplo n.º 46
0
    def test_primitive_param_not_valid_choice(self):
        """`failing_pipeline` must reject an unknown `break_from` value."""
        failing_pipeline = self.plugin.pipelines['failing_pipeline']
        ints = Artifact.import_data(IntSequence1, [0, 42, 43])
        bad_choice = "invalid choice"

        # A string outside the allowed choices raises a TypeError whose
        # message mentions the parameter and the rejected value.
        expected_message = 'break_from.*\'invalid choice\''
        with self.assertRaisesRegex(TypeError, expected_message):
            failing_pipeline(ints, bad_choice)
 def setUp(self):
     """Load the taxonomy series and the single-end reads fixture.

     Sets `self.taxonomy` (a `pd.Series` view of `taxonomy.tsv`),
     `self.reads_fp` and `self.reads` (a read-mode `DNAFASTAFormat`).
     """
     super().setUp()

     taxonomy_fp = self.get_data_path('taxonomy.tsv')
     taxonomy_artifact = Artifact.import_data('FeatureData[Taxonomy]',
                                              taxonomy_fp)
     self.taxonomy = taxonomy_artifact.view(pd.Series)

     # TODO: switch to `Artifact.import_data` once a transformer for
     # DNASequencesDirectoryFormat -> DNAFASTAFormat exists.
     self.reads_fp = self.get_data_path('se-dna-sequences.fasta')
     self.reads = DNAFASTAFormat(self.reads_fp, mode='r')
Ejemplo n.º 48
0
    def test_reader_transformer(self):
        """Import `int-sequence.txt` under both types and view it as a list.

        Each semantic type runs in its own `subTest`, so a failure reports
        which type broke and the remaining type is still exercised.
        """
        fp = pkg_resources.resource_filename(
            'q2_dummy_types.tests', 'data/int-sequence.txt')

        # Named `semantic_type` so the builtin `type` is not shadowed.
        for semantic_type in IntSequence1, IntSequence2:
            with self.subTest(semantic_type=semantic_type):
                artifact = Artifact.import_data(semantic_type, fp)
                # `Artifact.view(list)` presumably goes through the
                # registered file-format -> `list` transformer.
                self.assertEqual(artifact.view(list),
                                 [42, -1, 9, 10, 0, 999, 0])
Ejemplo n.º 49
0
    def test_reader_transformer(self):
        """Import `mapping.tsv` as `Mapping` and view it as a `dict`."""
        mapping_fp = pkg_resources.resource_filename(
            'q2_dummy_types.tests', 'data/mapping.tsv')
        expected_mapping = {'foo': 'abc', 'bar': 'def', 'bazz': 'ghijkl'}

        mapping_artifact = Artifact.import_data(Mapping, mapping_fp)

        # `Artifact.view` invokes the transformer that handles the
        # `MappingFormat` -> `dict` transformation.
        observed_mapping = mapping_artifact.view(dict)
        self.assertEqual(observed_mapping, expected_mapping)
Ejemplo n.º 50
0
def create_artifact():
    """Import the data described by the JSON request body and save a .qza.

    The request body must provide ``type``, ``path``, ``source_format`` and
    ``name``. ``name`` is joined to the current working directory and gets
    a ``.qza`` suffix when it lacks one.

    Returns:
        An empty string.

    Raises:
        ValueError: if ``name`` resolves to a location outside the current
            working directory (for example an absolute path or ``../``
            segments).
    """
    request_body = request.get_json()
    artifact = Artifact.import_data(request_body['type'], request_body['path'],
                                    request_body['source_format'])
    path = os.path.join(os.getcwd(), request_body['name'])
    if not path.endswith('.qza'):
        path += '.qza'

    # `name` comes straight from the client. `os.path.join` discards the
    # working directory for an absolute `name`, and `..` segments climb out
    # of it, so refuse any target that is not inside the working directory.
    base_dir = os.path.realpath(os.getcwd())
    resolved = os.path.realpath(path)
    if os.path.commonpath([base_dir, resolved]) != base_dir:
        raise ValueError(
            'artifact name %r resolves outside the working directory'
            % (request_body['name'],))

    artifact.save(path)
    return ''
 def setUp(self):
     """Prepare the taxonomy series and the single-end reads fixture.

     Sets `self.taxonomy` (a `pd.Series` view of `taxonomy.tsv`),
     `self.reads_fp` and `self.reads` (a read-mode `DNAFASTAFormat`).
     """
     super().setUp()

     imported_taxonomy = Artifact.import_data(
         'FeatureData[Taxonomy]', self.get_data_path('taxonomy.tsv'))
     self.taxonomy = imported_taxonomy.view(pd.Series)

     # TODO: switch to `Artifact.import_data` once a transformer for
     # DNASequencesDirectoryFormat -> DNAFASTAFormat exists.
     reads_fp = self.get_data_path('se-dna-sequences.fasta')
     self.reads_fp = reads_fp
     self.reads = DNAFASTAFormat(self.reads_fp, mode='r')
Ejemplo n.º 52
0
    def test_data_import(self):
        """Importing `mapping.tsv` yields a `Mapping` artifact with a UUID."""
        mapping_fp = pkg_resources.resource_filename(
            'q2_dummy_types.tests', 'data/mapping.tsv')

        # `Artifact.import_data` validates `mapping.tsv` and then copies it
        # into the artifact.
        imported = Artifact.import_data(Mapping, mapping_fp)

        self.assertEqual(imported.type, Mapping)
        self.assertIsInstance(imported.uuid, uuid.UUID)
Ejemplo n.º 53
0
    def test_extract_reads_expected_reverse(self):
        """`extract_reads` on the reverse-orientation sequence fixture.

        The reads are extracted with `read_orientation='reverse'` and then
        checked by the shared `_test_results` helper.
        """
        reverse_fp = self.get_data_path('dna-sequences-reverse.fasta')
        reverse_seqs = Artifact.import_data('FeatureData[Sequence]',
                                            reverse_fp)

        extracted = extract_reads(
            reverse_seqs,
            f_primer=self.f_primer,
            r_primer=self.r_primer,
            min_length=4,
            read_orientation='reverse',
        )

        self._test_results(extracted)
Ejemplo n.º 54
0
    def test_import_data_with_good_validation_multi_files(self):
        """A directory of two files that both hold `1` imports as `SingleInt`.

        The resulting artifact must view as the integer 1.
        """
        data_dir = os.path.join(self.test_dir.name, 'test')
        os.mkdir(data_dir)

        # Both files carry the same single integer.
        for filename in ('file1.txt', 'file2.txt'):
            with open(os.path.join(data_dir, filename), 'w') as fh:
                fh.write('1\n')

        imported = Artifact.import_data(SingleInt, data_dir)
        self.assertEqual(1, imported.view(int))
    def test_exercise_classify_otus_experimental(self):
        """The action's taxonomy output must match `sepp-results.tsv`.

        Both the observed artifact and the expected fixture are compared as
        `pd.DataFrame` views.
        """
        expected_fp = self.get_data_path('sepp-results.tsv')

        observed_artifact, = self.action(
            self.input_sequences, self.tree, self.taxonomy)
        observed_frame = observed_artifact.view(pd.DataFrame)

        expected_artifact = Artifact.import_data('FeatureData[Taxonomy]',
                                                 expected_fp)
        expected_frame = expected_artifact.view(pd.DataFrame)

        assert_frame_equal(observed_frame, expected_frame)
Ejemplo n.º 56
0
    def test_import_data_with_good_validation_multi_files(self):
        """Two files that each hold `1` import cleanly as `SingleInt`.

        The imported artifact is expected to view as the integer 1.
        """
        data_dir = os.path.join(self.test_dir.name, 'test')
        os.mkdir(data_dir)

        first_fp = os.path.join(data_dir, 'file1.txt')
        second_fp = os.path.join(data_dir, 'file2.txt')
        for filepath in (first_fp, second_fp):
            with open(filepath, 'w') as fh:
                fh.write('1\n')

        single_int = Artifact.import_data(SingleInt, data_dir)
        self.assertEqual(1, single_int.view(int))
Ejemplo n.º 57
0
    def test_data_import(self):
        """Importing `test_in_dir` yields a `Winnowed` artifact with a UUID."""
        sample_dir = pkg_resources.resource_filename(
            'q2_winnowing.tests', 'sample_data/test_in_dir')

        # NOTE(review): presumably `Artifact.import_data` validates the
        # contents of `test_in_dir` and copies them into the artifact --
        # confirm against the `Winnowed` directory format.
        imported = Artifact.import_data(Winnowed, sample_dir)

        self.assertEqual(imported.type, Winnowed)
        self.assertIsInstance(imported.uuid, uuid.UUID)
Ejemplo n.º 58
0
def create_artifact():
    """Import the data described by the JSON request body and save a .qza.

    The request body must provide ``type``, ``path``, ``source_format`` and
    ``name``. ``name`` is joined to the current working directory and gets
    a ``.qza`` suffix when it lacks one.

    Returns:
        An empty string.

    Raises:
        ValueError: if ``name`` resolves to a location outside the current
            working directory (for example an absolute path or ``../``
            segments).
    """
    request_body = request.get_json()
    artifact = Artifact.import_data(request_body['type'],
                                    request_body['path'],
                                    request_body['source_format'])
    path = os.path.join(os.getcwd(), request_body['name'])
    if not path.endswith('.qza'):
        path += '.qza'

    # `name` comes straight from the client. `os.path.join` discards the
    # working directory for an absolute `name`, and `..` segments climb out
    # of it, so refuse any target that is not inside the working directory.
    base_dir = os.path.realpath(os.getcwd())
    resolved = os.path.realpath(path)
    if os.path.commonpath([base_dir, resolved]) != base_dir:
        raise ValueError(
            'artifact name %r resolves outside the working directory'
            % (request_body['name'],))

    artifact.save(path)
    return ''
Ejemplo n.º 59
0
    def test_ne_different_type_same_uuid(self):
        """An object of another class is unequal despite sharing the UUID."""
        artifact = Artifact.import_data(FourInts, [-1, 42, 0, 43])

        class Faker:
            # Exposes the real artifact's UUID without being an Artifact.
            @property
            def uuid(self):
                return artifact.uuid

        self.assertNotEqual(artifact, Faker())
Ejemplo n.º 60
0
    def test_peek(self):
        """`Artifact.peek` reports type, UUID and format of a saved file."""
        saved = Artifact.import_data(FourInts, [0, 0, 42, 1000])
        archive_fp = os.path.join(self.test_dir.name, 'artifact.qza')
        saved.save(archive_fp)

        peeked = Artifact.peek(archive_fp)

        self.assertIsInstance(peeked, ResultMetadata)
        # The peeked metadata exposes its fields as strings.
        self.assertEqual(peeked.type, 'FourInts')
        self.assertEqual(peeked.uuid, str(saved.uuid))
        self.assertEqual(peeked.format, 'FourIntsDirectoryFormat')