def start(self):
    """Write the fixture data to temporary files and register the paths.

    The metadata table is saved to a ``.txt`` file.  ``self.faith_pd_data``,
    ``self.taxonomy_greengenes_df`` and ``self.table`` are each imported as
    an artifact and saved to their own ``.qza`` file.  The resulting paths
    are merged into ``config.resources``, which is then copied into
    ``resources``.
    """
    # Sample metadata.
    self.metadata_file = self.create_tempfile(suffix='.txt')
    metadata_path = self.metadata_file.name
    Metadata(self.metadata_table).save(metadata_path)

    # Alpha diversity values.
    self.faith_pd_file = self.create_tempfile(suffix='.qza')
    faith_pd_path = self.faith_pd_file.name
    Artifact.import_data(
        "SampleData[AlphaDiversity]", self.faith_pd_data
    ).save(faith_pd_path)

    # Taxonomy.
    self.taxonomy_file = self.create_tempfile(suffix='.qza')
    taxonomy_path = self.taxonomy_file.name
    Artifact.import_data(
        "FeatureData[Taxonomy]", self.taxonomy_greengenes_df
    ).save(taxonomy_path)

    # Feature table.
    self.table_file = self.create_tempfile(suffix='.qza')
    table_path = self.table_file.name
    Artifact.import_data(
        "FeatureTable[Frequency]", self.table
    ).save(table_path)

    greengenes_entry = {
        'table': table_path,
        'feature-data-taxonomy': taxonomy_path,
    }
    config.resources.update({
        'metadata': metadata_path,
        'alpha_resources': {'faith-pd': faith_pd_path},
        'table_resources': {'greengenes': greengenes_entry},
    })
    resources.update(config.resources)
    def test_core_metrics_phylogenetic(self):
        """Run core_metrics_phylogenetic on a 2x3 table and check outputs."""
        feature_counts = np.array([[0, 11, 11], [13, 11, 11]])
        biom_table = biom.Table(feature_counts,
                                ['O1', 'O2'],
                                ['S1', 'S2', 'S3'])
        table = Artifact.import_data('FeatureTable[Frequency]', biom_table)

        newick = io.StringIO('((O1:0.25, O2:0.50):0.25, O3:0.75)root;')
        tree = Artifact.import_data('Phylogeny[Rooted]',
                                    skbio.TreeNode.read(newick))

        sample_frame = pd.DataFrame(
            {'foo': ['1', '2', '3']},
            index=pd.Index(['S1', 'S2', 'S3'], name='id'))
        metadata = Metadata(sample_frame)

        results = self.core_metrics_phylogenetic(table, tree, 13, metadata)

        self.assertEqual(len(results), 17)

        bray_curtis_type = repr(results.bray_curtis_distance_matrix.type)
        self.assertEqual(bray_curtis_type, 'DistanceMatrix')
        emperor_type = repr(results.jaccard_emperor.type)
        self.assertEqual(emperor_type, 'Visualization')

        # A pipeline keeps the type produced by the action it wraps: the
        # type returned by beta_phylogenetic is passed through to the final
        # output (as long as it is a subtype of the signature).
        faith_pd_type = repr(results.faith_pd_vector.type)
        self.assertEqual(
            faith_pd_type,
            "SampleData[AlphaDiversity] % Properties('phylogenetic')")

        expected_otus = pd.Series({'S1': 1, 'S2': 2, 'S3': 2},
                                  name='observed_otus')
        observed_otus = results[2].view(pd.Series)
        pdt.assert_series_equal(observed_otus, expected_otus)
def rarefy_wrapper(state: PipelineState, target_count: int) -> PipelineState:
    """Pass the table held in ``state.df`` through ``rarefy``.

    The data frame is imported as a ``FeatureTable[Frequency]`` artifact and
    viewed as a ``biom.Table`` for ``rarefy``; the result is converted back
    to a ``pd.DataFrame`` the same way and handed to ``state.update_df``,
    whose return value is returned.
    """
    source_artifact = Artifact.import_data("FeatureTable[Frequency]",
                                           state.df)
    rarefied_table = rarefy(source_artifact.view(biom.Table), target_count)
    rarefied_artifact = Artifact.import_data("FeatureTable[Frequency]",
                                             rarefied_table)
    return state.update_df(rarefied_artifact.view(pd.DataFrame))
Example #4
0
def main():
    """Train a naive Bayes feature classifier and save it to disk.

    Follows the q2-feature-classifier training workflow
    (https://docs.qiime2.org/2019.1/tutorials/feature-classifier/):
    import reference sequences and taxonomy, extract the reads between the
    configured primers, fit the classifier, and save it.
    """
    options = get_args()
    seq_loc, tax_loc = get_file_loc(options.gene, options.perc_id)

    # Import the reference sequences and their headerless-TSV taxonomy.
    reference_seqs = Artifact.import_data('FeatureData[Sequence]', seq_loc)
    reference_taxonomy = Artifact.import_data(
        'FeatureData[Taxonomy]',
        tax_loc,
        view_type='HeaderlessTSVTaxonomyFormat',
    )

    # Trim the references to the amplified region.
    # V3-V4: 341f: CCTACGGGNGGCWGCAG; 806r: GACTACHVGGGTATCTAATCC
    extraction = extract_reads(
        sequences=reference_seqs,
        f_primer=options.f_primer,
        r_primer=options.r_primer,
    )

    # Fit the classifier on the extracted reads.
    training = fit_classifier_naive_bayes(
        reference_reads=extraction.reads,
        reference_taxonomy=reference_taxonomy,
    )

    # Save the classifier under a name built from identity and gene.
    output_name = "silva132_" + options.perc_id + "_v3v4_" + options.gene
    training.classifier.save(output_name)
Example #5
0
    def setUp(self):
        """Save two alpha diversity artifacts and register them as resources.

        ``self.no_resources_repo`` is created before the resources are
        registered and ``self.repo`` afterwards.
        """
        ConfigTestCase.setUp(self)
        TempfileTestCase.setUp(self)
        self.no_resources_repo = AlphaRepo()

        chao1_path = self.create_tempfile(suffix='.qza').name
        faith_pd_path = self.create_tempfile(suffix='.qza').name

        chao1_values = pd.Series({'sample1': 7.15, 'sample2': 9.04},
                                 name='chao1')
        faith_pd_values = pd.Series(
            {'sample3': 7.24, 'sample2': 9.04, 'sample4': 8.25},
            name='faith_pd')

        Artifact.import_data("SampleData[AlphaDiversity]",
                             chao1_values).save(chao1_path)
        Artifact.import_data("SampleData[AlphaDiversity]",
                             faith_pd_values).save(faith_pd_path)

        alpha_resources = {'chao1': chao1_path, 'faith_pd': faith_pd_path}
        config.resources.update({'alpha_resources': alpha_resources})
        resources.update(config.resources)

        self.repo = AlphaRepo()
Example #6
0
def get_sequence_strata(k, ref_taxa, ref_seqs, n_jobs):
    """Assign every reference sequence to a taxonomic stratum.

    Taxa that label at least ``k`` reference sequences each seed their own
    stratum.  A naive Bayes classifier is fitted on the sequences of those
    taxa and used to classify the remaining sequences; every remaining taxon
    is then merged into the stratum that collected the highest summed
    classification confidence over its sequences.

    Parameters
    ----------
    k
        Minimum number of sequences a taxon needs to seed a stratum.
    ref_taxa
        Passed to ``load_references`` and also imported as a
        ``HeaderlessTSVTaxonomyFormat`` taxonomy artifact
        (presumably a file path -- TODO confirm).
    ref_seqs
        Passed to ``load_references``; the name is rebound to its second
        return value.
    n_jobs
        Forwarded to ``classify_sklearn``.

    Returns
    -------
    seq_ids : list
        ``metadata['id']`` of every reference sequence.
    strata : list
        For each entry of ``seq_ids``, the seed taxon of its stratum.
    taxon_defaults : dict
        Maps each taxon to its stratum list (seed taxon first, followed by
        the taxa merged into it).

    Raises
    ------
    AssertionError
        If the two best-scoring strata for a taxon have equal vote totals.
    """
    # NOTE(review): the returned ref_seqs is iterated three times below, so
    # it must be re-iterable (e.g. a list) -- confirm in load_references.
    taxonomy, ref_seqs = load_references(ref_taxa, ref_seqs)
    taxa_stats = Counter(taxonomy.values())
    # Each sufficiently frequent taxon starts as a one-element stratum.
    strata = {t: [t] for t in taxonomy.values() if taxa_stats[t] >= k}
    # Train only on sequences whose taxon seeds a stratum.
    kref = (s for s in ref_seqs if taxonomy[s.metadata['id']] in strata)
    ref_art = Artifact.import_data('FeatureData[Sequence]', DNAIterator(kref))
    tax_art = Artifact.import_data('FeatureData[Taxonomy]',
                                   ref_taxa,
                                   view_type='HeaderlessTSVTaxonomyFormat')
    classifier = feature_classifier.methods.fit_classifier_naive_bayes(
        ref_art, tax_art, classify__alpha=0.001, feat_ext__ngram_range='[7,7]')
    classifier = classifier.classifier
    # "Tiddlers" are the sequences of taxa that did not seed a stratum.
    tiddlers = DNAIterator(s for s in ref_seqs
                           if taxonomy[s.metadata['id']] not in strata)
    tid_art = Artifact.import_data('FeatureData[Sequence]', tiddlers)
    tid_tax = feature_classifier.methods.classify_sklearn(tid_art,
                                                          classifier,
                                                          confidence=0.,
                                                          n_jobs=n_jobs)
    tid_tax = tid_tax.classification.view(DataFrame)
    # stratum_votes[original taxon][predicted taxon] -> summed confidence.
    stratum_votes = defaultdict(Counter)
    for sid in tid_tax.index:
        stratum_votes[taxonomy[sid]][tid_tax['Taxon'][sid]] += \
         float(tid_tax['Confidence'][sid])
    taxon_defaults = {}
    for taxon in stratum_votes:
        most_common = stratum_votes[taxon].most_common()
        merge_taxon, max_conf = most_common[0]
        # Refuse to pick a stratum when the top two vote totals tie.
        assert len(most_common) == 1 or most_common[1][1] != max_conf
        # NOTE(review): assumes the predicted taxon is a key of strata,
        # i.e. the classifier only emits taxa it was trained on -- confirm.
        # The list is shared, so later appends are visible through
        # taxon_defaults as well.
        taxon_defaults[taxon] = strata[merge_taxon]
        strata[merge_taxon].append(taxon)
    # Seed taxa map to their own stratum list.
    taxon_defaults.update(strata)
    seq_ids = [s.metadata['id'] for s in ref_seqs]
    # Element 0 of a stratum list is its seed taxon.
    strata = [taxon_defaults[taxonomy[sid]][0] for sid in seq_ids]
    return seq_ids, strata, taxon_defaults
Example #7
0
    def setUp(self):
        """Create a temp directory with metadata files and test artifacts."""
        dummy_plugin = get_dummy_plugin()

        self.runner = CliRunner()
        self.tempdir = tempfile.mkdtemp(prefix='qiime2-q2cli-test-temp-')
        tmp = self.tempdir

        # Metadata whose "numbers" column holds both 42 and -1.5.
        self.metadata_file_mixed_types = os.path.join(
            tmp, 'metadata-mixed-types.tsv')
        with open(self.metadata_file_mixed_types, 'w') as handle:
            handle.write('id\tnumbers\tstrings\n0\t42\tabc\n1\t-1.5\tdef\n')

        # Metadata whose header row starts with "wrong" instead of "id".
        self.bad_metadata_file = os.path.join(tmp, 'bad-metadata.tsv')
        with open(self.bad_metadata_file, 'w') as handle:
            handle.write(
                'wrong\tnumbers\tstrings\nid1\t42\tabc\nid2\t-1.5\tdef\n')

        self.metadata_artifact = os.path.join(tmp, 'metadata.qza')
        mapping = Artifact.import_data('Mapping', {'a': 'dog', 'b': 'cat'})
        mapping.save(self.metadata_artifact)

        # The same integer sequence is saved as an artifact and exported.
        self.ints1 = os.path.join(tmp, 'ints1.qza')
        int_sequence = Artifact.import_data('IntSequence1', [0, 42, 43], list)
        int_sequence.save(self.ints1)

        self.ints2 = os.path.join(tmp, 'ints')
        int_sequence.export_data(self.ints2)

        # self.viz ends up holding whatever save() returns.
        self.viz = os.path.join(tmp, 'viz.qzv')
        most_common_viz = dummy_plugin.actions['most_common_viz']
        self.viz = most_common_viz(int_sequence).visualization.save(self.viz)
Example #8
0
    def test_generalized_unifrac(self):
        """generalized_unifrac with alpha=0.5 matches the stored matrix."""
        table = Artifact.import_data('FeatureTable[Frequency]',
                                     self.get_data_path('vaw.biom'))
        phylogeny = Artifact.import_data('Phylogeny[Rooted]',
                                         self.get_data_path('vaw.nwk'))

        results = self.beta_phylogenetic(table=table,
                                         phylogeny=phylogeny,
                                         metric='generalized_unifrac',
                                         alpha=0.5)

        expected_values = np.array([
            [0.0000000, 0.4040518, 0.6285560, 0.5869439, 0.4082483, 0.2995673],
            [0.4040518, 0.0000000, 0.4160597, 0.7071068, 0.7302479, 0.4860856],
            [0.6285560, 0.4160597, 0.0000000, 0.8005220, 0.9073159, 0.5218198],
            [0.5869439, 0.7071068, 0.8005220, 0.0000000, 0.4117216, 0.3485667],
            [0.4082483, 0.7302479, 0.9073159, 0.4117216, 0.0000000, 0.6188282],
            [0.2995673, 0.4860856, 0.5218198, 0.3485667, 0.6188282, 0.0000000]
        ])
        sample_ids = ('Sample1', 'Sample2', 'Sample3', 'Sample4', 'Sample5',
                      'Sample6')
        expected = skbio.DistanceMatrix(expected_values, ids=sample_ids)

        self.assertEqual(len(results), 1)
        self.assertEqual(repr(results.distance_matrix.type), 'DistanceMatrix')
        observed = results[0].view(skbio.DistanceMatrix)

        self.assertEqual(observed.ids, expected.ids)
        # Compare every pairwise distance.
        observed_ids = observed.ids
        for row_id in observed_ids:
            for col_id in observed_ids:
                npt.assert_almost_equal(observed[row_id, col_id],
                                        expected[row_id, col_id])
Example #9
0
def save_expected(results_dir, test_samples, expected, train_taxa):
    """Write per-sample expected taxonomy and abundance artifacts.

    Each expected taxon is first truncated, one ``;``-separated level at a
    time, until it is a taxon (or an ancestor of a taxon) present in
    ``train_taxa``.  Then, for every sample in ``test_samples``, a
    ``FeatureData[Taxonomy]`` artifact is saved to
    ``<results_dir>/expected/<sample_id>.qza`` and a
    ``FeatureTable[Frequency]`` artifact to
    ``<results_dir>/abundance/<sample_id>.qza``.

    Parameters
    ----------
    results_dir
        Directory under which ``expected`` and ``abundance`` are created.
    test_samples
        Table providing ``ids()``; passed to ``extract_sample`` per sample.
    expected : dict
        Maps feature id to taxon string.  It is updated IN PLACE with the
        truncated taxa.
    train_taxa
        Artifact viewable as a ``DataFrame`` with a ``'Taxon'`` column.

    Raises
    ------
    ValueError
        If an expected taxon shares no ancestor with the training taxa.
    """
    # Collect every training taxon together with all of its ancestors.
    known_taxa = set()
    for taxon in set(train_taxa.view(DataFrame)['Taxon'].values):
        while ';' in taxon:
            known_taxa.add(taxon)
            taxon, _ = taxon.rsplit(';', 1)
        known_taxa.add(taxon)

    # Truncate each expected taxon to its deepest known ancestor.
    for sid in expected:
        taxon = expected[sid]
        while taxon not in known_taxa:
            if ';' not in taxon:
                # Previously this surfaced as an opaque tuple-unpacking
                # ValueError; keep the type but explain the cause.
                raise ValueError(
                    'expected taxon %r for %r has no ancestor among the '
                    'training taxa' % (expected[sid], sid))
            taxon = taxon.rsplit(';', 1)[0]
        expected[sid] = taxon

    # makedirs(exist_ok=True) avoids the check-then-create race and also
    # creates results_dir itself when it is missing.
    expected_dir = join(results_dir, 'expected')
    os.makedirs(expected_dir, exist_ok=True)
    abundance_dir = join(results_dir, 'abundance')
    os.makedirs(abundance_dir, exist_ok=True)

    for sample_id in test_samples.ids():
        sample = extract_sample([sample_id], test_samples)
        ids = sample.ids(axis='observation')

        # Expected taxonomy of every feature observed in this sample.
        taxa = [expected[s] for s in ids]
        df = DataFrame({'Taxon': taxa}, index=ids, columns=['Taxon'])
        df.index.name = 'Feature ID'
        Artifact.import_data('FeatureData[Taxonomy]',
                             df).save(join(expected_dir, sample_id + '.qza'))

        # Single-row frequency table: one column per feature.
        df = DataFrame(dict(zip(ids, sample.data(sample_id))),
                       index=['Frequency'],
                       columns=ids)
        Artifact.import_data('FeatureTable[Frequency]',
                             df).save(join(abundance_dir, sample_id + '.qza'))
Example #10
0
    def test_variance_adjusted_normalized(self):
        """Variance-adjusted weighted normalized UniFrac matches the matrix."""
        table = Artifact.import_data('FeatureTable[Frequency]',
                                     self.get_data_path('vaw.biom'))
        phylogeny = Artifact.import_data('Phylogeny[Rooted]',
                                         self.get_data_path('vaw.nwk'))

        results = self.beta_phylogenetic(table=table,
                                         phylogeny=phylogeny,
                                         metric='weighted_normalized_unifrac',
                                         variance_adjusted=True)

        expected_values = np.array([
            [0.0000000, 0.4086040, 0.6240185, 0.4639481, 0.2857143, 0.2766318],
            [0.4086040, 0.0000000, 0.3798594, 0.6884992, 0.6807616, 0.4735781],
            [0.6240185, 0.3798594, 0.0000000, 0.7713254, 0.8812897, 0.5047114],
            [0.4639481, 0.6884992, 0.7713254, 0.0000000, 0.6666667, 0.2709298],
            [0.2857143, 0.6807616, 0.8812897, 0.6666667, 0.0000000, 0.4735991],
            [0.2766318, 0.4735781, 0.5047114, 0.2709298, 0.4735991, 0.0000000]
        ])
        sample_ids = ('Sample1', 'Sample2', 'Sample3', 'Sample4', 'Sample5',
                      'Sample6')
        expected = skbio.DistanceMatrix(expected_values, ids=sample_ids)

        self.assertEqual(len(results), 1)
        self.assertEqual(repr(results.distance_matrix.type), 'DistanceMatrix')
        observed = results[0].view(skbio.DistanceMatrix)

        self.assertEqual(observed.ids, expected.ids)
        # Compare every pairwise distance.
        observed_ids = observed.ids
        for row_id in observed_ids:
            for col_id in observed_ids:
                npt.assert_almost_equal(observed[row_id, col_id],
                                        expected[row_id, col_id])
Example #11
0
    def test_beta_unweighted_parallel(self):
        """unweighted_unifrac with threads=2 matches the stored distances."""
        table = Artifact.import_data('FeatureTable[Frequency]',
                                     self.get_data_path('crawford.biom'))
        phylogeny = Artifact.import_data('Phylogeny[Rooted]',
                                         self.get_data_path('crawford.nwk'))

        results = self.beta_phylogenetic(table=table,
                                         phylogeny=phylogeny,
                                         metric='unweighted_unifrac',
                                         threads=2)

        # computed with beta-phylogenetic
        expected_values = np.array([
            0.71836067, 0.71317361, 0.69746044, 0.62587207, 0.72826674,
            0.72065895, 0.72640581, 0.73606053, 0.70302967, 0.73407301,
            0.6548042, 0.71547381, 0.78397813, 0.72318399, 0.76138933,
            0.61041275, 0.62331299, 0.71848305, 0.70416337, 0.75258475,
            0.79249029, 0.64392779, 0.70052733, 0.69832716, 0.77818938,
            0.72959894, 0.75782689, 0.71005144, 0.75065046, 0.78944369,
            0.63593642, 0.71283615, 0.58314638, 0.69200762, 0.68972056,
            0.71514083
        ])
        sample_ids = ('10084.PC.481', '10084.PC.593', '10084.PC.356',
                      '10084.PC.355', '10084.PC.354', '10084.PC.636',
                      '10084.PC.635', '10084.PC.607', '10084.PC.634')
        expected = skbio.DistanceMatrix(expected_values, ids=sample_ids)

        self.assertEqual(len(results), 1)
        self.assertEqual(repr(results.distance_matrix.type), 'DistanceMatrix')
        observed = results[0].view(skbio.DistanceMatrix)

        self.assertEqual(observed.ids, expected.ids)
        # Compare every pairwise distance.
        observed_ids = observed.ids
        for row_id in observed_ids:
            for col_id in observed_ids:
                npt.assert_almost_equal(observed[row_id, col_id],
                                        expected[row_id, col_id])
Example #12
0
    def setUp(self):
        """Load the meta-passthrough action plus table and tree fixtures."""
        super().setUp()
        self.method = self.plugin.actions['beta_phylogenetic_meta_passthrough']

        self.empty_table = Artifact.import_data(
            'FeatureTable[Frequency]',
            biom.Table(np.array([]), [], []))

        # checking parity with the unifrac.meta tests
        table_paths = [
            pkg_resources.resource_filename('unifrac.tests', relative)
            for relative in ('data/e1.biom', 'data/e2.biom')
        ]
        self.tables = [
            Artifact.import_data('FeatureTable[Frequency]', path)
            for path in table_paths
        ]

        tree_paths = [
            pkg_resources.resource_filename('unifrac.tests', relative)
            for relative in ('data/t1.newick', 'data/t2.newick')
        ]
        self.trees = [
            Artifact.import_data('Phylogeny[Rooted]', path)
            for path in tree_paths
        ]
Example #13
0
    def test_with_artifacts(self):
        """Merging metadata keeps track of the source artifacts."""
        first_artifact = Artifact.import_data('Mapping', {'a': '1', 'b': '2'})
        second_artifact = Artifact.import_data('Mapping', {'d': '4'})

        first_md = Metadata.from_artifact(first_artifact)
        second_md = Metadata.from_artifact(second_artifact)
        plain_frame = pd.DataFrame({'c': ['3', '42']},
                                   index=pd.Index(['0', '1'], name='id'))
        plain_md = Metadata(plain_frame)

        # Merge order: artifact-backed, plain, artifact-backed.
        merged = first_md.merge(plain_md, second_md)

        expected_columns = {'a': '1', 'b': '2', 'c': '3', 'd': '4'}
        expected_frame = pd.DataFrame(expected_columns,
                                      index=pd.Index(['0'], name='id'))
        expected_md = Metadata(expected_frame)
        expected_md._add_artifacts((first_artifact, second_artifact))

        self.assertEqual(merged, expected_md)
        self.assertEqual(merged.artifacts, (first_artifact, second_artifact))
Example #14
0
 def setUp(self):
     """Build the ranks, taxonomy, metabolite and microbe fixtures."""
     microbe_ids = list('ABCD')
     sample_ids = ['s1', 's2', 's3']

     # Conditional ranks: microbes A-D (rows) against m1-m3 (columns).
     rank_values = [[4.1, 1.3, 2.1], [0.1, 0.3, 0.2],
                    [2.2, 4.3, 3.2], [-6.3, -4.4, 2.1]]
     rank_frame = pd.DataFrame(rank_values,
                               index=pd.Index(list('ABCD'), name='id'),
                               columns=['m1', 'm2', 'm3'])
     self.ranks = Artifact.import_data('FeatureData[Conditional]',
                                       rank_frame)

     # One taxonomy string per microbe.
     taxon_strings = [
         'k__Bacteria; p__Proteobacteria; c__Deltaproteobacteria; '
         'o__Desulfobacterales; f__Desulfobulbaceae; g__; s__',
         'k__Bacteria; p__Cyanobacteria; c__Chloroplast; o__Streptophyta',
         'k__Bacteria; p__Proteobacteria; c__Alphaproteobacteria; '
         'o__Rickettsiales; f__mitochondria; g__Lardizabala; s__biternata',
         'k__Archaea; p__Euryarchaeota; c__Methanomicrobia; '
         'o__Methanosarcinales; f__Methanosarcinaceae; g__Methanosarcina'
     ]
     taxon_series = pd.Series(
         taxon_strings,
         index=pd.Index(list('ABCD'), name='feature-id'),
         name='Taxon')
     self.taxa = CategoricalMetadataColumn(taxon_series)

     metabolite_counts = np.array([[9, 8, 2], [2, 1, 2], [9, 4, 5],
                                   [8, 8, 7]])
     metabolite_table = biom.Table(
         metabolite_counts,
         sample_ids=sample_ids,
         observation_ids=['m1', 'm2', 'm3', 'm4'])
     self.metabolites = Artifact.import_data('FeatureTable[Frequency]',
                                             metabolite_table)

     microbe_counts = np.array([[1, 2, 3], [3, 6, 3], [1, 9, 9],
                                [8, 8, 7]])
     microbe_table = biom.Table(microbe_counts,
                                sample_ids=['s1', 's2', 's3'],
                                observation_ids=microbe_ids)
     self.microbes = Artifact.import_data('FeatureTable[Frequency]',
                                          microbe_table)
Example #15
0
    def setUp(self):
        """Create a temp directory with metadata files and test artifacts."""
        dummy_plugin = get_dummy_plugin()

        self.runner = CliRunner()
        self.tempdir = tempfile.mkdtemp(prefix='qiime2-q2cli-test-temp-')
        workdir = self.tempdir

        # Metadata whose "numbers" column holds both 42 and -1.5.
        mixed_path = os.path.join(workdir, 'metadata-mixed-types.tsv')
        self.metadata_file_mixed_types = mixed_path
        with open(mixed_path, 'w') as out:
            out.write('id\tnumbers\tstrings\n0\t42\tabc\n1\t-1.5\tdef\n')

        # Metadata whose header row starts with "wrong" instead of "id".
        bad_path = os.path.join(workdir, 'bad-metadata.tsv')
        self.bad_metadata_file = bad_path
        with open(bad_path, 'w') as out:
            out.write(
                'wrong\tnumbers\tstrings\nid1\t42\tabc\nid2\t-1.5\tdef\n')

        self.metadata_artifact = os.path.join(workdir, 'metadata.qza')
        mapping_artifact = Artifact.import_data(
            'Mapping', {'a': 'dog', 'b': 'cat'})
        mapping_artifact.save(self.metadata_artifact)

        # The same integer sequence is saved as an artifact and exported.
        self.ints1 = os.path.join(workdir, 'ints1.qza')
        ints_artifact = Artifact.import_data(
            'IntSequence1', [0, 42, 43], list)
        ints_artifact.save(self.ints1)

        self.ints2 = os.path.join(workdir, 'ints')
        ints_artifact.export_data(self.ints2)

        # self.viz ends up holding whatever save() returns.
        self.viz = os.path.join(workdir, 'viz.qzv')
        make_viz = dummy_plugin.actions['most_common_viz']
        self.viz = make_viz(ints_artifact).visualization.save(self.viz)
Example #16
0
    def test_variadic_inputs(self):
        """variadic-input-method accepts repeated inputs and parameters."""
        qiime_cli = RootCommand()
        command = qiime_cli.get_command(ctx=None, name='dummy-plugin')
        output_path = os.path.join(self.tempdir, 'output.qza')

        def saved(semantic_type, view, filename):
            # Import the view, save it in the temp dir, return save()'s value.
            artifact = Artifact.import_data(semantic_type, view)
            return artifact.save(os.path.join(self.tempdir, filename))

        ints1 = saved('IntSequence1', [1, 2, 3], 'ints1.qza')
        ints2 = saved('IntSequence2', [4, 5, 6], 'ints2.qza')
        set1 = saved('SingleInt', 7, 'set1.qza')
        set2 = saved('SingleInt', 8, 'set2.qza')

        cli_args = ['variadic-input-method']
        cli_args += ['--i-ints', ints1, '--i-ints', ints2]
        cli_args += ['--i-int-set', set1, '--i-int-set', set2]
        cli_args += ['--p-nums', '9', '--p-nums', '10']
        cli_args += ['--p-opt-nums', '11', '--p-opt-nums', '12',
                     '--p-opt-nums', '13']
        cli_args += ['--o-output', output_path, '--verbose']

        result = self.runner.invoke(command, cli_args)

        self.assertEqual(result.exit_code, 0)
        self.assertTrue(os.path.exists(output_path))

        # The output viewed as a list equals the integers 1 through 13.
        output = Artifact.load(output_path)
        self.assertEqual(output.view(list), list(range(1, 14)))
Example #17
0
    def test_variadic_inputs(self):
        """variadic-input-method accepts repeated inputs and parameters."""
        qiime_cli = RootCommand()
        command = qiime_cli.get_command(ctx=None, name='dummy-plugin')
        output_path = os.path.join(self.tempdir, 'output.qza')

        # Each input is imported and saved; the save() result is kept.
        first_ints = Artifact.import_data('IntSequence1', [1, 2, 3])
        ints1 = first_ints.save(os.path.join(self.tempdir, 'ints1.qza'))
        second_ints = Artifact.import_data('IntSequence2', [4, 5, 6])
        ints2 = second_ints.save(os.path.join(self.tempdir, 'ints2.qza'))
        first_single = Artifact.import_data('SingleInt', 7)
        set1 = first_single.save(os.path.join(self.tempdir, 'set1.qza'))
        second_single = Artifact.import_data('SingleInt', 8)
        set2 = second_single.save(os.path.join(self.tempdir, 'set2.qza'))

        arguments = [
            'variadic-input-method',
            '--i-ints', ints1,
            '--i-ints', ints2,
            '--i-int-set', set1,
            '--i-int-set', set2,
            '--p-nums', '9',
            '--p-nums', '10',
            '--p-opt-nums', '11',
            '--p-opt-nums', '12',
            '--p-opt-nums', '13',
            '--o-output', output_path,
            '--verbose',
        ]
        result = self.runner.invoke(command, arguments)

        self.assertEqual(result.exit_code, 0)
        self.assertTrue(os.path.exists(output_path))

        # The output viewed as a list equals the integers 1 through 13.
        loaded = Artifact.load(output_path)
        self.assertEqual(loaded.view(list), list(range(1, 14)))
Example #18
0
    def setUp(self):
        """Create the CLI runner, a temp directory and its fixture files."""
        get_dummy_plugin()
        self.runner = CliRunner()
        root_command = RootCommand()
        self.plugin_command = root_command.get_command(ctx=None,
                                                       name='dummy-plugin')
        self.tempdir = tempfile.mkdtemp(prefix='qiime2-q2cli-test-temp-')

        self.input_artifact = os.path.join(self.tempdir, 'in.qza')
        input_ints = Artifact.import_data(IntSequence1, [0, 42, 43], list)
        input_ints.save(self.input_artifact)
        self.output_artifact = os.path.join(self.tempdir, 'out.qza')

        # (attribute name, file name, file contents) for each metadata file.
        metadata_fixtures = (
            ('metadata_file1', 'metadata1.tsv',
             'id\tcol1\n0\tfoo\nid1\tbar\n'),
            ('metadata_file_alt_id_header', 'metadata-alt-id-header.tsv',
             '#SampleID\tcol1\n0\tfoo\nid1\tbar\n'),
            ('metadata_file2', 'metadata2.tsv',
             'id\tcol2\n0\tbaz\nid1\tbaa\n'),
            ('metadata_file_mixed_types', 'metadata-mixed-types.tsv',
             'id\tnumbers\tstrings\nid1\t42\tabc\nid2\t-1.5\tdef\n'),
        )
        for attribute, filename, contents in metadata_fixtures:
            path = os.path.join(self.tempdir, filename)
            setattr(self, attribute, path)
            with open(path, 'w') as handle:
                handle.write(contents)

        self.metadata_artifact = os.path.join(self.tempdir, 'metadata.qza')
        mapping = Artifact.import_data('Mapping', {'a': 'dog', 'b': 'cat'})
        mapping.save(self.metadata_artifact)
Example #19
0
    def test_mismatch(self):
        """'Bar' together with 'C3[Foo, Foo, Foo]' raises TypeError."""
        inputs = {
            'a': Artifact.import_data('Bar', 'element 1', view_type=str),
            'b': Artifact.import_data('C3[Foo, Foo, Foo]', 'element 2',
                                      view_type=str),
        }

        with self.assertRaises(TypeError):
            self.run_action(**inputs)
    def test_core_metrics_phylogenetic_multiple_jobs(self):
        """Run core_metrics_phylogenetic with n_jobs=2 and check outputs."""
        feature_counts = np.array([[0, 11, 11], [13, 11, 11]])
        biom_table = biom.Table(feature_counts,
                                ['O1', 'O2'],
                                ['S1', 'S2', 'S3'])
        table = Artifact.import_data('FeatureTable[Frequency]', biom_table)

        newick = io.StringIO('((O1:0.25, O2:0.50):0.25, O3:0.75)root;')
        tree = Artifact.import_data('Phylogeny[Rooted]',
                                    skbio.TreeNode.read(newick))

        sample_frame = pd.DataFrame(
            {'foo': ['1', '2', '3']},
            index=pd.Index(['S1', 'S2', 'S3'], name='id'))
        metadata = Metadata(sample_frame)

        results = self.core_metrics_phylogenetic(
            table, tree, 13, metadata, n_jobs=2)

        self.assertEqual(len(results), 17)

        bray_curtis_type = repr(results.bray_curtis_distance_matrix.type)
        self.assertEqual(bray_curtis_type, 'DistanceMatrix')
        emperor_type = repr(results.jaccard_emperor.type)
        self.assertEqual(emperor_type, 'Visualization')

        # A pipeline keeps the type produced by the action it wraps: the
        # type returned by beta_phylogenetic is passed through to the final
        # output (as long as it is a subtype of the signature).
        faith_pd_type = repr(results.faith_pd_vector.type)
        self.assertEqual(
            faith_pd_type,
            "SampleData[AlphaDiversity] % Properties(['phylogenetic'])")

        expected_otus = pd.Series({'S1': 1, 'S2': 2, 'S3': 2},
                                  name='observed_otus')
        observed_otus = results[2].view(pd.Series)
        pdt.assert_series_equal(observed_otus, expected_otus)
Example #21
0
    def setUp(self):
        """Load table/tree fixtures as formats and as artifacts."""
        super().setUp()

        # expected computed with skbio.diversity.beta_diversity
        expected_distances = [[0.00, 0.25, 0.25],
                              [0.25, 0.00, 0.00],
                              [0.25, 0.00, 0.00]]
        self.expected = skbio.DistanceMatrix(expected_distances,
                                             ids=['S1', 'S2', 'S3'])

        # Three BIOM tables, each opened read-only.
        self.table_as_BIOMV210Format = BIOMV210Format(
            self.get_data_path('two_feature_table.biom'), mode='r')
        self.rf_table_as_BIOMV210Format = BIOMV210Format(
            self.get_data_path('two_feature_rf_table.biom'), mode='r')
        self.p_a_table_as_BIOMV210Format = BIOMV210Format(
            self.get_data_path('two_feature_p_a_table.biom'), mode='r')
        self.table_as_artifact = Artifact.import_data(
            'FeatureTable[Frequency]', self.table_as_BIOMV210Format)

        self.tree_as_NewickFormat = NewickFormat(
            self.get_data_path('three_feature.tree'), mode='r')
        self.tree_as_artifact = Artifact.import_data(
            'Phylogeny[Rooted]', self.tree_as_NewickFormat)

        actions = self.plugin.actions
        self.unweighted_unifrac_thru_framework = actions['unweighted_unifrac']
Example #22
0
    def setUp(self):
        """Save two alpha diversity series and register them as resources."""
        super().setUp()
        self.series1_filename = self.create_tempfile(suffix='.qza').name
        self.series2_filename = self.create_tempfile(suffix='.qza').name

        observed_otus_values = {
            'sample-foo-bar': 7.24,
            'sample-baz-qux': 8.25,
            'sample-3': 6.4,
        }
        self.series_1 = pd.Series(observed_otus_values, name='observed_otus')

        chao1_values = {
            'sample-foo-bar': 9.01,
            'sample-qux-quux': 9.04,
        }
        self.series_2 = pd.Series(chao1_values, name='chao1')

        Artifact.import_data("SampleData[AlphaDiversity]",
                             self.series_1).save(self.series1_filename)
        Artifact.import_data("SampleData[AlphaDiversity]",
                             self.series_2).save(self.series2_filename)

        alpha_resources = {
            'observed_otus': self.series1_filename,
            'chao1': self.series2_filename,
        }
        config.resources.update({'alpha_resources': alpha_resources})
        resources.update(config.resources)
Example #23
0
    def test_mismatch(self):
        """'Foo' and 'Bar' inputs raise ValueError about matching outputs."""
        inputs = {
            'a': Artifact.import_data('Foo', 'element 1', view_type=str),
            'b': Artifact.import_data('Bar', 'element 2', view_type=str),
            'extra': Artifact.import_data('Foo', 'always foo', view_type=str),
        }

        with self.assertRaisesRegex(ValueError, 'match.*same output'):
            self.run_action(**inputs)
Example #24
0
    def test_match_nested(self):
        """Two 'C1[Baz]' inputs yield a viz whose index shows both values."""
        first = Artifact.import_data('C1[Baz]', "element 1", view_type=str)
        second = Artifact.import_data('C1[Baz]', "element 2", view_type=str)

        [viz] = self.run_action(a=first, b=second)

        index_html = viz._archiver.data_dir / 'index.html'
        contents = index_html.read_text()
        for element in ('element 1', 'element 2'):
            self.assertIn(element, contents)
Example #25
0
    def test_predicate_on_second(self):
        """A Properties("A") predicate on ``b`` still gives a 'Baz' output."""
        inputs = {
            'a': Artifact.import_data('Bar', 'element 1', view_type=str),
            'b': Artifact.import_data('Bar % Properties("A")', 'element 2',
                                      view_type=str),
            'extra': Artifact.import_data('Foo', 'always foo', view_type=str),
        }

        [output] = self.run_action(**inputs)

        self.assertEqual(repr(output.type), 'Baz')
Example #26
0
    def test_beta_phylogenetic_empty_table(self):
        """Using empty.biom raises a ValueError mentioning 'empty'."""
        empty_table = Artifact.import_data('FeatureTable[Frequency]',
                                           self.get_data_path('empty.biom'))
        phylogeny = Artifact.import_data(
            'Phylogeny[Rooted]', self.get_data_path('three_feature.tree'))

        with self.assertRaisesRegex(ValueError, 'empty'):
            self.beta_phylogenetic(table=empty_table,
                                   phylogeny=phylogeny,
                                   metric='unweighted_unifrac')
Example #27
0
 def setUp(self):
     """Load the alpha_passthrough action plus empty and crawford tables."""
     super().setUp()
     self.method = self.plugin.actions['alpha_passthrough']

     self.empty_table = Artifact.import_data(
         'FeatureTable[Frequency]',
         biom.Table(np.array([]), [], []))

     self.crawford_tbl = Artifact.import_data(
         'FeatureTable[Frequency]',
         self.get_data_path('crawford.biom'))
Example #28
0
    def test_match_baz_misc(self):
        """'C1[Baz]' with 'C3[Foo, Bar, Baz]' gives 'C2[Foo, Foo]', 'Baz'."""
        inputs = {
            'a': Artifact.import_data('C1[Baz]', 'element 1', view_type=str),
            'b': Artifact.import_data('C3[Foo, Bar, Baz]', 'element 2',
                                      view_type=str),
        }

        first_output, second_output = self.run_action(**inputs)

        self.assertEqual(repr(first_output.type), 'C2[Foo, Foo]')
        self.assertEqual(repr(second_output.type), 'Baz')
Example #29
0
 def setUp(self):
     """Import paired- and single-end inputs and fetch fastq_stats."""
     super().setUp()

     paired_path = self.get_data_path('demux-1')
     self.input_seqs_paired = Artifact.import_data(
         'SampleData[PairedEndSequencesWithQuality]', paired_path)

     single_path = self.get_data_path('demux-1_se')
     self.input_seqs_single = Artifact.import_data(
         'SampleData[SequencesWithQuality]', single_path)

     self.viz = self.plugin.visualizers['fastq_stats']
Example #30
0
def age_prediction_with_train_data(train_table: biom.Table,
                                    train_metadata: qiime2.Metadata,
                                    train_target_field: str,
                                    test_table: biom.Table,
                                    test_metadata: qiime2.Metadata,
                                    n_jobs_or_threads: int = 4,
                                    cv: int = 5,
                                    n_estimators: int = 500,
                                    parameter_tuning: bool = False) -> pd.DataFrame:
    '''
    Predict age in the test microbiome dataset based on a trained model.

    Parameters
    ----------
    train_table : biom.Table
        Feature table used for model training. Samples are in columns,
        features (i.e. OTUs) are in rows.
    train_metadata : qiime2.Metadata
        Sample metadata for the training samples. It must contain the
        column named by ``train_target_field``.
    train_target_field : str
        Name of the metadata column holding the regression target (age).
    test_table : biom.Table
        Feature table used for model testing. Samples are in columns,
        features (i.e. OTUs) are in rows.
    test_metadata : qiime2.Metadata
        Sample metadata for the test samples. A ``pd.DataFrame`` is also
        accepted and used as-is.
    n_jobs_or_threads : int
        Forwarded to ``regress_samples`` as ``n_jobs``.
    cv : int
        Forwarded to ``regress_samples`` as ``cv``.
    n_estimators : int
        Forwarded to ``regress_samples`` as ``n_estimators``.
    parameter_tuning : bool
        Forwarded to ``regress_samples`` as ``parameter_tuning``.

    Returns
    -------
    updated_test_metadata : pd.DataFrame
        The test metadata with the predicted values concatenated as an
        additional column.
    '''
    # Restrict the metadata to the samples present in the training table.
    # NOTE(review): filter_ids is expected to raise when a table sample ID
    # is absent from the metadata -- confirm against the qiime2 version used.
    train_metadata = train_metadata.filter_ids(train_table.ids(axis='sample'))

    # Missing values are dropped on the target column itself; the previous
    # code called drop_missing_values() on the whole metadata object.
    train_y_q2 = train_metadata.get_column(train_target_field)
    train_y_q2 = train_y_q2.drop_missing_values()

    # Keep only samples that still have a target value. inplace=False so the
    # caller's table is not mutated (biom filters in place by default).
    train_table = train_table.filter(train_y_q2.ids, axis='sample',
                                     inplace=False)
    train_table_q2 = Artifact.import_data("FeatureTable[Frequency]",
                                          train_table)

    # Train the model with q2-sample-classifier.
    out = regress_samples(train_table_q2, train_y_q2,
                          cv=cv,
                          n_jobs=n_jobs_or_threads,
                          n_estimators=n_estimators,
                          parameter_tuning=parameter_tuning)
    q2_model = out.sample_estimator

    # Age prediction in the test table.
    test_table_q2 = Artifact.import_data("FeatureTable[Frequency]",
                                         test_table)
    predictions = predict_regression(test_table_q2, q2_model).predictions
    test_pred_df = predictions.view(pd.Series)

    # pd.concat needs pandas objects, so unwrap qiime2 metadata first.
    if isinstance(test_metadata, pd.DataFrame):
        test_metadata_df = test_metadata
    else:
        test_metadata_df = test_metadata.to_dataframe()

    updated_test_metadata = pd.concat([test_metadata_df, test_pred_df],
                                      axis=1, sort=False)

    return updated_test_metadata
Example #31
0
 def test_reconstruct_fragment_rep_seqs(self):
     # Build the reconstruction map, summary and alignment artifacts in
     # memory, load the two regional kmer maps from disk, and compare the
     # reconstructed representative fragments against the expected series.
     recon_map_df = pd.DataFrame(
         data=[['seq01|seq02'],
               ['seq01|seq02'],
               ['seq03|seq04'],
               ['seq03|seq04'],
               ['seq05']],
         index=pd.Index(['seq01', 'seq02', 'seq03', 'seq04', 'seq05'],
                        name='db-seq'),
         columns=['clean_name'],
     )
     recon_map = Artifact.import_data('FeatureData[SidleReconstruction]',
                                      recon_map_df)

     summary_df = pd.DataFrame(
         data=[[1, 2, 2, 0, 'asv01|asv02'],
               [2, 3, 1.5, np.std([1, 2], ddof=1), 'asv03|asv04'],
               [2, 2, 1, 0, 'asv07|asv08']],
         index=pd.Index(['seq01|seq02', 'seq03|seq04', 'seq05'],
                        name='feature-id'),
         columns=['num-regions', 'total-kmers-mapped',
                  'mean-kmer-per-region', 'stdv-kmer-per-region',
                  'mapped-asvs'],
     )
     recon_summary = Artifact.import_data(
         'FeatureData[ReconstructionSummary]', Metadata(summary_df))

     # (sequence id, aligned residues) pairs for the reference alignment.
     aligned_records = [
         ('seq01',
          'CTAGTCATGCGAAGCGGCTCAGGATGATGATGAAGAC-------------------'
          '--------------'),
         ('seq02',
          'CTAGTCATGCGAAGCGGCTCAGGATGATGATGAAGAC-------------------'
          '--------------'),
         ('seq03',
          'CATAGTCATWTCCGCGTTGGAGTTATGATGATGAWACCACCTCGTCCCAGTTCCGC'
          'GCTTCTGACGTGC-'),
         ('seq04',
          '------------------GGAGTTATGATGA--AGACCACCTCGTCCCAGTTCCGC'
          'GCTTCTGACGTGCC'),
         ('seq05',
          'CATAGTCATCGTTTATGTATGCCCATGATGATGCGAGCACCTCGTATGGATGTAGA'
          'GCCACTGACGTGCG'),
     ]
     alignment = skbio.TabularMSA(
         [DNA(residues, metadata={'id': seq_id})
          for seq_id, residues in aligned_records])
     aligned_seqs = Artifact.import_data('FeatureData[AlignedSequence]',
                                         alignment)

     known = pd.Series(
         data=['GCGAAGCGGCTCAGG',
               'WTCCGCGTTGGAGTTATGATGATGAGACCACCTCGTCCCAGTTCCGCGCTTC'],
         index=pd.Index(['seq01|seq02', 'seq03|seq04']),
     )

     kmer_map_files = ('frag_r1_db_map.qza', 'frag_r2_db_map.qza')
     results = sidle.reconstruct_fragment_rep_seqs(
         region=['Bludhaven', 'Gotham'],
         kmer_map=[Artifact.load(os.path.join(self.base_dir, filename))
                   for filename in kmer_map_files],
         reconstruction_map=recon_map,
         reconstruction_summary=recon_summary,
         aligned_sequences=aligned_seqs,
     )
     test = results.representative_fragments
     pdt.assert_series_equal(known, test.view(pd.Series).astype(str))
 def test_qza_integration(self):
     # Round-trip the table and taxonomy through QIIME 2 artifacts, then
     # check that Taxonomy can be built from the views and queried.
     feature_table_artifact = Artifact.import_data(
         "FeatureTable[Frequency]", self.table)
     taxonomy_artifact = Artifact.import_data(
         "FeatureData[Taxonomy]", self.taxonomy_df)

     taxonomy = Taxonomy(feature_table_artifact.view(biom.Table),
                         taxonomy_artifact.view(pd.DataFrame))
     taxonomy.get_group(['sample-1', 'sample-2'], 'foo')
Example #33
0
    def test_add_artifacts(self):
        # Artifacts added across two calls should accumulate in insertion
        # order. The two Mapping artifacts hold the same data but are
        # distinct artifacts (different UUIDs).
        first_mapping = Artifact.import_data('Mapping', {'a': '1', 'b': '3'})
        self.mdc._add_artifacts([first_mapping])

        second_mapping = Artifact.import_data('Mapping', {'a': '1', 'b': '3'})
        int_sequence = Artifact.import_data('IntSequence1', [1, 2, 3, 4])
        self.mdc._add_artifacts([second_mapping, int_sequence])

        expected = (first_mapping, second_mapping, int_sequence)
        self.assertEqual(self.mdc.artifacts, expected)
    def test_add_artifacts(self):
        # Two Mapping artifacts with identical data (but different UUIDs)
        # plus an IntSequence1 artifact are added in two batches; all three
        # must be reported, in the order they were added.
        def new_mapping():
            return Artifact.import_data('Mapping', {'a': '1', 'b': '3'})

        mapping_a = new_mapping()
        self.mdc._add_artifacts([mapping_a])

        mapping_b = new_mapping()
        ints = Artifact.import_data('IntSequence1', [1, 2, 3, 4])
        self.mdc._add_artifacts([mapping_b, ints])

        self.assertEqual(self.mdc.artifacts, (mapping_a, mapping_b, ints))
Example #35
0
    def test_artifact_mismatch(self):
        # Metadata viewed from two different artifacts must not compare
        # equal, even though the underlying dataframes are identical.
        artifacts = [Artifact.import_data('Mapping', {'a': '1', 'b': '2'})
                     for _ in range(2)]
        md_first, md_second = [art.view(Metadata) for art in artifacts]

        pdt.assert_frame_equal(md_first.to_dataframe(),
                               md_second.to_dataframe())
        self.assertReallyNotEqual(md_first, md_second)
    def test_add_duplicate_artifact(self):
        # Re-adding an artifact that is already a source must raise a
        # ValueError and leave the tracked artifacts untouched.
        mapping = Artifact.import_data('Mapping', {'a': '1', 'b': '3'})
        int_sequence = Artifact.import_data('IntSequence1', [1, 2, 3, 4])
        self.mdc._add_artifacts([mapping, int_sequence])

        error_pattern = ("Duplicate source artifacts.*DummyMetadataColumn.*"
                         "artifact: Mapping")
        with self.assertRaisesRegex(ValueError, error_pattern):
            self.mdc._add_artifacts([mapping])

        # The failed call must not have mutated the object.
        self.assertEqual(self.mdc.artifacts, (mapping, int_sequence))
Example #37
0
    def setUp(self):
        # Create a temp directory holding an IntSequence1 artifact and a
        # Mapping artifact, and remember the first artifact's UUID string.
        get_dummy_plugin()
        self.runner = CliRunner()
        self.tempdir = tempfile.mkdtemp(prefix='qiime2-q2cli-test-temp-')
        self.artifact1_path, self.mapping_path = [
            os.path.join(self.tempdir, filename)
            for filename in ('a1.qza', 'mapping.qza')]

        int_sequence = Artifact.import_data(IntSequence1, [0, 42, 43])
        int_sequence.save(self.artifact1_path)
        self.artifact1_root_dir = str(int_sequence.uuid)

        Artifact.import_data('Mapping', {'foo': '42'}).save(self.mapping_path)
Example #38
0
    def test_repeated_multiple_option(self):
        # Passing --m-metadata-file twice should be accepted, and the
        # identity action should write the input ints back out unchanged.
        input_path = os.path.join(self.tempdir, 'ints.qza')
        Artifact.import_data(IntSequence1, [0, 42, 43], list).save(input_path)

        metadata_specs = (
            ('metadata1.tsv', 'id\tcol1\nid1\tfoo\nid2\tbar\n'),
            ('metadata2.tsv', 'id\tcol2\nid1\tbaz\nid2\tbaa\n'),
        )
        metadata_paths = []
        for filename, contents in metadata_specs:
            path = os.path.join(self.tempdir, filename)
            with open(path, 'w') as handle:
                handle.write(contents)
            metadata_paths.append(path)

        output_path = os.path.join(self.tempdir, 'out.qza')

        command = RootCommand().get_command(ctx=None, name='dummy-plugin')

        cli_args = ['identity-with-metadata', '--i-ints', input_path,
                    '--o-out', output_path]
        for path in metadata_paths:
            cli_args.extend(['--m-metadata-file', path])
        cli_args.append('--verbose')

        result = self.runner.invoke(command, cli_args)

        self.assertEqual(result.exit_code, 0)
        self.assertTrue(os.path.exists(output_path))
        written = Artifact.load(output_path).view(list)
        self.assertEqual(written, [0, 42, 43])
    def test_add_non_artifact(self):
        # A non-Artifact entry in the list must raise a TypeError that
        # mentions the offending value, and nothing may be added.
        mapping = Artifact.import_data('Mapping', {'a': '1', 'b': '3'})
        mixed_inputs = [mapping, 42]

        with self.assertRaisesRegex(TypeError, "Artifact object.*42"):
            self.mdc._add_artifacts(mixed_inputs)

        # The failed call must not have mutated the object.
        self.assertEqual(self.mdc.artifacts, ())
    def setUp(self):
        # Look up the pipeline under test and import the FASTA fixture as a
        # FeatureData[Sequence] artifact.
        super().setUp()
        pipelines = self.plugin.pipelines
        self.align_to_tree_mafft_fasttree = pipelines[
            'align_to_tree_mafft_fasttree']

        self.input_sequences = Artifact.import_data(
            'FeatureData[Sequence]',
            self.get_data_path('dna-sequences-1.fasta'))
Example #41
0
    def setUp(self):
        # Prepare a temp directory with an input artifact, several metadata
        # TSV files, a Mapping artifact usable as metadata, and a command
        # config file pointing the metadata options at metadata1.tsv.
        get_dummy_plugin()
        self.runner = CliRunner()
        self.plugin_command = RootCommand().get_command(
            ctx=None, name='dummy-plugin')
        self.tempdir = tempfile.mkdtemp(prefix='qiime2-q2cli-test-temp-')

        self.input_artifact = os.path.join(self.tempdir, 'in.qza')
        ints = Artifact.import_data(IntSequence1, [0, 42, 43], list)
        ints.save(self.input_artifact)
        self.output_artifact = os.path.join(self.tempdir, 'out.qza')

        # (attribute name, file name, file contents)
        metadata_fixtures = (
            ('metadata_file1', 'metadata1.tsv',
             'id\tcol1\n0\tfoo\nid1\tbar\n'),
            ('metadata_file_alt_id_header', 'metadata-alt-id-header.tsv',
             '#SampleID\tcol1\n0\tfoo\nid1\tbar\n'),
            ('metadata_file2', 'metadata2.tsv',
             'id\tcol2\n0\tbaz\nid1\tbaa\n'),
            ('metadata_file_mixed_types', 'metadata-mixed-types.tsv',
             'id\tnumbers\tstrings\nid1\t42\tabc\nid2\t-1.5\tdef\n'),
        )
        for attr_name, filename, contents in metadata_fixtures:
            path = os.path.join(self.tempdir, filename)
            setattr(self, attr_name, path)
            with open(path, 'w') as handle:
                handle.write(contents)

        self.metadata_artifact = os.path.join(self.tempdir, 'metadata.qza')
        mapping = Artifact.import_data('Mapping', {'a': 'dog', 'b': 'cat'})
        mapping.save(self.metadata_artifact)

        # Each template takes the metadata file path as its single %s.
        config_sections = (
            ('[dummy-plugin.identity-with-metadata]\n'
             'm-metadata-file=%s\n'),
            ('[dummy-plugin.identity-with-optional-metadata]\n'
             'm-metadata-file=%s\n'),
            ('[dummy-plugin.identity-with-metadata-column]\n'
             'm-metadata-file=%s\n'
             'm-metadata-column=col1\n'),
            ('[dummy-plugin.identity-with-optional-metadata-column]\n'
             'm-metadata-file=%s\n'
             'm-metadata-column=col1\n'),
        )
        self.cmd_config = os.path.join(self.tempdir, 'conf.ini')
        with open(self.cmd_config, 'w') as config:
            for section in config_sections:
                config.write(section % self.metadata_file1)
    def test_with_artifacts(self):
        # filter_ids should keep only the requested IDs and carry the
        # source artifacts over to the filtered column.
        sources = [
            Artifact.import_data('Mapping', {'a': '1', 'b': '2'}),
            Artifact.import_data('Mapping', {'d': '4'}),
        ]

        full_series = pd.Series(
            [1, 2, 3], name='col1',
            index=pd.Index(['a', 'b', 'c'], name='id'))
        mdc = DummyMetadataColumn(full_series)
        mdc._add_artifacts(list(sources))

        obs = mdc.filter_ids({'a', 'c'})

        filtered_series = pd.Series(
            [1, 3], name='col1', index=pd.Index(['a', 'c'], name='id'))
        exp = DummyMetadataColumn(filtered_series)
        exp._add_artifacts(list(sources))

        self.assertEqual(obs, exp)
        self.assertEqual(obs.artifacts, tuple(sources))
    def test_artifacts_mismatch(self):
        # Columns built from the same series must compare unequal when
        # their source artifacts differ (none vs. one, one vs. another).
        first_artifact = Artifact.import_data('Mapping', {'a': '1', 'b': '2'})
        other_artifact = Artifact.import_data('Mapping', {'a': '1', 'b': '2'})
        series = pd.Series([42, 43], name='col1',
                           index=pd.Index(['id1', 'id2'], name='id'))

        # Column without any source artifact.
        bare_column = DummyMetadataColumn(series)

        # Column sourced from the first artifact.
        column_with_first = DummyMetadataColumn(series)
        column_with_first._add_artifacts([first_artifact])

        # Column sourced from the other artifact.
        column_with_other = DummyMetadataColumn(series)
        column_with_other._add_artifacts([other_artifact])

        self.assertReallyNotEqual(bare_column, column_with_first)
        self.assertReallyNotEqual(column_with_first, column_with_other)
    def test_core_metrics_phylogenetic_rarefy_drops_sample(self):
        # S1 totals 12 counts, below the depth of 13 passed to the pipeline,
        # so the expected observed_otus vector only lists S2 and S3.
        counts = np.array([[0, 11, 11], [12, 11, 11]])
        feature_table = Artifact.import_data(
            'FeatureTable[Frequency]',
            biom.Table(counts, ['O1', 'O2'], ['S1', 'S2', 'S3']))

        newick = io.StringIO('((O1:0.25, O2:0.50):0.25, O3:0.75)root;')
        phylogeny = Artifact.import_data('Phylogeny[Rooted]',
                                         skbio.TreeNode.read(newick))

        sample_md = Metadata(pd.DataFrame({'foo': ['1', '2', '3']},
                                          index=['S1', 'S2', 'S3']))

        results = self.core_metrics_phylogenetic(
            feature_table, phylogeny, 13, sample_md)

        self.assertEqual(len(results), 17)

        expected = pd.Series({'S2': 2, 'S3': 2}, name='observed_otus')
        observed = results[2].view(pd.Series)
        pdt.assert_series_equal(observed, expected)
Example #45
0
    def test_artifacts_are_propagated(self):
        # A column pulled from artifact-backed metadata should report that
        # artifact as its source and expose the column's data.
        source = Artifact.import_data('Mapping', {'a': '1', 'b': '3'})
        obs = source.view(Metadata).get_column('b')

        # TODO update to use MetadataColumn.__eq__
        self.assertEqual(obs.artifacts, (source,))

        observed_series = obs.to_series()
        expected_series = pd.Series(
            ['3'], index=pd.Index(['0'], name='id'), name='b')
        pdt.assert_series_equal(observed_series, expected_series)
Example #46
0
    def test_source_mismatch(self):
        # Metadata backed by an artifact must not equal metadata built
        # directly from the same dataframe without an artifact.
        source = Artifact.import_data('Mapping', {'a': '1', 'b': '2'})
        with_artifact = source.view(Metadata)
        without_artifact = Metadata(with_artifact.to_dataframe())

        pdt.assert_frame_equal(with_artifact.to_dataframe(),
                               without_artifact.to_dataframe())
        self.assertReallyNotEqual(with_artifact, without_artifact)
Example #47
0
    def test_with_artifacts(self):
        # Merging artifact-backed and plain metadata should combine the
        # columns and keep the source artifacts in merge order.
        first_source = Artifact.import_data('Mapping', {'a': '1', 'b': '2'})
        second_source = Artifact.import_data('Mapping', {'d': '4'})

        md_first = first_source.view(Metadata)
        md_second = second_source.view(Metadata)
        md_plain = Metadata(pd.DataFrame(
            {'c': ['3', '42']}, index=pd.Index(['0', '1'], name='id')))

        # Merge order: artifact-backed, plain (no artifact), artifact-backed.
        obs_md = md_first.merge(md_plain, md_second)

        exp_md = Metadata(pd.DataFrame(
            {'a': '1', 'b': '2', 'c': '3', 'd': '4'},
            index=pd.Index(['0'], name='id')))
        exp_md._add_artifacts((first_source, second_source))

        self.assertEqual(obs_md, exp_md)
        self.assertEqual(obs_md.artifacts, (first_source, second_source))
    def test_equality_with_artifact(self):
        # Two columns with equal data and the same source artifact must
        # compare equal.
        source = Artifact.import_data('Mapping', {'a': '1', 'b': '2'})

        def build_column():
            column = DummyMetadataColumn(pd.Series(
                [42, 43], name='col1',
                index=pd.Index(['id1', 'id2'], name='id')))
            column._add_artifacts([source])
            return column

        left = build_column()
        right = build_column()

        self.assertReallyEqual(left, right)
    def setUp(self):
        # Fixtures for beta_correlation: a 3x3 distance matrix artifact and
        # a numeric metadata column over the same three sample IDs.
        super().setUp()
        self.beta_correlation = self.plugin.pipelines['beta_correlation']

        distances = [[0, 1, 2],
                     [1, 0, 1],
                     [2, 1, 0]]
        matrix = skbio.DistanceMatrix(
            distances, ids=['sample1', 'sample2', 'sample3'])
        self.dm = Artifact.import_data('DistanceMatrix', matrix)

        sample_index = pd.Index(['sample1', 'sample2', 'sample3'], name='id')
        numbers = pd.Series([1, 2, 3], name='number', index=sample_index)
        self.md = qiime2.NumericMetadataColumn(numbers)
Example #50
0
    def setUp(self):
        # Prepare a temp directory with an input artifact, two metadata TSV
        # files, a Mapping artifact usable as metadata, and a command config
        # file pointing the metadata options at metadata1.tsv.
        get_dummy_plugin()
        self.runner = CliRunner()
        self.plugin_command = RootCommand().get_command(
            ctx=None, name='dummy-plugin')
        self.tempdir = tempfile.mkdtemp(prefix='qiime2-q2cli-test-temp-')

        self.input_artifact = os.path.join(self.tempdir, 'in.qza')
        ints = Artifact.import_data(IntSequence1, [0, 42, 43], list)
        ints.save(self.input_artifact)
        self.output_artifact = os.path.join(self.tempdir, 'out.qza')

        # (attribute name, file name, file contents)
        metadata_fixtures = (
            ('metadata_file1', 'metadata1.tsv',
             'id\tcol1\n0\tfoo\nid1\tbar\n'),
            ('metadata_file2', 'metadata2.tsv',
             'id\tcol2\n0\tbaz\nid1\tbaa\n'),
        )
        for attr_name, filename, contents in metadata_fixtures:
            path = os.path.join(self.tempdir, filename)
            setattr(self, attr_name, path)
            with open(path, 'w') as handle:
                handle.write(contents)

        self.metadata_artifact = os.path.join(self.tempdir, 'metadata.qza')
        mapping = Artifact.import_data('Mapping', {'a': 'dog', 'b': 'cat'})
        mapping.save(self.metadata_artifact)

        # Each template takes the metadata file path as its single %s.
        config_sections = (
            ('[dummy-plugin.identity-with-metadata]\n'
             'm-metadata-file=%s\n'),
            ('[dummy-plugin.identity-with-optional-metadata]\n'
             'm-metadata-file=%s\n'),
            ('[dummy-plugin.identity-with-metadata-category]\n'
             'm-metadata-file=%s\n'
             'm-metadata-category=col1\n'),
            ('[dummy-plugin.identity-with-optional-metadata-category]\n'
             'm-metadata-file=%s\n'
             'm-metadata-category=col1\n'),
        )
        self.cmd_config = os.path.join(self.tempdir, 'conf.ini')
        with open(self.cmd_config, 'w') as config:
            for section in config_sections:
                config.write(section % self.metadata_file1)
    def test_core_metrics(self):
        # Run core_metrics at depth 13 on a tiny table and check the number
        # of results, two result types, and the observed_otus vector.
        counts = np.array([[0, 11, 11], [13, 11, 11]])
        feature_table = Artifact.import_data(
            'FeatureTable[Frequency]',
            biom.Table(counts, ['O1', 'O2'], ['S1', 'S2', 'S3']))

        sample_md = Metadata(pd.DataFrame({'foo': ['1', '2', '3']},
                                          index=['S1', 'S2', 'S3']))

        results = self.core_metrics(feature_table, 13, sample_md)

        self.assertEqual(len(results), 10)
        bray_curtis_type = repr(results.bray_curtis_distance_matrix.type)
        self.assertEqual(bray_curtis_type, 'DistanceMatrix')
        jaccard_emperor_type = repr(results.jaccard_emperor.type)
        self.assertEqual(jaccard_emperor_type, 'Visualization')

        expected = pd.Series({'S1': 1, 'S2': 2, 'S3': 2}, name='observed_otus')
        observed = results[1].view(pd.Series)
        pdt.assert_series_equal(observed, expected)
    def test_artifacts_are_propagated(self):
        # drop_missing_values should remove the NaN entries and keep the
        # source artifact on the resulting column.
        source = Artifact.import_data('Mapping', {'a': '1', 'b': '2'})

        sample_index = pd.Index(['a', 'b', 'c', 'd', 'e', 'f'],
                                name='sampleid')
        values = [0.0, np.nan, 3.3, np.nan, np.nan, 4.4]
        mdc = DummyMetadataColumn(
            pd.Series(values, name='col1', index=sample_index))
        mdc._add_artifacts([source])

        obs = mdc.drop_missing_values()

        kept_index = pd.Index(['a', 'c', 'f'], name='sampleid')
        exp = DummyMetadataColumn(
            pd.Series([0.0, 3.3, 4.4], name='col1', index=kept_index))
        exp._add_artifacts([source])

        self.assertEqual(obs, exp)
        self.assertEqual(obs.artifacts, (source,))
Example #53
0
    def setUp(self):
        # Save three integer-sequence artifacts into a temp directory and
        # reserve a path for the command's output.
        get_dummy_plugin()

        self.runner = CliRunner()
        self.plugin_command = RootCommand().get_command(
            ctx=None, name='dummy-plugin')
        self.tempdir = tempfile.mkdtemp(prefix='qiime2-q2cli-test-temp-')

        # (attribute name, file name, semantic type, data)
        int_fixtures = (
            ('ints1', 'ints1.qza', IntSequence1, [0, 42, 43]),
            ('ints2', 'ints2.qza', IntSequence1, [99, -22]),
            ('ints3', 'ints3.qza', IntSequence2, [43, 43]),
        )
        for attr_name, filename, semantic_type, data in int_fixtures:
            path = os.path.join(self.tempdir, filename)
            setattr(self, attr_name, path)
            Artifact.import_data(semantic_type, data, list).save(path)

        self.output = os.path.join(self.tempdir, 'output.qza')
Example #54
0
    def test_equality_with_artifact(self):
        # Two Metadata views of the same artifact must compare equal.
        source = Artifact.import_data('Mapping', {'a': '1', 'b': '2'})
        first_view, second_view = [source.view(Metadata) for _ in range(2)]

        self.assertReallyEqual(first_view, second_view)