Exemple #1
0
    def test_97_percent_clustering_feature4_most_abundant(self):
        """Cluster at 97% identity with feature4 carrying the largest counts.

        The expected table keeps two observations: ``feature4``, whose row
        is the element-wise sum of the input rows for feature1, feature3
        and feature4, and ``feature2``, whose row is unchanged.
        """
        counts = np.array([[4, 5, 6],
                           [1, 1, 2],
                           [7, 8, 9],
                           [100, 101, 103]])
        input_table = biom.Table(
            counts,
            ['feature1', 'feature2', 'feature3', 'feature4'],
            ['sample1', 'sample2', 'sample3'])

        exp_counts = np.array([[111, 114, 118],
                               [1, 1, 2]])
        exp_table = biom.Table(
            exp_counts,
            ['feature4', 'feature2'],
            ['sample1', 'sample2', 'sample3'])

        # silence stderr while the clustering call runs
        with redirected_stdio(stderr=os.devnull):
            clustered_table, clustered_seqs = cluster_features_de_novo(
                sequences=self.input_sequences, table=input_table,
                perc_identity=0.97)

        # order of identifiers is important for biom.Table equality, so
        # align the observed observation ids with the expected ones first
        expected_order = exp_table.ids(axis='observation')
        clustered_table = clustered_table.sort_order(expected_order,
                                                     axis='observation')
        self.assertEqual(clustered_table, exp_table)

        # sequences are reverse-sorted by abundance in output
        obs_seqs = _read_seqs(clustered_seqs)
        fixture_seqs = self.input_sequences_list
        exp_seqs = [fixture_seqs[3], fixture_seqs[1]]
        self.assertEqual(obs_seqs, exp_seqs)
Exemple #2
0
 def test_extra_features_in_sequences(self):
     """Expect a ValueError naming feature4 for a three-feature table.

     The table only declares feature1..feature3; the call must raise a
     ``ValueError`` whose message matches 'Feature feature4 is pre'.
     NOTE(review): presumably ``self.input_sequences`` contains a
     feature4 record that the table lacks -- confirm against the fixture.
     """
     counts = np.array([[0, 1, 3], [1, 1, 2], [4, 5, 6]])
     input_table = biom.Table(
         counts,
         ['feature1', 'feature2', 'feature3'],
         ['sample1', 'sample2', 'sample3'])

     raises_expected = self.assertRaisesRegex(
         ValueError, 'Feature feature4 is pre')
     with raises_expected:
         # results are never inspected; the call is expected to raise
         _table, _sequences = cluster_features_de_novo(
             sequences=self.input_sequences,
             table=input_table,
             perc_identity=1.0)
 def test_no_overlapping_feature_ids(self):
     """Expect a ValueError when the table ids are f1..f5.

     The table's observation ids ('f1'..'f5') differ from the 'featureN'
     naming used in the expected message; the call must raise a
     ``ValueError`` whose message matches 'Feature feature1 is pre'.
     """
     counts = np.array(
         [[0, 1, 3], [1, 1, 2], [4, 5, 6], [7, 8, 9], [1, 1, 1]])
     observation_ids = ['f1', 'f2', 'f3', 'f4', 'f5']
     sample_ids = ['sample1', 'sample2', 'sample3']
     input_table = biom.Table(counts, observation_ids, sample_ids)

     raises_expected = self.assertRaisesRegex(
         ValueError, 'Feature feature1 is pre')
     with raises_expected:
         # results are never inspected; the call is expected to raise
         _table, _sequences = cluster_features_de_novo(
             sequences=self.input_sequences, table=input_table,
             perc_identity=1.0)
Exemple #4
0
    def test_no_clustering(self):
        """Cluster at 100% identity and expect the input table unchanged.

        After reordering to the input table's observation ids, the observed
        table must equal ``self.input_table``. The output sequences must be
        the fixture sequences at indices 0, 3, 2, 1, in that order.
        """
        # silence stderr while the clustering call runs
        with redirected_stdio(stderr=os.devnull):
            clustered_table, clustered_seqs = cluster_features_de_novo(
                sequences=self.input_sequences, table=self.input_table,
                perc_identity=1.0)

        # order of identifiers is important for biom.Table equality
        reference_ids = self.input_table.ids(axis='observation')
        clustered_table = clustered_table.sort_order(reference_ids,
                                                     axis='observation')
        self.assertEqual(clustered_table, self.input_table)

        obs_seqs = _read_seqs(clustered_seqs)

        # sequences are reverse-sorted by abundance in output
        fixture_seqs = self.input_sequences_list
        exp_seqs = [fixture_seqs[idx] for idx in (0, 3, 2, 1)]
        self.assertEqual(obs_seqs, exp_seqs)
Exemple #5
0
    def test_1_percent_clustering(self):
        """Cluster at 1% identity and expect a single ``feature1`` row.

        The expected table holds one observation, ``feature1``, with counts
        112, 115 and 120 across the three samples. The only expected output
        sequence is the first fixture sequence.
        """
        exp_counts = np.array([[112, 115, 120]])
        exp_table = biom.Table(
            exp_counts,
            ['feature1'],
            ['sample1', 'sample2', 'sample3'])

        # silence stderr while the clustering call runs
        with redirected_stdio(stderr=os.devnull):
            clustered_table, clustered_seqs = cluster_features_de_novo(
                sequences=self.input_sequences, table=self.input_table,
                perc_identity=0.01)

        # order of identifiers is important for biom.Table equality
        expected_order = exp_table.ids(axis='observation')
        clustered_table = clustered_table.sort_order(expected_order,
                                                     axis='observation')
        self.assertEqual(clustered_table, exp_table)

        # sequences are reverse-sorted by abundance in output
        obs_seqs = _read_seqs(clustered_seqs)
        exp_seqs = [self.input_sequences_list[0]]
        self.assertEqual(obs_seqs, exp_seqs)
Exemple #6
0
    def test_short_sequences(self):
        input_sequences_fp = self.get_data_path('dna-sequences-short.fasta')
        input_sequences = DNAFASTAFormat(input_sequences_fp, mode='r')

        input_table = biom.Table(np.array([[0, 1, 3], [1, 1, 2]]),
                                 ['feature1', 'feature2'],
                                 ['sample1', 'sample2', 'sample3'])

        exp_table = biom.Table(np.array([[1, 2, 5]]), ['feature1'],
                               ['sample1', 'sample2', 'sample3'])

        with redirected_stdio(stderr=os.devnull):
            obs_table, obs_sequences = cluster_features_de_novo(
                sequences=input_sequences,
                table=input_table,
                perc_identity=0.01)
        # order of identifiers is important for biom.Table equality
        obs_table = \
            obs_table.sort_order(exp_table.ids(axis='observation'),
                                 axis='observation')