Esempio n. 1
0
    def test_no_cluster(self):
        """Two OTUs differing in their last base stay separate at threshold 1.0.

        Builds a two-row OTU table, runs ``Clusterer().each_cluster`` with a
        threshold of 1.0 and checks that two ``SampleWiseClusteredOtu``
        results come back carrying the original counts and coverages.
        """
        header = ['gene', 'sample', 'sequence', 'num_hits', 'coverage', 'taxonomy']
        l10_row = [
            '4.11.ribosomal_protein_L10',
            'minimal',
            'TTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACT',
            '2',
            '4.88',
            'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Staphylococcaceae; g__Staphylococcus',
        ]
        l11_row = [
            '4.12.ribosomal_protein_L11_rplK',
            'minimal',
            'TTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACA',
            '4',
            '9.76',
            'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales',
        ]
        # Tab-separated rows, each terminated by a newline.
        otu_text = "".join(
            "\t".join(row) + "\n" for row in (header, l10_row, l11_row))

        collection = OtuTableCollection()
        collection.add_otu_table(StringIO(otu_text))

        clusters = list(Clusterer().each_cluster(collection, 1.0))
        self.assertEqual(2, len(clusters))
        self.assertIsInstance(clusters[0], SampleWiseClusteredOtu)

        # Order by count so the checks do not depend on emission order.
        by_count = sorted(clusters, key=lambda cluster: cluster.count)
        for cluster, (count, coverage) in zip(by_count, [(2, 4.88), (4, 9.76)]):
            self.assertEqual(count, cluster.count)
            self.assertEqual(coverage, cluster.coverage)
Esempio n. 2
0
 def test_wide_format(self):
     """Wide-format output has one row per marker/sequence and one column per sample.

     The input holds the same L10 sequence in samples 'minimal' and
     'maximal' and an L11 sequence in 'minimal' only; the expected table
     reports a count of 0 for L11 under 'maximal'.
     """
     staph_taxonomy = 'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Staphylococcaceae; g__Staphylococcus'
     bacillales_taxonomy = 'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales'
     l10_sequence = 'TTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACTATGGTA'
     l11_sequence = 'CCTGCAGGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTG'

     input_rows = [
         ['gene', 'sample', 'sequence', 'num_hits', 'coverage', 'taxonomy'],
         ['4.11.ribosomal_protein_L10', 'minimal', l10_sequence, '2', '4.88', staph_taxonomy],
         ['4.12.ribosomal_protein_L11_rplK', 'minimal', l11_sequence, '4', '9.76', bacillales_taxonomy],
         ['4.11.ribosomal_protein_L10', 'maximal', l10_sequence, '2', '4.88', staph_taxonomy],
     ]
     input_text = "".join("\t".join(row) + "\n" for row in input_rows)

     wide_rows = [
         ['marker', 'sequence', 'minimal', 'maximal', 'taxonomy'],
         ['4.11.ribosomal_protein_L10', l10_sequence, '2', '2', staph_taxonomy],
         ['4.12.ribosomal_protein_L11_rplK', l11_sequence, '4', '0', bacillales_taxonomy],
     ]
     wide_text = "".join("\t".join(row) + "\n" for row in wide_rows)

     collection = OtuTableCollection()
     collection.add_otu_table(StringIO(input_text))
     sink = StringIO()
     Summariser().write_wide_format_otu_table(
         table_collection=collection, output_table_io=sink)
     self.assertEqual(wide_text, sink.getvalue())
Esempio n. 3
0
    def test_archive_to_otu_table_conversion(self):
        """An archive (JSON) OTU table is written back out as a plain OTU table.

        Loads a two-OTU archive via ``add_archive_otu_table`` and checks that
        ``Summariser().write_otu_table(..., output_extras=False)`` emits the
        six standard columns, with coverage shown to two decimal places
        (4.878... -> '4.88', 9.756... -> '9.76').
        """
        # The JSON document is assembled from adjacent string literals: a
        # single-quoted literal cannot span physical lines, so splitting it
        # across lines without this grouping is a SyntaxError. The pieces
        # concatenate to one JSON text with no embedded newline.
        archive = (
            '{"fields": ["gene", "sample", "sequence", "num_hits", "coverage", "taxonomy", "read_names", "nucleotides_aligned", "taxonomy_by_known?"], '
            '"singlem_package_sha256s": ["2b2afe0114de20451fccfe74360756376dc83d001d890e84e322ab0833eca6ba", "7f406a73d8bb176994055cb966ff350e208986d12c8215722686c17c26e548c7", "735b44ae547c133163cb7d40f417292c35423864d00c95e7f1b32091b27d46c5", "8fc6dcce2766cc01defb3b5c689a1ed8ce9d59b725c67e58c2044dafaae908b3", "172df49937742b8411d41d217500d862567374401eaf393b25107b22ac630202", "4cb1bf226bf28d8198ed5c29e8a76df411d96a6c3ce1256af16887b9a184b0a6", "d473d3ae677e6e46202461ccdedb2aef23c0a10a3412422586b37e397ca37294", "431a2860bb890cd1c7193c565cbf0cc227850cba36fb17fe94df686e74ee9b11", "faa663527bb9aea63cef03859311f2e7f55fe98590a5ec85c5ba85815a6fd13e", "a0daf111380e6e499ad9c10c3ac413aa9016c7503dd459825100168524bff0d1", "aba631d4735aeb9d2dfbbbfee1c0739bf9e99ad6532a3be04ff627f3e6efdae2", "bba10c1feb0c26bdf46aa3d1dcb992744a699cde5cf02bb2728f8397378b342f", "4d91dd794b25fd256508f0814f6a2d31e20dc85e0aa9ea405031398565276768", "9b23c524a6210af0706eea7252c2d378888029f141b9305c3e88cbac3fd83f88", "50a209417b455a48bc67702d6a3809a172c57f00785d8c705a3322e6e2a71f72"], '
            '"version": 1, '
            '"alignment_hmm_sha256s": ["dd9b7e283598360b89ec91ff3f5c509361a6108a2eadc44bfb29646b1510f6b7", "b1bb943c3449a78f937db960bfdf6b2bed641388d33fce3cb2d5f69e79946ea6", "de92c90f2c83e380ae3953972fb63fcb8ce868dab87a305f9f1811b84ffb3d39", "453ed4a62608a4aec36117a2dd1a276709ff6b130ecb8d7b1612926bfab25527", "20cc450cf4157ecf1772e0325d4d8ed400b597d888a5cb5044ca69098f935656", "4b0bf5b3d7fd2ca16e54eed59d3a07eab388f70f7078ac096bf415f1c04731d9", "7cbba7ba0ed58d21c7519ba3fcef0abe43378e5c38c985b0d5e0e5219f141d92", "4a3bbe5ac594ef3c7c820e74544828e19eca68bf860d64f928729eb4530fce4e", "06a4bed0a765971b891ca4a4bf5680aeef4a4a249ce0c028798c0e912f0ccfb4", "2678fe218ca860a2d88bdbf76935d8c78a00ab6603a041a432505d754ef08250", "b54ff98aa03ab31af39c737a569b23ee4ed9296c8ea088562bfb3db87c38fe4a", '
            '"4ae31f14067bf183f38dca20f2aefb580e5ff25848881dd988908b70b67761bb", "d7bb3d544133f38110a329712b3ace7e7d7c989dafa3815d2d5a292b4c575f50", "7639bb919ef54f7baff3ed3a8c924efca97ed375cf4120a6e05d98fd6ef52cbb", "6923b889888ea34fabf463b2c8ad5fe23c94828f1a2631a07601f246f5e87150"], '
            '"otus": [["4.11.ribosomal_protein_L10", "minimal", "TTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACTATGGTA", 2, 4.878048780487805, "Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Staphylococcaceae; g__Staphylococcus", ["HWI-ST1243:156:D1K83ACXX:7:1106:18671:79482", "HWI-ST1243:156:D1K83ACXX:7:1105:19152:28331"], [60, 60], false], '
            '["4.12.ribosomal_protein_L11_rplK", "minimal", "CCTGCAGGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTG", 4, 9.75609756097561, "Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales", ["HWI-ST1243:156:D1K83ACXX:7:1109:18214:9910", "HWI-ST1243:156:D1K83ACXX:7:1103:21187:63124", "HWI-ST1243:156:D1K83ACXX:7:1108:10813:6928", "HWI-ST1243:156:D1K83ACXX:7:1105:12385:81842"], [60, 60, 60, 60], false]]}'
        )
        expected_rows = [
            ['gene', 'sample', 'sequence', 'num_hits', 'coverage', 'taxonomy'],
            [
                '4.11.ribosomal_protein_L10', 'minimal',
                'TTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACTATGGTA',
                '2', '4.88',
                'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Staphylococcaceae; g__Staphylococcus'
            ],
            [
                '4.12.ribosomal_protein_L11_rplK', 'minimal',
                'CCTGCAGGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTG',
                '4', '9.76',
                'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales'
            ]
        ]
        # Tab-separated rows, newline-terminated (including the last row).
        exp = "\n".join(["\t".join(x) for x in expected_rows] + [''])

        output = StringIO()
        table_collection = OtuTableCollection()
        table_collection.add_archive_otu_table(StringIO(archive))
        Summariser().write_otu_table(table_collection=table_collection,
                                     output_table_io=output,
                                     output_extras=False)
        self.assertEqual(exp, output.getvalue())
Esempio n. 4
0
    def test_multiple_genes_and_samples(self):
        """Strain summary over two genes and two samples with a target taxonomy.

        The collection's target taxonomy is set to 'Root; d__Bacteria'
        before loading. The expected output lists a 'reference' row per
        gene/sample plus 'strain' rows; rows whose taxonomy starts with
        d__Archaea or d__Aacteria do not appear in the expected table.
        """
        gene_12 = '2.12.ribosomal_protein_L11_rplK.gpkg'
        gene_13 = '2.13.ribosomal_protein_L11_rplK.gpkg'

        seq_ggt = 'GGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC'
        seq_agt = 'AGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC'
        seq_gaa = 'GAAAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC'

        tax_archaea = 'Root; d__Archaea; p__Firmicutes; c__Bacilli; o__Bacillales'
        tax_bacteria = 'Root; d__Bacteria'
        tax_bacillales = 'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales'
        # Deliberately not 'd__Bacteria' in the fixture.
        tax_aacteria = 'Root; d__Aacteria; p__Firmicutes; c__Bacilli; o__Bacillales'

        input_rows = [
            self.headers,
            [gene_12, 'minimal', seq_ggt, '7', '17.07', tax_archaea],
            [gene_12, 'minimal', seq_agt, '9', '18.07', tax_bacteria],
            [gene_12, 'minimal', seq_gaa, '8', '17.57', tax_bacillales],

            [gene_13, 'minimal', seq_ggt, '9', '18.07', tax_archaea],
            [gene_13, 'minimal', seq_agt, '8', '17.57', tax_bacteria],
            [gene_13, 'minimal', seq_gaa, '7', '17.07', tax_bacillales],

            [gene_12, 'minimal2', seq_ggt, '7', '17.07', tax_archaea],
            [gene_12, 'minimal2', seq_agt, '9', '18.07', tax_bacteria],
            [gene_12, 'minimal2', seq_gaa, '8', '17.57', tax_aacteria],
        ]
        input_text = "".join("\t".join(row) + "\n" for row in input_rows)

        expected_rows = [
            self.output_headers,
            ['reference', gene_12, 'minimal', '0', seq_agt, '9', '18.07', tax_bacteria],
            ['strain', gene_12, 'minimal', '3', seq_gaa, '8', '17.57', tax_bacillales],
            ['reference', gene_13, 'minimal', '0', seq_agt, '8', '17.57', tax_bacteria],
            ['strain', gene_13, 'minimal', '3', seq_gaa, '7', '17.07', tax_bacillales],
            ['reference', gene_12, 'minimal2', '0', seq_agt, '9', '18.07', tax_bacteria],
        ]
        expected_text = "".join("\t".join(row) + "\n" for row in expected_rows)

        sink = StringIO()
        collection = OtuTableCollection()
        collection.set_target_taxonomy_by_string('Root; d__Bacteria')
        collection.add_otu_table(StringIO(input_text))
        StrainSummariser().summarise_strains(
            table_collection=collection,
            output_table_io=sink)
        self.assertEqual(expected_text, sink.getvalue())
Esempio n. 5
0
    def test_not_enough_samples(self):
        """Rarefying to 2 keeps only the OTU row that has a count of 2.

        The table holds one row with a count of 1 and one with a count of 2;
        the test expects a single surviving OTU (the count-2 sequence) from
        ``Rarefier().rarefy`` using ``PredictableRandomGenerator``.
        """
        kept_sequence = 'TTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACTATGGTT'
        rows = [
            ['gene', 'sample', 'sequence', 'num_hits', 'coverage', 'taxonomy'],
            [
                '4.11.ribosomal_protein_L10',
                'minimal',
                'TTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACTATGGTA',
                '1',
                '4.88',
                'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Staphylococcaceae; g__Staphylococcus',
            ],
            [
                '4.12.ribosomal_protein_L11_rplK',
                'minimal',
                kept_sequence,
                '2',
                '9.76',
                'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales',
            ],
        ]
        table_text = "".join("\t".join(row) + "\n" for row in rows)

        collection = OtuTableCollection()
        collection.add_otu_table(StringIO(table_text))

        rarefied = Rarefier().rarefy(
            collection, 2, random_generator=PredictableRandomGenerator())
        self.assertIsInstance(rarefied, OtuTable)

        otus = list(rarefied)
        self.assertEqual(1, len(otus))
        self.assertEqual(kept_sequence, otus[0].sequence)
        self.assertEqual([2], [otu.count for otu in otus])
 def test_cycle(self):
     """Round-trip an OTU table through a SequenceDatabase on disk.

     Creates a database under a temporary directory from a three-row OTU
     table, re-opens it with ``SequenceDatabase.acquire`` and checks the
     records fetched by ``extract_sequence(1)`` and ``extract_sequence(3)``
     as well as the reported path of the sequences FASTA file.
     """
     first_sequence = 'GGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC'
     third_sequence = 'GCTAAATTAGGAGACATTGTTAAAATTCAAGAAACTCGTCCTTTATCAGCAACAAAACGT'
     first_taxonomy = 'Root; k__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales'
     third_taxonomy = 'Root; k__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Staphylococcaceae; g__Staphylococcus'

     rows = [
         ['gene', 'sample', 'sequence', 'num_hits', 'coverage', 'taxonomy'],
         ['ribosomal_protein_L11_rplK_gpkg', 'minimal', first_sequence,
          '7', '14.4', first_taxonomy],
         ['ribosomal_protein_S2_rpsB_gpkg', 'minimal',
          'CGTCGTTGGAACCCAAAAATGAAAAAATATATCTTCACTGAGAGAAATGGTATTTATATC',
          '6', '12.2', 'Root; k__Bacteria; p__Firmicutes; c__Bacilli'],
         ['ribosomal_protein_S17_gpkg', 'minimal', third_sequence,
          '9', '18.8', third_taxonomy],
     ]
     # No trailing newline after the last row.
     table_text = "\n".join("\t".join(row) for row in rows)

     with tempdir.TempDir() as tmp:
         db_path = os.path.join(tmp, 'my.sdb')

         source_tables = OtuTableCollection()
         source_tables.add_otu_table(StringIO.StringIO(table_text))
         SequenceDatabase.create_from_otu_table(db_path, source_tables)

         reopened = SequenceDatabase.acquire(db_path)

         first = reopened.extract_sequence(1)
         self.assertEqual('ribosomal_protein_L11_rplK_gpkg', first.marker)
         self.assertEqual('minimal', first.sample_name)
         self.assertEqual(first_sequence, first.sequence)
         self.assertEqual(7, first.count)
         self.assertEqual(first_taxonomy, first.taxonomy)

         third = reopened.extract_sequence(3)
         self.assertEqual(third_sequence, third.sequence)
         self.assertEqual(third_taxonomy, third.taxonomy)

         expected_fasta = os.path.join(db_path, "sequences.fasta")
         self.assertEqual(expected_fasta, reopened.sequences_fasta_file)
Esempio n. 7
0
 def test_nothing_returned(self):
     """A header-only OTU table yields no OTUs, before or after rarefying to 0."""
     columns = ['gene', 'sample', 'sequence', 'num_hits', 'coverage', 'taxonomy']
     header_only = "\t".join(columns) + "\n"

     collection = OtuTableCollection()
     collection.add_otu_table(StringIO(header_only))
     self.assertEqual(0, len(list(collection)))

     rarefied = Rarefier().rarefy(collection, 0)
     self.assertEqual(0, len(list(rarefied)))
Esempio n. 8
0
 def test_krona(self):
     """``Summariser.summarise`` writes a Krona HTML file at the requested path.

     Loads a two-OTU archive (JSON) table and asks for Krona output inside
     a temporary directory; only the existence of the output file is
     checked, not its contents.
     """
     # The JSON document is assembled from adjacent string literals: a
     # single-quoted literal cannot span physical lines, so splitting it
     # across lines without this grouping is a SyntaxError. The pieces
     # concatenate to one JSON text with no embedded newline.
     archive = (
         '{"fields": ["gene", "sample", "sequence", "num_hits", "coverage", "taxonomy", "read_names", "nucleotides_aligned", "taxonomy_by_known?"], '
         '"singlem_package_sha256s": ["2b2afe0114de20451fccfe74360756376dc83d001d890e84e322ab0833eca6ba", "7f406a73d8bb176994055cb966ff350e208986d12c8215722686c17c26e548c7", "735b44ae547c133163cb7d40f417292c35423864d00c95e7f1b32091b27d46c5", "8fc6dcce2766cc01defb3b5c689a1ed8ce9d59b725c67e58c2044dafaae908b3", "172df49937742b8411d41d217500d862567374401eaf393b25107b22ac630202", "4cb1bf226bf28d8198ed5c29e8a76df411d96a6c3ce1256af16887b9a184b0a6", "d473d3ae677e6e46202461ccdedb2aef23c0a10a3412422586b37e397ca37294", "431a2860bb890cd1c7193c565cbf0cc227850cba36fb17fe94df686e74ee9b11", "faa663527bb9aea63cef03859311f2e7f55fe98590a5ec85c5ba85815a6fd13e", "a0daf111380e6e499ad9c10c3ac413aa9016c7503dd459825100168524bff0d1", "aba631d4735aeb9d2dfbbbfee1c0739bf9e99ad6532a3be04ff627f3e6efdae2", "bba10c1feb0c26bdf46aa3d1dcb992744a699cde5cf02bb2728f8397378b342f", "4d91dd794b25fd256508f0814f6a2d31e20dc85e0aa9ea405031398565276768", "9b23c524a6210af0706eea7252c2d378888029f141b9305c3e88cbac3fd83f88", "50a209417b455a48bc67702d6a3809a172c57f00785d8c705a3322e6e2a71f72"], '
         '"version": 1, '
         '"alignment_hmm_sha256s": ["dd9b7e283598360b89ec91ff3f5c509361a6108a2eadc44bfb29646b1510f6b7", "b1bb943c3449a78f937db960bfdf6b2bed641388d33fce3cb2d5f69e79946ea6", "de92c90f2c83e380ae3953972fb63fcb8ce868dab87a305f9f1811b84ffb3d39", "453ed4a62608a4aec36117a2dd1a276709ff6b130ecb8d7b1612926bfab25527", "20cc450cf4157ecf1772e0325d4d8ed400b597d888a5cb5044ca69098f935656", "4b0bf5b3d7fd2ca16e54eed59d3a07eab388f70f7078ac096bf415f1c04731d9", "7cbba7ba0ed58d21c7519ba3fcef0abe43378e5c38c985b0d5e0e5219f141d92", "4a3bbe5ac594ef3c7c820e74544828e19eca68bf860d64f928729eb4530fce4e", "06a4bed0a765971b891ca4a4bf5680aeef4a4a249ce0c028798c0e912f0ccfb4", "2678fe218ca860a2d88bdbf76935d8c78a00ab6603a041a432505d754ef08250", "b54ff98aa03ab31af39c737a569b23ee4ed9296c8ea088562bfb3db87c38fe4a", '
         '"4ae31f14067bf183f38dca20f2aefb580e5ff25848881dd988908b70b67761bb", "d7bb3d544133f38110a329712b3ace7e7d7c989dafa3815d2d5a292b4c575f50", "7639bb919ef54f7baff3ed3a8c924efca97ed375cf4120a6e05d98fd6ef52cbb", "6923b889888ea34fabf463b2c8ad5fe23c94828f1a2631a07601f246f5e87150"], '
         '"otus": [["4.11.ribosomal_protein_L10", "minimal", "TTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACTATGGTA", 2, 4.878048780487805, "Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Staphylococcaceae; g__Staphylococcus", ["HWI-ST1243:156:D1K83ACXX:7:1106:18671:79482", "HWI-ST1243:156:D1K83ACXX:7:1105:19152:28331"], [60, 60], false], '
         '["4.12.ribosomal_protein_L11_rplK", "minimal", "CCTGCAGGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTG", 4, 9.75609756097561, "Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales", ["HWI-ST1243:156:D1K83ACXX:7:1109:18214:9910", "HWI-ST1243:156:D1K83ACXX:7:1103:21187:63124", "HWI-ST1243:156:D1K83ACXX:7:1108:10813:6928", "HWI-ST1243:156:D1K83ACXX:7:1105:12385:81842"], [60, 60, 60, 60], false]]}'
     )
     table_collection = OtuTableCollection()
     table_collection.add_archive_otu_table(StringIO(archive))
     with tempdir.TempDir() as tmp:
         krona_path = os.path.join(tmp, 'KronaOK.html')
         Summariser.summarise(krona_output=krona_path,
                              table_collection=table_collection)
         self.assertTrue(os.path.exists(krona_path))
Esempio n. 9
0
    def test_minimal(self):
        """Strain summary of three OTUs from one gene in one sample.

        The expected output labels the count-9 sequence as 'reference' (with
        '0' in the fourth column) and the other two as 'strain' rows with
        '3' and '1' in that column.
        """
        gene = '2.12.ribosomal_protein_L11_rplK.gpkg'
        sample = 'minimal'
        taxonomy = 'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales'

        seq_ggt = 'GGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC'
        seq_agt = 'AGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC'
        seq_gaa = 'GAAAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC'

        input_rows = [
            self.headers,
            [gene, sample, seq_ggt, '7', '17.07', taxonomy],
            [gene, sample, seq_agt, '9', '18.07', taxonomy],
            [gene, sample, seq_gaa, '8', '17.57', taxonomy],
        ]
        input_text = "".join("\t".join(row) + "\n" for row in input_rows)

        expected_rows = [
            self.output_headers,
            ['reference', gene, sample, '0', seq_agt, '9', '18.07', taxonomy],
            ['strain', gene, sample, '3', seq_gaa, '8', '17.57', taxonomy],
            ['strain', gene, sample, '1', seq_ggt, '7', '17.07', taxonomy],
        ]
        expected_text = "".join("\t".join(row) + "\n" for row in expected_rows)

        sink = StringIO()
        collection = OtuTableCollection()
        collection.add_otu_table(StringIO(input_text))
        StrainSummariser().summarise_strains(
            table_collection=collection,
            output_table_io=sink)
        self.assertEqual(expected_text, sink.getvalue())
Esempio n. 10
0
 def test_nothing_returned(self):
     """Rarefying a table that contains only the header row returns no OTUs."""
     header_line = "\t".join(
         ['gene', 'sample', 'sequence', 'num_hits', 'coverage', 'taxonomy'])
     empty_table = header_line + "\n"

     otu_tables = OtuTableCollection()
     otu_tables.add_otu_table(StringIO(empty_table))
     # Nothing to iterate before rarefaction...
     self.assertEqual(0, len(list(otu_tables)))

     # ...and nothing after rarefying to a depth of 0.
     result = Rarefier().rarefy(otu_tables, 0)
     self.assertEqual(0, len(list(result)))
Esempio n. 11
0
    def test_multiple_genes(self):
        """Rarefying to 2 with two genes in one sample returns all three OTUs.

        One gene has a single OTU with a count of 2; the other has two OTUs
        with a count of 1 each. The rarefied table is compared against typed
        rows (int counts, float coverages) via ``assertEqualOtuTable2``.
        """
        columns = ['gene', 'sample', 'sequence', 'num_hits', 'coverage', 'taxonomy']
        staph_taxonomy = 'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Staphylococcaceae; g__Staphylococcus'
        bacillales_taxonomy = 'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales'

        seq_l11 = 'TTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACTATGGTA'
        seq_l10_first = 'TTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACTATGGTT'
        seq_l10_second = 'ATACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACTATGGTT'

        # Input table: every field is text, as it would be in a TSV file.
        input_rows = [
            list(columns),
            ['4.11.ribosomal_protein_L11', 'minimal', seq_l11, '2', '4.88', staph_taxonomy],
            ['4.11.ribosomal_protein_L10', 'minimal', seq_l10_first, '1', '9.76', bacillales_taxonomy],
            ['4.11.ribosomal_protein_L10', 'minimal', seq_l10_second, '1', '9.76', bacillales_taxonomy],
        ]
        input_text = "".join("\t".join(row) + "\n" for row in input_rows)

        # Expected table: same rows with numeric count and coverage.
        expected_rows = [
            list(columns),
            ['4.11.ribosomal_protein_L11', 'minimal', seq_l11, 2, 4.88, staph_taxonomy],
            ['4.11.ribosomal_protein_L10', 'minimal', seq_l10_first, 1, 9.76, bacillales_taxonomy],
            ['4.11.ribosomal_protein_L10', 'minimal', seq_l10_second, 1, 9.76, bacillales_taxonomy],
        ]

        collection = OtuTableCollection()
        collection.add_otu_table(StringIO(input_text))

        rarefied = Rarefier().rarefy(
            collection, 2, random_generator=PredictableRandomGenerator())
        self.assertIsInstance(rarefied, OtuTable)
        self.assertEqualOtuTable2(expected_rows, rarefied)
Esempio n. 12
0
    def test_multiple_samples(self):
        """Appraisal yields one result per metagenome sample.

        Sample 'minimal' shares its sequence with the genome table and is
        expected to have 7 binned / 0 not found; sample 'another' has a
        sequence absent from the genome table and is expected to have
        0 binned / 4 not found.
        """
        shared_sequence = 'GGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC'

        metagenome_rows = [
            self.headers,
            ['4.12.ribosomal_protein_L11_rplK', 'minimal', shared_sequence,
             '7', '17.07',
             'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales'],
            ['4.11.ribosomal_protein_L10', 'another',
             'CCTGCAGGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTG',
             '4', '9.76',
             'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Staphylococcaceae; g__Staphylococcus'],
        ]
        genome_rows = [
            self.headers,
            ['4.12.ribosomal_protein_L11_rplK', 'genome', shared_sequence,
             '1', '1.02',
             'Root; d__Bacteria; p__Firmicutes; c__Bacilli'],
        ]
        # Neither table text ends with a trailing newline.
        metagenome_text = "\n".join("\t".join(row) for row in metagenome_rows)
        genome_text = "\n".join("\t".join(row) for row in genome_rows)

        appraiser = Appraiser()
        metagenome_tables = OtuTableCollection()
        metagenome_tables.add_otu_table(StringIO(metagenome_text))
        genome_tables = OtuTableCollection()
        genome_tables.add_otu_table(StringIO(genome_text))

        appraisal = appraiser.appraise(
            genome_otu_table_collection=genome_tables,
            metagenome_otu_table_collection=metagenome_tables)
        self.assertEqual(2, len(appraisal.appraisal_results))

        # (result index, sample name, num_binned, num_not_found)
        expectations = [
            (1, 'minimal', 7, 0),
            (0, 'another', 0, 4),
        ]
        for index, sample_name, binned, not_found in expectations:
            result = appraisal.appraisal_results[index]
            self.assertEqual(sample_name, result.metagenome_sample_name)
            self.assertEqual(binned, result.num_binned)
            self.assertEqual(not_found, result.num_not_found)
Esempio n. 13
0
    def test_contamination_near_enough(self):
        """Appraise twice: with default settings, then with sequence_identity=0.9.

        With the default call both samples are expected to have nothing
        binned (8 and 7 not found). With ``sequence_identity=0.9`` the
        'another' sample is expected to have all 8 binned, while the other
        result still has 0 binned / 7 not found.
        """
        l11_gene = '4.12.ribosomal_protein_L11_rplK'
        s5_gene = '4.16.ribosomal_protein_S5'
        bacilli_taxonomy = 'Root; d__Bacteria; p__Firmicutes; c__Bacilli'
        actino_taxonomy = 'Root; d__Bacteria; p__Actinobacteria; c__Actinobacteria'
        l11_sequence = 'GGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC'

        metagenome_rows = [
            self.headers,
            [l11_gene, 'minimal', l11_sequence, '7', '17.07',
             'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales'],
            [s5_gene, 'another',
             'GGTACCGGCGTCATCGCCGGTGGCGCGGCACGCGCCATCTTGGAGATGGCCGGCATCCGC',
             '8', '12.50', actino_taxonomy],
        ]
        genome_rows = [
            self.headers,
            [l11_gene, 'genome', l11_sequence, '1', '1.02', bacilli_taxonomy],
            # One base pair different to the genome L11 sequence above.
            [l11_gene, 'genome',
             'AGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC',
             '1', '1.02', bacilli_taxonomy],
            # One base pair different to the metagenome S5 sequence.
            [s5_gene, 'genome',
             'GGTACCGGCGTCATCGCCGGTGGGGCGGCACGCGCCATCTTGGAGATGGCCGGCATCCGC',
             '1', '1.06', actino_taxonomy],
        ]
        metagenome_text = "\n".join("\t".join(row) for row in metagenome_rows)
        genome_text = "\n".join("\t".join(row) for row in genome_rows)

        appraiser = Appraiser()
        metagenome_tables = OtuTableCollection()
        metagenome_tables.add_otu_table(StringIO(metagenome_text))
        genome_tables = OtuTableCollection()
        genome_tables.add_otu_table(StringIO(genome_text))

        # First pass: default appraisal, results checked in returned order.
        appraisal = appraiser.appraise(
            genome_otu_table_collection=genome_tables,
            metagenome_otu_table_collection=metagenome_tables)
        self.assertEqual(2, len(appraisal.appraisal_results))
        first, second = appraisal.appraisal_results[0], appraisal.appraisal_results[1]
        self.assertEqual(0, first.num_binned)
        self.assertEqual(8, first.num_not_found)
        self.assertEqual(0, second.num_binned)
        self.assertEqual(7, second.num_not_found)

        # Second pass: relaxed identity, results ordered by sample name.
        appraisal = appraiser.appraise(
            genome_otu_table_collection=genome_tables,
            metagenome_otu_table_collection=metagenome_tables,
            sequence_identity=0.9)
        self.assertEqual(2, len(appraisal.appraisal_results))
        by_sample = sorted(
            appraisal.appraisal_results,
            key=lambda result: result.metagenome_sample_name)
        another_result = by_sample[0]
        self.assertEqual('another', another_result.metagenome_sample_name)
        self.assertEqual(8, another_result.num_binned)
        self.assertEqual(0, another_result.num_not_found)
        remaining_result = by_sample[1]
        self.assertEqual(0, remaining_result.num_binned)
        self.assertEqual(7, remaining_result.num_not_found)
Esempio n. 14
0
 def test_wide_format(self):
     """Check the wide-format table written for two samples and two markers.

     The expected text has a 'marker/sequence/<samples>/taxonomy' header
     and a per-sample count column, with 0 where a sample lacks the OTU.
     """
     columns = ['gene', 'sample', 'sequence', 'num_hits', 'coverage', 'taxonomy']
     l10_minimal = [
         '4.11.ribosomal_protein_L10', 'minimal',
         'TTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACTATGGTA',
         '2', '4.88',
         'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Staphylococcaceae; g__Staphylococcus',
     ]
     l11_minimal = [
         '4.12.ribosomal_protein_L11_rplK', 'minimal',
         'CCTGCAGGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTG',
         '4', '9.76',
         'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales',
     ]
     # Same OTU as l10_minimal, observed in a second sample.
     l10_maximal = list(l10_minimal)
     l10_maximal[1] = 'maximal'

     long_format = "".join(
         "\t".join(row) + "\n"
         for row in (columns, l10_minimal, l11_minimal, l10_maximal))

     expected_wide = (
         'marker\tsequence\tminimal\tmaximal\ttaxonomy\n'
         '4.11.ribosomal_protein_L10\tTTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACTATGGTA\t2\t2\tRoot; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Staphylococcaceae; g__Staphylococcus\n'
         '4.12.ribosomal_protein_L11_rplK\tCCTGCAGGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTG\t4\t0\tRoot; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales\n'
     )

     written = StringIO()
     otu_tables = OtuTableCollection()
     otu_tables.add_otu_table(StringIO(long_format))
     Summariser().write_wide_format_otu_table(
         table_collection=otu_tables,
         output_table_io=written)
     self.assertEqual(expected_wide, written.getvalue())
Esempio n. 15
0
    def test_using_real_generator(self):
        """Rarefy a single count-2 OTU to 2 without passing a random generator.

        No ``random_generator`` argument is supplied, so the rarefier's
        default is used; the single OTU is expected back with a count of 2.
        """
        rows = [
            ['gene', 'sample', 'sequence', 'num_hits', 'coverage', 'taxonomy'],
            [
                '4.11.ribosomal_protein_L10',
                'minimal',
                'TTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACTATGGTA',
                '2',
                '4.88',
                'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Staphylococcaceae; g__Staphylococcus',
            ],
        ]
        table_text = "".join("\t".join(row) + "\n" for row in rows)

        collection = OtuTableCollection()
        collection.add_otu_table(StringIO(table_text))

        rarefied = Rarefier().rarefy(collection, 2)
        self.assertIsInstance(rarefied, OtuTable)

        otus = list(rarefied)
        self.assertEqual(1, len(otus))
        self.assertEqual(2, otus[0].count)
Esempio n. 16
0
    def test_cluster_across_samples(self):
        """OTUs from two samples that differ by one base share a representative.

        At a threshold of 58.5/60 the test expects two sample-wise results
        with equal ``representative_otu`` values, the representative coming
        from sample 'ma' (the row with the higher count in the fixture).
        """
        header = ['gene', 'sample', 'sequence', 'num_hits', 'coverage', 'taxonomy']
        minimal_row = [
            '4.11.ribosomal_protein_L10',
            'minimal',
            'TTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACT',
            '2',
            '4.88',
            'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Staphylococcaceae; g__Staphylococcus',
        ]
        ma_row = [
            '4.12.ribosomal_protein_L11_rplK',
            'ma',
            'TTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACA',
            '4',
            '9.76',
            'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales',
        ]
        table_text = "".join(
            "\t".join(row) + "\n" for row in (header, minimal_row, ma_row))

        collection = OtuTableCollection()
        collection.add_otu_table(StringIO(table_text))

        clustered = list(Clusterer().each_cluster(collection, 58.5/60))
        self.assertEqual(2, len(clustered))

        first, second = clustered[0], clustered[1]
        self.assertIsInstance(first, SampleWiseClusteredOtu)
        self.assertEqual(first.representative_otu, second.representative_otu)
        self.assertEqual('ma', first.representative_otu.sample_name)
Esempio n. 17
0
    def test_multiple_genes(self):
        """Rarefying to 2 across two genes yields three OTUs with counts [1, 1, 2]."""
        bacillales_taxonomy = 'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales'
        rows = [
            ['gene', 'sample', 'sequence', 'num_hits', 'coverage', 'taxonomy'],
            [
                '4.11.ribosomal_protein_L11', 'minimal',
                'TTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACTATGGTA',
                '2', '4.88',
                'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Staphylococcaceae; g__Staphylococcus',
            ],
            [
                '4.11.ribosomal_protein_L10', 'minimal',
                'TTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACTATGGTT',
                '1', '9.76',
                bacillales_taxonomy,
            ],
            [
                '4.11.ribosomal_protein_L10', 'minimal',
                'ATACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACTATGGTT',
                '1', '9.76',
                bacillales_taxonomy,
            ],
        ]
        table_text = "".join("\t".join(row) + "\n" for row in rows)

        collection = OtuTableCollection()
        collection.add_otu_table(StringIO(table_text))

        rarefied = Rarefier().rarefy(
            collection, 2, random_generator=PredictableRandomGenerator())
        self.assertIsInstance(rarefied, OtuTable)

        otus = list(rarefied)
        self.assertEqual(3, len(otus))
        self.assertEqual([1, 1, 2], [otu.count for otu in otus])
Esempio n. 18
0
    def test_cluster_two(self):
        """Cluster two OTUs at 0.5 identity and expect them to merge into one.

        The merged cluster should carry the summed count (2 + 4 = 6) and a
        coverage of 9.76 / 4 * 6.
        """
        e = [
            ['gene', 'sample', 'sequence', 'num_hits', 'coverage', 'taxonomy'],
            [
                '4.11.ribosomal_protein_L10', 'minimal',
                'TTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACTATGGTA',
                '2', '4.88',
                'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Staphylococcaceae; g__Staphylococcus',
            ],
            [
                '4.12.ribosomal_protein_L11_rplK', 'minimal',
                'TTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACTATGGTT',
                '4', '9.76',
                'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales',
            ],
        ]
        exp = "\n".join(["\t".join(x) for x in e] + [''])

        table_collection = OtuTableCollection()
        table_collection.add_otu_table(StringIO(exp))

        clusters = list(Clusterer().each_cluster(table_collection, 0.5))
        self.assertEqual(1, len(clusters))
        self.assertIsInstance(clusters[0], SampleWiseClusteredOtu)
        c = clusters[0]
        self.assertEqual(6, c.count)
        # The expected coverage is a computed float; compare with a tolerance
        # so the test does not depend on the exact order of float operations
        # inside the clusterer.
        self.assertAlmostEqual(9.76 / 4 * 6, c.coverage)
Esempio n. 19
0
    def test_contamination(self):
        """Appraise one metagenome against a genome holding two distinct L11 sequences.

        A single appraisal result is expected, with 8 hits binned and 7 not
        found (these match the S5 and L11 metagenome counts respectively).
        """
        def as_tsv(table):
            # Tab-separated rows joined by newlines, no trailing newline.
            return "\n".join("\t".join(row) for row in table)

        l11_gene = '4.12.ribosomal_protein_L11_rplK'
        s5_gene = '4.16.ribosomal_protein_S5'
        l11_sequence = 'GGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC'
        s5_sequence = 'GGTACCGGCGTCATCGCCGGTGGCGCGGCACGCGCCATCTTGGAGATGGCCGGCATCCGC'
        actino_taxonomy = 'Root; d__Bacteria; p__Actinobacteria; c__Actinobacteria'
        bacilli_taxonomy = 'Root; d__Bacteria; p__Firmicutes; c__Bacilli'

        metagenomes = as_tsv([
            self.headers,
            [l11_gene, 'minimal', l11_sequence, '7', '17.07',
             'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales'],
            [s5_gene, 'minimal', s5_sequence, '8', '12.50', actino_taxonomy],
        ])
        genomes = as_tsv([
            self.headers,
            [l11_gene, 'genome', l11_sequence, '1', '1.02', bacilli_taxonomy],
            # One base pair different to the L11 sequence above.
            [l11_gene, 'genome',
             'AGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC',
             '1', '1.02', bacilli_taxonomy],
            [s5_gene, 'genome', s5_sequence, '1', '1.06', actino_taxonomy],
        ])

        appraiser = Appraiser()
        metagenome_collection = OtuTableCollection()
        metagenome_collection.add_otu_table(StringIO(metagenomes))
        genome_collection = OtuTableCollection()
        genome_collection.add_otu_table(StringIO(genomes))

        appraisal = appraiser.appraise(
            genome_otu_table_collection=genome_collection,
            metagenome_otu_table_collection=metagenome_collection)
        self.assertEqual(1, len(appraisal.appraisal_results))
        result = appraisal.appraisal_results[0]
        self.assertEqual(8, result.num_binned)
        self.assertEqual(7, result.num_not_found)
Esempio n. 20
0
    def test_clusterer_all_cluster_two_samples_some_cluster(self):
        """Appraise two metagenome samples against one genome at 0.7 identity.

        The non-poly-A OTUs and the genome OTU cluster together but are not
        exactly the same; the poly-A OTU of each sample is expected to be
        reported as not found.
        """
        near_genome_minimal = 'GGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC'
        near_genome_maximal = 'GGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATG'
        poly_a = 'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA'
        order_taxonomy = 'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales'
        genus_taxonomy = 'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Staphylococcaceae; g__Staphylococcus'

        metagenome_rows = [
            self.headers,
            ['4.12.ribosomal_protein_L11_rplK', 'minimal', near_genome_minimal,
             '7', '17.07', order_taxonomy],
            ['4.12.ribosomal_protein_L11_rplK', 'minimal', poly_a,
             '12', '17.07', order_taxonomy],
            ['4.11.ribosomal_protein_L10', 'maximal', poly_a,
             '4', '9.76', genus_taxonomy],
            ['4.11.ribosomal_protein_L10', 'maximal', near_genome_maximal,
             '1', '9.76', genus_taxonomy],
        ]
        metagenomes = "\n".join("\t".join(row) for row in metagenome_rows)

        genome_rows = [
            self.headers,
            ['4.12.ribosomal_protein_L11_rplK', 'genome',
             'GGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATA',
             '1', '1.02', 'Root; d__Bacteria; p__Firmicutes; c__Bacilli'],
        ]
        genomes = "\n".join("\t".join(row) for row in genome_rows)

        appraiser = Appraiser()
        metagenome_collection = OtuTableCollection()
        metagenome_collection.add_otu_table(StringIO(metagenomes))
        genome_collection = OtuTableCollection()
        genome_collection.add_otu_table(StringIO(genomes))
        appraisal = appraiser.appraise(
            genome_otu_table_collection=genome_collection,
            metagenome_otu_table_collection=metagenome_collection,
            sequence_identity=0.7)
        self.assertEqual(2, len(appraisal.appraisal_results))

        # The 'minimal' sample is expected at index 1.
        minimal_result = appraisal.appraisal_results[1]
        self.assertEqual('minimal', minimal_result.metagenome_sample_name)
        self.assertEqual(7, minimal_result.num_binned)
        self.assertEqual(12, minimal_result.num_not_found)
        self.assertEqual(1, len(minimal_result.binned_otus))
        self.assertEqual(1, len(minimal_result.not_found_otus))
        binned = minimal_result.binned_otus[0]
        self.assertEqual(near_genome_minimal, binned.sequence)
        self.assertEqual('minimal', binned.sample_name)
        missing = minimal_result.not_found_otus[0]
        self.assertEqual(poly_a, missing.sequence)
        self.assertEqual('minimal', missing.sample_name)

        # The 'maximal' sample is expected at index 0.
        maximal_result = appraisal.appraisal_results[0]
        self.assertEqual('maximal', maximal_result.metagenome_sample_name)
        self.assertEqual(1, maximal_result.num_binned)
        self.assertEqual(4, maximal_result.num_not_found)
        self.assertEqual(1, len(maximal_result.binned_otus))
        self.assertEqual(1, len(maximal_result.not_found_otus))
        binned = maximal_result.binned_otus[0]
        self.assertEqual(near_genome_maximal, binned.sequence)
        self.assertEqual('maximal', binned.sample_name)
        missing = maximal_result.not_found_otus[0]
        self.assertEqual(poly_a, missing.sequence)
        self.assertEqual('maximal', missing.sample_name)
Esempio n. 21
0
    def test_using_real_generator(self):
        """Rarefy a single OTU of count 2 to 2 without supplying a random_generator.

        The one OTU is expected to come back with its count of 2 unchanged.
        """
        header = ['gene', 'sample', 'sequence', 'num_hits', 'coverage', 'taxonomy']
        only_otu = [
            '4.11.ribosomal_protein_L10', 'minimal',
            'TTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACTATGGTA',
            '2', '4.88',
            'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Staphylococcaceae; g__Staphylococcus',
        ]
        otu_table_text = "\n".join("\t".join(row) for row in (header, only_otu)) + "\n"

        collection = OtuTableCollection()
        collection.add_otu_table(StringIO(otu_table_text))

        rarefied = Rarefier().rarefy(collection, 2)
        self.assertIsInstance(rarefied, OtuTable)
        rarefied_otus = list(rarefied)
        self.assertEqual(1, len(rarefied_otus))
        self.assertEqual(2, rarefied_otus[0].count)
Esempio n. 22
0
    def test_assembly_input(self):
        """Appraise a metagenome against both a genome and an assembly table.

        Expected for sample 'minimal': 7 binned, 11 assembled and 5 not found,
        with the matching OTU sequences in each category.
        """
        def as_tsv(table):
            # Tab-separated rows joined by newlines, no trailing newline.
            return "\n".join("\t".join(row) for row in table)

        l11_sequence = 'GGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC'
        l10_sequence = 'CCTGCAGGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTG'
        l16_sequence = 'CAAAAAAAAAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTG'
        order_taxonomy = 'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales'
        genus_taxonomy = 'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Staphylococcaceae; g__Staphylococcus'

        metagenomes = as_tsv([
            self.headers,
            ['4.12.ribosomal_protein_L11_rplK', 'minimal', l11_sequence, '7', '17.07', order_taxonomy],
            ['4.11.ribosomal_protein_L10', 'minimal', l10_sequence, '4', '9.76', genus_taxonomy],
            ['4.14.ribosomal_protein_L16_L10E_rplP', 'minimal', l16_sequence, '5', '10.76', genus_taxonomy],
        ])
        assemblies = as_tsv([
            self.headers,
            ['4.12.ribosomal_protein_L11_rplK', 'assembly', l11_sequence, '1', '1.007', order_taxonomy],
            ['4.11.ribosomal_protein_L10', 'assembly', l10_sequence, '1', '1.01', genus_taxonomy],
        ])
        genomes = as_tsv([
            self.headers,
            ['4.12.ribosomal_protein_L11_rplK', 'genome', l11_sequence, '1', '1.02',
             'Root; d__Bacteria; p__Firmicutes; c__Bacilli'],
        ])

        appraiser = Appraiser()
        metagenome_collection = OtuTableCollection()
        metagenome_collection.add_otu_table(StringIO(metagenomes))
        genome_collection = OtuTableCollection()
        genome_collection.add_otu_table(StringIO(genomes))
        assembly_collection = OtuTableCollection()
        assembly_collection.add_otu_table(StringIO(assemblies))
        appraisal = appraiser.appraise(
            genome_otu_table_collection=genome_collection,
            metagenome_otu_table_collection=metagenome_collection,
            assembly_otu_table_collection=assembly_collection)
        self.assertEqual(1, len(appraisal.appraisal_results))

        result = appraisal.appraisal_results[0]
        self.assertEqual(7, result.num_binned)
        self.assertEqual(11, result.num_assembled)
        self.assertEqual(5, result.num_not_found)
        self.assertEqual('minimal', result.metagenome_sample_name)

        self.assertEqual(1, len(result.binned_otus))
        self.assertEqual(l11_sequence, result.binned_otus[0].sequence)

        self.assertEqual(2, len(result.assembled_otus))
        assembled_sequences = sorted([o.sequence for o in result.assembled_otus])
        self.assertEqual(sorted([l11_sequence, l10_sequence]), assembled_sequences)

        self.assertEqual(1, len(result.not_found_otus))
        self.assertEqual(l16_sequence, result.not_found_otus[0].sequence)
Esempio n. 23
0
    def test_target_taxonomy(self):
        """Check Chancer predictions with and without a target taxonomy.

        The string form of the single prediction for sample 'minimal' is
        compared for an empty target taxonomy and for a family-level one.
        """
        genus_taxonomy = 'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Staphylococcaceae; g__Staphylococcus'
        rows = [
            self.headers,
            ['4.12.ribosomal_protein_L11_rplK', 'minimal',
             'GGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC',
             '7', '17.07',
             'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales'],
            ['4.11.ribosomal_protein_L10', 'minimal',
             'CCTGCAGGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTG',
             '4', '9.76', genus_taxonomy],
            ['4.11.ribosomal_protein_L10', 'minimal',
             'CCTGCAGGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTA',
             '5', '10.76', genus_taxonomy],
        ]
        metagenomes = "\n".join("\t".join(row) for row in rows)

        collection = OtuTableCollection()
        collection.add_otu_table(StringIO(metagenomes))

        # No taxonomic restriction.
        untargeted = Chancer().predict_samples(metagenomes=collection,
                                               target_taxonomy=[])
        self.assertEqual(["minimal\t8.0\t4.75"],
                         [str(prediction) for prediction in untargeted])

        # Restricted to the Staphylococcaceae family.
        family = Taxonomy.split_taxonomy(
            'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Staphylococcaceae')
        targeted = Chancer().predict_samples(metagenomes=collection,
                                             target_taxonomy=family)
        self.assertEqual(["minimal\t9.0\t2.5"],
                         [str(prediction) for prediction in targeted])
Esempio n. 24
0
    def test_print_appraisal(self):
        """Appraise two samples against one genome and check the printed reports.

        Verifies the per-sample results, the summary table written by
        print_appraisal, and the binned / unaccounted-for OTU tables.
        """
        minimal_row = [
            '4.12.ribosomal_protein_L11_rplK', 'minimal',
            'GGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC',
            '7', '17.07',
            'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales',
        ]
        another_row = [
            '4.11.ribosomal_protein_L10', 'another',
            'CCTGCAGGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTG',
            '4', '9.76',
            'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Staphylococcaceae; g__Staphylococcus',
        ]
        genome_row = [
            '4.12.ribosomal_protein_L11_rplK', 'genome',
            'GGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC',
            '1', '1.02', 'Root; d__Bacteria; p__Firmicutes; c__Bacilli',
        ]
        metagenomes = "\n".join(
            "\t".join(row) for row in (self.headers, minimal_row, another_row))
        genomes = "\n".join(
            "\t".join(row) for row in (self.headers, genome_row))

        appraiser = Appraiser()
        metagenome_collection = OtuTableCollection()
        metagenome_collection.add_otu_table(StringIO(metagenomes))
        genome_collection = OtuTableCollection()
        genome_collection.add_otu_table(StringIO(genomes))
        appraisal = appraiser.appraise(
            genome_otu_table_collection=genome_collection,
            metagenome_otu_table_collection=metagenome_collection)
        self.assertEqual(2, len(appraisal.appraisal_results))

        minimal_result = appraisal.appraisal_results[1]
        self.assertEqual('minimal', minimal_result.metagenome_sample_name)
        self.assertEqual(7, minimal_result.num_binned)
        self.assertEqual(0, minimal_result.num_not_found)

        another_result = appraisal.appraisal_results[0]
        self.assertEqual('another', another_result.metagenome_sample_name)
        self.assertEqual(0, another_result.num_binned)
        self.assertEqual(4, another_result.num_not_found)

        # Summary table only.
        summary_io = StringIO()
        appraiser.print_appraisal(appraisal, True, summary_io)
        expected_summary = (
            "sample\tnum_binned\tnum_not_found\tpercent_binned\n"
            "another\t0\t4\t0.0\n"
            "minimal\t7\t0\t100.0\n"
            "total\t7\t4\t63.6\n"
            "average\t3.5\t2.0\t50.0\n")
        self.assertEqual(expected_summary, summary_io.getvalue())

        # Summary plus the per-category OTU tables.
        summary_io = StringIO()
        binned_io = StringIO()
        unaccounted_io = StringIO()
        appraiser.print_appraisal(
            appraisal, True, summary_io,
            binned_otu_table_io=binned_io,
            unaccounted_for_otu_table_io=unaccounted_io)

        header_line = "\t".join(self.headers)
        expected_binned = header_line + "\n" + "\t".join(minimal_row) + "\n"
        self.assertEqual(expected_binned, binned_io.getvalue())
        expected_unaccounted = header_line + "\n" + "\t".join(another_row) + "\n"
        self.assertEqual(expected_unaccounted, unaccounted_io.getvalue())
Esempio n. 25
0
    def test_target_taxonomy(self):
        """Compare Chancer's predictions for an empty and a family-level target taxonomy."""
        l10_gene = '4.11.ribosomal_protein_L10'
        staph_taxonomy = 'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Staphylococcaceae; g__Staphylococcus'
        otu_rows = [
            self.headers,
            [
                '4.12.ribosomal_protein_L11_rplK', 'minimal',
                'GGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC',
                '7', '17.07',
                'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales',
            ],
            [
                l10_gene, 'minimal',
                'CCTGCAGGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTG',
                '4', '9.76', staph_taxonomy,
            ],
            [
                l10_gene, 'minimal',
                'CCTGCAGGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTA',
                '5', '10.76', staph_taxonomy,
            ],
        ]
        metagenomes = "\n".join("\t".join(fields) for fields in otu_rows)

        metagenome_tables = OtuTableCollection()
        metagenome_tables.add_otu_table(StringIO(metagenomes))

        # Case 1: empty target taxonomy.
        rendered = list(map(str, Chancer().predict_samples(
            metagenomes=metagenome_tables,
            target_taxonomy=[])))
        self.assertEqual(["minimal\t8.0\t4.75"], rendered)

        # Case 2: target taxonomy down to the family level.
        rendered = list(map(str, Chancer().predict_samples(
            metagenomes=metagenome_tables,
            target_taxonomy=Taxonomy.split_taxonomy(
                'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Staphylococcaceae'))))
        self.assertEqual(["minimal\t9.0\t2.5"], rendered)
Esempio n. 26
0
    def test_appraise_plot_real_data(self):
        """Not a real test, just developing the code"""
        example_dir = os.path.join(path_to_data, 'appraise_example2')

        appraiser = Appraiser()

        # Load the reads, binned-genome and assembly OTU tables from disk.
        metagenome_collection = OtuTableCollection()
        reads_path = os.path.join(
            example_dir, 'SRR5040536.reads.long_sample_names.otu_table.csv')
        with open(reads_path) as reads_io:
            metagenome_collection.add_otu_table(reads_io)

        genome_collection = OtuTableCollection()
        binned_path = os.path.join(
            example_dir, 'SRR5040536.binned.otu_table.csv')
        with open(binned_path) as binned_io:
            genome_collection.add_otu_table(binned_io)

        assembly_collection = OtuTableCollection()
        assembly_path = os.path.join(
            example_dir, 'SRR5040536.assembly.otu_table.csv')
        with open(assembly_path) as assembly_io:
            assembly_collection.add_otu_table(assembly_io)

        appraisal = appraiser.appraise(
            genome_otu_table_collection=genome_collection,
            metagenome_otu_table_collection=metagenome_collection,
            assembly_otu_table_collection=assembly_collection)

        # NOTE(review): the temporary file is created but never used; the plot
        # is written under the hard-coded base '/tmp/a.svg' instead of
        # unused_tmp.name. No assertions are made on the output.
        with tempfile.NamedTemporaryFile(
                suffix='.svg', prefix='single_test_appraisal.') as unused_tmp:
            appraisal.plot(
                output_svg_base='/tmp/a.svg',
                cluster_identity=0.89,
                doing_assembly=True,
                doing_binning=True)
Esempio n. 27
0
    def test_contamination(self):
        """Appraise a metagenome against a genome that has two different L11 sequences.

        One appraisal result is expected, reporting 8 binned and 7 not found.
        """
        firmicutes_class = 'Root; d__Bacteria; p__Firmicutes; c__Bacilli'
        actinobacteria = 'Root; d__Bacteria; p__Actinobacteria; c__Actinobacteria'
        shared_l11 = 'GGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC'
        shared_s5 = 'GGTACCGGCGTCATCGCCGGTGGCGCGGCACGCGCCATCTTGGAGATGGCCGGCATCCGC'

        metagenome_rows = [
            self.headers,
            [
                '4.12.ribosomal_protein_L11_rplK', 'minimal', shared_l11,
                '7', '17.07',
                'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales',
            ],
            [
                '4.16.ribosomal_protein_S5', 'minimal', shared_s5,
                '8', '12.50', actinobacteria,
            ],
        ]
        metagenomes = "\n".join("\t".join(fields) for fields in metagenome_rows)

        genome_rows = [
            self.headers,
            [
                '4.12.ribosomal_protein_L11_rplK', 'genome', shared_l11,
                '1', '1.02', firmicutes_class,
            ],
            [
                '4.12.ribosomal_protein_L11_rplK', 'genome',
                # one base pair different to the one above
                'AGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC',
                '1', '1.02', firmicutes_class,
            ],
            [
                '4.16.ribosomal_protein_S5', 'genome', shared_s5,
                '1', '1.06', actinobacteria,
            ],
        ]
        genomes = "\n".join("\t".join(fields) for fields in genome_rows)

        appraiser = Appraiser()
        metagenome_tables = OtuTableCollection()
        metagenome_tables.add_otu_table(StringIO(metagenomes))
        genome_tables = OtuTableCollection()
        genome_tables.add_otu_table(StringIO(genomes))

        outcome = appraiser.appraise(
            genome_otu_table_collection=genome_tables,
            metagenome_otu_table_collection=metagenome_tables)
        self.assertEqual(1, len(outcome.appraisal_results))
        only_result = outcome.appraisal_results[0]
        self.assertEqual(8, only_result.num_binned)
        self.assertEqual(7, only_result.num_not_found)
Esempio n. 28
0
    def test_appraise_assembly_imperfectly(self):
        """Appraise with genome and assembly tables, exactly and at 0.9 identity.

        The metagenome has one L11 OTU in sample 'minimal' and two S5 OTUs in
        sample 'another'. The genome and assembly tables hold the same three
        sequences: two L11 sequences one base apart, and an S5 sequence one
        base different from the first metagenome S5 OTU. The first appraisal
        uses the default sequence identity; the second passes
        sequence_identity=0.9.
        """
        metagenome_otu_table = [
            self.headers,
            [
                '4.12.ribosomal_protein_L11_rplK', 'minimal',
                'GGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC',
                '7', '17.07',
                'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales'
            ],
            [
                '4.16.ribosomal_protein_S5', 'another',
                'GGTACCGGCGTCATCGCCGGTGGCGCGGCACGCGCCATCTTGGAGATGGCCGGCATCCGC',
                '8', '12.50',
                'Root; d__Bacteria; p__Actinobacteria; c__Actinobacteria'
            ],
            [
                '4.16.ribosomal_protein_S5',
                'another',
                'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAATGGCCGGCATCCGC',  # way different to the one above
                '9',
                '17.50',
                'Root; d__Bacteria; p__Actinobacteria; c__Actinobacteria'
            ]
        ]
        metagenomes = "\n".join(["\t".join(x) for x in metagenome_otu_table])
        genomes_otu_table = [
            self.headers,
            [
                '4.12.ribosomal_protein_L11_rplK', 'genome',
                'GGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC',
                '1', '1.02', 'Root; d__Bacteria; p__Firmicutes; c__Bacilli'
            ],
            [
                '4.12.ribosomal_protein_L11_rplK',
                'genome',
                'AGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC',  # one base pair different to the one above
                '1',
                '1.02',
                'Root; d__Bacteria; p__Firmicutes; c__Bacilli'
            ],
            [
                '4.16.ribosomal_protein_S5', 'genome',
                'GGTACCGGCGTCATCGCCGGTGGGGCGGCACGCGCCATCTTGGAGATGGCCGGCATCCGC',
                '1', '1.06',
                'Root; d__Bacteria; p__Actinobacteria; c__Actinobacteria'
            ]
        ]
        genomes = "\n".join(["\t".join(x) for x in genomes_otu_table])
        assemblies_otu_table = [
            self.headers,
            [
                '4.12.ribosomal_protein_L11_rplK', 'assembly',
                'GGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC',
                '1', '1.02', 'Root; d__Bacteria; p__Firmicutes; c__Bacilli'
            ],
            [
                '4.12.ribosomal_protein_L11_rplK',
                'assembly',
                'AGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC',  # one base pair different to the one above
                '1',
                '1.02',
                'Root; d__Bacteria; p__Firmicutes; c__Bacilli'
            ],
            [
                '4.16.ribosomal_protein_S5', 'assembly',
                'GGTACCGGCGTCATCGCCGGTGGGGCGGCACGCGCCATCTTGGAGATGGCCGGCATCCGC',
                '1', '1.06',
                'Root; d__Bacteria; p__Actinobacteria; c__Actinobacteria'
            ]
        ]
        # Build the assembly input from the assembly table itself. This
        # previously serialised genomes_otu_table by mistake, leaving
        # assemblies_otu_table unused and feeding OTUs labelled 'genome'
        # into the assembly collection.
        assemblies = "\n".join(
            ["\t".join(x) for x in assemblies_otu_table])

        appraiser = Appraiser()
        metagenome_collection = OtuTableCollection()
        metagenome_collection.add_otu_table(StringIO(metagenomes))
        genome_collection = OtuTableCollection()
        genome_collection.add_otu_table(StringIO(genomes))
        assembly_collection = OtuTableCollection()
        assembly_collection.add_otu_table(StringIO(assemblies))

        # Default sequence identity.
        app = appraiser.appraise(
            genome_otu_table_collection=genome_collection,
            metagenome_otu_table_collection=metagenome_collection,
            assembly_otu_table_collection=assembly_collection)
        self.assertEqual(2, len(app.appraisal_results))
        res = self._sort_appraisal_results(app.appraisal_results)
        a = res[0]
        self.assertEqual(0, a.num_binned)
        self.assertEqual(0, a.num_assembled)
        self.assertEqual(8 + 9, a.num_not_found)
        a = res[1]
        self.assertEqual(0, a.num_binned)
        self.assertEqual(7, a.num_assembled)
        self.assertEqual(0, a.num_not_found)

        # With sequence_identity=0.9 the count-8 S5 OTU in 'another' is
        # expected to be accounted for as well.
        app = appraiser.appraise(
            genome_otu_table_collection=genome_collection,
            metagenome_otu_table_collection=metagenome_collection,
            assembly_otu_table_collection=assembly_collection,
            sequence_identity=0.9)
        self.assertEqual(2, len(app.appraisal_results))
        res = self._sort_appraisal_results(app.appraisal_results)
        a = res[0]
        self.assertEqual('another', a.metagenome_sample_name)
        self.assertEqual(8, a.num_binned)
        self.assertEqual(8, a.num_assembled)
        self.assertEqual(9, a.num_not_found)
        a = res[1]
        self.assertEqual(0, a.num_binned)
        self.assertEqual(7, a.num_assembled)
        self.assertEqual(0, a.num_not_found)
Esempio n. 29
0
    def test_appraise_plot_real_data(self):
        """Not a real test, just developing the code"""
        def load_example_table(filename):
            # Read one OTU table from the appraise_example2 data directory
            # into a fresh collection.
            tables = OtuTableCollection()
            table_path = os.path.join(path_to_data, 'appraise_example2',
                                      filename)
            with open(table_path) as table_io:
                tables.add_otu_table(table_io)
            return tables

        appraiser = Appraiser()
        metagenome_collection = load_example_table(
            'SRR5040536.reads.long_sample_names.otu_table.csv')
        genome_collection = load_example_table(
            'SRR5040536.binned.otu_table.csv')
        assembly_collection = load_example_table(
            'SRR5040536.assembly.otu_table.csv')

        appraisal = appraiser.appraise(
            genome_otu_table_collection=genome_collection,
            metagenome_otu_table_collection=metagenome_collection,
            assembly_otu_table_collection=assembly_collection)

        # NOTE(review): the named temporary file is opened but not used; the
        # plot goes to the hard-coded base '/tmp/a.svg' rather than to the
        # temporary file's name. Nothing is asserted about the output.
        plot_options = dict(
            output_svg_base='/tmp/a.svg',
            cluster_identity=0.89,
            doing_assembly=True,
            doing_binning=True)
        with tempfile.NamedTemporaryFile(
                suffix='.svg', prefix='single_test_appraisal.') as unused_tmp:
            appraisal.plot(**plot_options)
Esempio n. 30
0
    def test_assembly_input(self):
        """Appraise sample 'minimal' using genome and assembly OTU tables together.

        The L11 OTU is expected to be binned, the L11 and L10 OTUs assembled,
        and the L16 OTU not found.
        """
        seq_l11 = 'GGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC'
        seq_l10 = 'CCTGCAGGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTG'
        seq_l16 = 'CAAAAAAAAAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTG'
        tax_order = 'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales'
        tax_genus = 'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Staphylococcaceae; g__Staphylococcus'

        metagenome_rows = [
            self.headers,
            ['4.12.ribosomal_protein_L11_rplK', 'minimal', seq_l11,
             '7', '17.07', tax_order],
            ['4.11.ribosomal_protein_L10', 'minimal', seq_l10,
             '4', '9.76', tax_genus],
            ['4.14.ribosomal_protein_L16_L10E_rplP', 'minimal', seq_l16,
             '5', '10.76', tax_genus],
        ]
        assembly_rows = [
            self.headers,
            ['4.12.ribosomal_protein_L11_rplK', 'assembly', seq_l11,
             '1', '1.007', tax_order],
            ['4.11.ribosomal_protein_L10', 'assembly', seq_l10,
             '1', '1.01', tax_genus],
        ]
        genome_rows = [
            self.headers,
            ['4.12.ribosomal_protein_L11_rplK', 'genome', seq_l11,
             '1', '1.02', 'Root; d__Bacteria; p__Firmicutes; c__Bacilli'],
        ]

        # Serialise each table as tab-separated lines (no trailing newline)
        # and load it into its own collection.
        collections = []
        for table_rows in (metagenome_rows, genome_rows, assembly_rows):
            serialised = "\n".join("\t".join(fields) for fields in table_rows)
            loaded = OtuTableCollection()
            loaded.add_otu_table(StringIO(serialised))
            collections.append(loaded)
        metagenome_tables, genome_tables, assembly_tables = collections

        appraiser = Appraiser()
        outcome = appraiser.appraise(
            genome_otu_table_collection=genome_tables,
            metagenome_otu_table_collection=metagenome_tables,
            assembly_otu_table_collection=assembly_tables)
        self.assertEqual(1, len(outcome.appraisal_results))

        sample_result = outcome.appraisal_results[0]
        self.assertEqual(7, sample_result.num_binned)
        self.assertEqual(11, sample_result.num_assembled)
        self.assertEqual(5, sample_result.num_not_found)
        self.assertEqual('minimal', sample_result.metagenome_sample_name)

        self.assertEqual(1, len(sample_result.binned_otus))
        self.assertEqual(seq_l11, sample_result.binned_otus[0].sequence)

        self.assertEqual(2, len(sample_result.assembled_otus))
        self.assertEqual(
            sorted([seq_l11, seq_l10]),
            sorted([otu.sequence for otu in sample_result.assembled_otus]))

        self.assertEqual(1, len(sample_result.not_found_otus))
        self.assertEqual(seq_l16, sample_result.not_found_otus[0].sequence)
Esempio n. 31
0
    def test_appraise_assembly_imperfectly(self):
        """Appraise a metagenome against genome and assembly OTU tables.

        The appraisal is run twice over the same three collections: once
        with the appraiser's default settings and once with
        ``sequence_identity=0.9``. After each run the per-sample binned /
        assembled / not-found counts are checked.
        """
        metagenome_otu_table = [
            self.headers,[
                '4.12.ribosomal_protein_L11_rplK',
                'minimal',
                'GGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC',
                '7','17.07','Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales'],
            [
                '4.16.ribosomal_protein_S5',
                'another',
                'GGTACCGGCGTCATCGCCGGTGGCGCGGCACGCGCCATCTTGGAGATGGCCGGCATCCGC',
                '8','12.50','Root; d__Bacteria; p__Actinobacteria; c__Actinobacteria'],
            [
                '4.16.ribosomal_protein_S5',
                'another',
                'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAATGGCCGGCATCCGC', # way different to the one above
                '9','17.50','Root; d__Bacteria; p__Actinobacteria; c__Actinobacteria']]
        metagenomes = "\n".join("\t".join(x) for x in metagenome_otu_table)
        genomes_otu_table = [
            self.headers,[
                '4.12.ribosomal_protein_L11_rplK',
                'genome',
                'GGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC',
                '1','1.02','Root; d__Bacteria; p__Firmicutes; c__Bacilli'],
            [
                '4.12.ribosomal_protein_L11_rplK',
                'genome',
                'AGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC', #one base pair different to the one above
                '1','1.02','Root; d__Bacteria; p__Firmicutes; c__Bacilli'],
            [
                '4.16.ribosomal_protein_S5',
                'genome',
                'GGTACCGGCGTCATCGCCGGTGGGGCGGCACGCGCCATCTTGGAGATGGCCGGCATCCGC',
                '1','1.06','Root; d__Bacteria; p__Actinobacteria; c__Actinobacteria']
        ]
        genomes = "\n".join("\t".join(x) for x in genomes_otu_table)
        assemblies_otu_table = [
            self.headers,[
                '4.12.ribosomal_protein_L11_rplK',
                'assembly',
                'GGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC',
                '1','1.02','Root; d__Bacteria; p__Firmicutes; c__Bacilli'],
            [
                '4.12.ribosomal_protein_L11_rplK',
                'assembly',
                'AGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC', #one base pair different to the one above
                '1','1.02','Root; d__Bacteria; p__Firmicutes; c__Bacilli'],
            [
                '4.16.ribosomal_protein_S5',
                'assembly',
                'GGTACCGGCGTCATCGCCGGTGGGGCGGCACGCGCCATCTTGGAGATGGCCGGCATCCGC',
                '1','1.06','Root; d__Bacteria; p__Actinobacteria; c__Actinobacteria']
        ]
        # Serialise the assembly fixture itself. This was previously built
        # from genomes_otu_table by mistake, which left assemblies_otu_table
        # unused; the two tables differ only in the sample-name column.
        assemblies = "\n".join("\t".join(x) for x in assemblies_otu_table)

        appraiser = Appraiser()
        metagenome_collection = OtuTableCollection()
        metagenome_collection.add_otu_table(StringIO(metagenomes))
        genome_collection = OtuTableCollection()
        genome_collection.add_otu_table(StringIO(genomes))
        assembly_collection = OtuTableCollection()
        assembly_collection.add_otu_table(StringIO(assemblies))

        # First pass: no sequence_identity argument, so the appraiser's
        # default matching applies.
        app = appraiser.appraise(genome_otu_table_collection=genome_collection,
                                 metagenome_otu_table_collection=metagenome_collection,
                                 assembly_otu_table_collection=assembly_collection)
        # One appraisal result per metagenome sample ('minimal', 'another').
        self.assertEqual(2, len(app.appraisal_results))
        res = self._sort_appraisal_results(app.appraisal_results)
        a = res[0]
        self.assertEqual(0, a.num_binned)
        self.assertEqual(0, a.num_assembled)
        self.assertEqual(8+9, a.num_not_found)
        a = res[1]
        self.assertEqual(0, a.num_binned)
        self.assertEqual(7, a.num_assembled)
        self.assertEqual(0, a.num_not_found)

        # Second pass: with sequence_identity=0.9 the 8-hit 'another' OTU is
        # expected to count as binned and assembled, while the 9-hit OTU
        # ("way different") is still expected to be not found.
        app = appraiser.appraise(genome_otu_table_collection=genome_collection,
                                 metagenome_otu_table_collection=metagenome_collection,
                                 assembly_otu_table_collection=assembly_collection,
                                 sequence_identity=0.9)
        self.assertEqual(2, len(app.appraisal_results))
        res = self._sort_appraisal_results(app.appraisal_results)
        a = res[0]
        self.assertEqual('another', a.metagenome_sample_name)
        self.assertEqual(8, a.num_binned)
        self.assertEqual(8, a.num_assembled)
        self.assertEqual(9, a.num_not_found)
        a = res[1]
        self.assertEqual(0, a.num_binned)
        self.assertEqual(7, a.num_assembled)
        self.assertEqual(0, a.num_not_found)