def test_no_cluster(self):
    """Two OTUs whose sequences differ stay in separate clusters when
    ``each_cluster`` is called with a threshold argument of 1.0."""
    rows = [
        ['gene', 'sample', 'sequence', 'num_hits', 'coverage', 'taxonomy'],
        ['4.11.ribosomal_protein_L10', 'minimal',
         'TTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACT',
         '2', '4.88',
         'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Staphylococcaceae; g__Staphylococcus'],
        ['4.12.ribosomal_protein_L11_rplK', 'minimal',
         'TTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACA',
         '4', '9.76',
         'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales'],
    ]
    lines = ["\t".join(fields) for fields in rows]
    lines.append('')  # trailing empty entry gives the table a final newline
    otu_text = "\n".join(lines)

    collection = OtuTableCollection()
    collection.add_otu_table(StringIO(otu_text))

    clusters = list(Clusterer().each_cluster(collection, 1.0))
    self.assertEqual(2, len(clusters))
    self.assertIsInstance(clusters[0], SampleWiseClusteredOtu)

    # Order by count so the assertions do not depend on iteration order.
    smaller, larger = sorted(clusters, key=lambda cluster: cluster.count)
    self.assertEqual(2, smaller.count)
    self.assertEqual(4.88, smaller.coverage)
    self.assertEqual(4, larger.count)
    self.assertEqual(9.76, larger.coverage)
def test_wide_format(self):
    """Write a two-sample OTU table in wide format and compare the output
    text against the expected one-column-per-sample table."""
    staphylococcus = 'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Staphylococcaceae; g__Staphylococcus'
    bacillales = 'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales'
    l10_sequence = 'TTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACTATGGTA'
    l11_sequence = 'CCTGCAGGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTG'
    rows = [
        ['gene', 'sample', 'sequence', 'num_hits', 'coverage', 'taxonomy'],
        ['4.11.ribosomal_protein_L10', 'minimal', l10_sequence, '2', '4.88', staphylococcus],
        ['4.12.ribosomal_protein_L11_rplK', 'minimal', l11_sequence, '4', '9.76', bacillales],
        ['4.11.ribosomal_protein_L10', 'maximal', l10_sequence, '2', '4.88', staphylococcus],
    ]
    lines = ["\t".join(fields) for fields in rows]
    lines.append('')
    long_format = "\n".join(lines)

    # The L11 OTU is absent from 'maximal', hence the expected 0 in that column.
    expected_wide = 'marker\tsequence\tminimal\tmaximal\ttaxonomy\n4.11.ribosomal_protein_L10\tTTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACTATGGTA\t2\t2\tRoot; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Staphylococcaceae; g__Staphylococcus\n4.12.ribosomal_protein_L11_rplK\tCCTGCAGGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTG\t4\t0\tRoot; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales\n'

    written = StringIO()
    collection = OtuTableCollection()
    collection.add_otu_table(StringIO(long_format))
    Summariser().write_wide_format_otu_table(
        table_collection=collection,
        output_table_io=written)
    self.assertEqual(expected_wide, written.getvalue())
def test_archive_to_otu_table_conversion(self):
    """Load an archive-format (JSON) OTU table and write it back out as a
    plain OTU table without extras.

    The archive stores coverage at full precision (e.g. 4.878048780487805);
    the expected plain table carries it as '4.88'.
    """
    archive_json = '{"fields": ["gene", "sample", "sequence", "num_hits", "coverage", "taxonomy", "read_names", "nucleotides_aligned", "taxonomy_by_known?"], "singlem_package_sha256s": ["2b2afe0114de20451fccfe74360756376dc83d001d890e84e322ab0833eca6ba", "7f406a73d8bb176994055cb966ff350e208986d12c8215722686c17c26e548c7", "735b44ae547c133163cb7d40f417292c35423864d00c95e7f1b32091b27d46c5", "8fc6dcce2766cc01defb3b5c689a1ed8ce9d59b725c67e58c2044dafaae908b3", "172df49937742b8411d41d217500d862567374401eaf393b25107b22ac630202", "4cb1bf226bf28d8198ed5c29e8a76df411d96a6c3ce1256af16887b9a184b0a6", "d473d3ae677e6e46202461ccdedb2aef23c0a10a3412422586b37e397ca37294", "431a2860bb890cd1c7193c565cbf0cc227850cba36fb17fe94df686e74ee9b11", "faa663527bb9aea63cef03859311f2e7f55fe98590a5ec85c5ba85815a6fd13e", "a0daf111380e6e499ad9c10c3ac413aa9016c7503dd459825100168524bff0d1", "aba631d4735aeb9d2dfbbbfee1c0739bf9e99ad6532a3be04ff627f3e6efdae2", "bba10c1feb0c26bdf46aa3d1dcb992744a699cde5cf02bb2728f8397378b342f", "4d91dd794b25fd256508f0814f6a2d31e20dc85e0aa9ea405031398565276768", "9b23c524a6210af0706eea7252c2d378888029f141b9305c3e88cbac3fd83f88", "50a209417b455a48bc67702d6a3809a172c57f00785d8c705a3322e6e2a71f72"], "version": 1, "alignment_hmm_sha256s": ["dd9b7e283598360b89ec91ff3f5c509361a6108a2eadc44bfb29646b1510f6b7", "b1bb943c3449a78f937db960bfdf6b2bed641388d33fce3cb2d5f69e79946ea6", "de92c90f2c83e380ae3953972fb63fcb8ce868dab87a305f9f1811b84ffb3d39", "453ed4a62608a4aec36117a2dd1a276709ff6b130ecb8d7b1612926bfab25527", "20cc450cf4157ecf1772e0325d4d8ed400b597d888a5cb5044ca69098f935656", "4b0bf5b3d7fd2ca16e54eed59d3a07eab388f70f7078ac096bf415f1c04731d9", "7cbba7ba0ed58d21c7519ba3fcef0abe43378e5c38c985b0d5e0e5219f141d92", "4a3bbe5ac594ef3c7c820e74544828e19eca68bf860d64f928729eb4530fce4e", "06a4bed0a765971b891ca4a4bf5680aeef4a4a249ce0c028798c0e912f0ccfb4", "2678fe218ca860a2d88bdbf76935d8c78a00ab6603a041a432505d754ef08250", "b54ff98aa03ab31af39c737a569b23ee4ed9296c8ea088562bfb3db87c38fe4a", "4ae31f14067bf183f38dca20f2aefb580e5ff25848881dd988908b70b67761bb", "d7bb3d544133f38110a329712b3ace7e7d7c989dafa3815d2d5a292b4c575f50", "7639bb919ef54f7baff3ed3a8c924efca97ed375cf4120a6e05d98fd6ef52cbb", "6923b889888ea34fabf463b2c8ad5fe23c94828f1a2631a07601f246f5e87150"], "otus": [["4.11.ribosomal_protein_L10", "minimal", "TTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACTATGGTA", 2, 4.878048780487805, "Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Staphylococcaceae; g__Staphylococcus", ["HWI-ST1243:156:D1K83ACXX:7:1106:18671:79482", "HWI-ST1243:156:D1K83ACXX:7:1105:19152:28331"], [60, 60], false], ["4.12.ribosomal_protein_L11_rplK", "minimal", "CCTGCAGGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTG", 4, 9.75609756097561, "Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales", ["HWI-ST1243:156:D1K83ACXX:7:1109:18214:9910", "HWI-ST1243:156:D1K83ACXX:7:1103:21187:63124", "HWI-ST1243:156:D1K83ACXX:7:1108:10813:6928", "HWI-ST1243:156:D1K83ACXX:7:1105:12385:81842"], [60, 60, 60, 60], false]]}'

    expected_rows = [
        ['gene', 'sample', 'sequence', 'num_hits', 'coverage', 'taxonomy'],
        ['4.11.ribosomal_protein_L10', 'minimal',
         'TTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACTATGGTA',
         '2', '4.88',
         'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Staphylococcaceae; g__Staphylococcus'],
        ['4.12.ribosomal_protein_L11_rplK', 'minimal',
         'CCTGCAGGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTG',
         '4', '9.76',
         'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales'],
    ]
    expected_lines = ["\t".join(fields) for fields in expected_rows]
    expected_lines.append('')
    expected_text = "\n".join(expected_lines)

    written = StringIO()
    collection = OtuTableCollection()
    collection.add_archive_otu_table(StringIO(archive_json))
    Summariser().write_otu_table(
        table_collection=collection,
        output_table_io=written,
        output_extras=False)
    self.assertEqual(expected_text, written.getvalue())
def test_multiple_genes_and_samples(self):
    """Summarise strains over two genes and two samples with the target
    taxonomy set to 'Root; d__Bacteria', and compare the written table."""
    gene_212 = '2.12.ribosomal_protein_L11_rplK.gpkg'
    gene_213 = '2.13.ribosomal_protein_L11_rplK.gpkg'
    seq_ggt = 'GGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC'
    seq_agt = 'AGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC'
    seq_gaa = 'GAAAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC'
    archaea = 'Root; d__Archaea; p__Firmicutes; c__Bacilli; o__Bacillales'
    bacteria = 'Root; d__Bacteria'
    bacillales = 'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales'

    input_rows = [
        self.headers,
        [gene_212, 'minimal', seq_ggt, '7', '17.07', archaea],
        [gene_212, 'minimal', seq_agt, '9', '18.07', bacteria],
        [gene_212, 'minimal', seq_gaa, '8', '17.57', bacillales],
        [gene_213, 'minimal', seq_ggt, '9', '18.07', archaea],
        [gene_213, 'minimal', seq_agt, '8', '17.57', bacteria],
        [gene_213, 'minimal', seq_gaa, '7', '17.07', bacillales],
        [gene_212, 'minimal2', seq_ggt, '7', '17.07', archaea],
        [gene_212, 'minimal2', seq_agt, '9', '18.07', bacteria],
        # This domain is spelled 'd__Aacteria' in the fixture; the expected
        # output below contains no row for this OTU.
        [gene_212, 'minimal2', seq_gaa, '8', '17.57',
         'Root; d__Aacteria; p__Firmicutes; c__Bacilli; o__Bacillales'],
    ]
    input_lines = ["\t".join(fields) for fields in input_rows]
    input_lines.append('')
    input_text = "\n".join(input_lines)

    expected_rows = [
        self.output_headers,
        ['reference', gene_212, 'minimal', '0', seq_agt, '9', '18.07', bacteria],
        ['strain', gene_212, 'minimal', '3', seq_gaa, '8', '17.57', bacillales],
        ['reference', gene_213, 'minimal', '0', seq_agt, '8', '17.57', bacteria],
        ['strain', gene_213, 'minimal', '3', seq_gaa, '7', '17.07', bacillales],
        ['reference', gene_212, 'minimal2', '0', seq_agt, '9', '18.07', bacteria],
    ]
    expected_lines = ["\t".join(fields) for fields in expected_rows]
    expected_lines.append('')
    expected_text = "\n".join(expected_lines)

    written = StringIO()
    collection = OtuTableCollection()
    # The target taxonomy is set before the table is added, as in the original.
    collection.set_target_taxonomy_by_string('Root; d__Bacteria')
    collection.add_otu_table(StringIO(input_text))
    StrainSummariser().summarise_strains(
        table_collection=collection,
        output_table_io=written)
    self.assertEqual(expected_text, written.getvalue())
def test_not_enough_samples(self):
    """Rarefy to 2 a table holding one OTU with 1 hit and another with 2;
    only the 2-hit OTU is expected in the result."""
    rows = [
        ['gene', 'sample', 'sequence', 'num_hits', 'coverage', 'taxonomy'],
        ['4.11.ribosomal_protein_L10', 'minimal',
         'TTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACTATGGTA',
         '1', '4.88',
         'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Staphylococcaceae; g__Staphylococcus'],
        ['4.12.ribosomal_protein_L11_rplK', 'minimal',
         'TTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACTATGGTT',
         '2', '9.76',
         'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales'],
    ]
    lines = ["\t".join(fields) for fields in rows]
    lines.append('')
    otu_text = "\n".join(lines)

    collection = OtuTableCollection()
    collection.add_otu_table(StringIO(otu_text))

    rarefied = Rarefier().rarefy(
        collection, 2, random_generator=PredictableRandomGenerator())
    self.assertIsInstance(rarefied, OtuTable)

    otus = list(rarefied)
    self.assertEqual(1, len(otus))
    self.assertEqual(
        'TTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACTATGGTT',
        otus[0].sequence)
    self.assertEqual([2], [otu.count for otu in otus])
def test_cycle(self):
    """Create a sequence database from an OTU table, re-open it, and check
    that extracted sequences carry the original fields.

    ``StringIO`` is called directly here, matching how every other test in
    this source wraps its table text; the previous ``StringIO.StringIO(...)``
    spelling fails with AttributeError when ``StringIO`` is the class rather
    than the Python 2 module.
    """
    rows = [
        ['gene', 'sample', 'sequence', 'num_hits', 'coverage', 'taxonomy'],
        ['ribosomal_protein_L11_rplK_gpkg', 'minimal',
         'GGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC',
         '7', '14.4',
         'Root; k__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales'],
        ['ribosomal_protein_S2_rpsB_gpkg', 'minimal',
         'CGTCGTTGGAACCCAAAAATGAAAAAATATATCTTCACTGAGAGAAATGGTATTTATATC',
         '6', '12.2',
         'Root; k__Bacteria; p__Firmicutes; c__Bacilli'],
        ['ribosomal_protein_S17_gpkg', 'minimal',
         'GCTAAATTAGGAGACATTGTTAAAATTCAAGAAACTCGTCCTTTATCAGCAACAAAACGT',
         '9', '18.8',
         'Root; k__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Staphylococcaceae; g__Staphylococcus'],
    ]
    # No trailing newline is appended for this fixture (as in the original).
    otu_table = "\n".join(["\t".join(fields) for fields in rows])

    with tempdir.TempDir() as tmp:
        db_path = os.path.join(tmp, 'my.sdb')

        collection = OtuTableCollection()
        collection.add_otu_table(StringIO(otu_table))
        SequenceDatabase.create_from_otu_table(db_path, collection)

        db2 = SequenceDatabase.acquire(db_path)

        # Extracting 1 and 3 is expected to return the first and third
        # OTU rows respectively.
        s1 = db2.extract_sequence(1)
        self.assertEqual('ribosomal_protein_L11_rplK_gpkg', s1.marker)
        self.assertEqual('minimal', s1.sample_name)
        self.assertEqual(
            'GGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC',
            s1.sequence)
        self.assertEqual(7, s1.count)
        self.assertEqual(
            'Root; k__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales',
            s1.taxonomy)

        s3 = db2.extract_sequence(3)
        self.assertEqual(
            'GCTAAATTAGGAGACATTGTTAAAATTCAAGAAACTCGTCCTTTATCAGCAACAAAACGT',
            s3.sequence)
        self.assertEqual(
            'Root; k__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Staphylococcaceae; g__Staphylococcus',
            s3.taxonomy)

        self.assertEqual(
            os.path.join(db_path, "sequences.fasta"),
            db2.sequences_fasta_file)
def test_nothing_returned(self):
    """A header-only OTU table yields an empty collection, and rarefying
    it to 0 yields nothing either."""
    header = ['gene', 'sample', 'sequence', 'num_hits', 'coverage', 'taxonomy']
    lines = ["\t".join(fields) for fields in [header]]
    lines.append('')
    header_only = "\n".join(lines)

    collection = OtuTableCollection()
    collection.add_otu_table(StringIO(header_only))
    self.assertEqual(0, len(list(collection)))

    rarefied = Rarefier().rarefy(collection, 0)
    self.assertEqual(0, len(list(rarefied)))
def test_krona(self):
    """Request Krona output for an archive OTU table and assert that the
    HTML file exists at the requested path afterwards."""
    archive_json = '{"fields": ["gene", "sample", "sequence", "num_hits", "coverage", "taxonomy", "read_names", "nucleotides_aligned", "taxonomy_by_known?"], "singlem_package_sha256s": ["2b2afe0114de20451fccfe74360756376dc83d001d890e84e322ab0833eca6ba", "7f406a73d8bb176994055cb966ff350e208986d12c8215722686c17c26e548c7", "735b44ae547c133163cb7d40f417292c35423864d00c95e7f1b32091b27d46c5", "8fc6dcce2766cc01defb3b5c689a1ed8ce9d59b725c67e58c2044dafaae908b3", "172df49937742b8411d41d217500d862567374401eaf393b25107b22ac630202", "4cb1bf226bf28d8198ed5c29e8a76df411d96a6c3ce1256af16887b9a184b0a6", "d473d3ae677e6e46202461ccdedb2aef23c0a10a3412422586b37e397ca37294", "431a2860bb890cd1c7193c565cbf0cc227850cba36fb17fe94df686e74ee9b11", "faa663527bb9aea63cef03859311f2e7f55fe98590a5ec85c5ba85815a6fd13e", "a0daf111380e6e499ad9c10c3ac413aa9016c7503dd459825100168524bff0d1", "aba631d4735aeb9d2dfbbbfee1c0739bf9e99ad6532a3be04ff627f3e6efdae2", "bba10c1feb0c26bdf46aa3d1dcb992744a699cde5cf02bb2728f8397378b342f", "4d91dd794b25fd256508f0814f6a2d31e20dc85e0aa9ea405031398565276768", "9b23c524a6210af0706eea7252c2d378888029f141b9305c3e88cbac3fd83f88", "50a209417b455a48bc67702d6a3809a172c57f00785d8c705a3322e6e2a71f72"], "version": 1, "alignment_hmm_sha256s": ["dd9b7e283598360b89ec91ff3f5c509361a6108a2eadc44bfb29646b1510f6b7", "b1bb943c3449a78f937db960bfdf6b2bed641388d33fce3cb2d5f69e79946ea6", "de92c90f2c83e380ae3953972fb63fcb8ce868dab87a305f9f1811b84ffb3d39", "453ed4a62608a4aec36117a2dd1a276709ff6b130ecb8d7b1612926bfab25527", "20cc450cf4157ecf1772e0325d4d8ed400b597d888a5cb5044ca69098f935656", "4b0bf5b3d7fd2ca16e54eed59d3a07eab388f70f7078ac096bf415f1c04731d9", "7cbba7ba0ed58d21c7519ba3fcef0abe43378e5c38c985b0d5e0e5219f141d92", "4a3bbe5ac594ef3c7c820e74544828e19eca68bf860d64f928729eb4530fce4e", "06a4bed0a765971b891ca4a4bf5680aeef4a4a249ce0c028798c0e912f0ccfb4", "2678fe218ca860a2d88bdbf76935d8c78a00ab6603a041a432505d754ef08250", "b54ff98aa03ab31af39c737a569b23ee4ed9296c8ea088562bfb3db87c38fe4a", "4ae31f14067bf183f38dca20f2aefb580e5ff25848881dd988908b70b67761bb", "d7bb3d544133f38110a329712b3ace7e7d7c989dafa3815d2d5a292b4c575f50", "7639bb919ef54f7baff3ed3a8c924efca97ed375cf4120a6e05d98fd6ef52cbb", "6923b889888ea34fabf463b2c8ad5fe23c94828f1a2631a07601f246f5e87150"], "otus": [["4.11.ribosomal_protein_L10", "minimal", "TTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACTATGGTA", 2, 4.878048780487805, "Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Staphylococcaceae; g__Staphylococcus", ["HWI-ST1243:156:D1K83ACXX:7:1106:18671:79482", "HWI-ST1243:156:D1K83ACXX:7:1105:19152:28331"], [60, 60], false], ["4.12.ribosomal_protein_L11_rplK", "minimal", "CCTGCAGGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTG", 4, 9.75609756097561, "Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales", ["HWI-ST1243:156:D1K83ACXX:7:1109:18214:9910", "HWI-ST1243:156:D1K83ACXX:7:1103:21187:63124", "HWI-ST1243:156:D1K83ACXX:7:1108:10813:6928", "HWI-ST1243:156:D1K83ACXX:7:1105:12385:81842"], [60, 60, 60, 60], false]]}'

    collection = OtuTableCollection()
    collection.add_archive_otu_table(StringIO(archive_json))

    with tempdir.TempDir() as tmp:
        Summariser.summarise(
            krona_output=os.path.join(tmp, 'KronaOK.html'),
            table_collection=collection)
        # Checked inside the context manager, while the directory still exists.
        krona_written = os.path.exists(os.path.join(tmp, 'KronaOK.html'))
        self.assertTrue(krona_written)
def test_minimal(self):
    """Summarise strains for three OTUs of one gene in one sample and
    compare the written table: one 'reference' row and two 'strain' rows."""
    gene = '2.12.ribosomal_protein_L11_rplK.gpkg'
    taxonomy = 'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales'
    seq_ggt = 'GGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC'
    seq_agt = 'AGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC'
    seq_gaa = 'GAAAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC'

    input_rows = [
        self.headers,
        [gene, 'minimal', seq_ggt, '7', '17.07', taxonomy],
        [gene, 'minimal', seq_agt, '9', '18.07', taxonomy],
        [gene, 'minimal', seq_gaa, '8', '17.57', taxonomy],
    ]
    input_lines = ["\t".join(fields) for fields in input_rows]
    input_lines.append('')
    input_text = "\n".join(input_lines)

    expected_rows = [
        self.output_headers,
        ['reference', gene, 'minimal', '0', seq_agt, '9', '18.07', taxonomy],
        ['strain', gene, 'minimal', '3', seq_gaa, '8', '17.57', taxonomy],
        ['strain', gene, 'minimal', '1', seq_ggt, '7', '17.07', taxonomy],
    ]
    expected_lines = ["\t".join(fields) for fields in expected_rows]
    expected_lines.append('')
    expected_text = "\n".join(expected_lines)

    written = StringIO()
    collection = OtuTableCollection()
    collection.add_otu_table(StringIO(input_text))
    StrainSummariser().summarise_strains(
        table_collection=collection,
        output_table_io=written)
    self.assertEqual(expected_text, written.getvalue())
def test_nothing_returned(self):
    """Rarefying a header-only OTU table to 0 produces no OTUs."""
    # NOTE(review): another test_nothing_returned appears earlier in this
    # source; if both live in the same class, this definition shadows it.
    header_row = [
        'gene', 'sample', 'sequence', 'num_hits', 'coverage', 'taxonomy'
    ]
    table_lines = list(map("\t".join, [header_row]))
    table_lines.append('')
    table_text = "\n".join(table_lines)

    otu_collection = OtuTableCollection()
    otu_collection.add_otu_table(StringIO(table_text))
    parsed = list(otu_collection)
    self.assertEqual(0, len(parsed))

    rarefied = list(Rarefier().rarefy(otu_collection, 0))
    self.assertEqual(0, len(rarefied))
def test_multiple_genes(self):
    """Rarefy a one-sample table with two genes to 2 using the predictable
    generator; the result is compared with a table holding the same three
    OTUs (counts and coverages given as numbers)."""
    header = ['gene', 'sample', 'sequence', 'num_hits', 'coverage', 'taxonomy']
    staphylococcus = 'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Staphylococcaceae; g__Staphylococcus'
    bacillales = 'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales'
    seq_l11 = 'TTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACTATGGTA'
    seq_l10_t = 'TTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACTATGGTT'
    seq_l10_a = 'ATACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACTATGGTT'

    # Input fields are all strings, as they are in a TSV file.
    input_rows = [
        header,
        ['4.11.ribosomal_protein_L11', 'minimal', seq_l11, '2', '4.88', staphylococcus],
        ['4.11.ribosomal_protein_L10', 'minimal', seq_l10_t, '1', '9.76', bacillales],
        ['4.11.ribosomal_protein_L10', 'minimal', seq_l10_a, '1', '9.76', bacillales],
    ]
    input_lines = ["\t".join(fields) for fields in input_rows]
    input_lines.append('')
    input_text = "\n".join(input_lines)

    # Expected rows carry count/coverage as int/float rather than str.
    expected = [
        ['gene', 'sample', 'sequence', 'num_hits', 'coverage', 'taxonomy'],
        ['4.11.ribosomal_protein_L11', 'minimal', seq_l11, 2, 4.88, staphylococcus],
        ['4.11.ribosomal_protein_L10', 'minimal', seq_l10_t, 1, 9.76, bacillales],
        ['4.11.ribosomal_protein_L10', 'minimal', seq_l10_a, 1, 9.76, bacillales],
    ]

    collection = OtuTableCollection()
    collection.add_otu_table(StringIO(input_text))
    rarefied = Rarefier().rarefy(
        collection, 2, random_generator=PredictableRandomGenerator())
    self.assertIsInstance(rarefied, OtuTable)
    self.assertEqualOtuTable2(expected, rarefied)
def test_multiple_samples(self):
    """Appraise two metagenome samples against one genome: the 'minimal'
    OTU matches the genome sequence exactly (7 binned), while the
    'another' OTU has no genome counterpart (4 not found)."""
    l11_sequence = 'GGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC'
    metagenome_rows = [
        self.headers,
        ['4.12.ribosomal_protein_L11_rplK', 'minimal', l11_sequence,
         '7', '17.07',
         'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales'],
        ['4.11.ribosomal_protein_L10', 'another',
         'CCTGCAGGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTG',
         '4', '9.76',
         'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Staphylococcaceae; g__Staphylococcus'],
    ]
    metagenome_text = "\n".join(map("\t".join, metagenome_rows))

    genome_rows = [
        self.headers,
        ['4.12.ribosomal_protein_L11_rplK', 'genome', l11_sequence,
         '1', '1.02',
         'Root; d__Bacteria; p__Firmicutes; c__Bacilli'],
    ]
    genome_text = "\n".join(map("\t".join, genome_rows))

    appraiser = Appraiser()
    metagenome_collection = OtuTableCollection()
    metagenome_collection.add_otu_table(StringIO(metagenome_text))
    genome_collection = OtuTableCollection()
    genome_collection.add_otu_table(StringIO(genome_text))

    appraisal = appraiser.appraise(
        genome_otu_table_collection=genome_collection,
        metagenome_otu_table_collection=metagenome_collection)
    results = appraisal.appraisal_results
    self.assertEqual(2, len(results))

    # The sample-name assertions pin which positional result is which.
    minimal_result = results[1]
    self.assertEqual('minimal', minimal_result.metagenome_sample_name)
    self.assertEqual(7, minimal_result.num_binned)
    self.assertEqual(0, minimal_result.num_not_found)

    another_result = results[0]
    self.assertEqual('another', another_result.metagenome_sample_name)
    self.assertEqual(0, another_result.num_binned)
    self.assertEqual(4, another_result.num_not_found)
def test_contamination_near_enough(self):
    """Appraise two metagenome samples against a genome, first with the
    default matching and then with ``sequence_identity=0.9``.

    With default matching neither sample has anything binned. At 0.9 the
    'another' S5 OTU (one base away from the genome's S5 sequence) is
    binned, while the 'minimal' L11 OTU is still reported as not found.

    Both appraisals are sorted by metagenome sample name before asserting,
    so the test does not rely on the order of ``appraisal_results``.
    """
    def by_sample_name(result):
        return result.metagenome_sample_name

    metagenome_otu_table = [
        self.headers,
        ['4.12.ribosomal_protein_L11_rplK', 'minimal',
         'GGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC',
         '7', '17.07',
         'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales'],
        ['4.16.ribosomal_protein_S5', 'another',
         'GGTACCGGCGTCATCGCCGGTGGCGCGGCACGCGCCATCTTGGAGATGGCCGGCATCCGC',
         '8', '12.50',
         'Root; d__Bacteria; p__Actinobacteria; c__Actinobacteria'],
    ]
    metagenomes = "\n".join(["\t".join(x) for x in metagenome_otu_table])

    genomes_otu_table = [
        self.headers,
        ['4.12.ribosomal_protein_L11_rplK', 'genome',
         'GGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC',
         '1', '1.02', 'Root; d__Bacteria; p__Firmicutes; c__Bacilli'],
        ['4.12.ribosomal_protein_L11_rplK', 'genome',
         # one base pair different to the one above
         'AGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC',
         '1', '1.02', 'Root; d__Bacteria; p__Firmicutes; c__Bacilli'],
        ['4.16.ribosomal_protein_S5', 'genome',
         # one base pair different to the metagenome S5 sequence
         'GGTACCGGCGTCATCGCCGGTGGGGCGGCACGCGCCATCTTGGAGATGGCCGGCATCCGC',
         '1', '1.06',
         'Root; d__Bacteria; p__Actinobacteria; c__Actinobacteria'],
    ]
    genomes = "\n".join(["\t".join(x) for x in genomes_otu_table])

    appraiser = Appraiser()
    metagenome_collection = OtuTableCollection()
    metagenome_collection.add_otu_table(StringIO(metagenomes))
    genome_collection = OtuTableCollection()
    genome_collection.add_otu_table(StringIO(genomes))

    # Default matching: nothing is binned in either sample.
    app = appraiser.appraise(
        genome_otu_table_collection=genome_collection,
        metagenome_otu_table_collection=metagenome_collection)
    self.assertEqual(2, len(app.appraisal_results))
    sorted_results = sorted(app.appraisal_results, key=by_sample_name)
    a = sorted_results[0]
    self.assertEqual('another', a.metagenome_sample_name)
    self.assertEqual(0, a.num_binned)
    self.assertEqual(8, a.num_not_found)
    a = sorted_results[1]
    self.assertEqual('minimal', a.metagenome_sample_name)
    self.assertEqual(0, a.num_binned)
    self.assertEqual(7, a.num_not_found)

    # Relaxed identity: the near-identical S5 OTU is now binned.
    app = appraiser.appraise(
        genome_otu_table_collection=genome_collection,
        metagenome_otu_table_collection=metagenome_collection,
        sequence_identity=0.9)
    self.assertEqual(2, len(app.appraisal_results))
    sorted_results = sorted(app.appraisal_results, key=by_sample_name)
    a = sorted_results[0]
    self.assertEqual('another', a.metagenome_sample_name)
    self.assertEqual(8, a.num_binned)
    self.assertEqual(0, a.num_not_found)
    a = sorted_results[1]
    self.assertEqual(0, a.num_binned)
    self.assertEqual(7, a.num_not_found)
def test_wide_format(self):
    """The wide-format writer emits one row per marker/sequence with one
    count column per sample ('minimal', 'maximal')."""
    # NOTE(review): another test_wide_format appears earlier in this source;
    # if both live in the same class, this definition shadows it.
    header = ['gene', 'sample', 'sequence', 'num_hits', 'coverage', 'taxonomy']
    l10_minimal = [
        '4.11.ribosomal_protein_L10', 'minimal',
        'TTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACTATGGTA',
        '2', '4.88',
        'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Staphylococcaceae; g__Staphylococcus',
    ]
    l11_minimal = [
        '4.12.ribosomal_protein_L11_rplK', 'minimal',
        'CCTGCAGGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTG',
        '4', '9.76',
        'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales',
    ]
    l10_maximal = [
        '4.11.ribosomal_protein_L10', 'maximal',
        'TTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACTATGGTA',
        '2', '4.88',
        'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Staphylococcaceae; g__Staphylococcus',
    ]
    table_rows = [header, l10_minimal, l11_minimal, l10_maximal]
    table_text = "\n".join(list(map("\t".join, table_rows)) + [''])

    sink = StringIO()
    otus = OtuTableCollection()
    otus.add_otu_table(StringIO(table_text))
    Summariser().write_wide_format_otu_table(
        table_collection=otus,
        output_table_io=sink)

    wanted = 'marker\tsequence\tminimal\tmaximal\ttaxonomy\n4.11.ribosomal_protein_L10\tTTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACTATGGTA\t2\t2\tRoot; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Staphylococcaceae; g__Staphylococcus\n4.12.ribosomal_protein_L11_rplK\tCCTGCAGGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTG\t4\t0\tRoot; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales\n'
    self.assertEqual(wanted, sink.getvalue())
def test_using_real_generator(self):
    """Rarefy a single 2-hit OTU to 2 without passing a random generator;
    the OTU is expected back with its count of 2."""
    rows = [
        ['gene', 'sample', 'sequence', 'num_hits', 'coverage', 'taxonomy'],
        ['4.11.ribosomal_protein_L10', 'minimal',
         'TTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACTATGGTA',
         '2', '4.88',
         'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Staphylococcaceae; g__Staphylococcus'],
    ]
    lines = ["\t".join(fields) for fields in rows]
    lines.append('')
    otu_text = "\n".join(lines)

    collection = OtuTableCollection()
    collection.add_otu_table(StringIO(otu_text))

    # No random_generator argument is passed to rarefy in this test.
    rarefied = Rarefier().rarefy(collection, 2)
    self.assertIsInstance(rarefied, OtuTable)

    otus = list(rarefied)
    self.assertEqual(1, len(otus))
    self.assertEqual(2, otus[0].count)
def test_cluster_across_samples(self):
    """Cluster two near-identical sequences from different samples with a
    threshold argument of 58.5/60: one result per sample, both sharing
    the same representative OTU (the one from sample 'ma')."""
    rows = [
        ['gene', 'sample', 'sequence', 'num_hits', 'coverage', 'taxonomy'],
        ['4.11.ribosomal_protein_L10', 'minimal',
         'TTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACT',
         '2', '4.88',
         'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Staphylococcaceae; g__Staphylococcus'],
        ['4.12.ribosomal_protein_L11_rplK', 'ma',
         'TTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACA',
         '4', '9.76',
         'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales'],
    ]
    lines = ["\t".join(fields) for fields in rows]
    lines.append('')
    otu_text = "\n".join(lines)

    collection = OtuTableCollection()
    collection.add_otu_table(StringIO(otu_text))

    clusters = list(Clusterer().each_cluster(collection, 58.5/60))
    self.assertEqual(2, len(clusters))

    first, second = clusters[0], clusters[1]
    self.assertIsInstance(first, SampleWiseClusteredOtu)
    self.assertEqual(first.representative_otu, second.representative_otu)
    self.assertEqual('ma', first.representative_otu.sample_name)
def test_multiple_genes(self):
    """Rarefy a one-sample table with two genes to 2 using the predictable
    generator and check the resulting counts are [1, 1, 2]."""
    # NOTE(review): another test_multiple_genes appears earlier in this
    # source; if both live in the same class, this definition shadows it.
    bacillales = 'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales'
    rows = [
        ['gene', 'sample', 'sequence', 'num_hits', 'coverage', 'taxonomy'],
        ['4.11.ribosomal_protein_L11', 'minimal',
         'TTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACTATGGTA',
         '2', '4.88',
         'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Staphylococcaceae; g__Staphylococcus'],
        ['4.11.ribosomal_protein_L10', 'minimal',
         'TTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACTATGGTT',
         '1', '9.76', bacillales],
        ['4.11.ribosomal_protein_L10', 'minimal',
         'ATACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACTATGGTT',
         '1', '9.76', bacillales],
    ]
    table_text = "\n".join(list(map("\t".join, rows)) + [''])

    collection = OtuTableCollection()
    collection.add_otu_table(StringIO(table_text))

    rarefied = Rarefier().rarefy(
        collection, 2, random_generator=PredictableRandomGenerator())
    self.assertIsInstance(rarefied, OtuTable)

    otus = list(rarefied)
    self.assertEqual(3, len(otus))
    counts = [otu.count for otu in otus]
    self.assertEqual([1, 1, 2], counts)
def test_cluster_two(self):
    """Cluster two same-sample sequences with a threshold argument of 0.5:
    a single cluster is expected with the summed count (2 + 4 = 6)."""
    rows = [
        ['gene', 'sample', 'sequence', 'num_hits', 'coverage', 'taxonomy'],
        ['4.11.ribosomal_protein_L10', 'minimal',
         'TTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACTATGGTA',
         '2', '4.88',
         'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Staphylococcaceae; g__Staphylococcus'],
        ['4.12.ribosomal_protein_L11_rplK', 'minimal',
         'TTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACTATGGTT',
         '4', '9.76',
         'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales'],
    ]
    lines = ["\t".join(fields) for fields in rows]
    lines.append('')
    otu_text = "\n".join(lines)

    collection = OtuTableCollection()
    collection.add_otu_table(StringIO(otu_text))

    clusters = list(Clusterer().each_cluster(collection, 0.5))
    self.assertEqual(1, len(clusters))

    merged = clusters[0]
    self.assertIsInstance(merged, SampleWiseClusteredOtu)
    self.assertEqual(6, merged.count)
    # Expected coverage: the 4-hit OTU's coverage scaled up to 6 hits.
    # The arithmetic is kept in this exact order because the comparison
    # is an exact float equality.
    expected_coverage = 9.76/4*6
    self.assertEqual(expected_coverage, merged.coverage)
def test_contamination(self):
    """Appraise one metagenome sample against a genome that carries two
    different L11 sequences: the S5 OTU is binned (8) while the L11 OTU
    is reported as not found (7)."""
    l11_marker = '4.12.ribosomal_protein_L11_rplK'
    s5_marker = '4.16.ribosomal_protein_S5'
    l11_sequence = 'GGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC'
    s5_sequence = 'GGTACCGGCGTCATCGCCGGTGGCGCGGCACGCGCCATCTTGGAGATGGCCGGCATCCGC'
    bacilli = 'Root; d__Bacteria; p__Firmicutes; c__Bacilli'
    actinobacteria = 'Root; d__Bacteria; p__Actinobacteria; c__Actinobacteria'

    metagenome_rows = [
        self.headers,
        [l11_marker, 'minimal', l11_sequence, '7', '17.07',
         'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales'],
        [s5_marker, 'minimal', s5_sequence, '8', '12.50', actinobacteria],
    ]
    metagenome_text = "\n".join(map("\t".join, metagenome_rows))

    genome_rows = [
        self.headers,
        [l11_marker, 'genome', l11_sequence, '1', '1.02', bacilli],
        [l11_marker, 'genome',
         # one base pair different to the one above
         'AGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC',
         '1', '1.02', bacilli],
        [s5_marker, 'genome', s5_sequence, '1', '1.06', actinobacteria],
    ]
    genome_text = "\n".join(map("\t".join, genome_rows))

    appraiser = Appraiser()
    metagenome_collection = OtuTableCollection()
    metagenome_collection.add_otu_table(StringIO(metagenome_text))
    genome_collection = OtuTableCollection()
    genome_collection.add_otu_table(StringIO(genome_text))

    appraisal = appraiser.appraise(
        genome_otu_table_collection=genome_collection,
        metagenome_otu_table_collection=metagenome_collection)
    results = appraisal.appraisal_results
    self.assertEqual(1, len(results))

    only_result = results[0]
    self.assertEqual(8, only_result.num_binned)
    self.assertEqual(7, only_result.num_not_found)
def test_clusterer_all_cluster_two_samples_some_cluster(self):
    """Appraise two samples at ``sequence_identity=0.7``: in each sample the
    non-poly-A OTU is binned and the poly-A OTU is not found."""
    # non-As and genome cluster together but are not exactly the same
    poly_a = 'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA'
    minimal_near = 'GGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC'
    maximal_near = 'GGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATG'
    bacillales = 'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales'
    staphylococcus = 'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Staphylococcaceae; g__Staphylococcus'

    metagenome_rows = [
        self.headers,
        ['4.12.ribosomal_protein_L11_rplK', 'minimal', minimal_near, '7', '17.07', bacillales],
        ['4.12.ribosomal_protein_L11_rplK', 'minimal', poly_a, '12', '17.07', bacillales],
        ['4.11.ribosomal_protein_L10', 'maximal', poly_a, '4', '9.76', staphylococcus],
        ['4.11.ribosomal_protein_L10', 'maximal', maximal_near, '1', '9.76', staphylococcus],
    ]
    metagenome_text = "\n".join(map("\t".join, metagenome_rows))

    genome_rows = [
        self.headers,
        ['4.12.ribosomal_protein_L11_rplK', 'genome',
         'GGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATA',
         '1', '1.02', 'Root; d__Bacteria; p__Firmicutes; c__Bacilli'],
    ]
    genome_text = "\n".join(map("\t".join, genome_rows))

    appraiser = Appraiser()
    metagenome_collection = OtuTableCollection()
    metagenome_collection.add_otu_table(StringIO(metagenome_text))
    genome_collection = OtuTableCollection()
    genome_collection.add_otu_table(StringIO(genome_text))

    appraisal = appraiser.appraise(
        genome_otu_table_collection=genome_collection,
        metagenome_otu_table_collection=metagenome_collection,
        sequence_identity=0.7)
    results = appraisal.appraisal_results
    self.assertEqual(2, len(results))

    # The sample-name assertions pin which positional result is which.
    minimal_result = results[1]
    self.assertEqual('minimal', minimal_result.metagenome_sample_name)
    self.assertEqual(7, minimal_result.num_binned)
    self.assertEqual(12, minimal_result.num_not_found)
    self.assertEqual(1, len(minimal_result.binned_otus))
    self.assertEqual(1, len(minimal_result.not_found_otus))
    self.assertEqual(minimal_near, minimal_result.binned_otus[0].sequence)
    self.assertEqual('minimal', minimal_result.binned_otus[0].sample_name)
    self.assertEqual(poly_a, minimal_result.not_found_otus[0].sequence)
    self.assertEqual('minimal', minimal_result.not_found_otus[0].sample_name)

    maximal_result = results[0]
    self.assertEqual('maximal', maximal_result.metagenome_sample_name)
    self.assertEqual(1, maximal_result.num_binned)
    self.assertEqual(4, maximal_result.num_not_found)
    self.assertEqual(1, len(maximal_result.binned_otus))
    self.assertEqual(1, len(maximal_result.not_found_otus))
    self.assertEqual(maximal_near, maximal_result.binned_otus[0].sequence)
    self.assertEqual('maximal', maximal_result.binned_otus[0].sample_name)
    self.assertEqual(poly_a, maximal_result.not_found_otus[0].sequence)
    self.assertEqual('maximal', maximal_result.not_found_otus[0].sample_name)
def test_using_real_generator(self):
    """Rarefy a single-OTU table to 2 sequences via the real Rarefier.

    Asserts that the result is an OtuTable containing exactly one OTU
    whose count is 2.
    """
    rows = (
        ['gene', 'sample', 'sequence', 'num_hits', 'coverage', 'taxonomy'],
        [
            '4.11.ribosomal_protein_L10',
            'minimal',
            'TTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACTATGGTA',
            '2',
            '4.88',
            'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Staphylococcaceae; g__Staphylococcus',
        ],
    )
    # Every row, including the last, is terminated by a newline.
    table_text = "".join("\t".join(row) + "\n" for row in rows)

    collection = OtuTableCollection()
    collection.add_otu_table(StringIO(table_text))

    rarefied = Rarefier().rarefy(collection, 2)
    self.assertIsInstance(rarefied, OtuTable)

    rarefied_otus = list(rarefied)
    self.assertEqual(1, len(rarefied_otus))
    self.assertEqual(2, rarefied_otus[0].count)
def test_assembly_input(self):
    """Appraise one metagenome sample against both a genome and an assembly.

    The sample has three OTUs: one whose sequence is also in the genome and
    assembly tables, one whose sequence is only in the assembly table, and
    one whose sequence is in neither. The assertions check the binned,
    assembled and not-found totals and the sequences in each category.
    """
    marker_l11 = '4.12.ribosomal_protein_L11_rplK'
    marker_l10 = '4.11.ribosomal_protein_L10'
    seq_in_genome = 'GGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC'
    seq_in_assembly_only = 'CCTGCAGGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTG'
    seq_unaccounted = 'CAAAAAAAAAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTG'
    taxonomy_order = 'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales'
    taxonomy_genus = 'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Staphylococcaceae; g__Staphylococcus'

    def as_table(rows):
        # Tab-separated columns, newline-separated rows, no trailing newline.
        return "\n".join("\t".join(row) for row in rows)

    metagenomes = as_table([
        self.headers,
        [marker_l11, 'minimal', seq_in_genome, '7', '17.07', taxonomy_order],
        [marker_l10, 'minimal', seq_in_assembly_only, '4', '9.76', taxonomy_genus],
        ['4.14.ribosomal_protein_L16_L10E_rplP', 'minimal', seq_unaccounted, '5', '10.76', taxonomy_genus],
    ])
    assemblies = as_table([
        self.headers,
        [marker_l11, 'assembly', seq_in_genome, '1', '1.007', taxonomy_order],
        [marker_l10, 'assembly', seq_in_assembly_only, '1', '1.01', taxonomy_genus],
    ])
    genomes = as_table([
        self.headers,
        [marker_l11, 'genome', seq_in_genome, '1', '1.02', 'Root; d__Bacteria; p__Firmicutes; c__Bacilli'],
    ])

    appraiser = Appraiser()
    metagenome_collection = OtuTableCollection()
    metagenome_collection.add_otu_table(StringIO(metagenomes))
    genome_collection = OtuTableCollection()
    genome_collection.add_otu_table(StringIO(genomes))
    assembly_collection = OtuTableCollection()
    assembly_collection.add_otu_table(StringIO(assemblies))

    app = appraiser.appraise(
        genome_otu_table_collection=genome_collection,
        metagenome_otu_table_collection=metagenome_collection,
        assembly_otu_table_collection=assembly_collection)
    self.assertEqual(1, len(app.appraisal_results))

    result = app.appraisal_results[0]
    self.assertEqual(7, result.num_binned)
    self.assertEqual(11, result.num_assembled)
    self.assertEqual(5, result.num_not_found)
    self.assertEqual('minimal', result.metagenome_sample_name)

    self.assertEqual(1, len(result.binned_otus))
    self.assertEqual(seq_in_genome, result.binned_otus[0].sequence)

    self.assertEqual(2, len(result.assembled_otus))
    assembled_sequences = [otu.sequence for otu in result.assembled_otus]
    self.assertEqual(
        sorted([seq_in_genome, seq_in_assembly_only]),
        sorted(assembled_sequences))

    self.assertEqual(1, len(result.not_found_otus))
    self.assertEqual(seq_unaccounted, result.not_found_otus[0].sequence)
def test_target_taxonomy(self):
    """Check Chancer predictions with and without a target taxonomy.

    Runs ``predict_samples`` on a one-sample metagenome table twice, first
    with an empty target taxonomy and then with a family-level target, and
    compares the string form of the predictions with the expected lines.
    """
    taxonomy_genus = 'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Staphylococcaceae; g__Staphylococcus'
    rows = [
        self.headers,
        ['4.12.ribosomal_protein_L11_rplK', 'minimal',
         'GGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC',
         '7', '17.07',
         'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales'],
        ['4.11.ribosomal_protein_L10', 'minimal',
         'CCTGCAGGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTG',
         '4', '9.76', taxonomy_genus],
        ['4.11.ribosomal_protein_L10', 'minimal',
         'CCTGCAGGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTA',
         '5', '10.76', taxonomy_genus],
    ]
    metagenomes = "\n".join("\t".join(row) for row in rows)

    table_collection = OtuTableCollection()
    table_collection.add_otu_table(StringIO(metagenomes))

    # No target taxonomy.
    untargeted = Chancer().predict_samples(
        metagenomes=table_collection,
        target_taxonomy=[])
    self.assertEqual(["minimal\t8.0\t4.75"], list(map(str, untargeted)))

    # Family-level target taxonomy.
    family = Taxonomy.split_taxonomy(
        'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Staphylococcaceae')
    targeted = Chancer().predict_samples(
        metagenomes=table_collection,
        target_taxonomy=family)
    self.assertEqual(["minimal\t9.0\t2.5"], list(map(str, targeted)))
def test_print_appraisal(self):
    """Check the text written by ``Appraiser.print_appraisal``.

    Appraises two single-OTU samples against one genome, checks the
    per-sample counts, then checks the printed summary table and the
    contents of the binned and unaccounted-for OTU table outputs.
    """
    minimal_row = [
        '4.12.ribosomal_protein_L11_rplK', 'minimal',
        'GGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC',
        '7', '17.07',
        'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales']
    another_row = [
        '4.11.ribosomal_protein_L10', 'another',
        'CCTGCAGGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTG',
        '4', '9.76',
        'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Staphylococcaceae; g__Staphylococcus']
    genome_row = [
        '4.12.ribosomal_protein_L11_rplK', 'genome',
        'GGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC',
        '1', '1.02',
        'Root; d__Bacteria; p__Firmicutes; c__Bacilli']

    def as_table(rows):
        # Tab-separated columns, newline-separated rows, no trailing newline.
        return "\n".join("\t".join(row) for row in rows)

    metagenomes = as_table([self.headers, minimal_row, another_row])
    genomes = as_table([self.headers, genome_row])

    appraiser = Appraiser()
    metagenome_collection = OtuTableCollection()
    metagenome_collection.add_otu_table(StringIO(metagenomes))
    genome_collection = OtuTableCollection()
    genome_collection.add_otu_table(StringIO(genomes))

    app = appraiser.appraise(
        genome_otu_table_collection=genome_collection,
        metagenome_otu_table_collection=metagenome_collection)
    self.assertEqual(2, len(app.appraisal_results))

    # (result index, sample name, num_binned, num_not_found)
    for index, sample, binned, not_found in [(1, 'minimal', 7, 0),
                                             (0, 'another', 0, 4)]:
        result = app.appraisal_results[index]
        self.assertEqual(sample, result.metagenome_sample_name)
        self.assertEqual(binned, result.num_binned)
        self.assertEqual(not_found, result.num_not_found)

    # Summary table only.
    summary_io = StringIO()
    appraiser.print_appraisal(app, True, summary_io)
    self.assertEqual(
        "sample\tnum_binned\tnum_not_found\tpercent_binned\nanother\t0\t4\t0.0\nminimal\t7\t0\t100.0\ntotal\t7\t4\t63.6\naverage\t3.5\t2.0\t50.0\n",
        summary_io.getvalue())

    # Summary plus the two OTU table outputs.
    summary_io = StringIO()
    binned_io = StringIO()
    unaccounted_io = StringIO()
    appraiser.print_appraisal(
        app, True, summary_io,
        binned_otu_table_io=binned_io,
        unaccounted_for_otu_table_io=unaccounted_io)
    self.assertEqual(
        as_table([self.headers, minimal_row]) + "\n",
        binned_io.getvalue())
    self.assertEqual(
        as_table([self.headers, another_row]) + "\n",
        unaccounted_io.getvalue())
def test_target_taxonomy(self):
    """Compare Chancer output for an empty and a family-level target taxonomy.

    Both runs use the same one-sample metagenome OTU table; each prediction
    is converted with ``str`` and compared against the expected line.
    """
    marker_l10 = '4.11.ribosomal_protein_L10'
    staphylococcus = 'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Staphylococcaceae; g__Staphylococcus'
    otu_rows = [
        ['4.12.ribosomal_protein_L11_rplK', 'minimal',
         'GGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC',
         '7', '17.07',
         'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales'],
        [marker_l10, 'minimal',
         'CCTGCAGGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTG',
         '4', '9.76', staphylococcus],
        [marker_l10, 'minimal',
         'CCTGCAGGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTA',
         '5', '10.76', staphylococcus],
    ]
    table_lines = ["\t".join(row) for row in [self.headers] + otu_rows]
    metagenomes = "\n".join(table_lines)

    table_collection = OtuTableCollection()
    table_collection.add_otu_table(StringIO(metagenomes))

    def predictions_for(target):
        # String form of every prediction for the given target taxonomy.
        predicted = Chancer().predict_samples(
            metagenomes=table_collection,
            target_taxonomy=target)
        return [str(prediction) for prediction in predicted]

    self.assertEqual(["minimal\t8.0\t4.75"], predictions_for([]))
    self.assertEqual(
        ["minimal\t9.0\t2.5"],
        predictions_for(Taxonomy.split_taxonomy(
            'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Staphylococcaceae')))
def test_appraise_plot_real_data(self):
    """Not a real test, just developing the code.

    Loads the read, binned and assembly OTU tables from the
    ``appraise_example2`` data directory, appraises them and calls
    ``plot`` on the result. Nothing is asserted; the test only checks that
    plotting runs without raising.
    """
    example_dir = os.path.join(path_to_data, 'appraise_example2')

    def load_collection(filename):
        # Read one OTU table file from the example directory.
        collection = OtuTableCollection()
        with open(os.path.join(example_dir, filename)) as table_file:
            collection.add_otu_table(table_file)
        return collection

    appraiser = Appraiser()
    metagenome_collection = load_collection(
        'SRR5040536.reads.long_sample_names.otu_table.csv')
    genome_collection = load_collection('SRR5040536.binned.otu_table.csv')
    assembly_collection = load_collection('SRR5040536.assembly.otu_table.csv')

    app = appraiser.appraise(
        genome_otu_table_collection=genome_collection,
        metagenome_otu_table_collection=metagenome_collection,
        assembly_otu_table_collection=assembly_collection)

    with tempfile.NamedTemporaryFile(suffix='.svg',
                                     prefix='single_test_appraisal.') as f:
        # Use the temporary file's path rather than a fixed '/tmp/a.svg', so
        # the test does not depend on /tmp existing and concurrent runs do
        # not overwrite each other's output.
        app.plot(
            output_svg_base=f.name,
            cluster_identity=0.89,
            doing_assembly=True,
            doing_binning=True)
def test_contamination(self):
    """Appraise against a genome table listing two different L11 sequences.

    The genome sample carries two L11 rows that differ by one base, plus an
    S5 row. The metagenome L11 sequence is identical to one of the genome
    L11 rows, yet the assertions expect only the S5 OTU (8 hits) to count
    as binned and the L11 OTU (7 hits) to count as not found.
    """
    marker_l11 = '4.12.ribosomal_protein_L11_rplK'
    marker_s5 = '4.16.ribosomal_protein_S5'
    seq_l11 = 'GGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC'
    seq_s5 = 'GGTACCGGCGTCATCGCCGGTGGCGCGGCACGCGCCATCTTGGAGATGGCCGGCATCCGC'
    taxonomy_bacilli = 'Root; d__Bacteria; p__Firmicutes; c__Bacilli'
    taxonomy_actino = 'Root; d__Bacteria; p__Actinobacteria; c__Actinobacteria'

    def as_table(rows):
        # Tab-separated columns, newline-separated rows, no trailing newline.
        return "\n".join("\t".join(row) for row in rows)

    metagenomes = as_table([
        self.headers,
        [marker_l11, 'minimal', seq_l11, '7', '17.07',
         'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales'],
        [marker_s5, 'minimal', seq_s5, '8', '12.50', taxonomy_actino],
    ])
    genomes = as_table([
        self.headers,
        [marker_l11, 'genome', seq_l11, '1', '1.02', taxonomy_bacilli],
        [marker_l11, 'genome',
         # one base pair different to the one above
         'AGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC',
         '1', '1.02', taxonomy_bacilli],
        [marker_s5, 'genome', seq_s5, '1', '1.06', taxonomy_actino],
    ])

    appraiser = Appraiser()
    metagenome_collection = OtuTableCollection()
    metagenome_collection.add_otu_table(StringIO(metagenomes))
    genome_collection = OtuTableCollection()
    genome_collection.add_otu_table(StringIO(genomes))

    app = appraiser.appraise(
        genome_otu_table_collection=genome_collection,
        metagenome_otu_table_collection=metagenome_collection)
    self.assertEqual(1, len(app.appraisal_results))

    result = app.appraisal_results[0]
    self.assertEqual(8, result.num_binned)
    self.assertEqual(7, result.num_not_found)
def test_appraise_assembly_imperfectly(self):
    """Appraise with genome and assembly OTUs that only nearly match the reads.

    Two metagenome samples ('minimal' and 'another') are appraised against
    a genome table and an assembly table holding the same sequences. The
    appraisal runs twice, once with the default sequence identity and once
    with ``sequence_identity=0.9``, and the per-sample binned, assembled
    and not-found counts are asserted for each run.
    """
    metagenome_otu_table = [
        self.headers,
        [
            '4.12.ribosomal_protein_L11_rplK', 'minimal',
            'GGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC',
            '7', '17.07',
            'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales'
        ],
        [
            '4.16.ribosomal_protein_S5', 'another',
            'GGTACCGGCGTCATCGCCGGTGGCGCGGCACGCGCCATCTTGGAGATGGCCGGCATCCGC',
            '8', '12.50',
            'Root; d__Bacteria; p__Actinobacteria; c__Actinobacteria'
        ],
        [
            '4.16.ribosomal_protein_S5', 'another',
            'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAATGGCCGGCATCCGC',  # way different to the one above
            '9', '17.50',
            'Root; d__Bacteria; p__Actinobacteria; c__Actinobacteria'
        ],
    ]
    metagenomes = "\n".join(["\t".join(x) for x in metagenome_otu_table])

    genomes_otu_table = [
        self.headers,
        [
            '4.12.ribosomal_protein_L11_rplK', 'genome',
            'GGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC',
            '1', '1.02',
            'Root; d__Bacteria; p__Firmicutes; c__Bacilli'
        ],
        [
            '4.12.ribosomal_protein_L11_rplK', 'genome',
            'AGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC',  # one base pair different to the one above
            '1', '1.02',
            'Root; d__Bacteria; p__Firmicutes; c__Bacilli'
        ],
        [
            '4.16.ribosomal_protein_S5', 'genome',
            'GGTACCGGCGTCATCGCCGGTGGGGCGGCACGCGCCATCTTGGAGATGGCCGGCATCCGC',
            '1', '1.06',
            'Root; d__Bacteria; p__Actinobacteria; c__Actinobacteria'
        ],
    ]
    genomes = "\n".join(["\t".join(x) for x in genomes_otu_table])

    assemblies_otu_table = [
        self.headers,
        [
            '4.12.ribosomal_protein_L11_rplK', 'assembly',
            'GGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC',
            '1', '1.02',
            'Root; d__Bacteria; p__Firmicutes; c__Bacilli'
        ],
        [
            '4.12.ribosomal_protein_L11_rplK', 'assembly',
            'AGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC',  # one base pair different to the one above
            '1', '1.02',
            'Root; d__Bacteria; p__Firmicutes; c__Bacilli'
        ],
        [
            '4.16.ribosomal_protein_S5', 'assembly',
            'GGTACCGGCGTCATCGCCGGTGGGGCGGCACGCGCCATCTTGGAGATGGCCGGCATCCGC',
            '1', '1.06',
            'Root; d__Bacteria; p__Actinobacteria; c__Actinobacteria'
        ],
    ]
    # Build the assembly input from the assembly table. This previously
    # joined genomes_otu_table by mistake, leaving assemblies_otu_table
    # unused.
    assemblies = "\n".join(["\t".join(x) for x in assemblies_otu_table])

    appraiser = Appraiser()
    metagenome_collection = OtuTableCollection()
    metagenome_collection.add_otu_table(StringIO(metagenomes))
    genome_collection = OtuTableCollection()
    genome_collection.add_otu_table(StringIO(genomes))
    assembly_collection = OtuTableCollection()
    assembly_collection.add_otu_table(StringIO(assemblies))

    # First run: default sequence identity.
    app = appraiser.appraise(
        genome_otu_table_collection=genome_collection,
        metagenome_otu_table_collection=metagenome_collection,
        assembly_otu_table_collection=assembly_collection)
    self.assertEqual(2, len(app.appraisal_results))
    res = self._sort_appraisal_results(app.appraisal_results)
    a = res[0]
    self.assertEqual(0, a.num_binned)
    self.assertEqual(0, a.num_assembled)
    self.assertEqual(8 + 9, a.num_not_found)
    a = res[1]
    self.assertEqual(0, a.num_binned)
    self.assertEqual(7, a.num_assembled)
    self.assertEqual(0, a.num_not_found)

    # Second run: sequence identity of 0.9.
    app = appraiser.appraise(
        genome_otu_table_collection=genome_collection,
        metagenome_otu_table_collection=metagenome_collection,
        assembly_otu_table_collection=assembly_collection,
        sequence_identity=0.9)
    self.assertEqual(2, len(app.appraisal_results))
    res = self._sort_appraisal_results(app.appraisal_results)
    a = res[0]
    self.assertEqual('another', a.metagenome_sample_name)
    self.assertEqual(8, a.num_binned)
    self.assertEqual(8, a.num_assembled)
    self.assertEqual(9, a.num_not_found)
    a = res[1]
    self.assertEqual(0, a.num_binned)
    self.assertEqual(7, a.num_assembled)
    self.assertEqual(0, a.num_not_found)
def test_appraise_plot_real_data(self):
    """Not a real test, just developing the code.

    Builds metagenome, genome and assembly collections from the
    ``appraise_example2`` example files, appraises them and plots the
    result. There are no assertions; the test passes if nothing raises.
    """
    appraiser = Appraiser()

    table_files = [
        ('metagenome', 'SRR5040536.reads.long_sample_names.otu_table.csv'),
        ('genome', 'SRR5040536.binned.otu_table.csv'),
        ('assembly', 'SRR5040536.assembly.otu_table.csv'),
    ]
    collections = {}
    for role, filename in table_files:
        collections[role] = OtuTableCollection()
        with open(os.path.join(path_to_data, 'appraise_example2',
                               filename)) as f:
            collections[role].add_otu_table(f)

    app = appraiser.appraise(
        genome_otu_table_collection=collections['genome'],
        metagenome_otu_table_collection=collections['metagenome'],
        assembly_otu_table_collection=collections['assembly'])

    with tempfile.NamedTemporaryFile(suffix='.svg',
                                     prefix='single_test_appraisal.') as f:
        # Write to the temporary file's path; the hard-coded '/tmp/a.svg'
        # used before left the temporary file unused and could clash
        # between concurrent test runs.
        app.plot(
            output_svg_base=f.name,
            cluster_identity=0.89,
            doing_assembly=True,
            doing_binning=True)
def test_assembly_input(self):
    """Check appraisal counts when an assembly OTU table is supplied.

    One metagenome sample ('minimal') with three OTUs is appraised against
    a genome table sharing one of its sequences and an assembly table
    sharing two of them. Asserts the binned, assembled and not-found
    totals and which sequences land in each list.
    """
    binned_sequence = 'GGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC'
    assembled_sequence = 'CCTGCAGGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTG'
    missing_sequence = 'CAAAAAAAAAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTG'
    bacillales = 'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales'
    staphylococcus = 'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Staphylococcaceae; g__Staphylococcus'

    tables = {
        'metagenome': [
            ['4.12.ribosomal_protein_L11_rplK', 'minimal', binned_sequence,
             '7', '17.07', bacillales],
            ['4.11.ribosomal_protein_L10', 'minimal', assembled_sequence,
             '4', '9.76', staphylococcus],
            ['4.14.ribosomal_protein_L16_L10E_rplP', 'minimal',
             missing_sequence, '5', '10.76', staphylococcus],
        ],
        'assembly': [
            ['4.12.ribosomal_protein_L11_rplK', 'assembly', binned_sequence,
             '1', '1.007', bacillales],
            ['4.11.ribosomal_protein_L10', 'assembly', assembled_sequence,
             '1', '1.01', staphylococcus],
        ],
        'genome': [
            ['4.12.ribosomal_protein_L11_rplK', 'genome', binned_sequence,
             '1', '1.02', 'Root; d__Bacteria; p__Firmicutes; c__Bacilli'],
        ],
    }

    appraiser = Appraiser()
    collections = {}
    for role in ('metagenome', 'genome', 'assembly'):
        # Header row first, then the OTU rows; no trailing newline.
        lines = ["\t".join(row) for row in [self.headers] + tables[role]]
        collections[role] = OtuTableCollection()
        collections[role].add_otu_table(StringIO("\n".join(lines)))

    app = appraiser.appraise(
        genome_otu_table_collection=collections['genome'],
        metagenome_otu_table_collection=collections['metagenome'],
        assembly_otu_table_collection=collections['assembly'])
    self.assertEqual(1, len(app.appraisal_results))

    appraisal = app.appraisal_results[0]
    self.assertEqual(7, appraisal.num_binned)
    self.assertEqual(11, appraisal.num_assembled)
    self.assertEqual(5, appraisal.num_not_found)
    self.assertEqual('minimal', appraisal.metagenome_sample_name)
    self.assertEqual(1, len(appraisal.binned_otus))
    self.assertEqual(binned_sequence, appraisal.binned_otus[0].sequence)
    self.assertEqual(2, len(appraisal.assembled_otus))
    self.assertEqual(
        sorted([binned_sequence, assembled_sequence]),
        sorted(otu.sequence for otu in appraisal.assembled_otus))
    self.assertEqual(1, len(appraisal.not_found_otus))
    self.assertEqual(missing_sequence, appraisal.not_found_otus[0].sequence)
def test_appraise_assembly_imperfectly(self):
    """Appraise reads against genomes and assemblies that match imperfectly.

    The genome and assembly tables hold the same three sequences under
    different sample names. The appraisal is run with the default sequence
    identity and again with ``sequence_identity=0.9``; after each run the
    binned, assembled and not-found counts of both metagenome samples are
    asserted.
    """
    firmicutes_class = 'Root; d__Bacteria; p__Firmicutes; c__Bacilli'
    actinobacteria = 'Root; d__Bacteria; p__Actinobacteria; c__Actinobacteria'

    metagenome_otu_table = [
        self.headers,
        ['4.12.ribosomal_protein_L11_rplK', 'minimal',
         'GGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC',
         '7', '17.07',
         'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales'],
        ['4.16.ribosomal_protein_S5', 'another',
         'GGTACCGGCGTCATCGCCGGTGGCGCGGCACGCGCCATCTTGGAGATGGCCGGCATCCGC',
         '8', '12.50', actinobacteria],
        ['4.16.ribosomal_protein_S5', 'another',
         'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAATGGCCGGCATCCGC',  # way different to the one above
         '9', '17.50', actinobacteria],
    ]
    metagenomes = "\n".join(["\t".join(x) for x in metagenome_otu_table])

    genomes_otu_table = [
        self.headers,
        ['4.12.ribosomal_protein_L11_rplK', 'genome',
         'GGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC',
         '1', '1.02', firmicutes_class],
        ['4.12.ribosomal_protein_L11_rplK', 'genome',
         'AGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC',  # one base pair different to the one above
         '1', '1.02', firmicutes_class],
        ['4.16.ribosomal_protein_S5', 'genome',
         'GGTACCGGCGTCATCGCCGGTGGGGCGGCACGCGCCATCTTGGAGATGGCCGGCATCCGC',
         '1', '1.06', actinobacteria],
    ]
    genomes = "\n".join(["\t".join(x) for x in genomes_otu_table])

    assemblies_otu_table = [
        self.headers,
        ['4.12.ribosomal_protein_L11_rplK', 'assembly',
         'GGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC',
         '1', '1.02', firmicutes_class],
        ['4.12.ribosomal_protein_L11_rplK', 'assembly',
         'AGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC',  # one base pair different to the one above
         '1', '1.02', firmicutes_class],
        ['4.16.ribosomal_protein_S5', 'assembly',
         'GGTACCGGCGTCATCGCCGGTGGGGCGGCACGCGCCATCTTGGAGATGGCCGGCATCCGC',
         '1', '1.06', actinobacteria],
    ]
    # Fixed copy-paste slip: the assembly input used to be built from
    # genomes_otu_table, so assemblies_otu_table was never used.
    assemblies = "\n".join(["\t".join(x) for x in assemblies_otu_table])

    appraiser = Appraiser()
    metagenome_collection = OtuTableCollection()
    metagenome_collection.add_otu_table(StringIO(metagenomes))
    genome_collection = OtuTableCollection()
    genome_collection.add_otu_table(StringIO(genomes))
    assembly_collection = OtuTableCollection()
    assembly_collection.add_otu_table(StringIO(assemblies))

    # Default sequence identity.
    app = appraiser.appraise(
        genome_otu_table_collection=genome_collection,
        metagenome_otu_table_collection=metagenome_collection,
        assembly_otu_table_collection=assembly_collection)
    self.assertEqual(2, len(app.appraisal_results))
    res = self._sort_appraisal_results(app.appraisal_results)
    a = res[0]
    self.assertEqual(0, a.num_binned)
    self.assertEqual(0, a.num_assembled)
    self.assertEqual(8+9, a.num_not_found)
    a = res[1]
    self.assertEqual(0, a.num_binned)
    self.assertEqual(7, a.num_assembled)
    self.assertEqual(0, a.num_not_found)

    # Sequence identity of 0.9.
    app = appraiser.appraise(
        genome_otu_table_collection=genome_collection,
        metagenome_otu_table_collection=metagenome_collection,
        assembly_otu_table_collection=assembly_collection,
        sequence_identity=0.9)
    self.assertEqual(2, len(app.appraisal_results))
    res = self._sort_appraisal_results(app.appraisal_results)
    a = res[0]
    self.assertEqual('another', a.metagenome_sample_name)
    self.assertEqual(8, a.num_binned)
    self.assertEqual(8, a.num_assembled)
    self.assertEqual(9, a.num_not_found)
    a = res[1]
    self.assertEqual(0, a.num_binned)
    self.assertEqual(7, a.num_assembled)
    self.assertEqual(0, a.num_not_found)