Example #1
0
    def test_appraise_plot_real_data(self):
        """Smoke test: appraise the real example data and plot the result.

        Loads the reads, binned-genome and assembly OTU tables from the
        ``appraise_example2`` data directory, appraises them and calls
        ``plot()``. No assertions are made on the output; the test only
        fails if one of these steps raises.
        """
        example_dir = os.path.join(path_to_data, 'appraise_example2')

        appraiser = Appraiser()
        metagenome_collection = OtuTableCollection()
        with open(
                os.path.join(
                    example_dir,
                    'SRR5040536.reads.long_sample_names.otu_table.csv')) as f:
            metagenome_collection.add_otu_table(f)
        genome_collection = OtuTableCollection()
        with open(
                os.path.join(example_dir,
                             'SRR5040536.binned.otu_table.csv')) as f:
            genome_collection.add_otu_table(f)
        assembly_collection = OtuTableCollection()
        with open(
                os.path.join(example_dir,
                             'SRR5040536.assembly.otu_table.csv')) as f:
            assembly_collection.add_otu_table(f)
        app = appraiser.appraise(
            genome_otu_table_collection=genome_collection,
            metagenome_otu_table_collection=metagenome_collection,
            assembly_otu_table_collection=assembly_collection)

        with tempfile.NamedTemporaryFile(suffix='.svg',
                                         prefix='single_test_appraisal.') as f:
            # Plot to the temporary file's path instead of a hard-coded
            # '/tmp/a.svg': that path is not portable and would be shared
            # (and left behind) by every run of the test suite.
            # NOTE(review): the keyword is a "base" name, so plot() may
            # derive further file names from it -- confirm whether any such
            # files need cleaning up.
            app.plot(
                output_svg_base=f.name,
                cluster_identity=0.89,
                doing_assembly=True,
                doing_binning=True)
Example #2
0
    def test_multiple_samples(self):
        """Appraise two metagenome samples against a single genome OTU.

        The 'minimal' sample's only OTU has the same sequence as the genome
        OTU and is expected to be fully binned; the 'another' sample's OTU
        has a different sequence and is expected to be entirely not found.
        """
        metagenome_otu_table = [self.headers,
                    ['4.12.ribosomal_protein_L11_rplK','minimal','GGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC','7','17.07','Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales'],
                    ['4.11.ribosomal_protein_L10','another','CCTGCAGGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTG','4','9.76','Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Staphylococcaceae; g__Staphylococcus']
                    ]
        metagenomes = "\n".join(["\t".join(x) for x in metagenome_otu_table])

        genomes_otu_table = [self.headers,['4.12.ribosomal_protein_L11_rplK','genome','GGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC','1','1.02','Root; d__Bacteria; p__Firmicutes; c__Bacilli']
                    ]
        genomes = "\n".join(["\t".join(x) for x in genomes_otu_table])

        appraiser = Appraiser()
        metagenome_collection = OtuTableCollection()
        metagenome_collection.add_otu_table(StringIO(metagenomes))
        genome_collection = OtuTableCollection()
        genome_collection.add_otu_table(StringIO(genomes))
        app = appraiser.appraise(genome_otu_table_collection=genome_collection,
                                 metagenome_otu_table_collection=metagenome_collection)
        self.assertEqual(2, len(app.appraisal_results))

        # Sort by sample name so the assertions below do not depend on the
        # order in which the appraiser happens to return its results.
        results = sorted(app.appraisal_results,
                         key=lambda res: res.metagenome_sample_name)
        a = results[1]
        self.assertEqual('minimal', a.metagenome_sample_name)
        self.assertEqual(7, a.num_binned)
        self.assertEqual(0, a.num_not_found)
        a = results[0]
        self.assertEqual('another', a.metagenome_sample_name)
        self.assertEqual(0, a.num_binned)
        self.assertEqual(4, a.num_not_found)
Example #3
0
    def test_contamination_near_enough(self):
        """Appraise with and without a relaxed ``sequence_identity``.

        The genome table holds two L11 sequences that differ by one base and
        an S5 sequence that differs by one base from the metagenome's S5 OTU.
        With the default settings nothing is expected to be binned; with
        ``sequence_identity=0.9`` the 'another' sample's S5 OTU (count 8) is
        expected to be binned while the 'minimal' sample's L11 OTU (count 7)
        is still expected to be not found.
        """
        metagenome_otu_table = [
            self.headers,[
                '4.12.ribosomal_protein_L11_rplK',
                'minimal',
                'GGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC',
                '7','17.07','Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales'],
            [
                '4.16.ribosomal_protein_S5',
                'another',
                'GGTACCGGCGTCATCGCCGGTGGCGCGGCACGCGCCATCTTGGAGATGGCCGGCATCCGC',
                '8','12.50','Root; d__Bacteria; p__Actinobacteria; c__Actinobacteria']]
        metagenomes = "\n".join(["\t".join(x) for x in metagenome_otu_table])
        genomes_otu_table = [
            self.headers,[
                '4.12.ribosomal_protein_L11_rplK',
                'genome',
                'GGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC',
                '1','1.02','Root; d__Bacteria; p__Firmicutes; c__Bacilli'],
            [
                '4.12.ribosomal_protein_L11_rplK',
                'genome',
                'AGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC', # one base different from the L11 sequence above
                '1','1.02','Root; d__Bacteria; p__Firmicutes; c__Bacilli'],
            [
                '4.16.ribosomal_protein_S5',
                'genome',
                'GGTACCGGCGTCATCGCCGGTGGGGCGGCACGCGCCATCTTGGAGATGGCCGGCATCCGC', # one base different from the metagenome S5 sequence
                '1','1.06','Root; d__Bacteria; p__Actinobacteria; c__Actinobacteria']
        ]
        genomes = "\n".join(["\t".join(x) for x in genomes_otu_table])

        def by_sample_name(res):
            """Sort key: order appraisal results by metagenome sample name."""
            return res.metagenome_sample_name

        appraiser = Appraiser()
        metagenome_collection = OtuTableCollection()
        metagenome_collection.add_otu_table(StringIO(metagenomes))
        genome_collection = OtuTableCollection()
        genome_collection.add_otu_table(StringIO(genomes))
        app = appraiser.appraise(genome_otu_table_collection=genome_collection,
                                 metagenome_otu_table_collection=metagenome_collection)
        self.assertEqual(2, len(app.appraisal_results))
        # Sort by sample name ('another' < 'minimal') so the positional
        # assertions do not depend on the order the appraiser returns results.
        sorted_results = sorted(app.appraisal_results, key=by_sample_name)
        a = sorted_results[0]
        self.assertEqual(0, a.num_binned)
        self.assertEqual(8, a.num_not_found)
        a = sorted_results[1]
        self.assertEqual(0, a.num_binned)
        self.assertEqual(7, a.num_not_found)

        app = appraiser.appraise(genome_otu_table_collection=genome_collection,
                                 metagenome_otu_table_collection=metagenome_collection,
                                 sequence_identity=0.9)
        self.assertEqual(2, len(app.appraisal_results))
        sorted_results = sorted(app.appraisal_results, key=by_sample_name)
        a = sorted_results[0]
        self.assertEqual('another', a.metagenome_sample_name)
        self.assertEqual(8, a.num_binned)
        self.assertEqual(0, a.num_not_found)
        a = sorted_results[1]
        self.assertEqual(0, a.num_binned)
        self.assertEqual(7, a.num_not_found)
Example #4
0
    def test_contamination(self):
        """Appraise one sample against a genome with a duplicated marker gene.

        The genome table lists two L11 sequences that differ by a single
        base, plus an S5 sequence identical to the metagenome's S5 OTU. The
        single appraisal result is expected to report 8 binned (the S5
        count) and 7 not found (the L11 count).
        """
        def as_tsv(rows):
            # Render a list of rows as tab-separated, newline-joined text.
            return "\n".join("\t".join(fields) for fields in rows)

        l11_sequence = 'GGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC'
        # Differs from l11_sequence by one base (the first one).
        l11_variant = 'AGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC'
        s5_sequence = 'GGTACCGGCGTCATCGCCGGTGGCGCGGCACGCGCCATCTTGGAGATGGCCGGCATCCGC'

        bacilli = 'Root; d__Bacteria; p__Firmicutes; c__Bacilli'
        bacillales = 'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales'
        actinobacteria = 'Root; d__Bacteria; p__Actinobacteria; c__Actinobacteria'

        metagenomes = as_tsv([
            self.headers,
            ['4.12.ribosomal_protein_L11_rplK', 'minimal', l11_sequence,
             '7', '17.07', bacillales],
            ['4.16.ribosomal_protein_S5', 'minimal', s5_sequence,
             '8', '12.50', actinobacteria],
        ])
        genomes = as_tsv([
            self.headers,
            ['4.12.ribosomal_protein_L11_rplK', 'genome', l11_sequence,
             '1', '1.02', bacilli],
            ['4.12.ribosomal_protein_L11_rplK', 'genome', l11_variant,
             '1', '1.02', bacilli],
            ['4.16.ribosomal_protein_S5', 'genome', s5_sequence,
             '1', '1.06', actinobacteria],
        ])

        appraiser = Appraiser()
        metagenome_collection = OtuTableCollection()
        metagenome_collection.add_otu_table(StringIO(metagenomes))
        genome_collection = OtuTableCollection()
        genome_collection.add_otu_table(StringIO(genomes))

        appraisal = appraiser.appraise(
            genome_otu_table_collection=genome_collection,
            metagenome_otu_table_collection=metagenome_collection)
        results = appraisal.appraisal_results
        self.assertEqual(1, len(results))
        only_result = results[0]
        self.assertEqual(8, only_result.num_binned)
        self.assertEqual(7, only_result.num_not_found)
Example #5
0
    def test_clusterer_all_cluster_two_samples_some_cluster(self):
        """Appraise two samples at ``sequence_identity=0.7``.

        In each sample the non-poly-A OTU differs from the genome OTU only in
        its last base and is expected to be binned, while the poly-A OTU is
        expected to be reported as not found.
        """
        # non-As and genome cluster together but are not exactly the same
        metagenome_otu_table = [self.headers,
                    ['4.12.ribosomal_protein_L11_rplK','minimal','GGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC','7','17.07','Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales'],
                    ['4.12.ribosomal_protein_L11_rplK','minimal','AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA','12','17.07','Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales'],
                    ['4.11.ribosomal_protein_L10','maximal',     'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA','4','9.76','Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Staphylococcaceae; g__Staphylococcus'],
                    ['4.11.ribosomal_protein_L10','maximal',     'GGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATG','1','9.76','Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Staphylococcaceae; g__Staphylococcus']
                    ]
        metagenomes = "\n".join(["\t".join(x) for x in metagenome_otu_table])

        genomes_otu_table = [self.headers,['4.12.ribosomal_protein_L11_rplK','genome','GGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATA','1','1.02','Root; d__Bacteria; p__Firmicutes; c__Bacilli']
                    ]
        genomes = "\n".join(["\t".join(x) for x in genomes_otu_table])

        appraiser = Appraiser()
        metagenome_collection = OtuTableCollection()
        metagenome_collection.add_otu_table(StringIO(metagenomes))
        genome_collection = OtuTableCollection()
        genome_collection.add_otu_table(StringIO(genomes))
        app = appraiser.appraise(genome_otu_table_collection=genome_collection,
                                 metagenome_otu_table_collection=metagenome_collection,
                                 sequence_identity=0.7)
        self.assertEqual(2, len(app.appraisal_results))

        # Sort by sample name ('maximal' < 'minimal') so the positional
        # lookups below do not depend on the order results are returned in.
        results = sorted(app.appraisal_results,
                         key=lambda res: res.metagenome_sample_name)

        a = results[1]
        self.assertEqual('minimal', a.metagenome_sample_name)
        self.assertEqual(7, a.num_binned)
        self.assertEqual(12, a.num_not_found)
        self.assertEqual(1, len(a.binned_otus))
        self.assertEqual(1, len(a.not_found_otus))
        self.assertEqual('GGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC',
                         a.binned_otus[0].sequence)
        self.assertEqual('minimal',
                         a.binned_otus[0].sample_name)
        self.assertEqual('AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA',
                         a.not_found_otus[0].sequence)
        self.assertEqual('minimal',
                         a.not_found_otus[0].sample_name)

        a = results[0]
        self.assertEqual('maximal', a.metagenome_sample_name)
        self.assertEqual(1, a.num_binned)
        self.assertEqual(4, a.num_not_found)
        self.assertEqual(1, len(a.binned_otus))
        self.assertEqual(1, len(a.not_found_otus))
        self.assertEqual('GGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATG',
                         a.binned_otus[0].sequence)
        self.assertEqual('maximal',
                         a.binned_otus[0].sample_name)
        self.assertEqual('AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA',
                         a.not_found_otus[0].sequence)
        self.assertEqual('maximal',
                         a.not_found_otus[0].sample_name)
Example #6
0
    def test_assembly_input(self):
        """Appraise one sample against both a genome and an assembly table.

        The metagenome has three OTUs (counts 7, 4 and 5). The genome table
        contains the first sequence and the assembly table the first two.
        Expected totals: 7 binned, 11 assembled and 5 not found, with the
        matching sequences listed in ``binned_otus``, ``assembled_otus`` and
        ``not_found_otus``.
        """
        def as_tsv(rows):
            # Render a list of rows as tab-separated, newline-joined text.
            return "\n".join("\t".join(fields) for fields in rows)

        l11_sequence = 'GGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC'
        l10_sequence = 'CCTGCAGGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTG'
        l16_sequence = 'CAAAAAAAAAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTG'

        bacilli = 'Root; d__Bacteria; p__Firmicutes; c__Bacilli'
        bacillales = 'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales'
        staphylococcus = 'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Staphylococcaceae; g__Staphylococcus'

        metagenomes = as_tsv([
            self.headers,
            ['4.12.ribosomal_protein_L11_rplK', 'minimal', l11_sequence,
             '7', '17.07', bacillales],
            ['4.11.ribosomal_protein_L10', 'minimal', l10_sequence,
             '4', '9.76', staphylococcus],
            ['4.14.ribosomal_protein_L16_L10E_rplP', 'minimal', l16_sequence,
             '5', '10.76', staphylococcus],
        ])
        assemblies = as_tsv([
            self.headers,
            ['4.12.ribosomal_protein_L11_rplK', 'assembly', l11_sequence,
             '1', '1.007', bacillales],
            ['4.11.ribosomal_protein_L10', 'assembly', l10_sequence,
             '1', '1.01', staphylococcus],
        ])
        genomes = as_tsv([
            self.headers,
            ['4.12.ribosomal_protein_L11_rplK', 'genome', l11_sequence,
             '1', '1.02', bacilli],
        ])

        appraiser = Appraiser()
        metagenome_collection = OtuTableCollection()
        metagenome_collection.add_otu_table(StringIO(metagenomes))
        genome_collection = OtuTableCollection()
        genome_collection.add_otu_table(StringIO(genomes))
        assembly_collection = OtuTableCollection()
        assembly_collection.add_otu_table(StringIO(assemblies))

        appraisal = appraiser.appraise(
            genome_otu_table_collection=genome_collection,
            metagenome_otu_table_collection=metagenome_collection,
            assembly_otu_table_collection=assembly_collection)
        self.assertEqual(1, len(appraisal.appraisal_results))
        result = appraisal.appraisal_results[0]

        # Totals across the three categories.
        self.assertEqual(7, result.num_binned)
        self.assertEqual(11, result.num_assembled)
        self.assertEqual(5, result.num_not_found)
        self.assertEqual('minimal', result.metagenome_sample_name)

        # The individual OTUs behind those totals.
        self.assertEqual(1, len(result.binned_otus))
        self.assertEqual(l11_sequence, result.binned_otus[0].sequence)
        self.assertEqual(2, len(result.assembled_otus))
        expected_assembled = sorted([l11_sequence, l10_sequence])
        observed_assembled = sorted([otu.sequence for otu in result.assembled_otus])
        self.assertEqual(expected_assembled, observed_assembled)
        self.assertEqual(1, len(result.not_found_otus))
        self.assertEqual(l16_sequence, result.not_found_otus[0].sequence)
Example #7
0
    def test_contamination(self):
        """Check appraisal when the genome carries two copies of one marker.

        Both metagenome OTUs belong to the 'minimal' sample. The genome table
        has two L11 entries whose sequences differ by one base and one S5
        entry matching the metagenome exactly. The test expects a single
        result with 8 binned (the S5 count) and 7 not found (the L11 count).
        """
        l11_gene = '4.12.ribosomal_protein_L11_rplK'
        s5_gene = '4.16.ribosomal_protein_S5'

        metagenome_rows = [
            self.headers,
            [l11_gene, 'minimal',
             'GGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC',
             '7', '17.07',
             'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales'],
            [s5_gene, 'minimal',
             'GGTACCGGCGTCATCGCCGGTGGCGCGGCACGCGCCATCTTGGAGATGGCCGGCATCCGC',
             '8', '12.50',
             'Root; d__Bacteria; p__Actinobacteria; c__Actinobacteria'],
        ]
        genome_rows = [
            self.headers,
            [l11_gene, 'genome',
             'GGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC',
             '1', '1.02', 'Root; d__Bacteria; p__Firmicutes; c__Bacilli'],
            # Same gene and sample as the row above, one base different.
            [l11_gene, 'genome',
             'AGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC',
             '1', '1.02', 'Root; d__Bacteria; p__Firmicutes; c__Bacilli'],
            [s5_gene, 'genome',
             'GGTACCGGCGTCATCGCCGGTGGCGCGGCACGCGCCATCTTGGAGATGGCCGGCATCCGC',
             '1', '1.06',
             'Root; d__Bacteria; p__Actinobacteria; c__Actinobacteria'],
        ]
        metagenome_lines = ["\t".join(row) for row in metagenome_rows]
        genome_lines = ["\t".join(row) for row in genome_rows]

        appraiser = Appraiser()
        metagenome_collection = OtuTableCollection()
        metagenome_collection.add_otu_table(StringIO("\n".join(metagenome_lines)))
        genome_collection = OtuTableCollection()
        genome_collection.add_otu_table(StringIO("\n".join(genome_lines)))

        appraisal = appraiser.appraise(
            genome_otu_table_collection=genome_collection,
            metagenome_otu_table_collection=metagenome_collection)
        self.assertEqual(1, len(appraisal.appraisal_results))
        sample_result = appraisal.appraisal_results[0]
        self.assertEqual(8, sample_result.num_binned)
        self.assertEqual(7, sample_result.num_not_found)
Example #8
0
    def test_appraise_plot_real_data(self):
        """Smoke test: appraise the real example data and plot the result.

        Reads the reads, binned-genome and assembly OTU tables found in the
        ``appraise_example2`` data directory, runs the appraisal and calls
        ``plot()``. Nothing is asserted about the plot itself; the test only
        fails if a step raises.
        """
        appraiser = Appraiser()
        metagenome_collection = OtuTableCollection()
        with open(os.path.join(path_to_data, 'appraise_example2', 'SRR5040536.reads.long_sample_names.otu_table.csv')) as f:
            metagenome_collection.add_otu_table(f)
        genome_collection = OtuTableCollection()
        with open(os.path.join(path_to_data, 'appraise_example2', 'SRR5040536.binned.otu_table.csv')) as f:
            genome_collection.add_otu_table(f)
        assembly_collection = OtuTableCollection()
        with open(os.path.join(path_to_data, 'appraise_example2', 'SRR5040536.assembly.otu_table.csv')) as f:
            assembly_collection.add_otu_table(f)
        app = appraiser.appraise(genome_otu_table_collection=genome_collection,
                                 metagenome_otu_table_collection=metagenome_collection,
                                 assembly_otu_table_collection=assembly_collection)

        with tempfile.NamedTemporaryFile(suffix='.svg',prefix='single_test_appraisal.') as f:
            # Use the temporary file's path rather than a fixed '/tmp/a.svg',
            # which is not portable and would be overwritten (and left on
            # disk) by every run.
            # NOTE(review): plot() takes a "base" name and may create other
            # files derived from it -- confirm whether those need removing.
            app.plot(
                output_svg_base=f.name,
                cluster_identity=0.89,
                doing_assembly=True,
                doing_binning=True
            )
Example #9
0
    def test_print_appraisal(self):
        """Check the appraisal summary and the optional OTU table outputs.

        Appraises two samples against one genome OTU, verifies the
        per-sample counts, then checks the text ``print_appraisal`` writes to
        the summary stream and to the binned / unaccounted-for OTU table
        streams.
        """
        metagenome_otu_table = [self.headers,
                    ['4.12.ribosomal_protein_L11_rplK','minimal','GGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC','7','17.07','Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales'],
                    ['4.11.ribosomal_protein_L10','another','CCTGCAGGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTG','4','9.76','Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Staphylococcaceae; g__Staphylococcus']
                    ]
        metagenomes = "\n".join(["\t".join(x) for x in metagenome_otu_table])

        genomes_otu_table = [self.headers,['4.12.ribosomal_protein_L11_rplK','genome','GGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC','1','1.02','Root; d__Bacteria; p__Firmicutes; c__Bacilli']
                    ]
        genomes = "\n".join(["\t".join(x) for x in genomes_otu_table])

        appraiser = Appraiser()
        metagenome_collection = OtuTableCollection()
        metagenome_collection.add_otu_table(StringIO(metagenomes))
        genome_collection = OtuTableCollection()
        genome_collection.add_otu_table(StringIO(genomes))
        app = appraiser.appraise(genome_otu_table_collection=genome_collection,
                                 metagenome_otu_table_collection=metagenome_collection)
        self.assertEqual(2, len(app.appraisal_results))

        # Look results up from a name-sorted copy so these checks do not
        # depend on the order of app.appraisal_results; `app` itself is
        # passed to print_appraisal unchanged.
        results = sorted(app.appraisal_results,
                         key=lambda res: res.metagenome_sample_name)
        a = results[1]
        self.assertEqual('minimal', a.metagenome_sample_name)
        self.assertEqual(7, a.num_binned)
        self.assertEqual(0, a.num_not_found)
        a = results[0]
        self.assertEqual('another', a.metagenome_sample_name)
        self.assertEqual(0, a.num_binned)
        self.assertEqual(4, a.num_not_found)

        # Summary table only.
        to_print = StringIO()
        appraiser.print_appraisal(app, True, to_print)
        self.assertEqual("sample\tnum_binned\tnum_not_found\tpercent_binned\nanother\t0\t4\t0.0\nminimal\t7\t0\t100.0\ntotal\t7\t4\t63.6\naverage\t3.5\t2.0\t50.0\n", to_print.getvalue())

        # Summary plus the binned and unaccounted-for OTU tables.
        to_print = StringIO()
        found_otu_table_io = StringIO()
        not_found_otu_table_io = StringIO()
        appraiser.print_appraisal(app, True, to_print,
                                  binned_otu_table_io=found_otu_table_io,
                                  unaccounted_for_otu_table_io=not_found_otu_table_io)
        self.assertEqual("\n".join([
                          "\t".join(self.headers),
                          "\t".join(['4.12.ribosomal_protein_L11_rplK','minimal','GGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC','7','17.07','Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales'])
                          ])+"\n",
                         found_otu_table_io.getvalue())
        self.assertEqual("\n".join([
                          "\t".join(self.headers),
                          "\t".join(['4.11.ribosomal_protein_L10','another','CCTGCAGGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTG','4','9.76','Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Staphylococcaceae; g__Staphylococcus'])])+"\n",
                         not_found_otu_table_io.getvalue())
Example #10
0
    def test_appraise_assembly_imperfectly(self):
        """Appraise against genomes and an assembly, exactly and at 0.9 identity.

        The genome and assembly tables each hold two L11 sequences differing
        by one base and an S5 sequence one base away from the metagenome's
        first S5 OTU. The appraisal is run twice: once with default settings
        and once with ``sequence_identity=0.9``; binned, assembled and
        not-found totals are checked for both samples each time.
        """
        metagenome_otu_table = [
            self.headers,
            [
                '4.12.ribosomal_protein_L11_rplK', 'minimal',
                'GGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC',
                '7', '17.07',
                'Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales'
            ],
            [
                '4.16.ribosomal_protein_S5', 'another',
                'GGTACCGGCGTCATCGCCGGTGGCGCGGCACGCGCCATCTTGGAGATGGCCGGCATCCGC',
                '8', '12.50',
                'Root; d__Bacteria; p__Actinobacteria; c__Actinobacteria'
            ],
            [
                '4.16.ribosomal_protein_S5',
                'another',
                'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAATGGCCGGCATCCGC',  # way different to the one above
                '9',
                '17.50',
                'Root; d__Bacteria; p__Actinobacteria; c__Actinobacteria'
            ]
        ]
        metagenomes = "\n".join(["\t".join(x) for x in metagenome_otu_table])
        genomes_otu_table = [
            self.headers,
            [
                '4.12.ribosomal_protein_L11_rplK', 'genome',
                'GGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC',
                '1', '1.02', 'Root; d__Bacteria; p__Firmicutes; c__Bacilli'
            ],
            [
                '4.12.ribosomal_protein_L11_rplK',
                'genome',
                'AGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC',  #one base pair different to the one above
                '1',
                '1.02',
                'Root; d__Bacteria; p__Firmicutes; c__Bacilli'
            ],
            [
                '4.16.ribosomal_protein_S5', 'genome',
                'GGTACCGGCGTCATCGCCGGTGGGGCGGCACGCGCCATCTTGGAGATGGCCGGCATCCGC',
                '1', '1.06',
                'Root; d__Bacteria; p__Actinobacteria; c__Actinobacteria'
            ]
        ]
        genomes = "\n".join(["\t".join(x) for x in genomes_otu_table])
        assemblies_otu_table = [
            self.headers,
            [
                '4.12.ribosomal_protein_L11_rplK', 'assembly',
                'GGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC',
                '1', '1.02', 'Root; d__Bacteria; p__Firmicutes; c__Bacilli'
            ],
            [
                '4.12.ribosomal_protein_L11_rplK',
                'assembly',
                'AGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC',  #one base pair different to the one above
                '1',
                '1.02',
                'Root; d__Bacteria; p__Firmicutes; c__Bacilli'
            ],
            [
                '4.16.ribosomal_protein_S5', 'assembly',
                'GGTACCGGCGTCATCGCCGGTGGGGCGGCACGCGCCATCTTGGAGATGGCCGGCATCCGC',
                '1', '1.06',
                'Root; d__Bacteria; p__Actinobacteria; c__Actinobacteria'
            ]
        ]
        # Build the assembly input from the assembly table. This previously
        # serialised genomes_otu_table by mistake, leaving the table above
        # unused and feeding genome rows in as the assembly.
        assemblies = "\n".join(["\t".join(x) for x in assemblies_otu_table])

        appraiser = Appraiser()
        metagenome_collection = OtuTableCollection()
        metagenome_collection.add_otu_table(StringIO(metagenomes))
        genome_collection = OtuTableCollection()
        genome_collection.add_otu_table(StringIO(genomes))
        assembly_collection = OtuTableCollection()
        assembly_collection.add_otu_table(StringIO(assemblies))

        # First pass: default settings.
        app = appraiser.appraise(
            genome_otu_table_collection=genome_collection,
            metagenome_otu_table_collection=metagenome_collection,
            assembly_otu_table_collection=assembly_collection)
        self.assertEqual(2, len(app.appraisal_results))
        res = self._sort_appraisal_results(app.appraisal_results)
        a = res[0]
        self.assertEqual(0, a.num_binned)
        self.assertEqual(0, a.num_assembled)
        self.assertEqual(8 + 9, a.num_not_found)
        a = res[1]
        self.assertEqual(0, a.num_binned)
        self.assertEqual(7, a.num_assembled)
        self.assertEqual(0, a.num_not_found)

        # Second pass: relaxed matching at 0.9 sequence identity.
        app = appraiser.appraise(
            genome_otu_table_collection=genome_collection,
            metagenome_otu_table_collection=metagenome_collection,
            assembly_otu_table_collection=assembly_collection,
            sequence_identity=0.9)
        self.assertEqual(2, len(app.appraisal_results))
        res = self._sort_appraisal_results(app.appraisal_results)
        a = res[0]
        self.assertEqual('another', a.metagenome_sample_name)
        self.assertEqual(8, a.num_binned)
        self.assertEqual(8, a.num_assembled)
        self.assertEqual(9, a.num_not_found)
        a = res[1]
        self.assertEqual(0, a.num_binned)
        self.assertEqual(7, a.num_assembled)
        self.assertEqual(0, a.num_not_found)
Example #11
0
    def test_appraise_assembly_imperfectly(self):
        """Appraise against genomes and an assembly, exactly and at 0.9 identity.

        The genome and assembly tables each contain two L11 sequences that
        differ by one base and an S5 sequence one base away from the
        metagenome's first S5 OTU. The appraisal runs once with default
        settings and once with ``sequence_identity=0.9``; the binned,
        assembled and not-found totals of both samples are checked each time.
        """
        metagenome_otu_table = [
            self.headers,[
                '4.12.ribosomal_protein_L11_rplK',
                'minimal',
                'GGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC',
                '7','17.07','Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales'],
            [
                '4.16.ribosomal_protein_S5',
                'another',
                'GGTACCGGCGTCATCGCCGGTGGCGCGGCACGCGCCATCTTGGAGATGGCCGGCATCCGC',
                '8','12.50','Root; d__Bacteria; p__Actinobacteria; c__Actinobacteria'],
            [
                '4.16.ribosomal_protein_S5',
                'another',
                'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAATGGCCGGCATCCGC', # way different to the one above
                '9','17.50','Root; d__Bacteria; p__Actinobacteria; c__Actinobacteria']]
        metagenomes = "\n".join(["\t".join(x) for x in metagenome_otu_table])
        genomes_otu_table = [
            self.headers,[
                '4.12.ribosomal_protein_L11_rplK',
                'genome',
                'GGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC',
                '1','1.02','Root; d__Bacteria; p__Firmicutes; c__Bacilli'],
            [
                '4.12.ribosomal_protein_L11_rplK',
                'genome',
                'AGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC', #one base pair different to the one above
                '1','1.02','Root; d__Bacteria; p__Firmicutes; c__Bacilli'],
            [
                '4.16.ribosomal_protein_S5',
                'genome',
                'GGTACCGGCGTCATCGCCGGTGGGGCGGCACGCGCCATCTTGGAGATGGCCGGCATCCGC',
                '1','1.06','Root; d__Bacteria; p__Actinobacteria; c__Actinobacteria']
        ]
        genomes = "\n".join(["\t".join(x) for x in genomes_otu_table])
        assemblies_otu_table = [
            self.headers,[
                '4.12.ribosomal_protein_L11_rplK',
                'assembly',
                'GGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC',
                '1','1.02','Root; d__Bacteria; p__Firmicutes; c__Bacilli'],
            [
                '4.12.ribosomal_protein_L11_rplK',
                'assembly',
                'AGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTGAACATC', #one base pair different to the one above
                '1','1.02','Root; d__Bacteria; p__Firmicutes; c__Bacilli'],
            [
                '4.16.ribosomal_protein_S5',
                'assembly',
                'GGTACCGGCGTCATCGCCGGTGGGGCGGCACGCGCCATCTTGGAGATGGCCGGCATCCGC',
                '1','1.06','Root; d__Bacteria; p__Actinobacteria; c__Actinobacteria']
        ]
        # Serialise the assembly table itself. The original joined
        # genomes_otu_table here, so assemblies_otu_table was never used and
        # genome rows were supplied as the assembly input.
        assemblies = "\n".join(["\t".join(x) for x in assemblies_otu_table])

        appraiser = Appraiser()
        metagenome_collection = OtuTableCollection()
        metagenome_collection.add_otu_table(StringIO(metagenomes))
        genome_collection = OtuTableCollection()
        genome_collection.add_otu_table(StringIO(genomes))
        assembly_collection = OtuTableCollection()
        assembly_collection.add_otu_table(StringIO(assemblies))

        # First pass: default settings.
        app = appraiser.appraise(genome_otu_table_collection=genome_collection,
                                 metagenome_otu_table_collection=metagenome_collection,
                                 assembly_otu_table_collection=assembly_collection)
        self.assertEqual(2, len(app.appraisal_results))
        res = self._sort_appraisal_results(app.appraisal_results)
        a = res[0]
        self.assertEqual(0, a.num_binned)
        self.assertEqual(0, a.num_assembled)
        self.assertEqual(8+9, a.num_not_found)
        a = res[1]
        self.assertEqual(0, a.num_binned)
        self.assertEqual(7, a.num_assembled)
        self.assertEqual(0, a.num_not_found)

        # Second pass: relaxed matching at 0.9 sequence identity.
        app = appraiser.appraise(genome_otu_table_collection=genome_collection,
                                 metagenome_otu_table_collection=metagenome_collection,
                                 assembly_otu_table_collection=assembly_collection,
                                 sequence_identity=0.9)
        self.assertEqual(2, len(app.appraisal_results))
        res = self._sort_appraisal_results(app.appraisal_results)
        a = res[0]
        self.assertEqual('another', a.metagenome_sample_name)
        self.assertEqual(8, a.num_binned)
        self.assertEqual(8, a.num_assembled)
        self.assertEqual(9, a.num_not_found)
        a = res[1]
        self.assertEqual(0, a.num_binned)
        self.assertEqual(7, a.num_assembled)
        self.assertEqual(0, a.num_not_found)