Example #1
0
    def test_create_blast_from_file(self):
        """Check RunBlast.create_blast_from_file when driven by a Setting.

        With the input file set to "NotExist" the factory is expected to
        raise IOError.  After pointing the setting at the ``bIn`` file in
        the Blast working directory, the created object must start with
        empty results, hold records matching a direct SeqIO "fasta" index
        of that file, and carry the configured e-value and batch size.
        """
        missing_infile = "NotExist"
        e_value = 1e-50
        batch = 20
        outfile_name = "foolname"

        setting = Setting()
        setting.add_all(
            blast_e_value=e_value,
            blast_wdir=self.Blast_dir,
            blast_infile=missing_infile,
            blast_outfile=outfile_name,
            blast_batch_size=batch,
        )

        # The bogus input file name has to be rejected.
        with self.assertRaises(IOError):
            RunBlast.create_blast_from_file(setting_class=setting)

        # Same settings, now with the real input file.
        fasta_path = self.Blast_dir + "bIn"
        setting.add("blast_infile", fasta_path)
        blast = RunBlast.create_blast_from_file(setting)
        self.assertEqual(blast.results, {})

        # Compare record by record against an independent index of the file.
        self.record_index = SeqIO.index(fasta_path, "fasta")
        for key in self.record_index:
            reference = self.record_index[key]
            loaded = blast.record_index[key]
            self.assertEqual(str(reference.seq), str(loaded.seq))
            self.assertEqual(str(reference.id), str(loaded.id))

        self.assertEqual(blast.e_threshold, e_value)
        self.assertEqual(blast.batch_size, batch)
Example #2
0
    def test_RunBlast_Batch(self):
        """Run RunBlast.run() with a 1e-50 e-value and check combined GO terms.

        Any leftover ``AmiGO_Record.temp`` file in the Blast working
        directory is removed first.  After ``blast.run()`` the
        ``combined_terms`` of every expected gene in ``blast.results`` must
        equal the hard-coded set below.
        """
        self.e_threshold = 1e-50  # one of the default values on the website

        # os.remove signals a missing file with OSError; on Python 2 that is
        # not an IOError, so catching IOError here would let the test error
        # out whenever the temp file is absent.
        try:
            os.remove(self.Blast_dir + "AmiGO_Record.temp")
        except OSError:
            pass

        blast = RunBlast(records=self.record_index, e_value=self.e_threshold,
                         wdir=self.Blast_dir, debug=False)
        blast.run()

        expected = {
            'lcl|AE014075.1_gene_1': set(),
            # no result with filter 1e-50 (was: 'GO:0004803', 'GO:0006313')
            'lcl|AE014075.1_gene_2': set(),
            'lcl|AE014075.1_gene_3': set([
                'GO:0050661', 'GO:0016597', 'GO:0016310', 'GO:0005524',
                'GO:0000166', 'GO:0055114', 'GO:0009088', 'GO:0004412',
                'GO:0004072', 'GO:0009089', 'GO:0009090', 'GO:0009086',
                'GO:0009067', 'GO:0006520', 'GO:0008152', 'GO:0008652',
                'GO:0009507', 'GO:0016491', 'GO:0009570', 'GO:0006164',
                'GO:0000023', 'GO:0019252', 'GO:0019761', 'GO:0043085',
                'GO:0071470', 'GO:0005634', 'GO:0005737', 'GO:0009097',
                'GO:0005575',
            ]),
            'lcl|AE014075.1_gene_4': set([
                'GO:0016310', 'GO:0005524', 'GO:0000166', 'GO:0004413',
                'GO:0009088', 'GO:0005737',
            ]),
            'lcl|AE014075.1_gene_5': set([
                'GO:0030170', 'GO:0004795', 'GO:0009088', 'GO:0008652',
                'GO:0005737', 'GO:0016829', 'GO:0006520', 'GO:0005634',
            ]),
        }

        for k, v in expected.items():
            seq = blast.results[k]
            # Sorted lists in the message make a set mismatch easy to read.
            self.assertEqual(v, seq.combined_terms, "Error in %s. \nExpected: %s\nActual: %s\n" %
                             (k, sorted(v), sorted(seq.combined_terms)))
Example #3
0
    def test_init_dict(self):  # allterms, default_value=0)
        """init_dict on the small template set with default 0 gives an all-zero dict."""
        blast = RunBlast(
            records=self.record_index,
            e_value=self.e_threshold,
            wdir=self.Blast_dir,
            outfile="BlastOut",
        )
        default_dict = blast.init_dict(self.template_set_small, 0)

        # Every expected term starts at the supplied default value.
        expected = {
            "GO:01": 0,
            "GO:03": 0,
            "GO:04": 0,
            "GO:05": 0,
        }
        self.assertEqual(expected, default_dict)
Example #4
0
    def test_RunBlast(self):
        """Run RunBlast.run_single() with the fixture threshold and check GO terms.

        For every expected gene id, ``blast.results[gene].combined_terms``
        must equal the hard-coded set of GO terms below.
        """
        blast = RunBlast(records=self.record_index, e_value=self.e_threshold, wdir=self.Blast_dir)
        blast.run_single()

        expected = {
            'lcl|AE014075.1_gene_1': set(),
            'lcl|AE014075.1_gene_2': {'GO:0004803', 'GO:0006313'},
            'lcl|AE014075.1_gene_3': {
                'GO:0071470', 'GO:0016310', 'GO:0005886', 'GO:0009067',
                'GO:0000023', 'GO:0016597', 'GO:0043085', 'GO:0016491',
                'GO:0005737', 'GO:0050661', 'GO:0040007', 'GO:0005618',
                'GO:0009570', 'GO:0005634', 'GO:0006520', 'GO:0019877',
                'GO:0000166', 'GO:0016740', 'GO:0009097', 'GO:0009090',
                'GO:0019252', 'GO:0019761', 'GO:0016301', 'GO:0008152',
                'GO:0009088', 'GO:0055114', 'GO:0009507', 'GO:0008652',
                'GO:0005829', 'GO:0006555', 'GO:0004412', 'GO:0005575',
                'GO:0009089', 'GO:0005524', 'GO:0006164', 'GO:0006531',
                'GO:0009086', 'GO:0004072', 'GO:0009082',
            },
            'lcl|AE014075.1_gene_4': {
                'GO:0005737', 'GO:0006566', 'GO:0000394', 'GO:0016310',
                'GO:0009617', 'GO:0004413', 'GO:0000166', 'GO:0019344',
                'GO:0009620', 'GO:0009088', 'GO:0009570', 'GO:0009086',
                'GO:0005524', 'GO:0009507',
            },
            'lcl|AE014075.1_gene_5': {
                'GO:0005125', 'GO:0016311', 'GO:0046360', 'GO:0003674',
                'GO:0030170', 'GO:0004795', 'GO:0005737', 'GO:0006566',
                'GO:0005615', 'GO:0005634', 'GO:0006520', 'GO:0005524',
                'GO:0008150', 'GO:0070905', 'GO:0008152', 'GO:0009071',
                'GO:0008652', 'GO:0006897', 'GO:0005829', 'GO:0005575',
                'GO:0009088', 'GO:0004765', 'GO:0016829',
            },
        }

        for gene_id in expected:
            result = blast.results[gene_id]
            self.assertEqual(expected[gene_id], result.combined_terms, "Error in %s" % gene_id)
Example #5
0
    def test_update_counter_from_dictionaries(self):
        """Check update_counter_from_dictionaries on both template fixtures.

        Each scenario starts from a zero-initialised dict produced by
        init_dict, updates it in place from the same template, and compares
        the resulting per-term counts with the expected values.
        """
        blast = RunBlast(
            records=self.record_index,
            e_value=self.e_threshold,
            wdir=self.Blast_dir,
            outfile="BlastOut",
        )

        # Scenario 1: the small template set.
        expected_small = {
            "GO:01": 1,
            "GO:03": 2,
            "GO:04": 2,
            "GO:05": 1,
        }
        counter_small = blast.init_dict(self.template_set_small, 0)
        blast.update_counter_from_dictionaries(counter_small, self.template_set_small)
        self.assertEqual(expected_small, counter_small)

        # Scenario 2: the full template set.
        expected_full = {
            "GO:01": 3,
            "GO:02": 1,
            "GO:03": 2,
            "GO:04": 2,
            "GO:05": 2,
            "GO:06": 1,
            "GO:07": 1,
        }
        counter_full = blast.init_dict(self.template_set, 0)
        blast.update_counter_from_dictionaries(counter_full, self.template_set)
        self.assertEqual(expected_full, counter_full)
Example #6
0
    def test_RunBlast_subset(self):
        """Run RunBlast.run_single() on two records with e_value=1e-200.

        Only gene_4 and gene_5 from the fixture record index are passed in.
        Their ``combined_terms`` in ``blast.results`` must equal the
        hard-coded sets below; every result is then printed for inspection.
        """
        gene_ids = ('lcl|AE014075.1_gene_4', 'lcl|AE014075.1_gene_5')
        sub_record = dict((gene_id, self.record_index[gene_id]) for gene_id in gene_ids)

        blast = RunBlast(records=sub_record, e_value=1e-200, wdir=self.Blast_dir)
        blast.run_single()

        expected = {
            'lcl|AE014075.1_gene_4': set(),
            # 'GO:0030170' was previously expected for gene_5 as well.
            'lcl|AE014075.1_gene_5': set(['GO:0004795', 'GO:0009088']),
        }

        for k, v in expected.items():
            seq = blast.results[k]
            self.assertEqual(v, seq.combined_terms)

        # A single pre-formatted argument prints the same text under the
        # Python 2 print statement and the Python 3 print function, whereas
        # the bare statement form is a SyntaxError on Python 3.
        for k, v in blast.results.items():
            print("%s %s" % (k, v.combined_terms))