def test_GeneScores_add_from_file_bug_list(self):
    """
    GeneScores class: Test add_from_file bug list

    Load the gene table file into an empty GeneScores store and check that
    the sorted bug names it reports equal the sorted keys of the expected
    per-bug scores.
    """

    loaded_scores = store.GeneScores()
    loaded_scores.add_from_file(cfg.genetable_file)

    # Compare sorted lists so the check does not depend on ordering
    expected_bugs = sorted(cfg.genetable_file_bug_scores.keys())
    observed_bugs = sorted(loaded_scores.bug_list())

    self.assertEqual(expected_bugs, observed_bugs)
def test_GeneScores_add_from_file_scores(self):
    """
    GeneScores class: Test add_from_file scores

    Load the gene table file and, for every expected bug, check that the
    scores returned for that bug equal the expected gene-to-score mapping.
    """

    loaded_scores = store.GeneScores()
    loaded_scores.add_from_file(cfg.genetable_file)

    expected_by_bug = cfg.genetable_file_bug_scores

    # Check each bug's full set of gene scores
    for bug_name in expected_by_bug:
        expected_scores = expected_by_bug[bug_name]
        observed_scores = loaded_scores.scores_for_bug(bug_name)
        self.assertDictEqual(expected_scores, observed_scores)
def test_GeneScores_add_from_file_id_mapping_bug_list(self):
    """
    GeneScores class: Test add_from_file bug list with id mapping

    Load the gene table file together with the id mapping file and check
    that the sorted bug names reported by the store equal the sorted keys
    of the expected id-mapped scores.
    """

    loaded_scores = store.GeneScores()
    loaded_scores.add_from_file(
        cfg.genetable_file,
        id_mapping_file=cfg.id_mapping_gene_table_file)

    # Compare sorted lists so the check does not depend on ordering
    expected_bugs = sorted(cfg.genetable_file_bug_scores_id_mapping.keys())
    observed_bugs = sorted(loaded_scores.bug_list())

    self.assertEqual(expected_bugs, observed_bugs)
def test_GeneScores_get_score(self):
    """
    GeneScores class: Test get_score function

    Add the same two gene scores for two different bugs, then look up a
    single score by bug name and gene name.
    """

    scores_by_bug = store.GeneScores()

    # A fresh dictionary is built for each bug, as separate objects
    for bug_name in ("bug1", "bug2"):
        scores_by_bug.add({"gene1": 1, "gene2": 2}, bug_name)

    self.assertEqual(scores_by_bug.get_score("bug1", "gene2"), 2)
def test_GeneScores_add(self):
    """
    GeneScores class: Test add function

    Add two gene scores for each of two bugs and check the number of genes
    counted for the first bug.
    """

    scores_by_bug = store.GeneScores()

    # A fresh dictionary is built for each bug, as separate objects
    for bug_name in ("bug1", "bug2"):
        gene_to_score = {"gene1": 1, "gene2": 2}
        scores_by_bug.add(gene_to_score, bug_name)

    gene_total = scores_by_bug.count_genes_for_bug("bug1")
    self.assertEqual(gene_total, 2)
def test_gene_families_tsv_output(self):
    """
    Test the gene families function and the blast config indexes
    Test UniRef50_unknown is read in and used for gene scores but not printed
    Test the tsv output

    Alignments are read from cfg.usearch_file (lines starting with "#" are
    skipped), the gene families are written in tsv format to a temp file,
    and that file is compared byte-for-byte with cfg.gene_familes_file.

    Note: this test sets config.output_format and config.genefamilies_file
    and does not restore them.
    """

    # create a set of alignments
    alignments = store.Alignments()

    # load the usearch output; the context manager closes the file even if
    # a line fails to parse
    with open(cfg.usearch_file) as file_handle:
        for line in file_handle:
            # skip comment lines
            if line.startswith("#"):
                continue

            data = line.strip().split(config.blast_delimiter)

            # the reference id field is "|" delimited; the first item is
            # passed as the bug and the second as the reference name
            referenceids = data[config.blast_reference_index].split("|")
            queryid = data[config.blast_query_index]
            evalue = float(data[config.blast_evalue_index])

            # every reference is added with a length of 1
            alignments.add(referenceids[1], 1, queryid, evalue, referenceids[0])

    # set the output format
    config.output_format = "tsv"

    # set the location of the file to write to as a temp file
    file_out, temp_output_file = tempfile.mkstemp()
    os.close(file_out)
    config.genefamilies_file = temp_output_file

    # Track the file to delete; until gene_families returns this is the
    # temp file created above, afterwards it is the file that was returned
    gene_families_file = temp_output_file
    try:
        # create gene_scores instance
        gene_scores = store.GeneScores()

        # obtain the gene families
        gene_families_file = quantify_families.gene_families(
            alignments, gene_scores)

        # check the gene families output is as expected
        self.assertTrue(
            filecmp.cmp(gene_families_file, cfg.gene_familes_file,
                        shallow=False))
    finally:
        # delete the temp file, even when the test fails or errors
        utils.remove_temp_file(gene_families_file)
def test_GeneScores_add_second_set(self):
    """
    GeneScores class: Test add function
    Test adding a second set of scores to bug set

    Both sets are added for the same bug and share gene2; the test expects
    four genes to be counted for that bug.
    """

    first_set = {"gene1": 1, "gene2": 2}
    second_set = {"gene3": 1, "gene4": 2, "gene2": 22}

    scores_by_bug = store.GeneScores()
    for gene_to_score in (first_set, second_set):
        scores_by_bug.add(gene_to_score, "bug1")

    gene_total = scores_by_bug.count_genes_for_bug("bug1")
    self.assertEqual(gene_total, 4)
def test_GeneScores_scores_for_bug(self):
    """
    GeneScores class: Test scores_for_bug

    Two sets of scores are added for the same bug and both contain gene2.
    The test expects the returned scores to hold all four genes, with gene2
    carrying the value from the set added last.
    """

    first_set = {"gene1": 1, "gene2": 2}
    second_set = {"gene3": 1, "gene4": 2, "gene2": 22}

    scores_by_bug = store.GeneScores()
    for gene_to_score in (first_set, second_set):
        scores_by_bug.add(gene_to_score, "bug1")

    # Test that the most recent score for gene2 is returned
    expected_scores = {"gene1": 1, "gene2": 22, "gene3": 1, "gene4": 2}
    self.assertDictEqual(scores_by_bug.scores_for_bug("bug1"), expected_scores)
def test_GeneScores_get_score_second_set(self):
    """
    GeneScores class: Test get_score function
    Test getting the score for a second set of scores added to bug set

    gene2 is present in both sets added for the bug; the test expects the
    value from the set added last.
    """

    first_set = {"gene1": 1, "gene2": 2}
    second_set = {"gene3": 1, "gene4": 2, "gene2": 22}

    scores_by_bug = store.GeneScores()
    for gene_to_score in (first_set, second_set):
        scores_by_bug.add(gene_to_score, "bug1")

    # Test that the most recent score for gene2 is returned
    latest_score = scores_by_bug.get_score("bug1", "gene2")
    self.assertEqual(latest_score, 22)
def test_Alignments_compute_gene_scores_double_gene_double_query(self):
    """
    Test the compute_gene_scores function
    Test two hits to gene with more than one hit per query

    gene2 is hit by query1 (which also hits gene1) and by query2 (whose only
    hit is gene2). The expected gene2 score is the sum over both queries of
    exp(-evalue) for the hit, divided by the total of that query's hit
    scores, divided by the gene length in kb.
    """

    eval1 = 1e-4
    eval2 = 3e-7
    eval3 = 2e-10
    eval4 = 2e-10

    gene1_length = 2
    gene2_length = 3
    gene3_length = 4

    # Each hit is given in the argument order passed to Alignments.add:
    # gene, gene length, query, evalue, bug
    hits = [
        ("gene1", gene1_length, "query1", eval1, "bug1"),
        ("gene2", gene2_length, "query1", eval2, "bug1"),
        ("gene2", gene2_length, "query2", eval3, "bug1"),
        ("gene3", gene3_length, "query3", eval4, "bug1"),
    ]

    # Create a set of alignments
    alignments_store = store.Alignments()
    for hit in hits:
        alignments_store.add(*hit)

    # compute gene scores
    gene_scores_store = store.GeneScores()
    alignments_store.convert_alignments_to_gene_scores(gene_scores_store)

    # hit scores for the three hits that involve query1 or query2
    query1_gene1_score = math.exp(-eval1)
    query1_gene2_score = math.exp(-eval2)
    query2_gene2_score = math.exp(-eval3)

    # total hit score for each query
    query1_total = query1_gene1_score + query1_gene2_score
    query2_total = query2_gene2_score

    # convert length to per kb
    gene2_length_kb = gene2_length / 1000.0

    # contribution of each query to the gene2 score
    from_query2 = query2_gene2_score / query2_total / gene2_length_kb
    from_query1 = query1_gene2_score / query1_total / gene2_length_kb
    expected_score = from_query2 + from_query1

    self.assertAlmostEqual(
        gene_scores_store.get_score("bug1", "gene2"),
        expected_score,
        places=12)
def test_GeneScores_add_from_file_gene_list(self):
    """
    GeneScores class: Test add_from_file gene list

    Load the gene table file and check that the sorted gene names reported
    by the store equal the sorted, de-duplicated gene names found across
    all bugs in the expected scores.
    """

    loaded_scores = store.GeneScores()
    loaded_scores.add_from_file(cfg.genetable_file)

    # Collect the distinct genes listed under any bug in the table
    expected_genes = set()
    for bug_name in cfg.genetable_file_bug_scores:
        for gene_name in cfg.genetable_file_bug_scores[bug_name]:
            expected_genes.add(gene_name)

    # Test the gene list is as expected
    self.assertEqual(sorted(expected_genes), sorted(loaded_scores.gene_list()))
def test_Alignments_compute_gene_scores_single_gene_single_query(self):
    """
    Test the compute_gene_scores function
    Test one hit for gene with one hit for query

    gene3 is hit only by query3, and query3 hits only gene3. The expected
    gene3 score is exp(-evalue) for the hit, divided by the total of the
    query's hit scores (the same value), divided by the gene length in kb.
    """

    # create a set of hits
    eval1 = 1e-4
    eval2 = 3e-7
    eval3 = 2e-10
    eval4 = 2e-10

    gene1_length = 2
    gene2_length = 3
    gene3_length = 4

    # Create a set of alignments
    # (arguments as passed: gene, gene length, query, evalue, bug)
    alignments_store = store.Alignments()
    alignments_store.add("gene1", gene1_length, "query1", eval1, "bug1")
    alignments_store.add("gene2", gene2_length, "query1", eval2, "bug1")
    alignments_store.add("gene2", gene2_length, "query2", eval3, "bug1")
    alignments_store.add("gene3", gene3_length, "query3", eval4, "bug1")

    gene_scores_store = store.GeneScores()

    # compute gene scores
    alignments_store.convert_alignments_to_gene_scores(gene_scores_store)

    # convert lengths to per kb
    gene3_length = gene3_length / 1000.0

    # gene3
    hit4_score = math.exp(-eval4)
    query3_sum = hit4_score
    gene_score = hit4_score / query3_sum / gene3_length

    # The score is a computed float, so compare with a tolerance rather
    # than exact equality (same precision as the double-gene test)
    self.assertAlmostEqual(
        gene_scores_store.get_score("bug1", "gene3"),
        gene_score,
        places=12)