def test_GeneScores_add_from_file_bug_list(self):
     """
     GeneScores class: Test add_from_file bug list
     The bugs reported after loading the gene table must match the
     bugs in the expected scores from the test config.
     """

     loaded_scores = store.GeneScores()
     loaded_scores.add_from_file(cfg.genetable_file)

     # Sort both sides so the comparison does not depend on ordering
     expected_bugs = sorted(cfg.genetable_file_bug_scores.keys())
     observed_bugs = sorted(loaded_scores.bug_list())

     self.assertEqual(expected_bugs, observed_bugs)
 def test_GeneScores_add_from_file_scores(self):
     """
     GeneScores class: Test add_from_file scores
     For every bug in the expected scores, the per-gene scores loaded
     from the gene table must match exactly.
     """

     loaded_scores = store.GeneScores()
     loaded_scores.add_from_file(cfg.genetable_file)

     # Check the gene-to-score mapping bug by bug
     for bug, expected_scores in cfg.genetable_file_bug_scores.items():
         observed_scores = loaded_scores.scores_for_bug(bug)
         self.assertDictEqual(expected_scores, observed_scores)
Exemple #3
0
    def test_GeneScores_add_from_file_id_mapping_bug_list(self):
        """
        GeneScores class: Test add_from_file bug list with id mapping
        The gene table is loaded together with an id mapping file and the
        resulting bugs are compared with the expected id-mapped scores.
        """

        mapped_scores = store.GeneScores()
        mapped_scores.add_from_file(
            cfg.genetable_file,
            id_mapping_file=cfg.id_mapping_gene_table_file)

        # Sort both sides so the comparison does not depend on ordering
        expected_bugs = sorted(cfg.genetable_file_bug_scores_id_mapping.keys())
        observed_bugs = sorted(mapped_scores.bug_list())

        self.assertEqual(expected_bugs, observed_bugs)
 def test_GeneScores_get_score(self):
     """
     GeneScores class: Test get_score function
     Two bugs are added with the same gene scores and the score of a
     single gene is read back for one of them.
     """

     scores = store.GeneScores()

     # Each bug receives its own, freshly built scores dictionary
     for bug_name in ("bug1", "bug2"):
         scores.add({"gene1": 1, "gene2": 2}, bug_name)

     observed = scores.get_score("bug1", "gene2")
     self.assertEqual(observed, 2)
 def test_GeneScores_add(self):
     """
     GeneScores class: Test add function
     Two bugs are added with two genes each and the number of genes
     stored for the first bug is checked.
     """

     scores = store.GeneScores()

     # Each bug receives its own, freshly built scores dictionary
     for bug_name in ("bug1", "bug2"):
         scores.add({"gene1": 1, "gene2": 2}, bug_name)

     gene_count = scores.count_genes_for_bug("bug1")
     self.assertEqual(gene_count, 2)
    def test_gene_families_tsv_output(self):
        """
        Test the gene families function and the blast config indexes
        Test UniRef50_unknown is read in and used for gene scores but not printed
        Test the tsv output

        The usearch output is parsed into an Alignments store, the gene
        families are written to a temp file in tsv format, and that file is
        compared byte-for-byte with the expected output from the test config.
        """

        # create a set of alignments
        alignments = store.Alignments()

        # load the usearch output; the context manager closes the file even
        # if a malformed line raises while parsing
        with open(cfg.usearch_file) as file_handle:
            for line in file_handle:
                # skip comment lines
                if line.startswith("#"):
                    continue

                data = line.strip().split(config.blast_delimiter)

                # the reference column holds two "|"-separated ids; the second
                # is passed as the reference and the first as the bug
                referenceids = data[config.blast_reference_index].split("|")
                queryid = data[config.blast_query_index]
                evalue = float(data[config.blast_evalue_index])

                # every reference is given a length of 1
                alignments.add(referenceids[1], 1, queryid, evalue,
                               referenceids[0])

        # set the output format
        config.output_format = "tsv"

        # set the location of the file to write to as a temp file
        file_out, gene_families_file = tempfile.mkstemp()
        os.close(file_out)
        config.genefamilies_file = gene_families_file

        try:
            # create gene_scores instance
            gene_scores = store.GeneScores()

            # obtain the gene families
            gene_families_file = quantify_families.gene_families(
                alignments, gene_scores)

            # check the gene families output is as expected
            # (shallow=False forces a comparison of the file contents)
            self.assertTrue(
                filecmp.cmp(gene_families_file,
                            cfg.gene_familes_file,
                            shallow=False))
        finally:
            # delete the temp file even when the computation or the
            # assertion fails, so failed runs do not leave files behind
            utils.remove_temp_file(gene_families_file)
 def test_GeneScores_add_second_set(self):
     """
     GeneScores class: Test add function
     Test adding a second set of scores to bug set
     gene2 is present in both sets, so four genes are expected in total.
     """

     scores = store.GeneScores()

     score_sets = (
         {"gene1": 1, "gene2": 2},
         {"gene3": 1, "gene4": 2, "gene2": 22},
     )

     # Add both sets, in order, to the same bug
     for score_set in score_sets:
         scores.add(score_set, "bug1")

     gene_count = scores.count_genes_for_bug("bug1")
     self.assertEqual(gene_count, 4)
   def test_GeneScores_scores_for_bug(self):
       """
       GeneScores class: Test scores_for_bug
       Two sets of scores are added to the same bug and the combined
       gene-to-score mapping is checked.
       """

       scores = store.GeneScores()

       score_sets = (
           {"gene1": 1, "gene2": 2},
           {"gene3": 1, "gene4": 2, "gene2": 22},
       )

       # Add both sets, in order, to the same bug
       for score_set in score_sets:
           scores.add(score_set, "bug1")

       # Test that the most recent score for gene2 is returned
       expected = {"gene1": 1, "gene2": 22, "gene3": 1, "gene4": 2}
       self.assertDictEqual(scores.scores_for_bug("bug1"), expected)
 def test_GeneScores_get_score_second_set(self):
     """
     GeneScores class: Test get_score function
     Test getting the score for a second set of scores added to bug set
     """

     scores = store.GeneScores()

     score_sets = (
         {"gene1": 1, "gene2": 2},
         {"gene3": 1, "gene4": 2, "gene2": 22},
     )

     # Add both sets, in order, to the same bug
     for score_set in score_sets:
         scores.add(score_set, "bug1")

     # Test that the most recent score for gene2 is returned
     observed = scores.get_score("bug1", "gene2")
     self.assertEqual(observed, 22)
Exemple #10
0
    def test_Alignments_compute_gene_scores_double_gene_double_query(self):
        """
        Test the compute_gene_scores function
        Test two hits to gene with more than one hit per query

        The expected gene2 score is built from the two hits to gene2: each
        hit score exp(-evalue) is divided by the sum of hit scores for its
        query and by the gene length in kb, and the two terms are summed.
        """

        # evalues, named by the query and gene of the hit they belong to
        evalue_query1_gene1 = 1e-4
        evalue_query1_gene2 = 3e-7
        evalue_query2_gene2 = 2e-10
        evalue_query3_gene3 = 2e-10

        gene2_length = 3

        # hits as (reference, reference_length, query, evalue)
        hits = (
            ("gene1", 2, "query1", evalue_query1_gene1),
            ("gene2", gene2_length, "query1", evalue_query1_gene2),
            ("gene2", gene2_length, "query2", evalue_query2_gene2),
            ("gene3", 4, "query3", evalue_query3_gene3),
        )

        # Create a set of alignments, all for the same bug
        alignments_store = store.Alignments()
        for reference, reference_length, query, evalue in hits:
            alignments_store.add(reference, reference_length, query, evalue,
                                 "bug1")

        # compute gene scores
        gene_scores_store = store.GeneScores()
        alignments_store.convert_alignments_to_gene_scores(gene_scores_store)

        # query1 has two hits (gene1 and gene2); sum their scores
        score_query1_gene1 = math.exp(-evalue_query1_gene1)
        score_query1_gene2 = math.exp(-evalue_query1_gene2)
        query1_total = score_query1_gene1 + score_query1_gene2

        # query2 has a single hit (gene2)
        score_query2_gene2 = math.exp(-evalue_query2_gene2)
        query2_total = score_query2_gene2

        # convert the gene2 length to per kb
        gene2_length_kb = gene2_length / 1000.0

        # gene2 receives one term from query2 and one from query1
        expected_score = (
            score_query2_gene2 / query2_total / gene2_length_kb
            + score_query1_gene2 / query1_total / gene2_length_kb
        )

        self.assertAlmostEqual(gene_scores_store.get_score("bug1", "gene2"),
                               expected_score,
                               places=12)
 def test_GeneScores_add_from_file_gene_list(self):
     """
     GeneScores class: Test add_from_file gene list
     The genes reported after loading the gene table must match the
     distinct genes found across all bugs in the expected scores.
     """

     loaded_scores = store.GeneScores()
     loaded_scores.add_from_file(cfg.genetable_file)

     # Collect every distinct gene across all of the bugs in the table
     expected_genes = {
         gene
         for bug in cfg.genetable_file_bug_scores
         for gene in cfg.genetable_file_bug_scores[bug]
     }

     # Test the gene list is as expected
     self.assertEqual(sorted(expected_genes),
                      sorted(loaded_scores.gene_list()))
Exemple #12
0
    def test_Alignments_compute_gene_scores_single_gene_single_query(self):
        """
        Test the compute_gene_scores function
        Test one hit for gene with one hit for query

        The expected gene3 score is the single hit score exp(-evalue),
        divided by the sum of hit scores for its query (the same value)
        and by the gene length in kb.
        """

        # create a set of hits
        eval1 = 1e-4
        eval2 = 3e-7
        eval3 = 2e-10
        eval4 = 2e-10

        gene1_length = 2
        gene2_length = 3
        gene3_length = 4

        # Create a set of alignments
        alignments_store = store.Alignments()
        alignments_store.add("gene1", gene1_length, "query1", eval1, "bug1")
        alignments_store.add("gene2", gene2_length, "query1", eval2, "bug1")
        alignments_store.add("gene2", gene2_length, "query2", eval3, "bug1")
        alignments_store.add("gene3", gene3_length, "query3", eval4, "bug1")

        gene_scores_store = store.GeneScores()

        # compute gene scores
        alignments_store.convert_alignments_to_gene_scores(gene_scores_store)

        # convert lengths to per kb
        gene3_length = gene3_length / 1000.0

        # gene3: query3 has a single hit, so its sum is that hit's score
        hit4_score = math.exp(-eval4)
        query3_sum = hit4_score
        gene_score = hit4_score / query3_sum / gene3_length

        # The score is a float, so compare with a tolerance (as the
        # double-gene test does) rather than requiring bit-exact equality
        self.assertAlmostEqual(gene_scores_store.get_score("bug1", "gene3"),
                               gene_score,
                               places=12)