def test_gene_families_tsv_output_with_names(self):
        """
        Test the gene families function and the blast config indexes
        Test UniRef50_unknown is read in and used for gene scores but not printed
        Test the tsv output
        Test that gene families have names applied to them
        Test unmapped reads total is written with the same precision as other lines

        The gene family name mapping file is swapped for a smaller one for the
        duration of the test. It is restored, and the temp output file removed,
        even when the test fails.
        """

        # update the max decimals to allow for rounding
        # NOTE: as before, output_max_decimals and output_format are not restored
        config.output_max_decimals = 7

        # set to a smaller mapping file
        original_gene_family_mapping_file = config.gene_family_name_mapping_file
        config.gene_family_name_mapping_file = cfg.gene_families_to_names_file

        # stays None until a temp file exists so cleanup knows what to remove
        gene_families_file = None
        try:
            # create a set of alignments
            alignments = store.Alignments()

            # load the usearch output, skipping comment lines
            with open(cfg.usearch_uniref50_file) as file_handle:
                for line in file_handle:
                    if line.startswith("#"):
                        continue

                    data = line.strip().split(config.blast_delimiter)

                    # the reference column holds two "|" separated ids;
                    # the second is passed as the reference, the first as the bug
                    referenceids = data[config.blast_reference_index].split("|")
                    queryid = data[config.blast_query_index]
                    identity = float(data[config.blast_identity_index])

                    alignments.add(referenceids[1], 1, queryid, identity,
                                   referenceids[0])

            # set the output format
            config.output_format = "tsv"

            # set the location of the file to write to as a temp file
            file_out, gene_families_file = tempfile.mkstemp()
            os.close(file_out)
            config.genefamilies_file = gene_families_file

            # create gene_scores instance
            gene_scores = store.GeneScores()

            # obtain the gene families
            gene_families_file = families.gene_families(alignments,
                                                        gene_scores, 1)

            # check the gene families output is as expected
            self.assertTrue(
                filecmp.cmp(gene_families_file,
                            cfg.gene_familes_uniref50_with_names_file,
                            shallow=False))
        finally:
            # reset the mapping file
            config.gene_family_name_mapping_file = original_gene_family_mapping_file

            # delete the temp file
            if gene_families_file is not None:
                utils.remove_temp_file(gene_families_file)
예제 #2
0
    def test_compute_gene_abundance_in_pathways_without_reactions_database(
            self):
        """
        Check compute_gene_abundance_in_pathways when no reactions database is given
        Scores are loaded through GeneScores.add_single_score
        Without a reactions database the pathways are composed of genes
        """

        scores = store.GeneScores()

        # Scores for two bugs; gene1 deliberately differs between them
        scores_by_bug = (
            ("bug1", (("gene1", 1), ("gene2", 2), ("gene4", 4))),
            ("bug2", (("gene1", 1.1), ("gene7", 7), ("gene6", 6))),
        )
        for bug_name, gene_values in scores_by_bug:
            for gene_name, value in gene_values:
                scores.add_single_score(bug_name, gene_name, value)

        # The entries present in pathways are gene ids in this setup
        present_in_pathways = {
            "bug1": ["gene1", "gene2"],
            "bug2": ["gene6"],
        }

        in_pathways, not_in_pathways = modules.compute_gene_abundance_in_pathways(
            scores, None, present_in_pathways)

        # Abundance of the genes that are part of pathways
        self.assertEqual(in_pathways["bug1"], 3)
        self.assertEqual(in_pathways["bug2"], 6)

        # Abundance of the genes that are not part of pathways
        self.assertEqual(not_in_pathways["bug1"], 4)
        self.assertAlmostEqual(not_in_pathways["bug2"], 8.1)
예제 #3
0
    def test_pathways_abundance_with_names(self):
        """
        Test the pathways abundance computation (xipe and minpath are off)
        Test the pathways print function
        Test the pathways mapping to names
        Test the unmapped and unintegrated values are printed

        The xipe/minpath toggles are restored and the temp output files are
        removed even when the test fails.
        """

        # update the max decimals to allow for rounding
        config.output_max_decimals = 7

        # Load in the pathways databases
        reactions_database = store.ReactionsDatabase(
            config.pathways_database_part1)
        pathways_database = store.PathwaysDatabase(
            config.pathways_database_part2, reactions_database)

        # Load in the gene scores from the file
        # This file has the gene names included
        gene_scores = store.GeneScores()
        gene_scores.add_from_file(
            cfg.larger_gene_families_uniref50_with_names_file)

        # Remember the toggles so they can always be put back
        minpath_toggle_original = config.minpath_toggle
        xipe_toggle_original = config.xipe_toggle

        # stay None until the temp files exist so cleanup knows what to remove
        abundance_file = None
        coverage_file = None
        try:
            # Turn off xipe and minpath
            config.minpath_toggle = "off"
            config.xipe_toggle = "off"

            pathways_and_reactions_store = modules.identify_reactions_and_pathways(
                gene_scores, reactions_database, pathways_database)

            # set the locations to write as temp files
            file_out, abundance_file = tempfile.mkstemp()
            os.close(file_out)
            config.pathabundance_file = abundance_file

            file_out, coverage_file = tempfile.mkstemp()
            os.close(file_out)
            config.pathcoverage_file = coverage_file

            unaligned_reads_count = 10
            abundance_file, coverage_file = modules.compute_pathways_abundance_and_coverage(
                gene_scores, reactions_database, pathways_and_reactions_store,
                pathways_database, unaligned_reads_count)

            # check the output is as expected
            self.assertTrue(
                filecmp.cmp(abundance_file,
                            cfg.demo_pathabundance_file,
                            shallow=False))
        finally:
            # Reset xipe and minpath
            config.minpath_toggle = minpath_toggle_original
            config.xipe_toggle = xipe_toggle_original

            # delete whichever temp files were created
            for temp_file in (abundance_file, coverage_file):
                if temp_file is not None:
                    utils.remove_temp_file(temp_file)
예제 #4
0
 def test_GeneScores_add_from_file_bug_list(self):
     """
     GeneScores class: add_from_file should yield the expected bug list
     """

     scores = store.GeneScores()
     scores.add_from_file(cfg.genetable_file)

     # Compare order-independently against the bugs of the reference scores
     expected_bugs = sorted(cfg.genetable_file_bug_scores.keys())
     found_bugs = sorted(scores.bug_list())
     self.assertEqual(expected_bugs, found_bugs)
예제 #5
0
    def test_Alignments_compute_gene_scores_double_gene_double_query_with_temp_alignment_file(
            self):
        """
        Check the gene score computed from a set of alignments
        gene2 is hit by two queries, one of which also hits another gene
        The alignments store is created with minimize_memory_use=True
        (the temp alignment file case)
        """

        # match values for the four hits
        matches1, matches2, matches3, matches4 = 41.0, 57.1, 61.0, 72.1

        # reference lengths
        gene1_length, gene2_length, gene3_length = 2, 3, 4

        # each hit is added as (reference, reference_length, query, matches, bug)
        hits = (
            ("gene1", gene1_length, "query1", matches1, "bug1"),
            ("gene2", gene2_length, "query1", matches2, "bug1"),
            ("gene2", gene2_length, "query2", matches3, "bug1"),
            ("gene3", gene3_length, "query3", matches4, "bug1"),
        )
        alignments_store = store.Alignments(minimize_memory_use=True)
        for hit in hits:
            alignments_store.add(*hit)

        # compute gene scores
        gene_scores_store = store.GeneScores()
        alignments_store.convert_alignments_to_gene_scores(gene_scores_store)

        # query1 hits both gene1 and gene2, so its hit scores are summed
        hit1_score = math.pow(matches1, config.match_power)
        hit2_score = math.pow(matches2, config.match_power)
        query1_sum = hit1_score + hit2_score

        # query2 only hits gene2
        hit3_score = math.pow(matches3, config.match_power)
        query2_sum = hit3_score

        # gene2 length expressed per kb
        gene2_length_kb = gene2_length / 1000.0

        # gene2 score: each hit normalized by its query total and the gene length
        expected_gene_score = (hit3_score / query2_sum / gene2_length_kb +
                               hit2_score / query1_sum / gene2_length_kb)

        actual_gene_score = gene_scores_store.get_score("bug1", "gene2")

        # delete the temp alignment file
        alignments_store.delete_temp_alignments_file()

        self.assertAlmostEqual(actual_gene_score, expected_gene_score, places=7)
예제 #6
0
 def test_GeneScores_add_from_file_scores(self):
     """
     GeneScores class: add_from_file should load the expected scores per bug
     """

     scores = store.GeneScores()
     scores.add_from_file(cfg.genetable_file)

     # Every bug of the reference table must map to the same gene scores
     for bug_name, expected_scores in cfg.genetable_file_bug_scores.items():
         self.assertDictEqual(expected_scores, scores.scores_for_bug(bug_name))
예제 #7
0
    def test_GeneScores_add_from_file_id_mapping_bug_list(self):
        """
        GeneScores class: add_from_file with an id mapping file should yield
        the expected bug list
        """

        scores = store.GeneScores()
        scores.add_from_file(cfg.genetable_file,
                             id_mapping_file=cfg.id_mapping_gene_table_file)

        # Compare order-independently against the id-mapped reference bugs
        expected_bugs = sorted(cfg.genetable_file_bug_scores_id_mapping.keys())
        found_bugs = sorted(scores.bug_list())
        self.assertEqual(expected_bugs, found_bugs)
예제 #8
0
 def test_GeneScores_get_score(self):
     """
     GeneScores class: get_score should return the score stored for a bug's gene
     """

     scores = store.GeneScores()

     # Store the same gene scores for two bugs (a fresh dict for each)
     for bug_name in ("bug1", "bug2"):
         scores.add({"gene1": 1, "gene2": 2}, bug_name)

     stored_score = scores.get_score("bug1", "gene2")
     self.assertEqual(stored_score, 2)
예제 #9
0
 def test_GeneScores_add(self):
     """
     GeneScores class: add should store every gene of the set given for a bug
     """

     scores = store.GeneScores()

     # Store the same gene scores for two bugs (a fresh dict for each)
     for bug_name in ("bug1", "bug2"):
         scores.add({"gene1": 1, "gene2": 2}, bug_name)

     gene_total = scores.count_genes_for_bug("bug1")
     self.assertEqual(gene_total, 2)
예제 #10
0
    def test_compute_gene_abundance_in_pathways_with_reactions_database(self):
        """
        Check compute_gene_abundance_in_pathways when a reactions database is given
        Scores are loaded through GeneScores.add_single_score
        Reactions are loaded through ReactionsDatabase.add_reactions
        The pathways are composed of reactions, which map to genes, and some
        genes map to more than one reaction
        """

        scores = store.GeneScores()

        # Scores for two bugs; gene1 deliberately differs between them
        single_scores = (
            ("bug1", "gene1", 1),
            ("bug1", "gene2", 2),
            ("bug1", "gene4", 4),
            ("bug2", "gene1", 1.1),
            ("bug2", "gene7", 7),
            ("bug2", "gene6", 6),
            ("bug2", "gene8", 0.2),
        )
        for bug_name, gene_name, value in single_scores:
            scores.add_single_score(bug_name, gene_name, value)

        reactions_database = store.ReactionsDatabase()
        reactions_database.add_reactions({
            "reaction1": ["gene1", "gene6"],
            "reaction2": ["gene1", "gene2"],
            "reaction3": ["gene4", "gene7"],
            "reaction4": ["gene8"]
        })

        # bug1 has two reactions that share gene1, which must only be counted
        # once in the abundance; bug2 has a single reaction
        present_in_pathways = {
            "bug1": ["reaction1", "reaction2"],
            "bug2": ["reaction1"],
        }

        in_pathways, not_in_pathways = modules.compute_gene_abundance_in_pathways(
            scores, reactions_database, present_in_pathways)

        # Abundance of the genes that are part of pathways
        self.assertEqual(in_pathways["bug1"], 3)
        self.assertAlmostEqual(in_pathways["bug2"], 7.1)

        # Abundance of the genes that are not part of pathways
        self.assertEqual(not_in_pathways["bug1"], 4)
        self.assertAlmostEqual(not_in_pathways["bug2"], 7.2)
예제 #11
0
 def test_GeneScores_add_second_set(self):
     """
     GeneScores class: add called twice for the same bug
     The second set overlaps the first on gene2, so four distinct genes remain
     """

     scores = store.GeneScores()

     first_set = {"gene1": 1, "gene2": 2}
     second_set = {"gene3": 1, "gene4": 2, "gene2": 22}
     for score_set in (first_set, second_set):
         scores.add(score_set, "bug1")

     gene_total = scores.count_genes_for_bug("bug1")
     self.assertEqual(gene_total, 4)
예제 #12
0
   def test_GeneScores_scores_for_bug(self):
       """
       GeneScores class: scores_for_bug after two sets were added for one bug
       """

       scores = store.GeneScores()

       first_set = {"gene1": 1, "gene2": 2}
       second_set = {"gene3": 1, "gene4": 2, "gene2": 22}
       for score_set in (first_set, second_set):
           scores.add(score_set, "bug1")

       # gene2 must carry the value from the most recently added set
       expected_scores = {"gene1": 1, "gene2": 22, "gene3": 1, "gene4": 2}
       self.assertDictEqual(scores.scores_for_bug("bug1"), expected_scores)
예제 #13
0
 def test_GeneScores_get_score_second_set(self):
     """
     GeneScores class: get_score after two sets were added for one bug
     """

     scores = store.GeneScores()

     first_set = {"gene1": 1, "gene2": 2}
     second_set = {"gene3": 1, "gene4": 2, "gene2": 22}
     for score_set in (first_set, second_set):
         scores.add(score_set, "bug1")

     # gene2 must carry the value from the most recently added set
     latest_score = scores.get_score("bug1", "gene2")
     self.assertEqual(latest_score, 22)
예제 #14
0
 def test_GeneScores_add_from_file_gene_list(self):
     """
     GeneScores class: add_from_file should yield the expected gene list
     """

     scores = store.GeneScores()
     scores.add_from_file(cfg.genetable_file)

     # Collect the distinct genes across all bugs of the reference table
     expected_genes = {
         gene
         for bug_scores in cfg.genetable_file_bug_scores.values()
         for gene in bug_scores
     }

     # Compare order-independently against the loaded gene list
     self.assertEqual(sorted(expected_genes), sorted(scores.gene_list()))
예제 #15
0
    def test_Alignments_compute_gene_scores_single_gene_single_query_with_temp_alignment_file(
            self):
        """
        Check the gene score computed from a set of alignments
        gene3 has a single hit, from a query that hits nothing else
        The alignments store is created with minimize_memory_use=True
        (the temp alignment file case)
        """

        # match values for the four hits
        matches1, matches2, matches3, matches4 = 41.0, 57.1, 61.0, 72.1

        # reference lengths
        gene1_length, gene2_length, gene3_length = 2, 3, 4

        # each hit is added as (reference, reference_length, query, matches, bug)
        hits = (
            ("gene1", gene1_length, "query1", matches1, "bug1"),
            ("gene2", gene2_length, "query1", matches2, "bug1"),
            ("gene2", gene2_length, "query2", matches3, "bug1"),
            ("gene3", gene3_length, "query3", matches4, "bug1"),
        )
        alignments_store = store.Alignments(minimize_memory_use=True)
        for hit in hits:
            alignments_store.add(*hit)

        # compute gene scores
        gene_scores_store = store.GeneScores()
        alignments_store.convert_alignments_to_gene_scores(gene_scores_store)

        # gene3 length expressed per kb
        gene3_length_kb = gene3_length / 1000.0

        # query3 only hits gene3, so the query total equals the hit score
        hit4_score = math.pow(matches4, config.match_power)
        query3_sum = hit4_score
        expected_gene_score = hit4_score / query3_sum / gene3_length_kb

        actual_gene_score = gene_scores_store.get_score("bug1", "gene3")

        # delete the temp alignment file
        alignments_store.delete_temp_alignments_file()

        self.assertEqual(actual_gene_score, expected_gene_score)
예제 #16
0
    def test_Alignments_compute_gene_scores_single_gene_double_query(self):
        """
        Check the gene score computed from a set of alignments
        gene1 has a single hit, from a query that also hits another gene
        """

        # match values for the four hits
        matches1, matches2, matches3, matches4 = 41.0, 57.1, 61.0, 72.1

        # reference lengths
        gene1_length, gene2_length, gene3_length = 2, 3, 4

        # each hit is added as (reference, reference_length, query, matches, bug)
        hits = (
            ("gene1", gene1_length, "query1", matches1, "bug1"),
            ("gene2", gene2_length, "query1", matches2, "bug1"),
            ("gene2", gene2_length, "query2", matches3, "bug1"),
            ("gene3", gene3_length, "query3", matches4, "bug1"),
        )
        alignments_store = store.Alignments()
        for hit in hits:
            alignments_store.add(*hit)

        # compute gene scores
        gene_scores_store = store.GeneScores()
        alignments_store.convert_alignments_to_gene_scores(gene_scores_store)

        # gene1 length expressed per kb
        gene1_length_kb = gene1_length / 1000.0

        # query1 hits both gene1 and gene2, so its hit scores are summed
        hit1_score = math.pow(matches1, config.match_power)
        hit2_score = math.pow(matches2, config.match_power)
        query1_sum = hit1_score + hit2_score
        expected_gene_score = hit1_score / query1_sum / gene1_length_kb

        actual_gene_score = gene_scores_store.get_score("bug1", "gene1")
        self.assertEqual(actual_gene_score, expected_gene_score)