def test_gene_families_gene_list(self):
        """
        Test the gene list of alignments loaded with the blast config indexes

        Every line of cfg.usearch_file that does not start with "#" is split
        on config.blast_delimiter and stored in an Alignments instance. The
        sorted gene list of the store must equal the sorted
        cfg.usearch_file_gene_list.
        """

        # create a set of alignments
        alignments = store.Alignments()

        # load the usearch output
        # the context manager closes the file even if a line fails to parse
        with open(cfg.usearch_file) as file_handle:
            for line in file_handle:
                # skip the comment lines
                if line.startswith("#"):
                    continue

                data = line.strip().split(config.blast_delimiter)

                # the reference column holds "|"-delimited fields: the second
                # field is passed to add() as the reference and the first
                # field as the last argument (the bug, judging by the other
                # add() calls in these tests)
                referenceids = data[config.blast_reference_index].split("|")
                queryid = data[config.blast_query_index]
                evalue = float(data[config.blast_evalue_index])

                alignments.add(referenceids[1], 1, queryid, evalue,
                               referenceids[0])

        # check the genes were loaded correctly
        self.assertEqual(sorted(cfg.usearch_file_gene_list),
                         sorted(alignments.gene_list()))
Beispiel #2
0
    def test_translated_search_unaligned_reads_annotations_gene_length(self):
        """
        Run translated_search.unaligned_reads on cfg.rapsearch_file_annotations
        and check the gene lengths stored with the resulting hits

        Four hits are expected. Hits whose reference is "UniRef50" must have
        a length of 2000 and all of the other hits a length of 1000.
        """

        # empty stores to be filled by the unaligned reads function
        alignments = store.Alignments()
        reads_store = store.Reads()

        # process the rapsearch2 output file
        fasta_file = translated_search.unaligned_reads(
            reads_store, cfg.rapsearch_file_annotations, alignments)

        # the fasta file returned is not needed by this test
        utils.remove_temp_file(fasta_file)

        # count the hits that were stored
        hit_list = alignments.get_hit_list()
        self.assertEqual(len(hit_list), 4)

        # each hit is (query, bug, reference, evalue, length)
        for query, bug, reference, evalue, length in hit_list:
            expected_length = 2000 if reference == "UniRef50" else 1000
            self.assertEqual(length, expected_length)
 def test_Alignments_process_chocophlan_length(self):
     """
     Alignments class: Test process_chocophlan_length
     The location string "1-100" for "gene" should give a length of 100
     """

     computed_length = store.Alignments().process_chocophlan_length(
         "1-100", "gene")

     self.assertEqual(computed_length, 100)
 def test_Alignments_process_chocophlan_length_multiple(self):
     """
     Alignments class: Test process_chocophlan_length with two locations
     The first location is prefixed with "c:" and listed from 100 to 1
     The location string "c:100-1,1-100" should give a total length of 200
     """

     computed_length = store.Alignments().process_chocophlan_length(
         "c:100-1,1-100", "gene")

     self.assertEqual(computed_length, 200)
Beispiel #5
0
 def test_translated_search_unaligned_reads_rapsearch_log(self):
     """
     Test the unaligned reads function
     Test with a rapsearch output file (with header)
     Test that log of evalue is taken

     The file is loaded twice: once directly, storing the evalue column
     as read from the file, and once with translated_search.unaligned_reads.
     The evalues of the first sorted hit of each store are then compared.
     """

     # create a set of alignments
     alignments=store.Alignments()

     # load the rapsearch output
     # the context manager closes the file even if a line fails to parse
     with open(cfg.rapsearch2_output_file_with_header) as file_handle:
         for line in file_handle:
             # skip the header/comment lines
             if line.startswith("#"):
                 continue

             data=line.strip().split(config.blast_delimiter)

             referenceid=data[config.blast_reference_index]
             queryid=data[config.blast_query_index]
             evalue=float(data[config.blast_evalue_index])

             alignments.add(referenceid, 0, queryid, evalue,"unclassified")

     alignments_test=store.Alignments()
     unaligned_reads_store=store.Reads()

     # load the rapsearch output with the unaligned reads function
     unaligned_file_fasta=translated_search.unaligned_reads(unaligned_reads_store,
         cfg.rapsearch2_output_file_with_header, alignments_test)

     # remove temp file
     utils.remove_temp_file(unaligned_file_fasta)

     # check the evalues are changed
     # the evalue is the second to last item of a hit
     hit1_evalue=sorted(alignments.get_hit_list())[0][-2]
     hit1_evalue_test=sorted(alignments_test.get_hit_list())[0][-2]
     # NOTE(review): the comparison is kept exactly as written originally;
     # confirm it matches the evalue transform done by unaligned_reads
     self.assertAlmostEqual(math.pow(10.0,math.log(hit1_evalue)*-1),
         math.log(hit1_evalue_test)*-1,places=7)
Beispiel #6
0
 def test_translated_search_unaligned_reads_blastm8(self):
     """
     Test the unaligned reads and the store alignments
     Test with a blastm8-like output file (without header)
     Test with empty reads structure
     Test that log of evalue is not taken
     Test that function does not require gene lengths in reference id

     The file is loaded twice: once directly and once with
     translated_search.unaligned_reads. The sorted hit lists of the
     two stores must be equal.
     """

     # create a set of alignments
     alignments=store.Alignments()

     # load the blastm8-like output
     # the context manager closes the file even if a line fails to parse
     with open(cfg.rapsearch2_output_file_without_header) as file_handle:
         for line in file_handle:
             # skip any comment lines
             if line.startswith("#"):
                 continue

             data=line.strip().split(config.blast_delimiter)

             referenceid=data[config.blast_reference_index]
             queryid=data[config.blast_query_index]
             evalue=float(data[config.blast_evalue_index])

             alignments.add(referenceid, 0, queryid, evalue,"unclassified")

     alignments_test=store.Alignments()
     unaligned_reads_store=store.Reads()

     # load the blastm8-like output with the unaligned reads function
     unaligned_file_fasta=translated_search.unaligned_reads(unaligned_reads_store,
         cfg.rapsearch2_output_file_without_header, alignments_test)

     # remove temp file
     utils.remove_temp_file(unaligned_file_fasta)

     # check the evalues are unchanged
     self.assertEqual(sorted(alignments.get_hit_list()), sorted(alignments_test.get_hit_list()))
    def test_gene_families_tsv_output(self):
        """
        Test the gene families function and the blast config indexes
        Test UniRef50_unknown is read in and used for gene scores but not printed
        Test the tsv output

        The alignments are loaded from cfg.usearch_file, the gene families
        are written with the output format set to "tsv", and the file
        returned is compared byte for byte with cfg.gene_familes_file.
        Note this test sets config.output_format and config.genefamilies_file
        and does not restore them.
        """

        # create a set of alignments
        alignments = store.Alignments()

        # load the usearch output
        # the context manager closes the file even if a line fails to parse
        with open(cfg.usearch_file) as file_handle:
            for line in file_handle:
                # skip the comment lines
                if line.startswith("#"):
                    continue

                data = line.strip().split(config.blast_delimiter)

                referenceids = data[config.blast_reference_index].split("|")
                queryid = data[config.blast_query_index]
                evalue = float(data[config.blast_evalue_index])

                alignments.add(referenceids[1], 1, queryid, evalue,
                               referenceids[0])

        # set the output format
        config.output_format = "tsv"

        # set the location of the file to write to as a temp file
        # only the path is needed so the descriptor is closed right away
        file_out, gene_families_file = tempfile.mkstemp()
        os.close(file_out)
        config.genefamilies_file = gene_families_file

        try:
            # create gene_scores instance
            gene_scores = store.GeneScores()

            # obtain the gene families
            gene_families_file = quantify_families.gene_families(
                alignments, gene_scores)

            # check the gene families output is as expected
            self.assertTrue(
                filecmp.cmp(gene_families_file,
                            cfg.gene_familes_file,
                            shallow=False))
        finally:
            # delete the temp file even if the function or the check fails
            utils.remove_temp_file(gene_families_file)
 def test_Alignments_add_gene_list(self):
     """
     Alignments class: Test add function
     Four hits are added with "gene1" added twice
     The sorted gene list should contain each of the three genes once
     """

     gene_store=store.Alignments()

     # (reference, query, bug) for each hit in the order added
     hits_to_add=(
         ("gene2", "Q3", "bug1"),
         ("gene1", "Q1", "bug2"),
         ("gene3", "Q2", "bug3"),
         ("gene1", "Q1", "bug1"),
     )
     for reference, query, bug in hits_to_add:
         gene_store.add(reference, 1, query, 0.01, bug)

     # check gene list
     expected_genes=["gene1","gene2","gene3"]
     self.assertEqual(sorted(gene_store.gene_list()),expected_genes)
 def test_Alignments_add_gene_count(self):
     """
     Alignments class: Test add function
     Four hits are added with "gene1" added twice
     The total genes counted should be three
     """

     gene_store=store.Alignments()

     # (reference, query, bug) for each hit in the order added
     hits_to_add=(
         ("gene2", "Q3", "bug1"),
         ("gene1", "Q1", "bug2"),
         ("gene3", "Q2", "bug3"),
         ("gene1", "Q1", "bug1"),
     )
     for reference, query, bug in hits_to_add:
         gene_store.add(reference, 1, query, 0.01, bug)

     # check the total genes
     total_genes=gene_store.count_genes()
     self.assertEqual(total_genes,3)
Beispiel #10
0
    def test_Alignments_compute_gene_scores_double_gene_double_query(self):
        """
        Test convert_alignments_to_gene_scores for a gene with two hits
        where one of the queries also has a hit to another gene

        gene2 is hit by query1 (which also hits gene1) and by query2.
        The expected score weights each hit by exp(-evalue), divides by the
        sum of the weights for the query, and then divides by the gene
        length in kb.
        """

        evalue_a, evalue_b, evalue_c, evalue_d = 1e-4, 3e-7, 2e-10, 2e-10
        length_gene1, length_gene2, length_gene3 = 2, 3, 4

        # store the hits, all for the same bug
        hits = store.Alignments()
        for gene, length, query, evalue in (
                ("gene1", length_gene1, "query1", evalue_a),
                ("gene2", length_gene2, "query1", evalue_b),
                ("gene2", length_gene2, "query2", evalue_c),
                ("gene3", length_gene3, "query3", evalue_d)):
            hits.add(gene, length, query, evalue, "bug1")

        # compute gene scores
        scores = store.GeneScores()
        hits.convert_alignments_to_gene_scores(scores)

        # weights for the two hits from query1 and their sum
        weight_query1_gene1 = math.exp(-evalue_a)
        weight_query1_gene2 = math.exp(-evalue_b)
        total_query1 = weight_query1_gene1 + weight_query1_gene2

        # query2 only has the one hit so its sum is its own weight
        weight_query2_gene2 = math.exp(-evalue_c)
        total_query2 = weight_query2_gene2

        # gene length in kb
        gene2_kb = length_gene2 / 1000.0

        # expected score for gene2 from its two hits
        expected_score = (weight_query2_gene2 / total_query2 / gene2_kb +
                          weight_query1_gene2 / total_query1 / gene2_kb)

        self.assertAlmostEqual(scores.get_score("bug1", "gene2"),
                               expected_score,
                               places=12)
Beispiel #11
0
    def test_Alignments_id_mapping_all_hits(self):
        """
        Test process_id_mapping followed by add_annotated
        Three hits are stored after loading cfg.id_mapping_file
        The lengths stored with the hits should be 1, 10 and 1000
        """

        mapped_store = store.Alignments()

        # the id mapping must be loaded before the hits are added
        mapped_store.process_id_mapping(cfg.id_mapping_file)

        # add one hit for each of three references
        for query, reference in (("query1", "ref1"),
                                 ("query2", "ref2"),
                                 ("query3", "ref3")):
            mapped_store.add_annotated(query, 1, reference)

        # the length is the last item of each hit
        lengths_found = sorted(
            hit[-1] for hit in mapped_store.get_hit_list())
        self.assertEqual(lengths_found, [1, 10, 1000])
Beispiel #12
0
    def test_Alignments_id_mapping_all_bug_list(self):
        """
        Test process_id_mapping followed by add_annotated
        Three hits are stored after loading cfg.id_mapping_file
        The bug list should contain only "bug3" and "unclassified"
        """

        mapped_store = store.Alignments()

        # the id mapping must be loaded before the hits are added
        mapped_store.process_id_mapping(cfg.id_mapping_file)

        # add one hit for each of three references
        for query, reference in (("query1", "ref1"),
                                 ("query2", "ref2"),
                                 ("query3", "ref3")):
            mapped_store.add_annotated(query, 1, reference)

        # compare against the expected bugs in sorted order
        bugs_found = sorted(mapped_store.bug_list())
        self.assertEqual(bugs_found, ["bug3", "unclassified"])
Beispiel #13
0
 def test_translated_search_unaligned_reads_annotations_bug(self):
     """
     Run translated_search.unaligned_reads on cfg.rapsearch_file_annotations
     and check the bugs stored with the resulting hits
     The bug list should be "s__Bacteroides_xylanisolvens" and "unclassified"
     """

     # empty stores to be filled by the unaligned reads function
     alignments=store.Alignments()
     reads_store=store.Reads()

     # process the rapsearch2 output file
     fasta_file=translated_search.unaligned_reads(
         reads_store, cfg.rapsearch_file_annotations, alignments)

     # the fasta file returned is not needed by this test
     utils.remove_temp_file(fasta_file)

     # compare against the expected bugs in sorted order
     bugs_found=sorted(alignments.bug_list())
     self.assertEqual(bugs_found,["s__Bacteroides_xylanisolvens","unclassified"])
Beispiel #14
0
 def test_translated_search_unaligned_reads_annotations_reference(self):
     """
     Run translated_search.unaligned_reads on cfg.rapsearch_file_annotations
     and check the references stored with the resulting hits
     Three hits should be stored for the gene "UniRef50"
     """

     # empty stores to be filled by the unaligned reads function
     alignments=store.Alignments()
     reads_store=store.Reads()

     # process the rapsearch2 output file
     fasta_file=translated_search.unaligned_reads(
         reads_store, cfg.rapsearch_file_annotations, alignments)

     # the fasta file returned is not needed by this test
     utils.remove_temp_file(fasta_file)

     # count the hits stored for the gene
     uniref_hits=alignments.hits_for_gene("UniRef50")
     self.assertEqual(len(uniref_hits),3)
Beispiel #15
0
    def test_Alignments_compute_gene_scores_single_gene_single_query(self):
        """
        Test the compute_gene_scores function
        Test one hit for gene with one hit for query

        gene3 is only hit by query3 and query3 only hits gene3, so the
        expected score is the hit weight divided by itself and then
        divided by the gene length in kb.
        """

        # create a set of hits
        eval1 = 1e-4
        eval2 = 3e-7
        eval3 = 2e-10
        eval4 = 2e-10

        gene1_length = 2
        gene2_length = 3
        gene3_length = 4

        # Create a set of alignments
        alignments_store = store.Alignments()
        alignments_store.add("gene1", gene1_length, "query1", eval1, "bug1")
        alignments_store.add("gene2", gene2_length, "query1", eval2, "bug1")
        alignments_store.add("gene2", gene2_length, "query2", eval3, "bug1")
        alignments_store.add("gene3", gene3_length, "query3", eval4, "bug1")

        gene_scores_store = store.GeneScores()

        # compute gene scores
        alignments_store.convert_alignments_to_gene_scores(gene_scores_store)

        # convert lengths to per kb
        gene3_length = gene3_length / 1000.0

        # gene3
        hit4_score = math.exp(-eval4)
        query3_sum = hit4_score
        gene_score = hit4_score / query3_sum / gene3_length

        # the score is a computed float so compare with a tolerance instead
        # of exact equality (same tolerance as the double gene test)
        self.assertAlmostEqual(gene_scores_store.get_score("bug1", "gene3"),
                               gene_score,
                               places=12)