def test_nucleotide_search_unaligned_reads_read_count_aligned_subject_coverage(self):
     """
     Test the unaligned reads and the store alignments
     Test with a bowtie2/sam output file
     Test for aligned read counts
     Test with subject coverage filtering
     """
     
     # create a set of alignments
     alignments=store.Alignments()
     unaligned_reads_store=store.Reads()
     
     # turn off subject filtering
     config.nucleotide_query_coverage_threshold = 0
     
     # read in the aligned and unaligned reads
     [unaligned_reads_file_fasta, reduced_aligned_reads_file] = nucleotide.unaligned_reads(
         cfg.sam_file_unaligned_reads, alignments, unaligned_reads_store, keep_sam=True) 
     
     # reset subject filtering
     config.nucleotide_query_coverage_threshold = self.default_nucleotide_query_coverage_threshold
     
     # remove temp files
     utils.remove_temp_file(unaligned_reads_file_fasta)
     utils.remove_temp_file(reduced_aligned_reads_file)
     
     # check the aligned reads count
     self.assertEqual(len(alignments.get_hit_list()),cfg.sam_file_unaligned_reads_total_aligned_subject_coverage)
    def test_nucleotide_search_unaligned_reads_output_fasta_format(self):
        """
        Test the unaligned reads and the store alignments
        Test with a bowtie2/sam output file
        Test output file is of fasta format
        Test sam file is not removed
        """
        
        # create a set of alignments
        alignments=store.Alignments()
        unaligned_reads_store=store.Reads()
       
        # turn off query/subject filtering
        config.nucleotide_subject_coverage_threshold = 0
        config.nucleotide_query_coverage_threshold = 0
 
        # read in the aligned and unaligned reads
        [unaligned_reads_file_fasta, reduced_aligned_reads_file] = nucleotide.unaligned_reads(
            cfg.sam_file_unaligned_reads, alignments, unaligned_reads_store, keep_sam=True) 
        
        # reset query/subject filtering
        config.nucleotide_subject_coverage_threshold = self.default_nucleotide_subject_coverage_threshold
        config.nucleotide_query_coverage_threshold = self.default_nucleotide_query_coverage_threshold

        # check for fasta output file format
        file_format=utilities.determine_file_format(unaligned_reads_file_fasta)
        self.assertEqual("fasta",file_format)
        
        # remove temp files
        utils.remove_temp_file(unaligned_reads_file_fasta)
        utils.remove_temp_file(reduced_aligned_reads_file)
 def test_nucleotide_search_unaligned_reads_output_blast_format(self):
     """
     Test the unaligned reads and the store alignments
     Test with a bowtie2/sam output file
     Test the aligned reads file created is of the blastm8 format
     """
     
     # create a set of alignments
     alignments=store.Alignments()
     unaligned_reads_store=store.Reads()
     
     # turn off query/subject filtering
     config.nucleotide_subject_coverage_threshold = 0
     config.nucleotide_query_coverage_threshold = 0
     
     config.file_basename="TEST"
     
     # read in the aligned and unaligned reads
     [unaligned_reads_file_fasta, reduced_aligned_reads_file] = nucleotide.unaligned_reads(
         cfg.sam_file_annotations, alignments, unaligned_reads_store, keep_sam=True) 
     
     # reset query/subject filtering
     config.nucleotide_subject_coverage_threshold = self.default_nucleotide_subject_coverage_threshold
     config.nucleotide_query_coverage_threshold = self.default_nucleotide_query_coverage_threshold
     
     # test file is of the blastm8 format
     file_format=utilities.determine_file_format(reduced_aligned_reads_file)
     
     # remove temp files
     utils.remove_temp_file(unaligned_reads_file_fasta)
     utils.remove_temp_file(reduced_aligned_reads_file)           
     
     self.assertEqual(file_format,"blastm8")
 def test_nucleotide_search_unaligned_reads_annotations_bug(self):
     """
     Test the unaligned reads and the store alignments
     Test with a bowtie2/sam output file
     Test the different annotation formats are recognized for bug
     """
     
     # create a set of alignments
     alignments=store.Alignments()
     unaligned_reads_store=store.Reads()
     
     # turn off query/subject filtering
     config.nucleotide_subject_coverage_threshold = 0
     config.nucleotide_query_coverage_threshold = 0
     
     # read in the aligned and unaligned reads
     [unaligned_reads_file_fasta, reduced_aligned_reads_file] = nucleotide.unaligned_reads(
         cfg.sam_file_annotations, alignments, unaligned_reads_store, keep_sam=True) 
     
     # reset query/subject filtering
     config.nucleotide_subject_coverage_threshold = self.default_nucleotide_subject_coverage_threshold
     config.nucleotide_query_coverage_threshold = self.default_nucleotide_query_coverage_threshold
     
     # remove temp files
     utils.remove_temp_file(unaligned_reads_file_fasta)
     utils.remove_temp_file(reduced_aligned_reads_file)
     
     # there should be one bug which is unclassified
     self.assertEqual(alignments.bug_list(),["unclassified"])
 def test_nucleotide_search_unaligned_reads_annotations_reference(self):
     """
     Test the unaligned reads and the store alignments
     Test with a bowtie2/sam output file
     Test the different annotation formats are recognized for reference
     """
     
     # create a set of alignments
     alignments=store.Alignments()
     unaligned_reads_store=store.Reads()
     
     # turn off query/subject filtering
     config.nucleotide_subject_coverage_threshold = 0
     config.nucleotide_query_coverage_threshold = 0
     
     # read in the aligned and unaligned reads
     [unaligned_reads_file_fasta, reduced_aligned_reads_file] = nucleotide.unaligned_reads(
         cfg.sam_file_annotations, alignments, unaligned_reads_store, keep_sam=True) 
     
     # reset query/subject filtering
     config.nucleotide_subject_coverage_threshold = self.default_nucleotide_subject_coverage_threshold
     config.nucleotide_query_coverage_threshold = self.default_nucleotide_query_coverage_threshold
     
     # remove temp files
     utils.remove_temp_file(unaligned_reads_file_fasta)
     utils.remove_temp_file(reduced_aligned_reads_file)
     
     # two of the hits should be for gene "UniRef50"
     hits=alignments.hits_for_gene("UniRef50")
     self.assertEqual(len(hits),2)
 def test_nucleotide_search_unaligned_reads_read_count_unaligned_minimize_memory_use(self):
     """
     Test the unaligned reads and the store alignments
     Test with a bowtie2/sam output file
     Test for unaligned read counts
     Test with minimize memory use
     """
     
     # create a set of alignments
     alignments=store.Alignments()
     unaligned_reads_store=store.Reads(minimize_memory_use=True)
     
     # turn off query/subject filtering
     config.nucleotide_subject_coverage_threshold = 0
     config.nucleotide_query_coverage_threshold = 0
     
     # read in the aligned and unaligned reads
     [unaligned_reads_file_fasta, reduced_aligned_reads_file] = nucleotide.unaligned_reads(
         cfg.sam_file_unaligned_reads, alignments, unaligned_reads_store, keep_sam=True) 
     
     # reset query/subject filtering
     config.nucleotide_subject_coverage_threshold = self.default_nucleotide_subject_coverage_threshold
     config.nucleotide_query_coverage_threshold = self.default_nucleotide_query_coverage_threshold
     
     # remove temp files
     utils.remove_temp_file(unaligned_reads_file_fasta)
     utils.remove_temp_file(reduced_aligned_reads_file)
     
     # check the unaligned reads count
     self.assertEqual(unaligned_reads_store.count_reads(),cfg.sam_file_unaligned_reads_total_unaligned)
 def test_nucleotide_search_unaligned_reads_annotations_gene_length(self):
     """
     Test the unaligned reads and the store alignments
     Test with a bowtie2/sam output file
     Test the different annotation formats are recognized for gene length
     Test the gene length uses the read length from the sam file
     """
     
     # create a set of alignments
     alignments=store.Alignments()
     unaligned_reads_store=store.Reads()
     
     # turn off query/subject filtering
     config.nucleotide_subject_coverage_threshold = 0
     config.nucleotide_query_coverage_threshold = 0
     
     # read in the aligned and unaligned reads
     [unaligned_reads_file_fasta, reduced_aligned_reads_file] = nucleotide.unaligned_reads(
         cfg.sam_file_annotations, alignments, unaligned_reads_store, keep_sam=True) 
     
     # reset query/subject filtering
     config.nucleotide_subject_coverage_threshold = self.default_nucleotide_subject_coverage_threshold
     config.nucleotide_query_coverage_threshold = self.default_nucleotide_query_coverage_threshold
     
     # remove temp files
     utils.remove_temp_file(unaligned_reads_file_fasta)
     utils.remove_temp_file(reduced_aligned_reads_file)
     
     # there should be 4 hits identified
     all_hits=alignments.get_hit_list()
     self.assertEqual(len(all_hits),4)
     
     # check for set and default gene lengths
     read_length = 151
     expected_length_uniref50 = (abs(2000 - read_length)+1)/1000.0
     expected_length_other = (abs(1000 - read_length)+1)/1000.0
     
     for hit in all_hits:
         query, bug, reference, score, length = hit
         if reference == "UniRef50":
             self.assertEqual(length,expected_length_uniref50)
         else:
             self.assertEqual(length,expected_length_other)
    def test_nucleotide_search_unaligned_reads_read_count_aligned_evalue_threshold(self):
        """
        Test the unaligned reads and the store alignments
        Test with a bowtie2/sam output file
        Test for aligned read counts
        Test the evalue threshold does not filter alignments
        """
        
        # create a set of alignments
        alignments=store.Alignments()
        unaligned_reads_store=store.Reads()
        
        # turn off query/subject filtering
        config.nucleotide_subject_coverage_threshold = 0
        config.nucleotide_query_coverage_threshold = 0

        # update the evalue threshold to a number less than those for the alignment file
        original_evalue_threshold=config.evalue_threshold
        config.evalue_threshold=1e-15
        
        # read in the aligned and unaligned reads
        [unaligned_reads_file_fasta, reduced_aligned_reads_file] = nucleotide.unaligned_reads(
            cfg.sam_file_unaligned_reads, alignments, unaligned_reads_store, keep_sam=True) 
        
        # reset query/subject filtering
        config.nucleotide_subject_coverage_threshold = self.default_nucleotide_subject_coverage_threshold
        config.nucleotide_query_coverage_threshold = self.default_nucleotide_query_coverage_threshold
        
        # remove temp files
        utils.remove_temp_file(unaligned_reads_file_fasta)
        utils.remove_temp_file(reduced_aligned_reads_file)
        
        # reset the evalue threshold back to the original
        config.evalue_threshold=original_evalue_threshold
        
        # check the aligned reads count (all reads should be aligned even though they do not
        # meet the threshold as the evalue threshold is not applied for this type of alignment)
        self.assertEqual(len(alignments.get_hit_list()),cfg.sam_file_unaligned_reads_total_aligned)
 def test_nucleotide_search_unaligned_reads_read_count_aligned_identity_threshold(self):
     """
     Test the unaligned reads and the store alignments
     Test with a bowtie2/sam output file
     Test for aligned read counts
     Test the identity threshold does filter alignments
     """
     
     # create a set of alignments
     alignments=store.Alignments()
     unaligned_reads_store=store.Reads()
     
     # turn off query/subject filtering
     config.nucleotide_subject_coverage_threshold = 0
     config.nucleotide_query_coverage_threshold = 0
     
     # update the identity threshold to a number larger than those in the alignments
     original_identity_threshold=config.identity_threshold
     config.identity_threshold=101.0
     
     # read in the aligned and unaligned reads
     [unaligned_reads_file_fasta, reduced_aligned_reads_file] = nucleotide.unaligned_reads(
         cfg.sam_file_unaligned_reads, alignments, unaligned_reads_store, keep_sam=True) 
     
     # reset query/subject filtering
     config.nucleotide_subject_coverage_threshold = self.default_nucleotide_subject_coverage_threshold
     config.nucleotide_query_coverage_threshold = self.default_nucleotide_query_coverage_threshold
     
     # remove temp files
     utils.remove_temp_file(unaligned_reads_file_fasta)
     utils.remove_temp_file(reduced_aligned_reads_file)
     
     # reset the identity threshold back to the original
     config.identity_threshold=original_identity_threshold
     
     # check the aligned reads count (it should be two as both should pass the threshold)
     self.assertEqual(len(alignments.get_hit_list()),2)
 def test_nucleotide_search_unaligned_reads_scores(self):
     """
     Test the unaligned reads and the store alignments
     Test with a bowtie2/sam output file
     Test the scores are based on percent identities
     """
     
     # create a set of alignments
     alignments=store.Alignments()
     unaligned_reads_store=store.Reads()
     
     # turn off query/subject filtering
     config.nucleotide_subject_coverage_threshold = 0
     config.nucleotide_query_coverage_threshold = 0
     
     # read in the aligned and unaligned reads
     [unaligned_reads_file_fasta, reduced_aligned_reads_file] = nucleotide.unaligned_reads(
         cfg.sam_file_annotations, alignments, unaligned_reads_store, keep_sam=True) 
     
     # reset query/subject filtering
     config.nucleotide_subject_coverage_threshold = self.default_nucleotide_subject_coverage_threshold
     config.nucleotide_query_coverage_threshold = self.default_nucleotide_query_coverage_threshold
     
     # remove temp files
     utils.remove_temp_file(unaligned_reads_file_fasta)
     utils.remove_temp_file(reduced_aligned_reads_file)
     
     # there should be 4 hits identified
     all_hits=alignments.get_hit_list()
     
     # check for set and default gene lengths
     expected_score=math.pow(151.0, config.match_power)
     
     for hit in all_hits:
         query, bug, reference, score, length = hit
         self.assertEqual(score,expected_score)