def test_translated_search_unaligned_reads_annotations_bug(self):
    """
    Test the unaligned reads and the store alignments
    Test with a rapsearch2 output file
    Test the different annotation formats are recognized for bug
    Test without the coverage filter
    """

    # create a set of alignments
    alignments = store.Alignments()
    unaligned_reads_store = store.Reads()

    # set the coverage threshold to zero so as to not test with filter on
    current_coverage_threshold = config.translated_subject_coverage_threshold
    config.translated_subject_coverage_threshold = 0

    try:
        # load the rapsearch2 output with the unaligned reads function
        unaligned_file_fasta = translated.unaligned_reads(
            unaligned_reads_store, cfg.rapsearch_file_annotations, alignments)
    finally:
        # restore the shared config even when the load raises, so a failure
        # here cannot change the settings seen by later tests
        config.translated_subject_coverage_threshold = current_coverage_threshold

    # remove temp file
    utils.remove_temp_file(unaligned_file_fasta)

    # there should be one bug name and the other should be unclassified
    self.assertEqual(
        sorted(alignments.bug_list()),
        sorted(["s__Bacteroides_xylanisolvens", "unclassified"]))
def test_determine_file_format_fastq_gzip(self):
    """
    Test the determine_file_format function with a fastq file that is gzipped
    """

    # create a temp file to hold the gzipped copy of the small fastq file
    file_out, gzip_fastq_file = tempfile.mkstemp(suffix=".gz")
    os.close(file_out)

    try:
        # write the gzipped file; the context managers close both handles
        # even if the copy fails part way through
        with open(cfg.small_fastq_file, "rt") as file_handle:
            with gzip.open(gzip_fastq_file, "wt") as file_handle_gzip:
                shutil.copyfileobj(file_handle, file_handle_gzip)

        # named file_format to avoid shadowing the builtin format()
        file_format = utilities.determine_file_format(gzip_fastq_file)
    finally:
        # remove the temp gzipped file whether or not the steps above worked
        utils.remove_temp_file(gzip_fastq_file)

    self.assertEqual(file_format, "fastq.gz")
def test_nucleotide_search_unaligned_reads_read_count_unaligned_minimize_memory_use(self):
    """
    Load a bowtie2/sam file through nucleotide.unaligned_reads
    Use a reads store created with minimize_memory_use enabled
    Check the number of unaligned reads stored
    """

    # stores filled in by the load
    hits_store = store.Alignments()
    reads_store = store.Reads(minimize_memory_use=True)

    # zero both coverage thresholds while the sam file is read
    config.nucleotide_subject_coverage_threshold = 0
    config.nucleotide_query_coverage_threshold = 0

    fasta_path, reduced_sam_path = nucleotide.unaligned_reads(
        cfg.sam_file_unaligned_reads, hits_store, reads_store, keep_sam=True)

    # put the thresholds back to the defaults held on the test case
    config.nucleotide_subject_coverage_threshold = self.default_nucleotide_subject_coverage_threshold
    config.nucleotide_query_coverage_threshold = self.default_nucleotide_query_coverage_threshold

    # the generated files are not needed for the count check
    for temp_path in (fasta_path, reduced_sam_path):
        utils.remove_temp_file(temp_path)

    # compare the stored read count with the expected total
    self.assertEqual(reads_store.count_reads(),
                     cfg.sam_file_unaligned_reads_total_unaligned)
def test_PathwaysDatabase_print_flat_file_reactions_list(self):
    """
    Pathways database class: Test the printing of a flat file from a recursive file
    Test the reactions list
    """

    pathways_database_store = store.PathwaysDatabase(cfg.pathways_file, True)
    pathways_database_flat_store = store.PathwaysDatabase(cfg.pathways_flat_file, True)

    # write the flat file created from a recursive file to a temp file
    file_out, new_file = tempfile.mkstemp()
    os.close(file_out)
    # write through a text-mode handle: os.write() only accepts bytes on
    # Python 3 and would raise TypeError if get_database() returns text
    with open(new_file, "w") as file_handle:
        file_handle.write(pathways_database_store.get_database())

    # load in the flat file and compare with the correct flat file
    pathways_database_flat_store_write = store.PathwaysDatabase(new_file, True)

    # remove the temp file
    utils.remove_temp_file(new_file)

    # check that the reactions list for each pathway is identical
    for pathway in pathways_database_flat_store_write.pathway_list():
        self.assertEqual(
            sorted(pathways_database_flat_store_write.find_reactions(pathway)),
            sorted(pathways_database_flat_store.find_reactions(pathway)))
def test_PathwaysDatabase_print_flat_file_pathways_count(self):
    """
    Pathways database class: Test the printing of a flat file from a recursive file
    Test for the total number of pathways
    """

    pathways_database_store = store.PathwaysDatabase(cfg.pathways_file, True)
    pathways_database_flat_store = store.PathwaysDatabase(cfg.pathways_flat_file, True)

    # write the flat file created from a recursive file to a temp file
    file_out, new_file = tempfile.mkstemp()
    os.close(file_out)
    # write through a text-mode handle: os.write() only accepts bytes on
    # Python 3 and would raise TypeError if get_database() returns text
    with open(new_file, "w") as file_handle:
        file_handle.write(pathways_database_store.get_database())

    # load in the flat file and compare with the correct flat file
    pathways_database_flat_store_write = store.PathwaysDatabase(new_file, True)

    # remove the temp file
    utils.remove_temp_file(new_file)

    # check for the same number of pathways
    pathway_list = pathways_database_flat_store_write.pathway_list()
    pathway_list_flat = pathways_database_flat_store.pathway_list()
    self.assertEqual(len(pathway_list), len(pathway_list_flat))
def test_translated_search_unaligned_reads_annotations_gene_length(self):
    """
    Load a rapsearch2 output file through translated_search.unaligned_reads
    Check the number of hits stored
    Check the gene length stored with each hit
    """

    # stores filled in by the load
    hits_store = store.Alignments()
    reads_store = store.Reads()

    fasta_path = translated_search.unaligned_reads(
        reads_store, cfg.rapsearch_file_annotations, hits_store)

    # the fasta output is not needed for this check
    utils.remove_temp_file(fasta_path)

    # four hits are expected from the annotations file
    hit_list = hits_store.get_hit_list()
    self.assertEqual(len(hit_list), 4)

    # hits against "UniRef50" should carry length 2000, all others 1000
    for _query, _bug, reference, _evalue, length in hit_list:
        expected_length = 2000 if reference == "UniRef50" else 1000
        self.assertEqual(length, expected_length)
def test_PathwaysDatabase_print_flat_file_pathways_list(self):
    """
    Pathways database class: Test the printing of a flat file from a structured file
    Test for the pathways list
    """

    pathways_database_store = store.PathwaysDatabase(cfg.pathways_file)
    pathways_database_flat_store = store.PathwaysDatabase(cfg.pathways_flat_file)

    # write the flat file created from a structured file to a temp file
    file_out, new_file = tempfile.mkstemp()
    os.close(file_out)
    with open(new_file, "w") as file_handle:
        file_handle.write(pathways_database_store.get_database())

    # load in the flat file and compare with the correct flat file
    pathways_database_flat_store_write = store.PathwaysDatabase(new_file)

    # remove the temp file
    utils.remove_temp_file(new_file)

    pathway_list = pathways_database_flat_store_write.pathway_list()
    pathway_list_flat = pathways_database_flat_store.pathway_list()

    # check that every written pathway id is in the reference flat file;
    # assertIn reports the missing id and the container on failure
    for pathway in pathway_list:
        self.assertIn(pathway, pathway_list_flat)
def test_nucleotide_search_unaligned_reads_output_fasta_format(self):
    """
    Test the unaligned reads and the store alignments
    Test with a bowtie2/sam output file
    Test output file is of fasta format
    Test sam file is not removed
    """

    # create a set of alignments
    alignments = store.Alignments()
    unaligned_reads_store = store.Reads()

    # turn off query/subject filtering
    config.nucleotide_subject_coverage_threshold = 0
    config.nucleotide_query_coverage_threshold = 0

    # read in the aligned and unaligned reads
    unaligned_reads_file_fasta, reduced_aligned_reads_file = nucleotide.unaligned_reads(
        cfg.sam_file_unaligned_reads, alignments, unaligned_reads_store, keep_sam=True)

    # reset query/subject filtering
    config.nucleotide_subject_coverage_threshold = self.default_nucleotide_subject_coverage_threshold
    config.nucleotide_query_coverage_threshold = self.default_nucleotide_query_coverage_threshold

    # determine the output file format
    file_format = utilities.determine_file_format(unaligned_reads_file_fasta)

    # remove temp files before asserting so a failed assertion does not
    # leave them behind
    utils.remove_temp_file(unaligned_reads_file_fasta)
    utils.remove_temp_file(reduced_aligned_reads_file)

    # check for fasta output file format
    self.assertEqual("fasta", file_format)
def test_gene_families_tsv_output_with_names(self):
    """
    Test the gene families function and the blast config indexes
    Test UniRef50_unknown is read in and used for gene scores but not printed
    Test the tsv output
    Test that gene families have names applied to them
    Test unmapped reads total is written with the same precision as other lines
    """

    # update the max decimals to allow for rounding
    config.output_max_decimals = 7

    # set to a smaller mapping file
    original_gene_family_mapping_file = config.gene_family_name_mapping_file
    config.gene_family_name_mapping_file = cfg.gene_families_to_names_file

    gene_families_file = None
    try:
        # create a set of alignments
        alignments = store.Alignments()

        # load the usearch output; the context manager closes the handle
        # even if a line fails to parse
        with open(cfg.usearch_uniref50_file) as file_handle:
            for line in file_handle:
                if not re.search("^#", line):
                    data = line.strip().split(config.blast_delimiter)

                    referenceids = data[config.blast_reference_index].split("|")
                    queryid = data[config.blast_query_index]
                    identity = float(data[config.blast_identity_index])

                    alignments.add(referenceids[1], 1, queryid, identity, referenceids[0])

        # set the output format
        config.output_format = "tsv"

        # set the location of the file to write to as a temp file
        file_out, gene_families_file = tempfile.mkstemp()
        os.close(file_out)
        config.genefamilies_file = gene_families_file

        # create gene_scores instance
        gene_scores = store.GeneScores()

        # obtain the gene families
        gene_families_file = families.gene_families(alignments, gene_scores, 1)

        # compare the gene families output with the expected output
        files_match = filecmp.cmp(gene_families_file,
                                  cfg.gene_familes_uniref50_with_names_file,
                                  shallow=False)
    finally:
        # reset the mapping file and delete the temp file even on failure
        config.gene_family_name_mapping_file = original_gene_family_mapping_file
        if gene_families_file is not None:
            utils.remove_temp_file(gene_families_file)

    # check the gene families output is as expected
    self.assertTrue(files_match)
def test_nucleotide_search_unaligned_reads_read_count_aligned(self):
    """
    Load a bowtie2/sam file through nucleotide.unaligned_reads
    Check the number of aligned hits stored
    """

    # stores filled in by the load
    hits_store = store.Alignments()
    reads_store = store.Reads()

    fasta_path, reduced_sam_path = nucleotide.unaligned_reads(
        cfg.sam_file_unaligned_reads, hits_store, reads_store, keep_sam=True)

    # the generated files are not needed for the count check
    for temp_path in (fasta_path, reduced_sam_path):
        utils.remove_temp_file(temp_path)

    # compare the stored hit count with the expected total
    aligned_total = len(hits_store.get_hit_list())
    self.assertEqual(aligned_total, cfg.sam_file_unaligned_reads_total_aligned)
def test_nucleotide_search_unaligned_reads_read_count_aligned_evalue_threshold(self):
    """
    Test the unaligned reads and the store alignments
    Test with a bowtie2/sam output file
    Test for aligned read counts
    Test the evalue threshold does not filter alignments
    """

    # create a set of alignments
    alignments = store.Alignments()
    unaligned_reads_store = store.Reads()

    # update the evalue threshold to a number less than those for the alignment file
    original_evalue_threshold = config.evalue_threshold
    config.evalue_threshold = 1e-15

    try:
        # read in the aligned and unaligned reads
        unaligned_reads_file_fasta, reduced_aligned_reads_file = nucleotide.unaligned_reads(
            cfg.sam_file_unaligned_reads, alignments, unaligned_reads_store, keep_sam=True)
    finally:
        # reset the evalue threshold even when the load raises, so the
        # modified value cannot affect later tests
        config.evalue_threshold = original_evalue_threshold

    # remove temp files
    utils.remove_temp_file(unaligned_reads_file_fasta)
    utils.remove_temp_file(reduced_aligned_reads_file)

    # check the aligned reads count (all reads should be aligned even though they do not
    # meet the threshold as the evalue threshold is not applied for this type of alignment)
    self.assertEqual(len(alignments.get_hit_list()),
                     cfg.sam_file_unaligned_reads_total_aligned)
def test_nucleotide_search_unaligned_reads_read_count_aligned_identity_threshold(self):
    """
    Test the unaligned reads and the store alignments
    Test with a bowtie2/sam output file
    Test for aligned read counts
    Test the identity threshold does filter alignments
    """

    # create a set of alignments
    alignments = store.Alignments()
    unaligned_reads_store = store.Reads()

    # update the identity threshold to a number larger than those in the alignments
    original_identity_threshold = config.identity_threshold
    config.identity_threshold = 101.0

    try:
        # read in the aligned and unaligned reads
        unaligned_reads_file_fasta, reduced_aligned_reads_file = nucleotide.unaligned_reads(
            cfg.sam_file_unaligned_reads, alignments, unaligned_reads_store, keep_sam=True)
    finally:
        # reset the identity threshold even when the load raises, so the
        # modified value cannot affect later tests
        config.identity_threshold = original_identity_threshold

    # remove temp files
    utils.remove_temp_file(unaligned_reads_file_fasta)
    utils.remove_temp_file(reduced_aligned_reads_file)

    # check the aligned reads count (it should be zero as none should pass the threshold)
    self.assertEqual(len(alignments.get_hit_list()), 0)
def test_nucleotide_search_unaligned_reads_scores(self):
    """
    Load a bowtie2/sam file through nucleotide.unaligned_reads
    Check the score stored with every hit equals 151.0 raised to config.match_power
    """

    # stores filled in by the load
    hits_store = store.Alignments()
    reads_store = store.Reads()

    fasta_path, reduced_sam_path = nucleotide.unaligned_reads(
        cfg.sam_file_annotations, hits_store, reads_store, keep_sam=True)

    # the generated files are not needed for the score check
    for temp_path in (fasta_path, reduced_sam_path):
        utils.remove_temp_file(temp_path)

    hit_list = hits_store.get_hit_list()

    # every stored hit is expected to carry the same score
    expected_score = math.pow(151.0, config.match_power)
    for _query, _bug, _reference, score, _length in hit_list:
        self.assertEqual(score, expected_score)
def test_humann_unpack_pathways_remove_taxonomy_tsv(self):
    """
    Test the tsv gene families and pathway abundance file entries with humann_unpack_pathways
    Test with the remove taxonomy option which stratifies by pathway then gene
    instead of stratifying by pathway, taxonomy, then gene
    """
    import os

    # create a temp file; only the path is used, so close the descriptor
    # returned by mkstemp instead of leaking it
    file_out, new_file = tempfile.mkstemp(prefix="humann_temp")
    os.close(file_out)

    try:
        # run the command
        utils.run_command([
            "humann_unpack_pathways",
            "--input-genes", cfg.merge_abundance_genefamilies_input,
            "--input-pathways", cfg.merge_abundance_pathways_input,
            "--output", new_file,
            "--remove-taxonomy"
        ])

        # allow for varying precision in the calculations with almost equal
        files_match = utils.files_almost_equal(
            new_file, cfg.merge_abundance_remove_taxonomy_output)
    finally:
        # remove the temp file even if the command or comparison fails
        utils.remove_temp_file(new_file)

    # check the output file is as expected
    self.assertTrue(files_match)
def test_nucleotide_search_unaligned_reads_annotations_bug(self):
    """
    Load a bowtie2/sam annotations file through nucleotide.unaligned_reads
    Check the list of bugs stored with the alignments
    """

    # stores filled in by the load
    hits_store = store.Alignments()
    reads_store = store.Reads()

    # zero both coverage thresholds while the sam file is read
    config.nucleotide_subject_coverage_threshold = 0
    config.nucleotide_query_coverage_threshold = 0

    fasta_path, reduced_sam_path = nucleotide.unaligned_reads(
        cfg.sam_file_annotations, hits_store, reads_store, keep_sam=True)

    # put the thresholds back to the defaults held on the test case
    config.nucleotide_subject_coverage_threshold = self.default_nucleotide_subject_coverage_threshold
    config.nucleotide_query_coverage_threshold = self.default_nucleotide_query_coverage_threshold

    # the generated files are not needed for the bug check
    for temp_path in (fasta_path, reduced_sam_path):
        utils.remove_temp_file(temp_path)

    # the only bug expected is "unclassified"
    bugs_found = hits_store.bug_list()
    self.assertEqual(bugs_found, ["unclassified"])
def test_nucleotide_search_unaligned_reads_output_blast_format(self):
    """
    Load a bowtie2/sam annotations file through nucleotide.unaligned_reads
    Check the reduced aligned reads file is detected as blastm8 format
    """

    # stores filled in by the load
    hits_store = store.Alignments()
    reads_store = store.Reads()

    # zero both coverage thresholds while the sam file is read
    config.nucleotide_subject_coverage_threshold = 0
    config.nucleotide_query_coverage_threshold = 0

    config.file_basename = "TEST"

    fasta_path, reduced_sam_path = nucleotide.unaligned_reads(
        cfg.sam_file_annotations, hits_store, reads_store, keep_sam=True)

    # put the thresholds back to the defaults held on the test case
    config.nucleotide_subject_coverage_threshold = self.default_nucleotide_subject_coverage_threshold
    config.nucleotide_query_coverage_threshold = self.default_nucleotide_query_coverage_threshold

    # detect the format before the file is deleted
    detected_format = utilities.determine_file_format(reduced_sam_path)

    # clean up both generated files
    for temp_path in (fasta_path, reduced_sam_path):
        utils.remove_temp_file(temp_path)

    self.assertEqual(detected_format, "blastm8")
def test_translated_search_unaligned_reads_annotations_reference(self):
    """
    Test the unaligned reads and the store alignments
    Test with a rapsearch2 output file
    Test the different annotation formats are recognized for reference
    Test without the coverage filter
    """

    # create a set of alignments
    alignments = store.Alignments()
    unaligned_reads_store = store.Reads()

    # set the coverage threshold to zero so as to not test with filter on
    current_coverage_threshold = config.translated_subject_coverage_threshold
    config.translated_subject_coverage_threshold = 0

    try:
        # load the rapsearch2 output with the unaligned reads function
        unaligned_file_fasta = translated.unaligned_reads(
            unaligned_reads_store, cfg.rapsearch_file_annotations, alignments)
    finally:
        # restore the shared config even when the load raises, so a failure
        # here cannot change the settings seen by later tests
        config.translated_subject_coverage_threshold = current_coverage_threshold

    # remove temp file
    utils.remove_temp_file(unaligned_file_fasta)

    # three of the hits should be for gene "UniRef50"
    hits = alignments.hits_for_gene("UniRef50")
    self.assertEqual(len(hits), 3)
def test_nucleotide_search_unaligned_reads_annotations_reference(self):
    """
    Load a bowtie2/sam annotations file through nucleotide.unaligned_reads
    Check the number of hits stored for the gene "UniRef50"
    """

    # stores filled in by the load
    hits_store = store.Alignments()
    reads_store = store.Reads()

    # zero both coverage thresholds while the sam file is read
    config.nucleotide_subject_coverage_threshold = 0
    config.nucleotide_query_coverage_threshold = 0

    fasta_path, reduced_sam_path = nucleotide.unaligned_reads(
        cfg.sam_file_annotations, hits_store, reads_store, keep_sam=True)

    # put the thresholds back to the defaults held on the test case
    config.nucleotide_subject_coverage_threshold = self.default_nucleotide_subject_coverage_threshold
    config.nucleotide_query_coverage_threshold = self.default_nucleotide_query_coverage_threshold

    # the generated files are not needed for the reference check
    for temp_path in (fasta_path, reduced_sam_path):
        utils.remove_temp_file(temp_path)

    # exactly two hits are expected for "UniRef50"
    uniref50_hits = hits_store.hits_for_gene("UniRef50")
    self.assertEqual(len(uniref50_hits), 2)
def test_nucleotide_search_unaligned_reads_read_count_aligned_subject_coverage(self):
    """
    Load a bowtie2/sam file through nucleotide.unaligned_reads
    Zero only the query coverage threshold for the load
    Check the number of aligned hits stored
    """

    # stores filled in by the load
    hits_store = store.Alignments()
    reads_store = store.Reads()

    # zero the query coverage threshold; the subject coverage threshold is
    # left at its current value
    config.nucleotide_query_coverage_threshold = 0

    fasta_path, reduced_sam_path = nucleotide.unaligned_reads(
        cfg.sam_file_unaligned_reads, hits_store, reads_store, keep_sam=True)

    # put the query threshold back to the default held on the test case
    config.nucleotide_query_coverage_threshold = self.default_nucleotide_query_coverage_threshold

    # the generated files are not needed for the count check
    for temp_path in (fasta_path, reduced_sam_path):
        utils.remove_temp_file(temp_path)

    # compare the stored hit count with the expected total
    aligned_total = len(hits_store.get_hit_list())
    self.assertEqual(aligned_total,
                     cfg.sam_file_unaligned_reads_total_aligned_subject_coverage)
def test_pathways_abundance_with_names(self):
    """
    Test the pathways abundance computation (xipe and minpath are off)
    Test the pathways print function
    Test the pathways mapping to names
    Test the unmapped and unintegrated values are printed
    """

    # update the max decimals to allow for rounding
    config.output_max_decimals = 7

    # Load in the pathways databases
    reactions_database = store.ReactionsDatabase(config.pathways_database_part1)
    pathways_database = store.PathwaysDatabase(
        config.pathways_database_part2, reactions_database)

    # Load in the gene scores from the file
    # This file has the gene names included
    gene_scores = store.GeneScores()
    gene_scores.add_from_file(cfg.larger_gene_families_uniref50_with_names_file)

    # Turn off xipe and minpath
    minpath_toggle_original = config.minpath_toggle
    config.minpath_toggle = "off"
    xipe_toggle_original = config.xipe_toggle
    config.xipe_toggle = "off"

    # paths to delete once the comparison is done (or has failed)
    temp_files = []
    try:
        pathways_and_reactions_store = modules.identify_reactions_and_pathways(
            gene_scores, reactions_database, pathways_database)

        # set the locations to write as temp files
        file_out, abundance_file = tempfile.mkstemp()
        os.close(file_out)
        temp_files.append(abundance_file)
        config.pathabundance_file = abundance_file

        file_out, coverage_file = tempfile.mkstemp()
        os.close(file_out)
        temp_files.append(coverage_file)
        config.pathcoverage_file = coverage_file

        unaligned_reads_count = 10
        abundance_file, coverage_file = modules.compute_pathways_abundance_and_coverage(
            gene_scores, reactions_database, pathways_and_reactions_store,
            pathways_database, unaligned_reads_count)

        # from here on clean up the paths the computation reported
        temp_files = [abundance_file, coverage_file]

        # compare the abundance output with the expected output
        files_match = filecmp.cmp(abundance_file, cfg.demo_pathabundance_file,
                                  shallow=False)
    finally:
        # Reset xipe and minpath and remove the temp files even on failure
        config.minpath_toggle = minpath_toggle_original
        config.xipe_toggle = xipe_toggle_original
        for temp_file in temp_files:
            utils.remove_temp_file(temp_file)

    # check the output is as expected
    self.assertTrue(files_match)
def test_fastq_to_fasta(self):
    """
    Convert the fastq fixture with utilities.fastq_to_fasta
    Check the result is byte-for-byte equal to the expected fasta file
    """

    converted_path = utilities.fastq_to_fasta(cfg.convert_fastq_file)

    # shallow=False forces a comparison of the file contents
    contents_match = filecmp.cmp(converted_path, cfg.convert_fasta_file,
                                 shallow=False)
    self.assertTrue(contents_match)

    utils.remove_temp_file(converted_path)
def test_fastq_to_fasta_with_pick_frames(self):
    """
    Convert the fastq fixture with utilities.fastq_to_fasta and apply_pick_frames on
    Check the result is byte-for-byte equal to the expected pick frames fasta file
    """

    converted_path = utilities.fastq_to_fasta(cfg.convert_fastq_file,
                                              apply_pick_frames=True)

    # shallow=False forces a comparison of the file contents
    contents_match = filecmp.cmp(converted_path,
                                 cfg.convert_fasta_pick_frames_file,
                                 shallow=False)
    self.assertTrue(contents_match)

    utils.remove_temp_file(converted_path)
def test_pick_frames_from_fasta(self):
    """
    Run utilities.pick_frames_from_fasta on the multiline fasta fixture
    Check the result is byte-for-byte equal to the expected pick frames fasta file
    """

    picked_path = utilities.pick_frames_from_fasta(cfg.convert_fasta_multiline_file)

    # shallow=False forces a comparison of the file contents
    contents_match = filecmp.cmp(picked_path,
                                 cfg.convert_fasta_pick_frames_file,
                                 shallow=False)
    self.assertTrue(contents_match)

    utils.remove_temp_file(picked_path)
def test_translated_search_unaligned_reads_blastm8(self):
    """
    Test the unaligned reads and the store alignments
    Test with a blastm8-like output file
    Test with empty reads structure
    Test that function does not require gene lengths in reference id
    Test without the coverage filter
    """

    # create a set of alignments
    alignments = store.Alignments()

    # set the coverage threshold to zero so as to not test with filter on
    current_coverage_threshold = config.translated_subject_coverage_threshold
    config.translated_subject_coverage_threshold = 0

    try:
        # load the blastm8-like output; the context manager closes the
        # handle even if a line fails to parse
        with open(cfg.rapsearch2_output_file_without_header) as file_handle:
            for line in file_handle:
                if not re.search("^#", line):
                    data = line.strip().split(config.blast_delimiter)

                    referenceid = data[config.blast_reference_index]
                    queryid = data[config.blast_query_index]
                    identity = float(data[config.blast_identity_index])
                    alignment_length = float(data[config.blast_aligned_length_index])

                    alignments.add(referenceid, 0, queryid,
                                   identity / 100.0 * alignment_length,
                                   "unclassified", alignment_length)

        alignments_test = store.Alignments()
        unaligned_reads_store = store.Reads()

        # load the blastm8-like output with the unaligned reads function
        unaligned_file_fasta = translated.unaligned_reads(
            unaligned_reads_store, cfg.rapsearch2_output_file_without_header,
            alignments_test)
    finally:
        # reset the coverage threshold even when loading raises, so the
        # modified value cannot affect later tests
        config.translated_subject_coverage_threshold = current_coverage_threshold

    # remove temp file
    utils.remove_temp_file(unaligned_file_fasta)

    # check the values are unchanged
    self.assertEqual(sorted(alignments.get_hit_list()),
                     sorted(alignments_test.get_hit_list()))
def test_fastq_to_fasta(self):
    """
    Convert a fastq fixture whose quality lines start with the @ character
    Both sequence id lines and those quality lines begin with @, so this checks
    the ids and sequences are still picked out correctly
    Check the result is byte-for-byte equal to the expected fasta file
    """

    converted_path = utilities.fastq_to_fasta(cfg.convert_fastq_at_character_file)

    # shallow=False forces a comparison of the file contents
    contents_match = filecmp.cmp(converted_path, cfg.convert_fasta_file,
                                 shallow=False)
    self.assertTrue(contents_match)

    utils.remove_temp_file(converted_path)
def test_break_up_fasta_file(self):
    """
    Test the break_up_fasta_file function
    """

    # Break up the file into smaller files each containing a single read
    new_fasta_files = utilities.break_up_fasta_file(cfg.small_fasta_file, 1)

    # count the reads in every file and remove the files before asserting,
    # so one bad count does not leave the remaining temp files behind
    sequence_counts = []
    for fasta_file in new_fasta_files:
        sequence_counts.append(utilities.count_reads(fasta_file))
        utils.remove_temp_file(fasta_file)

    # every file should contain exactly one read
    for sequence_count in sequence_counts:
        self.assertEqual(sequence_count, 1)
def test_gene_families_tsv_output(self):
    """
    Test the gene families function and the blast config indexes
    Test UniRef50_unknown is read in and used for gene scores but not printed
    Test the tsv output
    """

    # create a set of alignments
    alignments = store.Alignments()

    # load the usearch output; the context manager closes the handle even
    # if a line fails to parse
    with open(cfg.usearch_file) as file_handle:
        for line in file_handle:
            if not re.search("^#", line):
                data = line.strip().split(config.blast_delimiter)

                referenceids = data[config.blast_reference_index].split("|")
                queryid = data[config.blast_query_index]
                evalue = float(data[config.blast_evalue_index])

                alignments.add(referenceids[1], 1, queryid, evalue, referenceids[0])

    # set the output format
    config.output_format = "tsv"

    # set the location of the file to write to as a temp file
    file_out, gene_families_file = tempfile.mkstemp()
    os.close(file_out)
    config.genefamilies_file = gene_families_file

    try:
        # create gene_scores instance
        gene_scores = store.GeneScores()

        # obtain the gene families
        gene_families_file = quantify_families.gene_families(alignments, gene_scores)

        # compare the gene families output with the expected output
        files_match = filecmp.cmp(gene_families_file, cfg.gene_familes_file,
                                  shallow=False)
    finally:
        # delete the temp file even if the computation or comparison fails
        utils.remove_temp_file(gene_families_file)

    # check the gene families output is as expected
    self.assertTrue(files_match)
def test_remove_spaces_from_file(self):
    """
    Run utilities.remove_spaces_from_file on the fastq fixture containing spaces
    Check the lines written match the lines of the expected no-spaces file
    """

    cleaned_path = utilities.remove_spaces_from_file(cfg.small_fastq_spaces_file)

    # read both files fully into lists of lines
    with open(cfg.small_fastq_no_spaces_file) as expected_handle:
        expected_lines = list(expected_handle)

    with open(cleaned_path) as actual_handle:
        actual_lines = list(actual_handle)

    # the generated file is deleted before the comparison is asserted
    utils.remove_temp_file(cleaned_path)

    self.assertEqual(expected_lines, actual_lines)
def test_sam_to_fastq(self):
    """
    Test the sam to fastq function
    Test sam file contains one read with two mappings (to test it is only
    written once to the fastq output file)
    """
    import os

    # create a temp file; only the path is used, so close the descriptor
    # returned by mkstemp instead of leaking it
    file_handle, temp_output_file = tempfile.mkstemp(prefix="kneaddata_test")
    os.close(file_handle)

    try:
        utilities.sam_to_fastq(cfg.file_sam, temp_output_file)

        files_match = filecmp.cmp(temp_output_file,
                                  cfg.fastq_file_matches_sam_and_bam,
                                  shallow=False)
    finally:
        # remove the temp file even if the conversion or comparison fails
        utils.remove_temp_file(temp_output_file)

    self.assertTrue(files_match)
def test_nucleotide_search_unaligned_reads_annotations_gene_length(self):
    """
    Load a bowtie2/sam annotations file through nucleotide.unaligned_reads
    Check the number of hits stored
    Check the length stored with each hit against values derived from a
    read length of 151
    """

    # stores filled in by the load
    hits_store = store.Alignments()
    reads_store = store.Reads()

    # zero both coverage thresholds while the sam file is read
    config.nucleotide_subject_coverage_threshold = 0
    config.nucleotide_query_coverage_threshold = 0

    fasta_path, reduced_sam_path = nucleotide.unaligned_reads(
        cfg.sam_file_annotations, hits_store, reads_store, keep_sam=True)

    # put the thresholds back to the defaults held on the test case
    config.nucleotide_subject_coverage_threshold = self.default_nucleotide_subject_coverage_threshold
    config.nucleotide_query_coverage_threshold = self.default_nucleotide_query_coverage_threshold

    # the generated files are not needed for the length check
    for temp_path in (fasta_path, reduced_sam_path):
        utils.remove_temp_file(temp_path)

    # four hits are expected from the annotations file
    hit_list = hits_store.get_hit_list()
    self.assertEqual(len(hit_list), 4)

    # expected lengths: (|gene length - read length| + 1) / 1000.0, with a
    # gene length of 2000 for "UniRef50" hits and 1000 for all others
    read_length = 151
    uniref50_expected = (abs(2000 - read_length) + 1) / 1000.0
    other_expected = (abs(1000 - read_length) + 1) / 1000.0

    for _query, _bug, reference, _score, length in hit_list:
        expected_length = uniref50_expected if reference == "UniRef50" else other_expected
        self.assertEqual(length, expected_length)