Python Alignments 예제들, humann2.store.Alignments Python 예제들

예제 #1

0

파일 보기

파일: advanced_tests_translated_search.py 프로젝트: dytk2134/humann2

    def test_translated_search_unaligned_reads_blastm8(self):
        """
        Test the unaligned reads and the store alignments
        Test with a blastm8-like output file
        Test with empty reads structure
        Test that function does not require gene lengths in reference id
        Test without the coverage filter

        The expected alignments are built by parsing the file by hand and
        are compared with those stored by translated.unaligned_reads
        """

        # create a set of alignments
        alignments = store.Alignments()

        # set the coverage threshold to zero so as to not test with filter on
        current_coverage_threshold = config.translated_subject_coverage_threshold
        config.translated_subject_coverage_threshold = 0

        try:
            # load the blastm8-like output; the context manager closes the
            # file even if a line can not be parsed
            with open(cfg.rapsearch2_output_file_without_header) as file_handle:
                for line in file_handle:
                    # skip the comment lines
                    if line.startswith("#"):
                        continue

                    data = line.strip().split(config.blast_delimiter)

                    referenceid = data[config.blast_reference_index]
                    queryid = data[config.blast_query_index]
                    identity = float(data[config.blast_identity_index])
                    alignment_length = float(
                        data[config.blast_aligned_length_index])

                    # identity is a percent so convert it to a count over
                    # the aligned length before storing
                    alignments.add(referenceid, 0, queryid,
                                   identity / 100.0 * alignment_length,
                                   "unclassified", alignment_length)

            alignments_test = store.Alignments()
            unaligned_reads_store = store.Reads()

            # load the blastm8-like output with the unaligned reads function
            unaligned_file_fasta = translated.unaligned_reads(
                unaligned_reads_store,
                cfg.rapsearch2_output_file_without_header, alignments_test)

            # remove temp file
            utils.remove_temp_file(unaligned_file_fasta)
        finally:
            # reset the coverage threshold, even on error, so the modified
            # config does not leak into other tests
            config.translated_subject_coverage_threshold = current_coverage_threshold

        # check the values are unchanged
        self.assertEqual(sorted(alignments.get_hit_list()),
                         sorted(alignments_test.get_hit_list()))

예제 #2

0

파일 보기

파일: advanced_tests_translated_search.py 프로젝트: dytk2134/humann2

    def test_translated_search_unaligned_reads_annotations_bug(self):
        """
        Test the unaligned reads and the store alignments
        Test with a rapsearch2 output file
        Test the different annotation formats are recognized for bug
        Test without the coverage filter
        """

        # create a set of alignments
        alignments = store.Alignments()
        unaligned_reads_store = store.Reads()

        # set the coverage threshold to zero so as to not test with filter on
        current_coverage_threshold = config.translated_subject_coverage_threshold
        config.translated_subject_coverage_threshold = 0

        try:
            # load the rapsearch2 output with the unaligned reads function
            unaligned_file_fasta = translated.unaligned_reads(
                unaligned_reads_store, cfg.rapsearch_file_annotations,
                alignments)

            # remove temp file
            utils.remove_temp_file(unaligned_file_fasta)
        finally:
            # reset the coverage threshold, even on error, so the modified
            # config does not leak into other tests
            config.translated_subject_coverage_threshold = current_coverage_threshold

        # there should be one bug name and the other should be unclassified
        self.assertEqual(
            sorted(alignments.bug_list()),
            sorted(["s__Bacteroides_xylanisolvens", "unclassified"]))

예제 #3

0

파일 보기

    def test_Alignments_id_mapping_half_hits_with_temp_alignment_file(self):
        """
        Check the lengths stored by add_annotated after an id mapping file
        has been loaded with process_id_mapping
        Two of the references carry their own length in the annotation
        The store is created with minimize_memory_use enabled
        """

        mapped_store = store.Alignments(minimize_memory_use=True)
        mapped_store.process_id_mapping(cfg.id_mapping_file)

        # add four alignments, two plain references and two with lengths
        for query, reference in (("query1", "ref1"),
                                 ("query2", "ref2"),
                                 ("query3", "ref1|100"),
                                 ("query3", "200|ref2")):
            mapped_store.add_annotated(query, 1, reference)

        # read the hits back before the temp alignment file is removed
        hits = mapped_store.get_hit_list()
        mapped_store.delete_temp_alignments_file()

        # the length is the last item of each hit
        observed_lengths = sorted(hit[-1] for hit in hits)
        expected_lengths = sorted(
            [1 / 1000.0, 100 / 1000.0, 200 / 1000.0, 1000 / 1000.0])

        self.assertEqual(observed_lengths, expected_lengths)

예제 #4

0

파일 보기

    def test_nucleotide_search_unaligned_reads_output_fasta_format(self):
        """
        Test the unaligned reads and the store alignments
        Test with a bowtie2/sam output file
        Test output file is of fasta format
        Test sam file is not removed
        """

        # create a set of alignments
        alignments = store.Alignments()
        unaligned_reads_store = store.Reads()

        # read in the aligned and unaligned reads
        [unaligned_reads_file_fasta, reduced_aligned_reads_file
         ] = nucleotide.unaligned_reads(cfg.sam_file_unaligned_reads,
                                        alignments,
                                        unaligned_reads_store,
                                        keep_sam=True)

        # determine the format while the output file still exists
        file_format = utilities.determine_file_format(
            unaligned_reads_file_fasta)

        # remove temp files before asserting so that a failed assertion
        # does not leave them behind
        utils.remove_temp_file(unaligned_reads_file_fasta)
        utils.remove_temp_file(reduced_aligned_reads_file)

        # check for fasta output file format
        self.assertEqual("fasta", file_format)

예제 #5

0

파일 보기

파일: advanced_tests_quantify_families.py 프로젝트: dytk2134/humann2

    def test_gene_families_tsv_output_with_names(self):
        """
        Test the gene families function and the blast config indexes
        Test UniRef50_unknown is read in and used for gene scores but not printed
        Test the tsv output
        Test that gene families have names applied to them
        Test unmapped reads total is written with the same precision as other lines
        """

        # update the max decimals to allow for rounding
        config.output_max_decimals = 7

        # set to a smaller mapping file
        original_gene_family_mapping_file = config.gene_family_name_mapping_file
        config.gene_family_name_mapping_file = cfg.gene_families_to_names_file

        # path of the temp output file, set once the file has been created
        gene_families_file = None

        try:
            # create a set of alignments
            alignments = store.Alignments()

            # load the usearch output; the context manager closes the file
            # even if a line can not be parsed
            with open(cfg.usearch_uniref50_file) as file_handle:
                for line in file_handle:
                    # skip the comment lines
                    if line.startswith("#"):
                        continue

                    data = line.strip().split(config.blast_delimiter)

                    referenceids = data[config.blast_reference_index].split("|")
                    queryid = data[config.blast_query_index]
                    identity = float(data[config.blast_identity_index])

                    alignments.add(referenceids[1], 1, queryid, identity,
                                   referenceids[0])

            # set the output format
            config.output_format = "tsv"

            # set the location of the file to write to as a temp file
            file_out, gene_families_file = tempfile.mkstemp()
            os.close(file_out)
            config.genefamilies_file = gene_families_file

            # create gene_scores instance
            gene_scores = store.GeneScores()

            # obtain the gene families
            gene_families_file = families.gene_families(alignments,
                                                        gene_scores, 1)

            # check the gene families output is as expected
            self.assertTrue(
                filecmp.cmp(gene_families_file,
                            cfg.gene_familes_uniref50_with_names_file,
                            shallow=False))
        finally:
            # reset the mapping file, even if the check above failed
            config.gene_family_name_mapping_file = original_gene_family_mapping_file

            # delete the temp file if it was created
            if gene_families_file is not None:
                utils.remove_temp_file(gene_families_file)

예제 #6

0

파일 보기

    def test_nucleotide_search_unaligned_reads_read_count_aligned_identity_threshold(
            self):
        """
        Test the unaligned reads and the store alignments
        Test with a bowtie2/sam output file
        Test for aligned read counts
        Test the identity threshold does filter alignments
        """

        # create a set of alignments
        alignments = store.Alignments()
        unaligned_reads_store = store.Reads()

        # update the identity threshold to a number larger than those in the alignments
        original_identity_threshold = config.identity_threshold
        config.identity_threshold = 101.0

        try:
            # read in the aligned and unaligned reads
            [unaligned_reads_file_fasta, reduced_aligned_reads_file
             ] = nucleotide.unaligned_reads(cfg.sam_file_unaligned_reads,
                                            alignments,
                                            unaligned_reads_store,
                                            keep_sam=True)

            # remove temp files
            utils.remove_temp_file(unaligned_reads_file_fasta)
            utils.remove_temp_file(reduced_aligned_reads_file)
        finally:
            # reset the identity threshold back to the original, even on
            # error, so the modified config does not leak into other tests
            config.identity_threshold = original_identity_threshold

        # check the aligned reads count (it should be zero as none should pass the threshold)
        self.assertEqual(len(alignments.get_hit_list()), 0)

예제 #7

0

파일 보기

    def test_nucleotide_search_unaligned_reads_read_count_unaligned_minimize_memory_use(
            self):
        """
        Run nucleotide.unaligned_reads on a bowtie2/sam output file
        Check the number of unaligned reads that are stored
        The reads store is created with minimize_memory_use enabled
        """

        hits_store = store.Alignments()
        reads_store = store.Reads(minimize_memory_use=True)

        # process the sam file, keeping the sam file itself
        fasta_file, reduced_file = nucleotide.unaligned_reads(
            cfg.sam_file_unaligned_reads,
            hits_store,
            reads_store,
            keep_sam=True)

        # the output files are not needed for the count check
        for temp_file in (fasta_file, reduced_file):
            utils.remove_temp_file(temp_file)

        # compare the stored count with the expected total
        unaligned_total = reads_store.count_reads()
        self.assertEqual(unaligned_total,
                         cfg.sam_file_unaligned_reads_total_unaligned)

예제 #8

0

파일 보기

    def test_nucleotide_search_unaligned_reads_output_blast_format(self):
        """
        Run nucleotide.unaligned_reads on a bowtie2/sam output file
        Check the reduced aligned reads file is of the blastm8 format
        """

        hits_store = store.Alignments()
        reads_store = store.Reads()

        # set the basename in the config before the search is run
        config.file_basename = "TEST"

        # process the sam file, keeping the sam file itself
        fasta_file, reduced_file = nucleotide.unaligned_reads(
            cfg.sam_file_annotations,
            hits_store,
            reads_store,
            keep_sam=True)

        # the format must be determined while the file still exists
        detected_format = utilities.determine_file_format(reduced_file)

        # clean up the output files before asserting
        for temp_file in (fasta_file, reduced_file):
            utils.remove_temp_file(temp_file)

        self.assertEqual(detected_format, "blastm8")

예제 #9

0

파일 보기

파일: advanced_tests_quantify_families.py 프로젝트: dytk2134/humann2

    def test_gene_families_gene_list(self):
        """
        Test the gene families function and the blast config indexes
        Test UniRef50_unknown is read in and used for gene scores but not printed
        Test the gene list
        """

        # create a set of alignments
        alignments = store.Alignments()

        # load the usearch output; the context manager closes the file
        # even if a line can not be parsed
        with open(cfg.usearch_file) as file_handle:
            for line in file_handle:
                # skip the comment lines
                if line.startswith("#"):
                    continue

                data = line.strip().split(config.blast_delimiter)

                referenceids = data[config.blast_reference_index].split("|")
                queryid = data[config.blast_query_index]
                identity = float(data[config.blast_identity_index])

                alignments.add(referenceids[1], 1, queryid, identity,
                               referenceids[0])

        # check the genes were loaded correctly
        self.assertEqual(sorted(cfg.usearch_file_gene_list),
                         sorted(alignments.gene_list()))

예제 #10

0

파일 보기

    def test_nucleotide_search_unaligned_reads_scores(self):
        """
        Run nucleotide.unaligned_reads on a bowtie2/sam output file
        Check the score stored for each hit
        """

        hits_store = store.Alignments()
        reads_store = store.Reads()

        # process the sam file, keeping the sam file itself
        fasta_file, reduced_file = nucleotide.unaligned_reads(
            cfg.sam_file_annotations,
            hits_store,
            reads_store,
            keep_sam=True)

        # the output files are not needed for the score check
        for temp_file in (fasta_file, reduced_file):
            utils.remove_temp_file(temp_file)

        # every hit is expected to have the same score
        score_expected = math.pow(151.0, config.match_power)

        for stored_hit in hits_store.get_hit_list():
            # each hit holds five items and the score is the fourth
            _query, _bug, _reference, score_stored, _length = stored_hit
            self.assertEqual(score_stored, score_expected)

예제 #11

0

파일 보기

파일: advanced_tests_translated_search.py 프로젝트: dytk2134/humann2

    def test_translated_search_unaligned_reads_annotations_reference(self):
        """
        Test the unaligned reads and the store alignments
        Test with a rapsearch2 output file
        Test the different annotation formats are recognized for reference
        Test without the coverage filter
        """

        # create a set of alignments
        alignments = store.Alignments()
        unaligned_reads_store = store.Reads()

        # set the coverage threshold to zero so as to not test with filter on
        current_coverage_threshold = config.translated_subject_coverage_threshold
        config.translated_subject_coverage_threshold = 0

        try:
            # load the rapsearch2 output with the unaligned reads function
            unaligned_file_fasta = translated.unaligned_reads(
                unaligned_reads_store, cfg.rapsearch_file_annotations,
                alignments)

            # remove temp file
            utils.remove_temp_file(unaligned_file_fasta)
        finally:
            # reset the coverage threshold, even on error, so the modified
            # config does not leak into other tests
            config.translated_subject_coverage_threshold = current_coverage_threshold

        # three of the hits should be for gene "UniRef50"
        hits = alignments.hits_for_gene("UniRef50")
        self.assertEqual(len(hits), 3)

예제 #12

0

파일 보기

    def test_nucleotide_search_unaligned_reads_read_count_aligned(self):
        """
        Run nucleotide.unaligned_reads on a bowtie2/sam output file
        Check the number of hits that are stored
        """

        hits_store = store.Alignments()
        reads_store = store.Reads()

        # process the sam file, keeping the sam file itself
        fasta_file, reduced_file = nucleotide.unaligned_reads(
            cfg.sam_file_unaligned_reads,
            hits_store,
            reads_store,
            keep_sam=True)

        # the output files are not needed for the count check
        for temp_file in (fasta_file, reduced_file):
            utils.remove_temp_file(temp_file)

        # compare the number of stored hits with the expected total
        aligned_total = len(hits_store.get_hit_list())
        self.assertEqual(aligned_total,
                         cfg.sam_file_unaligned_reads_total_aligned)

예제 #13

0

파일 보기

    def test_nucleotide_search_unaligned_reads_read_count_aligned_evalue_threshold(
            self):
        """
        Test the unaligned reads and the store alignments
        Test with a bowtie2/sam output file
        Test for aligned read counts
        Test the evalue threshold does not filter alignments
        """

        # create a set of alignments
        alignments = store.Alignments()
        unaligned_reads_store = store.Reads()

        # update the evalue threshold to a number less than those for the alignment file
        original_evalue_threshold = config.evalue_threshold
        config.evalue_threshold = 1e-15

        try:
            # read in the aligned and unaligned reads
            [unaligned_reads_file_fasta, reduced_aligned_reads_file
             ] = nucleotide.unaligned_reads(cfg.sam_file_unaligned_reads,
                                            alignments,
                                            unaligned_reads_store,
                                            keep_sam=True)

            # remove temp files
            utils.remove_temp_file(unaligned_reads_file_fasta)
            utils.remove_temp_file(reduced_aligned_reads_file)
        finally:
            # reset the evalue threshold back to the original, even on
            # error, so the modified config does not leak into other tests
            config.evalue_threshold = original_evalue_threshold

        # check the aligned reads count (all reads should be aligned even though they do not
        # meet the threshold as the evalue threshold is not applied for this type of alignment)
        self.assertEqual(len(alignments.get_hit_list()),
                         cfg.sam_file_unaligned_reads_total_aligned)

예제 #14

0

파일 보기

    def test_Alignments_id_mapping_all_bug_list_with_temp_alignment_file(self):
        """
        Check the bugs stored by add_annotated after an id mapping file
        has been loaded with process_id_mapping
        The store is created with minimize_memory_use enabled
        """

        mapped_store = store.Alignments(minimize_memory_use=True)
        mapped_store.process_id_mapping(cfg.id_mapping_file)

        # add one alignment for each of three references
        for query, reference in (("query1", "ref1"),
                                 ("query2", "ref2"),
                                 ("query3", "ref3")):
            mapped_store.add_annotated(query, 1, reference)

        # read the bugs back before the temp alignment file is removed
        bugs = mapped_store.bug_list()
        mapped_store.delete_temp_alignments_file()

        expected_bugs = sorted(["bug3", "unclassified"])
        self.assertEqual(sorted(bugs), expected_bugs)

예제 #15

0

파일 보기

    def test_nucleotide_search_unaligned_reads_annotations_reference(self):
        """
        Run nucleotide.unaligned_reads on a bowtie2/sam output file
        Check the number of hits stored for the gene "UniRef50"
        """

        hits_store = store.Alignments()
        reads_store = store.Reads()

        # process the sam file, keeping the sam file itself
        fasta_file, reduced_file = nucleotide.unaligned_reads(
            cfg.sam_file_annotations,
            hits_store,
            reads_store,
            keep_sam=True)

        # the output files are not needed for the hit check
        for temp_file in (fasta_file, reduced_file):
            utils.remove_temp_file(temp_file)

        # two hits are expected for this gene
        uniref50_hits = hits_store.hits_for_gene("UniRef50")
        self.assertEqual(len(uniref50_hits), 2)

예제 #16

0

파일 보기

파일: basic_tests_store.py 프로젝트: dytk2134/humann2

 def test_Alignments_process_chocophlan_length(self):
     """
     Check process_chocophlan_length with a single "start-end" location
     """

     computed_length = store.Alignments().process_chocophlan_length(
         "1-100", "gene")

     self.assertEqual(computed_length, 100)

예제 #17

0

파일 보기

파일: basic_tests_store.py 프로젝트: dytk2134/humann2

 def test_Alignments_process_chocophlan_length_multiple(self):
     """
     Check process_chocophlan_length with two comma separated locations
     The first location has the "c:" prefix and is written end to start
     """

     computed_length = store.Alignments().process_chocophlan_length(
         "c:100-1,1-100", "gene")

     self.assertEqual(computed_length, 200)

예제 #18

0

파일 보기

파일: basic_tests_store.py 프로젝트: dytk2134/humann2

 def test_Alignments_process_reference_annotation_gene_length_reversed(self):
     """
     Check process_reference_annotation when the length is placed before the gene
     """

     annotation = "3000|gene"

     parsed = store.Alignments().process_reference_annotation(annotation)

     # expected order is gene, length, bug
     self.assertEqual(["gene", 3000, "unclassified"], parsed)

예제 #19

0

파일 보기

파일: basic_tests_store.py 프로젝트: dytk2134/humann2

 def test_Alignments_process_reference_annotation_unknown_annotations_three_items_length_string(self):
     """
     Check process_reference_annotation with an unknown annotation of three items
     where the item in the second position is a string instead of a number
     """

     annotation = "UniRef90_W1Q3F0|UniRef50_P59787|5000"

     parsed = store.Alignments().process_reference_annotation(annotation)

     # the full annotation is expected back with a zero length and no bug
     self.assertEqual([annotation, 0, "unclassified"], parsed)

예제 #20

0

파일 보기

파일: basic_tests_store.py 프로젝트: dytk2134/humann2

 def test_Alignments_process_reference_annotation_unknown_annotations_four_items(self):
     """
     Check process_reference_annotation with an unknown annotation of four items
     """

     annotation = "UniRef90_W1Q3F0|UniRef50_P59787|5000|bug"

     parsed = store.Alignments().process_reference_annotation(annotation)

     # the full annotation is expected back with a zero length and no bug
     self.assertEqual([annotation, 0, "unclassified"], parsed)

예제 #21

0

파일 보기

파일: basic_tests_store.py 프로젝트: dytk2134/humann2

 def test_Alignments_process_reference_annotation_gene_length_with_bug(self):
     """
     Check process_reference_annotation with an annotation of gene, length, then bug
     """

     annotation = "gene|3000|bug"

     parsed = store.Alignments().process_reference_annotation(annotation)

     # expected order is gene, length, bug
     self.assertEqual(["gene", 3000, "bug"], parsed)

예제 #22

0

파일 보기

파일: basic_tests_store.py 프로젝트: dytk2134/humann2

 def test_Alignments_process_reference_annotation_unknown_annotations_three_items_bug_int(self):
     """
     Check process_reference_annotation with an unknown annotation of three items
     where the item in the last position is a number instead of a bug name
     """

     annotation = "UniRef90_W1Q3F0|5000|5000"

     parsed = store.Alignments().process_reference_annotation(annotation)

     # the full annotation is expected back with a zero length and no bug
     self.assertEqual([annotation, 0, "unclassified"], parsed)

예제 #23

0

파일 보기

파일: basic_tests_store.py 프로젝트: dytk2134/humann2

 def test_Alignments_process_reference_annotation_numerical_gene_length(self):
     """
     Check process_reference_annotation when the gene name is itself a number
     """

     annotation = "59787|5000"

     parsed = store.Alignments().process_reference_annotation(annotation)

     # the gene is expected as a string and the length as a number
     self.assertEqual(["59787", 5000, "unclassified"], parsed)

예제 #24

0

파일 보기

    def test_Alignments_compute_gene_scores_double_gene_double_query_with_temp_alignment_file(
            self):
        """
        Check the gene score from convert_alignments_to_gene_scores
        The gene checked has two hits, one of them from a query that also
        hits another gene
        The store is created with minimize_memory_use enabled
        """

        # matches for each of the four hits
        matches_q1_g1 = 41.0
        matches_q1_g2 = 57.1
        matches_q2_g2 = 61.0
        matches_q3_g3 = 72.1

        # gene lengths
        length_g1 = 2
        length_g2 = 3
        length_g3 = 4

        # store the hits as (reference, length, query, matches, bug)
        hits_store = store.Alignments(minimize_memory_use=True)
        for reference, length, query, matches in (
                ("gene1", length_g1, "query1", matches_q1_g1),
                ("gene2", length_g2, "query1", matches_q1_g2),
                ("gene2", length_g2, "query2", matches_q2_g2),
                ("gene3", length_g3, "query3", matches_q3_g3)):
            hits_store.add(reference, length, query, matches, "bug1")

        scores_store = store.GeneScores()
        hits_store.convert_alignments_to_gene_scores(scores_store)

        # total score for query1, which hits both gene1 and gene2
        score_q1_g1 = math.pow(matches_q1_g1, config.match_power)
        score_q1_g2 = math.pow(matches_q1_g2, config.match_power)
        total_q1 = score_q1_g1 + score_q1_g2

        # total score for query2, which only hits gene2
        score_q2_g2 = math.pow(matches_q2_g2, config.match_power)
        total_q2 = score_q2_g2

        # the length of gene2 per kb
        length_g2_kb = length_g2 / 1000.0

        # each hit adds its share of the query total, divided by the length
        score_expected = score_q2_g2 / total_q2 / length_g2_kb + score_q1_g2 / total_q1 / length_g2_kb

        score_actual = scores_store.get_score("bug1", "gene2")

        # remove the temp alignment file before asserting
        hits_store.delete_temp_alignments_file()

        self.assertAlmostEqual(score_actual, score_expected, places=7)

예제 #25

0

파일 보기

파일: basic_tests_store.py 프로젝트: dytk2134/humann2

 def test_Alignments_process_reference_annotation_new_chocophlan_annotations(self):
     """
     Check process_reference_annotation with an annotation in the new chocophlan format
     """

     annotation = "gi|554771211|gb|ACIN03000006.1|:c1189-5|46125|g__Abiotrophia.s__Abiotrophia_defectiva|UniRef90_W1Q3F0|UniRef50_P59787|5000"

     parsed = store.Alignments().process_reference_annotation(annotation)

     # expected order is gene, length, bug
     self.assertEqual(
         ["UniRef50_P59787", 5000, "g__Abiotrophia.s__Abiotrophia_defectiva"],
         parsed)

예제 #26

0

파일 보기

파일: basic_tests_store.py 프로젝트: dytk2134/humann2

 def test_Alignments_add_gene_count(self):
     """
     Alignments class: add four hits, two of them to the same gene
     Check the total number of genes counted
     """

     hits_store = store.Alignments()

     for reference, query, bug in (("gene2", "Q3", "bug1"),
                                   ("gene1", "Q1", "bug2"),
                                   ("gene3", "Q2", "bug3"),
                                   ("gene1", "Q1", "bug1")):
         hits_store.add(reference, 1, query, 0.01, bug, 1)

     # three different genes were added
     self.assertEqual(hits_store.count_genes(), 3)

예제 #27

0

파일 보기

파일: basic_tests_store.py 프로젝트: dytk2134/humann2

 def test_Alignments_add_gene_list(self):
     """
     Alignments class: add four hits, two of them to the same gene
     Check the list of genes
     """

     hits_store = store.Alignments()

     for reference, query, bug in (("gene2", "Q3", "bug1"),
                                   ("gene1", "Q1", "bug2"),
                                   ("gene3", "Q2", "bug3"),
                                   ("gene1", "Q1", "bug1")):
         hits_store.add(reference, 1, query, 0.01, bug, 1)

     # each gene is expected once in the list
     genes_found = sorted(hits_store.gene_list())
     self.assertEqual(genes_found, ["gene1", "gene2", "gene3"])

예제 #28

0

파일 보기

파일: basic_tests_store.py 프로젝트: dytk2134/humann2

 def test_Alignments_add_gene_lengths(self):
     """
     Alignments class: add four hits with different gene lengths
     Check the lengths stored in the hit list
     """

     hits_store = store.Alignments()

     for reference, length, query, bug in (("gene2", 10, "Q3", "bug1"),
                                           ("gene1", 100, "Q1", "bug2"),
                                           ("gene3", 1000, "Q2", "bug3"),
                                           ("gene1", 0, "Q1", "bug1")):
         hits_store.add(reference, length, query, 0.01, bug, 1)

     # the length is the last item of each hit
     lengths_found = sorted(hit[-1] for hit in hits_store.get_hit_list())
     lengths_expected = sorted(
         [10 / 1000.0, 100 / 1000.0, 1000 / 1000.0, 1000 / 1000.0])

     self.assertEqual(lengths_found, lengths_expected)

예제 #29

0

파일 보기

    def test_Alignments_compute_gene_scores_single_gene_single_query_with_temp_alignment_file(
            self):
        """
        Check the gene score from convert_alignments_to_gene_scores
        The gene checked has a single hit from a query with a single hit
        The store is created with minimize_memory_use enabled
        """

        # matches for each of the four hits
        matches_q1_g1 = 41.0
        matches_q1_g2 = 57.1
        matches_q2_g2 = 61.0
        matches_q3_g3 = 72.1

        # gene lengths
        length_g1 = 2
        length_g2 = 3
        length_g3 = 4

        # store the hits, all for the same bug
        hits_store = store.Alignments(minimize_memory_use=True)
        for reference, length, query, matches in (
                ("gene1", length_g1, "query1", matches_q1_g1),
                ("gene2", length_g2, "query1", matches_q1_g2),
                ("gene2", length_g2, "query2", matches_q2_g2),
                ("gene3", length_g3, "query3", matches_q3_g3)):
            hits_store.add(reference, length, query, matches, "bug1")

        scores_store = store.GeneScores()
        hits_store.convert_alignments_to_gene_scores(scores_store)

        # the length of gene3 per kb
        length_g3_kb = length_g3 / 1000.0

        # query3 only hits gene3 so the hit score is also the query total
        score_q3_g3 = math.pow(matches_q3_g3, config.match_power)
        total_q3 = score_q3_g3
        score_expected = score_q3_g3 / total_q3 / length_g3_kb

        score_actual = scores_store.get_score("bug1", "gene3")

        # remove the temp alignment file before asserting
        hits_store.delete_temp_alignments_file()

        self.assertEqual(score_actual, score_expected)

예제 #30

0

파일 보기

파일: advanced_tests_translated_search.py 프로젝트: dytk2134/humann2

    def test_translated_search_unaligned_reads_annotations_gene_length(self):
        """
        Test the unaligned reads and the store alignments
        Test with a rapsearch2 output file
        Test the different annotation formats are recognized for gene length
        Test without the coverage filter
        """

        # create a set of alignments
        alignments = store.Alignments()
        unaligned_reads_store = store.Reads()

        # set the coverage threshold to zero so as to not test with filter on
        current_coverage_threshold = config.translated_subject_coverage_threshold
        config.translated_subject_coverage_threshold = 0

        try:
            # load the rapsearch2 output with the unaligned reads function
            unaligned_file_fasta = translated.unaligned_reads(
                unaligned_reads_store, cfg.rapsearch_file_annotations,
                alignments)

            # remove temp file
            utils.remove_temp_file(unaligned_file_fasta)
        finally:
            # reset the coverage threshold, even on error, so the modified
            # config does not leak into other tests
            config.translated_subject_coverage_threshold = current_coverage_threshold

        # there should be 4 hits identified
        all_hits = alignments.get_hit_list()
        self.assertEqual(len(all_hits), 4)

        # expected lengths (per kb) for the UniRef50 hits and for all
        # other hits
        read_length = 50
        expected_length_uniref50 = (abs(2000 - read_length) + 1) / 1000.0
        expected_length_other = (abs(1000 - read_length) + 1) / 1000.0

        # check for set and default gene lengths
        for hit in all_hits:
            # only the reference and the length of each hit are checked
            _query, _bug, reference, _score, length = hit
            if reference == "UniRef50":
                self.assertEqual(length, expected_length_uniref50)
            else:
                self.assertEqual(length, expected_length_other)