예제 #1
0
 def snp(self):
     """Validate the SNP-calling options and launch SNP/mutation calling.

     The method checks the FASTA input and the ``bam_type``, ``ploidy``
     and ``caller`` option values, resolves the bcftools/samtools
     executables through ``check_execute_file``, creates the sub-folders
     required for "snp", packs the options with
     ``args_container.container_snp`` and runs
     ``SNPCalling.run_snp_calling``.

     The process exits through ``sys.exit()`` as soon as one of the
     validated options holds an unsupported value, so SNP calling is never
     started with a configuration that was just reported as invalid.
     """
     print("Running SNP/mutations calling")
     self.check_multi_files(
             [self._args.fasta_files],
             ["--fasta_files"])
     if self._args.bam_type not in ("related_genome", "reference_genome"):
         print("Error: Please assign \"related_genome\" or"
               " \"reference_genome\" to --bam_type!")
         sys.exit()
     if self._args.ploidy not in ("haploid", "diploid"):
         # NOTE(review): the message names --chromosome_type while the value
         # is read from ``ploidy`` -- confirm the option name with the CLI.
         print("Error: Please assign \"haploid\" or"
               " \"diploid\" to --chromosome_type!")
         # Stop here, exactly like the --bam_type check above.
         sys.exit()
     if self._args.caller not in ("c", "m"):
         print("Error: Please assign \"c\" or"
               " \"m\" to --caller!")
         sys.exit()
     # Replace each configured tool path by what check_execute_file returns.
     for prop in ("bcftools_path", "samtools_path"):
         setattr(self._args, prop,
                 self.check_execute_file(getattr(self._args, prop)))
     project_creator.create_subfolders(self._paths.required_folders("snp"))
     args_snp = self.args_container.container_snp(
         self._args.samtools_path, self._args.bcftools_path,
         self._args.bam_type,
         self._args.program, self._args.fasta_files,
         self._args.bam_files,
         self._args.quality, self._args.read_depth_range,
         self._paths.snp_output_folder, self._args.indel_fraction,
         self._args.ploidy, self._args.rg_tag, self._args.caller,
         self._args.filter_tag_info, self._args.dp4_cutoff)
     snp = SNPCalling(args_snp)
     snp.run_snp_calling(args_snp)
예제 #2
0
 def setUp(self):
     """Create the scratch folder tree and the ``SNPCalling`` under test.

     The folders are only created when ``test_folder`` does not exist yet.
     """
     self.example = Example()
     self.mock_args = MockClass()
     self.test_folder = "test_folder"
     self.fasta = os.path.join(self.test_folder, "fasta")
     self.snp_folder = os.path.join(self.test_folder, "snp")
     self.table = os.path.join(self.test_folder, "table")
     if not os.path.exists(self.test_folder):
         scratch_dirs = [
             self.test_folder, self.fasta, self.snp_folder, self.table]
         scratch_dirs += [
             os.path.join(self.test_folder, relative)
             for relative in (
                 "compare_reference",
                 "compare_reference/seqs",
                 "compare_reference/seqs/with_BAQ",
                 "compare_reference/statistics",
             )
         ]
         # Parents come before children, so plain mkdir is sufficient.
         for directory in scratch_dirs:
             os.mkdir(directory)
     mocked = self.mock_args.mock()
     mocked.types = "reference"
     mocked.out_folder = self.test_folder
     mocked.fastas = self.fasta
     self.snp = SNPCalling(mocked)
     self.mock = Mock_func()
예제 #3
0
 def snp(self):
     """Validate the SNP-calling options and launch SNP/mutation calling.

     The method checks the FASTA folder and the ``bam_type``, ``ploidy``
     and ``caller`` option values, creates the sub-folders required for
     "snp", packs the options with ``args_container.container_snp`` and
     runs ``SNPCalling.run_snp_calling``.

     The process exits through ``sys.exit()`` as soon as one of the
     validated options holds an unsupported value, so SNP calling is never
     started with a configuration that was just reported as invalid.
     """
     print("Running SNP/mutations calling...")
     self.check_folder([self._args.fasta_path])
     if self._args.bam_type not in ("target", "reference"):
         print("Error: please assign \"target\" or"
               " \"reference\" to --bam_type!!")
         sys.exit()
     if self._args.ploidy not in ("haploid", "diploid"):
         # NOTE(review): the message names --chromosome_type while the value
         # is read from ``ploidy`` -- confirm the option name with the CLI.
         print("Error: please assign \"haploid\" or"
               " \"diploid\" to --chromosome_type!!")
         # Stop here, exactly like the --bam_type check above.
         sys.exit()
     if self._args.caller not in ("c", "m"):
         print("Error: please assign \"c\" or"
               " \"m\" to --caller!!")
         sys.exit()
     project_creator.create_subfolders(self._paths.required_folders("snp"))
     args_snp = self.args_container.container_snp(
         self._args.samtools_path, self._args.bcftools_path,
         self._args.bam_type, self._args.sample_number,
         self._args.program, self._args.fasta_path,
         self._args.tex_bam_path, self._args.frag_bam_path,
         self._args.quality, self._args.read_depth_range,
         self._paths.snp_output_folder, self._args.indel_fraction,
         self._args.ploidy, self._args.RG_tag, self._args.caller,
         self._args.filter_tag_info, self._args.DP4_cutoff)
     snp = SNPCalling(args_snp)
     snp.run_snp_calling(args_snp)
예제 #4
0
 def setUp(self):
     """Create the scratch folder tree and the ``SNPCalling`` under test.

     The folders are only created when ``test_folder`` does not exist yet.
     """
     self.example = Example()
     self.mock_args = MockClass()
     self.test_folder = "test_folder"
     self.fasta = os.path.join(self.test_folder, "fasta")
     self.snp_folder = os.path.join(self.test_folder, "snp")
     self.table = os.path.join(self.test_folder, "table")
     if not os.path.exists(self.test_folder):
         scratch_dirs = [
             self.test_folder, self.fasta, self.snp_folder, self.table]
         scratch_dirs += [
             os.path.join(self.test_folder, relative)
             for relative in (
                 "compare_related_and_reference_genomes",
                 "compare_related_and_reference_genomes/seqs",
                 "compare_related_and_reference_genomes/seqs/with_BAQ",
                 "compare_related_and_reference_genomes/statistics",
                 "compare_related_and_reference_genomes/SNP_raw_outputs",
             )
         ]
         # Parents come before children, so plain mkdir is sufficient.
         for directory in scratch_dirs:
             os.mkdir(directory)
     mocked = self.mock_args.mock()
     mocked.types = "related_genome"
     mocked.out_folder = self.test_folder
     mocked.fastas = self.fasta
     self.snp = SNPCalling(mocked)
     self.mock = Mock_func()
예제 #5
0
 def snp(self):
     """Validate the SNP-calling options and launch SNP/mutation calling.

     The method checks the FASTA folder and the ``bam_type`` and ``ploidy``
     option values, creates the sub-folders required for "snp", packs the
     options with ``args_container.container_snp`` and runs
     ``SNPCalling.run_snp_calling``.

     The process exits through ``sys.exit()`` as soon as one of the
     validated options holds an unsupported value, so SNP calling is never
     started with a configuration that was just reported as invalid.
     """
     print("Running SNP/mutations calling...")
     self.check_folder([self._args.fasta_path])
     if self._args.bam_type not in ("target", "reference"):
         print("Error: please assign \"target\" or"
               " \"reference\" to --bam_type!!")
         sys.exit()
     if self._args.ploidy not in ("haploid", "diploid"):
         # NOTE(review): the message names --chromosome_type while the value
         # is read from ``ploidy`` -- confirm the option name with the CLI.
         print("Error: please assign \"haploid\" or"
               " \"diploid\" to --chromosome_type!!")
         # Stop here, exactly like the --bam_type check above.
         sys.exit()
     project_creator.create_subfolders(self._paths.required_folders("snp"))
     args_snp = self.args_container.container_snp(
         self._args.samtools_path, self._args.bcftools_path,
         self._args.bam_type,
         self._args.program, self._args.fasta_path,
         self._args.tex_bam_path, self._args.frag_bam_path,
         self._args.quality, self._args.read_depth,
         self._paths.snp_output_folder, self._args.indel_fraction,
         self._args.ploidy)
     snp = SNPCalling(args_snp)
     snp.run_snp_calling(args_snp)
예제 #6
0
class TestSNPCalling(unittest.TestCase):
    """Tests for ``SNPCalling`` using the related-genome folder layout."""

    def setUp(self):
        """Create the scratch folder tree and the ``SNPCalling`` under test."""
        self.example = Example()
        self.mock_args = MockClass()
        self.test_folder = "test_folder"
        self.fasta = os.path.join(self.test_folder, "fasta")
        self.snp_folder = os.path.join(self.test_folder, "snp")
        self.table = os.path.join(self.test_folder, "table")
        if not os.path.exists(self.test_folder):
            scratch_dirs = [
                self.test_folder, self.fasta, self.snp_folder, self.table]
            scratch_dirs += [
                os.path.join(self.test_folder, relative)
                for relative in (
                    "compare_related_and_reference_genomes",
                    "compare_related_and_reference_genomes/seqs",
                    "compare_related_and_reference_genomes/seqs/with_BAQ",
                    "compare_related_and_reference_genomes/statistics",
                    "compare_related_and_reference_genomes/SNP_raw_outputs",
                )
            ]
            # Parents come before children, so plain mkdir is sufficient.
            for directory in scratch_dirs:
                os.mkdir(directory)
        mocked = self.mock_args.mock()
        mocked.types = "related_genome"
        mocked.out_folder = self.test_folder
        mocked.fastas = self.fasta
        self.snp = SNPCalling(mocked)
        self.mock = Mock_func()

    def tearDown(self):
        """Delete the scratch folder tree when it exists."""
        if not os.path.exists(self.test_folder):
            return
        shutil.rmtree(self.test_folder)

    def test_transcript_snp(self):
        """Run ``_transcript_snp`` on example data and compare its outputs."""
        genome_fasta = os.path.join(self.test_folder, "NC_007795.1.fa")
        gen_file(genome_fasta, self.example.fasta)
        raw_dir = os.path.join(
            self.test_folder,
            "compare_related_and_reference_genomes/SNP_raw_outputs/test")
        os.mkdir(raw_dir)
        vcf_path = os.path.join(raw_dir, "test_with_BAQ_NC_007795.1.vcf")
        gen_file(vcf_path, self.example.snp)
        mocked = self.mock_args.mock()
        for option, value in (
                ("depth", 5),
                ("fraction", 0.3),
                ("quality", 2),
                ("depth_s", "n_10"),
                ("depth_b", "a_2"),
                ("dp4_sum", "n_10"),
                ("dp4_frac", 0.5),
                ("idv", "n_10"),
                ("imf", 0.5),
                ("filters", ["VDB_s0.1"]),
                ("min_sample", 2)):
            setattr(mocked, option, value)
        seq_dir = os.path.join(
            self.test_folder,
            "compare_related_and_reference_genomes/seqs/with_BAQ/test")
        os.mkdir(seq_dir)
        depth_path = os.path.join(self.test_folder, "tmp_depthNC_007795.1")
        gen_file(depth_path, self.example.depth_file)
        bam_info = [{"sample": "NC_007795.1", "bam_number": 1,
                     "bams": "test", "rep": 1}]
        self.snp._transcript_snp(
            genome_fasta, "test", "with", "test",
            bam_info, self.table, mocked)
        stat_path = os.path.join(
            self.test_folder,
            "compare_related_and_reference_genomes/statistics/"
            "stat_test_with_BAQ_NC_007795.1_SNP_best.csv")
        stat_lines = import_data(stat_path)
        print("\n".join(stat_lines))
        self.assertEqual("\n".join(stat_lines), self.example.out_stat)
        seq_path = os.path.join(
            self.test_folder,
            "compare_related_and_reference_genomes/seqs/with_BAQ/test/"
            "test_NC_007795.1_NC_007795.1_1_1.fa")
        seq_lines = import_data(seq_path)
        self.assertEqual("\n".join(seq_lines),
                         ">NC_007795.1\nAaTTGaaTCCCGAACGACAGTTAT")
        # These outputs are expected in the current working directory.
        for leftover in (
                "test_NC_007795.1_seq_reference.csv",
                "test_NC_007795.1_best.vcf",
                "test_NC_007795.1_NC_007795.1_SNP_QUAL_best.png",
                "test_NC_007795.1_NC_007795.1_SNP_QUAL_raw.png"):
            os.remove(leftover)

    def test_run_program(self):
        """``_run_program`` with a mocked ``_run_sub`` leaves a "test" path."""
        self.snp._run_sub = self.mock.mock_run_sub
        mocked = self.mock_args.mock()
        bam_info = [{"sample": "NC_007795.1", "bam_number": 1,
                     "bams": "test", "rep": 1}]
        mocked.program = ["with_BAQ"]
        self.snp._run_program("fasta", bam_info, mocked)
        expected_path = os.path.join(self.test_folder, "test")
        self.assertTrue(os.path.exists(expected_path))

    def test_merge_bams(self):
        """``_merge_bams`` updates ``bam_number`` of the sample entry to 1."""
        mocked = self.mock_args.mock()
        mocked.frag_bams = os.path.join(self.test_folder, "frag_bams")
        mocked.normal_bams = os.path.join(self.test_folder, "tex_bams")
        for folder in (mocked.normal_bams, mocked.frag_bams):
            os.mkdir(folder)
        bam_info = [{"sample": "NC_007795.1", "bam_number": 0,
                     "bams": "test", "rep": 1}]
        self.snp._run_bam = self.mock.mock_run_bam
        for folder, bam_name in (
                (mocked.normal_bams, "tex.bam"),
                (mocked.normal_bams, "notex.bam"),
                (mocked.frag_bams, "farg.bam")):
            gen_file(os.path.join(folder, bam_name), "test")
        mocked.bams = [mocked.frag_bams, mocked.normal_bams]
        mocked.samtools_path = "test"
        self.snp._merge_bams(mocked, bam_info)
        self.assertEqual(bam_info[0]["bam_number"], 1)

    def test_modify_header(self):
        """``_modify_header`` rewrites the FASTA header to ``>DDD``."""
        fasta_path = os.path.join(self.fasta, "test.fa")
        gen_file(fasta_path, ">AAA|BBB|CCC|DDD|EEE\nAATTAATTGGCC")
        self.snp._modify_header(self.fasta)
        rewritten = import_data(os.path.join(self.fasta, "test.fa"))
        self.assertEqual("\n".join(rewritten), ">DDD\nAATTAATTGGCC")

    def test_get_genome_name(self):
        """Smoke test: ``_get_genome_name`` runs with a mocked header."""
        self.snp._get_header = self.mock.mock_get_header
        gen_file(os.path.join(self.test_folder, "header"), self.example.bam)
        mocked = self.mock_args.mock()
        mocked.samtools_path = "test"
        bam_info = [{"sample": "NC_007795.1", "bam_number": 0,
                     "bams": "test", "rep": 1}]
        # No assertion: the call only has to complete without raising.
        self.snp._get_genome_name(mocked, bam_info)

    def test_run_snp_calling(self):
        """Smoke test: ``run_snp_calling`` runs with all helpers mocked."""
        # Each private helper is replaced by the Mock_func method "mock<name>".
        for hook in ("_get_header", "_run_bam", "_run_sub",
                     "_run_tools", "_transcript_snp"):
            setattr(self.snp, hook, getattr(self.mock, "mock" + hook))
        gen_file(os.path.join(self.fasta, "test.fa"),
                 ">AAA|BBB|CCC|DDD|EEE\nAATTAATTGGCC")
        gen_file(os.path.join(self.test_folder, "header"), self.example.bam)
        for placeholder in ("whole_reads.bam", "whole_reads_sorted.bam",
                            "tmp_bcf"):
            gen_file(os.path.join(self.test_folder, placeholder), "test")
        gen_file(os.path.join(self.fasta, "all.fa.fai"), "test")
        mocked = self.mock_args.mock()
        mocked.types = "reference"
        mocked.program = ["with_BAQ"]
        mocked.bams = [
            "a1:" + os.path.join(self.test_folder, "frag_bams"),
            "a2:" + os.path.join(self.test_folder, "tex_bams"),
        ]
        mocked.frag_bams = os.path.join(self.test_folder, "frag_bams")
        mocked.normal_bams = os.path.join(self.test_folder, "tex_bams")
        for folder in (mocked.normal_bams, mocked.frag_bams):
            os.mkdir(folder)
        for folder, bam_name in (
                (mocked.normal_bams, "tex.bam"),
                (mocked.normal_bams, "notex.bam"),
                (mocked.frag_bams, "farg.bam")):
            gen_file(os.path.join(folder, bam_name), "test")
        mocked.samtools_path = "test"
        self.snp.run_snp_calling(mocked)
예제 #7
0
class TestSNPCalling(unittest.TestCase):
    """Tests for ``SNPCalling`` (related-genome layout, log-file variant).

    Tests that hand a log file to the code under test open it in a ``with``
    block so the handle is closed before ``tearDown`` deletes the folder.
    """

    def setUp(self):
        """Create the scratch folder tree and the ``SNPCalling`` under test."""
        self.example = Example()
        self.mock_args = MockClass()
        self.test_folder = "test_folder"
        self.fasta = os.path.join(self.test_folder, "fasta")
        self.snp_folder = os.path.join(self.test_folder, "snp")
        self.table = os.path.join(self.test_folder, "table")
        if (not os.path.exists(self.test_folder)):
            os.mkdir(self.test_folder)
            os.mkdir(self.fasta)
            os.mkdir(self.snp_folder)
            os.mkdir(self.table)
            os.mkdir(os.path.join(
                self.test_folder,
                "compare_related_and_reference_genomes"))
            os.mkdir(os.path.join(
                self.test_folder,
                "compare_related_and_reference_genomes/seqs"))
            os.mkdir(os.path.join(
                self.test_folder,
                "compare_related_and_reference_genomes/seqs/with_BAQ"))
            os.mkdir(os.path.join(
                self.test_folder,
                "compare_related_and_reference_genomes/statistics"))
            os.mkdir(os.path.join(
                self.test_folder,
                "compare_related_and_reference_genomes/SNP_raw_outputs"))
        args = self.mock_args.mock()
        args.types = "related_genome"
        args.out_folder = self.test_folder
        args.fastas = self.fasta
        self.snp = SNPCalling(args)
        self.mock = Mock_func()

    def tearDown(self):
        """Delete the scratch folder tree when it exists."""
        if os.path.exists(self.test_folder):
            shutil.rmtree(self.test_folder)

    def test_transcript_snp(self):
        """Run ``_transcript_snp`` on example data and compare its outputs."""
        fasta = os.path.join(self.test_folder, "NC_007795.1.fa")
        gen_file(fasta, self.example.fasta)
        snp_folder = os.path.join(
                 self.test_folder,
                 "compare_related_and_reference_genomes/SNP_raw_outputs/test")
        os.mkdir(snp_folder)
        snp = os.path.join(snp_folder, "test_with_BAQ_NC_007795.1.vcf")
        gen_file(snp, self.example.snp)
        args = self.mock_args.mock()
        args.depth = 5
        args.fraction = 0.3
        args.quality = 2
        args.depth_s = "n_10"
        args.depth_b = "a_2"
        args.dp4_sum = "n_10"
        args.dp4_frac = 0.5
        args.idv = "n_10"
        args.imf = 0.5
        args.filters = ["VDB_s0.1"]
        args.min_sample = 2
        os.mkdir(os.path.join(
            self.test_folder,
            "compare_related_and_reference_genomes/seqs/with_BAQ/test"))
        depth_file = os.path.join(self.test_folder, "tmp_depthNC_007795.1")
        gen_file(depth_file, self.example.depth_file)
        bam_datas = [{"sample": "NC_007795.1", "bam_number": 1,
                      "bams": "test", "rep": 1}]
        self.snp._transcript_snp(fasta, "test", "with",
                                 "test", bam_datas, self.table, args,)
        datas = import_data(os.path.join(
            self.test_folder,
            "compare_related_and_reference_genomes/statistics/stat_test_with_BAQ_NC_007795.1_SNP_best.csv"))
        print("\n".join(datas))
        self.assertEqual("\n".join(datas), self.example.out_stat)
        datas = import_data(os.path.join(
            self.test_folder,
            "compare_related_and_reference_genomes/seqs/with_BAQ/test/test_NC_007795.1_NC_007795.1_1_1.fa"))
        self.assertEqual("\n".join(datas),
                         ">NC_007795.1\nAaTTGaaTCCCGAACGACAGTTAT")
        # These outputs are expected in the current working directory.
        os.remove("test_NC_007795.1_seq_reference.csv")
        os.remove("test_NC_007795.1_best.vcf")
        os.remove("test_NC_007795.1_NC_007795.1_SNP_QUAL_best.png")
        os.remove("test_NC_007795.1_NC_007795.1_SNP_QUAL_raw.png")

    def test_run_program(self):
        """``_run_program`` with a mocked ``_run_sub`` leaves a "test" path."""
        self.snp._run_sub = self.mock.mock_run_sub
        args = self.mock_args.mock()
        bam_datas = [{"sample": "NC_007795.1", "bam_number": 1,
                      "bams": "test", "rep": 1}]
        args.program = ["with_BAQ"]
        # Close the log deterministically instead of leaking the handle.
        with open(os.path.join(self.test_folder, "test.log"), "w") as log:
            self.snp._run_program("fasta", bam_datas, args, log)
        self.assertTrue(os.path.exists(os.path.join(self.test_folder, "test")))

    def test_merge_bams(self):
        """``_merge_bams`` updates ``bam_number`` of the sample entry to 1."""
        args = self.mock_args.mock()
        args.frag_bams = os.path.join(self.test_folder, "frag_bams")
        args.normal_bams = os.path.join(self.test_folder, "tex_bams")
        os.mkdir(args.normal_bams)
        os.mkdir(args.frag_bams)
        bam_datas = [{"sample": "NC_007795.1", "bam_number": 0,
                      "bams": "test", "rep": 1}]
        self.snp._run_bam = self.mock.mock_run_bam
        gen_file(os.path.join(args.normal_bams, "tex.bam"), "test")
        gen_file(os.path.join(args.normal_bams, "notex.bam"), "test")
        gen_file(os.path.join(args.frag_bams, "farg.bam"), "test")
        args.bams = [args.frag_bams, args.normal_bams]
        args.samtools_path = "test"
        # Close the log deterministically instead of leaking the handle.
        with open(os.path.join(self.test_folder, "test.log"), "w") as log:
            self.snp._merge_bams(args, bam_datas, log)
        self.assertEqual(bam_datas[0]["bam_number"], 1)

    def test_modify_header(self):
        """``_modify_header`` rewrites the FASTA header to ``>DDD``."""
        gen_file(os.path.join(self.fasta, "test.fa"),
                 ">AAA|BBB|CCC|DDD|EEE\nAATTAATTGGCC")
        self.snp._modify_header(self.fasta)
        datas = import_data(os.path.join(self.fasta, "test.fa"))
        self.assertEqual("\n".join(datas), ">DDD\nAATTAATTGGCC")

    def test_get_genome_name(self):
        """Smoke test: ``_get_genome_name`` runs with a mocked header."""
        self.snp._get_header = self.mock.mock_get_header
        gen_file(os.path.join(self.test_folder, "header"), self.example.bam)
        args = self.mock_args.mock()
        args.samtools_path = "test"
        bam_datas = [{"sample": "NC_007795.1", "bam_number": 0,
                      "bams": "test", "rep": 1}]
        # No assertion: the call only has to complete without raising.
        self.snp._get_genome_name(args, bam_datas)

    def test_run_snp_calling(self):
        """Smoke test: ``run_snp_calling`` runs with all helpers mocked."""
        self.snp._get_header = self.mock.mock_get_header
        self.snp._run_bam = self.mock.mock_run_bam
        self.snp._run_sub = self.mock.mock_run_sub
        self.snp._run_tools = self.mock.mock_run_tools
        self.snp._transcript_snp = self.mock.mock_transcript_snp
        gen_file(os.path.join(self.fasta, "test.fa"),
                 ">AAA|BBB|CCC|DDD|EEE\nAATTAATTGGCC")
        gen_file(os.path.join(self.test_folder, "header"), self.example.bam)
        gen_file(os.path.join(self.test_folder, "whole_reads.bam"), "test")
        gen_file(os.path.join(self.test_folder, "whole_reads_sorted.bam"),
                 "test")
        gen_file(os.path.join(self.test_folder, "tmp_bcf"), "test")
        gen_file(os.path.join(self.fasta, "all.fa.fai"), "test")
        args = self.mock_args.mock()
        args.types = "reference"
        args.program = ["with_BAQ"]
        args.bams = ["a1:" + os.path.join(self.test_folder, "frag_bams"),
                     "a2:" + os.path.join(self.test_folder, "tex_bams")]
        args.frag_bams = os.path.join(self.test_folder, "frag_bams")
        args.normal_bams = os.path.join(self.test_folder, "tex_bams")
        os.mkdir(args.normal_bams)
        os.mkdir(args.frag_bams)
        gen_file(os.path.join(args.normal_bams, "tex.bam"), "test")
        gen_file(os.path.join(args.normal_bams, "notex.bam"), "test")
        gen_file(os.path.join(args.frag_bams, "farg.bam"), "test")
        args.samtools_path = "test"
        # Close the log deterministically instead of leaking the handle.
        with open(os.path.join(self.test_folder, "test.log"), "w") as log:
            self.snp.run_snp_calling(args, log)
예제 #8
0
class TestSNPCalling(unittest.TestCase):
    """Tests for ``SNPCalling`` using the "compare_reference" layout."""

    def setUp(self):
        """Create the scratch folder tree and the ``SNPCalling`` under test."""
        self.example = Example()
        self.mock_args = MockClass()
        self.test_folder = "test_folder"
        self.fasta = os.path.join(self.test_folder, "fasta")
        self.snp_folder = os.path.join(self.test_folder, "snp")
        self.table = os.path.join(self.test_folder, "table")
        if not os.path.exists(self.test_folder):
            scratch_dirs = [
                self.test_folder, self.fasta, self.snp_folder, self.table]
            scratch_dirs += [
                os.path.join(self.test_folder, relative)
                for relative in (
                    "compare_reference",
                    "compare_reference/seqs",
                    "compare_reference/seqs/with_BAQ",
                    "compare_reference/statistics",
                )
            ]
            # Parents come before children, so plain mkdir is sufficient.
            for directory in scratch_dirs:
                os.mkdir(directory)
        mocked = self.mock_args.mock()
        mocked.types = "reference"
        mocked.out_folder = self.test_folder
        mocked.fastas = self.fasta
        self.snp = SNPCalling(mocked)
        self.mock = Mock_func()

    def tearDown(self):
        """Delete the scratch folder tree when it exists."""
        if not os.path.exists(self.test_folder):
            return
        shutil.rmtree(self.test_folder)

    def test_import_bam(self):
        """``_import_bam`` collects both BAM files and returns their count."""
        for bam_name in ("test_1.bam", "test_2.bam"):
            gen_file(os.path.join(self.test_folder, bam_name), "test")
        collected = []
        count = self.snp._import_bam(self.test_folder, collected)
        self.assertEqual(count, 2)
        self.assertListEqual(
            collected,
            ['test_folder/test_1.bam', 'test_folder/test_2.bam'])

    def test_transcript_snp(self):
        """Run ``_transcript_snp`` on example data and compare its outputs."""
        genome_fasta = os.path.join(self.test_folder, "NC_007795.1.fa")
        gen_file(genome_fasta, self.example.fasta)
        snp_table = os.path.join(self.test_folder, "NC_007795.1.csv")
        gen_file(snp_table, self.example.snp)
        mocked = self.mock_args.mock()
        for option, value in (("depth", 5),
                              ("fraction", 0.3),
                              ("quality", 2)):
            setattr(mocked, option, value)
        seq_dir = os.path.join(
            self.test_folder, "compare_reference/seqs/with_BAQ/test")
        os.mkdir(seq_dir)
        self.snp._transcript_snp(
            genome_fasta, snp_table, "test", "with",
            "test", 10, self.table, mocked)
        stat_path = os.path.join(
            self.test_folder,
            "compare_reference/statistics/stat_test_with_BAQ_SNP.csv")
        stat_lines = import_data(stat_path)
        self.assertEqual("\n".join(stat_lines), self.example.out_stat)
        seq_path = os.path.join(
            self.test_folder,
            "compare_reference/seqs/with_BAQ/test/"
            "test_NC_007795.1_1_1.fa")
        seq_lines = import_data(seq_path)
        self.assertEqual("\n".join(seq_lines),
                         ">NC_007795.1\nAaTTGaaTCCCGAACGACAGTTAT")
        # These outputs are expected in the current working directory.
        for leftover in ("test_seq_reference.csv",
                         "test_depth_only.vcf",
                         "test_depth_quality.vcf",
                         "test_NC_007795.1_SNP_QUAL.png"):
            os.remove(leftover)

    def test_run_sub(self):
        """``_run_sub`` with mocked helpers creates the per-run seq folder."""
        self.snp._run_tools = self.mock.mock_run_tools
        self.snp._transcript_snp = self.mock.mock_transcript_snp
        prefixes = {"raw_prefix": "test",
                    "table_prefix": "test"}
        mocked = self.mock_args.mock()
        self.snp._run_sub(mocked, "fasta", "with", prefixes, "test",
                          self.test_folder, 10)
        expected_dir = os.path.join(
            self.test_folder, "compare_reference/seqs/with_BAQ/test")
        self.assertTrue(os.path.exists(expected_dir))

    def test_run_program(self):
        """``_run_program`` with a mocked ``_run_sub`` leaves a "test" path."""
        self.snp._run_sub = self.mock.mock_run_sub
        mocked = self.mock_args.mock()
        mocked.program = ["1"]
        self.snp._run_program("fasta", "test", "test", 10,
                              "table", mocked)
        expected_path = os.path.join(self.test_folder, "test")
        self.assertTrue(os.path.exists(expected_path))

    def test_detect_fasta(self):
        """``_detect_fasta`` returns ``(True, 'test')`` for "test.fa"."""
        detected = self.snp._detect_fasta("test.fa")
        self.assertEqual(detected, (True, 'test'))

    def test_merge_bams(self):
        """``_merge_bams`` reports three BAM files."""
        mocked = self.mock_args.mock()
        mocked.frag_bams = os.path.join(self.test_folder, "frag_bams")
        mocked.normal_bams = os.path.join(self.test_folder, "tex_bams")
        for folder in (mocked.normal_bams, mocked.frag_bams):
            os.mkdir(folder)
        self.snp._run_bam = self.mock.mock_run_bam
        for folder, bam_name in (
                (mocked.normal_bams, "tex.bam"),
                (mocked.normal_bams, "notex.bam"),
                (mocked.frag_bams, "farg.bam")):
            gen_file(os.path.join(folder, bam_name), "test")
        mocked.samtools_path = "test"
        merged_count = self.snp._merge_bams(mocked)
        self.assertEqual(merged_count, 3)

    def test_modify_header(self):
        """``_modify_header`` rewrites the FASTA header to ``>DDD``."""
        fasta_path = os.path.join(self.fasta, "test.fa")
        gen_file(fasta_path, ">AAA|BBB|CCC|DDD|EEE\nAATTAATTGGCC")
        self.snp._modify_header(self.fasta)
        rewritten = import_data(os.path.join(self.fasta, "test.fa"))
        self.assertEqual("\n".join(rewritten), ">DDD\nAATTAATTGGCC")

    def test_get_genome_name(self):
        """Smoke test: ``_get_genome_name`` runs with a mocked header."""
        self.snp._get_header = self.mock.mock_get_header
        gen_file(os.path.join(self.test_folder, "header"), self.example.bam)
        # No assertion: the call only has to complete without raising.
        self.snp._get_genome_name("test")

    def test_run_snp_calling(self):
        """Smoke test: ``run_snp_calling`` runs with all helpers mocked."""
        # Each private helper is replaced by the Mock_func method "mock<name>".
        for hook in ("_get_header", "_run_bam", "_run_sub",
                     "_run_tools", "_transcript_snp"):
            setattr(self.snp, hook, getattr(self.mock, "mock" + hook))
        gen_file(os.path.join(self.fasta, "test.fa"),
                 ">AAA|BBB|CCC|DDD|EEE\nAATTAATTGGCC")
        gen_file(os.path.join(self.test_folder, "header"), self.example.bam)
        for placeholder in ("whole_reads.bam", "whole_reads_sorted.bam"):
            gen_file(os.path.join(self.test_folder, placeholder), "test")
        mocked = self.mock_args.mock()
        mocked.types = "reference"
        mocked.program = ["1"]
        mocked.frag_bams = os.path.join(self.test_folder, "frag_bams")
        mocked.normal_bams = os.path.join(self.test_folder, "tex_bams")
        for folder in (mocked.normal_bams, mocked.frag_bams):
            os.mkdir(folder)
        for folder, bam_name in (
                (mocked.normal_bams, "tex.bam"),
                (mocked.normal_bams, "notex.bam"),
                (mocked.frag_bams, "farg.bam")):
            gen_file(os.path.join(folder, bam_name), "test")
        mocked.samtools_path = "test"
        self.snp.run_snp_calling(mocked)
예제 #9
0
class TestSNPCalling(unittest.TestCase):
    """Tests for ``SNPCalling`` ("compare_reference" layout, best-SNP files)."""

    def setUp(self):
        """Create the scratch folder tree and the ``SNPCalling`` under test."""
        self.example = Example()
        self.mock_args = MockClass()
        self.test_folder = "test_folder"
        self.fasta = os.path.join(self.test_folder, "fasta")
        self.snp_folder = os.path.join(self.test_folder, "snp")
        self.table = os.path.join(self.test_folder, "table")
        if not os.path.exists(self.test_folder):
            scratch_dirs = [
                self.test_folder, self.fasta, self.snp_folder, self.table]
            scratch_dirs += [
                os.path.join(self.test_folder, relative)
                for relative in (
                    "compare_reference",
                    "compare_reference/seqs",
                    "compare_reference/seqs/with_BAQ",
                    "compare_reference/statistics",
                )
            ]
            # Parents come before children, so plain mkdir is sufficient.
            for directory in scratch_dirs:
                os.mkdir(directory)
        mocked = self.mock_args.mock()
        mocked.types = "reference"
        mocked.out_folder = self.test_folder
        mocked.fastas = self.fasta
        self.snp = SNPCalling(mocked)
        self.mock = Mock_func()

    def tearDown(self):
        """Delete the scratch folder tree when it exists."""
        if not os.path.exists(self.test_folder):
            return
        shutil.rmtree(self.test_folder)

    def test_import_bam(self):
        """``_import_bam`` collects both BAM files and returns their count."""
        for bam_name in ("test_1.bam", "test_2.bam"):
            gen_file(os.path.join(self.test_folder, bam_name), "test")
        collected = []
        count = self.snp._import_bam(self.test_folder, collected)
        self.assertEqual(count, 2)
        self.assertListEqual(
            collected,
            ['test_folder/test_1.bam', 'test_folder/test_2.bam'])

    def test_transcript_snp(self):
        """Run ``_transcript_snp`` on example data and compare its outputs."""
        genome_fasta = os.path.join(self.test_folder, "NC_007795.1.fa")
        gen_file(genome_fasta, self.example.fasta)
        snp_table = os.path.join(self.test_folder, "NC_007795.1.csv")
        gen_file(snp_table, self.example.snp)
        mocked = self.mock_args.mock()
        for option, value in (
                ("depth", 5),
                ("fraction", 0.3),
                ("quality", 2),
                ("depth_s", "n_10"),
                ("depth_b", "a_2"),
                ("dp4_sum", "n_10"),
                ("dp4_frac", 0.5),
                ("idv", "n_10"),
                ("imf", 0.5),
                ("filters", ["VDB_s0.1"]),
                ("min_sample", 2)):
            setattr(mocked, option, value)
        seq_dir = os.path.join(
            self.test_folder, "compare_reference/seqs/with_BAQ/test")
        os.mkdir(seq_dir)
        depth_path = os.path.join(self.test_folder, "tmp_depth")
        gen_file(depth_path, self.example.depth_file)
        self.snp._transcript_snp(
            genome_fasta, snp_table, "test", "with",
            "test", 10, self.table, mocked)
        stat_path = os.path.join(
            self.test_folder,
            "compare_reference/statistics/"
            "stat_test_with_BAQ_SNP_best.csv")
        stat_lines = import_data(stat_path)
        self.assertEqual("\n".join(stat_lines), self.example.out_stat)
        seq_path = os.path.join(
            self.test_folder,
            "compare_reference/seqs/with_BAQ/test/"
            "test_NC_007795.1_1_1.fa")
        seq_lines = import_data(seq_path)
        self.assertEqual("\n".join(seq_lines),
                         ">NC_007795.1\nAaTTGaaTCCCGAACGACAGTTAT")
        # These outputs are expected in the current working directory.
        for leftover in ("test_seq_reference.csv",
                         "test_best.vcf",
                         "test_NC_007795.1_SNP_QUAL_best.png",
                         "test_NC_007795.1_SNP_QUAL_raw.png"):
            os.remove(leftover)

    def test_run_sub(self):
        """``_run_sub`` with mocked helpers creates the per-run seq folder."""
        self.snp._run_tools = self.mock.mock_run_tools
        self.snp._transcript_snp = self.mock.mock_transcript_snp
        prefixes = {"raw_prefix": "test",
                    "table_prefix": "test"}
        mocked = self.mock_args.mock()
        self.snp._run_sub(mocked, "fasta", "with", prefixes, "test",
                          self.test_folder, 10)
        expected_dir = os.path.join(
            self.test_folder, "compare_reference/seqs/with_BAQ/test")
        self.assertTrue(os.path.exists(expected_dir))

    def test_run_program(self):
        """``_run_program`` with a mocked ``_run_sub`` leaves a "test" path."""
        self.snp._run_sub = self.mock.mock_run_sub
        mocked = self.mock_args.mock()
        mocked.program = ["1"]
        self.snp._run_program("fasta", "test", "test", 10,
                              "table", mocked)
        expected_path = os.path.join(self.test_folder, "test")
        self.assertTrue(os.path.exists(expected_path))

    def test_detect_fasta(self):
        """``_detect_fasta`` returns ``(True, 'test')`` for "test.fa"."""
        detected = self.snp._detect_fasta("test.fa")
        self.assertEqual(detected, (True, 'test'))

    def test_merge_bams(self):
        """``_merge_bams`` reports three BAM files."""
        mocked = self.mock_args.mock()
        mocked.frag_bams = os.path.join(self.test_folder, "frag_bams")
        mocked.normal_bams = os.path.join(self.test_folder, "tex_bams")
        for folder in (mocked.normal_bams, mocked.frag_bams):
            os.mkdir(folder)
        self.snp._run_bam = self.mock.mock_run_bam
        for folder, bam_name in (
                (mocked.normal_bams, "tex.bam"),
                (mocked.normal_bams, "notex.bam"),
                (mocked.frag_bams, "farg.bam")):
            gen_file(os.path.join(folder, bam_name), "test")
        mocked.samtools_path = "test"
        merged_count = self.snp._merge_bams(mocked)
        self.assertEqual(merged_count, 3)

    def test_modify_header(self):
        """``_modify_header`` rewrites the FASTA header to ``>DDD``."""
        fasta_path = os.path.join(self.fasta, "test.fa")
        gen_file(fasta_path, ">AAA|BBB|CCC|DDD|EEE\nAATTAATTGGCC")
        self.snp._modify_header(self.fasta)
        rewritten = import_data(os.path.join(self.fasta, "test.fa"))
        self.assertEqual("\n".join(rewritten), ">DDD\nAATTAATTGGCC")

    def test_get_genome_name(self):
        """Smoke test: ``_get_genome_name`` runs with a mocked header."""
        self.snp._get_header = self.mock.mock_get_header
        gen_file(os.path.join(self.test_folder, "header"), self.example.bam)
        mocked = self.mock_args.mock()
        mocked.samtools_path = "test"
        # No assertion: the call only has to complete without raising.
        self.snp._get_genome_name(mocked)

    def test_run_snp_calling(self):
        """Smoke test: ``run_snp_calling`` runs with all helpers mocked."""
        # Each private helper is replaced by the Mock_func method "mock<name>".
        for hook in ("_get_header", "_run_bam", "_run_sub",
                     "_run_tools", "_transcript_snp"):
            setattr(self.snp, hook, getattr(self.mock, "mock" + hook))
        gen_file(os.path.join(self.fasta, "test.fa"),
                 ">AAA|BBB|CCC|DDD|EEE\nAATTAATTGGCC")
        gen_file(os.path.join(self.test_folder, "header"), self.example.bam)
        for placeholder in ("whole_reads.bam", "whole_reads_sorted.bam"):
            gen_file(os.path.join(self.test_folder, placeholder), "test")
        mocked = self.mock_args.mock()
        mocked.types = "reference"
        mocked.program = ["1"]
        mocked.frag_bams = os.path.join(self.test_folder, "frag_bams")
        mocked.normal_bams = os.path.join(self.test_folder, "tex_bams")
        for folder in (mocked.normal_bams, mocked.frag_bams):
            os.mkdir(folder)
        for folder, bam_name in (
                (mocked.normal_bams, "tex.bam"),
                (mocked.normal_bams, "notex.bam"),
                (mocked.frag_bams, "farg.bam")):
            gen_file(os.path.join(folder, bam_name), "test")
        mocked.samtools_path = "test"
        self.snp.run_snp_calling(mocked)