Beispiel #1
0
    def test_bam_extract_01(self):
        """Run ``dr-disco bam-extract`` via the command line and check its output.

        The extracted BAM file is dumped to SAM text with ``pysam.view`` and
        that text file is compared against the reference SAM file with
        ``filecmp.cmp``.
        """
        TEST_DIR, T_TEST_DIR = self.__get_temp_dirs()

        input_file = TEST_DIR + "test_terg_02.bam"
        output_file = T_TEST_DIR + "test_terg_02.filtered.bam"
        output_file_s = T_TEST_DIR + "test_terg_02.filtered.sam"
        test_file = TEST_DIR + "test_terg_02.filtered.sam"

        command = [
            "bin/dr-disco", "bam-extract", "chr21:39000000-40000000",
            "chr5:1-2", input_file, output_file
        ]

        # Report the executed command when the exit code is non-zero
        self.assertEqual(subprocess.call(command),
                         0,
                         msg=" ".join([str(x) for x in command]))

        # Bam2Sam; the context manager closes the handle even if
        # pysam.view() raises
        with open(output_file_s, "w") as fhq:
            fhq.write(pysam.view(output_file))

        self.assertTrue(filecmp.cmp(test_file, output_file_s),
                        msg=get_diff(test_file, output_file_s))
Beispiel #2
0
    def test_sj_04__CA_GT_d4(self):
        """Run detect and integrate on the ``splice_site_motif_04`` data.

        The SAM input is converted with ``sam_to_fixed_bam``, decomposed with
        ``IntronDecomposition`` and exported to a table.  That table is then
        integrated with a FASTA file but without a GTF file, and the result is
        compared against the reference file with ``filecmp.cmp``.
        """
        test_id = 'splice_site_motif_04'

        input_sam = TEST_DIR + "test_" + test_id + ".in.sam"
        input_bam = T_TEST_DIR + "test_" + test_id + ".fixed.bam"
        input_file = T_TEST_DIR + "test_" + test_id + ".dbed"

        gtf_file = None
        fasta_file = TEST_DIR + "test_" + test_id + ".in.fa"

        output_file = T_TEST_DIR + "test_" + test_id + ".out.dbed"
        test_file = TEST_DIR + "test_" + test_id + ".out.dbed"

        # sam -> fixed bam
        sam_to_fixed_bam(input_sam, input_bam, T_TEST_DIR)

        # fixed bam -> dr-disco detect
        ic = IntronDecomposition(input_bam)
        ic.decompose(0)
        # The context manager closes the handle even if export() raises
        with open(input_file, "w") as fh:
            ic.export(fh)

        # dr-disco-detect (skip classify) -> dr-disco integrate
        cl = DetectOutput(input_file)

        # Per the original author's note: this call used to trigger an
        # exception; now an error is only logged, so it is called directly
        # instead of through assertRaises.
        cl.integrate(output_file, gtf_file, fasta_file)

        self.assertTrue(filecmp.cmp(test_file, output_file),
                        msg=get_diff(test_file, output_file))
Beispiel #3
0
    def test_03(self):  # +1, +2 -> 0
        """Run ``dr-disco integrate`` on ``frameshift-prediction_03`` per GTF.

        The command is executed once for each of the two GTF files; both runs
        write to the same output path and each result is compared against the
        same reference file.
        """
        TEST_DIR, T_TEST_DIR = self.__get_temp_dirs()

        test_id = 'frameshift-prediction_03'
        ref_prefix = TEST_DIR + "test_" + test_id

        # fusions = [(['chr1', 1040604, '+'], ['chr1', 999020, '-']), (['1', 1040604, '+'], ['1', 999020, '-'])]

        input_file = ref_prefix + ".in.dbed"
        expected = ref_prefix + ".out.txt"
        produced = T_TEST_DIR + "test_" + test_id + ".out.txt"

        annotations = (
            'frameshift_example.gtf',
            'frameshift_example.no_chr_prefix.gtf',
        )
        for annotation in annotations:
            gtf_file = TEST_DIR + annotation
            command = ["bin/dr-disco", "integrate", "--gtf", gtf_file]
            command += [input_file, produced]

            exit_code = subprocess.call(command)
            self.assertEqual(exit_code, 0, msg=" ".join(map(str, command)))

            same = filecmp.cmp(expected, produced)
            self.assertTrue(same, msg=get_diff(expected, produced))
    def test_01_gzip(self):
        """Classify a gzip-compressed copy of the ``01`` input file.

        The plain-text input is re-written as a gzip archive in the temporary
        directory, ``DetectOutput`` is pointed at that archive, and the
        classify output is compared against the reference ``.out.dbed`` file.
        """
        test_id = '01'

        input_file = TEST_DIR + "test_" + test_id + ".in.dbed"
        input_file_gz = T_TEST_DIR + "test_" + test_id + ".in.dbed.gz"

        # Create the gzip archive; it is opened in text mode ('wt') so the
        # str read from the input can be written without manual encoding.
        with open(input_file, 'r') as fh_in, \
                gzip.open(input_file_gz, 'wt') as fh_out:
            fh_out.write(fh_in.read())

        test_file = TEST_DIR + "test_" + test_id + ".out.dbed"
        output_file = T_TEST_DIR + "test_" + test_id + ".out.gz.dbed"

        cl = DetectOutput(input_file_gz)
        cl.classify(output_file, False, Blacklist(), 1, True)

        self.assertTrue(filecmp.cmp(test_file, output_file),
                        msg=get_diff(test_file, output_file))
Beispiel #5
0
    def test_01_complementary(self):
        """Run ``dr-disco integrate`` on ``frameshift-prediction_01-complementary``.

        The command is executed once per GTF file; each run overwrites the
        same output path, which is then compared against the reference file.
        """
        TEST_DIR, T_TEST_DIR = self.__get_temp_dirs()

        test_id = 'frameshift-prediction_01-complementary'
        ref_prefix = TEST_DIR + "test_" + test_id

        input_file = ref_prefix + ".in.dbed"
        expected = ref_prefix + ".out.txt"
        produced = T_TEST_DIR + "test_" + test_id + ".out.txt"

        annotations = (
            'frameshift_example.gtf',
            'frameshift_example.no_chr_prefix.gtf',
        )
        for annotation in annotations:
            gtf_file = TEST_DIR + annotation
            command = ["bin/dr-disco", "integrate", "--gtf", gtf_file]
            command += [input_file, produced]

            exit_code = subprocess.call(command)
            self.assertEqual(exit_code, 0, msg=" ".join(map(str, command)))

            same = filecmp.cmp(expected, produced)
            self.assertTrue(same, msg=get_diff(expected, produced))
Beispiel #6
0
    def test_01(self):  # example of in-frame fusion - strands are RNA strand
        """Run ``dr-disco integrate`` on ``frameshift-prediction_01`` per GTF.

        Notes carried over from the original author:

        * both genes have their DNA strand at minus;
        * the acceptor is on the negative strand at RNA level, the donor on
          the positive strand at RNA level;
        * strands are given at RNA level and the gene order is DONOR,
          ACCEPTOR (donor chr1:1035203 '+', acceptor chr1:999610 '-').
        """
        TEST_DIR, T_TEST_DIR = self.__get_temp_dirs()

        test_id = 'frameshift-prediction_01'
        ref_prefix = TEST_DIR + "test_" + test_id

        #             <=(-)=| acceptor in negative strand at RNA
        #     =====(+)=====>| donor in positive strand at RNA

        input_file = ref_prefix + ".in.dbed"
        expected = ref_prefix + ".out.txt"
        produced = T_TEST_DIR + "test_" + test_id + ".out.txt"

        annotations = (
            'frameshift_example.gtf',
            'frameshift_example.no_chr_prefix.gtf',
        )
        for annotation in annotations:
            gtf_file = TEST_DIR + annotation
            command = ["bin/dr-disco", "integrate", "--gtf", gtf_file]
            command += [input_file, produced]

            exit_code = subprocess.call(command)
            self.assertEqual(exit_code, 0, msg=" ".join(map(str, command)))

            same = filecmp.cmp(expected, produced)
            self.assertTrue(same, msg=get_diff(expected, produced))
Beispiel #7
0
    def test_sj_02(self):
        """Run detect and integrate on the ``splice_site_motif_02`` data.

        The SAM input is converted with ``sam_to_fixed_bam``, decomposed with
        ``IntronDecomposition`` and exported to a table.  That table is then
        integrated with a FASTA file but without a GTF file, and the result is
        compared against the reference file with ``filecmp.cmp``.
        """
        test_id = 'splice_site_motif_02'

        input_sam = TEST_DIR + "test_" + test_id + ".in.sam"
        input_bam = T_TEST_DIR + "test_" + test_id + ".fixed.bam"
        input_file = T_TEST_DIR + "test_" + test_id + ".dbed"

        gtf_file = None
        fasta_file = TEST_DIR + "test_" + test_id + ".in.fa"

        output_file = T_TEST_DIR + "test_" + test_id + ".out.dbed"
        test_file = TEST_DIR + "test_" + test_id + ".out.dbed"

        # sam -> fixed bam
        sam_to_fixed_bam(input_sam, input_bam, T_TEST_DIR)

        # fixed bam -> dr-disco detect
        ic = IntronDecomposition(input_bam)
        ic.decompose(0)
        # The context manager closes the handle even if export() raises
        with open(input_file, "w") as fh:
            ic.export(fh)

        # dr-disco-detect (skip classify) -> dr-disco integrate
        cl = DetectOutput(input_file)
        cl.integrate(output_file, gtf_file, fasta_file)

        self.assertTrue(filecmp.cmp(test_file, output_file),
                        msg=get_diff(test_file, output_file))
Beispiel #8
0
    def test_02(self):
        """Run ``dr-disco logo-sequence`` and compare both FASTA outputs.

        The command is invoked for position ``chr2:3`` with ``-n 4`` and
        ``-p 4``; the two files it writes are each compared against their
        reference counterpart with ``filecmp.cmp``.
        """
        input_file = TEST_DIR + "test_01.ref.fa"
        output_file_n = T_TEST_DIR + "test_02.logo-n.fa"
        output_file_p = T_TEST_DIR + "test_02.logo-p.fa"
        test_file_n = TEST_DIR + "test_02.logo-n.fa"
        test_file_p = TEST_DIR + "test_02.logo-p.fa"

        command = [
            'dr-disco', 'logo-sequence', 'chr2:3', input_file, '-n', '4', '-p',
            '4', output_file_n, output_file_p
        ]

        # Ensure error code is 0 - no exceptions have been thrown.  The
        # command line is reported only on failure instead of being printed
        # unconditionally.
        self.assertEqual(subprocess.call(command),
                         0,
                         msg=" ".join(command))

        self.assertTrue(filecmp.cmp(test_file_n, output_file_n),
                        msg=get_diff(test_file_n, output_file_n))
        self.assertTrue(filecmp.cmp(test_file_p, output_file_p),
                        msg=get_diff(test_file_p, output_file_p))
    def test_01__only_valid(self):
        """Classify the ``01`` input and compare with the only-valid reference.

        NOTE(review): the second positional argument to ``classify`` (``True``)
        presumably selects the "only valid" mode suggested by the file names;
        confirm against ``DetectOutput.classify``.
        """
        test_id = '01'
        result_name = "test_" + test_id + ".out.only-valid.dbed"

        input_file = TEST_DIR + "test_" + test_id + ".in.dbed"
        expected = TEST_DIR + result_name
        produced = T_TEST_DIR + result_name

        DetectOutput(input_file).classify(produced, True, Blacklist(), 1, True)

        same = filecmp.cmp(expected, produced)
        self.assertTrue(same, msg=get_diff(expected, produced))
    def test_02(self):
        """Classify the ``02`` input and compare with its reference output."""
        test_id = '02'
        result_name = "test_" + test_id + ".out.dbed"

        input_file = TEST_DIR + "test_" + test_id + ".in.dbed"
        expected = TEST_DIR + result_name
        produced = T_TEST_DIR + result_name

        DetectOutput(input_file).classify(produced, False, Blacklist(), 1, True)

        same = filecmp.cmp(expected, produced)
        self.assertTrue(same, msg=get_diff(expected, produced))
    def test_s041_nocrash(self):
        """Integrate ``terg_s041_b`` with ``example_refseq.gff`` as annotation.

        No FASTA file is supplied (third argument is ``None``).  The output is
        compared against the reference ``.out.txt`` file.
        """
        test_id = 'terg_s041_b'
        result_name = "test_" + test_id + ".out.txt"

        input_file = TEST_DIR + "test_" + test_id + ".in.dbed"
        gtf_file = TEST_DIR + "example_refseq.gff"
        expected = TEST_DIR + result_name
        produced = T_TEST_DIR + result_name

        DetectOutput(input_file).integrate(produced, gtf_file, None)

        same = filecmp.cmp(expected, produced)
        self.assertTrue(same, msg=get_diff(expected, produced))
Beispiel #12
0
    def test_01(self):
        """Chain fix, detect, classify and integrate on the ``01`` data set.

        The output of the detect, classify and integrate steps is each
        compared against its own reference file; the fixed BAM produced by
        the first step is not compared.
        """
        test_id = '01'
        ref_prefix = TEST_DIR + "test_" + test_id
        tmp_prefix = T_TEST_DIR + "test_" + test_id

        unfixed_sam = ref_prefix + ".sam"
        fixed_bam = tmp_prefix + ".fixed.bam"

        detect_out = tmp_prefix + "_detect.out.txt"
        detect_ref = ref_prefix + "_detect.out.txt"

        classify_out = tmp_prefix + "_classify.out.txt"
        classify_ref = ref_prefix + "_classify.out.txt"

        integrate_out = tmp_prefix + "_integrate.out.txt"
        integrate_ref = ref_prefix + "_integrate.out.txt"

        # Step 01: dr-disco fix (result intentionally not checked)
        sam_to_fixed_bam(unfixed_sam, fixed_bam, T_TEST_DIR)

        # Step 02: dr-disco detect (check appropriate values and columns)
        decomposition = IntronDecomposition(fixed_bam)
        decomposition.decompose(0)

        with open(detect_out, "w") as handle:
            decomposition.export(handle)

        same = filecmp.cmp(detect_ref, detect_out)
        self.assertTrue(same, msg=get_diff(detect_ref, detect_out))

        # Step 03: dr-disco classify
        DetectOutput(detect_out).classify(
            classify_out, False, Blacklist(), 25, True)

        same = filecmp.cmp(classify_ref, classify_out)
        self.assertTrue(same, msg=get_diff(classify_ref, classify_out))

        # Step 04: dr-disco integrate
        DetectOutput(classify_out).integrate(integrate_out, None, None)

        same = filecmp.cmp(integrate_ref, integrate_out)
        self.assertTrue(same, msg=get_diff(integrate_ref, integrate_out))
    def test_s041_no_gtf(self):
        """Integrate ``terg_s041_b`` without a GTF and without a FASTA file.

        The output is compared against the reference ``.out.txt`` file.
        """
        test_id = 'terg_s041_b'
        result_name = "test_" + test_id + ".out.txt"

        input_file = TEST_DIR + "test_" + test_id + ".in.dbed"
        expected = TEST_DIR + result_name
        produced = T_TEST_DIR + result_name

        # Both the GTF and the FASTA argument are left out (None)
        DetectOutput(input_file).integrate(produced, None, None)

        same = filecmp.cmp(expected, produced)
        self.assertTrue(same, msg=get_diff(expected, produced))
Beispiel #14
0
    def test_classify_16(self):
        """Run ``dr-disco classify`` on the ``16`` input through the CLI.

        The exit code must be 0 and the written file must compare equal to
        the reference ``.out.dbed`` file.
        """
        TEST_DIR, T_TEST_DIR = self.__get_temp_dirs()
        test_id = '16'
        result_name = "test_" + test_id + ".out.dbed"

        input_file = TEST_DIR + "test_" + test_id + ".in.dbed"
        expected = TEST_DIR + result_name
        produced = T_TEST_DIR + result_name

        command = ["bin/dr-disco", "classify", input_file, produced]

        exit_code = subprocess.call(command)
        self.assertEqual(exit_code, 0, msg=" ".join(map(str, command)))

        same = filecmp.cmp(expected, produced)
        self.assertTrue(same, msg=get_diff(expected, produced))
    def test_02_c(self):
        """Extract two regions from ``test_terg_02.bam`` with ``BAMExtract``.

        The extracted BAM file is dumped to SAM text with ``pysam.view`` and
        compared against the reference SAM file with ``filecmp.cmp``.
        """
        input_file = TEST_DIR + "test_terg_02.bam"
        output_file = T_TEST_DIR + "test_terg_02.filtered.bam"
        output_file_s = T_TEST_DIR + "test_terg_02.filtered.sam"
        test_file = TEST_DIR + "test_terg_02.filtered.sam"

        c = BAMExtract(input_file, False)
        c.extract("chr7:151000000-153000000", "chr5:1-2", output_file)

        # Bam2Sam; the context manager closes the handle even if
        # pysam.view() raises
        with open(output_file_s, "w") as fhq:
            fhq.write(pysam.view(output_file))

        self.assertTrue(filecmp.cmp(test_file, output_file_s),
                        msg=get_diff(test_file, output_file_s))
    def test_01_a(self):
        """Extract regions from a BAM file not (yet) fixed with ``dr-disco fix``.

        The extracted BAM file is dumped to SAM text with ``pysam.view`` and
        compared against the reference SAM file with ``filecmp.cmp``.
        """
        input_file = TEST_DIR + "test_terg_01.bam"
        output_file = T_TEST_DIR + "test_terg_01.filtered.bam"
        output_file_s = T_TEST_DIR + "test_terg_01.filtered.sam"
        # NOTE(review): the reference is test_terg_02.filtered.sam although the
        # input is test_terg_01.bam - presumably intentional (same reads
        # expected from both inputs); confirm.
        test_file = TEST_DIR + "test_terg_02.filtered.sam"

        c = BAMExtract(input_file, False)
        c.extract("chr21:39000000-40000000", "chr5:1-2", output_file)

        # Bam2Sam; the context manager closes the handle even if
        # pysam.view() raises
        with open(output_file_s, "w") as fhq:
            fhq.write(pysam.view(output_file))

        self.assertTrue(filecmp.cmp(test_file, output_file_s),
                        msg=get_diff(test_file, output_file_s))
Beispiel #17
0
    def test_02_s041_no_gtf(self):
        """Run ``dr-disco integrate`` on ``terg_s041_b`` without ``--gtf``.

        The exit code must be 0 and the written file must compare equal to
        the reference ``.out.txt`` file.
        """
        TEST_DIR, T_TEST_DIR = self.__get_temp_dirs()

        test_id = 'terg_s041_b'
        result_name = "test_" + test_id + ".out.txt"

        input_file = TEST_DIR + "test_" + test_id + ".in.dbed"
        expected = TEST_DIR + result_name
        produced = T_TEST_DIR + result_name

        command = ["bin/dr-disco", "integrate", input_file, produced]

        exit_code = subprocess.call(command)
        self.assertEqual(exit_code, 0, msg=" ".join(map(str, command)))

        same = filecmp.cmp(expected, produced)
        self.assertTrue(same, msg=get_diff(expected, produced))
Beispiel #18
0
    def test_detect_02(self):
        """Run ``dr-disco detect -m 0`` on the fixed ``02`` BAM through the CLI.

        The SAM input is first converted with ``sam_to_fixed_bam``; the detect
        output is compared against the reference ``.out.dbed`` file.
        """
        TEST_DIR, T_TEST_DIR = self.__get_temp_dirs()

        test_id = '02'

        input_file_a = TEST_DIR + "test_" + test_id + ".sam"
        fixed_bam = T_TEST_DIR + "test_" + test_id + ".fixed.bam"
        test_file = TEST_DIR + "test_" + test_id + ".out.dbed"
        output_file = T_TEST_DIR + "test_" + test_id + ".out.dbed"

        sam_to_fixed_bam(input_file_a, fixed_bam, T_TEST_DIR)

        command = ["bin/dr-disco", "detect", "-m", "0", fixed_bam, output_file]

        # Report the executed command when the exit code is non-zero
        self.assertEqual(subprocess.call(command),
                         0,
                         msg=" ".join([str(x) for x in command]))
        self.assertTrue(filecmp.cmp(test_file, output_file),
                        msg=get_diff(test_file, output_file))
    def test_s041(self):
        """Integrate ``terg_s041`` once per GTF variant and compare each result.

        Both GTF files (with and without ``no_chr_prefix`` in the name) are
        expected to yield output equal to the same reference file.
        """
        test_id = 'terg_s041'
        ref_prefix = TEST_DIR + "test_" + test_id

        input_file = ref_prefix + ".in.dbed"
        expected = ref_prefix + ".out.txt"
        produced = T_TEST_DIR + "test_" + test_id + ".out.txt"

        for gtf_suffix in (".in.gtf", ".in.no_chr_prefix.gtf"):
            gtf_file = ref_prefix + gtf_suffix
            DetectOutput(input_file).integrate(produced, gtf_file, None)

            same = filecmp.cmp(expected, produced)
            self.assertTrue(same, msg=get_diff(expected, produced))
Beispiel #20
0
    def test_01(self):
        """Convert ``test_terg_01.filtered.bam`` with ``ChimericAlignment``.

        The converted BAM file is dumped to SAM text with ``pysam.view`` and
        compared against the reference SAM file with ``filecmp.cmp``.
        """
        input_file = TEST_DIR + "test_terg_01.filtered.bam"
        output_file = T_TEST_DIR + "test_terg_01.filtered.fixed.bam"
        output_file_s = T_TEST_DIR + "test_terg_01.filtered.fixed.sam"

        test_file = TEST_DIR + "test_terg_01.filtered.fixed.sam"

        alignment_handle = ChimericAlignment(input_file)
        # NOTE(review): "tmp" is presumably a scratch directory that must
        # already exist; confirm against ChimericAlignment.convert.
        alignment_handle.convert(output_file, "tmp")

        # Bam2Sam; the context manager closes the handle even if
        # pysam.view() raises
        with open(output_file_s, "w") as fhq:
            fhq.write(pysam.view(output_file))

        self.assertTrue(filecmp.cmp(test_file, output_file_s),
                        msg=get_diff(test_file, output_file_s))
    def test_in_frame_non_hybrid_protein(self):
        """Integrate the ``in_frame_non_hybrid_protein`` input with its GTF.

        Transcript IDs noted by the original author as necessary:

        - TMPRSS2: ENST00000424093
        - ERG: ENST00000398910
        """
        test_id = 'in_frame_non_hybrid_protein'
        ref_prefix = TEST_DIR + "test_" + test_id

        input_file = ref_prefix + ".in.dbed"
        expected = ref_prefix + ".out.txt"
        produced = T_TEST_DIR + "test_" + test_id + ".out.txt"

        # Only one annotation file at present; kept as a loop like the original
        for gtf_file in (ref_prefix + ".gtf",):
            DetectOutput(input_file).integrate(produced, gtf_file, None)

            same = filecmp.cmp(expected, produced)
            self.assertTrue(same, msg=get_diff(expected, produced))
Beispiel #22
0
    def test_03(self):
        """Decompose the fixed ``03`` BAM and compare the exported table.

        The SAM input is converted with ``sam_to_fixed_bam``; the export of
        ``IntronDecomposition`` is compared against the reference file.
        """
        test_id = '03'
        result_name = "test_" + test_id + ".out.dbed"

        source_sam = TEST_DIR + "test_" + test_id + ".sam"
        fixed_bam = T_TEST_DIR + "test_" + test_id + ".fixed.bam"
        expected = TEST_DIR + result_name
        produced = T_TEST_DIR + result_name

        sam_to_fixed_bam(source_sam, fixed_bam, T_TEST_DIR)

        decomposition = IntronDecomposition(fixed_bam)
        decomposition.decompose(0)

        with open(produced, "w") as handle:
            decomposition.export(handle)

        same = filecmp.cmp(expected, produced)
        self.assertTrue(same, msg=get_diff(expected, produced))
Beispiel #23
0
    def test_32_pruning_hotfix(self):
        """Decompose the fixed ``32`` BAM and compare the exported table.

        NOTE(review): the original comment states that the test data has not
        been checked and that the run should merely not throw an exception;
        the output is nevertheless compared against the reference file.
        """
        test_id = '32'
        result_name = "test_" + test_id + ".out.dbed"

        source_sam = TEST_DIR + "test_" + test_id + ".sam"
        fixed_bam = T_TEST_DIR + "test_" + test_id + ".fixed.bam"
        expected = TEST_DIR + result_name
        produced = T_TEST_DIR + result_name

        sam_to_fixed_bam(source_sam, fixed_bam, T_TEST_DIR)

        decomposition = IntronDecomposition(fixed_bam)
        decomposition.decompose(0)

        with open(produced, "w") as handle:
            decomposition.export(handle)

        same = filecmp.cmp(expected, produced)
        self.assertTrue(same, msg=get_diff(expected, produced))
Beispiel #24
0
    def test_01(self):
        """Decompose the fixed ``artifact_reads_01`` BAM and compare the export.

        The SAM input is converted with ``sam_to_fixed_bam``; the export of
        ``IntronDecomposition`` is compared against the reference file.
        """
        test_id = 'artifact_reads_01'

        input_file_a = TEST_DIR + "test_" + test_id + ".sam"
        fixed_bam = T_TEST_DIR + "test_" + test_id + ".fixed.bam"
        test_file = TEST_DIR + "test_" + test_id + ".out.dbed"
        output_file = T_TEST_DIR + "test_" + test_id + ".out.dbed"

        sam_to_fixed_bam(input_file_a, fixed_bam, T_TEST_DIR)

        ic = IntronDecomposition(fixed_bam)
        ic.decompose(0)

        # The context manager closes the handle even if export() raises
        with open(output_file, "w") as fh:
            ic.export(fh)

        self.assertTrue(filecmp.cmp(test_file, output_file),
                        msg=get_diff(test_file, output_file))
Beispiel #25
0
    def test_02(self):
        """Convert ``test_terg_02.filtered.bam`` with ``ChimericAlignment``.

        A ``tmp`` directory is created when missing and passed to ``convert``.
        The converted BAM file is dumped to SAM text with ``pysam.view`` and
        compared against the reference SAM file with ``filecmp.cmp``.
        """
        if not os.path.exists("tmp"):
            os.mkdir("tmp")

        input_file = TEST_DIR + "test_terg_02.filtered.bam"
        output_file = T_TEST_DIR + "test_terg_02.filtered.fixed.bam"
        output_file_s = T_TEST_DIR + "test_terg_02.filtered.fixed.sam"

        test_file = TEST_DIR + "test_terg_02.filtered.fixed.sam"

        alignment_handle = ChimericAlignment(input_file)
        alignment_handle.convert(output_file, "tmp")

        # Bam2Sam; the context manager closes the handle even if
        # pysam.view() raises
        with open(output_file_s, "w") as fhq:
            fhq.write(pysam.view(output_file))

        self.assertTrue(filecmp.cmp(test_file, output_file_s),
                        msg=get_diff(test_file, output_file_s))
Beispiel #26
0
    def test_21_tests_extracting_subnetworks_in_ideal_optimization_usecase(
            self):
        """Decompose the fixed ``21`` BAM and compare the exported table.

        NOTE(review): the original comment states that the test data has not
        been checked and that the run should merely not throw an exception;
        the output is nevertheless compared against the reference file.
        """
        test_id = '21'
        result_name = "test_" + test_id + ".out.dbed"

        source_sam = TEST_DIR + "test_" + test_id + ".sam"
        fixed_bam = T_TEST_DIR + "test_" + test_id + ".fixed.bam"
        expected = TEST_DIR + result_name
        produced = T_TEST_DIR + result_name

        sam_to_fixed_bam(source_sam, fixed_bam, T_TEST_DIR)

        decomposition = IntronDecomposition(fixed_bam)
        decomposition.decompose(0)

        with open(produced, "w") as handle:
            decomposition.export(handle)

        same = filecmp.cmp(expected, produced)
        self.assertTrue(same, msg=get_diff(expected, produced))
Beispiel #27
0
    def test_01(self):
        """Decompose the fixed ``01`` BAM and compare the exported table.

        The SAM input is converted with ``sam_to_fixed_bam``; the export of
        ``IntronDecomposition`` is compared against the reference file.
        """
        test_id = '01'

        input_file_a = TEST_DIR + "test_" + test_id + ".sam"
        fixed_bam = T_TEST_DIR + "test_" + test_id + ".fixed.bam"
        test_file = TEST_DIR + "test_" + test_id + ".out.dbed"
        output_file = T_TEST_DIR + "test_" + test_id + ".out.dbed"

        sam_to_fixed_bam(input_file_a, fixed_bam, T_TEST_DIR)

        ic = IntronDecomposition(fixed_bam)
        ic.decompose(0)

        # The context manager closes the handle even if export() raises
        with open(output_file, "w") as fh:
            ic.export(fh)

        self.assertTrue(filecmp.cmp(test_file, output_file),
                        msg=get_diff(test_file, output_file))
Beispiel #28
0
    def test_sj_01(self):
        """Run fix, detect and integrate through the CLI on ``splice_site_motif_01``.

        Each command must exit with code 0; the final integrate output (run
        with ``--fasta`` and no GTF option) is compared against the reference
        file.
        """
        TEST_DIR, T_TEST_DIR = self.__get_temp_dirs()

        test_id = 'splice_site_motif_01'
        ref_prefix = TEST_DIR + "test_" + test_id
        tmp_prefix = T_TEST_DIR + "test_" + test_id

        input_sam = ref_prefix + ".in.sam"
        input_bam = tmp_prefix + ".fixed.bam"
        input_file = tmp_prefix + ".dbed"

        fasta_file = ref_prefix + ".in.fa"

        produced = tmp_prefix + ".out.dbed"
        expected = ref_prefix + ".out.dbed"

        pipeline = (
            # sam -> fixed bam
            ["bin/dr-disco", "fix", input_sam, input_bam],
            # fixed bam -> dr-disco detect
            ["bin/dr-disco", "detect", "-m", "0", input_bam, input_file],
            # dr-disco-detect (skip classify) -> dr-disco integrate
            ["bin/dr-disco", "integrate", "--fasta", fasta_file, input_file,
             produced],
        )

        # Steps run strictly in order; a failing step aborts the test
        for command in pipeline:
            exit_code = subprocess.call(command)
            self.assertEqual(exit_code, 0, msg=" ".join(map(str, command)))

        same = filecmp.cmp(expected, produced)
        self.assertTrue(same, msg=get_diff(expected, produced))
Beispiel #29
0
    def test_08(self):
        """Decompose the fixed ``artifact_reads_08`` BAM and compare the export.

        Per the original author's note this is a true TMPRSS2-ERG inv/del
        that is required to pass, which is possible if stranding is taken
        into account correctly.
        """
        test_id = 'artifact_reads_08'

        input_file_a = TEST_DIR + "test_" + test_id + ".sam"
        fixed_bam = T_TEST_DIR + "test_" + test_id + ".fixed.bam"
        test_file = TEST_DIR + "test_" + test_id + ".out.dbed"
        output_file = T_TEST_DIR + "test_" + test_id + ".out.dbed"

        sam_to_fixed_bam(input_file_a, fixed_bam, T_TEST_DIR)

        ic = IntronDecomposition(fixed_bam)
        ic.decompose(0)

        # The context manager closes the handle even if export() raises
        with open(output_file, "w") as fh:
            ic.export(fh)

        self.assertTrue(filecmp.cmp(test_file, output_file),
                        msg=get_diff(test_file, output_file))
Beispiel #30
0
    def test_29_mismatch_ratio_and_stddev_b11383(self):
        """Decompose the fixed ``29`` BAM and compare the exported table.

        Expected totals noted by the original author:

        - sum of matches: 5675 (2919 + 2756)
        - sum of mismatches: 149 (73 + 76)

        NOTE(review): the original comment also states that the test data has
        not been checked and that the run should merely not throw an
        exception; the output is nevertheless compared against the reference
        file.
        """
        test_id = '29'
        result_name = "test_" + test_id + ".out.dbed"

        source_sam = TEST_DIR + "test_" + test_id + ".sam"
        fixed_bam = T_TEST_DIR + "test_" + test_id + ".fixed.bam"
        expected = TEST_DIR + result_name
        produced = T_TEST_DIR + result_name

        sam_to_fixed_bam(source_sam, fixed_bam, T_TEST_DIR)

        decomposition = IntronDecomposition(fixed_bam)
        decomposition.decompose(0)

        with open(produced, "w") as handle:
            decomposition.export(handle)

        same = filecmp.cmp(expected, produced)
        self.assertTrue(same, msg=get_diff(expected, produced))