예제 #1
0
    def test_sj_02(self):
        """Run detect and integrate on the 'splice_site_motif_02' fixture.

        The input SAM is converted to a fixed BAM, decomposed and exported
        to an intermediate file, which is then integrated with a FASTA
        file but without a GTF file.  The integrate output must be
        identical to the stored expected file.
        """
        test_id = 'splice_site_motif_02'

        input_sam = TEST_DIR + "test_" + test_id + ".in.sam"
        input_bam = T_TEST_DIR + "test_" + test_id + ".fixed.bam"
        input_file = T_TEST_DIR + "test_" + test_id + ".dbed"

        gtf_file = None
        fasta_file = TEST_DIR + "test_" + test_id + ".in.fa"

        output_file = T_TEST_DIR + "test_" + test_id + ".out.dbed"
        test_file = TEST_DIR + "test_" + test_id + ".out.dbed"

        # sam -> fixed bam
        sam_to_fixed_bam(input_sam, input_bam, T_TEST_DIR)

        # fixed bam -> dr-disco detect
        ic = IntronDecomposition(input_bam)
        ic.decompose(0)
        # The context manager closes the handle even when export() raises,
        # so a failing export cannot leak an open file descriptor.
        with open(input_file, "w") as fh:
            ic.export(fh)

        # dr-disco-detect (skip classify) -> dr-disco integrate
        cl = DetectOutput(input_file)
        cl.integrate(output_file, gtf_file, fasta_file)

        # On mismatch the failure message carries the diff of both files.
        self.assertTrue(filecmp.cmp(test_file, output_file),
                        msg=get_diff(test_file, output_file))
예제 #2
0
    def test_sj_04__CA_GT_d4(self):
        """Run detect and integrate on the 'splice_site_motif_04' fixture.

        The input SAM is converted to a fixed BAM, decomposed and exported
        to an intermediate file, which is then integrated with a FASTA
        file but without a GTF file.  The integrate output must be
        identical to the stored expected file.
        """
        test_id = 'splice_site_motif_04'

        input_sam = TEST_DIR + "test_" + test_id + ".in.sam"
        input_bam = T_TEST_DIR + "test_" + test_id + ".fixed.bam"
        input_file = T_TEST_DIR + "test_" + test_id + ".dbed"

        gtf_file = None
        fasta_file = TEST_DIR + "test_" + test_id + ".in.fa"

        output_file = T_TEST_DIR + "test_" + test_id + ".out.dbed"
        test_file = TEST_DIR + "test_" + test_id + ".out.dbed"

        # sam -> fixed bam
        sam_to_fixed_bam(input_sam, input_bam, T_TEST_DIR)

        # fixed bam -> dr-disco detect
        ic = IntronDecomposition(input_bam)
        ic.decompose(0)
        # The context manager closes the handle even when export() raises,
        # so a failing export cannot leak an open file descriptor.
        with open(input_file, "w") as fh:
            ic.export(fh)

        # dr-disco-detect (skip classify) -> dr-disco integrate
        cl = DetectOutput(input_file)

        # Originally this call triggered an exception; now an error is only
        # logged, so the call is expected to complete and the output is
        # compared as usual.
        cl.integrate(output_file, gtf_file, fasta_file)

        # On mismatch the failure message carries the diff of both files.
        self.assertTrue(filecmp.cmp(test_file, output_file),
                        msg=get_diff(test_file, output_file))
    def test_s041_nocrash(self):
        """Integrate the 'terg_s041_b' fixture with example_refseq.gff.

        The GFF file is passed as the GTF argument and no FASTA file is
        supplied.  The integrate output must be identical to the stored
        expected file.
        """
        test_id = 'terg_s041_b'
        stem = "test_" + test_id

        # Fixture inputs and the expected result live in TEST_DIR; the
        # freshly produced result is written to T_TEST_DIR.
        expected_path = TEST_DIR + stem + ".out.txt"
        produced_path = T_TEST_DIR + stem + ".out.txt"
        dbed_path = TEST_DIR + stem + ".in.dbed"
        annotation_path = TEST_DIR + "example_refseq.gff"

        detect_output = DetectOutput(dbed_path)
        detect_output.integrate(produced_path, annotation_path, None)

        files_match = filecmp.cmp(expected_path, produced_path)
        self.assertTrue(files_match,
                        msg=get_diff(expected_path, produced_path))
    def test_s041_no_gtf(self):
        """Integrate the 'terg_s041_b' fixture without a GTF file.

        Neither a GTF nor a FASTA file is supplied.  The integrate output
        must be identical to the stored expected file.
        """
        test_id = 'terg_s041_b'
        stem = "test_" + test_id

        # Fixture input and the expected result live in TEST_DIR; the
        # freshly produced result is written to T_TEST_DIR.
        expected_path = TEST_DIR + stem + ".out.txt"
        produced_path = T_TEST_DIR + stem + ".out.txt"
        dbed_path = TEST_DIR + stem + ".in.dbed"

        detect_output = DetectOutput(dbed_path)
        detect_output.integrate(produced_path, None, None)

        files_match = filecmp.cmp(expected_path, produced_path)
        self.assertTrue(files_match,
                        msg=get_diff(expected_path, produced_path))
    def test_s041(self):
        """Integrate the 'terg_s041' fixture once per GTF variant.

        Two GTF files are tried in turn ('.in.gtf' and
        '.in.no_chr_prefix.gtf'); no FASTA file is supplied.  Each run
        writes to the same output path and must reproduce the same stored
        expected file.
        """
        test_id = 'terg_s041'
        stem = "test_" + test_id

        expected_path = TEST_DIR + stem + ".out.txt"
        produced_path = T_TEST_DIR + stem + ".out.txt"
        dbed_path = TEST_DIR + stem + ".in.dbed"

        gtf_suffixes = (".in.gtf", ".in.no_chr_prefix.gtf")
        annotation_paths = [TEST_DIR + stem + suffix
                            for suffix in gtf_suffixes]

        for annotation_path in annotation_paths:
            # A fresh DetectOutput is built for every GTF variant.
            detect_output = DetectOutput(dbed_path)
            detect_output.integrate(produced_path, annotation_path, None)

            files_match = filecmp.cmp(expected_path, produced_path)
            self.assertTrue(files_match,
                            msg=get_diff(expected_path, produced_path))
    def test_in_frame_non_hybrid_protein(self):
        """Integrate the 'in_frame_non_hybrid_protein' fixture with its GTF.

        No FASTA file is supplied.  The integrate output must be identical
        to the stored expected file.
        """
        test_id = 'in_frame_non_hybrid_protein'
        stem = "test_" + test_id
        # Transcript IDs necessary:
        # - TMPRSS2: ENST00000424093
        # - ERG: ENST00000398910

        expected_path = TEST_DIR + stem + ".out.txt"
        produced_path = T_TEST_DIR + stem + ".out.txt"
        dbed_path = TEST_DIR + stem + ".in.dbed"
        annotation_paths = [TEST_DIR + stem + ".gtf"]

        for annotation_path in annotation_paths:
            # A fresh DetectOutput is built for every GTF file in the list.
            detect_output = DetectOutput(dbed_path)
            detect_output.integrate(produced_path, annotation_path, None)

            files_match = filecmp.cmp(expected_path, produced_path)
            self.assertTrue(files_match,
                            msg=get_diff(expected_path, produced_path))
예제 #7
0
    def test_01(self):
        """Run fix, detect, classify and integrate in sequence on fixture '01'.

        The outputs of the detect, classify and integrate steps are each
        compared against a stored expected file as soon as they are
        produced; the output of the fix step is not checked.
        """
        test_id = '01'
        stem = "test_" + test_id

        sam_path = TEST_DIR + stem + ".sam"
        bam_path = T_TEST_DIR + stem + ".fixed.bam"

        # Expected files live in TEST_DIR, produced files in T_TEST_DIR.
        detect_expected = TEST_DIR + stem + "_detect.out.txt"
        detect_produced = T_TEST_DIR + stem + "_detect.out.txt"

        classify_expected = TEST_DIR + stem + "_classify.out.txt"
        classify_produced = T_TEST_DIR + stem + "_classify.out.txt"

        integrate_expected = TEST_DIR + stem + "_integrate.out.txt"
        integrate_produced = T_TEST_DIR + stem + "_integrate.out.txt"

        # Step 01: dr-disco fix (result is deliberately not compared)
        sam_to_fixed_bam(sam_path, bam_path, T_TEST_DIR)

        # Step 02: dr-disco detect (check appropriate values and columns)
        decomposition = IntronDecomposition(bam_path)
        decomposition.decompose(0)

        with open(detect_produced, "w") as handle:
            decomposition.export(handle)

        detect_ok = filecmp.cmp(detect_expected, detect_produced)
        self.assertTrue(detect_ok,
                        msg=get_diff(detect_expected, detect_produced))

        # Step 03: dr-disco classify (reads the detect output)
        classifier = DetectOutput(detect_produced)
        classifier.classify(classify_produced, False, Blacklist(), 25, True)

        classify_ok = filecmp.cmp(classify_expected, classify_produced)
        self.assertTrue(classify_ok,
                        msg=get_diff(classify_expected, classify_produced))

        # Step 04: dr-disco integrate (reads the classify output)
        integrator = DetectOutput(classify_produced)
        integrator.integrate(integrate_produced, None, None)

        integrate_ok = filecmp.cmp(integrate_expected, integrate_produced)
        self.assertTrue(integrate_ok,
                        msg=get_diff(integrate_expected, integrate_produced))