Ejemplo n.º 1
0
    def test_01_gzip(self):
        # Classify a gzip-compressed copy of the 'test_01' input and compare the
        # result with the stored reference file 'test_01.out.dbed'.
        test_id = '01'

        input_file = TEST_DIR + "test_" + test_id + ".in.dbed"
        input_file_gz = T_TEST_DIR + "test_" + test_id + ".in.dbed.gz"

        # create gzip archive; both handles are opened in text mode, so the
        # data read is already a str and can be written without conversion
        with open(input_file, 'r') as fh_in, \
                gzip.open(input_file_gz, 'wt') as fh_out:
            fh_out.write(fh_in.read())

        test_file = TEST_DIR + "test_" + test_id + ".out.dbed"
        output_file = T_TEST_DIR + "test_" + test_id + ".out.gz.dbed"

        # NOTE(review): the second positional argument looks like an
        # "only valid" switch (it is True in test_01__only_valid) - confirm
        # against DetectOutput.classify
        cl = DetectOutput(input_file_gz)
        cl.classify(output_file, False, Blacklist(), 1, True)

        self.assertTrue(filecmp.cmp(test_file, output_file),
                        msg=get_diff(test_file, output_file))
Ejemplo n.º 2
0
    def test_sj_02(self):
        # Runs fix -> detect -> integrate on the 'splice_site_motif_02' fixture
        # (no GTF, FASTA supplied) and compares the integrate output with the
        # stored reference file.
        test_id = 'splice_site_motif_02'

        input_sam = TEST_DIR + "test_" + test_id + ".in.sam"
        input_bam = T_TEST_DIR + "test_" + test_id + ".fixed.bam"
        input_file = T_TEST_DIR + "test_" + test_id + ".dbed"

        gtf_file = None
        fasta_file = TEST_DIR + "test_" + test_id + ".in.fa"

        output_file = T_TEST_DIR + "test_" + test_id + ".out.dbed"
        test_file = TEST_DIR + "test_" + test_id + ".out.dbed"

        # sam -> fixed bam
        sam_to_fixed_bam(input_sam, input_bam, T_TEST_DIR)

        # fixed bam -> dr-disco detect
        ic = IntronDecomposition(input_bam)
        ic.decompose(0)
        # the context manager closes the handle even when export() raises
        with open(input_file, "w") as fh:
            ic.export(fh)

        # dr-disco-detect (skip classify) -> dr-disco integrate
        cl = DetectOutput(input_file)
        cl.integrate(output_file, gtf_file, fasta_file)

        self.assertTrue(filecmp.cmp(test_file, output_file),
                        msg=get_diff(test_file, output_file))
Ejemplo n.º 3
0
    def test_sj_04__CA_GT_d4(self):
        # Runs fix -> detect -> integrate on the 'splice_site_motif_04' fixture
        # (no GTF, FASTA supplied) and compares the integrate output with the
        # stored reference file.
        test_id = 'splice_site_motif_04'

        input_sam = TEST_DIR + "test_" + test_id + ".in.sam"
        input_bam = T_TEST_DIR + "test_" + test_id + ".fixed.bam"
        input_file = T_TEST_DIR + "test_" + test_id + ".dbed"

        gtf_file = None
        fasta_file = TEST_DIR + "test_" + test_id + ".in.fa"

        output_file = T_TEST_DIR + "test_" + test_id + ".out.dbed"
        test_file = TEST_DIR + "test_" + test_id + ".out.dbed"

        # sam -> fixed bam
        sam_to_fixed_bam(input_sam, input_bam, T_TEST_DIR)

        # fixed bam -> dr-disco detect
        ic = IntronDecomposition(input_bam)
        ic.decompose(0)
        # the context manager closes the handle even when export() raises
        with open(input_file, "w") as fh:
            ic.export(fh)

        # dr-disco-detect (skip classify) -> dr-disco integrate
        cl = DetectOutput(input_file)

        # originally, this triggered an exception, now we just log an error,
        # so integrate() is expected to complete and write its output
        cl.integrate(output_file, gtf_file, fasta_file)

        self.assertTrue(filecmp.cmp(test_file, output_file),
                        msg=get_diff(test_file, output_file))
Ejemplo n.º 4
0
    def test_02(self):
        # Runs fix -> detect -> classify on the 'vcap_err_02' fixture and checks
        # that no line of the classified output contains the text 'valid'.
        test_id = 'vcap_err_02'

        input_file_a = TEST_DIR + "test_" + test_id + ".sam"
        fixed_bam = T_TEST_DIR + "test_" + test_id + ".fixed.bam"
        detect_file = T_TEST_DIR + "test_" + test_id + ".out.dbed"
        output_file = T_TEST_DIR + "test_" + test_id + ".out.classified.dbed"

        sam_to_fixed_bam(input_file_a, fixed_bam, T_TEST_DIR)

        ic = IntronDecomposition(fixed_bam)
        ic.decompose(0)

        # the context manager closes the handle even when export() raises
        with open(detect_file, "w") as fh:
            ic.export(fh)

        cl = DetectOutput(detect_file)
        cl.classify(output_file, False, Blacklist(), 1, True)

        # count the output lines that mention 'valid' anywhere (plain substring
        # match, so a header line containing the word would be counted as well)
        with open(output_file) as fh:
            n_valid = sum(1 for line in fh if 'valid' in line)

        self.assertEqual(n_valid, 0)
Ejemplo n.º 5
0
    def test_02(self):
        # Classifies the stored 'test_02' input and requires the result to be
        # identical to the stored reference output.
        case = '02'
        reference_stem = TEST_DIR + "test_" + case
        scratch_stem = T_TEST_DIR + "test_" + case

        source_dbed = reference_stem + ".in.dbed"
        expected_dbed = reference_stem + ".out.dbed"
        produced_dbed = scratch_stem + ".out.dbed"

        detect_output = DetectOutput(source_dbed)
        detect_output.classify(produced_dbed, False, Blacklist(), 1, True)

        # on mismatch the failure message carries the diff of both files
        identical = filecmp.cmp(expected_dbed, produced_dbed)
        self.assertTrue(identical,
                        msg=get_diff(expected_dbed, produced_dbed))
Ejemplo n.º 6
0
    def test_01__only_valid(self):
        # Classifies the stored 'test_01' input with the second classify()
        # argument set to True and compares the result with the stored
        # '.out.only-valid.dbed' reference.
        # NOTE(review): presumably that flag restricts the output to valid
        # entries - confirm against DetectOutput.classify
        case = '01'
        reference_stem = TEST_DIR + "test_" + case
        scratch_stem = T_TEST_DIR + "test_" + case

        source_dbed = reference_stem + ".in.dbed"
        expected_dbed = reference_stem + ".out.only-valid.dbed"
        produced_dbed = scratch_stem + ".out.only-valid.dbed"

        detect_output = DetectOutput(source_dbed)
        detect_output.classify(produced_dbed, True, Blacklist(), 1, True)

        # on mismatch the failure message carries the diff of both files
        identical = filecmp.cmp(expected_dbed, produced_dbed)
        self.assertTrue(identical,
                        msg=get_diff(expected_dbed, produced_dbed))
Ejemplo n.º 7
0
    def test_s041_nocrash(self):
        # Integrates the 'terg_s041_b' input together with the
        # 'example_refseq.gff' annotation (no FASTA) and compares the result
        # with the stored reference output.
        case = 'terg_s041_b'
        reference_stem = TEST_DIR + "test_" + case

        source_dbed = reference_stem + ".in.dbed"
        annotation = TEST_DIR + "example_refseq.gff"
        expected_txt = reference_stem + ".out.txt"
        produced_txt = T_TEST_DIR + "test_" + case + ".out.txt"

        detect_output = DetectOutput(source_dbed)
        detect_output.integrate(produced_txt, annotation, None)

        # on mismatch the failure message carries the diff of both files
        identical = filecmp.cmp(expected_txt, produced_txt)
        self.assertTrue(identical,
                        msg=get_diff(expected_txt, produced_txt))
Ejemplo n.º 8
0
    def test_s041_no_gtf(self):
        # Integrates the 'terg_s041_b' input without a GTF and without a FASTA
        # file and compares the result with the stored reference output.
        case = 'terg_s041_b'
        reference_stem = TEST_DIR + "test_" + case

        source_dbed = reference_stem + ".in.dbed"
        annotation = None
        expected_txt = reference_stem + ".out.txt"
        produced_txt = T_TEST_DIR + "test_" + case + ".out.txt"

        detect_output = DetectOutput(source_dbed)
        detect_output.integrate(produced_txt, annotation, None)

        # on mismatch the failure message carries the diff of both files
        identical = filecmp.cmp(expected_txt, produced_txt)
        self.assertTrue(identical,
                        msg=get_diff(expected_txt, produced_txt))
Ejemplo n.º 9
0
    def test_s041(self):
        # Integrates the 'terg_s041' input once per GTF variant (the regular
        # one and the '.no_chr_prefix' one) and requires both runs to reproduce
        # the same stored reference output.
        test_id = 'terg_s041'

        input_file = TEST_DIR + "test_" + test_id + ".in.dbed"
        gtf_files = [
            TEST_DIR + "test_" + test_id + ".in.gtf",
            TEST_DIR + "test_" + test_id + ".in.no_chr_prefix.gtf"
        ]
        test_file = TEST_DIR + "test_" + test_id + ".out.txt"
        output_file = T_TEST_DIR + "test_" + test_id + ".out.txt"

        for gtf_file in gtf_files:
            # subTest labels a failure with the GTF that caused it and lets
            # the remaining variant run instead of aborting on the first one
            with self.subTest(gtf_file=gtf_file):
                cl = DetectOutput(input_file)
                cl.integrate(output_file, gtf_file, None)

                self.assertTrue(filecmp.cmp(test_file, output_file),
                                msg=get_diff(test_file, output_file))
Ejemplo n.º 10
0
    def test_in_frame_non_hybrid_protein(self):
        # Integrates the 'in_frame_non_hybrid_protein' input with its GTF
        # annotation (no FASTA) and compares the result with the stored
        # reference output.
        #
        # Transcript ID's necessary:
        # - TMPRSS2: ENST00000424093
        # - ERG: ENST00000398910
        case = 'in_frame_non_hybrid_protein'
        reference_stem = TEST_DIR + "test_" + case

        source_dbed = reference_stem + ".in.dbed"
        annotations = [reference_stem + ".gtf"]
        expected_txt = reference_stem + ".out.txt"
        produced_txt = T_TEST_DIR + "test_" + case + ".out.txt"

        for annotation in annotations:
            detect_output = DetectOutput(source_dbed)
            detect_output.integrate(produced_txt, annotation, None)

            # on mismatch the failure message carries the diff of both files
            identical = filecmp.cmp(expected_txt, produced_txt)
            self.assertTrue(identical,
                            msg=get_diff(expected_txt, produced_txt))
Ejemplo n.º 11
0
    def test_blacklists(self):
        # only test if they don't crash - do not test actual output
        #
        # Loads each of the four shipped blacklist files into its own
        # Blacklist and runs classify() on the 'test_01' input once per
        # blacklist; nothing is asserted about the written file.
        case = '01'

        source_dbed = TEST_DIR + "test_" + case + ".in.dbed"
        produced_dbed = T_TEST_DIR + "test_" + case + ".out.dbed"

        # (loader method name, file) per blacklist, in load order
        sources = (
            ('add_junctions_from_file', 'share/blacklist-junctions.hg19.txt'),
            ('add_junctions_from_file', 'share/blacklist-junctions.hg38.txt'),
            ('add_regions_from_bed', 'share/blacklist-regions.hg19.bed'),
            ('add_regions_from_bed', 'share/blacklist-regions.hg38.bed'),
        )

        # all blacklists are created and filled before any classification runs
        loaded = [Blacklist() for _ in sources]
        for target, (loader, path) in zip(loaded, sources):
            getattr(target, loader)(path)

        for current in loaded:
            detect_output = DetectOutput(source_dbed)
            detect_output.classify(produced_dbed, False, current, 1, True)
Ejemplo n.º 12
0
    def test_01(self):
        # End-to-end run on the 'test_01' fixture: fix, detect, classify and
        # integrate. The detect, classify and integrate outputs are each
        # compared with their stored reference file.
        case = '01'
        reference_stem = TEST_DIR + "test_" + case
        scratch_stem = T_TEST_DIR + "test_" + case

        raw_sam = reference_stem + ".sam"
        repaired_bam = scratch_stem + ".fixed.bam"

        detect_out = scratch_stem + "_detect.out.txt"
        detect_expected = reference_stem + "_detect.out.txt"

        classify_out = scratch_stem + "_classify.out.txt"
        classify_expected = reference_stem + "_classify.out.txt"

        integrate_out = scratch_stem + "_integrate.out.txt"
        integrate_expected = reference_stem + "_integrate.out.txt"

        # Step 01: dr-disco fix (its output is not checked)
        sam_to_fixed_bam(raw_sam, repaired_bam, T_TEST_DIR)

        # Step 02: dr-disco detect (check appropriate values and columns)
        decomposition = IntronDecomposition(repaired_bam)
        decomposition.decompose(0)

        with open(detect_out, "w") as detect_fh:
            decomposition.export(detect_fh)

        detect_matches = filecmp.cmp(detect_expected, detect_out)
        self.assertTrue(detect_matches,
                        msg=get_diff(detect_expected, detect_out))

        # Step 03: dr-disco classify
        classifier = DetectOutput(detect_out)
        classifier.classify(classify_out, False, Blacklist(), 25, True)

        classify_matches = filecmp.cmp(classify_expected, classify_out)
        self.assertTrue(classify_matches,
                        msg=get_diff(classify_expected, classify_out))

        # Step 04: dr-disco integrate (no GTF, no FASTA)
        integrator = DetectOutput(classify_out)
        integrator.integrate(integrate_out, None, None)

        integrate_matches = filecmp.cmp(integrate_expected, integrate_out)
        self.assertTrue(integrate_matches,
                        msg=get_diff(integrate_expected, integrate_out))