Ejemplo n.º 1
0
    def test_sj_04__CA_GT_d4(self):
        """Run fix -> detect -> integrate on 'splice_site_motif_04'.

        The detect output is passed straight to integrate (classify is
        skipped), with a FASTA file and without a GTF file.  The integrate
        output must be byte-identical to the stored reference file.
        """
        test_id = 'splice_site_motif_04'

        input_sam = TEST_DIR + "test_" + test_id + ".in.sam"
        input_bam = T_TEST_DIR + "test_" + test_id + ".fixed.bam"
        input_file = T_TEST_DIR + "test_" + test_id + ".dbed"

        gtf_file = None
        fasta_file = TEST_DIR + "test_" + test_id + ".in.fa"

        output_file = T_TEST_DIR + "test_" + test_id + ".out.dbed"
        test_file = TEST_DIR + "test_" + test_id + ".out.dbed"

        # sam -> fixed bam
        sam_to_fixed_bam(input_sam, input_bam, T_TEST_DIR)

        # fixed bam -> dr-disco detect
        ic = IntronDecomposition(input_bam)
        ic.decompose(0)
        # The context manager closes (and flushes) the handle even when
        # export() raises, so no descriptor leaks into later tests.
        with open(input_file, "w") as fh:
            ic.export(fh)

        # dr-disco-detect (skip classify) -> dr-disco integrate
        cl = DetectOutput(input_file)

        # Per the original author's note: integrate() used to raise an
        # exception for this input; it now only logs an error, so a plain
        # call is expected to succeed here.
        cl.integrate(output_file, gtf_file, fasta_file)

        self.assertTrue(filecmp.cmp(test_file, output_file),
                        msg=get_diff(test_file, output_file))
Ejemplo n.º 2
0
    def test_sj_02(self):
        """Run fix -> detect -> integrate on 'splice_site_motif_02'.

        The detect output is passed straight to integrate (classify is
        skipped), with a FASTA file and without a GTF file.  The integrate
        output must be byte-identical to the stored reference file.
        """
        test_id = 'splice_site_motif_02'

        input_sam = TEST_DIR + "test_" + test_id + ".in.sam"
        input_bam = T_TEST_DIR + "test_" + test_id + ".fixed.bam"
        input_file = T_TEST_DIR + "test_" + test_id + ".dbed"

        gtf_file = None
        fasta_file = TEST_DIR + "test_" + test_id + ".in.fa"

        output_file = T_TEST_DIR + "test_" + test_id + ".out.dbed"
        test_file = TEST_DIR + "test_" + test_id + ".out.dbed"

        # sam -> fixed bam
        sam_to_fixed_bam(input_sam, input_bam, T_TEST_DIR)

        # fixed bam -> dr-disco detect
        ic = IntronDecomposition(input_bam)
        ic.decompose(0)
        # The context manager closes (and flushes) the handle even when
        # export() raises.
        with open(input_file, "w") as fh:
            ic.export(fh)

        # dr-disco-detect (skip classify) -> dr-disco integrate
        cl = DetectOutput(input_file)
        cl.integrate(output_file, gtf_file, fasta_file)

        self.assertTrue(filecmp.cmp(test_file, output_file),
                        msg=get_diff(test_file, output_file))
Ejemplo n.º 3
0
    def test_02(self):
        """Classify the detect output of 'vcap_err_02' and expect no 'valid' line.

        The SAM fixture is fixed, decomposed and exported, then classified.
        The test passes when no line of the classified file contains the
        substring 'valid'.
        """
        test_id = 'vcap_err_02'

        input_file_a = TEST_DIR + "test_" + test_id + ".sam"
        fixed_bam = T_TEST_DIR + "test_" + test_id + ".fixed.bam"
        detect_file = T_TEST_DIR + "test_" + test_id + ".out.dbed"
        output_file = T_TEST_DIR + "test_" + test_id + ".out.classified.dbed"

        sam_to_fixed_bam(input_file_a, fixed_bam, T_TEST_DIR)

        ic = IntronDecomposition(fixed_bam)
        ic.decompose(0)

        # The context manager closes (and flushes) the handle even when
        # export() raises.
        with open(detect_file, "w") as fh:
            ic.export(fh)

        cl = DetectOutput(detect_file)
        cl.classify(output_file, False, Blacklist(), 1, True)

        # Count the lines that mention 'valid' anywhere (substring match).
        with open(output_file) as fh:
            n_valid = sum(1 for line in fh if 'valid' in line)

        self.assertEqual(n_valid, 0)
Ejemplo n.º 4
0
    def test_07(self):
        """Smoke-test the detect step on fixture '07'.

        The SAM fixture is fixed, decomposed and exported to a scratch file.
        Nothing is asserted: the test passes as long as no step raises.
        """
        test_id = '07'

        # Shared path stems for the fixture and scratch directories.
        fixture_stem = TEST_DIR + "test_" + test_id
        scratch_stem = T_TEST_DIR + "test_" + test_id

        sam_path = fixture_stem + ".sam"
        bam_path = scratch_stem + ".fixed.bam"
        dbed_path = scratch_stem + ".out.dbed"

        sam_to_fixed_bam(sam_path, bam_path, T_TEST_DIR)

        decomposition = IntronDecomposition(bam_path)
        decomposition.decompose(0)

        with open(dbed_path, "w") as out_handle:
            decomposition.export(out_handle)
Ejemplo n.º 5
0
    def test_detect_02(self):
        """Run `bin/dr-disco detect -m 0` on fixture '02' via a subprocess.

        The command must exit with status 0 and its output file must be
        byte-identical to the stored reference file.
        """
        TEST_DIR, T_TEST_DIR = self.__get_temp_dirs()

        test_id = '02'

        # Shared path stems for the fixture and scratch directories.
        fixture_stem = TEST_DIR + "test_" + test_id
        scratch_stem = T_TEST_DIR + "test_" + test_id

        sam_path = fixture_stem + ".sam"
        bam_path = scratch_stem + ".fixed.bam"
        expected_path = fixture_stem + ".out.dbed"
        produced_path = scratch_stem + ".out.dbed"

        sam_to_fixed_bam(sam_path, bam_path, T_TEST_DIR)

        exit_status = subprocess.call(
            ["bin/dr-disco", "detect", "-m", "0", bam_path, produced_path])
        self.assertEqual(exit_status, 0)

        files_match = filecmp.cmp(expected_path, produced_path)
        diff_text = get_diff(expected_path, produced_path)
        self.assertTrue(files_match, msg=diff_text)
Ejemplo n.º 6
0
    def test_03(self):
        """Run the detect step on fixture '03' and compare with the reference.

        The SAM fixture is fixed, decomposed and exported; the exported file
        must be byte-identical to the stored reference file.
        """
        test_id = '03'

        # Shared path stems for the fixture and scratch directories.
        fixture_stem = TEST_DIR + "test_" + test_id
        scratch_stem = T_TEST_DIR + "test_" + test_id

        sam_path = fixture_stem + ".sam"
        bam_path = scratch_stem + ".fixed.bam"
        expected_path = fixture_stem + ".out.dbed"
        produced_path = scratch_stem + ".out.dbed"

        sam_to_fixed_bam(sam_path, bam_path, T_TEST_DIR)

        decomposition = IntronDecomposition(bam_path)
        decomposition.decompose(0)

        with open(produced_path, "w") as out_handle:
            decomposition.export(out_handle)

        files_match = filecmp.cmp(expected_path, produced_path)
        diff_text = get_diff(expected_path, produced_path)
        self.assertTrue(files_match, msg=diff_text)
Ejemplo n.º 7
0
    def test_01(self):
        """Run the detect step on 'artifact_reads_01' and compare with the reference.

        The SAM fixture is fixed, decomposed and exported; the exported file
        must be byte-identical to the stored reference file.
        """
        test_id = 'artifact_reads_01'

        input_file_a = TEST_DIR + "test_" + test_id + ".sam"
        fixed_bam = T_TEST_DIR + "test_" + test_id + ".fixed.bam"
        test_file = TEST_DIR + "test_" + test_id + ".out.dbed"
        output_file = T_TEST_DIR + "test_" + test_id + ".out.dbed"

        sam_to_fixed_bam(input_file_a, fixed_bam, T_TEST_DIR)

        ic = IntronDecomposition(fixed_bam)
        ic.decompose(0)

        # The context manager closes (and flushes) the handle even when
        # export() raises.
        with open(output_file, "w") as fh:
            ic.export(fh)

        self.assertTrue(filecmp.cmp(test_file, output_file),
                        msg=get_diff(test_file, output_file))
Ejemplo n.º 8
0
    def test_32_pruning_hotfix(self):
        """Run the detect step on fixture '32' and compare with the reference.

        The SAM fixture is fixed, decomposed and exported; the exported file
        must be byte-identical to the stored reference file.
        """
        test_id = '32'

        # Shared path stems for the fixture and scratch directories.
        fixture_stem = TEST_DIR + "test_" + test_id
        scratch_stem = T_TEST_DIR + "test_" + test_id

        sam_path = fixture_stem + ".sam"
        bam_path = scratch_stem + ".fixed.bam"
        expected_path = fixture_stem + ".out.dbed"
        produced_path = scratch_stem + ".out.dbed"

        sam_to_fixed_bam(sam_path, bam_path, T_TEST_DIR)

        decomposition = IntronDecomposition(bam_path)
        decomposition.decompose(0)

        with open(produced_path, "w") as out_handle:
            decomposition.export(out_handle)

        # NOTE(review): an earlier comment stated the reference data was not
        # checked and the test should merely not raise; the comparison below
        # nevertheless enforces an exact match -- confirm the fixture is
        # trustworthy.
        files_match = filecmp.cmp(expected_path, produced_path)
        diff_text = get_diff(expected_path, produced_path)
        self.assertTrue(files_match, msg=diff_text)
Ejemplo n.º 9
0
    def test_01(self):
        """Run the detect step on fixture '01' and compare with the reference.

        The SAM fixture is fixed, decomposed and exported; the exported file
        must be byte-identical to the stored reference file.  On mismatch the
        failure message carries the output of get_diff().
        """
        test_id = '01'

        input_file_a = TEST_DIR + "test_" + test_id + ".sam"
        fixed_bam = T_TEST_DIR + "test_" + test_id + ".fixed.bam"
        test_file = TEST_DIR + "test_" + test_id + ".out.dbed"
        output_file = T_TEST_DIR + "test_" + test_id + ".out.dbed"

        sam_to_fixed_bam(input_file_a, fixed_bam, T_TEST_DIR)

        ic = IntronDecomposition(fixed_bam)
        ic.decompose(0)

        # The context manager closes (and flushes) the handle even when
        # export() raises.
        with open(output_file, "w") as fh:
            ic.export(fh)

        self.assertTrue(filecmp.cmp(test_file, output_file),
                        msg=get_diff(test_file, output_file))
Ejemplo n.º 10
0
    def test_21_tests_extracting_subnetworks_in_ideal_optimization_usecase(self):
        """Run the detect step on fixture '21' and compare with the reference.

        The SAM fixture is fixed, decomposed and exported; the exported file
        must be byte-identical to the stored reference file.
        """
        test_id = '21'

        # Shared path stems for the fixture and scratch directories.
        fixture_stem = TEST_DIR + "test_" + test_id
        scratch_stem = T_TEST_DIR + "test_" + test_id

        sam_path = fixture_stem + ".sam"
        bam_path = scratch_stem + ".fixed.bam"
        expected_path = fixture_stem + ".out.dbed"
        produced_path = scratch_stem + ".out.dbed"

        sam_to_fixed_bam(sam_path, bam_path, T_TEST_DIR)

        decomposition = IntronDecomposition(bam_path)
        decomposition.decompose(0)

        with open(produced_path, "w") as out_handle:
            decomposition.export(out_handle)

        # NOTE(review): an earlier comment stated the reference data was not
        # checked and the test should merely not raise; the comparison below
        # nevertheless enforces an exact match -- confirm the fixture is
        # trustworthy.
        files_match = filecmp.cmp(expected_path, produced_path)
        diff_text = get_diff(expected_path, produced_path)
        self.assertTrue(files_match, msg=diff_text)
Ejemplo n.º 11
0
    def test_08(self):
        """Run the detect step on 'artifact_reads_08' and compare with the reference.

        Per the original author's note this fixture is a true TMPRSS2-ERG
        inv/del that is required to pass, which is possible if stranding is
        taken into account correctly.  The exported file must be
        byte-identical to the stored reference file.
        """
        test_id = 'artifact_reads_08'

        input_file_a = TEST_DIR + "test_" + test_id + ".sam"
        fixed_bam = T_TEST_DIR + "test_" + test_id + ".fixed.bam"
        test_file = TEST_DIR + "test_" + test_id + ".out.dbed"
        output_file = T_TEST_DIR + "test_" + test_id + ".out.dbed"

        sam_to_fixed_bam(input_file_a, fixed_bam, T_TEST_DIR)

        ic = IntronDecomposition(fixed_bam)
        ic.decompose(0)

        # The context manager closes (and flushes) the handle even when
        # export() raises.
        with open(output_file, "w") as fh:
            ic.export(fh)

        self.assertTrue(filecmp.cmp(test_file, output_file),
                        msg=get_diff(test_file, output_file))
Ejemplo n.º 12
0
    def test_29_mismatch_ratio_and_stddev_b11383(self):
        """Run the detect step on fixture '29' and compare with the reference.

        Expected totals noted by the original author:
          * sum of matches:    5675 (2919 + 2756)
          * sum of mismatches:  149 (73 + 76)

        The exported file must be byte-identical to the stored reference
        file.
        """
        test_id = '29'

        # Shared path stems for the fixture and scratch directories.
        fixture_stem = TEST_DIR + "test_" + test_id
        scratch_stem = T_TEST_DIR + "test_" + test_id

        sam_path = fixture_stem + ".sam"
        bam_path = scratch_stem + ".fixed.bam"
        expected_path = fixture_stem + ".out.dbed"
        produced_path = scratch_stem + ".out.dbed"

        sam_to_fixed_bam(sam_path, bam_path, T_TEST_DIR)

        decomposition = IntronDecomposition(bam_path)
        decomposition.decompose(0)

        with open(produced_path, "w") as out_handle:
            decomposition.export(out_handle)

        # NOTE(review): an earlier comment stated the reference data was not
        # checked and the test should merely not raise; the comparison below
        # nevertheless enforces an exact match -- confirm the fixture is
        # trustworthy.
        files_match = filecmp.cmp(expected_path, produced_path)
        diff_text = get_diff(expected_path, produced_path)
        self.assertTrue(files_match, msg=diff_text)
Ejemplo n.º 13
0
    def test_01(self):
        """Run fix -> detect -> classify -> integrate on fixture '01'.

        The output of detect, classify and integrate is each compared
        byte-for-byte with its stored reference file; the output of the fix
        step is not checked.
        """
        test_id = '01'

        # Shared path stems for the fixture and scratch directories.
        fixture_stem = TEST_DIR + "test_" + test_id
        scratch_stem = T_TEST_DIR + "test_" + test_id

        unfixed_sam = fixture_stem + ".sam"
        fixed_bam = scratch_stem + ".fixed.bam"

        detect_out = scratch_stem + "_detect.out.txt"
        detect_ref = fixture_stem + "_detect.out.txt"

        classify_out = scratch_stem + "_classify.out.txt"
        classify_ref = fixture_stem + "_classify.out.txt"

        integrate_out = scratch_stem + "_integrate.out.txt"
        integrate_ref = fixture_stem + "_integrate.out.txt"

        # Step 01: dr-disco fix (output intentionally not compared)
        sam_to_fixed_bam(unfixed_sam, fixed_bam, T_TEST_DIR)

        # Step 02: dr-disco detect (check appropriate values and columns)
        decomposition = IntronDecomposition(fixed_bam)
        decomposition.decompose(0)

        with open(detect_out, "w") as out_handle:
            decomposition.export(out_handle)

        detect_matches = filecmp.cmp(detect_ref, detect_out)
        detect_diff = get_diff(detect_ref, detect_out)
        self.assertTrue(detect_matches, msg=detect_diff)

        # Step 03: dr-disco classify
        classifier = DetectOutput(detect_out)
        classifier.classify(classify_out, False, Blacklist(), 25, True)

        classify_matches = filecmp.cmp(classify_ref, classify_out)
        classify_diff = get_diff(classify_ref, classify_out)
        self.assertTrue(classify_matches, msg=classify_diff)

        # Step 04: dr-disco integrate (no GTF and no FASTA supplied)
        integrator = DetectOutput(classify_out)
        integrator.integrate(integrate_out, None, None)

        integrate_matches = filecmp.cmp(integrate_ref, integrate_out)
        integrate_diff = get_diff(integrate_ref, integrate_out)
        self.assertTrue(integrate_matches, msg=integrate_diff)
Ejemplo n.º 14
0
    def test_18_s27(self):
        """Run the detect step on fixture '18' and compare lines order-insensitively.

        The exported file may list its lines in a different order than the
        stored reference file, so both files are split on newlines, sorted,
        and the resulting lists are compared.
        """
        test_id = '18'

        input_file_a = TEST_DIR + "test_" + test_id + ".sam"
        fixed_bam = T_TEST_DIR + "test_" + test_id + ".fixed.bam"
        test_file = TEST_DIR + "test_" + test_id + ".out.dbed"
        output_file = T_TEST_DIR + "test_" + test_id + ".out.dbed"

        sam_to_fixed_bam(input_file_a, fixed_bam, T_TEST_DIR)

        ic = IntronDecomposition(fixed_bam)
        ic.decompose(0)

        with open(output_file, "w") as fh:
            ic.export(fh)

        # Order is different, but content matching is still valid.  Read both
        # files through context managers so the handles are closed promptly.
        with open(test_file, 'r') as fh:
            expected_lines = sorted(fh.read().split("\n"))
        with open(output_file, 'r') as fh:
            produced_lines = sorted(fh.read().split("\n"))

        # assertEqual reports the differing elements on failure, unlike
        # assertTrue on a pre-computed boolean.
        self.assertEqual(expected_lines, produced_lines)
Ejemplo n.º 15
0
    def test_31_many_muts_on_test_12_for_testing_manymuts_merging_in_subgraphs(self):
        """Run the detect step on fixture '31' and compare with the reference.

        Expected totals noted by the original author:
            mm needs to be 4*1=4
            m needs to be 4*50=200

        The exported file must be byte-identical to the stored reference
        file.
        """
        test_id = '31'

        # Shared path stems for the fixture and scratch directories.
        fixture_stem = TEST_DIR + "test_" + test_id
        scratch_stem = T_TEST_DIR + "test_" + test_id

        sam_path = fixture_stem + ".sam"
        bam_path = scratch_stem + ".fixed.bam"
        expected_path = fixture_stem + ".out.dbed"
        produced_path = scratch_stem + ".out.dbed"

        sam_to_fixed_bam(sam_path, bam_path, T_TEST_DIR)

        decomposition = IntronDecomposition(bam_path)
        decomposition.decompose(0)

        with open(produced_path, "w") as out_handle:
            decomposition.export(out_handle)

        # NOTE(review): an earlier comment stated the reference data was not
        # checked and the test should merely not raise; the comparison below
        # nevertheless enforces an exact match -- confirm the fixture is
        # trustworthy.
        files_match = filecmp.cmp(expected_path, produced_path)
        diff_text = get_diff(expected_path, produced_path)
        self.assertTrue(files_match, msg=diff_text)