def test_01_gzip(self):
    """Classify a gzip-compressed copy of the '01' input and check the result.

    The plain-text ``.in.dbed`` fixture is re-written as a gzip archive under
    ``T_TEST_DIR``; ``DetectOutput`` is then given the ``.gz`` path and the
    classified output is compared with the reference ``.out.dbed`` file.
    """
    test_id = '01'

    input_file = TEST_DIR + "test_" + test_id + ".in.dbed"
    input_file_gz = T_TEST_DIR + "test_" + test_id + ".in.dbed.gz"

    # Create the gzip archive. Both handles are opened in text mode, so the
    # string that is read can be written back without any conversion.
    with open(input_file, 'r') as fh_in, gzip.open(input_file_gz, 'wt') as fh_out:
        fh_out.write(fh_in.read())

    test_file = TEST_DIR + "test_" + test_id + ".out.dbed"
    output_file = T_TEST_DIR + "test_" + test_id + ".out.gz.dbed"

    cl = DetectOutput(input_file_gz)
    cl.classify(output_file, False, Blacklist(), 1, True)

    self.assertTrue(filecmp.cmp(test_file, output_file), msg=get_diff(test_file, output_file))
def test_sj_02(self):
    """Run detect + integrate for 'splice_site_motif_02' and compare the output.

    Pipeline exercised: SAM -> fixed BAM -> ``IntronDecomposition`` export ->
    ``DetectOutput.integrate`` (classification is skipped). Integration is
    run without a GTF file but with a FASTA file.
    """
    test_id = 'splice_site_motif_02'

    input_sam = TEST_DIR + "test_" + test_id + ".in.sam"
    input_bam = T_TEST_DIR + "test_" + test_id + ".fixed.bam"
    input_file = T_TEST_DIR + "test_" + test_id + ".dbed"
    gtf_file = None
    fasta_file = TEST_DIR + "test_" + test_id + ".in.fa"
    output_file = T_TEST_DIR + "test_" + test_id + ".out.dbed"
    test_file = TEST_DIR + "test_" + test_id + ".out.dbed"

    # sam -> fixed bam
    sam_to_fixed_bam(input_sam, input_bam, T_TEST_DIR)

    # fixed bam -> dr-disco detect
    ic = IntronDecomposition(input_bam)
    ic.decompose(0)

    # The context manager guarantees the handle is closed (and flushed)
    # even when export() raises.
    with open(input_file, "w") as fh:
        ic.export(fh)

    # dr-disco-detect (skip classify) -> dr-disco integrate
    cl = DetectOutput(input_file)
    cl.integrate(output_file, gtf_file, fasta_file)

    self.assertTrue(filecmp.cmp(test_file, output_file), msg=get_diff(test_file, output_file))
def test_sj_04__CA_GT_d4(self):
    """Run detect + integrate for 'splice_site_motif_04' and compare the output.

    Pipeline exercised: SAM -> fixed BAM -> ``IntronDecomposition`` export ->
    ``DetectOutput.integrate`` (classification is skipped). Integration is
    run without a GTF file but with a FASTA file.
    """
    test_id = 'splice_site_motif_04'

    input_sam = TEST_DIR + "test_" + test_id + ".in.sam"
    input_bam = T_TEST_DIR + "test_" + test_id + ".fixed.bam"
    input_file = T_TEST_DIR + "test_" + test_id + ".dbed"
    gtf_file = None
    fasta_file = TEST_DIR + "test_" + test_id + ".in.fa"
    output_file = T_TEST_DIR + "test_" + test_id + ".out.dbed"
    test_file = TEST_DIR + "test_" + test_id + ".out.dbed"

    # sam -> fixed bam
    sam_to_fixed_bam(input_sam, input_bam, T_TEST_DIR)

    # fixed bam -> dr-disco detect
    ic = IntronDecomposition(input_bam)
    ic.decompose(0)

    # The context manager guarantees the handle is closed (and flushed)
    # even when export() raises.
    with open(input_file, "w") as fh:
        ic.export(fh)

    # dr-disco-detect (skip classify) -> dr-disco integrate
    cl = DetectOutput(input_file)

    # Originally this input triggered an exception in integrate(); now an
    # error is only logged, so the call is expected to complete and the
    # written output is compared as usual.
    cl.integrate(output_file, gtf_file, fasta_file)

    self.assertTrue(filecmp.cmp(test_file, output_file), msg=get_diff(test_file, output_file))
def test_02(self):
    """Check that classifying the 'vcap_err_02' detect output yields no 'valid' line.

    Pipeline exercised: SAM -> fixed BAM -> ``IntronDecomposition`` export ->
    ``DetectOutput.classify``. The classified file is then scanned and the
    number of lines containing the substring ``valid`` must be zero.
    """
    test_id = 'vcap_err_02'

    input_file_a = TEST_DIR + "test_" + test_id + ".sam"
    fixed_bam = T_TEST_DIR + "test_" + test_id + ".fixed.bam"
    detect_file = T_TEST_DIR + "test_" + test_id + ".out.dbed"
    output_file = T_TEST_DIR + "test_" + test_id + ".out.classified.dbed"

    sam_to_fixed_bam(input_file_a, fixed_bam, T_TEST_DIR)

    ic = IntronDecomposition(fixed_bam)
    ic.decompose(0)

    # The context manager guarantees the handle is closed (and flushed)
    # even when export() raises.
    with open(detect_file, "w") as fh:
        ic.export(fh)

    cl = DetectOutput(detect_file)
    cl.classify(output_file, False, Blacklist(), 1, True)

    # Plain substring match: any line containing 'valid' is counted.
    n_valid = 0
    with open(output_file) as fh:
        for line in fh:
            if 'valid' in line:
                n_valid += 1

    self.assertEqual(n_valid, 0)
def test_02(self):
    """Classify the '02' detect fixture and compare it with the reference output."""
    test_id = '02'
    reference_prefix = TEST_DIR + "test_" + test_id
    scratch_prefix = T_TEST_DIR + "test_" + test_id

    input_file = reference_prefix + ".in.dbed"
    test_file = reference_prefix + ".out.dbed"
    output_file = scratch_prefix + ".out.dbed"

    # Classification is run with a freshly constructed Blacklist.
    detect_output = DetectOutput(input_file)
    detect_output.classify(output_file, False, Blacklist(), 1, True)

    files_match = filecmp.cmp(test_file, output_file)
    self.assertTrue(files_match, msg=get_diff(test_file, output_file))
def test_01__only_valid(self):
    """Classify the '01' fixture with the second classify() argument set to True.

    NOTE(review): judging by the fixture name, that flag presumably limits
    the output to valid entries -- confirm against DetectOutput.classify.
    """
    test_id = '01'
    reference_prefix = TEST_DIR + "test_" + test_id
    scratch_prefix = T_TEST_DIR + "test_" + test_id

    input_file = reference_prefix + ".in.dbed"
    test_file = reference_prefix + ".out.only-valid.dbed"
    output_file = scratch_prefix + ".out.only-valid.dbed"

    detect_output = DetectOutput(input_file)
    detect_output.classify(output_file, True, Blacklist(), 1, True)

    files_match = filecmp.cmp(test_file, output_file)
    self.assertTrue(files_match, msg=get_diff(test_file, output_file))
def test_s041_nocrash(self):
    """Integrate 'terg_s041_b' with the example RefSeq GFF and compare the output.

    No FASTA file is supplied (third integrate() argument is None).
    """
    test_id = 'terg_s041_b'
    reference_prefix = TEST_DIR + "test_" + test_id
    scratch_prefix = T_TEST_DIR + "test_" + test_id

    input_file = reference_prefix + ".in.dbed"
    gtf_file = TEST_DIR + "example_refseq.gff"
    test_file = reference_prefix + ".out.txt"
    output_file = scratch_prefix + ".out.txt"

    detect_output = DetectOutput(input_file)
    detect_output.integrate(output_file, gtf_file, None)

    files_match = filecmp.cmp(test_file, output_file)
    self.assertTrue(files_match, msg=get_diff(test_file, output_file))
def test_s041_no_gtf(self):
    """Integrate 'terg_s041_b' with neither a GTF nor a FASTA file and compare the output."""
    test_id = 'terg_s041_b'
    reference_prefix = TEST_DIR + "test_" + test_id
    scratch_prefix = T_TEST_DIR + "test_" + test_id

    input_file = reference_prefix + ".in.dbed"
    test_file = reference_prefix + ".out.txt"
    output_file = scratch_prefix + ".out.txt"

    # Both the GTF and the FASTA argument are deliberately None.
    detect_output = DetectOutput(input_file)
    detect_output.integrate(output_file, None, None)

    files_match = filecmp.cmp(test_file, output_file)
    self.assertTrue(files_match, msg=get_diff(test_file, output_file))
def test_s041(self):
    """Integrate 'terg_s041' against two GTF variants and verify each result.

    The same detect input is integrated once with the regular GTF and once
    with the ``no_chr_prefix`` GTF; both runs are expected to reproduce the
    same reference output. Each variant is checked inside its own
    ``subTest`` so a failure names the GTF file that caused it.
    """
    test_id = 'terg_s041'

    input_file = TEST_DIR + "test_" + test_id + ".in.dbed"
    gtf_files = [
        TEST_DIR + "test_" + test_id + ".in.gtf",
        TEST_DIR + "test_" + test_id + ".in.no_chr_prefix.gtf",
    ]
    test_file = TEST_DIR + "test_" + test_id + ".out.txt"
    output_file = T_TEST_DIR + "test_" + test_id + ".out.txt"

    for gtf_file in gtf_files:
        with self.subTest(gtf_file=gtf_file):
            cl = DetectOutput(input_file)
            cl.integrate(output_file, gtf_file, None)

            # Compare right after each run: both runs write to the same
            # output path, so a check after the loop would only see the
            # result of the last GTF variant.
            self.assertTrue(filecmp.cmp(test_file, output_file), msg=get_diff(test_file, output_file))
def test_in_frame_non_hybrid_protein(self):
    """Integrate the 'in_frame_non_hybrid_protein' fixture with its GTF and compare the output.

    Transcript IDs necessary in the GTF:
      - TMPRSS2: ENST00000424093
      - ERG: ENST00000398910
    """
    test_id = 'in_frame_non_hybrid_protein'
    reference_prefix = TEST_DIR + "test_" + test_id
    scratch_prefix = T_TEST_DIR + "test_" + test_id

    input_file = reference_prefix + ".in.dbed"
    gtf_files = [reference_prefix + ".gtf"]
    test_file = reference_prefix + ".out.txt"
    output_file = scratch_prefix + ".out.txt"

    for annotation in gtf_files:
        detect_output = DetectOutput(input_file)
        detect_output.integrate(output_file, annotation, None)

        files_match = filecmp.cmp(test_file, output_file)
        self.assertTrue(files_match, msg=get_diff(test_file, output_file))
def test_blacklists(self):
    """Smoke-test classification with each of the four shipped blacklist files.

    Only checks that nothing crashes; the classified output is not compared.
    """
    test_id = '01'

    input_file = TEST_DIR + "test_" + test_id + ".in.dbed"
    output_file = T_TEST_DIR + "test_" + test_id + ".out.dbed"

    blacklists = [Blacklist() for _ in range(4)]

    # Pair every blacklist with the loader method and file it is filled from.
    loaders = [
        (blacklists[0].add_junctions_from_file, 'share/blacklist-junctions.hg19.txt'),
        (blacklists[1].add_junctions_from_file, 'share/blacklist-junctions.hg38.txt'),
        (blacklists[2].add_regions_from_bed, 'share/blacklist-regions.hg19.bed'),
        (blacklists[3].add_regions_from_bed, 'share/blacklist-regions.hg38.bed'),
    ]
    for load, path in loaders:
        load(path)

    for blacklist in blacklists:
        detect_output = DetectOutput(input_file)
        detect_output.classify(output_file, False, blacklist, 1, True)
def test_01(self):
    """Run fix -> detect -> classify -> integrate on the '01' SAM and check each stage.

    The output of detect, classify and integrate is compared with its
    reference file; the output of the fix step is not checked.
    """
    test_id = '01'
    reference_prefix = TEST_DIR + "test_" + test_id
    scratch_prefix = T_TEST_DIR + "test_" + test_id

    unfixed_sam = reference_prefix + ".sam"
    fixed_bam = scratch_prefix + ".fixed.bam"

    detect_out = scratch_prefix + "_detect.out.txt"
    detect_expected = reference_prefix + "_detect.out.txt"

    classify_out = scratch_prefix + "_classify.out.txt"
    classify_expected = reference_prefix + "_classify.out.txt"

    integrate_out = scratch_prefix + "_integrate.out.txt"
    integrate_expected = reference_prefix + "_integrate.out.txt"

    # Step 01: dr-disco fix (result is not checked)
    sam_to_fixed_bam(unfixed_sam, fixed_bam, T_TEST_DIR)

    # Step 02: dr-disco detect
    decomposition = IntronDecomposition(fixed_bam)
    decomposition.decompose(0)
    with open(detect_out, "w") as handle:
        decomposition.export(handle)

    detect_ok = filecmp.cmp(detect_expected, detect_out)
    self.assertTrue(detect_ok, msg=get_diff(detect_expected, detect_out))

    # Step 03: dr-disco classify
    classifier = DetectOutput(detect_out)
    classifier.classify(classify_out, False, Blacklist(), 25, True)

    classify_ok = filecmp.cmp(classify_expected, classify_out)
    self.assertTrue(classify_ok, msg=get_diff(classify_expected, classify_out))

    # Step 04: dr-disco integrate (no GTF, no FASTA)
    integrator = DetectOutput(classify_out)
    integrator.integrate(integrate_out, None, None)

    integrate_ok = filecmp.cmp(integrate_expected, integrate_out)
    self.assertTrue(integrate_ok, msg=get_diff(integrate_expected, integrate_out))