def test_find_closest_splice_donor_minus(self):
    """Closest reference donor on the minus strand is an exact match.

    Toy case in which several donors and acceptors lie close together.
    The intron bound supplied here coincides with a reference donor at
    23071360 (1-based; 23071359 in 0-based coordinates), so
    TC.find_closest_bound is expected to report that donor with a
    distance of 0.
    """
    # Reference junctions (chromosome set passed: chr1 only).
    ref_donors, _ref_acceptors, annot_lookup = TC.processSpliceAnnotation(
        "input_files/test_junctions.txt",
        "scratch/test/TC_tmp/",
        {"chr1"},
    )

    # Junction under test, built from the intron bound info.
    hg38_chr1 = Fasta("input_files/hg38_chr1.fa")
    test_junction = sj.SpliceJunction(
        "test_read",  # transcript ID
        0,  # junction number
        "chr1",
        23070360,  # intron start
        23071360,  # intron end
        "-",
        hg38_chr1,
        annot_lookup,
    )

    query_donor = test_junction.get_splice_donor()
    best_match = TC.find_closest_bound(query_donor, ref_donors)

    assert best_match.start == 23071359
    assert best_match.end == 23071360
    assert best_match.dist == 0
def test_find_closest_splice_acceptor_plus(self):
    """Closest reference acceptor on the plus strand lies 17 bp upstream.

    The junction's acceptor has no exact match in the reference, so
    TC.find_closest_bound is expected to return the reference acceptor
    spanning 23072122-23072123 with a distance of -17.
    """
    # Reference junctions (chromosome set passed: chr1 only).
    _ref_donors, ref_acceptors, annot_lookup = TC.processSpliceAnnotation(
        "input_files/test_junctions.txt",
        "scratch/test/TC_tmp/",
        {"chr1"},
    )

    # Junction under test, built from the intron bound info.
    hg38_chr1 = Fasta("input_files/hg38_chr1.fa")
    test_junction = sj.SpliceJunction(
        "test_read",  # transcript ID
        0,  # junction number
        "chr1",
        23071360,  # intron start
        23072140,  # intron end
        "+",
        hg38_chr1,
        annot_lookup,
    )

    query_acceptor = test_junction.get_splice_acceptor()
    best_match = TC.find_closest_bound(query_acceptor, ref_acceptors)

    assert best_match.start == 23072122
    assert best_match.end == 23072123
    assert best_match.dist == -17
def test_find_closest_splice_acceptor_minus(self):
    """Closest reference acceptor on the minus strand lies 1 bp downstream.

    The expected match spans 22071329-22071330 with dist == -1. Note
    that dist is expressed relative to the genome, not relative to the
    direction of the transcript.
    """
    # Reference junctions (chromosome set passed: chr1 only).
    _ref_donors, ref_acceptors, annot_lookup = TC.processSpliceAnnotation(
        "input_files/test_junctions.txt",
        "scratch/test/TC_tmp/",
        {"chr1"},
    )

    # Junction under test, built from the intron bound info.
    hg38_chr1 = Fasta("input_files/hg38_chr1.fa")
    test_junction = sj.SpliceJunction(
        "test_read",  # transcript ID
        0,  # junction number
        "chr1",
        22071331,  # intron start
        22073331,  # intron end
        "-",
        hg38_chr1,
        annot_lookup,
    )

    query_acceptor = test_junction.get_splice_acceptor()
    best_match = TC.find_closest_bound(query_acceptor, ref_acceptors)

    assert best_match.start == 22071329
    assert best_match.end == 22071330
    assert best_match.dist == -1