def test_determine_sj_novelty_Known_exon(self):
    """
    Exon novelty, fully known case.

    The first exon of the query annotation (chr1:1-100 per the original
    test description) is expected to have a known start, a known stop
    and a known start/stop combination relative to the reference.
    """
    annotation_gtf = "input_files/test_get_transcript_sjs_util/annot.gtf"
    _, ref_edges, _ = prep_gtf(annotation_gtf, 'exon')

    query_path = "input_files/test_get_transcript_sjs_util/known.gtf"
    _, query_edges, _ = prep_gtf(query_path, 'exon')

    annotated = tsj.determine_sj_novelty(ref_edges, query_edges)
    first_edge = annotated.iloc[0]

    # Explicit comparisons against True are kept on purpose so the
    # pass/fail semantics match the original assertions exactly.
    assert first_edge.start_known == True  # noqa: E712
    assert first_edge.stop_known == True  # noqa: E712
    assert first_edge.combination_known == True  # noqa: E712
def test_determine_sj_novelty_NNC_intron_donor(self):
    """
    Intron novelty, novel start with known stop.

    The first intron of the query annotation (chr1:90-900 per the
    original test description) is expected to have a novel start, a
    known stop, and therefore a novel start/stop combination.
    """
    annotation_gtf = "input_files/test_get_transcript_sjs_util/annot.gtf"
    _, ref_edges, _ = prep_gtf(annotation_gtf, 'intron')

    query_path = "input_files/test_get_transcript_sjs_util/intron_NNC_donor.gtf"
    _, query_edges, _ = prep_gtf(query_path, 'intron')

    annotated = tsj.determine_sj_novelty(ref_edges, query_edges)
    first_edge = annotated.iloc[0]

    # Explicit comparisons against True/False are kept on purpose so the
    # pass/fail semantics match the original assertions exactly.
    assert first_edge.start_known == False  # noqa: E712
    assert first_edge.stop_known == True  # noqa: E712
    assert first_edge.combination_known == False  # noqa: E712
def test_determine_exon_novelty_antisense(self):
    """
    Test that the antisense exon (chr1:1-1000 on the - strand per the
    original test description) gets classified as all novel.

    The exon under test is looked up explicitly by its start coordinate
    (start == 100) instead of relying on it being the first row of the
    result, so the assertions keep targeting the intended exon regardless
    of row order.
    """
    gtf_file = "input_files/test_get_transcript_sjs_util/annot.gtf"
    _, ref_edge_df, _ = prep_gtf(gtf_file, 'exon')

    query_gtf = "input_files/test_get_transcript_sjs_util/antisense_exon.gtf"
    _, edge_df, _ = prep_gtf(query_gtf, 'exon')

    edge_df = tsj.determine_sj_novelty(ref_edge_df, edge_df)

    # Previously this row was selected but never used; the assertions
    # checked edge_df.iloc[0] instead. Assert on the selected exon.
    exon = edge_df.loc[edge_df['start'] == 100].iloc[0]

    assert exon.start_known == False  # noqa: E712
    assert exon.stop_known == False  # noqa: E712
    assert exon.combination_known == False  # noqa: E712
def test_transcript_exon_assignment(self):
    """
    Transcript-to-exon assignment via find_tids_from_sj in exon mode.

    Expected assignments for chr1 exons, keyed by exon start:
      * start 1   -> "test1,test2"
      * start 900 -> "test2"
      * start 100 -> "antisense"
    """
    annotation_gtf = "input_files/test_get_transcript_sjs_util/annot.gtf"
    _, ref_edges, _ = prep_gtf(annotation_gtf, 'exon')

    query_path = "input_files/test_get_transcript_sjs_util/transcript_exon_assignment.gtf"
    _, query_edges, query_transcripts = prep_gtf(query_path, 'exon')

    annotated = tsj.determine_sj_novelty(ref_edges, query_edges)
    annotated = tsj.find_tids_from_sj(annotated, query_transcripts, mode='exon')

    def tids_for_start(start):
        # First chr1 row whose start coordinate matches.
        on_target = (annotated.chrom == 'chr1') & (annotated.start == start)
        return annotated.loc[on_target].iloc[0].tids

    assert tids_for_start(1) == "test1,test2"
    assert tids_for_start(900) == "test2"
    assert tids_for_start(100) == "antisense"
def test_determine_sj_novelty_NNC_exon_start(self):
    """
    Exon novelty, novel start with known stop.

    The exon starting at 800 (chr1:800-1000 per the original test
    description) is expected to have a novel start, a known stop, and
    therefore a novel start/stop combination.
    """
    annotation_gtf = "input_files/test_get_transcript_sjs_util/annot.gtf"
    _, ref_edges, _ = prep_gtf(annotation_gtf, 'exon')

    # The fixture is named after the intron acceptor case but is parsed
    # here in 'exon' mode, exactly as the test has always done.
    query_path = "input_files/test_get_transcript_sjs_util/intron_NNC_acceptor.gtf"
    _, query_edges, _ = prep_gtf(query_path, 'exon')

    annotated = tsj.determine_sj_novelty(ref_edges, query_edges)

    starts_at_800 = annotated['start'] == 800
    exon = annotated.loc[starts_at_800].iloc[0]
    print(exon)

    # Explicit comparisons against True/False are kept on purpose so the
    # pass/fail semantics match the original assertions exactly.
    assert exon.start_known == False  # noqa: E712
    assert exon.stop_known == True  # noqa: E712
    assert exon.combination_known == False  # noqa: E712