예제 #1
0
    def test_determine_sj_novelty_Known_exon(self):
        """ Test that chr1:1-100 gets classified as all known.

        Both the reference annotation and the query GTF are parsed with
        prep_gtf in 'exon' mode; only the edge tables are needed. After
        tsj.determine_sj_novelty, the first row of the query edge table must
        have its start, stop and start/stop combination flagged as known.
        """
        annotation_path = "input_files/test_get_transcript_sjs_util/annot.gtf"
        _, reference_edges, _ = prep_gtf(annotation_path, 'exon')

        known_path = "input_files/test_get_transcript_sjs_util/known.gtf"
        _, query_edges, _ = prep_gtf(known_path, 'exon')

        annotated = tsj.determine_sj_novelty(reference_edges, query_edges)

        # All three checks target the same (first) edge, so fetch it once.
        first_edge = annotated.iloc[0]
        assert first_edge.start_known == True
        assert first_edge.stop_known == True
        assert first_edge.combination_known == True
예제 #2
0
    def test_determine_sj_novelty_NNC_intron_donor(self):
        """ Test that chr1:90-900 gets classified as having a known stop and
            novel start.

        Reference and query GTFs are parsed with prep_gtf in 'intron' mode and
        compared with tsj.determine_sj_novelty. The first row of the result is
        expected to have a novel start, a known stop, and therefore a novel
        start/stop combination.
        """
        annotation_path = "input_files/test_get_transcript_sjs_util/annot.gtf"
        _, reference_edges, _ = prep_gtf(annotation_path, 'intron')

        donor_path = "input_files/test_get_transcript_sjs_util/intron_NNC_donor.gtf"
        _, query_edges, _ = prep_gtf(donor_path, 'intron')

        annotated = tsj.determine_sj_novelty(reference_edges, query_edges)

        # Inspect the first junction only.
        junction = annotated.iloc[0]
        assert junction.start_known == False
        assert junction.stop_known == True
        assert junction.combination_known == False
예제 #3
0
    def test_determine_exon_novelty_antisense(self):
        """ Test that chr1:1-1000 on - strand gets classified as all novel.

        Reference and query GTFs are parsed with prep_gtf in 'exon' mode and
        compared with tsj.determine_sj_novelty. The exon of interest is then
        looked up by its start coordinate, and its start, stop and start/stop
        combination must all be flagged as not known.

        NOTE(review): the lookup selects the row with start == 100; confirm
        that this is the chr1:1-1000 (-) exon in antisense_exon.gtf.
        """

        gtf_file = "input_files/test_get_transcript_sjs_util/annot.gtf"
        ref_loc_df, ref_edge_df, ref_t_df = prep_gtf(gtf_file, 'exon')

        query_gtf = "input_files/test_get_transcript_sjs_util/antisense_exon.gtf"
        loc_df, edge_df, t_df = prep_gtf(query_gtf, 'exon')
        edge_df = tsj.determine_sj_novelty(ref_edge_df, edge_df)

        # Select the exon explicitly instead of relying on row order, and
        # assert on that row (previously it was looked up but never used).
        exon = edge_df.loc[edge_df['start'] == 100].iloc[0]

        assert exon.start_known == False
        assert exon.stop_known == False
        assert exon.combination_known == False
예제 #4
0
    def test_transcript_exon_assignment(self):
        """ Test that each chr1 exon is assigned to the expected transcripts.

        After novelty annotation, tsj.find_tids_from_sj (mode='exon') attaches
        a `tids` value to every edge. The chr1 exons are looked up by start
        coordinate and checked against these expectations:

            start 1   -> "test1,test2"
            start 900 -> "test2"
            start 100 -> "antisense"
        """
        annotation_path = "input_files/test_get_transcript_sjs_util/annot.gtf"
        _, reference_edges, _ = prep_gtf(annotation_path, 'exon')

        assignment_path = "input_files/test_get_transcript_sjs_util/transcript_exon_assignment.gtf"
        _, query_edges, query_transcripts = prep_gtf(assignment_path, 'exon')

        annotated = tsj.determine_sj_novelty(reference_edges, query_edges)
        annotated = tsj.find_tids_from_sj(annotated, query_transcripts, mode='exon')

        # (exon start, expected comma-separated transcript IDs)
        expected_tids = (
            (1, "test1,test2"),
            (900, "test2"),
            (100, "antisense"),
        )

        on_chr1 = annotated.chrom == 'chr1'
        matches = [annotated.loc[on_chr1 & (annotated.start == exon_start)]
                   for exon_start, _ in expected_tids]

        for exon_rows, (_, tids) in zip(matches, expected_tids):
            assert exon_rows.iloc[0].tids == tids
예제 #5
0
    def test_determine_sj_novelty_NNC_exon_start(self):
        """ Test that chr1:800-1000 gets classified as having a known stop and
             novel start.

        Reference and query GTFs are parsed with prep_gtf in 'exon' mode and
        compared with tsj.determine_sj_novelty. The exon starting at 800 is
        looked up and must have a novel start, a known stop and a novel
        start/stop combination.

        NOTE(review): the query file is named intron_NNC_acceptor.gtf but is
        parsed in 'exon' mode here; presumably an intentional reuse, confirm.
        """

        gtf_file = "input_files/test_get_transcript_sjs_util/annot.gtf"
        ref_loc_df, ref_edge_df, ref_t_df = prep_gtf(gtf_file, 'exon')

        query_gtf = "input_files/test_get_transcript_sjs_util/intron_NNC_acceptor.gtf"
        loc_df, edge_df, t_df = prep_gtf(query_gtf, 'exon')

        edge_df = tsj.determine_sj_novelty(ref_edge_df, edge_df)

        # Pick the exon by start coordinate rather than by row position.
        exon = edge_df.loc[edge_df['start'] == 800].iloc[0]
        assert exon.start_known == False
        assert exon.stop_known == True
        assert exon.combination_known == False