def test_match(self):
    """ Example where the transcript is a monoexonic match.

        A minus-strand read on chr4 with positions (3900, 1100) is expected
        to be assigned to gene TG6 with a start delta of 100 and an end
        delta of -100.
    """
    conn, cursor = get_db_cursor()
    try:
        build = "toy_build"
        talon.make_temp_novel_gene_table(cursor, build)
        talon.make_temp_monoexonic_transcript_table(cursor, build)
        edge_dict = talon.make_edge_dict(cursor)
        location_dict = talon.make_location_dict(build, cursor)
        run_info = talon.init_run_info(cursor, build)
        transcript_dict = talon.make_transcript_dict(cursor, build)
        vertex_2_gene = talon.make_vertex_2_gene_dict(cursor)
        gene_starts, gene_ends = talon.make_gene_start_and_end_dict(
            cursor, build)

        chrom = "chr4"
        strand = "-"
        positions = (3900, 1100)

        annotation = talon.identify_monoexon_transcript(
            chrom, positions, strand, cursor, location_dict, edge_dict,
            transcript_dict, vertex_2_gene, gene_starts, gene_ends,
            run_info)

        correct_gene_ID = fetch_correct_ID("TG6", "gene", cursor)
        # NOTE(review): the ID of transcript "TG6-001" used to be fetched
        # here but was never compared against the annotation. Consider
        # asserting on the annotated transcript ID once confirmed.
        assert annotation['gene_ID'] == correct_gene_ID
        assert annotation['start_delta'] == 100
        assert annotation['end_delta'] == -100
    finally:
        # Close the connection even when a call or assertion above fails
        conn.close()
def test_partial_match_3prime(self):
    """ Example where the transcript is short, so it overlaps the
        annotated transcript but is not an accepted match. The end should
        get assigned to the annotated end (end delta of -100), but the
        start is novel (no start delta).
    """
    conn, cursor = get_db_cursor()
    try:
        build = "toy_build"
        talon.make_temp_novel_gene_table(cursor, build)
        talon.make_temp_monoexonic_transcript_table(cursor, build)
        edge_dict = talon.make_edge_dict(cursor)
        location_dict = talon.make_location_dict(build, cursor)
        run_info = talon.init_run_info(cursor, build)
        transcript_dict = talon.make_transcript_dict(cursor, build)
        vertex_2_gene = talon.make_vertex_2_gene_dict(cursor)
        gene_starts, gene_ends = talon.make_gene_start_and_end_dict(
            cursor, build)

        chrom = "chr4"
        strand = "-"
        positions = (2000, 1100)

        annotation = talon.identify_monoexon_transcript(
            chrom, positions, strand, cursor, location_dict, edge_dict,
            transcript_dict, vertex_2_gene, gene_starts, gene_ends,
            run_info)

        correct_gene_ID = fetch_correct_ID("TG6", "gene", cursor)
        assert annotation['gene_ID'] == correct_gene_ID
        # Identity check: None is a singleton, so `is` is the right test
        assert annotation['start_delta'] is None
        assert annotation['end_delta'] == -100
    finally:
        # Close the connection even when a call or assertion above fails
        conn.close()
def test_antisense(self):
    """ Example where the transcript is antisense.

        A plus-strand read on chr4 with positions (1300, 3900) is expected
        to be reported with "antisense_gene" and "antisense_transcript"
        novelty entries.
    """
    conn, cursor = get_db_cursor()
    try:
        build = "toy_build"
        talon.make_temp_novel_gene_table(cursor, build)
        talon.make_temp_monoexonic_transcript_table(cursor, build)
        edge_dict = talon.make_edge_dict(cursor)
        location_dict = talon.make_location_dict(build, cursor)
        run_info = talon.init_run_info(cursor, build)
        transcript_dict = talon.make_transcript_dict(cursor, build)
        vertex_2_gene = talon.make_vertex_2_gene_dict(cursor)
        gene_starts, gene_ends = talon.make_gene_start_and_end_dict(
            cursor, build)

        chrom = "chr4"
        strand = "+"
        positions = (1300, 3900)

        annotation = talon.identify_monoexon_transcript(
            chrom, positions, strand, cursor, location_dict, edge_dict,
            transcript_dict, vertex_2_gene, gene_starts, gene_ends,
            run_info)

        # The novelty type is read from the second-to-last field of each
        # novelty record; the last field holds the associated value.
        gene_novelty_types = [x[-2] for x in annotation['gene_novelty']]
        t_novelty_types = [x[-2] for x in annotation['transcript_novelty']]

        assert annotation['gene_novelty'][0][-1] == "TRUE"
        assert "antisense_gene" in gene_novelty_types
        assert "antisense_transcript" in t_novelty_types
    finally:
        # Close the connection even when a call or assertion above fails
        conn.close()
def test_genomic_unspliced(self):
    """ Monoexonic fragment that overlaps gene 1.

        A plus-strand read on chr1 with positions (1, 990) is expected to
        be assigned to gene TG1, to carry a "genomic_transcript" novelty
        entry, and to have an end delta of -10.
    """
    conn, cursor = get_db_cursor()
    try:
        build = "toy_build"
        talon.make_temp_novel_gene_table(cursor, build)
        talon.make_temp_monoexonic_transcript_table(cursor, build)
        edge_dict = talon.make_edge_dict(cursor)
        location_dict = talon.make_location_dict(build, cursor)
        run_info = talon.init_run_info(cursor, build)
        transcript_dict = talon.make_transcript_dict(cursor, build)
        vertex_2_gene = talon.make_vertex_2_gene_dict(cursor)
        gene_starts, gene_ends = talon.make_gene_start_and_end_dict(
            cursor, build)

        chrom = "chr1"
        strand = "+"
        positions = (1, 990)

        annotation = talon.identify_monoexon_transcript(
            chrom, positions, strand, cursor, location_dict, edge_dict,
            transcript_dict, vertex_2_gene, gene_starts, gene_ends,
            run_info)

        correct_gene_ID = fetch_correct_ID("TG1", "gene", cursor)
        # The novelty type is the second-to-last field of each record
        novelty_types = [x[-2] for x in annotation['transcript_novelty']]

        assert annotation['gene_ID'] == correct_gene_ID
        assert "genomic_transcript" in novelty_types
        assert annotation['end_delta'] == -10
    finally:
        # Close the connection even when a call or assertion above fails
        conn.close()
def test_ISM_internal(self):
    """ Example where the transcript matches an internal exon.

        A plus-strand read on chr1 with positions (500, 600) is expected
        to be assigned to gene TG1, to carry an "ISM_transcript" novelty
        entry, and to have start and end deltas of 0.
    """
    conn, cursor = get_db_cursor()
    try:
        build = "toy_build"
        talon.make_temp_novel_gene_table(cursor, build)
        talon.make_temp_monoexonic_transcript_table(cursor, build)
        edge_dict = talon.make_edge_dict(cursor)
        location_dict = talon.make_location_dict(build, cursor)
        run_info = talon.init_run_info(cursor, build)
        transcript_dict = talon.make_transcript_dict(cursor, build)
        vertex_2_gene = talon.make_vertex_2_gene_dict(cursor)
        gene_starts, gene_ends = talon.make_gene_start_and_end_dict(
            cursor, build)

        chrom = "chr1"
        strand = "+"
        positions = (500, 600)

        annotation = talon.identify_monoexon_transcript(
            chrom, positions, strand, cursor, location_dict, edge_dict,
            transcript_dict, vertex_2_gene, gene_starts, gene_ends,
            run_info)

        correct_gene_ID = fetch_correct_ID("TG1", "gene", cursor)
        # The novelty type is the second-to-last field of each record
        novelty_types = [x[-2] for x in annotation['transcript_novelty']]

        assert annotation['gene_ID'] == correct_gene_ID
        assert "ISM_transcript" in novelty_types
        assert annotation['start_delta'] == annotation['end_delta'] == 0
    finally:
        # Close the connection even when a call or assertion above fails
        conn.close()
def test_overlap_but_no_vertex_match(self):
    """ Example where the transcript is short, so it overlaps the
        annotated transcript but is not an accepted match. Neither the
        start nor the end is expected to get a delta (both are None), and
        the transcript is expected to add two entries to vertex_2_gene and
        one row to the temp_monoexon table.
    """
    conn, cursor = get_db_cursor()
    try:
        build = "toy_build"
        talon.make_temp_novel_gene_table(cursor, build)
        talon.make_temp_monoexonic_transcript_table(cursor, build)
        edge_dict = talon.make_edge_dict(cursor)
        location_dict = talon.make_location_dict(build, cursor)
        run_info = talon.init_run_info(cursor, build)
        transcript_dict = talon.make_transcript_dict(cursor, build)
        vertex_2_gene = talon.make_vertex_2_gene_dict(cursor)
        gene_starts, gene_ends = talon.make_gene_start_and_end_dict(
            cursor, build)

        # Record baseline sizes so growth can be checked after the call
        tot_vertices = len(vertex_2_gene)
        query = """ SELECT COUNT(*) FROM temp_monoexon """
        tot_monoexonic = cursor.execute(query).fetchone()[0]

        chrom = "chr4"
        strand = "-"
        positions = (2500, 2000)

        annotation = talon.identify_monoexon_transcript(
            chrom, positions, strand, cursor, location_dict, edge_dict,
            transcript_dict, vertex_2_gene, gene_starts, gene_ends,
            run_info)

        correct_gene_ID = fetch_correct_ID("TG6", "gene", cursor)
        # Diagnostic output to help when one of the checks below fails
        print(annotation['start_vertex'])
        print(annotation['end_vertex'])
        assert annotation['gene_ID'] == correct_gene_ID
        # Identity check: None is a singleton, so `is` is the right test
        assert annotation['start_delta'] is None
        assert annotation['end_delta'] is None

        # Now check if the transcript got added to the right data structures
        assert len(vertex_2_gene) == tot_vertices + 2
        assert cursor.execute(query).fetchone()[0] == tot_monoexonic + 1
    finally:
        # Close the connection even when a call or assertion above fails
        conn.close()
def test_partial_match(self):
    """ Example where the transcript overlaps a single-exon transcript,
        but is shorter. In the past, the start would be assigned to the
        annotated start, and the end would be novel. This is no longer
        the case: at this time, the transcript will be assigned to the
        annotated match.

        A minus-strand read on chr4 with positions (3900, 2900) is
        expected to be assigned to gene TG6 and transcript TG6-001 with a
        start delta of 100 and an end delta of -1900.
    """
    conn, cursor = get_db_cursor()
    try:
        build = "toy_build"
        database = "scratch/toy.db"
        talon.get_counters(database)
        init_refs.make_temp_novel_gene_table(cursor, build)
        init_refs.make_temp_monoexonic_transcript_table(cursor, build)
        edge_dict = init_refs.make_edge_dict(cursor)
        location_dict = init_refs.make_location_dict(build, cursor)
        run_info = talon.init_run_info(database, build)
        transcript_dict = init_refs.make_transcript_dict(cursor, build)
        vertex_2_gene = init_refs.make_vertex_2_gene_dict(cursor)
        gene_starts = init_refs.make_gene_start_or_end_dict(
            cursor, build, "start")
        gene_ends = init_refs.make_gene_start_or_end_dict(
            cursor, build, "end")

        chrom = "chr4"
        strand = "-"
        positions = (3900, 2900)

        annotation = talon.identify_monoexon_transcript(
            chrom, positions, strand, cursor, location_dict, edge_dict,
            transcript_dict, vertex_2_gene, gene_starts, gene_ends,
            run_info, 'temp_gene', 'temp_monoexon')

        correct_gene_ID = fetch_correct_ID("TG6", "gene", cursor)
        correct_transcript_ID = fetch_correct_ID(
            "TG6-001", "transcript", cursor)
        assert annotation['gene_ID'] == correct_gene_ID
        assert annotation['transcript_ID'] == correct_transcript_ID
        assert annotation['start_delta'] == 100
        assert annotation['end_delta'] == -1900
    finally:
        # Close the connection even when a call or assertion above fails
        conn.close()