def test_no_match(self):
    """ Example with no FSM match: process_FSM is expected to return None
        for both the gene ID and the transcript ID. """
    conn, cursor = get_db_cursor()
    # try/finally so the connection is released even when an assertion fails
    try:
        build = "toy_build"
        edge_dict = talon.make_edge_dict(cursor)
        location_dict = talon.make_location_dict(build, cursor)
        run_info = talon.init_run_info(cursor, build)
        transcript_dict = talon.make_transcript_dict(cursor, build)
        gene_starts, gene_ends = talon.make_gene_start_and_end_dict(cursor,
                                                                    build)

        chrom = "chr1"
        positions = [1, 100, 500, 600]
        strand = "+"
        edge_IDs = [2]
        vertex_IDs = [2, 3, 4, 5]

        all_matches = talon.search_for_ISM(edge_IDs, transcript_dict)
        # Only the two IDs are checked; the remaining return values are
        # unpacked to keep the 4-tuple shape check.
        gene_ID, transcript_ID, _novelty, _start_end_info = talon.process_FSM(
            chrom, positions, strand, edge_IDs, vertex_IDs, all_matches,
            gene_starts, gene_ends, edge_dict, location_dict, run_info)

        assert gene_ID is None
        assert transcript_ID is None
    finally:
        conn.close()
def test_FSM_end_diff(self):
    """ Example where the transcript is an FSM but has a difference on
        the ends large enough to be novel. """
    conn, cursor = get_db_cursor()
    # try/finally so the connection is released even when an assertion fails
    try:
        build = "toy_build"
        edge_dict = talon.make_edge_dict(cursor)
        location_dict = talon.make_location_dict(build, cursor)
        run_info = talon.init_run_info(cursor, build)
        transcript_dict = talon.make_transcript_dict(cursor, build)
        # Read before process_FSM runs so it can be compared against the
        # end vertex afterwards.
        orig_vertices = run_info['vertex']
        gene_starts, gene_ends = talon.make_gene_start_and_end_dict(cursor,
                                                                    build)

        chrom = "chr2"
        positions = [1, 100, 500, 600, 900, 1301]  # Last position is > 300bp away
        strand = "+"
        edge_IDs = [13, 14, 15]
        vertex_IDs = [14, 15, 16, 17]

        all_matches = talon.search_for_ISM(edge_IDs, transcript_dict)
        gene_ID, transcript_ID, _novelty, start_end_info = talon.process_FSM(
            chrom, positions, strand, edge_IDs, vertex_IDs, all_matches,
            gene_starts, gene_ends, edge_dict, location_dict, run_info)

        correct_gene_ID = fetch_correct_ID("TG2", "gene", cursor)
        correct_transcript_ID = fetch_correct_ID("TG2-001", "transcript",
                                                 cursor)
        assert gene_ID == correct_gene_ID
        assert transcript_ID == correct_transcript_ID
        # Presumably a newly created end vertex gets the next ID after the
        # pre-existing count -- confirm against talon.process_FSM.
        assert start_end_info["end_vertex"] == orig_vertices + 1
    finally:
        conn.close()
def test_ISM_prefix(self):
    """ Example where the transcript is a prefix ISM with a novel start """
    conn, cursor = get_db_cursor()
    # try/finally so the connection is released even when an assertion fails
    try:
        build = "toy_build"
        edge_dict = talon.make_edge_dict(cursor)
        location_dict = talon.make_location_dict(build, cursor)
        run_info = talon.init_run_info(cursor, build)
        transcript_dict = talon.make_transcript_dict(cursor, build)
        gene_starts, gene_ends = talon.make_gene_start_and_end_dict(cursor,
                                                                    build)

        chrom = "chr1"
        strand = "+"
        positions = [1, 100, 500, 600]
        edge_IDs = [2]
        vertex_IDs = [2, 3]

        all_matches = talon.search_for_ISM(edge_IDs, transcript_dict)
        # Transcript ID and novelty are not checked here; they are unpacked
        # only to keep the 4-tuple shape check.
        gene_ID, _transcript_ID, _novelty, start_end_info = talon.process_ISM(
            chrom, positions, strand, edge_IDs, vertex_IDs, all_matches,
            transcript_dict, gene_starts, gene_ends, edge_dict,
            location_dict, run_info)

        correct_gene_ID = fetch_correct_ID("TG1", "gene", cursor)
        assert gene_ID == correct_gene_ID
        assert start_end_info["vertex_IDs"] == [1, 2, 3, 4]
        assert start_end_info["edge_IDs"] == [1, 2, 3]
    finally:
        conn.close()
def test_ISM_suffix(self):
    """ Example where the transcript is an ISM with suffix """
    conn, cursor = get_db_cursor()
    # try/finally so the connection is released even when an assertion fails
    try:
        build = "toy_build"
        edge_dict = talon.make_edge_dict(cursor)
        location_dict = talon.make_location_dict(build, cursor)
        run_info = talon.init_run_info(cursor, build)
        transcript_dict = talon.make_transcript_dict(cursor, build)
        gene_starts, gene_ends = talon.make_gene_start_and_end_dict(cursor,
                                                                    build)

        chrom = "chr1"
        strand = "+"
        positions = [500, 600, 900, 1000]
        edge_IDs = [4]
        vertex_IDs = [4, 5]

        all_matches = talon.search_for_ISM(edge_IDs, transcript_dict)
        # Transcript ID and novelty are not checked here; they are unpacked
        # only to keep the 4-tuple shape check.
        gene_ID, _transcript_ID, _novelty, start_end_info = talon.process_ISM(
            chrom, positions, strand, edge_IDs, vertex_IDs, all_matches,
            transcript_dict, gene_starts, gene_ends, edge_dict,
            location_dict, run_info)

        correct_gene_ID = fetch_correct_ID("TG1", "gene", cursor)
        assert gene_ID == correct_gene_ID
        assert start_end_info["vertex_IDs"] == [3, 4, 5, 6]
        assert start_end_info["edge_IDs"] == [3, 4, 5]
        assert start_end_info["start_novelty"] == 0  # because the exon is known
        assert start_end_info["end_novelty"] == 0
        # The completed edge set must be present in the transcript dict
        # (a missing key raises KeyError and fails the test as well).
        assert transcript_dict[frozenset(start_end_info["edge_IDs"])] is not None
    finally:
        conn.close()
def test_find_no_match(self):
    """ Example where the toy transcript database contains no matches
        for the edge set. """
    conn, cursor = get_db_cursor()
    # The connection is only needed to build the transcript dict; close it
    # even if building the dict raises.
    try:
        build = "toy_build"
        transcript_dict = talon.make_transcript_dict(cursor, build)
    finally:
        conn.close()

    edges = (100, 200, 300)
    matches = talon.search_for_ISM(edges, transcript_dict)

    # Make sure that no match got returned
    assert matches is None
def test_find_monoexon_match(self):
    """ Input is a single exon that matches part of an existing
        transcript """
    conn, cursor = get_db_cursor()
    # try/finally so the connection is released even when an assertion fails
    try:
        build = "toy_build"
        transcript_dict = talon.make_transcript_dict(cursor, build)

        edges = (14, )
        matches = talon.search_for_ISM(edges, transcript_dict)

        # Make sure that correct match got returned
        correct_gene_ID = fetch_correct_ID("TG2", "gene", cursor)
        assert matches[0]["gene_ID"] == correct_gene_ID
    finally:
        conn.close()
def test_find_match(self):
    """ Example where the toy transcript database contains exactly one
        ISM match for the transcript. """
    conn, cursor = get_db_cursor()
    # try/finally so the connection is released even when an assertion fails
    try:
        build = "toy_build"
        transcript_dict = talon.make_transcript_dict(cursor, build)

        edges = (2, 3)
        matches = talon.search_for_ISM(edges, transcript_dict)

        # Make sure that correct match got returned
        correct_gene_ID = fetch_correct_ID("TG1", "gene", cursor)
        assert matches[0]["gene_ID"] == correct_gene_ID
    finally:
        conn.close()
def test_no_match(self):
    """ Example with no ISM match: search_for_ISM is expected to return
        None for an edge ID that is looked up in the toy transcript dict. """
    conn, cursor = get_db_cursor()
    # try/finally so the connection is released even when an assertion fails
    try:
        build = "toy_build"
        # NOTE(review): only transcript_dict feeds the assertion below. The
        # other builders are still invoked because their side effects are
        # not visible from this test -- confirm whether they can be dropped.
        edge_dict = talon.make_edge_dict(cursor)
        location_dict = talon.make_location_dict(build, cursor)
        run_info = talon.init_run_info(cursor, build)
        transcript_dict = talon.make_transcript_dict(cursor, build)
        gene_starts, gene_ends = talon.make_gene_start_and_end_dict(cursor,
                                                                    build)

        edge_IDs = [200]

        all_matches = talon.search_for_ISM(edge_IDs, transcript_dict)
        assert all_matches is None
    finally:
        conn.close()
def test_FSM_perfect(self):
    """ Example where the transcript is a perfect full splice match. """
    conn, cursor = get_db_cursor()
    # try/finally so the connection is released even when an assertion fails
    try:
        build = "toy_build"
        edge_dict = talon.make_edge_dict(cursor)
        location_dict = talon.make_location_dict(build, cursor)
        run_info = talon.init_run_info(cursor, build)
        transcript_dict = talon.make_transcript_dict(cursor, build)
        gene_starts, gene_ends = talon.make_gene_start_and_end_dict(cursor,
                                                                    build)

        chrom = "chr1"
        positions = [1, 100, 500, 600, 900, 1010]
        strand = "+"
        edge_IDs = [2, 3, 4]
        vertex_IDs = [2, 3, 4, 5]

        all_matches = talon.search_for_ISM(edge_IDs, transcript_dict)
        gene_ID, transcript_ID, novelty, start_end_info = talon.process_FSM(
            chrom, positions, strand, edge_IDs, vertex_IDs, all_matches,
            gene_starts, gene_ends, edge_dict, location_dict, run_info)

        correct_gene_ID = fetch_correct_ID("TG1", "gene", cursor)
        correct_transcript_ID = fetch_correct_ID("TG1-001", "transcript",
                                                 cursor)
        assert gene_ID == correct_gene_ID
        assert transcript_ID == correct_transcript_ID
        assert novelty == []
        assert start_end_info["start_vertex"] == 1
        assert start_end_info["end_vertex"] == 6
        assert start_end_info["diff_3p"] == 10
    finally:
        conn.close()