def test_mark_start_stop_sites2():
    """Check start/stop/ref node flags built from 'multi_strand1.gtf'.

    Each strand is examined twice: once with the default settings and once
    with ``guided_ends`` and ``guided_assembly`` switched on.
    """
    _, locus = read_single_locus("multi_strand1.gtf")

    # pos strand not guided
    splice_graph = SpliceGraph.create(locus.get_transfrags(Strand.POS))
    graph = splice_graph.G
    assert graph.is_start[splice_graph.get_node_id(Exon(100, 200))]
    assert graph.is_stop[splice_graph.get_node_id(Exon(400, 650))]

    # neg strand not guided
    splice_graph = SpliceGraph.create(locus.get_transfrags(Strand.NEG))
    graph = splice_graph.G
    assert graph.is_start[splice_graph.get_node_id(Exon(950, 980))]
    assert graph.is_stop[splice_graph.get_node_id(Exon(400, 500))]

    # pos strand guided
    splice_graph = SpliceGraph.create(locus.get_transfrags(Strand.POS),
                                      guided_ends=True,
                                      guided_assembly=True)
    graph = splice_graph.G
    for start, end in ((100, 150), (150, 200)):
        assert graph.is_start[splice_graph.get_node_id(Exon(start, end))]
    for start, end in ((500, 600), (600, 650)):
        assert graph.is_stop[splice_graph.get_node_id(Exon(start, end))]
    for start, end in ((150, 200), (300, 400), (500, 600)):
        assert graph.is_ref[splice_graph.get_node_id(Exon(start, end))]
    for start, end in ((100, 150), (600, 650)):
        assert not graph.is_ref[splice_graph.get_node_id(Exon(start, end))]

    # neg strand guided
    splice_graph = SpliceGraph.create(locus.get_transfrags(Strand.NEG),
                                      guided_ends=True,
                                      guided_assembly=True)
    graph = splice_graph.G
    assert graph.is_stop[splice_graph.get_node_id(Exon(350, 400))]
    assert graph.is_start[splice_graph.get_node_id(Exon(980, 1000))]
    assert not graph.is_start[splice_graph.get_node_id(Exon(950, 980))]
    # every node of the guided negative-strand graph must carry the ref flag
    for node_id in graph.node_ids_iter():
        assert graph.is_ref[node_id]
def test_empty_graph_bug():
    """Check that 'empty_graph_bug.gtf' yields no optimal path graph.

    ``PathGraphFactory.create_optimal`` must return ``None`` as the graph
    for the positive-strand transfrags of this locus.
    """
    _, locus = read_single_locus('empty_graph_bug.gtf')
    splice_graph = SpliceGraph.create(locus.get_transfrags(Strand.POS))
    factory = PathGraphFactory(splice_graph)
    path_graph, _ = factory.create_optimal()
    assert path_graph is None
def test_path2():
    """Run path finding on every splice graph of 'noc2l_locus.gtf'.

    There are no assertions; the test passes when building the optimal
    path graph and calling ``find_paths`` completes without raising.
    """
    _, locus = read_single_locus('noc2l_locus.gtf')
    for splice_graph in locus.create_splice_graphs():
        factory = PathGraphFactory(splice_graph)
        path_graph, _ = factory.create_optimal()
        find_paths(path_graph)
def test_path1():
    """Run path finding on the k=2 path graph of 'path1.gtf'.

    There are no assertions; the test passes when creating the path graph
    and calling ``find_paths`` completes without raising.
    """
    _, locus = read_single_locus('path1.gtf')
    splice_graph = SpliceGraph.create(locus.get_transfrags(Strand.POS))
    factory = PathGraphFactory(splice_graph)
    path_graph = factory.create(2)
    find_paths(path_graph)
def test_path_graph2():
    """Check path graph expression values for 'change_point2.gtf'.

    The leading ``return`` disables this test: everything below it is
    unreachable and is kept only as the body to run once re-enabled.
    """
    return
    transfrag_dict, _ = read_single_locus('change_point2.gtf')
    splice_graph = SpliceGraph.create(transfrag_dict.values())

    # trivial case without additional stops or starts
    path_graph = create_path_graph(splice_graph, 1)
    kmer_ids = path_graph.graph['kmer_id_map']
    whole_kmer = kmer_ids[(splice_graph.get_node_id(Exon(0, 100)),)]
    for key in (whole_kmer, SOURCE, SINK):
        assert path_graph.node[key]['expr'] == 12.0

    # add a stop site
    splice_graph.stop_sites.add(50)
    splice_graph.recreate()
    path_graph = create_path_graph(splice_graph, k=2)
    kmer_ids = path_graph.graph['kmer_id_map']
    both_kmer = kmer_ids[(splice_graph.get_node_id((0, 50)),
                          splice_graph.get_node_id((50, 100)))]
    left_kmer = kmer_ids[(splice_graph.get_node_id((0, 50)),)]
    expected = ((both_kmer, 1.0), (left_kmer, 10.0),
                (SOURCE, 11.0), (SINK, 11.0))
    for key, expr in expected:
        assert path_graph.node[key]['expr'] == expr

    # smooth kmer graph; the same values are expected afterwards
    smooth_graph(path_graph)
    for key, expr in expected:
        assert path_graph.node[key]['expr'] == expr

    # TODO: test after rescuing short transfrags
    # add both a start and a stop site
    splice_graph.start_sites.add(50)
    splice_graph.stop_sites.add(50)
    splice_graph.recreate()
    path_graph = create_path_graph(splice_graph, k=2)
    smooth_graph(path_graph)
    kmer_ids = path_graph.graph['kmer_id_map']
    both_key = (splice_graph.get_node_id((0, 50)),
                splice_graph.get_node_id((50, 100)))
    left_key = (splice_graph.get_node_id((0, 50)),)
    right_key = (splice_graph.get_node_id((50, 100)),)
    both_kmer = kmer_ids[both_key]
    left_kmer = kmer_ids[left_key]
    right_kmer = kmer_ids[right_key]
    expected = ((both_kmer, 1.0), (left_kmer, 10.0), (right_kmer, 1.0),
                (SOURCE, 12.0), (SINK, 12.0))
    for key, expr in expected:
        assert path_graph.node[key]['expr'] == expr
def test_multi_strand2():
    """Check that the positive-strand graph of 'multi_strand2.gtf' splits.

    The pieces returned by ``split()`` are keyed by a
    ``chrom:start-end[strand]`` label and two expected labels must exist.
    """
    _, locus = read_single_locus("multi_strand2.gtf")
    pos_graph = SpliceGraph.create(locus.get_transfrags(Strand.POS))
    graphs_by_label = {
        "%s:%d-%d[%s]" % (sg.chrom, sg.start, sg.end,
                          Strand.to_gtf(sg.strand)): sg
        for sg in pos_graph.split()
    }
    for label in ("chr1:100-300[+]", "chr1:400-600[+]"):
        assert label in graphs_by_label
def test_path_graph2():
    """Check path graph expression values for 'change_point2.gtf'.

    Three configurations are covered: no extra change points, one added
    stop site (before and after smoothing), and a start plus a stop site
    at the same position.
    """
    transfrag_dict, _ = read_single_locus('change_point2.gtf')
    splice_graph = SpliceGraph.create(transfrag_dict.values())

    # trivial case without additional stops or starts
    path_graph = create_path_graph(splice_graph, 1)
    kmer_ids = path_graph.graph['kmer_id_map']
    whole_kmer = kmer_ids[(splice_graph.get_node_id(Exon(0, 100)),)]
    for key in (whole_kmer, SOURCE, SINK):
        assert path_graph.node[key]['expr'] == 12.0

    # add a stop site
    splice_graph.stop_sites.add(50)
    splice_graph.recreate()
    path_graph = create_path_graph(splice_graph, k=2)
    kmer_ids = path_graph.graph['kmer_id_map']
    both_kmer = kmer_ids[(splice_graph.get_node_id((0, 50)),
                          splice_graph.get_node_id((50, 100)))]
    left_kmer = kmer_ids[(splice_graph.get_node_id((0, 50)),)]
    expected = ((both_kmer, 1.0), (left_kmer, 10.0),
                (SOURCE, 11.0), (SINK, 11.0))
    for key, expr in expected:
        assert path_graph.node[key]['expr'] == expr

    # smooth kmer graph; the same values are expected afterwards
    smooth_graph(path_graph)
    for key, expr in expected:
        assert path_graph.node[key]['expr'] == expr

    # TODO: test after rescuing short transfrags
    # add both a start and a stop site
    splice_graph.start_sites.add(50)
    splice_graph.stop_sites.add(50)
    splice_graph.recreate()
    path_graph = create_path_graph(splice_graph, k=2)
    smooth_graph(path_graph)
    kmer_ids = path_graph.graph['kmer_id_map']
    both_key = (splice_graph.get_node_id((0, 50)),
                splice_graph.get_node_id((50, 100)))
    left_key = (splice_graph.get_node_id((0, 50)),)
    right_key = (splice_graph.get_node_id((50, 100)),)
    both_kmer = kmer_ids[both_key]
    left_kmer = kmer_ids[left_key]
    right_kmer = kmer_ids[right_key]
    expected = ((both_kmer, 1.0), (left_kmer, 10.0), (right_kmer, 1.0),
                (SOURCE, 12.0), (SINK, 12.0))
    for key, expr in expected:
        assert path_graph.node[key]['expr'] == expr
def test_multi_strand2():
    """Check that the positive-strand graph of 'multi_strand2.gtf' splits.

    The pieces returned by ``split()`` are keyed by a
    ``chrom:start-end[strand]`` label and two expected labels must exist.
    """
    _, locus = read_single_locus('multi_strand2.gtf')
    pos_graph = SpliceGraph.create(locus.get_transfrags(Strand.POS))
    graphs_by_label = {
        '%s:%d-%d[%s]' % (sg.chrom, sg.start, sg.end,
                          Strand.to_gtf(sg.strand)): sg
        for sg in pos_graph.split()
    }
    for label in ('chr1:100-300[+]', 'chr1:400-600[+]'):
        assert label in graphs_by_label
def test_path_graph_factory():
    """Check path graph sizes and the optimal k for 'path1.gtf'.

    The factory must build graphs of length 5 (k=1) and 6 (k=2), and
    ``create_optimal`` must report k == 2.
    """
    transfrag_dict, _ = read_single_locus('path1.gtf')
    factory = PathGraphFactory(SpliceGraph.create(transfrag_dict.values()))
    for k, expected_size in ((1, 5), (2, 6)):
        assert len(factory.create(k=k)) == expected_size
    _, best_k = factory.create_optimal()
    assert best_k == 2
def test_path2():
    """Compare ``find_paths`` with ``cpathfinder.find_paths``.

    For each splice graph of 'noc2l_locus.gtf' both functions must return
    the same number of paths, identical paths, and expression values that
    agree to within 1e-5.
    """
    _, locus = read_single_locus('noc2l_locus.gtf')
    for splice_graph in locus.create_splice_graphs():
        path_graph, _ = create_optimal_path_graph(splice_graph)
        plain_results = find_paths(path_graph, 'expr')
        cpf_results = cpathfinder.find_paths(path_graph, 'expr')
        assert len(plain_results) == len(cpf_results)
        for (plain_path, plain_expr), (cpf_path, cpf_expr) in zip(
                plain_results, cpf_results):
            assert plain_path == cpf_path
            assert abs(plain_expr - cpf_expr) < 1e-5
def test_path2():
    """Compare ``find_paths`` with ``cpathfinder.find_paths``.

    For each splice graph of 'noc2l_locus.gtf' both functions must return
    the same number of paths, identical paths, and expression values that
    agree to within 1e-5.
    """
    _, locus = read_single_locus('noc2l_locus.gtf')
    for splice_graph in locus.create_splice_graphs():
        path_graph, _ = create_optimal_path_graph(splice_graph)
        plain_results = find_paths(path_graph, 'expr')
        cpf_results = cpathfinder.find_paths(path_graph, 'expr')
        assert len(plain_results) == len(cpf_results)
        for (plain_path, plain_expr), (cpf_path, cpf_expr) in zip(
                plain_results, cpf_results):
            assert plain_path == cpf_path
            assert abs(plain_expr - cpf_expr) < 1e-5
def test_topological_sort():
    """Validate the topological orderings produced by ``Graph``.

    A small hand-built graph is checked first, then the k=1 path graph of
    every splice graph in 'noc2l_locus.gtf' is checked with both
    ``topological_sort`` and ``topological_sort_dfs``.
    """
    graph = Graph()
    hand_built_paths = (
        (graph.SOURCE, 10, 20, 30, 40, graph.SINK),
        (graph.SOURCE, 10, 30, 40, graph.SINK),
        (graph.SOURCE, 10, graph.SINK),
        (graph.SOURCE, 20, graph.SINK),
    )
    for path in hand_built_paths:
        graph.add_path(path)
    assert graph.is_topological_sort(graph.topological_sort())

    _, locus = read_single_locus('noc2l_locus.gtf')
    for splice_graph in locus.create_splice_graphs():
        graph = PathGraphFactory(splice_graph).create(k=1)
        assert graph.is_topological_sort(graph.topological_sort())
        assert graph.is_topological_sort(graph.topological_sort_dfs())
def test_ccle55_cuff_noc2l():
    """Exercise change-point detection on the 'noc2l_locus.gtf' locus.

    The locus comes from 55 CCLE samples assembled with Cufflinks. The
    test applies the detected change points to the chr1:934942-976702
    negative-strand splice graph, checks the resulting start/stop sites
    and nodes, and checks both ends of the single assembled isoform.
    """
    # pull SpliceGraph out of GTF
    _, locus = read_single_locus('noc2l_locus.gtf')
    found = False
    for splice_graph in locus.create_splice_graphs():
        is_target = (splice_graph.chrom == 'chr1' and
                     splice_graph.start == 934942 and
                     splice_graph.end == 976702 and
                     splice_graph.strand == Strand.NEG)
        if is_target:
            found = True
            break
    assert found

    # examine specific change points
    # (splice_graph is the loop variable left bound to the matching graph)
    stop_node_id = splice_graph.get_node_id(Exon(934942, 944589))
    assert splice_graph.G.is_stop[stop_node_id]
    change_points = splice_graph.detect_change_points(pval=0.1,
                                                      fc_cutoff=0.8)
    for change_point in change_points:
        splice_graph.apply_change_point(change_point, trim=False)
    assert set([964528, 957434, 959316]).issubset(splice_graph.start_sites)
    assert set([944278]).issubset(splice_graph.stop_sites)

    # rebuild graph and examine start/stop nodes
    splice_graph.recreate()
    start_ids, stop_ids = splice_graph.get_start_stop_nodes()
    # convert to node intervals
    start_intervals = {splice_graph.get_node_interval(n) for n in start_ids}
    stop_intervals = {splice_graph.get_node_interval(n) for n in stop_ids}
    for interval in (Exon(959214, 959316),
                     Exon(959316, 964528),
                     Exon(957273, 957434)):
        assert interval in start_intervals
    assert Exon(944278, 944321) in stop_intervals

    # ensure best path uses change points
    config = Config.defaults()
    config.max_paths = 1
    gene_isoforms = assemble_isoforms(splice_graph, config)
    assert len(gene_isoforms) == 1
    isoforms = gene_isoforms[0]
    assert len(isoforms) == 1
    best_path = isoforms[0].path
    assert best_path[0] == Exon(944321, 944800)
    assert best_path[-1] == Exon(959214, 959316)
def test_ccle55_cuff_noc2l():
    """Exercise change-point detection on the 'noc2l_locus.gtf' locus.

    The locus comes from 55 CCLE samples assembled with Cufflinks. The
    test applies the detected change points to the chr1:934942-976702
    negative-strand splice graph, checks the resulting start/stop sites
    and nodes, and checks both ends of the single assembled isoform.
    """
    # pull SpliceGraph out of GTF
    _, locus = read_single_locus('noc2l_locus.gtf')
    found = False
    for splice_graph in locus.create_splice_graphs():
        is_target = (splice_graph.chrom == 'chr1' and
                     splice_graph.start == 934942 and
                     splice_graph.end == 976702 and
                     splice_graph.strand == Strand.NEG)
        if is_target:
            found = True
            break
    assert found

    # examine specific change points
    # (splice_graph is the loop variable left bound to the matching graph)
    stop_node_id = splice_graph.get_node_id(Exon(934942, 944589))
    assert splice_graph.G.node[stop_node_id][SGNode.IS_STOP]
    change_points = splice_graph.detect_change_points(pval=0.05,
                                                      fc_cutoff=0.8)
    for change_point in change_points:
        splice_graph.apply_change_point(change_point, trim=False)
    assert set([964528, 957434, 959316]).issubset(splice_graph.start_sites)
    assert set([944278]).issubset(splice_graph.stop_sites)

    # rebuild graph and examine start/stop nodes
    splice_graph.recreate()
    start_ids, stop_ids = splice_graph.get_start_stop_nodes()
    # convert to node intervals
    start_intervals = {splice_graph.get_node_interval(n) for n in start_ids}
    stop_intervals = {splice_graph.get_node_interval(n) for n in stop_ids}
    for interval in (Exon(959214, 959316),
                     Exon(959316, 964528),
                     Exon(957273, 957434)):
        assert interval in start_intervals
    assert Exon(944278, 944321) in stop_intervals

    # ensure best path uses change points
    config = Config.defaults()
    config.max_paths = 1
    gene_isoforms = assemble_isoforms(splice_graph, config)
    assert len(gene_isoforms) == 1
    isoforms = gene_isoforms[0]
    assert len(isoforms) == 1
    best_path = isoforms[0].path
    assert best_path[0] == Exon(944321, 944800)
    assert best_path[-1] == Exon(959214, 959316)
def test_ccle55_cuff_noc2l():
    """Exercise change-point detection on the 'noc2l_locus.gtf' locus.

    The locus comes from 55 CCLE samples assembled with Cufflinks. The
    test applies the detected change points to the chr1:934942-976702
    negative-strand splice graph, checks the resulting start/stop sites
    and nodes, and checks both ends of the single best path.
    """
    # pull SpliceGraph out of GTF
    _, locus = read_single_locus('noc2l_locus.gtf')
    found = False
    for splice_graph in locus.create_splice_graphs():
        is_target = (splice_graph.chrom == 'chr1' and
                     splice_graph.start == 934942 and
                     splice_graph.end == 976702 and
                     splice_graph.strand == Strand.NEG)
        if is_target:
            found = True
            break
    assert found

    # examine specific change points
    # (splice_graph is the loop variable left bound to the matching graph)
    stop_node_id = splice_graph.get_node_id(Exon(934942, 944589))
    assert splice_graph.G.is_stop[stop_node_id]
    change_points = splice_graph.detect_change_points(pval=0.1,
                                                      fc_cutoff=0.8)
    for change_point in change_points:
        splice_graph.apply_change_point(change_point, trim=False)
    assert set([964528, 957434, 959316]).issubset(splice_graph.start_sites)
    assert set([944278]).issubset(splice_graph.stop_sites)

    # rebuild graph and examine start/stop nodes
    splice_graph.recreate()
    start_ids, stop_ids = splice_graph.get_start_stop_nodes()
    # convert to node intervals
    start_intervals = {splice_graph.get_node_interval(n) for n in start_ids}
    stop_intervals = {splice_graph.get_node_interval(n) for n in stop_ids}
    for interval in (Exon(959214, 959316),
                     Exon(959316, 964528),
                     Exon(957273, 957434)):
        assert interval in start_intervals
    assert Exon(944278, 944321) in stop_intervals

    # ensure best path uses change points
    factory = PathGraphFactory(splice_graph)
    path_graph, _ = factory.create_optimal()
    found_paths = find_paths(path_graph, max_paths=1)
    assert len(found_paths) == 1
    kmer_path, _ = found_paths[0]
    exon_path = reconstruct_path(kmer_path, path_graph, splice_graph)
    assert exon_path[0] == Exon(944321, 944800)
    assert exon_path[-1] == Exon(959214, 959316)
def test_unreachable_kmers():
    """Check path graph validity for 'path_graph_k2.gtf' at k=2 and k=1.

    The k=2 graph must be flagged invalid and empty, the k=1 graph valid
    with no lost kmers and 8 nodes, and the optimal graph (kmax=0,
    loss_threshold=1.0) must use k == 1 with 8 nodes.
    """
    transfrag_dict, _ = read_single_locus('path_graph_k2.gtf')
    splice_graph = SpliceGraph.create(transfrag_dict.values())

    invalid_graph = create_path_graph(splice_graph, k=2)
    assert not invalid_graph.graph['valid']
    assert len(invalid_graph) == 0

    valid_graph = create_path_graph(splice_graph, k=1)
    assert valid_graph.graph['valid']
    assert valid_graph.graph['num_lost_kmers'] == 0
    assert len(valid_graph) == 8

    optimal_graph, optimal_k = create_optimal_path_graph(
        splice_graph, kmax=0, loss_threshold=1.0)
    assert optimal_k == 1
    assert len(optimal_graph) == 8
def test_path1():
    """Compare ``find_paths`` with ``cpathfinder.find_paths`` on 'path1.gtf'.

    Using the k=2 path graph of the positive-strand transfrags, both
    functions must return the same paths with expression values that
    agree to within 1e-8.
    """
    _, locus = read_single_locus('path1.gtf')
    splice_graph = SpliceGraph.create(locus.get_transfrags(Strand.POS))
    path_graph = create_path_graph(splice_graph, 2)
    plain_results = find_paths(path_graph, 'expr')
    cpf_results = cpathfinder.find_paths(path_graph, 'expr')
    assert len(plain_results) == len(cpf_results)
    for (plain_path, plain_expr), (cpf_path, cpf_expr) in zip(
            plain_results, cpf_results):
        assert plain_path == cpf_path
        assert abs(plain_expr - cpf_expr) < 1e-8
def test_path1():
    """Compare ``find_paths`` with ``cpathfinder.find_paths`` on 'path1.gtf'.

    Using the k=2 path graph of the positive-strand transfrags, both
    functions must return the same paths with expression values that
    agree to within 1e-8.
    """
    _, locus = read_single_locus('path1.gtf')
    splice_graph = SpliceGraph.create(locus.get_transfrags(Strand.POS))
    path_graph = create_path_graph(splice_graph, 2)
    plain_results = find_paths(path_graph, 'expr')
    cpf_results = cpathfinder.find_paths(path_graph, 'expr')
    assert len(plain_results) == len(cpf_results)
    for (plain_path, plain_expr), (cpf_path, cpf_expr) in zip(
            plain_results, cpf_results):
        assert plain_path == cpf_path
        assert abs(plain_expr - cpf_expr) < 1e-8
def test_find_node_boundaries():
    """Check splice sites, threshold points and node boundaries.

    All three are computed from the transfrags of 'splice_sites.gtf' and
    compared against fixed expected tuples.
    """
    transfrag_dict, _ = read_single_locus("splice_sites.gtf")
    transfrags = transfrag_dict.values()

    # union of the splice sites reported by each transfrag
    observed_sites = set(
        site for transfrag in transfrags for site in transfrag.itersplices())
    assert tuple(sorted(observed_sites)) == (100, 200, 250, 300, 400)

    # aggregate expression
    splice_graph = SpliceGraph.create(transfrags)

    # zero change points
    threshold_points = find_threshold_points(splice_graph.expr_data,
                                             splice_graph.start)
    assert tuple(threshold_points) == (100, 150, 300, 375)

    # combined boundaries
    node_boundaries = splice_graph._find_node_boundaries()
    assert tuple(node_boundaries) == (10, 100, 150, 200, 250, 300, 375, 400,
                                      525)
def test_find_node_boundaries():
    """Check splice sites, threshold points and node boundaries.

    All three are computed from the transfrags of 'splice_sites.gtf' and
    compared against fixed expected tuples.
    """
    transfrag_dict, _ = read_single_locus('splice_sites.gtf')
    transfrags = transfrag_dict.values()

    # union of the splice sites reported by each transfrag
    observed_sites = set(
        site for transfrag in transfrags for site in transfrag.itersplices())
    assert tuple(sorted(observed_sites)) == (100, 200, 250, 300, 400)

    # aggregate expression
    splice_graph = SpliceGraph.create(transfrags)

    # zero change points
    threshold_points = find_threshold_points(splice_graph.expr_data,
                                             splice_graph.start)
    assert tuple(threshold_points) == (100, 150, 300, 375)

    # combined boundaries
    node_boundaries = splice_graph._find_node_boundaries()
    assert tuple(node_boundaries) == (10, 100, 150, 200, 250, 300, 375, 400,
                                      525)
def test_trimming_to_zero_bug():
    """Check start/stop nodes of 'change_point_bug.gtf' after change points.

    Change points are detected with default parameters on the unstranded
    (``Strand.NA``) transfrags and applied, the graph is rebuilt, and the
    node Exon(173433532, 173435169) must be both a start and a stop node.
    """
    t_dict, locus = read_single_locus('change_point_bug.gtf')
    transfrags_un = locus.get_transfrags(Strand.NA)
    sgraph = SpliceGraph.create(transfrags_un)
    cps = sgraph.detect_change_points()
    for cp in cps:
        sgraph.apply_change_point(cp)
    sgraph.recreate()
    # get start/stop nodes
    start_nodes, stop_nodes = sgraph.get_start_stop_nodes()
    # convert to node intervals
    start_nodes = set(sgraph.get_node_interval(n_id) for n_id in start_nodes)
    stop_nodes = set(sgraph.get_node_interval(n_id) for n_id in stop_nodes)
    # The start-node membership check used to be asserted twice verbatim;
    # one copy is enough.
    assert Exon(173433532, 173435169) in stop_nodes
    assert Exon(173433532, 173435169) in start_nodes
def test_trimming_to_zero_bug():
    """Check start/stop nodes of 'change_point_bug.gtf' after change points.

    Change points are detected with ``pval=0.1`` on the unstranded
    (``Strand.NA``) transfrags and applied, the graph is rebuilt, and the
    node Exon(173433532, 173435169) must be both a start and a stop node.
    """
    t_dict, locus = read_single_locus('change_point_bug.gtf')
    transfrags_un = locus.get_transfrags(Strand.NA)
    sgraph = SpliceGraph.create(transfrags_un)
    cps = sgraph.detect_change_points(pval=0.1)
    for cp in cps:
        sgraph.apply_change_point(cp)
    sgraph.recreate()
    # get start/stop nodes
    start_nodes, stop_nodes = sgraph.get_start_stop_nodes()
    # convert to node intervals
    start_nodes = set(sgraph.get_node_interval(n_id) for n_id in start_nodes)
    stop_nodes = set(sgraph.get_node_interval(n_id) for n_id in stop_nodes)
    # The start-node membership check used to be asserted twice verbatim;
    # one copy is enough.
    assert Exon(173433532, 173435169) in stop_nodes
    assert Exon(173433532, 173435169) in start_nodes
def test_impute_strand():
    """Check strand imputation on 'impute_strand.gtf'.

    Transfrag counts per strand are checked before and after
    ``impute_unknown_strands``, followed by the per-strand expression data
    over several short intervals.
    """
    _, locus = read_single_locus('impute_strand.gtf')

    counts_before = ((Strand.POS, 1), (Strand.NEG, 1), (Strand.NA, 3))
    for strand, expected_count in counts_before:
        assert len(locus.get_transfrags(strand)) == expected_count

    locus.impute_unknown_strands()

    counts_after = ((Strand.POS, 2), (Strand.NEG, 2), (Strand.NA, 1))
    for strand, expected_count in counts_after:
        assert len(locus.get_transfrags(strand)) == expected_count

    # (start, end, strand, expected expression values)
    expr_checks = (
        (9, 11, Strand.POS, [2.0, 1.0]),
        (14, 16, Strand.POS, [1.0, 0.0]),
        (14, 16, Strand.NEG, [0.0, 1.0]),
        (14, 16, Strand.NA, [1.0, 1.0]),
        (19, 21, Strand.NEG, [1.0, 2.0]),
    )
    for start, end, strand, expected in expr_checks:
        observed = locus.get_expr_data(start, end, strand)
        assert np.array_equal(observed, expected)
def test_path_graph1():
    """Check the k=2 path graph of 'path1.gtf' against a hand-built graph.

    The expected graph is a ``nx.DiGraph`` assembled from the kmers of
    four explicit exon paths; the two graphs must be isomorphic.
    """
    # read transcripts
    t_dict, locus = read_single_locus('path1.gtf')
    SG = SpliceGraph.create(t_dict.values())
    # paths
    ABCDE = (SOURCE, Exon(0, 100), Exon(200, 300), Exon(400, 500),
             Exon(600, 700), Exon(800, 900), SINK)
    ACE = (SOURCE, Exon(0, 100), Exon(400, 500), Exon(800, 900), SINK)
    ABCE = (SOURCE, Exon(0, 100), Exon(200, 300), Exon(400, 500),
            Exon(800, 900), SINK)
    ACDE = (SOURCE, Exon(0, 100), Exon(400, 500), Exon(600, 700),
            Exon(800, 900), SINK)
    paths = [ABCDE, ACE, ABCE, ACDE]
    # create path graph (k was previously assigned twice in a row)
    k = 2
    G1 = create_path_graph(SG, k)
    G2 = nx.DiGraph()
    for path in paths:
        kmers = list(get_kmers(path, k))
        add_path(G2, kmers, 1.0)
    assert nx.is_isomorphic(G1, G2)
def test_impute_strand_guided():
    """Check strand imputation on 'impute_strand_guided.gtf'.

    The locus is read with ``guided_strand=True``. Transfrag counts per
    strand are checked before and after ``impute_unknown_strands``,
    transfrag 'C' must end up on the positive strand, and the per-strand
    expression data is checked over two short intervals.
    """
    transfrag_dict, locus = read_single_locus('impute_strand_guided.gtf',
                                              guided_strand=True)

    counts_before = ((Strand.POS, 2), (Strand.NEG, 1), (Strand.NA, 3))
    for strand, expected_count in counts_before:
        assert len(locus.get_transfrags(strand)) == expected_count

    locus.impute_unknown_strands()
    assert transfrag_dict['C'].strand == Strand.POS

    counts_after = ((Strand.POS, 4), (Strand.NEG, 1), (Strand.NA, 1))
    for strand, expected_count in counts_after:
        assert len(locus.get_transfrags(strand)) == expected_count

    # (start, end, strand, expected expression values)
    expr_checks = (
        (14, 16, Strand.POS, [2.0, 1.0]),
        (14, 16, Strand.NEG, [0.0, 0.0]),
        (14, 16, Strand.NA, [0.0, 1.0]),
        (19, 21, Strand.NEG, [0.0, 1.0]),
        (19, 21, Strand.NA, [1.0, 1.0]),
    )
    for start, end, strand, expected in expr_checks:
        observed = locus.get_expr_data(start, end, strand)
        assert np.array_equal(observed, expected)
def test_mark_start_stop_sites2():
    """Check start/stop/ref node attributes built from 'multi_strand1.gtf'.

    Each strand is examined twice: once with the default settings and once
    with ``guided_ends`` and ``guided_assembly`` switched on. Flags are
    read from the per-node attribute dicts via the ``SGNode`` keys.
    """
    def node_attrs(sg, start, end):
        # attribute dict of the node that sg maps Exon(start, end) to
        return sg.G.node[sg.get_node_id(Exon(start, end))]

    _, locus = read_single_locus('multi_strand1.gtf')

    # pos strand not guided
    splice_graph = SpliceGraph.create(locus.get_transfrags(Strand.POS))
    assert node_attrs(splice_graph, 100, 200)[SGNode.IS_START]
    assert node_attrs(splice_graph, 400, 650)[SGNode.IS_STOP]

    # neg strand not guided
    splice_graph = SpliceGraph.create(locus.get_transfrags(Strand.NEG))
    assert node_attrs(splice_graph, 950, 980)[SGNode.IS_START]
    assert node_attrs(splice_graph, 400, 500)[SGNode.IS_STOP]

    # pos strand guided
    splice_graph = SpliceGraph.create(locus.get_transfrags(Strand.POS),
                                      guided_ends=True,
                                      guided_assembly=True)
    for start, end in ((100, 150), (150, 200)):
        assert node_attrs(splice_graph, start, end)[SGNode.IS_START]
    for start, end in ((500, 600), (600, 650)):
        assert node_attrs(splice_graph, start, end)[SGNode.IS_STOP]
    for start, end in ((150, 200), (300, 400), (500, 600)):
        assert node_attrs(splice_graph, start, end)[SGNode.IS_REF]
    for start, end in ((100, 150), (600, 650)):
        assert not node_attrs(splice_graph, start, end)[SGNode.IS_REF]

    # neg strand guided
    splice_graph = SpliceGraph.create(locus.get_transfrags(Strand.NEG),
                                      guided_ends=True,
                                      guided_assembly=True)
    assert node_attrs(splice_graph, 350, 400)[SGNode.IS_STOP]
    assert node_attrs(splice_graph, 980, 1000)[SGNode.IS_START]
    assert not node_attrs(splice_graph, 950, 980)[SGNode.IS_START]
    # every node of the guided negative-strand graph must carry the ref flag
    for _, attrs in splice_graph.G.nodes_iter(data=True):
        assert attrs[SGNode.IS_REF]
def test_mark_start_stop_sites2():
    """Check start/stop/ref node flags built from 'multi_strand1.gtf'.

    Each strand is examined twice: once with the default settings and once
    with ``guided_ends`` and ``guided_assembly`` switched on.
    """
    _, locus = read_single_locus('multi_strand1.gtf')

    # pos strand not guided
    splice_graph = SpliceGraph.create(locus.get_transfrags(Strand.POS))
    graph = splice_graph.G
    assert graph.is_start[splice_graph.get_node_id(Exon(100, 200))]
    assert graph.is_stop[splice_graph.get_node_id(Exon(400, 650))]

    # neg strand not guided
    splice_graph = SpliceGraph.create(locus.get_transfrags(Strand.NEG))
    graph = splice_graph.G
    assert graph.is_start[splice_graph.get_node_id(Exon(950, 980))]
    assert graph.is_stop[splice_graph.get_node_id(Exon(400, 500))]

    # pos strand guided
    splice_graph = SpliceGraph.create(locus.get_transfrags(Strand.POS),
                                      guided_ends=True,
                                      guided_assembly=True)
    graph = splice_graph.G
    for start, end in ((100, 150), (150, 200)):
        assert graph.is_start[splice_graph.get_node_id(Exon(start, end))]
    for start, end in ((500, 600), (600, 650)):
        assert graph.is_stop[splice_graph.get_node_id(Exon(start, end))]
    for start, end in ((150, 200), (300, 400), (500, 600)):
        assert graph.is_ref[splice_graph.get_node_id(Exon(start, end))]
    for start, end in ((100, 150), (600, 650)):
        assert not graph.is_ref[splice_graph.get_node_id(Exon(start, end))]

    # neg strand guided
    splice_graph = SpliceGraph.create(locus.get_transfrags(Strand.NEG),
                                      guided_ends=True,
                                      guided_assembly=True)
    graph = splice_graph.G
    assert graph.is_stop[splice_graph.get_node_id(Exon(350, 400))]
    assert graph.is_start[splice_graph.get_node_id(Exon(980, 1000))]
    assert not graph.is_start[splice_graph.get_node_id(Exon(950, 980))]
    # every node of the guided negative-strand graph must carry the ref flag
    for node_id in graph.node_ids_iter():
        assert graph.is_ref[node_id]
def test_topological_sort():
    """Compare ``nx.topological_sort`` with ``cpathfinder.topological_sort``.

    Both must produce the same node order on the graph of every splice
    graph in 'noc2l_locus.gtf'.
    """
    _, locus = read_single_locus('noc2l_locus.gtf')
    for splice_graph in locus.create_splice_graphs():
        nx_order = tuple(nx.topological_sort(splice_graph.G))
        cpf_order = tuple(cpathfinder.topological_sort(splice_graph.G))
        assert nx_order == cpf_order
def test_ref_starts_ends():
    """Check the reference start/stop sites found in 'change_point1.gtf'.

    The splice graph must report exactly one reference start site (95)
    and one reference stop site (200).
    """
    transfrag_dict, _ = read_single_locus('change_point1.gtf')
    splice_graph = SpliceGraph.create(transfrag_dict.values())
    ref_starts = tuple(sorted(splice_graph.ref_start_sites))
    assert ref_starts == (95,)
    ref_stops = tuple(sorted(splice_graph.ref_stop_sites))
    assert ref_stops == (200,)
def test_empty_graph_bug():
    """Check that 'empty_graph_bug.gtf' assembles to zero isoforms.

    ``assemble_isoforms`` is run with the default configuration on the
    positive-strand transfrags and must return an empty result.
    """
    _, locus = read_single_locus('empty_graph_bug.gtf')
    splice_graph = SpliceGraph.create(locus.get_transfrags(Strand.POS))
    config = Config.defaults()
    assembled = assemble_isoforms(splice_graph, config)
    assert len(assembled) == 0
def test_mark_start_stop_sites1():
    """Check start/stop flags on 'change_point1.gtf' as change points are added.

    The same sequence (no change points, a start site at 125, then a stop
    site at 80) is run on the transfrags as loaded and again after every
    transfrag's strand has been set to ``Strand.NEG``.
    """
    def check_node(sg, start, end, is_start, is_stop):
        # Compare the start/stop flags of the node for Exon(start, end)
        # with the expected booleans (start flag first, then stop flag).
        n_id = sg.get_node_id(Exon(start, end))
        assert bool(sg.G.is_start[n_id]) == is_start
        assert bool(sg.G.is_stop[n_id]) == is_stop

    t_dict, locus = read_single_locus('change_point1.gtf')
    sgraph = SpliceGraph.create(t_dict.values())
    assert len(sgraph.G) == 1
    check_node(sgraph, 50, 200, True, True)

    # add a start site change point
    sgraph.start_sites.add(125)
    sgraph.recreate()
    assert len(sgraph.G) == 2
    check_node(sgraph, 50, 125, True, False)
    check_node(sgraph, 125, 200, True, True)

    # add a stop site change point
    sgraph.stop_sites.add(80)
    sgraph.recreate()
    assert len(sgraph.G) == 3
    check_node(sgraph, 50, 80, True, True)
    check_node(sgraph, 80, 125, False, False)
    check_node(sgraph, 125, 200, True, True)

    # flip strand
    # values() instead of iteritems(): the key was unused and iteritems()
    # exists only on Python 2 dicts.
    for t in t_dict.values():
        t.strand = Strand.NEG
    sgraph = SpliceGraph.create(t_dict.values())
    assert len(sgraph.G) == 1
    check_node(sgraph, 50, 200, True, True)

    # add a start site change point
    sgraph.start_sites.add(125)
    sgraph.recreate()
    assert len(sgraph.G) == 2
    check_node(sgraph, 50, 125, True, True)
    check_node(sgraph, 125, 200, True, False)

    # add a stop site change point
    sgraph.stop_sites.add(80)
    sgraph.recreate()
    assert len(sgraph.G) == 3
    check_node(sgraph, 50, 80, False, True)
    check_node(sgraph, 80, 125, True, True)
    check_node(sgraph, 125, 200, True, False)
def test_ref_starts_ends():
    """Check the reference start/stop sites found in 'change_point1.gtf'.

    The splice graph must report exactly one reference start site (95)
    and one reference stop site (200).
    """
    transfrag_dict, _ = read_single_locus("change_point1.gtf")
    splice_graph = SpliceGraph.create(transfrag_dict.values())
    ref_starts = tuple(sorted(splice_graph.ref_start_sites))
    assert ref_starts == (95,)
    ref_stops = tuple(sorted(splice_graph.ref_stop_sites))
    assert ref_stops == (200,)
def test_mark_start_stop_sites1():
    """Check start/stop flags on 'change_point1.gtf' as change points are added.

    The same sequence (no change points, a start site at 125, then a stop
    site at 80) is run on the transfrags as loaded and again after every
    transfrag's strand has been set to ``Strand.NEG``.
    """
    def check_node(sg, start, end, is_start, is_stop):
        # Compare the start/stop flags of the node for Exon(start, end)
        # with the expected booleans (start flag first, then stop flag).
        n_id = sg.get_node_id(Exon(start, end))
        assert bool(sg.G.is_start[n_id]) == is_start
        assert bool(sg.G.is_stop[n_id]) == is_stop

    t_dict, locus = read_single_locus("change_point1.gtf")
    sgraph = SpliceGraph.create(t_dict.values())
    assert len(sgraph.G) == 1
    check_node(sgraph, 50, 200, True, True)

    # add a start site change point
    sgraph.start_sites.add(125)
    sgraph.recreate()
    assert len(sgraph.G) == 2
    check_node(sgraph, 50, 125, True, False)
    check_node(sgraph, 125, 200, True, True)

    # add a stop site change point
    sgraph.stop_sites.add(80)
    sgraph.recreate()
    assert len(sgraph.G) == 3
    check_node(sgraph, 50, 80, True, True)
    check_node(sgraph, 80, 125, False, False)
    check_node(sgraph, 125, 200, True, True)

    # flip strand
    # values() instead of iteritems(): the key was unused and iteritems()
    # exists only on Python 2 dicts.
    for t in t_dict.values():
        t.strand = Strand.NEG
    sgraph = SpliceGraph.create(t_dict.values())
    assert len(sgraph.G) == 1
    check_node(sgraph, 50, 200, True, True)

    # add a start site change point
    sgraph.start_sites.add(125)
    sgraph.recreate()
    assert len(sgraph.G) == 2
    check_node(sgraph, 50, 125, True, True)
    check_node(sgraph, 125, 200, True, False)

    # add a stop site change point
    sgraph.stop_sites.add(80)
    sgraph.recreate()
    assert len(sgraph.G) == 3
    check_node(sgraph, 50, 80, False, True)
    check_node(sgraph, 80, 125, True, True)
    check_node(sgraph, 125, 200, True, False)