def test_mark_start_stop_sites2():
    """Check start/stop/ref node flags for each strand of multi_strand1.gtf.

    A graph is built per strand, first with default options and then with
    guided_ends=True and guided_assembly=True.
    """
    t_dict, locus = read_single_locus("multi_strand1.gtf")
    guided = dict(guided_ends=True, guided_assembly=True)

    # pos strand not guided
    sgraph = SpliceGraph.create(locus.get_transfrags(Strand.POS))
    flags = sgraph.G
    assert flags.is_start[sgraph.get_node_id(Exon(100, 200))]
    assert flags.is_stop[sgraph.get_node_id(Exon(400, 650))]

    # neg strand not guided
    sgraph = SpliceGraph.create(locus.get_transfrags(Strand.NEG))
    flags = sgraph.G
    assert flags.is_start[sgraph.get_node_id(Exon(950, 980))]
    assert flags.is_stop[sgraph.get_node_id(Exon(400, 500))]

    # pos strand guided
    sgraph = SpliceGraph.create(locus.get_transfrags(Strand.POS), **guided)
    flags = sgraph.G
    for exon in (Exon(100, 150), Exon(150, 200)):
        assert flags.is_start[sgraph.get_node_id(exon)]
    for exon in (Exon(500, 600), Exon(600, 650)):
        assert flags.is_stop[sgraph.get_node_id(exon)]
    for exon in (Exon(150, 200), Exon(300, 400), Exon(500, 600)):
        assert flags.is_ref[sgraph.get_node_id(exon)]
    for exon in (Exon(100, 150), Exon(600, 650)):
        assert not flags.is_ref[sgraph.get_node_id(exon)]

    # neg strand guided
    sgraph = SpliceGraph.create(locus.get_transfrags(Strand.NEG), **guided)
    flags = sgraph.G
    assert flags.is_stop[sgraph.get_node_id(Exon(350, 400))]
    assert flags.is_start[sgraph.get_node_id(Exon(980, 1000))]
    assert not flags.is_start[sgraph.get_node_id(Exon(950, 980))]
    # every node of the guided neg strand graph must be flagged as reference
    for node_id in flags.node_ids_iter():
        assert flags.is_ref[node_id]
def test_empty_graph_bug():
    """create_optimal() must yield no path graph for empty_graph_bug.gtf."""
    _, locus = read_single_locus('empty_graph_bug.gtf')
    pos_transfrags = locus.get_transfrags(Strand.POS)
    factory = PathGraphFactory(SpliceGraph.create(pos_transfrags))
    # the second element of the result is not checked here
    optimal_graph, _ = factory.create_optimal()
    assert optimal_graph is None
def test_path1():
    """Smoke test: find_paths() runs on the k=2 path graph of path1.gtf.

    No assertion is made on the returned paths; the test only checks that
    the pipeline completes without raising.
    """
    _, locus = read_single_locus('path1.gtf')
    sgraph = SpliceGraph.create(locus.get_transfrags(Strand.POS))
    factory = PathGraphFactory(sgraph)
    pgraph = factory.create(2)
    find_paths(pgraph)
def test_path_graph2():
    """Check k-mer 'expr' values in path graphs built from change_point2.gtf.

    NOTE(review): the bare ``return`` below disables this test; everything
    after it is unreachable and is kept only for reference.
    """
    return
    t_dict, locus = read_single_locus('change_point2.gtf')
    sgraph = SpliceGraph.create(t_dict.values())

    def assert_exprs(graph, expected):
        # compare the 'expr' attribute of each listed path graph node
        for node, value in expected:
            assert graph.node[node]['expr'] == value

    # trivial case without additional stops or starts
    K = create_path_graph(sgraph, 1)
    id_map = K.graph['kmer_id_map']
    whole = id_map[(sgraph.get_node_id(Exon(0, 100)),)]
    assert_exprs(K, ((whole, 12.0), (SOURCE, 12.0), (SINK, 12.0)))

    # add a stop site
    sgraph.stop_sites.add(50)
    sgraph.recreate()
    K = create_path_graph(sgraph, k=2)
    id_map = K.graph['kmer_id_map']
    left = sgraph.get_node_id((0, 50))
    right = sgraph.get_node_id((50, 100))
    expected = (
        (id_map[(left, right)], 1.0),
        (id_map[(left,)], 10.0),
        (SOURCE, 11.0),
        (SINK, 11.0),
    )
    assert_exprs(K, expected)
    # smooth kmer graph: the same values are expected afterwards
    smooth_graph(K)
    assert_exprs(K, expected)

    # TODO: test after rescuing short transfrags
    # add both a start and a stop site
    sgraph.start_sites.add(50)
    sgraph.stop_sites.add(50)
    sgraph.recreate()
    K = create_path_graph(sgraph, k=2)
    smooth_graph(K)
    id_map = K.graph['kmer_id_map']
    left = sgraph.get_node_id((0, 50))
    right = sgraph.get_node_id((50, 100))
    assert_exprs(K, (
        (id_map[(left, right)], 1.0),
        (id_map[(left,)], 10.0),
        (id_map[(right,)], 1.0),
        (SOURCE, 12.0),
        (SINK, 12.0),
    ))
def test_multi_strand2():
    """Splitting the pos strand graph of multi_strand2.gtf yields subgraphs
    labelled chr1:100-300[+] and chr1:400-600[+]."""
    _, locus = read_single_locus("multi_strand2.gtf")
    sgpos = SpliceGraph.create(locus.get_transfrags(Strand.POS))
    # index each subgraph by a "chrom:start-end[strand]" label
    by_label = dict(
        ("%s:%d-%d[%s]" % (part.chrom, part.start, part.end,
                           Strand.to_gtf(part.strand)), part)
        for part in sgpos.split()
    )
    assert "chr1:100-300[+]" in by_label
    assert "chr1:400-600[+]" in by_label
def test_path_graph2():
    """Check k-mer 'expr' values in path graphs built from change_point2.gtf.

    Covers the graph as loaded (k=1), after adding a stop site at 50 (k=2,
    before and after smoothing), and after adding both a start and a stop
    site at 50 (k=2, smoothed).
    """
    t_dict, locus = read_single_locus('change_point2.gtf')
    sgraph = SpliceGraph.create(t_dict.values())

    def assert_exprs(graph, expected):
        # compare the 'expr' attribute of each listed path graph node
        for node, value in expected:
            assert graph.node[node]['expr'] == value

    # trivial case without additional stops or starts
    K = create_path_graph(sgraph, 1)
    id_map = K.graph['kmer_id_map']
    whole = id_map[(sgraph.get_node_id(Exon(0, 100)),)]
    assert_exprs(K, ((whole, 12.0), (SOURCE, 12.0), (SINK, 12.0)))

    # add a stop site
    sgraph.stop_sites.add(50)
    sgraph.recreate()
    K = create_path_graph(sgraph, k=2)
    id_map = K.graph['kmer_id_map']
    left = sgraph.get_node_id((0, 50))
    right = sgraph.get_node_id((50, 100))
    expected = (
        (id_map[(left, right)], 1.0),
        (id_map[(left,)], 10.0),
        (SOURCE, 11.0),
        (SINK, 11.0),
    )
    assert_exprs(K, expected)
    # smooth kmer graph: the same values are expected afterwards
    smooth_graph(K)
    assert_exprs(K, expected)

    # TODO: test after rescuing short transfrags
    # add both a start and a stop site
    sgraph.start_sites.add(50)
    sgraph.stop_sites.add(50)
    sgraph.recreate()
    K = create_path_graph(sgraph, k=2)
    smooth_graph(K)
    id_map = K.graph['kmer_id_map']
    left = sgraph.get_node_id((0, 50))
    right = sgraph.get_node_id((50, 100))
    assert_exprs(K, (
        (id_map[(left, right)], 1.0),
        (id_map[(left,)], 10.0),
        (id_map[(right,)], 1.0),
        (SOURCE, 12.0),
        (SINK, 12.0),
    ))
def test_multi_strand2():
    """Splitting the pos strand graph of multi_strand2.gtf yields subgraphs
    labelled chr1:100-300[+] and chr1:400-600[+]."""
    _, locus = read_single_locus('multi_strand2.gtf')
    sgpos = SpliceGraph.create(locus.get_transfrags(Strand.POS))
    # index each subgraph by a "chrom:start-end[strand]" label
    by_label = dict(
        ('%s:%d-%d[%s]' % (part.chrom, part.start, part.end,
                           Strand.to_gtf(part.strand)), part)
        for part in sgpos.split()
    )
    assert 'chr1:100-300[+]' in by_label
    assert 'chr1:400-600[+]' in by_label
def test_path_graph_factory():
    """PathGraphFactory on path1.gtf: graph sizes for k=1 and k=2, and the
    k chosen by create_optimal()."""
    t_dict, _ = read_single_locus('path1.gtf')
    factory = PathGraphFactory(SpliceGraph.create(t_dict.values()))
    assert len(factory.create(k=1)) == 5
    assert len(factory.create(k=2)) == 6
    # only the selected k is checked, not the optimal graph itself
    _, best_k = factory.create_optimal()
    assert best_k == 2
def test_mark_start_stop_sites2():
    """Check IS_START / IS_STOP / IS_REF node attributes for each strand of
    multi_strand1.gtf, with and without guided ends/assembly."""
    t_dict, locus = read_single_locus('multi_strand1.gtf')
    guided = dict(guided_ends=True, guided_assembly=True)

    def attrs(sgraph, exon):
        # attribute dict of the graph node that represents `exon`
        return sgraph.G.node[sgraph.get_node_id(exon)]

    # pos strand not guided
    sgraph = SpliceGraph.create(locus.get_transfrags(Strand.POS))
    assert attrs(sgraph, Exon(100, 200))[SGNode.IS_START]
    assert attrs(sgraph, Exon(400, 650))[SGNode.IS_STOP]

    # neg strand not guided
    sgraph = SpliceGraph.create(locus.get_transfrags(Strand.NEG))
    assert attrs(sgraph, Exon(950, 980))[SGNode.IS_START]
    assert attrs(sgraph, Exon(400, 500))[SGNode.IS_STOP]

    # pos strand guided
    sgraph = SpliceGraph.create(locus.get_transfrags(Strand.POS), **guided)
    for exon in (Exon(100, 150), Exon(150, 200)):
        assert attrs(sgraph, exon)[SGNode.IS_START]
    for exon in (Exon(500, 600), Exon(600, 650)):
        assert attrs(sgraph, exon)[SGNode.IS_STOP]
    for exon in (Exon(150, 200), Exon(300, 400), Exon(500, 600)):
        assert attrs(sgraph, exon)[SGNode.IS_REF]
    for exon in (Exon(100, 150), Exon(600, 650)):
        assert not attrs(sgraph, exon)[SGNode.IS_REF]

    # neg strand guided
    sgraph = SpliceGraph.create(locus.get_transfrags(Strand.NEG), **guided)
    assert attrs(sgraph, Exon(350, 400))[SGNode.IS_STOP]
    assert attrs(sgraph, Exon(980, 1000))[SGNode.IS_START]
    assert not attrs(sgraph, Exon(950, 980))[SGNode.IS_START]
    # every node of the guided neg strand graph must be flagged as reference
    for _, node_data in sgraph.G.nodes_iter(data=True):
        assert node_data[SGNode.IS_REF]
def test_mark_start_stop_sites2():
    """Check start/stop/ref node flags for each strand of multi_strand1.gtf.

    A graph is built per strand, first with default options and then with
    guided_ends=True and guided_assembly=True.
    """
    t_dict, locus = read_single_locus('multi_strand1.gtf')
    guided = dict(guided_ends=True, guided_assembly=True)

    # pos strand not guided
    sgraph = SpliceGraph.create(locus.get_transfrags(Strand.POS))
    flags = sgraph.G
    assert flags.is_start[sgraph.get_node_id(Exon(100, 200))]
    assert flags.is_stop[sgraph.get_node_id(Exon(400, 650))]

    # neg strand not guided
    sgraph = SpliceGraph.create(locus.get_transfrags(Strand.NEG))
    flags = sgraph.G
    assert flags.is_start[sgraph.get_node_id(Exon(950, 980))]
    assert flags.is_stop[sgraph.get_node_id(Exon(400, 500))]

    # pos strand guided
    sgraph = SpliceGraph.create(locus.get_transfrags(Strand.POS), **guided)
    flags = sgraph.G
    for exon in (Exon(100, 150), Exon(150, 200)):
        assert flags.is_start[sgraph.get_node_id(exon)]
    for exon in (Exon(500, 600), Exon(600, 650)):
        assert flags.is_stop[sgraph.get_node_id(exon)]
    for exon in (Exon(150, 200), Exon(300, 400), Exon(500, 600)):
        assert flags.is_ref[sgraph.get_node_id(exon)]
    for exon in (Exon(100, 150), Exon(600, 650)):
        assert not flags.is_ref[sgraph.get_node_id(exon)]

    # neg strand guided
    sgraph = SpliceGraph.create(locus.get_transfrags(Strand.NEG), **guided)
    flags = sgraph.G
    assert flags.is_stop[sgraph.get_node_id(Exon(350, 400))]
    assert flags.is_start[sgraph.get_node_id(Exon(980, 1000))]
    assert not flags.is_start[sgraph.get_node_id(Exon(950, 980))]
    # every node of the guided neg strand graph must be flagged as reference
    for node_id in flags.node_ids_iter():
        assert flags.is_ref[node_id]
def test_unreachable_kmers():
    """Path graphs of path_graph_k2.gtf: k=2 is invalid and empty, k=1 is
    valid with no lost k-mers, and the optimal search settles on k=1."""
    t_dict, _ = read_single_locus('path_graph_k2.gtf')
    sgraph = SpliceGraph.create(t_dict.values())

    graph_k2 = create_path_graph(sgraph, k=2)
    assert not graph_k2.graph['valid']
    assert len(graph_k2) == 0

    graph_k1 = create_path_graph(sgraph, k=1)
    assert graph_k1.graph['valid']
    assert graph_k1.graph['num_lost_kmers'] == 0
    assert len(graph_k1) == 8

    optimal, chosen_k = create_optimal_path_graph(sgraph, kmax=0,
                                                  loss_threshold=1.0)
    assert chosen_k == 1
    assert len(optimal) == 8
def test_path1():
    """find_paths() and cpathfinder.find_paths() must agree on the k=2 path
    graph of path1.gtf: same paths in the same order, expression values
    equal within 1e-8."""
    _, locus = read_single_locus('path1.gtf')
    sgraph = SpliceGraph.create(locus.get_transfrags(Strand.POS))
    K = create_path_graph(sgraph, 2)
    py_paths = find_paths(K, 'expr')
    c_paths = cpathfinder.find_paths(K, 'expr')
    assert len(py_paths) == len(c_paths)
    # each result is a (path, expression) pair
    for (py_path, py_expr), (c_path, c_expr) in zip(py_paths, c_paths):
        assert py_path == c_path
        assert abs(py_expr - c_expr) < 1e-8
def test_path1():
    """find_paths() and cpathfinder.find_paths() must agree on the k=2 path
    graph of path1.gtf: same paths in the same order, expression values
    equal within 1e-8."""
    _, locus = read_single_locus('path1.gtf')
    sgraph = SpliceGraph.create(locus.get_transfrags(Strand.POS))
    K = create_path_graph(sgraph, 2)
    py_paths = find_paths(K, 'expr')
    c_paths = cpathfinder.find_paths(K, 'expr')
    assert len(py_paths) == len(c_paths)
    # each result is a (path, expression) pair
    for (py_path, py_expr), (c_path, c_expr) in zip(py_paths, c_paths):
        assert py_path == c_path
        assert abs(py_expr - c_expr) < 1e-8
def test_find_node_boundaries():
    """Node boundaries of splice_sites.gtf: splice sites, threshold points
    of the aggregate expression, and the combined boundary list."""
    t_dict, _ = read_single_locus('splice_sites.gtf')
    transfrags = t_dict.values()

    # union of the splice sites reported by every transfrag
    splice_sites = {site for t in transfrags for site in t.itersplices()}
    assert tuple(sorted(splice_sites)) == (100, 200, 250, 300, 400)

    # aggregate expression
    sg = SpliceGraph.create(transfrags)

    # zero change points
    zero_sites = find_threshold_points(sg.expr_data, sg.start)
    assert tuple(zero_sites) == (100, 150, 300, 375)

    # combined boundaries
    combined = sg._find_node_boundaries()
    assert tuple(combined) == (10, 100, 150, 200, 250, 300, 375, 400, 525)
def test_trimming_to_zero_bug():
    """Regression test on change_point_bug.gtf (unstranded transfrags).

    After detecting change points with pval=0.1, applying them and
    recreating the graph, the node spanning 173433532-173435169 must be
    reported both as a start node and as a stop node.
    """
    t_dict, locus = read_single_locus('change_point_bug.gtf')
    sgraph = SpliceGraph.create(locus.get_transfrags(Strand.NA))
    for cp in sgraph.detect_change_points(pval=0.1):
        sgraph.apply_change_point(cp)
    sgraph.recreate()
    # get start/stop nodes
    start_ids, stop_ids = sgraph.get_start_stop_nodes()
    # convert to node intervals
    start_nodes = {sgraph.get_node_interval(n_id) for n_id in start_ids}
    stop_nodes = {sgraph.get_node_interval(n_id) for n_id in stop_ids}
    node = Exon(173433532, 173435169)
    assert node in stop_nodes
    # the original asserted start-node membership twice; once is sufficient
    assert node in start_nodes
def test_trimming_to_zero_bug():
    """Regression test on change_point_bug.gtf (unstranded transfrags).

    After detecting change points with default settings, applying them and
    recreating the graph, the node spanning 173433532-173435169 must be
    reported both as a start node and as a stop node.
    """
    t_dict, locus = read_single_locus('change_point_bug.gtf')
    sgraph = SpliceGraph.create(locus.get_transfrags(Strand.NA))
    for cp in sgraph.detect_change_points():
        sgraph.apply_change_point(cp)
    sgraph.recreate()
    # get start/stop nodes
    start_ids, stop_ids = sgraph.get_start_stop_nodes()
    # convert to node intervals
    start_nodes = {sgraph.get_node_interval(n_id) for n_id in start_ids}
    stop_nodes = {sgraph.get_node_interval(n_id) for n_id in stop_ids}
    node = Exon(173433532, 173435169)
    assert node in stop_nodes
    # the original asserted start-node membership twice; once is sufficient
    assert node in start_nodes
def test_find_node_boundaries():
    """Node boundaries of splice_sites.gtf: splice sites, threshold points
    of the aggregate expression, and the combined boundary list."""
    t_dict, _ = read_single_locus("splice_sites.gtf")
    transfrags = t_dict.values()

    # union of the splice sites reported by every transfrag
    splice_sites = {site for t in transfrags for site in t.itersplices()}
    assert tuple(sorted(splice_sites)) == (100, 200, 250, 300, 400)

    # aggregate expression
    sg = SpliceGraph.create(transfrags)

    # zero change points
    zero_sites = find_threshold_points(sg.expr_data, sg.start)
    assert tuple(zero_sites) == (100, 150, 300, 375)

    # combined boundaries
    combined = sg._find_node_boundaries()
    assert tuple(combined) == (10, 100, 150, 200, 250, 300, 375, 400, 525)
def test_split_transfrag():
    """split_transfrag() cuts transfrags A-D of splice_sites.gtf into the
    expected node intervals at the splice graph's node boundaries."""
    loci = read_gtf("splice_sites.gtf")
    _, gtf_lines = loci[0]
    t_dict = Transfrag.parse_gtf(gtf_lines)
    sg = SpliceGraph.create(t_dict.values())
    boundaries = array("i", sg._find_node_boundaries())
    # check nodes: transfrag id -> expected node intervals, in order
    expected = (
        ("A", ((10, 100), (200, 250), (250, 300), (400, 525))),
        ("B", ((10, 100), (250, 300), (400, 525))),
        ("C", ((150, 200), (200, 250), (250, 300), (400, 525))),
        ("D", ((375, 400), (400, 525))),
    )
    for t_id, nodes in expected:
        assert tuple(split_transfrag(t_dict[t_id], boundaries)) == nodes
def test_split_transfrag():
    """split_transfrag() cuts transfrags A-D of splice_sites.gtf into the
    expected node intervals at the splice graph's node boundaries."""
    loci = read_gtf('splice_sites.gtf')
    _, gtf_lines = loci[0]
    t_dict = Transfrag.parse_gtf(gtf_lines)
    sg = SpliceGraph.create(t_dict.values())
    boundaries = array('i', sg._find_node_boundaries())
    # check nodes: transfrag id -> expected node intervals, in order
    expected = (
        ('A', ((10, 100), (200, 250), (250, 300), (400, 525))),
        ('B', ((10, 100), (250, 300), (400, 525))),
        ('C', ((150, 200), (200, 250), (250, 300), (400, 525))),
        ('D', ((375, 400), (400, 525))),
    )
    for t_id, nodes in expected:
        assert tuple(split_transfrag(t_dict[t_id], boundaries)) == nodes
def test_path_graph1():
    """The k=2 path graph of path1.gtf must be isomorphic to a graph built
    by hand from the four expected SOURCE-to-SINK exon paths."""
    # read transcripts
    t_dict, locus = read_single_locus('path1.gtf')
    SG = SpliceGraph.create(t_dict.values())
    # paths
    ABCDE = (SOURCE, Exon(0, 100), Exon(200, 300), Exon(400, 500),
             Exon(600, 700), Exon(800, 900), SINK)
    ACE = (SOURCE, Exon(0, 100), Exon(400, 500), Exon(800, 900), SINK)
    ABCE = (SOURCE, Exon(0, 100), Exon(200, 300), Exon(400, 500),
            Exon(800, 900), SINK)
    ACDE = (SOURCE, Exon(0, 100), Exon(400, 500), Exon(600, 700),
            Exon(800, 900), SINK)
    paths = [ABCDE, ACE, ABCE, ACDE]
    # create path graph (the original assigned k = 2 twice in a row)
    k = 2
    G1 = create_path_graph(SG, k)
    # reference graph: add the k-mers of every expected path with weight 1.0
    G2 = nx.DiGraph()
    for path in paths:
        kmers = list(get_kmers(path, k))
        add_path(G2, kmers, 1.0)
    assert nx.is_isomorphic(G1, G2)
def test_ref_starts_ends():
    """The graph of change_point1.gtf has exactly one reference start site
    (95) and one reference stop site (200)."""
    t_dict, _ = read_single_locus('change_point1.gtf')
    sg = SpliceGraph.create(t_dict.values())
    ref_starts = tuple(sorted(sg.ref_start_sites))
    ref_stops = tuple(sorted(sg.ref_stop_sites))
    assert ref_starts == (95,)
    assert ref_stops == (200,)
def test_empty_graph_bug():
    """assemble_isoforms() with default config must return no isoforms for
    the pos strand of empty_graph_bug.gtf."""
    _, locus = read_single_locus('empty_graph_bug.gtf')
    pos_transfrags = locus.get_transfrags(Strand.POS)
    sgraph = SpliceGraph.create(pos_transfrags)
    config = Config.defaults()
    assert len(assemble_isoforms(sgraph, config)) == 0
def test_trim_transfrags():
    """Exercise change point detection and trimming on synthetic ramps.

    Four scenarios are covered: the ramp on either side (``sign``) combined
    with either strand. Each one checks the detected change point, the
    expression values on both sides of it after trimming, and whether the
    change point position is recorded as a start or a stop site.
    """
    def make_ramp(strand, sign=1):
        """Build transfrags on chr1:1000-1220 forming an expression ramp.

        Fifty transfrags with expr 1.0 span the whole interval. Twenty more
        with expr 10.0 each cover (start, pos) when ``sign`` is negative and
        (pos, end) otherwise, for pos in 1100..1119.

        Returns (chrom, start, end, strand, change_expr, base_expr,
        transfrags), where base_expr sums only the full-length transfrags
        and change_expr sums all of them.
        """
        transfrags = []
        chrom = 'chr1'
        start = 1000
        end = 1220
        change_expr = 0.0
        base_expr = 0.0
        # "flat" part of expression landscape
        expr = 1.0
        # range() instead of the Python-2-only xrange(); the loop is tiny
        for i in range(50):
            t = Transfrag(chrom=chrom, strand=strand, _id='T1.%d' % i,
                          sample_id='S%d' % i, expr=expr, is_ref=False,
                          exons=[Exon(start, end)])
            transfrags.append(t)
            change_expr += expr
            base_expr += expr
        # "changing" area
        expr = 10.0
        for i, pos in enumerate(range(1100, 1120)):
            left, right = (start, pos) if sign < 0 else (pos, end)
            t = Transfrag(chrom=chrom, strand=strand, _id='T2.%d' % i,
                          sample_id='S%d' % i, expr=expr, is_ref=False,
                          exons=[Exon(left, right)])
            transfrags.append(t)
            change_expr += expr
        return chrom, start, end, strand, change_expr, base_expr, transfrags

    # positive strand
    chrom, start, end, strand, change_expr, base_expr, transfrags = \
        make_ramp(Strand.POS, sign=-1)
    sgraph = SpliceGraph.create(transfrags)
    cps = run_changepoint(sgraph.expr_data, smooth_window_len=11)
    assert len(cps) == 1
    cp = cps[0]
    assert cp.pos == 110
    assert cp.foldchange < 0.5
    assert cp.sign == -1
    # shift the change point coordinates by the locus start
    cp = cp._replace(pos=start + cp.pos,
                     start=start + cp.start,
                     end=start + cp.end)
    # trim transfrags
    sgraph._trim_change_point(cp)
    expr_data_after = sgraph._compute_expression()
    assert expr_data_after[0] == 250
    assert expr_data_after[-1] == 50
    assert expr_data_after[cp.index - 1] == 150
    assert expr_data_after[cp.index] == base_expr

    # now try SpliceGraph interface
    chrom, start, end, strand, change_expr, base_expr, transfrags = \
        make_ramp(Strand.POS, sign=-1)
    sgraph = SpliceGraph.create(transfrags)
    cps = sgraph.detect_change_points(smooth_window_len=11)
    for cp in cps:
        sgraph.apply_change_point(cp)
    sgraph.recreate()
    # cp is the last change point applied by the loop above
    assert sgraph.expr_data[cp.index - 1] == 150
    assert sgraph.expr_data[cp.index] == base_expr
    assert cp.pos in sgraph.stop_sites

    # negative strand should not affect change point
    chrom, start, end, strand, left_expr, base_expr, transfrags = \
        make_ramp(Strand.NEG, sign=-1)
    sgraph = SpliceGraph.create(transfrags)
    cps = sgraph.detect_change_points(smooth_window_len=11)
    for cp in cps:
        sgraph.apply_change_point(cp)
    sgraph.recreate()
    assert sgraph.expr_data[cp.index - 1] == 150
    assert sgraph.expr_data[cp.index] == base_expr
    assert cp.pos in sgraph.start_sites

    # neg strand change in opposite direction
    chrom, start, end, strand, left_expr, base_expr, transfrags = \
        make_ramp(Strand.NEG, sign=1)
    sgraph = SpliceGraph.create(transfrags)
    cps = run_changepoint(sgraph.expr_data, smooth_window_len=11)
    cp = cps[0]
    assert cp.index == 110
    assert cp.foldchange < 0.5
    assert cp.sign == 1.0
    cps = sgraph.detect_change_points(smooth_window_len=11)
    # fails fast with IndexError when no change point was detected
    cp = cps[0]
    for cp in cps:
        sgraph.apply_change_point(cp)
    sgraph.recreate()
    assert sgraph.expr_data[0] == 50
    assert sgraph.expr_data[-1] == 250
    assert sgraph.expr_data[cp.index - 1] == base_expr
    assert sgraph.expr_data[cp.index] == 160
    assert cp.pos in sgraph.stop_sites

    # pos strand change in opposite direction
    chrom, start, end, strand, left_expr, base_expr, transfrags = \
        make_ramp(Strand.POS, sign=1)
    sgraph = SpliceGraph.create(transfrags)
    cps = run_changepoint(sgraph.expr_data, smooth_window_len=11)
    cp = cps[0]
    assert cp.index == 110
    assert cp.foldchange < 0.5
    assert cp.sign == 1.0
    cps = sgraph.detect_change_points(smooth_window_len=11)
    for cp in cps:
        sgraph.apply_change_point(cp)
    sgraph.recreate()
    assert sgraph.expr_data[0] == 50
    assert sgraph.expr_data[-1] == 250
    assert sgraph.expr_data[cp.index - 1] == base_expr
    assert sgraph.expr_data[cp.index] == 160
    assert cp.pos in sgraph.start_sites
def test_mark_start_stop_sites1():
    """Check is_start / is_stop node flags on change_point1.gtf.

    The graph is examined as loaded, after adding a start site at 125 and
    after adding a stop site at 80; the same sequence is then repeated with
    every transfrag flipped to the negative strand.
    """
    def check(sgraph, expected):
        # expected holds one (exon, is_start, is_stop) entry per graph node
        assert len(sgraph.G) == len(expected)
        for exon, is_start, is_stop in expected:
            n_id = sgraph.get_node_id(exon)
            assert bool(sgraph.G.is_start[n_id]) == is_start
            assert bool(sgraph.G.is_stop[n_id]) == is_stop

    t_dict, locus = read_single_locus("change_point1.gtf")
    sgraph = SpliceGraph.create(t_dict.values())
    check(sgraph, [(Exon(50, 200), True, True)])
    # add a start site change point
    sgraph.start_sites.add(125)
    sgraph.recreate()
    check(sgraph, [(Exon(50, 125), True, False),
                   (Exon(125, 200), True, True)])
    # add a stop site change point
    sgraph.stop_sites.add(80)
    sgraph.recreate()
    check(sgraph, [(Exon(50, 80), True, True),
                   (Exon(80, 125), False, False),
                   (Exon(125, 200), True, True)])

    # flip strand; values() runs on Python 2 and 3, unlike iteritems(),
    # and the dict key was never used
    for t in t_dict.values():
        t.strand = Strand.NEG
    sgraph = SpliceGraph.create(t_dict.values())
    check(sgraph, [(Exon(50, 200), True, True)])
    # add a start site change point
    sgraph.start_sites.add(125)
    sgraph.recreate()
    check(sgraph, [(Exon(50, 125), True, True),
                   (Exon(125, 200), True, False)])
    # add a stop site change point
    sgraph.stop_sites.add(80)
    sgraph.recreate()
    check(sgraph, [(Exon(50, 80), False, True),
                   (Exon(80, 125), True, True),
                   (Exon(125, 200), True, False)])
def test_ref_starts_ends():
    """The graph of change_point1.gtf has exactly one reference start site
    (95) and one reference stop site (200)."""
    t_dict, _ = read_single_locus("change_point1.gtf")
    sg = SpliceGraph.create(t_dict.values())
    ref_starts = tuple(sorted(sg.ref_start_sites))
    ref_stops = tuple(sorted(sg.ref_stop_sites))
    assert ref_starts == (95,)
    assert ref_stops == (200,)
def test_multi_strand1():
    """Per-strand splice graphs built from multi_strand1.gtf.

    Checks the locus extent, that mixing strands raises TacoError, each
    strand graph's coordinates / reference sites / expression lookups, and
    the node boundaries with and without guided ends/assembly.
    """
    # read gtf and test basic values
    loci = read_gtf("multi_strand1.gtf")
    assert len(loci) == 1
    interval, gtf_lines = loci[0]
    assert interval == ("chr1", 100, 1000)
    t_dict = Transfrag.parse_gtf(gtf_lines)
    assert len(t_dict) == 5
    locus = Locus.create(t_dict.values())
    assert locus.chrom == "chr1"
    assert locus.start == 100
    assert locus.end == 1000

    # raise exception when creating with multiple strands
    with pytest.raises(TacoError):
        SpliceGraph.create(t_dict.values())

    transfrags_pos = locus.get_transfrags(Strand.POS)
    transfrags_neg = locus.get_transfrags(Strand.NEG)
    sgpos = SpliceGraph.create(transfrags_pos)
    sgneg = SpliceGraph.create(transfrags_neg)

    # test pos strand graph
    assert sgpos.chrom == "chr1"
    assert sgpos.start == 100
    assert sgpos.end == 650
    assert sgpos.strand == Strand.POS
    assert sgpos.ref_start_sites == [150]
    assert sgpos.ref_stop_sites == [600]
    # these requested intervals must be rejected
    for lo, hi in ((90, 110), (650, 655)):
        with pytest.raises(TacoError):
            sgpos.get_expr_data(lo, hi)
    assert np.array_equal(sgpos.get_expr_data(100, 105), np.ones(5))

    # test neg strand graph
    assert sgneg.chrom == "chr1"
    assert sgneg.start == 350
    assert sgneg.end == 1000
    assert sgneg.strand == Strand.NEG
    assert sgneg.ref_start_sites == [1000]
    assert sgneg.ref_stop_sites == [350]
    for lo, hi in ((340, 350), (1000, 1010)):
        with pytest.raises(TacoError):
            sgneg.get_expr_data(lo, hi)
    neg_expected = (
        (400, 405, np.ones(5)),
        (945, 950, np.zeros(5)),
        (950, 955, np.ones(5)),
        (980, 985, np.zeros(5)),
    )
    for lo, hi, values in neg_expected:
        assert np.array_equal(sgneg.get_expr_data(lo, hi), values)

    # test locus boundaries
    assert tuple(sgpos._find_node_boundaries()) == (100, 200, 300, 400, 650)
    assert tuple(sgneg._find_node_boundaries()) == (
        350, 400, 500, 950, 980, 1000)

    # added guided ends/assembly to use boundaries from reference
    guided = dict(guided_ends=True, guided_assembly=True)
    lpos = SpliceGraph.create(transfrags_pos, **guided)
    assert tuple(lpos._find_node_boundaries()) == (
        100, 150, 200, 300, 400, 500, 600, 650)
    lneg = SpliceGraph.create(transfrags_neg, **guided)
    assert tuple(lneg._find_node_boundaries()) == (
        350, 400, 500, 750, 900, 950, 980, 1000)
def test_mark_start_stop_sites1():
    """Check is_start / is_stop node flags on change_point1.gtf.

    The graph is examined as loaded, after adding a start site at 125 and
    after adding a stop site at 80; the same sequence is then repeated with
    every transfrag flipped to the negative strand.
    """
    def check(sgraph, expected):
        # expected holds one (exon, is_start, is_stop) entry per graph node
        assert len(sgraph.G) == len(expected)
        for exon, is_start, is_stop in expected:
            n_id = sgraph.get_node_id(exon)
            assert bool(sgraph.G.is_start[n_id]) == is_start
            assert bool(sgraph.G.is_stop[n_id]) == is_stop

    t_dict, locus = read_single_locus('change_point1.gtf')
    sgraph = SpliceGraph.create(t_dict.values())
    check(sgraph, [(Exon(50, 200), True, True)])
    # add a start site change point
    sgraph.start_sites.add(125)
    sgraph.recreate()
    check(sgraph, [(Exon(50, 125), True, False),
                   (Exon(125, 200), True, True)])
    # add a stop site change point
    sgraph.stop_sites.add(80)
    sgraph.recreate()
    check(sgraph, [(Exon(50, 80), True, True),
                   (Exon(80, 125), False, False),
                   (Exon(125, 200), True, True)])

    # flip strand; values() runs on Python 2 and 3, unlike iteritems(),
    # and the dict key was never used
    for t in t_dict.values():
        t.strand = Strand.NEG
    sgraph = SpliceGraph.create(t_dict.values())
    check(sgraph, [(Exon(50, 200), True, True)])
    # add a start site change point
    sgraph.start_sites.add(125)
    sgraph.recreate()
    check(sgraph, [(Exon(50, 125), True, True),
                   (Exon(125, 200), True, False)])
    # add a stop site change point
    sgraph.stop_sites.add(80)
    sgraph.recreate()
    check(sgraph, [(Exon(50, 80), False, True),
                   (Exon(80, 125), True, True),
                   (Exon(125, 200), True, False)])
def test_multi_strand1():
    """Per-strand splice graphs built from multi_strand1.gtf.

    Checks the locus extent, that mixing strands raises TacoError, each
    strand graph's coordinates / reference sites / expression lookups, and
    the node boundaries with and without guided ends/assembly.
    """
    # read gtf and test basic values
    loci = read_gtf('multi_strand1.gtf')
    assert len(loci) == 1
    interval, gtf_lines = loci[0]
    assert interval == ('chr1', 100, 1000)
    t_dict = Transfrag.parse_gtf(gtf_lines)
    assert len(t_dict) == 5
    locus = Locus.create(t_dict.values())
    assert locus.chrom == 'chr1'
    assert locus.start == 100
    assert locus.end == 1000

    # raise exception when creating with multiple strands
    with pytest.raises(TacoError):
        SpliceGraph.create(t_dict.values())

    transfrags_pos = locus.get_transfrags(Strand.POS)
    transfrags_neg = locus.get_transfrags(Strand.NEG)
    sgpos = SpliceGraph.create(transfrags_pos)
    sgneg = SpliceGraph.create(transfrags_neg)

    # test pos strand graph
    assert sgpos.chrom == 'chr1'
    assert sgpos.start == 100
    assert sgpos.end == 650
    assert sgpos.strand == Strand.POS
    assert sgpos.ref_start_sites == [150]
    assert sgpos.ref_stop_sites == [600]
    # these requested intervals must be rejected
    for lo, hi in ((90, 110), (650, 655)):
        with pytest.raises(TacoError):
            sgpos.get_expr_data(lo, hi)
    assert np.array_equal(sgpos.get_expr_data(100, 105), np.ones(5))

    # test neg strand graph
    assert sgneg.chrom == 'chr1'
    assert sgneg.start == 350
    assert sgneg.end == 1000
    assert sgneg.strand == Strand.NEG
    assert sgneg.ref_start_sites == [1000]
    assert sgneg.ref_stop_sites == [350]
    for lo, hi in ((340, 350), (1000, 1010)):
        with pytest.raises(TacoError):
            sgneg.get_expr_data(lo, hi)
    neg_expected = (
        (400, 405, np.ones(5)),
        (945, 950, np.zeros(5)),
        (950, 955, np.ones(5)),
        (980, 985, np.zeros(5)),
    )
    for lo, hi, values in neg_expected:
        assert np.array_equal(sgneg.get_expr_data(lo, hi), values)

    # test locus boundaries
    assert tuple(sgpos._find_node_boundaries()) == (100, 200, 300, 400, 650)
    assert tuple(sgneg._find_node_boundaries()) == (
        350, 400, 500, 950, 980, 1000)

    # added guided ends/assembly to use boundaries from reference
    guided = dict(guided_ends=True, guided_assembly=True)
    lpos = SpliceGraph.create(transfrags_pos, **guided)
    assert tuple(lpos._find_node_boundaries()) == (
        100, 150, 200, 300, 400, 500, 600, 650)
    lneg = SpliceGraph.create(transfrags_neg, **guided)
    assert tuple(lneg._find_node_boundaries()) == (
        350, 400, 500, 750, 900, 950, 980, 1000)