def make_ramp(strand, sign=1):
    """Build a synthetic single-exon locus containing an expression ramp.

    Two groups of transfrags are generated on ``chr1``:

    * 50 "flat" transfrags, each ``Exon(1000, 1220)`` with ``expr=1.0``.
    * 20 "changing" transfrags with ``expr=10.0``, one for every position
      ``pos`` in ``range(1100, 1120)``.

    Args:
        strand: strand value passed unchanged to every ``Transfrag``.
        sign: if negative, each changing transfrag is
            ``Exon(start, pos)``; otherwise it is ``Exon(pos, end)``.

    Returns:
        The tuple ``(chrom, start, end, strand, change_expr, base_expr,
        transfrags)``.  ``base_expr`` sums the expression of the flat
        transfrags only; ``change_expr`` sums the expression of all
        transfrags.
    """
    chrom = 'chr1'
    start = 1000
    end = 1220
    transfrags = []
    change_expr = 0.0
    base_expr = 0.0

    # "flat" part of expression landscape
    flat_expr = 1.0
    # ``range`` (rather than ``xrange``) matches the loop below and behaves
    # the same on Python 2 while also running on Python 3.
    for i in range(50):
        transfrags.append(
            Transfrag(chrom=chrom, strand=strand, _id='T1.%d' % i,
                      sample_id='S%d' % i, expr=flat_expr, is_ref=False,
                      exons=[Exon(start, end)]))
        change_expr += flat_expr
        base_expr += flat_expr

    # "changing" area
    ramp_expr = 10.0
    for i, pos in enumerate(range(1100, 1120)):
        if sign < 0:
            left, right = start, pos
        else:
            left, right = pos, end
        transfrags.append(
            Transfrag(chrom=chrom, strand=strand, _id='T2.%d' % i,
                      sample_id='S%d' % i, expr=ramp_expr, is_ref=False,
                      exons=[Exon(left, right)]))
        # only the running total grows here; base_expr stays flat-only
        change_expr += ramp_expr

    return chrom, start, end, strand, change_expr, base_expr, transfrags
def test_splices():
    """A three-exon transfrag yields splice sites 10, 20, 30 and 40."""
    exons = [Exon(0, 10), Exon(20, 30), Exon(40, 50)]
    transfrag = Transfrag(chrom='chrTest', strand=Strand.POS, exons=exons)

    observed = frozenset(transfrag.itersplices())
    expected = frozenset((10, 20, 30, 40))

    assert len(observed) == 4
    assert observed == expected
def test_introns():
    """A three-exon transfrag yields introns (10, 20) and (30, 40), in order."""
    exons = [Exon(0, 10), Exon(20, 30), Exon(40, 50)]
    transfrag = Transfrag(chrom='chrTest', strand=Strand.POS, exons=exons)

    found = list(transfrag.iterintrons())

    assert len(found) == 2
    first, second = found
    assert first == (10, 20)
    assert second == (30, 40)
def test_trimming_to_zero_bug():
    """Regression check on the 'change_point_bug.gtf' locus.

    Builds a splice graph from the ``Strand.NA`` transfrags, applies every
    detected change point, rebuilds the graph, and verifies that the node
    ``Exon(173433532, 173435169)`` is reported as both a start node and a
    stop node.
    """
    _, locus = read_single_locus('change_point_bug.gtf')
    sgraph = SpliceGraph.create(locus.get_transfrags(Strand.NA))

    for change_point in sgraph.detect_change_points():
        sgraph.apply_change_point(change_point)
    sgraph.recreate()

    # get start/stop nodes and convert node ids to node intervals
    start_ids, stop_ids = sgraph.get_start_stop_nodes()
    start_nodes = set(sgraph.get_node_interval(n_id) for n_id in start_ids)
    stop_nodes = set(sgraph.get_node_interval(n_id) for n_id in stop_ids)

    node = Exon(173433532, 173435169)
    assert node in stop_nodes
    # the original repeated this assertion verbatim; one check is enough
    assert node in start_nodes
def test_path_graph2():
    """Path-graph expression checks for the 'change_point2.gtf' locus.

    NOTE(review): this test is currently disabled -- the bare ``return``
    immediately below exits before any assertion runs, so everything after
    it is unreachable.  Presumably it was switched off on purpose; confirm
    the reason before removing the ``return``.
    """
    # early exit: disables the whole test body
    return
    t_dict, locus = read_single_locus('change_point2.gtf')
    sgraph = SpliceGraph.create(t_dict.values())
    # trivial case without additional stops or starts
    k = 1
    K = create_path_graph(sgraph, k)
    kmer_id_map = K.graph['kmer_id_map']
    n_id = sgraph.get_node_id(Exon(0, 100))
    kmer_id = kmer_id_map[(n_id, )]
    assert K.node[kmer_id]['expr'] == 12.0
    assert K.node[SOURCE]['expr'] == 12.0
    assert K.node[SINK]['expr'] == 12.0
    # add a stop site
    sgraph.stop_sites.add(50)
    sgraph.recreate()
    K = create_path_graph(sgraph, k=2)
    kmer_id_map = K.graph['kmer_id_map']
    # n1: two-node kmer (0, 50) -> (50, 100); n2: one-node kmer (0, 50)
    n1 = (sgraph.get_node_id((0, 50)), sgraph.get_node_id((50, 100)))
    kmer1 = kmer_id_map[n1]
    n2 = (sgraph.get_node_id((0, 50)), )
    kmer2 = kmer_id_map[n2]
    assert K.node[kmer1]['expr'] == 1.0
    assert K.node[kmer2]['expr'] == 10.0
    assert K.node[SOURCE]['expr'] == 11.0
    assert K.node[SINK]['expr'] == 11.0
    # smooth kmer graph
    smooth_graph(K)
    # the same values are asserted again after smoothing
    assert K.node[kmer1]['expr'] == 1.0
    assert K.node[kmer2]['expr'] == 10.0
    assert K.node[SOURCE]['expr'] == 11.0
    assert K.node[SINK]['expr'] == 11.0
    # TODO: test after rescuing short transfrags
    # add both a start and a stop site
    sgraph.start_sites.add(50)
    sgraph.stop_sites.add(50)
    sgraph.recreate()
    K = create_path_graph(sgraph, k=2)
    smooth_graph(K)
    kmer_id_map = K.graph['kmer_id_map']
    # n3: one-node kmer made of (50, 100) alone
    n1 = (sgraph.get_node_id((0, 50)), sgraph.get_node_id((50, 100)))
    n2 = (sgraph.get_node_id((0, 50)), )
    n3 = (sgraph.get_node_id((50, 100)), )
    kmer1 = kmer_id_map[n1]
    kmer2 = kmer_id_map[n2]
    kmer3 = kmer_id_map[n3]
    assert K.node[kmer1]['expr'] == 1.0
    assert K.node[kmer2]['expr'] == 10.0
    assert K.node[kmer3]['expr'] == 1.0
    assert K.node[SOURCE]['expr'] == 12.0
    assert K.node[SINK]['expr'] == 12.0
def test_ccle55_cuff_noc2l():
    """Locus from 55 CCLE samples assembled with Cufflinks ('noc2l_locus.gtf').

    Applies the detected change points to the chr1:934942-976702 negative
    strand splice graph, checks the resulting start/stop sites and nodes,
    and checks the endpoints of the single assembled isoform.
    """
    # NOTE(review): a later function in this file appears to reuse this
    # name, which would rebind it and leave only that later version visible
    # to a test runner -- confirm.

    # pull SpliceGraph out of GTF
    t_dict, locus = read_single_locus('noc2l_locus.gtf')
    sgraph = next(
        (candidate for candidate in locus.create_splice_graphs()
         if (candidate.chrom == 'chr1' and
             candidate.start == 934942 and
             candidate.end == 976702 and
             candidate.strand == Strand.NEG)),
        None)
    assert sgraph is not None

    # examine specific change points
    stop_node_id = sgraph.get_node_id(Exon(934942, 944589))
    assert sgraph.G.node[stop_node_id][SGNode.IS_STOP]

    change_points = sgraph.detect_change_points(pval=0.05, fc_cutoff=0.8)
    for change_point in change_points:
        sgraph.apply_change_point(change_point, trim=False)

    expected_starts = {964528, 957434, 959316}
    expected_stops = {944278}
    assert expected_starts.issubset(sgraph.start_sites)
    assert expected_stops.issubset(sgraph.stop_sites)

    # rebuild graph and examine start/stop nodes
    sgraph.recreate()
    start_ids, stop_ids = sgraph.get_start_stop_nodes()
    # convert to node intervals
    start_nodes = {sgraph.get_node_interval(n_id) for n_id in start_ids}
    stop_nodes = {sgraph.get_node_interval(n_id) for n_id in stop_ids}
    for expected_start in (Exon(959214, 959316),
                           Exon(959316, 964528),
                           Exon(957273, 957434)):
        assert expected_start in start_nodes
    assert Exon(944278, 944321) in stop_nodes

    # ensure best path uses change points
    config = Config.defaults()
    config.max_paths = 1
    gene_isoforms = assemble_isoforms(sgraph, config)
    assert len(gene_isoforms) == 1
    isoforms = gene_isoforms[0]
    assert len(isoforms) == 1
    isoform = isoforms[0]
    assert isoform.path[0] == Exon(944321, 944800)
    assert isoform.path[-1] == Exon(959214, 959316)
def test_ccle55_cuff_noc2l():
    """Locus from 55 CCLE samples assembled with Cufflinks ('noc2l_locus.gtf').

    Applies the detected change points to the chr1:934942-976702 negative
    strand splice graph, checks the resulting start/stop sites and nodes,
    and checks the endpoints of the single best path found in the optimal
    path graph.
    """
    # pull SpliceGraph out of GTF
    t_dict, locus = read_single_locus('noc2l_locus.gtf')
    sgraph = next(
        (candidate for candidate in locus.create_splice_graphs()
         if (candidate.chrom == 'chr1' and
             candidate.start == 934942 and
             candidate.end == 976702 and
             candidate.strand == Strand.NEG)),
        None)
    assert sgraph is not None

    # examine specific change points
    stop_node_id = sgraph.get_node_id(Exon(934942, 944589))
    assert sgraph.G.is_stop[stop_node_id]

    change_points = sgraph.detect_change_points(pval=0.1, fc_cutoff=0.8)
    for change_point in change_points:
        sgraph.apply_change_point(change_point, trim=False)

    expected_starts = {964528, 957434, 959316}
    expected_stops = {944278}
    assert expected_starts.issubset(sgraph.start_sites)
    assert expected_stops.issubset(sgraph.stop_sites)

    # rebuild graph and examine start/stop nodes
    sgraph.recreate()
    start_ids, stop_ids = sgraph.get_start_stop_nodes()
    # convert to node intervals
    start_nodes = {sgraph.get_node_interval(n_id) for n_id in start_ids}
    stop_nodes = {sgraph.get_node_interval(n_id) for n_id in stop_ids}
    for expected_start in (Exon(959214, 959316),
                           Exon(959316, 964528),
                           Exon(957273, 957434)):
        assert expected_start in start_nodes
    assert Exon(944278, 944321) in stop_nodes

    # ensure best path uses change points
    factory = PathGraphFactory(sgraph)
    pgraph, _k = factory.create_optimal()
    found = find_paths(pgraph, max_paths=1)
    assert len(found) == 1
    kmer_path, _expr = found[0]
    exon_path = reconstruct_path(kmer_path, pgraph, sgraph)
    assert exon_path[0] == Exon(944321, 944800)
    assert exon_path[-1] == Exon(959214, 959316)
def test_path_graph1():
    """The k=2 path graph of 'path1.gtf' is isomorphic to a hand-built one.

    The reference graph is assembled by adding the k-mers of four
    SOURCE-to-SINK exon paths (ABCDE, ACE, ABCE, ACDE), each with weight
    1.0, to an empty ``nx.DiGraph``.
    """
    # read transcripts
    t_dict, locus = read_single_locus('path1.gtf')
    SG = SpliceGraph.create(t_dict.values())

    # paths, spelled by exon letter
    exon_by_letter = {
        'A': Exon(0, 100),
        'B': Exon(200, 300),
        'C': Exon(400, 500),
        'D': Exon(600, 700),
        'E': Exon(800, 900),
    }
    paths = [
        (SOURCE,) + tuple(exon_by_letter[c] for c in letters) + (SINK,)
        for letters in ('ABCDE', 'ACE', 'ABCE', 'ACDE')
    ]

    # create path graph k = 2
    k = 2
    G1 = create_path_graph(SG, k)

    G2 = nx.DiGraph()
    for exon_path in paths:
        add_path(G2, list(get_kmers(exon_path, k)), 1.0)

    assert nx.is_isomorphic(G1, G2)
def test_mark_start_stop_sites1():
    """Start/stop flags on 'change_point1.gtf' as change points are added.

    The same three stages are checked on the strand as read and again after
    flipping every transfrag to ``Strand.NEG``: the initial graph, the graph
    after adding start site 125, and the graph after adding stop site 80.
    """
    def check_nodes(sg, expected):
        # ``expected`` lists every node: ((left, right), is_start, is_stop)
        assert len(sg.G) == len(expected)
        for (left, right), want_start, want_stop in expected:
            node_id = sg.get_node_id(Exon(left, right))
            assert bool(sg.G.is_start[node_id]) == want_start
            assert bool(sg.G.is_stop[node_id]) == want_stop

    t_dict, locus = read_single_locus('change_point1.gtf')
    sgraph = SpliceGraph.create(t_dict.values())
    check_nodes(sgraph, [((50, 200), True, True)])

    # add a start site change point
    sgraph.start_sites.add(125)
    sgraph.recreate()
    check_nodes(sgraph, [((50, 125), True, False),
                         ((125, 200), True, True)])

    # add a stop site change point
    sgraph.stop_sites.add(80)
    sgraph.recreate()
    check_nodes(sgraph, [((50, 80), True, True),
                         ((80, 125), False, False),
                         ((125, 200), True, True)])

    # flip strand
    for transfrag in t_dict.values():
        transfrag.strand = Strand.NEG
    sgraph = SpliceGraph.create(t_dict.values())
    check_nodes(sgraph, [((50, 200), True, True)])

    # add a start site change point
    sgraph.start_sites.add(125)
    sgraph.recreate()
    check_nodes(sgraph, [((50, 125), True, True),
                         ((125, 200), True, False)])

    # add a stop site change point
    sgraph.stop_sites.add(80)
    sgraph.recreate()
    check_nodes(sgraph, [((50, 80), False, True),
                         ((80, 125), True, True),
                         ((125, 200), True, False)])
def test_mark_start_stop_sites2():
    """Start/stop/ref flags on 'multi_strand1.gtf', unguided and guided.

    Each strand is checked twice: with a default ``SpliceGraph.create`` and
    with ``guided_ends=True, guided_assembly=True``.
    """
    # NOTE(review): a later function in this file appears to reuse this
    # name, which would rebind it and leave only that later version visible
    # to a test runner -- confirm.

    def node(sg, left, right):
        # id of the graph node covering Exon(left, right)
        return sg.get_node_id(Exon(left, right))

    t_dict, locus = read_single_locus('multi_strand1.gtf')

    # pos strand not guided
    sgraph = SpliceGraph.create(locus.get_transfrags(Strand.POS))
    G = sgraph.G
    assert G.is_start[node(sgraph, 100, 200)]
    assert G.is_stop[node(sgraph, 400, 650)]

    # neg strand not guided
    sgraph = SpliceGraph.create(locus.get_transfrags(Strand.NEG))
    G = sgraph.G
    assert G.is_start[node(sgraph, 950, 980)]
    assert G.is_stop[node(sgraph, 400, 500)]

    # pos strand guided
    sgraph = SpliceGraph.create(locus.get_transfrags(Strand.POS),
                                guided_ends=True,
                                guided_assembly=True)
    G = sgraph.G
    for left, right in ((100, 150), (150, 200)):
        assert G.is_start[node(sgraph, left, right)]
    for left, right in ((500, 600), (600, 650)):
        assert G.is_stop[node(sgraph, left, right)]
    for left, right in ((150, 200), (300, 400), (500, 600)):
        assert G.is_ref[node(sgraph, left, right)]
    for left, right in ((100, 150), (600, 650)):
        assert not G.is_ref[node(sgraph, left, right)]

    # neg strand guided
    sgraph = SpliceGraph.create(locus.get_transfrags(Strand.NEG),
                                guided_ends=True,
                                guided_assembly=True)
    G = sgraph.G
    assert G.is_stop[node(sgraph, 350, 400)]
    assert G.is_start[node(sgraph, 980, 1000)]
    assert not G.is_start[node(sgraph, 950, 980)]
    # every node of the guided neg-strand graph must be flagged as ref
    assert all(G.is_ref[n_id] for n_id in G.node_ids_iter())
def test_mark_start_stop_sites2():
    """Start/stop/ref node attributes on 'multi_strand1.gtf'.

    Each strand is checked twice: with a default ``SpliceGraph.create`` and
    with ``guided_ends=True, guided_assembly=True``.  Flags are read from
    the per-node attribute dicts via the ``SGNode`` keys.
    """
    def flag(sg, left, right, key):
        # attribute ``key`` of the graph node covering Exon(left, right)
        return sg.G.node[sg.get_node_id(Exon(left, right))][key]

    t_dict, locus = read_single_locus('multi_strand1.gtf')

    # pos strand not guided
    sgraph = SpliceGraph.create(locus.get_transfrags(Strand.POS))
    assert flag(sgraph, 100, 200, SGNode.IS_START)
    assert flag(sgraph, 400, 650, SGNode.IS_STOP)

    # neg strand not guided
    sgraph = SpliceGraph.create(locus.get_transfrags(Strand.NEG))
    assert flag(sgraph, 950, 980, SGNode.IS_START)
    assert flag(sgraph, 400, 500, SGNode.IS_STOP)

    # pos strand guided
    sgraph = SpliceGraph.create(locus.get_transfrags(Strand.POS),
                                guided_ends=True,
                                guided_assembly=True)
    for left, right in ((100, 150), (150, 200)):
        assert flag(sgraph, left, right, SGNode.IS_START)
    for left, right in ((500, 600), (600, 650)):
        assert flag(sgraph, left, right, SGNode.IS_STOP)
    for left, right in ((150, 200), (300, 400), (500, 600)):
        assert flag(sgraph, left, right, SGNode.IS_REF)
    for left, right in ((100, 150), (600, 650)):
        assert not flag(sgraph, left, right, SGNode.IS_REF)

    # neg strand guided
    sgraph = SpliceGraph.create(locus.get_transfrags(Strand.NEG),
                                guided_ends=True,
                                guided_assembly=True)
    assert flag(sgraph, 350, 400, SGNode.IS_STOP)
    assert flag(sgraph, 980, 1000, SGNode.IS_START)
    assert not flag(sgraph, 950, 980, SGNode.IS_START)
    # every node of the guided neg-strand graph must be flagged as ref
    assert all(attrs[SGNode.IS_REF]
               for _, attrs in sgraph.G.nodes_iter(data=True))