Example #1
0
 def make_ramp(strand, sign=1):
     """Build a synthetic set of transfrags with a stepwise expression ramp.

     Fifty "flat" transfrags (expression 1.0 each) span the whole interval
     chr1:1000-1220.  Twenty "changing" transfrags (expression 10.0 each)
     are then added, one per position in [1100, 1120): each runs from that
     position to ``end``, or from ``start`` to that position when ``sign``
     is negative.

     :param strand: strand value passed through to every Transfrag
     :param sign: a negative value anchors the changing transfrags at
                  ``start``; any other value anchors them at ``end``
     :returns: tuple ``(chrom, start, end, strand, change_expr, base_expr,
               transfrags)`` where ``base_expr`` sums the flat transfrags
               only and ``change_expr`` sums all transfrags
     """
     chrom = 'chr1'
     start = 1000
     end = 1220
     transfrags = []
     change_expr = 0.0
     base_expr = 0.0
     # "flat" part of expression landscape
     expr = 1.0
     # range() instead of the Python 2-only xrange(): same values, and it
     # matches the loop below, which already used range()
     for i in range(50):
         t = Transfrag(chrom=chrom, strand=strand,
                       _id='T1.%d' % i, sample_id='S%d' % i,
                       expr=expr, is_ref=False,
                       exons=[Exon(start, end)])
         transfrags.append(t)
         change_expr += expr
         base_expr += expr
     # "changing" area: one transfrag boundary per position in the window
     expr = 10.0
     for i, pos in enumerate(range(1100, 1120)):
         left, right = (start, pos) if sign < 0 else (pos, end)
         t = Transfrag(chrom=chrom, strand=strand,
                       _id='T2.%d' % i, sample_id='S%d' % i,
                       expr=expr, is_ref=False,
                       exons=[Exon(left, right)])
         transfrags.append(t)
         change_expr += expr
     return chrom, start, end, strand, change_expr, base_expr, transfrags
Example #2
0
def test_splices():
    """A three-exon transfrag must report its four inner exon boundaries
    as splice sites."""
    exons = [Exon(0, 10), Exon(20, 30), Exon(40, 50)]
    transfrag = Transfrag(chrom='chrTest', strand=Strand.POS, exons=exons)
    observed = frozenset(transfrag.itersplices())
    assert len(observed) == 4
    assert observed == frozenset((10, 20, 30, 40))
Example #3
0
def test_introns():
    """A three-exon transfrag must report the two gaps between its exons
    as introns, in order."""
    exons = [Exon(0, 10), Exon(20, 30), Exon(40, 50)]
    transfrag = Transfrag(chrom='chrTest', strand=Strand.POS, exons=exons)
    observed = list(transfrag.iterintrons())
    assert len(observed) == 2
    first, second = observed
    assert first == (10, 20)
    assert second == (30, 40)
Example #4
0
def test_trimming_to_zero_bug():
    """Regression test on the 'change_point_bug.gtf' locus.

    Builds a splice graph from the unstranded transfrags, applies every
    detected change point, rebuilds the graph and checks that the interval
    173433532-173435169 is reported as both a start node and a stop node.
    """
    _, locus = read_single_locus('change_point_bug.gtf')
    sgraph = SpliceGraph.create(locus.get_transfrags(Strand.NA))
    for cp in sgraph.detect_change_points():
        sgraph.apply_change_point(cp)
    sgraph.recreate()
    # get start/stop nodes and convert them to node intervals
    start_ids, stop_ids = sgraph.get_start_stop_nodes()
    start_nodes = set(sgraph.get_node_interval(n_id) for n_id in start_ids)
    stop_nodes = set(sgraph.get_node_interval(n_id) for n_id in stop_ids)
    # the same interval must act as both a stop and a start
    # (the original repeated the start-node assertion twice)
    expected = Exon(173433532, 173435169)
    assert expected in stop_nodes
    assert expected in start_nodes
Example #5
0
def test_path_graph2():
    """Path-graph expression checks on 'change_point2.gtf' -- currently disabled.

    NOTE(review): the bare ``return`` on the first line makes every statement
    below it unreachable, so this test always passes without checking
    anything.  The reason it was disabled is not visible here -- confirm
    before re-enabling it or deleting the dead code.
    """
    # early exit: everything below is dead code
    return
    t_dict, locus = read_single_locus('change_point2.gtf')
    sgraph = SpliceGraph.create(t_dict.values())

    # trivial case without additional stops or starts
    k = 1
    K = create_path_graph(sgraph, k)
    kmer_id_map = K.graph['kmer_id_map']
    n_id = sgraph.get_node_id(Exon(0, 100))
    kmer_id = kmer_id_map[(n_id, )]
    assert K.node[kmer_id]['expr'] == 12.0
    assert K.node[SOURCE]['expr'] == 12.0
    assert K.node[SINK]['expr'] == 12.0

    # add a stop site
    sgraph.stop_sites.add(50)
    sgraph.recreate()
    K = create_path_graph(sgraph, k=2)
    kmer_id_map = K.graph['kmer_id_map']
    # kmers are looked up by tuples of node ids
    n1 = (sgraph.get_node_id((0, 50)), sgraph.get_node_id((50, 100)))
    kmer1 = kmer_id_map[n1]
    n2 = (sgraph.get_node_id((0, 50)), )
    kmer2 = kmer_id_map[n2]

    assert K.node[kmer1]['expr'] == 1.0
    assert K.node[kmer2]['expr'] == 10.0
    assert K.node[SOURCE]['expr'] == 11.0
    assert K.node[SINK]['expr'] == 11.0
    # smooth kmer graph
    smooth_graph(K)
    # the same values are asserted again after smoothing
    assert K.node[kmer1]['expr'] == 1.0
    assert K.node[kmer2]['expr'] == 10.0
    assert K.node[SOURCE]['expr'] == 11.0
    assert K.node[SINK]['expr'] == 11.0

    # TODO: test after rescuing short transfrags

    # add both a start and a stop site
    sgraph.start_sites.add(50)
    sgraph.stop_sites.add(50)
    sgraph.recreate()
    K = create_path_graph(sgraph, k=2)
    smooth_graph(K)
    kmer_id_map = K.graph['kmer_id_map']
    n1 = (sgraph.get_node_id((0, 50)), sgraph.get_node_id((50, 100)))
    n2 = (sgraph.get_node_id((0, 50)), )
    n3 = (sgraph.get_node_id((50, 100)), )
    kmer1 = kmer_id_map[n1]
    kmer2 = kmer_id_map[n2]
    kmer3 = kmer_id_map[n3]
    assert K.node[kmer1]['expr'] == 1.0
    assert K.node[kmer2]['expr'] == 10.0
    assert K.node[kmer3]['expr'] == 1.0
    assert K.node[SOURCE]['expr'] == 12.0
    assert K.node[SINK]['expr'] == 12.0
Example #6
0
def test_ccle55_cuff_noc2l():
    '''Locus from 55 CCLE samples assembled with Cufflinks'''
    # locate the negative-strand splice graph of interest in the GTF
    _, locus = read_single_locus('noc2l_locus.gtf')
    wanted = ('chr1', 934942, 976702, Strand.NEG)
    sgraph = next((g for g in locus.create_splice_graphs()
                   if (g.chrom, g.start, g.end, g.strand) == wanted),
                  None)
    assert sgraph is not None

    # the node at the graph's left edge is a stop before any change point
    edge_id = sgraph.get_node_id(Exon(934942, 944589))
    assert sgraph.G.node[edge_id][SGNode.IS_STOP]

    # detect change points and apply them without trimming
    for cp in sgraph.detect_change_points(pval=0.05, fc_cutoff=0.8):
        sgraph.apply_change_point(cp, trim=False)
    assert {964528, 957434, 959316}.issubset(sgraph.start_sites)
    assert {944278}.issubset(sgraph.stop_sites)

    # rebuild graph, then look at start/stop nodes as intervals
    sgraph.recreate()
    start_ids, stop_ids = sgraph.get_start_stop_nodes()
    start_intervals = set(sgraph.get_node_interval(i) for i in start_ids)
    stop_intervals = set(sgraph.get_node_interval(i) for i in stop_ids)
    expected_starts = (Exon(959214, 959316),
                       Exon(959316, 964528),
                       Exon(957273, 957434))
    for exon in expected_starts:
        assert exon in start_intervals
    assert Exon(944278, 944321) in stop_intervals

    # the single best isoform must begin and end at change points
    config = Config.defaults()
    config.max_paths = 1
    gene_isoforms = assemble_isoforms(sgraph, config)
    assert len(gene_isoforms) == 1
    isoforms = gene_isoforms[0]
    assert len(isoforms) == 1
    best_path = isoforms[0].path
    assert best_path[0] == Exon(944321, 944800)
    assert best_path[-1] == Exon(959214, 959316)
Example #7
0
def test_ccle55_cuff_noc2l():
    '''Locus from 55 CCLE samples assembled with Cufflinks'''
    # locate the negative-strand splice graph of interest in the GTF
    _, locus = read_single_locus('noc2l_locus.gtf')
    wanted = ('chr1', 934942, 976702, Strand.NEG)
    sgraph = next((g for g in locus.create_splice_graphs()
                   if (g.chrom, g.start, g.end, g.strand) == wanted),
                  None)
    assert sgraph is not None

    # the node at the graph's left edge is a stop before any change point
    edge_id = sgraph.get_node_id(Exon(934942, 944589))
    assert sgraph.G.is_stop[edge_id]

    # detect change points and apply them without trimming
    for cp in sgraph.detect_change_points(pval=0.1, fc_cutoff=0.8):
        sgraph.apply_change_point(cp, trim=False)
    assert {964528, 957434, 959316}.issubset(sgraph.start_sites)
    assert {944278}.issubset(sgraph.stop_sites)

    # rebuild graph, then look at start/stop nodes as intervals
    sgraph.recreate()
    start_ids, stop_ids = sgraph.get_start_stop_nodes()
    start_intervals = set(sgraph.get_node_interval(i) for i in start_ids)
    stop_intervals = set(sgraph.get_node_interval(i) for i in stop_ids)
    expected_starts = (Exon(959214, 959316),
                       Exon(959316, 964528),
                       Exon(957273, 957434))
    for exon in expected_starts:
        assert exon in start_intervals
    assert Exon(944278, 944321) in stop_intervals

    # the single best path must begin and end at change points
    factory = PathGraphFactory(sgraph)
    pgraph, k = factory.create_optimal()
    paths = find_paths(pgraph, max_paths=1)
    assert len(paths) == 1
    kmer_path, expr = paths[0]
    exon_path = reconstruct_path(kmer_path, pgraph, sgraph)
    assert exon_path[0] == Exon(944321, 944800)
    assert exon_path[-1] == Exon(959214, 959316)
Example #8
0
def test_path_graph1():
    """The k=2 path graph built from 'path1.gtf' must be isomorphic to a
    graph assembled by hand from the four expected exon paths."""
    # read transcripts
    t_dict, locus = read_single_locus('path1.gtf')
    SG = SpliceGraph.create(t_dict.values())
    # the five exons, named A..E, and the paths that combine them
    a, b, c, d, e = (Exon(0, 100), Exon(200, 300), Exon(400, 500),
                     Exon(600, 700), Exon(800, 900))
    expected_paths = [
        (SOURCE, a, b, c, d, e, SINK),  # ABCDE
        (SOURCE, a, c, e, SINK),        # ACE
        (SOURCE, a, b, c, e, SINK),     # ABCE
        (SOURCE, a, c, d, e, SINK),     # ACDE
    ]
    # create path graph k = 2
    k = 2
    built = create_path_graph(SG, k)
    # reference graph: add each path's kmers with expression 1.0
    reference = nx.DiGraph()
    for path in expected_paths:
        add_path(reference, list(get_kmers(path, k)), 1.0)
    assert nx.is_isomorphic(built, reference)
Example #9
0
def _assert_start_stop(sgraph, exon, is_start, is_stop):
    """Assert the start/stop flags of the splice-graph node for ``exon``."""
    n_id = sgraph.get_node_id(exon)
    assert bool(sgraph.G.is_start[n_id]) == is_start
    assert bool(sgraph.G.is_stop[n_id]) == is_stop


def test_mark_start_stop_sites1():
    """Check start/stop node flags as change points split a single node.

    Loads 'change_point1.gtf', whose splice graph is one node (50-200),
    then adds a start site at 125 and a stop site at 80, rebuilding the
    graph after each.  The whole sequence is repeated after flipping every
    transfrag to the negative strand, where the expected flags differ.
    """
    t_dict, locus = read_single_locus('change_point1.gtf')
    sgraph = SpliceGraph.create(t_dict.values())
    assert len(sgraph.G) == 1
    _assert_start_stop(sgraph, Exon(50, 200), is_start=True, is_stop=True)

    # add a start site change point
    sgraph.start_sites.add(125)
    sgraph.recreate()
    assert len(sgraph.G) == 2
    _assert_start_stop(sgraph, Exon(50, 125), is_start=True, is_stop=False)
    _assert_start_stop(sgraph, Exon(125, 200), is_start=True, is_stop=True)

    # add a stop site change point
    sgraph.stop_sites.add(80)
    sgraph.recreate()
    assert len(sgraph.G) == 3
    _assert_start_stop(sgraph, Exon(50, 80), is_start=True, is_stop=True)
    _assert_start_stop(sgraph, Exon(80, 125), is_start=False, is_stop=False)
    _assert_start_stop(sgraph, Exon(125, 200), is_start=True, is_stop=True)

    # flip strand; values() works on Python 2 and 3 and the keys were
    # never used (the original called the Python 2-only iteritems())
    for t in t_dict.values():
        t.strand = Strand.NEG
    sgraph = SpliceGraph.create(t_dict.values())
    assert len(sgraph.G) == 1
    _assert_start_stop(sgraph, Exon(50, 200), is_start=True, is_stop=True)

    # add a start site change point
    sgraph.start_sites.add(125)
    sgraph.recreate()
    assert len(sgraph.G) == 2
    _assert_start_stop(sgraph, Exon(50, 125), is_start=True, is_stop=True)
    _assert_start_stop(sgraph, Exon(125, 200), is_start=True, is_stop=False)

    # add a stop site change point
    sgraph.stop_sites.add(80)
    sgraph.recreate()
    assert len(sgraph.G) == 3
    _assert_start_stop(sgraph, Exon(50, 80), is_start=False, is_stop=True)
    _assert_start_stop(sgraph, Exon(80, 125), is_start=True, is_stop=True)
    _assert_start_stop(sgraph, Exon(125, 200), is_start=True, is_stop=False)
Example #10
0
def test_mark_start_stop_sites2():
    """Check start/stop/ref node flags on 'multi_strand1.gtf' for each
    strand, first unguided and then with guided ends and assembly."""
    _, locus = read_single_locus('multi_strand1.gtf')

    def node_of(start, end):
        # resolves against whichever graph ``sgraph`` currently names
        return sgraph.get_node_id(Exon(start, end))

    # pos strand not guided
    sgraph = SpliceGraph.create(locus.get_transfrags(Strand.POS))
    G = sgraph.G
    assert G.is_start[node_of(100, 200)]
    assert G.is_stop[node_of(400, 650)]

    # neg strand not guided
    sgraph = SpliceGraph.create(locus.get_transfrags(Strand.NEG))
    G = sgraph.G
    assert G.is_start[node_of(950, 980)]
    assert G.is_stop[node_of(400, 500)]

    # pos strand guided
    sgraph = SpliceGraph.create(locus.get_transfrags(Strand.POS),
                                guided_ends=True,
                                guided_assembly=True)
    G = sgraph.G
    for bounds in ((100, 150), (150, 200)):
        assert G.is_start[node_of(*bounds)]
    for bounds in ((500, 600), (600, 650)):
        assert G.is_stop[node_of(*bounds)]
    for bounds in ((150, 200), (300, 400), (500, 600)):
        assert G.is_ref[node_of(*bounds)]
    for bounds in ((100, 150), (600, 650)):
        assert not G.is_ref[node_of(*bounds)]

    # neg strand guided
    sgraph = SpliceGraph.create(locus.get_transfrags(Strand.NEG),
                                guided_ends=True,
                                guided_assembly=True)
    G = sgraph.G
    assert G.is_stop[node_of(350, 400)]
    assert G.is_start[node_of(980, 1000)]
    assert not G.is_start[node_of(950, 980)]
    # every node of the guided neg-strand graph is a reference node
    assert all(G.is_ref[n_id] for n_id in G.node_ids_iter())
Example #11
0
def test_mark_start_stop_sites2():
    """Check start/stop/ref node attributes on 'multi_strand1.gtf' for
    each strand, first unguided and then with guided ends and assembly."""
    _, locus = read_single_locus('multi_strand1.gtf')

    # pos strand not guided
    sgraph = SpliceGraph.create(locus.get_transfrags(Strand.POS))
    G = sgraph.G
    first = G.node[sgraph.get_node_id(Exon(100, 200))]
    assert first[SGNode.IS_START]
    last = G.node[sgraph.get_node_id(Exon(400, 650))]
    assert last[SGNode.IS_STOP]

    # neg strand not guided
    sgraph = SpliceGraph.create(locus.get_transfrags(Strand.NEG))
    G = sgraph.G
    first = G.node[sgraph.get_node_id(Exon(950, 980))]
    assert first[SGNode.IS_START]
    last = G.node[sgraph.get_node_id(Exon(400, 500))]
    assert last[SGNode.IS_STOP]

    # pos strand guided: (exon, attribute, expected truthiness)
    sgraph = SpliceGraph.create(locus.get_transfrags(Strand.POS),
                                guided_ends=True,
                                guided_assembly=True)
    G = sgraph.G
    expectations = [
        (Exon(100, 150), SGNode.IS_START, True),
        (Exon(150, 200), SGNode.IS_START, True),
        (Exon(500, 600), SGNode.IS_STOP, True),
        (Exon(600, 650), SGNode.IS_STOP, True),
        (Exon(150, 200), SGNode.IS_REF, True),
        (Exon(300, 400), SGNode.IS_REF, True),
        (Exon(500, 600), SGNode.IS_REF, True),
        (Exon(100, 150), SGNode.IS_REF, False),
        (Exon(600, 650), SGNode.IS_REF, False),
    ]
    for exon, attr, expected in expectations:
        value = G.node[sgraph.get_node_id(exon)][attr]
        if expected:
            assert value
        else:
            assert not value

    # neg strand guided
    sgraph = SpliceGraph.create(locus.get_transfrags(Strand.NEG),
                                guided_ends=True,
                                guided_assembly=True)
    G = sgraph.G
    assert G.node[sgraph.get_node_id(Exon(350, 400))][SGNode.IS_STOP]
    assert G.node[sgraph.get_node_id(Exon(980, 1000))][SGNode.IS_START]
    assert not G.node[sgraph.get_node_id(Exon(950, 980))][SGNode.IS_START]
    # every node of the guided neg-strand graph is a reference node
    for _, attrs in G.nodes_iter(data=True):
        assert attrs[SGNode.IS_REF]