Ejemplo n.º 1
0
def test_mark_start_stop_sites2():
    """Check start/stop/ref node flags on each strand of multi_strand1.gtf.

    Both strands are examined with a default splice graph and again with
    guided_ends and guided_assembly switched on.
    """
    _, locus = read_single_locus("multi_strand1.gtf")
    guided = dict(guided_ends=True, guided_assembly=True)

    # positive strand, unguided
    sg = SpliceGraph.create(locus.get_transfrags(Strand.POS))
    graph = sg.G
    assert graph.is_start[sg.get_node_id(Exon(100, 200))]
    assert graph.is_stop[sg.get_node_id(Exon(400, 650))]

    # negative strand, unguided
    sg = SpliceGraph.create(locus.get_transfrags(Strand.NEG))
    graph = sg.G
    assert graph.is_start[sg.get_node_id(Exon(950, 980))]
    assert graph.is_stop[sg.get_node_id(Exon(400, 500))]

    # positive strand, guided
    sg = SpliceGraph.create(locus.get_transfrags(Strand.POS), **guided)
    graph = sg.G
    for exon in (Exon(100, 150), Exon(150, 200)):
        assert graph.is_start[sg.get_node_id(exon)]
    for exon in (Exon(500, 600), Exon(600, 650)):
        assert graph.is_stop[sg.get_node_id(exon)]
    for exon in (Exon(150, 200), Exon(300, 400), Exon(500, 600)):
        assert graph.is_ref[sg.get_node_id(exon)]
    for exon in (Exon(100, 150), Exon(600, 650)):
        assert not graph.is_ref[sg.get_node_id(exon)]

    # negative strand, guided: every node id must carry the ref flag
    sg = SpliceGraph.create(locus.get_transfrags(Strand.NEG), **guided)
    graph = sg.G
    assert graph.is_stop[sg.get_node_id(Exon(350, 400))]
    assert graph.is_start[sg.get_node_id(Exon(980, 1000))]
    assert not graph.is_start[sg.get_node_id(Exon(950, 980))]
    assert all(graph.is_ref[n_id] for n_id in graph.node_ids_iter())
Ejemplo n.º 2
0
def test_empty_graph_bug():
    """Check that create_optimal() returns None as its graph for empty_graph_bug.gtf."""
    _, locus = read_single_locus('empty_graph_bug.gtf')
    pos_sgraph = SpliceGraph.create(locus.get_transfrags(Strand.POS))
    factory = PathGraphFactory(pos_sgraph)
    path_graph, _k = factory.create_optimal()
    assert path_graph is None
Ejemplo n.º 3
0
def test_path2():
    """Run find_paths on the optimal path graph of each splice graph in noc2l_locus.gtf.

    Nothing is asserted; the test passes as long as no call raises.
    """
    _, locus = read_single_locus('noc2l_locus.gtf')
    for splice_graph in locus.create_splice_graphs():
        optimal_graph, _k = PathGraphFactory(splice_graph).create_optimal()
        find_paths(optimal_graph)
Ejemplo n.º 4
0
def test_path1():
    """Build the k=2 path graph for the positive strand of path1.gtf and run find_paths.

    Nothing is asserted; the test passes as long as no call raises.
    """
    _, locus = read_single_locus('path1.gtf')
    pos_sgraph = SpliceGraph.create(locus.get_transfrags(Strand.POS))
    path_graph = PathGraphFactory(pos_sgraph).create(2)
    find_paths(path_graph)
Ejemplo n.º 5
0
def test_path_graph2():
    """Check k-mer, SOURCE and SINK expression in path graphs of change_point2.gtf.

    NOTE: the function returns immediately, so none of the checks below
    are executed and the test always passes.
    """
    # test is switched off: everything after this line is unreachable
    return

    t_dict, _ = read_single_locus('change_point2.gtf')
    sgraph = SpliceGraph.create(t_dict.values())

    def kmer_key(*intervals):
        # tuple of splice-graph node ids used as a key into kmer_id_map
        return tuple(sgraph.get_node_id(iv) for iv in intervals)

    def check_expr(path_graph, expected):
        # expected is an ordered sequence of (node, expr) pairs
        for node, expr in expected:
            assert path_graph.node[node]['expr'] == expr

    # as loaded: no additional start or stop sites
    K = create_path_graph(sgraph, 1)
    kmer_ids = K.graph['kmer_id_map']
    whole = kmer_ids[kmer_key(Exon(0, 100))]
    check_expr(K, [(whole, 12.0), (SOURCE, 12.0), (SINK, 12.0)])

    # stop site at 50
    sgraph.stop_sites.add(50)
    sgraph.recreate()
    K = create_path_graph(sgraph, k=2)
    kmer_ids = K.graph['kmer_id_map']
    both = kmer_ids[kmer_key((0, 50), (50, 100))]
    left = kmer_ids[kmer_key((0, 50))]
    after_stop = [(both, 1.0), (left, 10.0), (SOURCE, 11.0), (SINK, 11.0)]
    check_expr(K, after_stop)
    # the same values are expected after smoothing
    smooth_graph(K)
    check_expr(K, after_stop)

    # TODO: test after rescuing short transfrags

    # start site and stop site at 50
    sgraph.start_sites.add(50)
    sgraph.stop_sites.add(50)
    sgraph.recreate()
    K = create_path_graph(sgraph, k=2)
    smooth_graph(K)
    kmer_ids = K.graph['kmer_id_map']
    keys = (kmer_key((0, 50), (50, 100)), kmer_key((0, 50)),
            kmer_key((50, 100)))
    both, left, right = [kmer_ids[key] for key in keys]
    check_expr(K, [(both, 1.0), (left, 10.0), (right, 1.0),
                   (SOURCE, 12.0), (SINK, 12.0)])
Ejemplo n.º 6
0
def test_multi_strand2():
    """Check that splitting the positive-strand graph of multi_strand2.gtf
    produces sub-graphs for chr1:100-300[+] and chr1:400-600[+]."""
    _, locus = read_single_locus("multi_strand2.gtf")
    pos_graph = SpliceGraph.create(locus.get_transfrags(Strand.POS))

    def region_label(sg):
        # "chrom:start-end[strand]" label for one sub-graph
        return "%s:%d-%d[%s]" % (sg.chrom, sg.start, sg.end, Strand.to_gtf(sg.strand))

    by_region = dict((region_label(sg), sg) for sg in pos_graph.split())
    for label in ("chr1:100-300[+]", "chr1:400-600[+]"):
        assert label in by_region
Ejemplo n.º 7
0
def test_path_graph2():
    """Check k-mer, SOURCE and SINK expression in path graphs of change_point2.gtf.

    Three splice-graph states are covered: as loaded (k=1), with a stop
    site added at 50 (k=2, before and after smoothing), and with both a
    start and a stop site at 50 (k=2, smoothed).
    """
    t_dict, _ = read_single_locus('change_point2.gtf')
    sgraph = SpliceGraph.create(t_dict.values())

    def kmer_key(*intervals):
        # tuple of splice-graph node ids used as a key into kmer_id_map
        return tuple(sgraph.get_node_id(iv) for iv in intervals)

    def check_expr(path_graph, expected):
        # expected is an ordered sequence of (node, expr) pairs
        for node, expr in expected:
            assert path_graph.node[node]['expr'] == expr

    # as loaded: no additional start or stop sites
    K = create_path_graph(sgraph, 1)
    kmer_ids = K.graph['kmer_id_map']
    whole = kmer_ids[kmer_key(Exon(0, 100))]
    check_expr(K, [(whole, 12.0), (SOURCE, 12.0), (SINK, 12.0)])

    # stop site at 50
    sgraph.stop_sites.add(50)
    sgraph.recreate()
    K = create_path_graph(sgraph, k=2)
    kmer_ids = K.graph['kmer_id_map']
    both = kmer_ids[kmer_key((0, 50), (50, 100))]
    left = kmer_ids[kmer_key((0, 50))]
    after_stop = [(both, 1.0), (left, 10.0), (SOURCE, 11.0), (SINK, 11.0)]
    check_expr(K, after_stop)
    # the same values are expected after smoothing
    smooth_graph(K)
    check_expr(K, after_stop)

    # TODO: test after rescuing short transfrags

    # start site and stop site at 50
    sgraph.start_sites.add(50)
    sgraph.stop_sites.add(50)
    sgraph.recreate()
    K = create_path_graph(sgraph, k=2)
    smooth_graph(K)
    kmer_ids = K.graph['kmer_id_map']
    keys = (kmer_key((0, 50), (50, 100)), kmer_key((0, 50)),
            kmer_key((50, 100)))
    both, left, right = [kmer_ids[key] for key in keys]
    check_expr(K, [(both, 1.0), (left, 10.0), (right, 1.0),
                   (SOURCE, 12.0), (SINK, 12.0)])
Ejemplo n.º 8
0
def test_multi_strand2():
    """Check that splitting the positive-strand graph of multi_strand2.gtf
    produces sub-graphs for chr1:100-300[+] and chr1:400-600[+]."""
    _, locus = read_single_locus('multi_strand2.gtf')
    pos_graph = SpliceGraph.create(locus.get_transfrags(Strand.POS))

    labelled = {}
    for part in pos_graph.split():
        strand_symbol = Strand.to_gtf(part.strand)
        label = '%s:%d-%d[%s]' % (part.chrom, part.start, part.end,
                                  strand_symbol)
        labelled[label] = part

    for expected in ('chr1:100-300[+]', 'chr1:400-600[+]'):
        assert expected in labelled
Ejemplo n.º 9
0
def test_path_graph_factory():
    """Check path-graph lengths for k=1 and k=2 on path1.gtf and that the optimal k is 2."""
    t_dict, _ = read_single_locus('path1.gtf')
    factory = PathGraphFactory(SpliceGraph.create(t_dict.values()))
    # (k, expected len() of the resulting path graph)
    for k, expected_len in ((1, 5), (2, 6)):
        assert len(factory.create(k=k)) == expected_len
    _, best_k = factory.create_optimal()
    assert best_k == 2
Ejemplo n.º 10
0
def test_path2():
    """find_paths and cpathfinder.find_paths must agree on noc2l_locus.gtf.

    For the optimal path graph of every splice graph, both functions must
    return the same number of paths, identical paths pairwise, and
    expression values within 1e-5 of each other.
    """
    _, locus = read_single_locus('noc2l_locus.gtf')
    for sgraph in locus.create_splice_graphs():
        K, _k = create_optimal_path_graph(sgraph)
        ref_paths = find_paths(K, 'expr')
        alt_paths = cpathfinder.find_paths(K, 'expr')
        assert len(ref_paths) == len(alt_paths)
        for (ref_path, ref_expr), (alt_path, alt_expr) in zip(ref_paths, alt_paths):
            assert ref_path == alt_path
            assert abs(ref_expr - alt_expr) < 1e-5
Ejemplo n.º 11
0
def test_path2():
    """find_paths and cpathfinder.find_paths must agree on noc2l_locus.gtf.

    For the optimal path graph of every splice graph, both functions must
    return the same number of paths, identical paths pairwise, and
    expression values within 1e-5 of each other.
    """
    tolerance = 1e-5
    _, locus = read_single_locus('noc2l_locus.gtf')
    for sgraph in locus.create_splice_graphs():
        K, _k = create_optimal_path_graph(sgraph)
        ref_paths = find_paths(K, 'expr')
        alt_paths = cpathfinder.find_paths(K, 'expr')
        assert len(ref_paths) == len(alt_paths)
        for ref_item, alt_item in zip(ref_paths, alt_paths):
            ref_path, ref_expr = ref_item
            alt_path, alt_expr = alt_item
            assert ref_path == alt_path
            assert abs(ref_expr - alt_expr) < tolerance
Ejemplo n.º 12
0
def test_topological_sort():
    """topological_sort() output must be accepted by is_topological_sort().

    Checked on a small hand-built graph and on the k=1 path graph of every
    splice graph in noc2l_locus.gtf, where topological_sort_dfs() is
    checked as well.
    """
    toy = Graph()
    # interior nodes of each SOURCE -> ... -> SINK path
    for interior in ((10, 20, 30, 40), (10, 30, 40), (10,), (20,)):
        toy.add_path((toy.SOURCE,) + interior + (toy.SINK,))
    assert toy.is_topological_sort(toy.topological_sort())

    _, locus = read_single_locus('noc2l_locus.gtf')
    for sgraph in locus.create_splice_graphs():
        path_graph = PathGraphFactory(sgraph).create(k=1)
        for sorter in (path_graph.topological_sort,
                       path_graph.topological_sort_dfs):
            assert path_graph.is_topological_sort(sorter())
Ejemplo n.º 13
0
def test_topological_sort():
    """topological_sort() output must be accepted by is_topological_sort().

    Checked on a small hand-built graph and on the k=1 path graph of every
    splice graph in noc2l_locus.gtf, where topological_sort_dfs() is
    checked as well.
    """
    toy = Graph()
    source, sink = toy.SOURCE, toy.SINK
    hand_built_paths = (
        (source, 10, 20, 30, 40, sink),
        (source, 10, 30, 40, sink),
        (source, 10, sink),
        (source, 20, sink),
    )
    for path in hand_built_paths:
        toy.add_path(path)
    assert toy.is_topological_sort(toy.topological_sort())

    _, locus = read_single_locus('noc2l_locus.gtf')
    for sgraph in locus.create_splice_graphs():
        factory = PathGraphFactory(sgraph)
        path_graph = factory.create(k=1)
        order = path_graph.topological_sort()
        assert path_graph.is_topological_sort(order)
        order = path_graph.topological_sort_dfs()
        assert path_graph.is_topological_sort(order)
Ejemplo n.º 14
0
def test_ccle55_cuff_noc2l():
    """Locus built from 55 CCLE samples assembled with Cufflinks.

    Applies detected change points to the negative-strand splice graph
    chr1:934942-976702, checks the resulting start/stop sites and nodes,
    and checks the ends of the single assembled isoform.
    """
    _, locus = read_single_locus('noc2l_locus.gtf')

    # pick the splice graph of interest out of the locus
    candidates = (sg for sg in locus.create_splice_graphs()
                  if sg.chrom == 'chr1' and sg.start == 934942 and
                  sg.end == 976702 and sg.strand == Strand.NEG)
    sgraph = next(candidates, None)
    assert sgraph is not None

    # the node for this interval is a stop node before any change point
    stop_id = sgraph.get_node_id(Exon(934942, 944589))
    assert sgraph.G.is_stop[stop_id]

    # detect change points and apply them without trimming
    for change_point in sgraph.detect_change_points(pval=0.1, fc_cutoff=0.8):
        sgraph.apply_change_point(change_point, trim=False)
    assert {964528, 957434, 959316}.issubset(sgraph.start_sites)
    assert {944278}.issubset(sgraph.stop_sites)

    # rebuild the graph and look at start/stop nodes as intervals
    sgraph.recreate()
    start_ids, stop_ids = sgraph.get_start_stop_nodes()
    start_intervals = {sgraph.get_node_interval(n_id) for n_id in start_ids}
    stop_intervals = {sgraph.get_node_interval(n_id) for n_id in stop_ids}
    for exon in (Exon(959214, 959316), Exon(959316, 964528),
                 Exon(957273, 957434)):
        assert exon in start_intervals
    assert Exon(944278, 944321) in stop_intervals

    # with max_paths = 1 the single isoform must use the change points
    config = Config.defaults()
    config.max_paths = 1
    genes = assemble_isoforms(sgraph, config)
    assert len(genes) == 1
    gene = genes[0]
    assert len(gene) == 1
    isoform = gene[0]
    assert isoform.path[0] == Exon(944321, 944800)
    assert isoform.path[-1] == Exon(959214, 959316)
Ejemplo n.º 15
0
def test_ccle55_cuff_noc2l():
    """Locus built from 55 CCLE samples assembled with Cufflinks.

    Applies detected change points to the negative-strand splice graph
    chr1:934942-976702, checks the resulting start/stop sites and nodes,
    and checks the ends of the single assembled isoform.
    """
    _, locus = read_single_locus('noc2l_locus.gtf')

    # pick the splice graph of interest out of the locus
    candidates = (sg for sg in locus.create_splice_graphs()
                  if sg.chrom == 'chr1' and sg.start == 934942 and
                  sg.end == 976702 and sg.strand == Strand.NEG)
    sgraph = next(candidates, None)
    assert sgraph is not None

    # the node for this interval is a stop node before any change point
    stop_id = sgraph.get_node_id(Exon(934942, 944589))
    assert sgraph.G.node[stop_id][SGNode.IS_STOP]

    # detect change points and apply them without trimming
    for change_point in sgraph.detect_change_points(pval=0.05, fc_cutoff=0.8):
        sgraph.apply_change_point(change_point, trim=False)
    assert {964528, 957434, 959316}.issubset(sgraph.start_sites)
    assert {944278}.issubset(sgraph.stop_sites)

    # rebuild the graph and look at start/stop nodes as intervals
    sgraph.recreate()
    start_ids, stop_ids = sgraph.get_start_stop_nodes()
    start_intervals = {sgraph.get_node_interval(n_id) for n_id in start_ids}
    stop_intervals = {sgraph.get_node_interval(n_id) for n_id in stop_ids}
    for exon in (Exon(959214, 959316), Exon(959316, 964528),
                 Exon(957273, 957434)):
        assert exon in start_intervals
    assert Exon(944278, 944321) in stop_intervals

    # with max_paths = 1 the single isoform must use the change points
    config = Config.defaults()
    config.max_paths = 1
    genes = assemble_isoforms(sgraph, config)
    assert len(genes) == 1
    gene = genes[0]
    assert len(gene) == 1
    isoform = gene[0]
    assert isoform.path[0] == Exon(944321, 944800)
    assert isoform.path[-1] == Exon(959214, 959316)
Ejemplo n.º 16
0
def test_ccle55_cuff_noc2l():
    """Locus built from 55 CCLE samples assembled with Cufflinks.

    Applies detected change points to the negative-strand splice graph
    chr1:934942-976702, checks the resulting start/stop sites and nodes,
    and checks the ends of the single best path through the optimal path
    graph.
    """
    _, locus = read_single_locus('noc2l_locus.gtf')

    # pick the splice graph of interest out of the locus
    candidates = (sg for sg in locus.create_splice_graphs()
                  if sg.chrom == 'chr1' and sg.start == 934942
                  and sg.end == 976702 and sg.strand == Strand.NEG)
    sgraph = next(candidates, None)
    assert sgraph is not None

    # the node for this interval is a stop node before any change point
    stop_id = sgraph.get_node_id(Exon(934942, 944589))
    assert sgraph.G.is_stop[stop_id]

    # detect change points and apply them without trimming
    for change_point in sgraph.detect_change_points(pval=0.1, fc_cutoff=0.8):
        sgraph.apply_change_point(change_point, trim=False)
    assert {964528, 957434, 959316}.issubset(sgraph.start_sites)
    assert {944278}.issubset(sgraph.stop_sites)

    # rebuild the graph and look at start/stop nodes as intervals
    sgraph.recreate()
    start_ids, stop_ids = sgraph.get_start_stop_nodes()
    start_intervals = {sgraph.get_node_interval(n_id) for n_id in start_ids}
    stop_intervals = {sgraph.get_node_interval(n_id) for n_id in stop_ids}
    for exon in (Exon(959214, 959316), Exon(959316, 964528),
                 Exon(957273, 957434)):
        assert exon in start_intervals
    assert Exon(944278, 944321) in stop_intervals

    # the single best path must use the change points
    pgraph, _k = PathGraphFactory(sgraph).create_optimal()
    best = find_paths(pgraph, max_paths=1)
    assert len(best) == 1
    kmer_path, _expr = best[0]
    exons = reconstruct_path(kmer_path, pgraph, sgraph)
    assert exons[0] == Exon(944321, 944800)
    assert exons[-1] == Exon(959214, 959316)
Ejemplo n.º 17
0
def test_unreachable_kmers():
    """Check path graphs of path_graph_k2.gtf for k=2, k=1 and the optimal k.

    The k=2 graph must be flagged invalid and have length 0; the k=1 graph
    must be valid with no lost k-mers and length 8; the optimal graph
    (kmax=0, loss_threshold=1.0) must use k=1 and have length 8.
    """
    t_dict, _ = read_single_locus('path_graph_k2.gtf')
    sgraph = SpliceGraph.create(t_dict.values())

    rejected = create_path_graph(sgraph, k=2)
    assert not rejected.graph['valid']
    assert len(rejected) == 0

    accepted = create_path_graph(sgraph, k=1)
    assert accepted.graph['valid']
    assert accepted.graph['num_lost_kmers'] == 0
    assert len(accepted) == 8

    optimal, chosen_k = create_optimal_path_graph(sgraph, kmax=0,
                                                  loss_threshold=1.0)
    assert chosen_k == 1
    assert len(optimal) == 8
Ejemplo n.º 18
0
def test_path1():
    """find_paths and cpathfinder.find_paths must agree on the k=2 path graph of path1.gtf.

    Both must return the same number of paths, identical paths pairwise,
    and expression values within 1e-8 of each other.
    """
    _, locus = read_single_locus('path1.gtf')
    pos_sgraph = SpliceGraph.create(locus.get_transfrags(Strand.POS))
    K = create_path_graph(pos_sgraph, 2)
    ref_paths = find_paths(K, 'expr')
    alt_paths = cpathfinder.find_paths(K, 'expr')
    assert len(ref_paths) == len(alt_paths)
    for (ref_path, ref_expr), (alt_path, alt_expr) in zip(ref_paths, alt_paths):
        assert ref_path == alt_path
        assert abs(ref_expr - alt_expr) < 1e-8
Ejemplo n.º 19
0
def test_unreachable_kmers():
    """Check path graphs of path_graph_k2.gtf for k=2, k=1 and the optimal k.

    The k=2 graph must be flagged invalid and have length 0; the k=1 graph
    must be valid with no lost k-mers and length 8; the optimal graph
    (kmax=0, loss_threshold=1.0) must use k=1 and have length 8.
    """
    t_dict, _ = read_single_locus('path_graph_k2.gtf')
    sgraph = SpliceGraph.create(t_dict.values())

    # k = 2: invalid and empty
    graph_k2 = create_path_graph(sgraph, k=2)
    assert not graph_k2.graph['valid']
    assert len(graph_k2) == 0

    # k = 1: valid, nothing lost
    graph_k1 = create_path_graph(sgraph, k=1)
    assert graph_k1.graph['valid']
    assert graph_k1.graph['num_lost_kmers'] == 0
    assert len(graph_k1) == 8

    # optimal k
    best_graph, best_k = create_optimal_path_graph(
        sgraph, kmax=0, loss_threshold=1.0)
    assert best_k == 1
    assert len(best_graph) == 8
Ejemplo n.º 20
0
def test_path1():
    """find_paths and cpathfinder.find_paths must agree on the k=2 path graph of path1.gtf.

    Both must return the same number of paths, identical paths pairwise,
    and expression values within 1e-8 of each other.
    """
    tolerance = 1e-8
    _, locus = read_single_locus('path1.gtf')
    pos_sgraph = SpliceGraph.create(locus.get_transfrags(Strand.POS))
    K = create_path_graph(pos_sgraph, 2)
    ref_paths = find_paths(K, 'expr')
    alt_paths = cpathfinder.find_paths(K, 'expr')
    assert len(ref_paths) == len(alt_paths)
    for ref_item, alt_item in zip(ref_paths, alt_paths):
        ref_path, ref_expr = ref_item
        alt_path, alt_expr = alt_item
        assert ref_path == alt_path
        assert abs(ref_expr - alt_expr) < tolerance
Ejemplo n.º 21
0
def test_find_node_boundaries():
    """Check splice sites, threshold points and node boundaries for splice_sites.gtf."""
    t_dict, _ = read_single_locus("splice_sites.gtf")
    transfrags = t_dict.values()

    # splice sites collected over all transfrags
    seen_sites = set(site for t in transfrags for site in t.itersplices())
    assert tuple(sorted(seen_sites)) == (100, 200, 250, 300, 400)

    # splice graph over the same transfrags
    sg = SpliceGraph.create(transfrags)

    # threshold points of the graph's expression data
    threshold_points = find_threshold_points(sg.expr_data, sg.start)
    assert tuple(threshold_points) == (100, 150, 300, 375)

    # node boundaries reported by the graph
    assert tuple(sg._find_node_boundaries()) == (
        10, 100, 150, 200, 250, 300, 375, 400, 525)
Ejemplo n.º 22
0
def test_find_node_boundaries():
    """Check splice sites, threshold points and node boundaries for splice_sites.gtf."""
    expected_splices = (100, 200, 250, 300, 400)
    expected_thresholds = (100, 150, 300, 375)
    expected_boundaries = (10, 100, 150, 200, 250, 300, 375, 400, 525)

    t_dict, _ = read_single_locus('splice_sites.gtf')
    transfrags = t_dict.values()

    # splice sites collected over all transfrags
    seen_sites = set()
    for transfrag in transfrags:
        seen_sites |= set(transfrag.itersplices())
    assert tuple(sorted(seen_sites)) == expected_splices

    # splice graph over the same transfrags
    sg = SpliceGraph.create(transfrags)
    thresholds = tuple(find_threshold_points(sg.expr_data, sg.start))
    assert thresholds == expected_thresholds
    assert tuple(sg._find_node_boundaries()) == expected_boundaries
Ejemplo n.º 23
0
def test_trimming_to_zero_bug():
    """Check start/stop nodes of change_point_bug.gtf after applying change points.

    Change points are detected on the unstranded transfrags and applied,
    the graph is rebuilt, and the interval 173433532-173435169 must appear
    among both the stop nodes and the start nodes.
    """
    t_dict, locus = read_single_locus('change_point_bug.gtf')
    transfrags_un = locus.get_transfrags(Strand.NA)
    sgraph = SpliceGraph.create(transfrags_un)
    for cp in sgraph.detect_change_points():
        sgraph.apply_change_point(cp)
    sgraph.recreate()
    # get start/stop node ids and convert them to node intervals
    start_nodes, stop_nodes = sgraph.get_start_stop_nodes()
    start_nodes = set(sgraph.get_node_interval(n_id) for n_id in start_nodes)
    stop_nodes = set(sgraph.get_node_interval(n_id) for n_id in stop_nodes)
    exon = Exon(173433532, 173435169)
    assert exon in stop_nodes
    # NOTE(review): the start-node check was previously asserted twice in a
    # row with the same interval; the duplicate was dropped. Confirm that
    # no other interval was meant to be checked.
    assert exon in start_nodes
Ejemplo n.º 24
0
def test_trimming_to_zero_bug():
    """Check start/stop nodes of change_point_bug.gtf after applying change points.

    Change points are detected (pval=0.1) on the unstranded transfrags and
    applied, the graph is rebuilt, and the interval 173433532-173435169
    must appear among both the stop nodes and the start nodes.
    """
    t_dict, locus = read_single_locus('change_point_bug.gtf')
    transfrags_un = locus.get_transfrags(Strand.NA)
    sgraph = SpliceGraph.create(transfrags_un)
    for cp in sgraph.detect_change_points(pval=0.1):
        sgraph.apply_change_point(cp)
    sgraph.recreate()
    # get start/stop node ids and convert them to node intervals
    start_nodes, stop_nodes = sgraph.get_start_stop_nodes()
    start_nodes = set(sgraph.get_node_interval(n_id) for n_id in start_nodes)
    stop_nodes = set(sgraph.get_node_interval(n_id) for n_id in stop_nodes)
    exon = Exon(173433532, 173435169)
    assert exon in stop_nodes
    # NOTE(review): the start-node check was previously asserted twice in a
    # row with the same interval; the duplicate was dropped. Confirm that
    # no other interval was meant to be checked.
    assert exon in start_nodes
Ejemplo n.º 25
0
def test_impute_strand():
    """Check per-strand transfrag counts and expression data around impute_unknown_strands().

    Counts are checked before and after imputation on impute_strand.gtf;
    expression data is checked after imputation only.
    """
    _, locus = read_single_locus('impute_strand.gtf')

    def check_counts(expected):
        # expected is an ordered sequence of (strand, number of transfrags)
        for strand, count in expected:
            assert len(locus.get_transfrags(strand)) == count

    check_counts(((Strand.POS, 1), (Strand.NEG, 1), (Strand.NA, 3)))
    locus.impute_unknown_strands()
    check_counts(((Strand.POS, 2), (Strand.NEG, 2), (Strand.NA, 1)))

    # ((start, end, strand), expected expression values)
    expected_expr = (
        ((9, 11, Strand.POS), [2.0, 1.0]),
        ((14, 16, Strand.POS), [1.0, 0.0]),
        ((14, 16, Strand.NEG), [0.0, 1.0]),
        ((14, 16, Strand.NA), [1.0, 1.0]),
        ((19, 21, Strand.NEG), [1.0, 2.0]),
    )
    for (start, end, strand), values in expected_expr:
        observed = locus.get_expr_data(start, end, strand)
        assert np.array_equal(observed, values)
Ejemplo n.º 26
0
def test_path_graph1():
    """The k=2 path graph of path1.gtf must be isomorphic to one built from four known paths."""
    t_dict, _ = read_single_locus('path1.gtf')
    SG = SpliceGraph.create(t_dict.values())

    # exons named as in the path labels below
    A, B, C, D, E = (Exon(0, 100), Exon(200, 300), Exon(400, 500),
                     Exon(600, 700), Exon(800, 900))
    # ABCDE, ACE, ABCE, ACDE
    exon_chains = ((A, B, C, D, E), (A, C, E), (A, B, C, E), (A, C, D, E))
    paths = [(SOURCE,) + chain + (SINK,) for chain in exon_chains]

    k = 2
    G1 = create_path_graph(SG, k)
    # reference graph assembled directly from the k-mers of each path
    G2 = nx.DiGraph()
    for path in paths:
        add_path(G2, list(get_kmers(path, k)), 1.0)
    assert nx.is_isomorphic(G1, G2)
Ejemplo n.º 27
0
def test_impute_strand_guided():
    """Check strand imputation on impute_strand_guided.gtf read with guided_strand=True.

    Per-strand transfrag counts are checked before and after
    impute_unknown_strands(); the strand of transfrag 'C' and the
    expression data are checked after imputation.
    """
    t_dict, locus = read_single_locus('impute_strand_guided.gtf',
                                      guided_strand=True)

    def check_counts(expected):
        # expected is an ordered sequence of (strand, number of transfrags)
        for strand, count in expected:
            assert len(locus.get_transfrags(strand)) == count

    check_counts(((Strand.POS, 2), (Strand.NEG, 1), (Strand.NA, 3)))
    locus.impute_unknown_strands()
    assert t_dict['C'].strand == Strand.POS
    check_counts(((Strand.POS, 4), (Strand.NEG, 1), (Strand.NA, 1)))

    # ((start, end, strand), expected expression values)
    expected_expr = (
        ((14, 16, Strand.POS), [2.0, 1.0]),
        ((14, 16, Strand.NEG), [0.0, 0.0]),
        ((14, 16, Strand.NA), [0.0, 1.0]),
        ((19, 21, Strand.NEG), [0.0, 1.0]),
        ((19, 21, Strand.NA), [1.0, 1.0]),
    )
    for (start, end, strand), values in expected_expr:
        observed = locus.get_expr_data(start, end, strand)
        assert np.array_equal(observed, values)
Ejemplo n.º 28
0
def test_mark_start_stop_sites2():
    """Check IS_START / IS_STOP / IS_REF node attributes for multi_strand1.gtf.

    Both strands are examined with a default splice graph and again with
    guided_ends and guided_assembly switched on.
    """
    _, locus = read_single_locus('multi_strand1.gtf')

    def attr(sg, exon, key):
        # attribute `key` of the node whose id get_node_id returns for `exon`
        return sg.G.node[sg.get_node_id(exon)][key]

    # positive strand, unguided
    sg = SpliceGraph.create(locus.get_transfrags(Strand.POS))
    assert attr(sg, Exon(100, 200), SGNode.IS_START)
    assert attr(sg, Exon(400, 650), SGNode.IS_STOP)

    # negative strand, unguided
    sg = SpliceGraph.create(locus.get_transfrags(Strand.NEG))
    assert attr(sg, Exon(950, 980), SGNode.IS_START)
    assert attr(sg, Exon(400, 500), SGNode.IS_STOP)

    # positive strand, guided
    sg = SpliceGraph.create(locus.get_transfrags(Strand.POS),
                            guided_ends=True, guided_assembly=True)
    for exon in (Exon(100, 150), Exon(150, 200)):
        assert attr(sg, exon, SGNode.IS_START)
    for exon in (Exon(500, 600), Exon(600, 650)):
        assert attr(sg, exon, SGNode.IS_STOP)
    for exon in (Exon(150, 200), Exon(300, 400), Exon(500, 600)):
        assert attr(sg, exon, SGNode.IS_REF)
    for exon in (Exon(100, 150), Exon(600, 650)):
        assert not attr(sg, exon, SGNode.IS_REF)

    # negative strand, guided: every node must carry IS_REF
    sg = SpliceGraph.create(locus.get_transfrags(Strand.NEG),
                            guided_ends=True, guided_assembly=True)
    assert attr(sg, Exon(350, 400), SGNode.IS_STOP)
    assert attr(sg, Exon(980, 1000), SGNode.IS_START)
    assert not attr(sg, Exon(950, 980), SGNode.IS_START)
    assert all(data[SGNode.IS_REF]
               for _, data in sg.G.nodes_iter(data=True))
Ejemplo n.º 29
0
def test_mark_start_stop_sites2():
    """Check IS_START / IS_STOP / IS_REF node attributes for multi_strand1.gtf.

    Both strands are examined with a default splice graph and again with
    guided_ends and guided_assembly switched on.
    """
    _, locus = read_single_locus('multi_strand1.gtf')
    guided = dict(guided_ends=True, guided_assembly=True)

    def node_data(sg, exon):
        # attribute mapping of the node whose id get_node_id returns for `exon`
        return sg.G.node[sg.get_node_id(exon)]

    # positive strand, unguided
    sg = SpliceGraph.create(locus.get_transfrags(Strand.POS))
    assert node_data(sg, Exon(100, 200))[SGNode.IS_START]
    assert node_data(sg, Exon(400, 650))[SGNode.IS_STOP]

    # negative strand, unguided
    sg = SpliceGraph.create(locus.get_transfrags(Strand.NEG))
    assert node_data(sg, Exon(950, 980))[SGNode.IS_START]
    assert node_data(sg, Exon(400, 500))[SGNode.IS_STOP]

    # positive strand, guided
    sg = SpliceGraph.create(locus.get_transfrags(Strand.POS), **guided)
    assert node_data(sg, Exon(100, 150))[SGNode.IS_START]
    assert node_data(sg, Exon(150, 200))[SGNode.IS_START]
    assert node_data(sg, Exon(500, 600))[SGNode.IS_STOP]
    assert node_data(sg, Exon(600, 650))[SGNode.IS_STOP]
    assert node_data(sg, Exon(150, 200))[SGNode.IS_REF]
    assert node_data(sg, Exon(300, 400))[SGNode.IS_REF]
    assert node_data(sg, Exon(500, 600))[SGNode.IS_REF]
    assert not node_data(sg, Exon(100, 150))[SGNode.IS_REF]
    assert not node_data(sg, Exon(600, 650))[SGNode.IS_REF]

    # negative strand, guided: every node must carry IS_REF
    sg = SpliceGraph.create(locus.get_transfrags(Strand.NEG), **guided)
    assert node_data(sg, Exon(350, 400))[SGNode.IS_STOP]
    assert node_data(sg, Exon(980, 1000))[SGNode.IS_START]
    assert not node_data(sg, Exon(950, 980))[SGNode.IS_START]
    for _node, data in sg.G.nodes_iter(data=True):
        assert data[SGNode.IS_REF]
Ejemplo n.º 30
0
def test_mark_start_stop_sites2():
    """Check start/stop/ref node flags on each strand of multi_strand1.gtf.

    Both strands are examined with a default splice graph and again with
    guided_ends and guided_assembly switched on.
    """
    _, locus = read_single_locus('multi_strand1.gtf')

    # positive strand, unguided
    sg = SpliceGraph.create(locus.get_transfrags(Strand.POS))
    graph = sg.G
    assert graph.is_start[sg.get_node_id(Exon(100, 200))]
    assert graph.is_stop[sg.get_node_id(Exon(400, 650))]

    # negative strand, unguided
    sg = SpliceGraph.create(locus.get_transfrags(Strand.NEG))
    graph = sg.G
    assert graph.is_start[sg.get_node_id(Exon(950, 980))]
    assert graph.is_stop[sg.get_node_id(Exon(400, 500))]

    # positive strand, guided
    sg = SpliceGraph.create(locus.get_transfrags(Strand.POS),
                            guided_ends=True, guided_assembly=True)
    graph = sg.G
    for exon in (Exon(100, 150), Exon(150, 200)):
        assert graph.is_start[sg.get_node_id(exon)]
    for exon in (Exon(500, 600), Exon(600, 650)):
        assert graph.is_stop[sg.get_node_id(exon)]
    for exon in (Exon(150, 200), Exon(300, 400), Exon(500, 600)):
        assert graph.is_ref[sg.get_node_id(exon)]
    for exon in (Exon(100, 150), Exon(600, 650)):
        assert not graph.is_ref[sg.get_node_id(exon)]

    # negative strand, guided: every node id must carry the ref flag
    sg = SpliceGraph.create(locus.get_transfrags(Strand.NEG),
                            guided_ends=True, guided_assembly=True)
    graph = sg.G
    assert graph.is_stop[sg.get_node_id(Exon(350, 400))]
    assert graph.is_start[sg.get_node_id(Exon(980, 1000))]
    assert not graph.is_start[sg.get_node_id(Exon(950, 980))]
    assert all(graph.is_ref[n_id] for n_id in graph.node_ids_iter())
Ejemplo n.º 31
0
def test_path_graph1():
    """The k=2 path graph of path1.gtf must be isomorphic to one built from four known paths."""
    t_dict, _ = read_single_locus('path1.gtf')
    SG = SpliceGraph.create(t_dict.values())

    # exons named as in the path labels below
    A = Exon(0, 100)
    B = Exon(200, 300)
    C = Exon(400, 500)
    D = Exon(600, 700)
    E = Exon(800, 900)
    paths = [
        (SOURCE, A, B, C, D, E, SINK),  # ABCDE
        (SOURCE, A, C, E, SINK),        # ACE
        (SOURCE, A, B, C, E, SINK),     # ABCE
        (SOURCE, A, C, D, E, SINK),     # ACDE
    ]

    k = 2
    G1 = create_path_graph(SG, k)
    # reference graph assembled directly from the k-mers of each path
    G2 = nx.DiGraph()
    for path in paths:
        path_kmers = list(get_kmers(path, k))
        add_path(G2, path_kmers, 1.0)
    assert nx.is_isomorphic(G1, G2)
Ejemplo n.º 32
0
def test_topological_sort():
    """cpathfinder.topological_sort must return the same node order as
    nx.topological_sort for every splice graph of noc2l_locus.gtf."""
    _, locus = read_single_locus('noc2l_locus.gtf')
    for splice_graph in locus.create_splice_graphs():
        nx_order = tuple(nx.topological_sort(splice_graph.G))
        c_order = tuple(cpathfinder.topological_sort(splice_graph.G))
        assert nx_order == c_order
Ejemplo n.º 33
0
def test_ref_starts_ends():
    """change_point1.gtf must yield ref start site 95 and ref stop site 200 only."""
    t_dict, _ = read_single_locus('change_point1.gtf')
    graph = SpliceGraph.create(t_dict.values())
    assert sorted(graph.ref_start_sites) == [95]
    assert sorted(graph.ref_stop_sites) == [200]
Ejemplo n.º 34
0
def test_empty_graph_bug():
    """assemble_isoforms must return an empty result for empty_graph_bug.gtf (positive strand)."""
    _, locus = read_single_locus('empty_graph_bug.gtf')
    pos_sgraph = SpliceGraph.create(locus.get_transfrags(Strand.POS))
    assembled = assemble_isoforms(pos_sgraph, Config.defaults())
    assert len(assembled) == 0
Ejemplo n.º 35
0
def test_mark_start_stop_sites1():
    """Check start/stop node flags of change_point1.gtf as change points are added.

    The splice graph is checked as loaded, after adding a start site at
    125, and after additionally adding a stop site at 80. The same
    sequence is then repeated with every transfrag flipped to the negative
    strand.
    """
    t_dict, locus = read_single_locus('change_point1.gtf')

    def check_nodes(sgraph, expected):
        # expected is an ordered sequence of (exon, is_start, is_stop);
        # the graph must contain exactly that many nodes
        assert len(sgraph.G) == len(expected)
        for exon, is_start, is_stop in expected:
            n_id = sgraph.get_node_id(exon)
            assert bool(sgraph.G.is_start[n_id]) == is_start
            assert bool(sgraph.G.is_stop[n_id]) == is_stop

    # positive strand, as loaded
    sgraph = SpliceGraph.create(t_dict.values())
    check_nodes(sgraph, [(Exon(50, 200), True, True)])

    # add a start site change point
    sgraph.start_sites.add(125)
    sgraph.recreate()
    check_nodes(sgraph, [(Exon(50, 125), True, False),
                         (Exon(125, 200), True, True)])

    # add a stop site change point
    sgraph.stop_sites.add(80)
    sgraph.recreate()
    check_nodes(sgraph, [(Exon(50, 80), True, True),
                         (Exon(80, 125), False, False),
                         (Exon(125, 200), True, True)])

    # flip strand; values() is used because the keys are not needed and
    # dict.iteritems() does not exist on Python 3
    for t in t_dict.values():
        t.strand = Strand.NEG
    sgraph = SpliceGraph.create(t_dict.values())
    check_nodes(sgraph, [(Exon(50, 200), True, True)])

    # add a start site change point
    sgraph.start_sites.add(125)
    sgraph.recreate()
    check_nodes(sgraph, [(Exon(50, 125), True, True),
                         (Exon(125, 200), True, False)])

    # add a stop site change point
    sgraph.stop_sites.add(80)
    sgraph.recreate()
    check_nodes(sgraph, [(Exon(50, 80), False, True),
                         (Exon(80, 125), True, True),
                         (Exon(125, 200), True, False)])
Ejemplo n.º 36
0
def test_topological_sort():
    """Compare cpathfinder's topological sort against the ``nx`` one.

    For every splice graph created from the 'noc2l_locus.gtf' locus, the
    node order returned by ``cpathfinder.topological_sort`` must equal the
    order returned by ``nx.topological_sort``.
    """
    _, locus = read_single_locus('noc2l_locus.gtf')
    for splice_graph in locus.create_splice_graphs():
        graph = splice_graph.G
        # reference ordering first, then the implementation under test
        expected_order = tuple(nx.topological_sort(graph))
        observed_order = tuple(cpathfinder.topological_sort(graph))
        assert expected_order == observed_order
Ejemplo n.º 37
0
def test_empty_graph_bug():
    """Regression check for the 'empty_graph_bug.gtf' locus.

    Builds a splice graph from the positive-strand transfrags and asserts
    that assembling it with the default configuration produces zero
    isoforms.
    """
    _, locus = read_single_locus('empty_graph_bug.gtf')
    sgraph = SpliceGraph.create(locus.get_transfrags(Strand.POS))
    config = Config.defaults()
    assembled = assemble_isoforms(sgraph, config)
    assert len(assembled) == 0
Ejemplo n.º 38
0
def test_ref_starts_ends():
    """Check the reference start/stop sites recorded for 'change_point1.gtf'.

    The splice graph built from all transfrags of the locus must report
    exactly one reference start site (95) and one reference stop site (200).
    """
    transfrags_by_id, _ = read_single_locus("change_point1.gtf")
    splice_graph = SpliceGraph.create(transfrags_by_id.values())

    ref_starts = tuple(sorted(splice_graph.ref_start_sites))
    assert ref_starts == (95,)

    ref_stops = tuple(sorted(splice_graph.ref_stop_sites))
    assert ref_stops == (200,)
Ejemplo n.º 39
0
def test_mark_start_stop_sites1():
    """Check start/stop node flags as change points are added, on both strands.

    Loads "change_point1.gtf" and builds a splice graph from all of its
    transfrags. A start site at 125 and then a stop site at 80 are added,
    recreating the graph after each addition and checking the node count
    and the ``is_start`` / ``is_stop`` flags of every resulting node. The
    same sequence is then repeated after setting every transfrag's strand
    to ``Strand.NEG``.
    """
    def check_node(sg, start, end, is_start, is_stop):
        # Verify both flags of the node covering Exon(start, end).
        # sg.G is read on every call so the check always sees the graph
        # that is current after the latest recreate().
        n_id = sg.get_node_id(Exon(start, end))
        assert bool(sg.G.is_start[n_id]) == is_start
        assert bool(sg.G.is_stop[n_id]) == is_stop

    t_dict, _ = read_single_locus("change_point1.gtf")
    sgraph = SpliceGraph.create(t_dict.values())
    assert len(sgraph.G) == 1
    check_node(sgraph, 50, 200, is_start=True, is_stop=True)

    # add a start site change point
    sgraph.start_sites.add(125)
    sgraph.recreate()
    assert len(sgraph.G) == 2
    check_node(sgraph, 50, 125, is_start=True, is_stop=False)
    check_node(sgraph, 125, 200, is_start=True, is_stop=True)

    # add a stop site change point
    sgraph.stop_sites.add(80)
    sgraph.recreate()
    assert len(sgraph.G) == 3
    check_node(sgraph, 50, 80, is_start=True, is_stop=True)
    check_node(sgraph, 80, 125, is_start=False, is_stop=False)
    check_node(sgraph, 125, 200, is_start=True, is_stop=True)

    # flip strand
    # values() instead of iteritems(): the key was unused, and values()
    # is available on both Python 2 and Python 3 dicts.
    for t in t_dict.values():
        t.strand = Strand.NEG
    sgraph = SpliceGraph.create(t_dict.values())
    assert len(sgraph.G) == 1
    check_node(sgraph, 50, 200, is_start=True, is_stop=True)

    # add a start site change point
    sgraph.start_sites.add(125)
    sgraph.recreate()
    assert len(sgraph.G) == 2
    check_node(sgraph, 50, 125, is_start=True, is_stop=True)
    check_node(sgraph, 125, 200, is_start=True, is_stop=False)

    # add a stop site change point
    sgraph.stop_sites.add(80)
    sgraph.recreate()
    assert len(sgraph.G) == 3
    check_node(sgraph, 50, 80, is_start=False, is_stop=True)
    check_node(sgraph, 80, 125, is_start=True, is_stop=True)
    check_node(sgraph, 125, 200, is_start=True, is_stop=False)