コード例 #1
0
ファイル: test_splice_graph.py プロジェクト: yniknafs/taco
def test_mark_start_stop_sites2():
    """Check start/stop/ref node flags on both strands of 'multi_strand1.gtf'.

    Each strand is tested twice: once with a plain splice graph and once
    with guided_ends and guided_assembly enabled.
    """
    t_dict, locus = read_single_locus("multi_strand1.gtf")

    def build(strand, **options):
        # splice graph over the locus transfrags of a single strand
        return SpliceGraph.create(locus.get_transfrags(strand), **options)

    def flag(flags, sg, start, end):
        # look up a per-node flag by the node id of Exon(start, end)
        return flags[sg.get_node_id(Exon(start, end))]

    # pos strand not guided
    sg = build(Strand.POS)
    graph = sg.G
    assert flag(graph.is_start, sg, 100, 200)
    assert flag(graph.is_stop, sg, 400, 650)

    # neg strand not guided
    sg = build(Strand.NEG)
    graph = sg.G
    assert flag(graph.is_start, sg, 950, 980)
    assert flag(graph.is_stop, sg, 400, 500)

    # pos strand guided
    sg = build(Strand.POS, guided_ends=True, guided_assembly=True)
    graph = sg.G
    for start, end in ((100, 150), (150, 200)):
        assert flag(graph.is_start, sg, start, end)
    for start, end in ((500, 600), (600, 650)):
        assert flag(graph.is_stop, sg, start, end)
    for start, end in ((150, 200), (300, 400), (500, 600)):
        assert flag(graph.is_ref, sg, start, end)
    for start, end in ((100, 150), (600, 650)):
        assert not flag(graph.is_ref, sg, start, end)

    # neg strand guided
    sg = build(Strand.NEG, guided_ends=True, guided_assembly=True)
    graph = sg.G
    assert flag(graph.is_stop, sg, 350, 400)
    assert flag(graph.is_start, sg, 980, 1000)
    assert not flag(graph.is_start, sg, 950, 980)
    # every node of the guided neg-strand graph is expected to be a ref node
    for node_id in graph.node_ids_iter():
        assert graph.is_ref[node_id]
コード例 #2
0
def test_empty_graph_bug():
    """create_optimal() must return None as the graph for 'empty_graph_bug.gtf'."""
    _, locus = read_single_locus('empty_graph_bug.gtf')
    pos_transfrags = locus.get_transfrags(Strand.POS)
    factory = PathGraphFactory(SpliceGraph.create(pos_transfrags))
    # create_optimal() returns a (graph, k) pair; only the graph is checked
    graph, _k = factory.create_optimal()
    assert graph is None
コード例 #3
0
def test_path1():
    """Smoke test: build a k=2 path graph from 'path1.gtf' and run find_paths.

    No assertions are made; the test only checks that nothing raises.
    """
    _, locus = read_single_locus('path1.gtf')
    sgraph = SpliceGraph.create(locus.get_transfrags(Strand.POS))
    factory = PathGraphFactory(sgraph)
    # result is intentionally discarded
    find_paths(factory.create(2))
コード例 #4
0
ファイル: test_path_graph.py プロジェクト: Rainkikiki/taco
def test_path_graph2():
    """Check path graph node expression as stop/start sites are added.

    NOTE(review): the bare ``return`` on the first line means nothing below
    it executes, so this test currently always passes without checking
    anything. Presumably it was disabled on purpose -- confirm before
    re-enabling.
    """
    # NOTE(review): early return disables the whole test body below
    return
    t_dict, locus = read_single_locus('change_point2.gtf')
    sgraph = SpliceGraph.create(t_dict.values())

    # trivial case without additional stops or starts
    k = 1
    K = create_path_graph(sgraph, k)
    kmer_id_map = K.graph['kmer_id_map']
    n_id = sgraph.get_node_id(Exon(0, 100))
    # kmer_id_map is keyed by tuples of node ids
    kmer_id = kmer_id_map[(n_id, )]
    assert K.node[kmer_id]['expr'] == 12.0
    assert K.node[SOURCE]['expr'] == 12.0
    assert K.node[SINK]['expr'] == 12.0

    # add a stop site
    sgraph.stop_sites.add(50)
    sgraph.recreate()
    K = create_path_graph(sgraph, k=2)
    kmer_id_map = K.graph['kmer_id_map']
    n1 = (sgraph.get_node_id((0, 50)), sgraph.get_node_id((50, 100)))
    kmer1 = kmer_id_map[n1]
    n2 = (sgraph.get_node_id((0, 50)), )
    kmer2 = kmer_id_map[n2]

    assert K.node[kmer1]['expr'] == 1.0
    assert K.node[kmer2]['expr'] == 10.0
    assert K.node[SOURCE]['expr'] == 11.0
    assert K.node[SINK]['expr'] == 11.0
    # smooth kmer graph
    # the same values are expected after smoothing
    smooth_graph(K)
    assert K.node[kmer1]['expr'] == 1.0
    assert K.node[kmer2]['expr'] == 10.0
    assert K.node[SOURCE]['expr'] == 11.0
    assert K.node[SINK]['expr'] == 11.0

    # TODO: test after rescuing short transfrags

    # add both a start and a stop site
    sgraph.start_sites.add(50)
    # NOTE(review): 50 was already added to stop_sites above
    sgraph.stop_sites.add(50)
    sgraph.recreate()
    K = create_path_graph(sgraph, k=2)
    smooth_graph(K)
    kmer_id_map = K.graph['kmer_id_map']
    n1 = (sgraph.get_node_id((0, 50)), sgraph.get_node_id((50, 100)))
    n2 = (sgraph.get_node_id((0, 50)), )
    n3 = (sgraph.get_node_id((50, 100)), )
    kmer1 = kmer_id_map[n1]
    kmer2 = kmer_id_map[n2]
    kmer3 = kmer_id_map[n3]
    assert K.node[kmer1]['expr'] == 1.0
    assert K.node[kmer2]['expr'] == 10.0
    assert K.node[kmer3]['expr'] == 1.0
    assert K.node[SOURCE]['expr'] == 12.0
    assert K.node[SINK]['expr'] == 12.0
コード例 #5
0
ファイル: test_splice_graph.py プロジェクト: yniknafs/taco
def test_multi_strand2():
    """Splitting the pos-strand graph must yield both expected sub-loci."""
    _, locus = read_single_locus("multi_strand2.gtf")
    pos_graph = SpliceGraph.create(locus.get_transfrags(Strand.POS))

    def label(sub):
        # render a subgraph as "chrom:start-end[strand]"
        return "%s:%d-%d[%s]" % (sub.chrom, sub.start, sub.end, Strand.to_gtf(sub.strand))

    by_label = dict((label(sub), sub) for sub in pos_graph.split())
    for expected in ("chr1:100-300[+]", "chr1:400-600[+]"):
        assert expected in by_label
コード例 #6
0
ファイル: test_path_graph.py プロジェクト: balajipandian/taco
def test_path_graph2():
    """Check path graph node expression as stop/start sites are added.

    Three stages are tested on 'change_point2.gtf': the untouched graph
    (k=1), the graph after adding a stop site at 50 (k=2, before and after
    smoothing), and the graph after also adding a start site at 50.
    """
    t_dict, locus = read_single_locus('change_point2.gtf')
    sgraph = SpliceGraph.create(t_dict.values())

    def check(graph, expected):
        # assert the 'expr' attribute of each listed path graph node
        for node, value in expected:
            assert graph.node[node]['expr'] == value

    # trivial case without additional stops or starts
    pg = create_path_graph(sgraph, 1)
    ids = pg.graph['kmer_id_map']
    whole = ids[(sgraph.get_node_id(Exon(0, 100)),)]
    check(pg, ((whole, 12.0), (SOURCE, 12.0), (SINK, 12.0)))

    # add a stop site
    sgraph.stop_sites.add(50)
    sgraph.recreate()
    pg = create_path_graph(sgraph, k=2)
    ids = pg.graph['kmer_id_map']
    pair = ids[(sgraph.get_node_id((0, 50)), sgraph.get_node_id((50, 100)))]
    left = ids[(sgraph.get_node_id((0, 50)),)]
    with_stop = ((pair, 1.0), (left, 10.0), (SOURCE, 11.0), (SINK, 11.0))
    check(pg, with_stop)
    # smooth kmer graph: the same values are expected afterwards
    smooth_graph(pg)
    check(pg, with_stop)

    # TODO: test after rescuing short transfrags

    # add both a start and a stop site
    sgraph.start_sites.add(50)
    sgraph.stop_sites.add(50)
    sgraph.recreate()
    pg = create_path_graph(sgraph, k=2)
    smooth_graph(pg)
    ids = pg.graph['kmer_id_map']
    keys = (
        (sgraph.get_node_id((0, 50)), sgraph.get_node_id((50, 100))),
        (sgraph.get_node_id((0, 50)),),
        (sgraph.get_node_id((50, 100)),),
    )
    pair, left, right = [ids[key] for key in keys]
    check(pg, ((pair, 1.0), (left, 10.0), (right, 1.0),
               (SOURCE, 12.0), (SINK, 12.0)))
コード例 #7
0
def test_multi_strand2():
    """The pos-strand graph of 'multi_strand2.gtf' splits into two known loci."""
    _, locus = read_single_locus('multi_strand2.gtf')
    sgpos = SpliceGraph.create(locus.get_transfrags(Strand.POS))
    # index each split subgraph by its "chrom:start-end[strand]" label
    labelled = {}
    for part in sgpos.split():
        strand_char = Strand.to_gtf(part.strand)
        name = '%s:%d-%d[%s]' % (part.chrom, part.start, part.end, strand_char)
        labelled[name] = part
    for wanted in ('chr1:100-300[+]', 'chr1:400-600[+]'):
        assert wanted in labelled
コード例 #8
0
ファイル: test_path_graph.py プロジェクト: Rainkikiki/taco
def test_path_graph_factory():
    """Check path graph sizes for fixed k and the k chosen by create_optimal()."""
    t_dict, _ = read_single_locus('path1.gtf')
    factory = PathGraphFactory(SpliceGraph.create(t_dict.values()))
    # expected number of path graph nodes for each k
    for k, size in ((1, 5), (2, 6)):
        assert len(factory.create(k=k)) == size
    # create_optimal() returns (graph, k); only k is checked here
    _graph, best_k = factory.create_optimal()
    assert best_k == 2
コード例 #9
0
def test_mark_start_stop_sites2():
    """Verify IS_START / IS_STOP / IS_REF node attributes on both strands.

    Graphs are built from 'multi_strand1.gtf', first without guidance and
    then with guided_ends and guided_assembly enabled.
    """
    t_dict, locus = read_single_locus('multi_strand1.gtf')

    def attr(sg, graph, start, end, key):
        # attribute `key` of the node that get_node_id gives for Exon(start, end)
        return graph.node[sg.get_node_id(Exon(start, end))][key]

    # pos strand not guided
    sg = SpliceGraph.create(locus.get_transfrags(Strand.POS))
    graph = sg.G
    assert attr(sg, graph, 100, 200, SGNode.IS_START)
    assert attr(sg, graph, 400, 650, SGNode.IS_STOP)

    # neg strand not guided
    sg = SpliceGraph.create(locus.get_transfrags(Strand.NEG))
    graph = sg.G
    assert attr(sg, graph, 950, 980, SGNode.IS_START)
    assert attr(sg, graph, 400, 500, SGNode.IS_STOP)

    # pos strand guided
    sg = SpliceGraph.create(locus.get_transfrags(Strand.POS),
                            guided_ends=True, guided_assembly=True)
    graph = sg.G
    truthy = (
        (100, 150, SGNode.IS_START),
        (150, 200, SGNode.IS_START),
        (500, 600, SGNode.IS_STOP),
        (600, 650, SGNode.IS_STOP),
        (150, 200, SGNode.IS_REF),
        (300, 400, SGNode.IS_REF),
        (500, 600, SGNode.IS_REF),
    )
    for start, end, key in truthy:
        assert attr(sg, graph, start, end, key)
    for start, end in ((100, 150), (600, 650)):
        assert not attr(sg, graph, start, end, SGNode.IS_REF)

    # neg strand guided
    sg = SpliceGraph.create(locus.get_transfrags(Strand.NEG),
                            guided_ends=True, guided_assembly=True)
    graph = sg.G
    assert attr(sg, graph, 350, 400, SGNode.IS_STOP)
    assert attr(sg, graph, 980, 1000, SGNode.IS_START)
    assert not attr(sg, graph, 950, 980, SGNode.IS_START)
    # every node of the guided neg-strand graph is expected to carry IS_REF
    for _node, data in graph.nodes_iter(data=True):
        assert data[SGNode.IS_REF]
コード例 #10
0
def test_mark_start_stop_sites2():
    """Check start/stop/ref node attributes for 'multi_strand1.gtf'.

    Covers the pos and neg strands, each with an unguided graph and with a
    graph created using guided_ends=True and guided_assembly=True.
    """
    t_dict, locus = read_single_locus('multi_strand1.gtf')
    guided = dict(guided_ends=True, guided_assembly=True)

    def make(strand, **options):
        # returns the splice graph and its underlying graph object
        sg = SpliceGraph.create(locus.get_transfrags(strand), **options)
        return sg, sg.G

    def has(sg, graph, exon, key):
        # node attribute `key` for the node id of `exon`
        return graph.node[sg.get_node_id(exon)][key]

    # pos strand not guided
    sg, graph = make(Strand.POS)
    assert has(sg, graph, Exon(100, 200), SGNode.IS_START)
    assert has(sg, graph, Exon(400, 650), SGNode.IS_STOP)

    # neg strand not guided
    sg, graph = make(Strand.NEG)
    assert has(sg, graph, Exon(950, 980), SGNode.IS_START)
    assert has(sg, graph, Exon(400, 500), SGNode.IS_STOP)

    # pos strand guided
    sg, graph = make(Strand.POS, **guided)
    assert has(sg, graph, Exon(100, 150), SGNode.IS_START)
    assert has(sg, graph, Exon(150, 200), SGNode.IS_START)
    assert has(sg, graph, Exon(500, 600), SGNode.IS_STOP)
    assert has(sg, graph, Exon(600, 650), SGNode.IS_STOP)
    assert has(sg, graph, Exon(150, 200), SGNode.IS_REF)
    assert has(sg, graph, Exon(300, 400), SGNode.IS_REF)
    assert has(sg, graph, Exon(500, 600), SGNode.IS_REF)
    assert not has(sg, graph, Exon(100, 150), SGNode.IS_REF)
    assert not has(sg, graph, Exon(600, 650), SGNode.IS_REF)

    # neg strand guided
    sg, graph = make(Strand.NEG, **guided)
    assert has(sg, graph, Exon(350, 400), SGNode.IS_STOP)
    assert has(sg, graph, Exon(980, 1000), SGNode.IS_START)
    assert not has(sg, graph, Exon(950, 980), SGNode.IS_START)
    # all nodes are expected to be flagged IS_REF here
    for _node, attrs in graph.nodes_iter(data=True):
        assert attrs[SGNode.IS_REF]
コード例 #11
0
def test_mark_start_stop_sites2():
    """Check is_start / is_stop / is_ref flags for 'multi_strand1.gtf'.

    Both strands are tested with an unguided graph and with a graph created
    using guided_ends=True and guided_assembly=True.
    """
    t_dict, locus = read_single_locus('multi_strand1.gtf')
    guided = dict(guided_ends=True, guided_assembly=True)

    def node_id(sg, start, end):
        # node id that the splice graph assigns to Exon(start, end)
        return sg.get_node_id(Exon(start, end))

    # pos strand not guided
    sg = SpliceGraph.create(locus.get_transfrags(Strand.POS))
    graph = sg.G
    assert graph.is_start[node_id(sg, 100, 200)]
    assert graph.is_stop[node_id(sg, 400, 650)]

    # neg strand not guided
    sg = SpliceGraph.create(locus.get_transfrags(Strand.NEG))
    graph = sg.G
    assert graph.is_start[node_id(sg, 950, 980)]
    assert graph.is_stop[node_id(sg, 400, 500)]

    # pos strand guided
    sg = SpliceGraph.create(locus.get_transfrags(Strand.POS), **guided)
    graph = sg.G
    assert graph.is_start[node_id(sg, 100, 150)]
    assert graph.is_start[node_id(sg, 150, 200)]
    assert graph.is_stop[node_id(sg, 500, 600)]
    assert graph.is_stop[node_id(sg, 600, 650)]
    assert graph.is_ref[node_id(sg, 150, 200)]
    assert graph.is_ref[node_id(sg, 300, 400)]
    assert graph.is_ref[node_id(sg, 500, 600)]
    assert not graph.is_ref[node_id(sg, 100, 150)]
    assert not graph.is_ref[node_id(sg, 600, 650)]

    # neg strand guided
    sg = SpliceGraph.create(locus.get_transfrags(Strand.NEG), **guided)
    graph = sg.G
    assert graph.is_stop[node_id(sg, 350, 400)]
    assert graph.is_start[node_id(sg, 980, 1000)]
    assert not graph.is_start[node_id(sg, 950, 980)]
    # every node is expected to be flagged as ref in this graph
    for nid in graph.node_ids_iter():
        assert graph.is_ref[nid]
コード例 #12
0
def test_unreachable_kmers():
    """Path graph validity for 'path_graph_k2.gtf' at k=2, k=1 and optimal k."""
    t_dict, _ = read_single_locus('path_graph_k2.gtf')
    sgraph = SpliceGraph.create(t_dict.values())

    # k=2 is expected to give an invalid, empty graph
    pg = create_path_graph(sgraph, k=2)
    assert not pg.graph['valid']
    assert len(pg) == 0

    # k=1 is expected to be valid with no lost kmers
    pg = create_path_graph(sgraph, k=1)
    info = pg.graph
    assert info['valid']
    assert info['num_lost_kmers'] == 0
    assert len(pg) == 8

    # the optimal search is expected to settle on k=1
    pg, best_k = create_optimal_path_graph(sgraph, kmax=0, loss_threshold=1.0)
    assert best_k == 1
    assert len(pg) == 8
コード例 #13
0
ファイル: test_path_graph.py プロジェクト: balajipandian/taco
def test_unreachable_kmers():
    """Check that k=2 is rejected and k=1 is accepted for 'path_graph_k2.gtf'."""
    transfrag_map, _locus = read_single_locus('path_graph_k2.gtf')
    sgraph = SpliceGraph.create(transfrag_map.values())

    invalid = create_path_graph(sgraph, k=2)
    assert not invalid.graph['valid']
    assert len(invalid) == 0

    valid = create_path_graph(sgraph, k=1)
    assert valid.graph['valid']
    assert valid.graph['num_lost_kmers'] == 0
    assert len(valid) == 8

    # create_optimal_path_graph returns a (graph, k) pair
    optimal = create_optimal_path_graph(sgraph, kmax=0, loss_threshold=1.0)
    optimal_graph, optimal_k = optimal
    assert optimal_k == 1
    assert len(optimal_graph) == 8
コード例 #14
0
def test_path1():
    """find_paths and cpathfinder.find_paths must agree on the k=2 path graph."""
    _, locus = read_single_locus('path1.gtf')
    sgraph = SpliceGraph.create(locus.get_transfrags(Strand.POS))
    pgraph = create_path_graph(sgraph, 2)
    results_a = find_paths(pgraph, 'expr')
    results_b = cpathfinder.find_paths(pgraph, 'expr')
    assert len(results_a) == len(results_b)
    # each result is a (path, expr) pair
    for (path_a, expr_a), (path_b, expr_b) in zip(results_a, results_b):
        assert path_a == path_b
        # compare with an absolute tolerance rather than exact equality
        assert abs(expr_a - expr_b) < 1e-8
コード例 #15
0
def test_path1():
    """Compare the two find_paths implementations on 'path1.gtf' with k=2."""
    t_dict, locus = read_single_locus('path1.gtf')
    pos_transfrags = locus.get_transfrags(Strand.POS)
    path_graph = create_path_graph(SpliceGraph.create(pos_transfrags), 2)
    first = find_paths(path_graph, 'expr')
    second = cpathfinder.find_paths(path_graph, 'expr')
    assert len(first) == len(second)
    for item_a, item_b in zip(first, second):
        # unpack the (path, expr) pairs
        nodes_a, value_a = item_a
        nodes_b, value_b = item_b
        assert nodes_a == nodes_b
        assert abs(value_a - value_b) < 1e-8
コード例 #16
0
def test_find_node_boundaries():
    """Check splice sites, threshold points and node boundaries for 'splice_sites.gtf'."""
    t_dict, locus = read_single_locus('splice_sites.gtf')
    transfrags = t_dict.values()
    # union of the splice sites reported by every transfrag
    observed = set(site for t in transfrags for site in t.itersplices())
    assert tuple(sorted(observed)) == (100, 200, 250, 300, 400)
    # aggregate expression
    sg = SpliceGraph.create(transfrags)
    # zero change points
    thresholds = find_threshold_points(sg.expr_data, sg.start)
    assert tuple(thresholds) == (100, 150, 300, 375)
    # combined boundaries
    combined = sg._find_node_boundaries()
    assert tuple(combined) == (10, 100, 150, 200, 250, 300, 375, 400, 525)
コード例 #17
0
ファイル: test_change_point.py プロジェクト: yniknafs/taco
def test_trimming_to_zero_bug():
    """Regression test on 'change_point_bug.gtf' (unstranded transfrags).

    After detecting change points (pval=0.1), applying them and recreating
    the graph, the node for Exon(173433532, 173435169) must appear among
    both the start nodes and the stop nodes.
    """
    t_dict, locus = read_single_locus('change_point_bug.gtf')
    transfrags_un = locus.get_transfrags(Strand.NA)
    sgraph = SpliceGraph.create(transfrags_un)
    for cp in sgraph.detect_change_points(pval=0.1):
        sgraph.apply_change_point(cp)
    sgraph.recreate()
    # get start/stop nodes
    start_nodes, stop_nodes = sgraph.get_start_stop_nodes()
    # convert to node intervals
    start_intervals = set(sgraph.get_node_interval(n_id) for n_id in start_nodes)
    stop_intervals = set(sgraph.get_node_interval(n_id) for n_id in stop_nodes)
    # the original repeated the start-node assertion twice; one check suffices
    expected = Exon(173433532, 173435169)
    assert expected in stop_intervals
    assert expected in start_intervals
コード例 #18
0
def test_trimming_to_zero_bug():
    """Regression test on 'change_point_bug.gtf' (unstranded transfrags).

    After detecting change points with default settings, applying them and
    recreating the graph, the node for Exon(173433532, 173435169) must
    appear among both the start nodes and the stop nodes.
    """
    t_dict, locus = read_single_locus('change_point_bug.gtf')
    transfrags_un = locus.get_transfrags(Strand.NA)
    sgraph = SpliceGraph.create(transfrags_un)
    for cp in sgraph.detect_change_points():
        sgraph.apply_change_point(cp)
    sgraph.recreate()
    # get start/stop nodes
    start_nodes, stop_nodes = sgraph.get_start_stop_nodes()
    # convert to node intervals
    start_intervals = set(sgraph.get_node_interval(n_id) for n_id in start_nodes)
    stop_intervals = set(sgraph.get_node_interval(n_id) for n_id in stop_nodes)
    # the original repeated the start-node assertion twice; one check suffices
    expected = Exon(173433532, 173435169)
    assert expected in stop_intervals
    assert expected in start_intervals
コード例 #19
0
ファイル: test_splice_graph.py プロジェクト: yniknafs/taco
def test_find_node_boundaries():
    """Node boundaries of 'splice_sites.gtf' combine splice sites and threshold points."""
    t_dict, _locus = read_single_locus("splice_sites.gtf")
    transfrags = t_dict.values()

    # gather splice sites across all transfrags
    sites = set()
    for transfrag in transfrags:
        for site in transfrag.itersplices():
            sites.add(site)
    assert tuple(sorted(sites)) == (100, 200, 250, 300, 400)

    # aggregate expression
    graph = SpliceGraph.create(transfrags)

    # zero change points
    zero_points = tuple(find_threshold_points(graph.expr_data, graph.start))
    assert zero_points == (100, 150, 300, 375)

    # combined boundaries
    node_bounds = tuple(graph._find_node_boundaries())
    assert node_bounds == (10, 100, 150, 200, 250, 300, 375, 400, 525)
コード例 #20
0
ファイル: test_splice_graph.py プロジェクト: yniknafs/taco
def test_split_transfrag():
    """split_transfrag must cut each transfrag at the graph's node boundaries."""
    loci = read_gtf("splice_sites.gtf")
    interval, gtf_lines = loci[0]
    t_dict = Transfrag.parse_gtf(gtf_lines)
    sg = SpliceGraph.create(t_dict.values())
    boundaries = array("i", sg._find_node_boundaries())
    # check nodes: transfrag id -> expected node intervals, in order
    expected_nodes = (
        ("A", ((10, 100), (200, 250), (250, 300), (400, 525))),
        ("B", ((10, 100), (250, 300), (400, 525))),
        ("C", ((150, 200), (200, 250), (250, 300), (400, 525))),
        ("D", ((375, 400), (400, 525))),
    )
    for t_id, wanted in expected_nodes:
        got = tuple(split_transfrag(t_dict[t_id], boundaries))
        assert got == wanted
コード例 #21
0
def test_split_transfrag():
    """Check the node intervals produced by split_transfrag for transfrags A-D."""
    first_locus = read_gtf('splice_sites.gtf')[0]
    interval, gtf_lines = first_locus
    transfrags = Transfrag.parse_gtf(gtf_lines)
    graph = SpliceGraph.create(transfrags.values())
    bounds = array('i', graph._find_node_boundaries())

    def split(t_id):
        # node intervals of one transfrag, as a tuple
        return tuple(split_transfrag(transfrags[t_id], bounds))

    # check nodes
    assert split('A') == ((10, 100), (200, 250), (250, 300), (400, 525))
    assert split('B') == ((10, 100), (250, 300), (400, 525))
    assert split('C') == ((150, 200), (200, 250), (250, 300), (400, 525))
    assert split('D') == ((375, 400), (400, 525))
コード例 #22
0
ファイル: test_path_graph.py プロジェクト: balajipandian/taco
def test_path_graph1():
    """The k=2 path graph of 'path1.gtf' must be isomorphic to a hand-built one."""
    # read transcripts
    t_dict, locus = read_single_locus('path1.gtf')
    SG = SpliceGraph.create(t_dict.values())
    # paths, spelled out over five exons A..E
    A = Exon(0, 100)
    B = Exon(200, 300)
    C = Exon(400, 500)
    D = Exon(600, 700)
    E = Exon(800, 900)
    paths = [
        (SOURCE, A, B, C, D, E, SINK),
        (SOURCE, A, C, E, SINK),
        (SOURCE, A, B, C, E, SINK),
        (SOURCE, A, C, D, E, SINK),
    ]
    # create path graph k = 2
    k = 2
    actual = create_path_graph(SG, k)
    # build the reference graph by adding the kmers of every path
    reference = nx.DiGraph()
    for path in paths:
        add_path(reference, list(get_kmers(path, k)), 1.0)
    assert nx.is_isomorphic(actual, reference)
コード例 #23
0
def test_path_graph1():
    """Compare create_path_graph (k=2) against a graph built from known paths."""
    # read transcripts
    t_dict, locus = read_single_locus('path1.gtf')
    SG = SpliceGraph.create(t_dict.values())

    def as_path(*intervals):
        # wrap exon intervals between SOURCE and SINK
        exons = tuple(Exon(start, end) for start, end in intervals)
        return (SOURCE,) + exons + (SINK,)

    # paths
    paths = [
        as_path((0, 100), (200, 300), (400, 500), (600, 700), (800, 900)),
        as_path((0, 100), (400, 500), (800, 900)),
        as_path((0, 100), (200, 300), (400, 500), (800, 900)),
        as_path((0, 100), (400, 500), (600, 700), (800, 900)),
    ]
    # create path graph k = 2
    k = 2
    built = create_path_graph(SG, k)
    manual = nx.DiGraph()
    for path in paths:
        path_kmers = list(get_kmers(path, k))
        add_path(manual, path_kmers, 1.0)
    assert nx.is_isomorphic(built, manual)
コード例 #24
0
def test_ref_starts_ends():
    """Check the ref start/stop sites of the 'change_point1.gtf' splice graph."""
    t_dict, _ = read_single_locus('change_point1.gtf')
    graph = SpliceGraph.create(t_dict.values())
    ref_starts = tuple(sorted(graph.ref_start_sites))
    assert ref_starts == (95,)
    ref_stops = tuple(sorted(graph.ref_stop_sites))
    assert ref_stops == (200,)
コード例 #25
0
def test_empty_graph_bug():
    """assemble_isoforms must return no isoforms for 'empty_graph_bug.gtf'."""
    _, locus = read_single_locus('empty_graph_bug.gtf')
    sgraph = SpliceGraph.create(locus.get_transfrags(Strand.POS))
    # assemble with the default configuration
    result = assemble_isoforms(sgraph, Config.defaults())
    assert len(result) == 0
コード例 #26
0
ファイル: test_path_finder.py プロジェクト: yniknafs/taco
def test_empty_graph_bug():
    """Regression test: the pos strand of 'empty_graph_bug.gtf' yields zero isoforms."""
    t_dict, locus = read_single_locus('empty_graph_bug.gtf')
    pos_transfrags = locus.get_transfrags(Strand.POS)
    graph = SpliceGraph.create(pos_transfrags)
    config = Config.defaults()
    assembled = assemble_isoforms(graph, config)
    assert len(assembled) == 0
コード例 #27
0
ファイル: test_change_point.py プロジェクト: yniknafs/taco
def test_trim_transfrags():
    """Exercise change point detection and trimming on synthetic ramps.

    Five scenarios are run on transfrags produced by ``make_ramp``: the
    low-level ``run_changepoint`` / ``_trim_change_point`` path on the pos
    strand, then the ``detect_change_points`` / ``apply_change_point`` /
    ``recreate`` interface for each combination of strand and ramp
    direction.
    """

    def make_ramp(strand, sign=1):
        """Build synthetic single-exon transfrags on chr1.

        Creates 50 transfrags with expr 1.0 spanning Exon(start, end), then
        20 transfrags with expr 10.0 whose variable boundary ``pos`` steps
        through range(1100, 1120). When ``sign < 0`` each of those spans
        (start, pos); otherwise it spans (pos, end).

        Returns a tuple (chrom, start, end, strand, change_expr, base_expr,
        transfrags), where base_expr sums the expr of the first 50
        transfrags only and change_expr sums the expr of all of them.
        """
        transfrags = []
        chrom = 'chr1'
        start = 1000
        end = 1220
        change_expr = 0.0
        base_expr = 0.0
        # "flat" part of expression landscape
        expr = 1.0
        for i in xrange(0, 50):
            t = Transfrag(chrom=chrom, strand=strand,
                          _id='T1.%d' % i, sample_id='S%d' % i,
                          expr=expr, is_ref=False,
                          exons=[Exon(start, end)])
            transfrags.append(t)
            change_expr += expr
            base_expr += expr
        # "changing" area
        i = 0
        expr = 10.0
        for pos in range(1100, 1120):
            # sign selects which end of the transfrag moves with pos
            left, right = (start, pos) if sign < 0 else (pos, end)
            t = Transfrag(chrom=chrom, strand=strand,
                          _id='T2.%d' % i, sample_id='S%d' % i,
                          expr=expr, is_ref=False,
                          exons=[Exon(left, right)])
            transfrags.append(t)
            change_expr += expr
            i += 1
        return chrom, start, end, strand, change_expr, base_expr, transfrags

    # positive strand
    tup = make_ramp(Strand.POS, sign=-1)
    chrom, start, end, strand, change_expr, base_expr, transfrags = tup
    sgraph = SpliceGraph.create(transfrags)
    cps = run_changepoint(sgraph.expr_data, smooth_window_len=11)
    assert len(cps) == 1
    cp = cps[0]
    assert cp.pos == 110
    assert cp.foldchange < 0.5
    assert cp.sign == -1
    # offset the change point coordinates by the locus start
    # (presumably array-relative -> genomic coordinates; TODO confirm)
    cp = cp._replace(pos=start + cp.pos,
                     start=start + cp.start,
                     end=start + cp.end)
    # trim transfrags
    sgraph._trim_change_point(cp)
    expr_data_after = sgraph._compute_expression()
    assert expr_data_after[0] == 250
    assert expr_data_after[-1] == 50
    assert expr_data_after[cp.index - 1] == 150
    assert expr_data_after[cp.index] == base_expr

    # now try SpliceGraph interface
    tup = make_ramp(Strand.POS, sign=-1)
    chrom, start, end, strand, change_expr, base_expr, transfrags = tup
    sgraph = SpliceGraph.create(transfrags)
    cps = sgraph.detect_change_points(smooth_window_len=11)
    for cp in cps:
        sgraph.apply_change_point(cp)
    sgraph.recreate()
    # NOTE(review): `cp` is read after the loop, so these asserts use the
    # last change point (or a stale value from the section above if `cps`
    # is empty)
    assert sgraph.expr_data[cp.index - 1] == 150
    assert sgraph.expr_data[cp.index] == base_expr
    assert cp.pos in sgraph.stop_sites

    # negative strand should not affect change point
    tup = make_ramp(Strand.NEG, sign=-1)
    chrom, start, end, strand, left_expr, base_expr, transfrags = tup
    sgraph = SpliceGraph.create(transfrags)
    cps = sgraph.detect_change_points(smooth_window_len=11)
    for cp in cps:
        sgraph.apply_change_point(cp)
    sgraph.recreate()
    assert sgraph.expr_data[cp.index - 1] == 150
    assert sgraph.expr_data[cp.index] == base_expr
    # on the neg strand the same position is expected among the start sites
    assert cp.pos in sgraph.start_sites

    # neg strand change in opposite direction
    tup = make_ramp(Strand.NEG, sign=1)
    chrom, start, end, strand, left_expr, base_expr, transfrags = tup
    sgraph = SpliceGraph.create(transfrags)
    cps = run_changepoint(sgraph.expr_data, smooth_window_len=11)
    cp = cps[0]
    assert cp.index == 110
    assert cp.foldchange < 0.5
    assert cp.sign == 1.0
    cps = sgraph.detect_change_points(smooth_window_len=11)
    # NOTE(review): this binding is immediately replaced by the loop below
    cp = cps[0]
    for cp in cps:
        sgraph.apply_change_point(cp)
    sgraph.recreate()
    assert sgraph.expr_data[0] == 50
    assert sgraph.expr_data[-1] == 250
    assert sgraph.expr_data[cp.index - 1] == base_expr
    assert sgraph.expr_data[cp.index] == 160
    assert cp.pos in sgraph.stop_sites

    # pos strand change in opposite direction
    tup = make_ramp(Strand.POS, sign=1)
    chrom, start, end, strand, left_expr, base_expr, transfrags = tup
    sgraph = SpliceGraph.create(transfrags)
    cps = run_changepoint(sgraph.expr_data, smooth_window_len=11)
    cp = cps[0]
    assert cp.index == 110
    assert cp.foldchange < 0.5
    assert cp.sign == 1.0
    cps = sgraph.detect_change_points(smooth_window_len=11)
    for cp in cps:
        sgraph.apply_change_point(cp)
    sgraph.recreate()

    assert sgraph.expr_data[0] == 50
    assert sgraph.expr_data[-1] == 250
    assert sgraph.expr_data[cp.index - 1] == base_expr
    assert sgraph.expr_data[cp.index] == 160
    assert cp.pos in sgraph.start_sites
    return
コード例 #28
0
def test_trim_transfrags():
    """Test change point detection and transfrag trimming on synthetic data.

    ``make_ramp`` generates a flat expression background plus a stepped
    ramp; the test then checks detected change points and the resulting
    expression values and start/stop sites for both strands and both ramp
    directions.
    """

    def make_ramp(strand, sign=1):
        """Return synthetic transfrags and their summary values.

        The first 50 transfrags (expr 1.0) all span Exon(start, end). The
        next 20 (expr 10.0) each use one ``pos`` from range(1100, 1120):
        (start, pos) when ``sign < 0``, else (pos, end).

        Returns (chrom, start, end, strand, change_expr, base_expr,
        transfrags); base_expr accumulates only the first 50 transfrags,
        change_expr accumulates all 70.
        """
        transfrags = []
        chrom = 'chr1'
        start = 1000
        end = 1220
        change_expr = 0.0
        base_expr = 0.0
        # "flat" part of expression landscape
        expr = 1.0
        for i in xrange(0, 50):
            t = Transfrag(chrom=chrom, strand=strand,
                          _id='T1.%d' % i, sample_id='S%d' % i,
                          expr=expr, is_ref=False,
                          exons=[Exon(start, end)])
            transfrags.append(t)
            change_expr += expr
            base_expr += expr
        # "changing" area
        i = 0
        expr = 10.0
        for pos in range(1100, 1120):
            # negative sign anchors the left end; otherwise the right end
            left, right = (start, pos) if sign < 0 else (pos, end)
            t = Transfrag(chrom=chrom, strand=strand,
                          _id='T2.%d' % i, sample_id='S%d' % i,
                          expr=expr, is_ref=False,
                          exons=[Exon(left, right)])
            transfrags.append(t)
            change_expr += expr
            i += 1
        return chrom, start, end, strand, change_expr, base_expr, transfrags

    # positive strand
    tup = make_ramp(Strand.POS, sign=-1)
    chrom, start, end, strand, change_expr, base_expr, transfrags = tup
    sgraph = SpliceGraph.create(transfrags)
    cps = run_changepoint(sgraph.expr_data, smooth_window_len=11)
    assert len(cps) == 1
    cp = cps[0]
    assert cp.pos == 110
    assert cp.foldchange < 0.5
    assert cp.sign == -1
    # add the locus start to pos/start/end before trimming
    # (presumably converts array offsets to genomic coordinates; TODO confirm)
    cp = cp._replace(pos=start + cp.pos,
                     start=start + cp.start,
                     end=start + cp.end)
    # trim transfrags
    sgraph._trim_change_point(cp)
    expr_data_after = sgraph._compute_expression()
    assert expr_data_after[0] == 250
    assert expr_data_after[-1] == 50
    assert expr_data_after[cp.index - 1] == 150
    assert expr_data_after[cp.index] == base_expr

    # now try SpliceGraph interface
    tup = make_ramp(Strand.POS, sign=-1)
    chrom, start, end, strand, change_expr, base_expr, transfrags = tup
    sgraph = SpliceGraph.create(transfrags)
    cps = sgraph.detect_change_points(smooth_window_len=11)
    for cp in cps:
        sgraph.apply_change_point(cp)
    sgraph.recreate()
    # NOTE(review): the asserts below rely on the loop variable `cp`
    # remaining bound after the loop; with an empty `cps` they would use
    # the change point from the previous section
    assert sgraph.expr_data[cp.index - 1] == 150
    assert sgraph.expr_data[cp.index] == base_expr
    assert cp.pos in sgraph.stop_sites

    # negative strand should not affect change point
    tup = make_ramp(Strand.NEG, sign=-1)
    chrom, start, end, strand, left_expr, base_expr, transfrags = tup
    sgraph = SpliceGraph.create(transfrags)
    cps = sgraph.detect_change_points(smooth_window_len=11)
    for cp in cps:
        sgraph.apply_change_point(cp)
    sgraph.recreate()
    assert sgraph.expr_data[cp.index - 1] == 150
    assert sgraph.expr_data[cp.index] == base_expr
    # same position, but expected in start_sites for the neg strand
    assert cp.pos in sgraph.start_sites

    # neg strand change in opposite direction
    tup = make_ramp(Strand.NEG, sign=1)
    chrom, start, end, strand, left_expr, base_expr, transfrags = tup
    sgraph = SpliceGraph.create(transfrags)
    cps = run_changepoint(sgraph.expr_data, smooth_window_len=11)
    cp = cps[0]
    assert cp.index == 110
    assert cp.foldchange < 0.5
    assert cp.sign == 1.0
    cps = sgraph.detect_change_points(smooth_window_len=11)
    # NOTE(review): redundant -- the loop below rebinds `cp`
    cp = cps[0]
    for cp in cps:
        sgraph.apply_change_point(cp)
    sgraph.recreate()
    assert sgraph.expr_data[0] == 50
    assert sgraph.expr_data[-1] == 250
    assert sgraph.expr_data[cp.index - 1] == base_expr
    assert sgraph.expr_data[cp.index] == 160
    assert cp.pos in sgraph.stop_sites

    # pos strand change in opposite direction
    tup = make_ramp(Strand.POS, sign=1)
    chrom, start, end, strand, left_expr, base_expr, transfrags = tup
    sgraph = SpliceGraph.create(transfrags)
    cps = run_changepoint(sgraph.expr_data, smooth_window_len=11)
    cp = cps[0]
    assert cp.index == 110
    assert cp.foldchange < 0.5
    assert cp.sign == 1.0
    cps = sgraph.detect_change_points(smooth_window_len=11)
    for cp in cps:
        sgraph.apply_change_point(cp)
    sgraph.recreate()

    assert sgraph.expr_data[0] == 50
    assert sgraph.expr_data[-1] == 250
    assert sgraph.expr_data[cp.index - 1] == base_expr
    assert sgraph.expr_data[cp.index] == 160
    assert cp.pos in sgraph.start_sites
    return
コード例 #29
0
ファイル: test_splice_graph.py プロジェクト: yniknafs/taco
def test_mark_start_stop_sites1():
    """Check node start/stop flags as change points split a single node.

    The splice graph built from ``change_point1.gtf`` is expected to hold a
    single node, ``Exon(50, 200)``.  A start site at 125 and then a stop site
    at 80 are added, the graph is recreated after each addition, and the
    ``is_start`` / ``is_stop`` flags of every resulting node are checked.
    The same sequence is then repeated with all transfrags flipped to the
    negative strand.
    """
    t_dict, locus = read_single_locus("change_point1.gtf")
    sgraph = SpliceGraph.create(t_dict.values())
    G = sgraph.G
    assert len(G) == 1
    n_id = sgraph.get_node_id(Exon(50, 200))
    assert G.is_start[n_id]
    assert G.is_stop[n_id]

    # add a start site change point
    sgraph.start_sites.add(125)
    sgraph.recreate()
    # re-fetch the graph after every recreate() before inspecting it
    G = sgraph.G
    assert len(G) == 2
    n_id = sgraph.get_node_id(Exon(50, 125))
    assert G.is_start[n_id]
    assert not G.is_stop[n_id]
    n_id = sgraph.get_node_id(Exon(125, 200))
    assert G.is_start[n_id]
    assert G.is_stop[n_id]

    # add a stop site change point
    sgraph.stop_sites.add(80)
    sgraph.recreate()
    G = sgraph.G
    assert len(G) == 3
    n_id = sgraph.get_node_id(Exon(50, 80))
    assert G.is_start[n_id]
    assert G.is_stop[n_id]
    n_id = sgraph.get_node_id(Exon(80, 125))
    assert not G.is_start[n_id]
    assert not G.is_stop[n_id]
    n_id = sgraph.get_node_id(Exon(125, 200))
    assert G.is_start[n_id]
    assert G.is_stop[n_id]

    # flip strand
    # values() works on both Python 2 and 3; the transfrag ids are not needed
    for t in t_dict.values():
        t.strand = Strand.NEG
    sgraph = SpliceGraph.create(t_dict.values())
    G = sgraph.G
    assert len(G) == 1

    n_id = sgraph.get_node_id(Exon(50, 200))
    assert G.is_start[n_id]
    assert G.is_stop[n_id]

    # add a start site change point
    sgraph.start_sites.add(125)
    sgraph.recreate()
    G = sgraph.G
    assert len(G) == 2
    n_id = sgraph.get_node_id(Exon(50, 125))
    assert G.is_start[n_id]
    assert G.is_stop[n_id]

    n_id = sgraph.get_node_id(Exon(125, 200))
    assert G.is_start[n_id]
    assert not G.is_stop[n_id]

    # add a stop site change point
    sgraph.stop_sites.add(80)
    sgraph.recreate()
    G = sgraph.G
    assert len(G) == 3
    n_id = sgraph.get_node_id(Exon(50, 80))
    assert not G.is_start[n_id]
    assert G.is_stop[n_id]
    n_id = sgraph.get_node_id(Exon(80, 125))
    assert G.is_start[n_id]
    assert G.is_stop[n_id]
    n_id = sgraph.get_node_id(Exon(125, 200))
    assert G.is_start[n_id]
    assert not G.is_stop[n_id]
コード例 #30
0
ファイル: test_splice_graph.py プロジェクト: yniknafs/taco
def test_ref_starts_ends():
    """Check the reference start/stop sites reported for change_point1.gtf."""
    transfrags_by_id, _locus = read_single_locus("change_point1.gtf")
    graph = SpliceGraph.create(transfrags_by_id.values())
    # sort before comparing so the check does not depend on iteration order
    assert sorted(graph.ref_start_sites) == [95]
    assert sorted(graph.ref_stop_sites) == [200]
コード例 #31
0
ファイル: test_splice_graph.py プロジェクト: yniknafs/taco
def test_multi_strand1():
    """Exercise SpliceGraph creation on a locus holding both strands.

    Verifies the parsed locus interval, that a mixed-strand transfrag list is
    rejected with ``TacoError``, the per-strand graph attributes, the range
    checking and values of ``get_expr_data``, and the node boundaries with
    and without guided ends/assembly.
    """
    # the GTF should parse into exactly one locus with five transfrags
    parsed_loci = read_gtf("multi_strand1.gtf")
    assert len(parsed_loci) == 1
    locus_interval, locus_lines = parsed_loci[0]
    assert locus_interval == ("chr1", 100, 1000)
    transfrag_map = Transfrag.parse_gtf(locus_lines)
    assert len(transfrag_map) == 5
    locus = Locus.create(transfrag_map.values())
    assert locus.chrom == "chr1"
    assert locus.start == 100
    assert locus.end == 1000

    # a graph cannot be built from transfrags of more than one strand
    with pytest.raises(TacoError):
        SpliceGraph.create(transfrag_map.values())
    pos_transfrags = locus.get_transfrags(Strand.POS)
    neg_transfrags = locus.get_transfrags(Strand.NEG)
    pos_graph = SpliceGraph.create(pos_transfrags)
    neg_graph = SpliceGraph.create(neg_transfrags)

    # positive strand graph
    assert pos_graph.chrom == "chr1"
    assert pos_graph.start == 100
    assert pos_graph.end == 650
    assert pos_graph.strand == Strand.POS
    assert pos_graph.ref_start_sites == [150]
    assert pos_graph.ref_stop_sites == [600]
    # requests reaching outside the graph interval are expected to fail
    for bad_start, bad_end in ((90, 110), (650, 655)):
        with pytest.raises(TacoError):
            pos_graph.get_expr_data(bad_start, bad_end)
    assert np.array_equal(pos_graph.get_expr_data(100, 105), np.ones(5))

    # negative strand graph
    assert neg_graph.chrom == "chr1"
    assert neg_graph.start == 350
    assert neg_graph.end == 1000
    assert neg_graph.strand == Strand.NEG
    assert neg_graph.ref_start_sites == [1000]
    assert neg_graph.ref_stop_sites == [350]
    for bad_start, bad_end in ((340, 350), (1000, 1010)):
        with pytest.raises(TacoError):
            neg_graph.get_expr_data(bad_start, bad_end)
    # (start, end, constructor of the expected 5-element array)
    neg_expected = (
        (400, 405, np.ones),
        (945, 950, np.zeros),
        (950, 955, np.ones),
        (980, 985, np.zeros),
    )
    for lo, hi, make_expected in neg_expected:
        assert np.array_equal(neg_graph.get_expr_data(lo, hi), make_expected(5))

    # node boundaries without reference guidance
    pos_bounds = tuple(pos_graph._find_node_boundaries())
    assert pos_bounds == (100, 200, 300, 400, 650)
    neg_bounds = tuple(neg_graph._find_node_boundaries())
    assert neg_bounds == (350, 400, 500, 950, 980, 1000)

    # guided ends/assembly: additional boundaries are expected
    guided = dict(guided_ends=True, guided_assembly=True)
    guided_pos = SpliceGraph.create(pos_transfrags, **guided)
    pos_bounds = tuple(guided_pos._find_node_boundaries())
    assert pos_bounds == (100, 150, 200, 300, 400, 500, 600, 650)

    guided_neg = SpliceGraph.create(neg_transfrags, **guided)
    neg_bounds = tuple(guided_neg._find_node_boundaries())
    assert neg_bounds == (350, 400, 500, 750, 900, 950, 980, 1000)
コード例 #32
0
def test_mark_start_stop_sites1():
    """Check node start/stop flags as change points split a single node.

    The splice graph built from ``change_point1.gtf`` is expected to hold a
    single node, ``Exon(50, 200)``.  A start site at 125 and then a stop site
    at 80 are added, the graph is recreated after each addition, and the
    ``is_start`` / ``is_stop`` flags of every resulting node are checked.
    The same sequence is then repeated with all transfrags flipped to the
    negative strand.
    """
    t_dict, locus = read_single_locus('change_point1.gtf')
    sgraph = SpliceGraph.create(t_dict.values())
    G = sgraph.G
    assert len(G) == 1
    n_id = sgraph.get_node_id(Exon(50, 200))
    assert G.is_start[n_id]
    assert G.is_stop[n_id]

    # add a start site change point
    sgraph.start_sites.add(125)
    sgraph.recreate()
    # re-fetch the graph after every recreate() before inspecting it
    G = sgraph.G
    assert len(G) == 2
    n_id = sgraph.get_node_id(Exon(50, 125))
    assert G.is_start[n_id]
    assert not G.is_stop[n_id]
    n_id = sgraph.get_node_id(Exon(125, 200))
    assert G.is_start[n_id]
    assert G.is_stop[n_id]

    # add a stop site change point
    sgraph.stop_sites.add(80)
    sgraph.recreate()
    G = sgraph.G
    assert len(G) == 3
    n_id = sgraph.get_node_id(Exon(50, 80))
    assert G.is_start[n_id]
    assert G.is_stop[n_id]
    n_id = sgraph.get_node_id(Exon(80, 125))
    assert not G.is_start[n_id]
    assert not G.is_stop[n_id]
    n_id = sgraph.get_node_id(Exon(125, 200))
    assert G.is_start[n_id]
    assert G.is_stop[n_id]

    # flip strand
    # values() works on both Python 2 and 3; the transfrag ids are not needed
    for t in t_dict.values():
        t.strand = Strand.NEG
    sgraph = SpliceGraph.create(t_dict.values())
    G = sgraph.G
    assert len(G) == 1

    n_id = sgraph.get_node_id(Exon(50, 200))
    assert G.is_start[n_id]
    assert G.is_stop[n_id]

    # add a start site change point
    sgraph.start_sites.add(125)
    sgraph.recreate()
    G = sgraph.G
    assert len(G) == 2
    n_id = sgraph.get_node_id(Exon(50, 125))
    assert G.is_start[n_id]
    assert G.is_stop[n_id]

    n_id = sgraph.get_node_id(Exon(125, 200))
    assert G.is_start[n_id]
    assert not G.is_stop[n_id]

    # add a stop site change point
    sgraph.stop_sites.add(80)
    sgraph.recreate()
    G = sgraph.G
    assert len(G) == 3
    n_id = sgraph.get_node_id(Exon(50, 80))
    assert not G.is_start[n_id]
    assert G.is_stop[n_id]
    n_id = sgraph.get_node_id(Exon(80, 125))
    assert G.is_start[n_id]
    assert G.is_stop[n_id]
    n_id = sgraph.get_node_id(Exon(125, 200))
    assert G.is_start[n_id]
    assert not G.is_stop[n_id]
コード例 #33
0
def test_multi_strand1():
    """Exercise SpliceGraph creation on a locus holding both strands.

    Verifies the parsed locus interval, that a mixed-strand transfrag list is
    rejected with ``TacoError``, the per-strand graph attributes, the range
    checking and values of ``get_expr_data``, and the node boundaries with
    and without guided ends/assembly.
    """
    # the GTF should parse into exactly one locus with five transfrags
    parsed_loci = read_gtf('multi_strand1.gtf')
    assert len(parsed_loci) == 1
    locus_interval, locus_lines = parsed_loci[0]
    assert locus_interval == ('chr1', 100, 1000)
    transfrag_map = Transfrag.parse_gtf(locus_lines)
    assert len(transfrag_map) == 5
    locus = Locus.create(transfrag_map.values())
    assert locus.chrom == 'chr1'
    assert locus.start == 100
    assert locus.end == 1000

    # a graph cannot be built from transfrags of more than one strand
    with pytest.raises(TacoError):
        SpliceGraph.create(transfrag_map.values())
    pos_transfrags = locus.get_transfrags(Strand.POS)
    neg_transfrags = locus.get_transfrags(Strand.NEG)
    pos_graph = SpliceGraph.create(pos_transfrags)
    neg_graph = SpliceGraph.create(neg_transfrags)

    # positive strand graph
    assert pos_graph.chrom == 'chr1'
    assert pos_graph.start == 100
    assert pos_graph.end == 650
    assert pos_graph.strand == Strand.POS
    assert pos_graph.ref_start_sites == [150]
    assert pos_graph.ref_stop_sites == [600]
    # requests reaching outside the graph interval are expected to fail
    for bad_start, bad_end in ((90, 110), (650, 655)):
        with pytest.raises(TacoError):
            pos_graph.get_expr_data(bad_start, bad_end)
    assert np.array_equal(pos_graph.get_expr_data(100, 105), np.ones(5))

    # negative strand graph
    assert neg_graph.chrom == 'chr1'
    assert neg_graph.start == 350
    assert neg_graph.end == 1000
    assert neg_graph.strand == Strand.NEG
    assert neg_graph.ref_start_sites == [1000]
    assert neg_graph.ref_stop_sites == [350]
    for bad_start, bad_end in ((340, 350), (1000, 1010)):
        with pytest.raises(TacoError):
            neg_graph.get_expr_data(bad_start, bad_end)
    # (start, end, constructor of the expected 5-element array)
    neg_expected = (
        (400, 405, np.ones),
        (945, 950, np.zeros),
        (950, 955, np.ones),
        (980, 985, np.zeros),
    )
    for lo, hi, make_expected in neg_expected:
        assert np.array_equal(neg_graph.get_expr_data(lo, hi), make_expected(5))

    # node boundaries without reference guidance
    pos_bounds = tuple(pos_graph._find_node_boundaries())
    assert pos_bounds == (100, 200, 300, 400, 650)
    neg_bounds = tuple(neg_graph._find_node_boundaries())
    assert neg_bounds == (350, 400, 500, 950, 980, 1000)

    # guided ends/assembly: additional boundaries are expected
    guided = dict(guided_ends=True, guided_assembly=True)
    guided_pos = SpliceGraph.create(pos_transfrags, **guided)
    pos_bounds = tuple(guided_pos._find_node_boundaries())
    assert pos_bounds == (100, 150, 200, 300, 400, 500, 600, 650)

    guided_neg = SpliceGraph.create(neg_transfrags, **guided)
    neg_bounds = tuple(guided_neg._find_node_boundaries())
    assert neg_bounds == (350, 400, 500, 750, 900, 950, 980, 1000)