Ejemplo n.º 1
0
def test_split_transfrag():
    """Check how split_transfrag divides each transfrag of splice_sites.gtf.

    Node boundaries are taken from the splice graph built over every
    transfrag of the first locus; transfrags "A" through "D" are then split
    against those boundaries and compared with the expected node tuples.
    """
    _, gtf_lines = read_gtf("splice_sites.gtf")[0]
    transfrags = Transfrag.parse_gtf(gtf_lines)
    graph = SpliceGraph.create(transfrags.values())
    # the boundaries are handed to split_transfrag as an array of C ints
    node_bounds = array("i", graph._find_node_boundaries())

    # (transfrag id, expected nodes), checked in this order
    expected_nodes = (
        ("A", ((10, 100), (200, 250), (250, 300), (400, 525))),
        ("B", ((10, 100), (250, 300), (400, 525))),
        ("C", ((150, 200), (200, 250), (250, 300), (400, 525))),
        ("D", ((375, 400), (400, 525))),
    )
    for t_id, expected in expected_nodes:
        nodes = tuple(split_transfrag(transfrags[t_id], node_bounds))
        assert nodes == expected
Ejemplo n.º 2
0
def test_split_transfrag():
    """Verify the nodes split_transfrag produces for transfrags A, B, C and D.

    The transfrags are parsed from the first locus of splice_sites.gtf and
    split against the node boundaries reported by a SpliceGraph created from
    all of them.
    """
    first_locus = read_gtf('splice_sites.gtf')[0]
    _interval, lines = first_locus
    by_id = Transfrag.parse_gtf(lines)
    splice_graph = SpliceGraph.create(by_id.values())
    bounds = array('i', splice_graph._find_node_boundaries())

    def nodes_of(t_id):
        # collect whatever split_transfrag yields so it compares as a tuple
        return tuple(split_transfrag(by_id[t_id], bounds))

    # check nodes
    assert nodes_of('A') == ((10, 100), (200, 250), (250, 300), (400, 525))
    assert nodes_of('B') == ((10, 100), (250, 300), (400, 525))
    assert nodes_of('C') == ((150, 200), (200, 250), (250, 300), (400, 525))
    assert nodes_of('D') == ((375, 400), (400, 525))
Ejemplo n.º 3
0
def test_create_locus():
    """Create a Locus from splice_sites.gtf and check its span and expression data.

    Positive-strand expression queries inside the locus are compared with
    known values; queries reaching past either edge must raise TacoError.
    """
    loci = read_gtf('splice_sites.gtf')
    assert len(loci) == 1
    interval, gtf_lines = loci[0]
    assert interval == ('chr1', 10, 525)
    transfrags = Transfrag.parse_gtf(gtf_lines)

    locus = Locus.create(transfrags.values())
    assert locus.chrom == 'chr1'
    assert locus.start == 10
    assert locus.end == 525

    # (query start, query end, expected values) for queries inside the locus
    in_range = (
        (49, 51, [1.0, 2.0]),
        (150, 151, [1.0]),
        (499, 501, [3.0, 1.0]),
    )
    for lo, hi, expected in in_range:
        expr = locus.get_expr_data(lo, hi, Strand.POS)
        assert np.array_equal(expr, expected)

    # queries overlapping the outside of the locus (10-525) must be rejected
    for lo, hi in ((5, 15), (520, 530)):
        with pytest.raises(TacoError):
            locus.get_expr_data(lo, hi, Strand.POS)
Ejemplo n.º 4
0
def test_parse_loci():
    """Check that parse_loci.gtf is read as three loci with the expected intervals."""
    expected_intervals = (
        ('chr1', 10, 50),
        ('chr1', 50, 200),
        ('chr2', 100, 200),
    )
    loci = read_gtf('parse_loci.gtf')
    assert len(loci) == len(expected_intervals)
    # the interval is the first element of each locus entry
    for idx, expected in enumerate(expected_intervals):
        assert loci[idx][0] == expected
Ejemplo n.º 5
0
def test_multi_strand1():
    """Exercise Locus and SpliceGraph on a locus with transfrags on both strands.

    Covers the locus coordinates, the rejection of a splice graph built from
    mixed-strand transfrags, the attributes and expression data of the
    per-strand graphs, and their node boundaries with and without guided
    ends/assembly.
    """
    # read gtf and test basic values
    loci = read_gtf("multi_strand1.gtf")
    assert len(loci) == 1
    interval, gtf_lines = loci[0]
    assert interval == ("chr1", 100, 1000)
    transfrags = Transfrag.parse_gtf(gtf_lines)
    assert len(transfrags) == 5
    locus = Locus.create(transfrags.values())
    assert locus.chrom == "chr1"
    assert locus.start == 100
    assert locus.end == 1000

    # creating a splice graph from multiple strands must raise
    with pytest.raises(TacoError):
        SpliceGraph.create(transfrags.values())

    pos_transfrags = locus.get_transfrags(Strand.POS)
    neg_transfrags = locus.get_transfrags(Strand.NEG)
    sgpos = SpliceGraph.create(pos_transfrags)
    sgneg = SpliceGraph.create(neg_transfrags)

    # positive-strand graph
    assert sgpos.chrom == "chr1"
    assert sgpos.start == 100
    assert sgpos.end == 650
    assert sgpos.strand == Strand.POS
    assert sgpos.ref_start_sites == [150]
    assert sgpos.ref_stop_sites == [600]
    # ranges not fully inside 100-650 are rejected
    for lo, hi in ((90, 110), (650, 655)):
        with pytest.raises(TacoError):
            sgpos.get_expr_data(lo, hi)
    assert np.array_equal(sgpos.get_expr_data(100, 105), np.ones(5))

    # negative-strand graph
    assert sgneg.chrom == "chr1"
    assert sgneg.start == 350
    assert sgneg.end == 1000
    assert sgneg.strand == Strand.NEG
    assert sgneg.ref_start_sites == [1000]
    assert sgneg.ref_stop_sites == [350]
    # ranges not fully inside 350-1000 are rejected
    for lo, hi in ((340, 350), (1000, 1010)):
        with pytest.raises(TacoError):
            sgneg.get_expr_data(lo, hi)
    # (query start, query end, constructor of the expected 5-element array)
    neg_expr = (
        (400, 405, np.ones),
        (945, 950, np.zeros),
        (950, 955, np.ones),
        (980, 985, np.zeros),
    )
    for lo, hi, fill in neg_expr:
        assert np.array_equal(sgneg.get_expr_data(lo, hi), fill(5))

    # test locus boundaries
    assert tuple(sgpos._find_node_boundaries()) == (100, 200, 300, 400, 650)
    assert tuple(sgneg._find_node_boundaries()) == (350, 400, 500, 950, 980, 1000)

    # guided ends/assembly: boundaries from the reference are used as well
    guided = {"guided_ends": True, "guided_assembly": True}
    lpos = SpliceGraph.create(pos_transfrags, **guided)
    assert tuple(lpos._find_node_boundaries()) == (
        100, 150, 200, 300, 400, 500, 600, 650)

    lneg = SpliceGraph.create(neg_transfrags, **guided)
    assert tuple(lneg._find_node_boundaries()) == (
        350, 400, 500, 750, 900, 950, 980, 1000)
Ejemplo n.º 6
0
def test_multi_strand1():
    """Test strand-specific splice graphs built from multi_strand1.gtf.

    The single locus holds five transfrags spread over both strands. The
    test checks the locus itself, that a splice graph cannot be created from
    the mixed-strand set, and then the coordinates, reference sites,
    expression data and node boundaries of the per-strand graphs, the latter
    both with default settings and with guided ends/assembly.
    """

    def assert_rejected(graph, start, end):
        # the graph must refuse this expression query with a TacoError
        with pytest.raises(TacoError):
            graph.get_expr_data(start, end)

    def assert_expr(graph, start, end, expected):
        assert np.array_equal(graph.get_expr_data(start, end), expected)

    # read gtf and test basic values
    loci = read_gtf('multi_strand1.gtf')
    assert len(loci) == 1
    interval, gtf_lines = loci[0]
    assert interval == ('chr1', 100, 1000)
    by_id = Transfrag.parse_gtf(gtf_lines)
    assert len(by_id) == 5
    locus = Locus.create(by_id.values())
    assert locus.chrom == 'chr1'
    assert locus.start == 100
    assert locus.end == 1000

    # raise exception when creating with multiple strands
    with pytest.raises(TacoError):
        SpliceGraph.create(by_id.values())

    pos = locus.get_transfrags(Strand.POS)
    neg = locus.get_transfrags(Strand.NEG)
    sgpos = SpliceGraph.create(pos)
    sgneg = SpliceGraph.create(neg)

    # positive strand
    assert sgpos.chrom == 'chr1'
    assert sgpos.start == 100
    assert sgpos.end == 650
    assert sgpos.strand == Strand.POS
    assert sgpos.ref_start_sites == [150]
    assert sgpos.ref_stop_sites == [600]
    assert_rejected(sgpos, 90, 110)
    assert_rejected(sgpos, 650, 655)
    assert_expr(sgpos, 100, 105, np.ones(5))

    # negative strand
    assert sgneg.chrom == 'chr1'
    assert sgneg.start == 350
    assert sgneg.end == 1000
    assert sgneg.strand == Strand.NEG
    assert sgneg.ref_start_sites == [1000]
    assert sgneg.ref_stop_sites == [350]
    assert_rejected(sgneg, 340, 350)
    assert_rejected(sgneg, 1000, 1010)
    assert_expr(sgneg, 400, 405, np.ones(5))
    assert_expr(sgneg, 945, 950, np.zeros(5))
    assert_expr(sgneg, 950, 955, np.ones(5))
    assert_expr(sgneg, 980, 985, np.zeros(5))

    # test locus boundaries
    bounds_pos = tuple(sgpos._find_node_boundaries())
    assert bounds_pos == (100, 200, 300, 400, 650)
    bounds_neg = tuple(sgneg._find_node_boundaries())
    assert bounds_neg == (350, 400, 500, 950, 980, 1000)

    # added guided ends/assembly to use boundaries from reference
    guided_kwargs = dict(guided_ends=True, guided_assembly=True)
    guided_pos = SpliceGraph.create(pos, **guided_kwargs)
    bounds_pos = tuple(guided_pos._find_node_boundaries())
    assert bounds_pos == (100, 150, 200, 300, 400, 500, 600, 650)

    guided_neg = SpliceGraph.create(neg, **guided_kwargs)
    bounds_neg = tuple(guided_neg._find_node_boundaries())
    assert bounds_neg == (350, 400, 500, 750, 900, 950, 980, 1000)