예제 #1
0
def test_pad():
    """Exercise ``Pattern.pad``, ``Pattern.shift`` and ``Pattern.rc``.

    The pattern is built from a 10-row slice of a 20-row array, with the
    full 20-row array supplied as the profile, so the pattern length and
    the profile length differ.
    """
    # 20 x 4 array of zeros with a single marked row at index 12.
    wide = np.zeros((20, 4))
    wide[12] = 1
    # Rows 5..14 of the wide array; the marked row lands at index 7.
    core = wide[5:15]
    assert core[7][0] == 1
    assert wide[12][0] == 1
    assert wide[13][0] == 0

    # The same `core` object is used for the 2nd, 3rd and 4th argument.
    p = Pattern("test", core, {"t": core}, {"t": core}, {"t": wide})

    # The profile keeps the full 20 rows.
    assert np.all(p.profile['t'] == wide)
    assert p.len_profile() == 20

    # pad(10) reports length 10 and a profile length of 20.
    assert p.pad(10).len_profile() == 20
    assert len(p.pad(10)) == 10

    # NOTE(review): same assertion as the previous one; kept so that the
    # sequence of pad() calls is unchanged -- confirm whether it is needed.
    assert len(p.pad(10)) == 10

    # shift(-2) moves the marked row two positions towards index 0 in both
    # the profile (12 -> 10) and the contribution track (7 -> 5).
    assert p.shift(-2).profile['t'][10][0] == 1
    assert p.shift(-2).contrib['t'][5][0] == 1

    # shift(1) -> rc() -> shift(1) -> rc() must give back the starting
    # seq / contrib / profile values.
    shifted_once = p.shift(1)
    flipped_once = shifted_once.rc()
    shifted_twice = flipped_once.shift(1)
    pnew = shifted_twice.rc()
    assert np.allclose(p.seq, pnew.seq)
    assert np.allclose(p.contrib['t'], pnew.contrib['t'])
    assert np.allclose(p.profile['t'], pnew.profile['t'])
def create_patterns(motif_seqs):
    """Build one ``Pattern`` per motif string plus an aligned copy of each.

    Args:
        motif_seqs: iterable of sequences; each one is passed to
            ``encodeDNA`` wrapped in a single-element list.

    Returns:
        A ``(patterns, aligned_patterns)`` tuple of two lists. Each pattern
        is named after its position in ``motif_seqs`` and uses the encoded
        sequence for ``seq``, ``contrib['a']`` and ``hyp_contrib['a']``.
        Each aligned pattern is the result of ``align`` against the first
        pattern.
    """
    patterns = []
    for idx, motif in enumerate(motif_seqs):
        # encodeDNA is called once per field so that seq, contrib and
        # hyp_contrib each receive their own array object.
        seq_arr = encodeDNA([motif])[0]
        contrib_arr = encodeDNA([motif])[0]
        hyp_contrib_arr = encodeDNA([motif])[0]
        patterns.append(Pattern(seq=seq_arr,
                                contrib={"a": contrib_arr},
                                hyp_contrib={"a": hyp_contrib_arr},
                                name=str(idx)))

    aligned_patterns = []
    for pattern in patterns:
        # patterns[0] is looked up inside the loop so that an empty input
        # yields two empty lists instead of an IndexError.
        reference = patterns[0]
        fill = np.array([0.25, 0.25, 0.25, 0.25])
        aligned_patterns.append(pattern.align(reference, pad_value=fill))

    return patterns, aligned_patterns
예제 #3
0
 def _extract(cls, seqlet, seq, hyp_contrib, profiles, profile_width=None):
     """Build a ``Pattern`` from the regions a seqlet selects.

     Args:
         seqlet: object providing ``extract(arr)``, ``resize(width)`` and
             a ``name`` attribute.
         seq: array passed to ``seqlet.extract`` to obtain the pattern
             sequence.
         hyp_contrib: dict mapping task -> array; each array is passed to
             ``seqlet.extract``.
         profiles: dict mapping task -> array; each array is passed to
             ``extract`` of the resized seqlet.
         profile_width: width handed to ``seqlet.resize``.
             NOTE(review): the default ``None`` is forwarded unchanged --
             confirm that ``resize`` accepts it.

     Returns:
         ``Pattern`` named after the seqlet, with ``contrib`` computed as
         ``hyp_contrib * seq`` per task and empty ``attrs``.
     """
     from basepair.modisco.core import Pattern

     hyp_by_task = {}
     for task, arr in hyp_contrib.items():
         hyp_by_task[task] = seqlet.extract(arr)
     seq_window = seqlet.extract(seq)

     # TODO - implement this to work also for the out-of-core case efficiently
     profile_seqlet = seqlet.resize(profile_width)

     pattern_name = seqlet.name
     # Contribution = hypothetical contribution multiplied by the extracted
     # sequence (presumably a one-hot mask -- TODO confirm).
     contrib_by_task = {}
     for task, hyp in hyp_by_task.items():
         contrib_by_task[task] = hyp * seq_window
     profile_by_task = {}
     for task, arr in profiles.items():
         profile_by_task[task] = profile_seqlet.extract(arr)

     return Pattern(name=pattern_name,
                    seq=seq_window,
                    contrib=contrib_by_task,
                    hyp_contrib=hyp_by_task,
                    profile=profile_by_task,
                    attrs={})
예제 #4
0
def dont_test_parse_hdf4():
    """Manual check: load a pattern from a modisco HDF5 file and plot it.

    Reads from a hard-coded absolute path and opens matplotlib figures.
    NOTE(review): the name does not start with ``test_``, presumably so
    that the test runner does not collect it -- confirm.
    """
    from basepair.modisco.results import ModiscoResult

    result = ModiscoResult("/s/project/avsec/basepair/modisco/modisco.h5")
    result.open()

    result.f.ls()

    # Build a Pattern directly from the HDF5 group, trim it, and check that
    # the trimmed length agrees with the 'Klf4' contribution track.
    grp = result.get_pattern_grp("metacluster_0", "pattern_0")
    trimmed = Pattern.from_hdf5_grp(grp, "m0_p0").trim_seq_ic(0.08)
    assert len(trimmed) == len(trimmed.contrib['Klf4'])

    # Load the same pattern through the ModiscoResult helper and plot it
    # with each supported `kind` form, in the original order.
    loaded = result.get_pattern("metacluster_0/pattern_0")
    import matplotlib.pyplot as plt
    for kind in ('all', ['seq', 'contrib/Klf4'], 'seq'):
        loaded.plot(kind=kind)

    plt.show()
    result.plot_pattern("metacluster_0/pattern_0",
                        kind=['seq', 'contrib/Klf4'])
예제 #5
0
def test_pattern():
    """Smoke-test basic ``Pattern`` methods and constructor validation."""
    # Arguments are drawn in the same order as they are passed.
    seq = np.random.randn(10, 4)
    contrib = {"t": np.random.randn(10, 4)}
    hyp_contrib = {"t": np.random.randn(10, 4)}
    pat = Pattern("test", seq, contrib, hyp_contrib)

    assert pat.tasks() == ['t']
    print(pat)
    pat.get_consensus()
    # seq is replaced by its absolute value before get_seq_ic() is called.
    pat.seq = np.abs(pat.seq)
    pat.get_seq_ic()
    # NOTE(review): copy() is called twice in a row; kept as-is -- confirm
    # whether the second call is intentional.
    pat.copy()
    pat.copy()

    pat.rc()
    len(pat)

    # Fourth argument has 11 rows instead of 10 -> must raise.
    with raises(Exception):
        Pattern("test",
                np.random.randn(10, 4),
                {"t": np.random.randn(10, 4)},
                {"t": np.random.randn(11, 4)})

    # Third argument is a plain list and the fourth a bare array (neither
    # is a dict) -> must raise.
    with raises(Exception):
        Pattern("test",
                np.random.randn(10, 4),
                [5] * 10,
                np.random.randn(10, 4))
예제 #6
0
 def get_pattern(self, pattern):
     """Load the pattern identified by `pattern` as a ``Pattern`` object.

     Args:
         pattern: "/"-separated identifier, e.g.
             ``"metacluster_0/pattern_0"``. Its components are passed
             positionally to ``self.get_pattern_grp``.

     Returns:
         The result of ``Pattern.from_hdf5_grp`` for that group, with the
         full `pattern` string passed as its second argument.
     """
     from basepair.modisco.core import Pattern

     path_parts = pattern.split("/")
     grp = self.get_pattern_grp(*path_parts)
     return Pattern.from_hdf5_grp(grp, pattern)