예제 #1
0
def run(n_models=10, n_iter=200, iter_step=10, n_needles=2, n_distractors=8,
        n_rows=100, pairtype=None, pair_kws=None):
    """Plot how dependence probability evolves for needle/distractor pairs.

    Data is produced by ``_gen_data`` and handed to an ``Engine``. Inference
    is run in batches of ``iter_step`` iterations; after every batch the
    dependence probability is recorded for each needle pair and for a random
    sample (at most 32) of (needle column, distractor column) pairs. The
    traces are plotted against the cumulative number of iterations actually
    run, followed by the engine's dependence-probability heatmap.

    Parameters
    ----------
    n_models : int
        Passed to ``Engine`` as ``n_models``.
    n_iter : int
        Nominal iteration budget. ``int(n_iter / iter_step) + 1`` batches are
        run, so the total number of iterations is that count times
        ``iter_step``.
    iter_step : int
        Iterations per batch, i.e. the spacing between recorded points.
    n_needles : int
        Number of needle pairs. Column positions ``2*i`` and ``2*i + 1`` of
        the generated frame are treated as the i-th pair.
    n_distractors : int
        Number of distractor columns; they are assumed to sit directly after
        the needle columns (TODO confirm against ``_gen_data``).
    n_rows : int
        Forwarded to ``_gen_data``.
    pairtype, pair_kws
        Forwarded to ``_gen_data`` unchanged.

    Returns
    -------
    None
        The function only displays figures.
    """
    n_needle_cols = 2 * n_needles
    needle_idxs = [(2*i, 2*i + 1) for i in range(n_needles)]
    needle_cols = range(n_needle_cols)
    distractor_cols = range(n_needle_cols, n_needle_cols + n_distractors)

    # Every (needle column, distractor column) combination is a candidate
    # "should be independent" pair; cap the number tracked at 32.
    combs = list(it.product(needle_cols, distractor_cols))
    distractor_idxs = random.sample(combs, min(len(combs), 32))

    df = _gen_data(n_needles, n_distractors, n_rows, pairtype, pair_kws)

    engine = Engine(df, n_models=n_models)
    engine.init_models()
    # for model in engine._models:
    #     # XXX: emulates the log grid expected alpha
    #     # e.g. mean(exp(linspace(log(1/n_rows), log(rows))))
    #     # model['state_alpha'] = .5*(n_needles*2. + n_distractors)
    #     model['state_alpha'] = 100.

    # no column_alpha transition
    tlist = [b'row_assignment', b'column_assignment', b'row_alpha',
             b'column_hypers']

    n_steps = int(n_iter / iter_step)
    n_points = n_steps + 1
    needle_dps = np.zeros((n_needles, n_points))
    distractor_dps = np.zeros((len(distractor_idxs), n_points))

    columns = df.columns
    for step in range(n_points):
        engine.run(iter_step, trans_kwargs={'transition_list': tlist})
        # engine.run(iter_step)

        for nidx, (a, b) in enumerate(needle_idxs):
            needle_dps[nidx, step] = engine.dependence_probability(
                columns[a], columns[b])

        for didx, (a, b) in enumerate(distractor_idxs):
            distractor_dps[didx, step] = engine.dependence_probability(
                columns[a], columns[b])

    # Each recorded point follows a full batch of `iter_step` iterations, so
    # point k sits at (k + 1) * iter_step iterations. The previous axis
    # (1, 1 + iter_step, ...) drew every point iter_step - 1 iterations early.
    iter_count = iter_step * np.arange(1, n_points + 1)

    for y in distractor_dps:
        plt.plot(iter_count, y, color='gray', alpha=.3)

    for y in needle_dps:
        plt.plot(iter_count, y, color='crimson')

    # plt.gca().set_xscale('log')
    plt.ylim([-.05, 1.05])
    plt.xlim([iter_count[0], iter_count[-1]])
    plt.show()

    engine.heatmap('dependence_probability')
    plt.show()
예제 #2
0
def test_dependence_probability():
    """A column and its shifted copy must look more dependent than noise.

    ``c1`` is ``c0 + 1.0`` (an exact function of ``c0``), while ``c2`` is an
    independent uniform draw, so the (c0, c1) dependence probability is
    expected to exceed both pairings that involve ``c2``.
    """
    # Keep the draw order: normal sample first, uniform sample second.
    base = np.random.randn(30)
    noise = np.random.rand(30)

    frame = pd.DataFrame({'c0': base, 'c1': base + 1.0, 'c2': noise})

    engine = Engine(frame, n_models=20, use_mp=False)
    engine.init_models()
    engine.run(10)

    # Query the pairs in a fixed order: (c0, c1), (c0, c2), (c1, c2).
    linked = engine.dependence_probability('c0', 'c1')
    unrelated_0 = engine.dependence_probability('c0', 'c2')
    unrelated_1 = engine.dependence_probability('c1', 'c2')

    assert linked > unrelated_0
    assert linked > unrelated_1
예제 #3
0
def test_dependence_probability():
    """Check that a deterministic pair outranks pairs involving noise.

    Builds three 30-row columns: ``c0`` (standard normal), ``c1`` (``c0``
    shifted by 1.0) and ``c2`` (independent uniform noise). After a short
    run, the dependence probability of (c0, c1) must be strictly greater
    than that of (c0, c2) and of (c1, c2).
    """
    samples = np.random.randn(30)

    col_a = pd.Series(samples, name='c0')
    col_b = pd.Series(samples + 1.0, name='c1')
    col_c = pd.Series(np.random.rand(30), name='c2')

    # Column-wise concat picks the column labels up from the Series names.
    data = pd.concat([col_a, col_b, col_c], axis=1)

    engine = Engine(data, n_models=20, use_mp=False)
    engine.init_models()
    engine.run(10)

    pairs = [('c0', 'c1'), ('c0', 'c2'), ('c1', 'c2')]
    dp_ab, dp_ac, dp_bc = [
        engine.dependence_probability(left, right) for left, right in pairs
    ]

    assert dp_ab > dp_ac
    assert dp_ab > dp_bc