Example #1
# Imports assumed by this example. `_gen_data` is a local helper (not shown
# in this snippet) that builds the needle/distractor DataFrame; the Engine
# import path follows the baxcat package layout.
import itertools as it
import random

import numpy as np
import matplotlib.pyplot as plt

from baxcat.engine import Engine
def run(n_models=10, n_iter=200, iter_step=10, n_needles=2, n_distractors=8,
        n_rows=100, pairtype=None, pair_kws=None):

    needle_idxs = [(2*i, 2*i+1,) for i in range(n_needles)]
    needle_cols = list(range(n_needles*2))
    distractor_cols = list(range(n_needles*2, n_needles*2+n_distractors))
    combs = list(it.product(needle_cols, distractor_cols))
    distractor_idxs = random.sample(combs, min(len(combs), 32))

    df = _gen_data(n_needles, n_distractors, n_rows, pairtype, pair_kws)

    engine = Engine(df, n_models=n_models)
    engine.init_models()
    # Optionally pin the CRP state alpha on each model, e.g. to emulate the
    # expected alpha under the log-grid prior:
    #   mean(exp(linspace(log(1/n_rows), log(n_rows))))
    # for model in engine._models:
    #     model['state_alpha'] = 100.

    # no column_alpha transition, so the column CRP concentration stays fixed
    tlist = [b'row_assignment', b'column_assignment', b'row_alpha',
             b'column_hypers']

    n_steps = n_iter // iter_step
    needle_dps = np.zeros((n_needles, n_steps + 1))
    distractor_dps = np.zeros((len(distractor_idxs), n_steps + 1))
    for i in range(n_steps+1):
        engine.run(iter_step, trans_kwargs={'transition_list': tlist})

        for nidx, (a, b) in enumerate(needle_idxs):
            col_a, col_b = df.columns[a], df.columns[b]
            needle_dps[nidx, i] = engine.dependence_probability(col_a, col_b)

        for didx, (a, b) in enumerate(distractor_idxs):
            col_a, col_b = df.columns[a], df.columns[b]
            distractor_dps[didx, i] = engine.dependence_probability(
                col_a, col_b)

    # x-axis position of each recorded step (starting at 1 keeps a log
    # x-scale usable)
    iter_count = np.cumsum([1] + [iter_step]*n_steps)

    # distractor pairs in gray, needle pairs in crimson
    for y in distractor_dps:
        plt.plot(iter_count, y, color='gray', alpha=.3)

    for y in needle_dps:
        plt.plot(iter_count, y, color='crimson')

    # plt.gca().set_xscale('log')
    plt.ylim([-.05, 1.05])
    plt.xlim([1, iter_count[-1]])
    plt.show()

    engine.heatmap('dependence_probability')
    plt.show()
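
A minimal usage sketch for the needle finder above; the argument values are
illustrative, and `pairtype`/`pair_kws` only matter if `_gen_data` accepts
them.

# Hypothetical invocation: 10 models, 200 total iterations, recorded every 10.
run(n_models=10, n_iter=200, iter_step=10, n_needles=2, n_distractors=8,
    n_rows=100)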
Example #2
# This example assumes `df` is a pandas DataFrame of animal features, with
# columns including 'fast', 'lean', and 'stripes', loaded earlier (imports as
# in Example #1). We use several models so that probabilities are averaged
# over the posterior.
engine = Engine(df, n_models=32)
print('Initializing 32 models...')
engine.init_models()
print('Running models for 200 iterations...')
engine.run(200, checkpoint=5)

# To check whether inference has converged, we plot the log score for each
# model as a function of time and make sure they have all leveled off.
engine.convergence_plot()
plt.show()

# We can view which columns are dependent on which other columns by plotting
# an n_cols-by-n_cols matrix in which each cell is the dependence probability
# between two columns. Note that the dependence probability is simply the
# probability that a dependence exists, not the strength of the dependence.
engine.heatmap('dependence_probability', plot_kwargs={'figsize': (10, 10,)})
plt.show()

engine.heatmap('row_similarity', plot_kwargs={'figsize': (10, 10,)})
plt.show()

# The paint job is an important part of what makes a pinewood derby car fast,
# but does it matter for animals? We'll use the Linfoot information to
# determine how predictive variables are of whether an animal is fast. Linfoot
# is basically the information-theoretic counterpart to correlation.
linfoot_lean = engine.mutual_information('fast', 'lean', linfoot=True)
linfoot_stripes = engine.mutual_information('fast', 'stripes', linfoot=True)

print('Linfoot(fast, lean) = %f' % (linfoot_lean,))
print('Linfoot(fast, stripes) = %f' % (linfoot_stripes,))
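
For reference, Linfoot's informational coefficient of correlation is a fixed
transform of mutual information, L = sqrt(1 - exp(-2I)), which maps I in
[0, inf) onto [0, 1). A small sketch of that conversion; the helper name is
ours, and `mi` is assumed to be a mutual-information estimate in nats.

import numpy as np

def linfoot_from_mi(mi):
    # Linfoot (1957): 0 for independent columns, approaching 1 as the
    # relationship becomes deterministic.
    return np.sqrt(1.0 - np.exp(-2.0 * mi))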
Example #3
# Imports and the function head are reconstructed here so the example runs;
# the 3x3 grid of cell means `mus` is a stand-in (the original values are not
# shown in this snippet).
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from baxcat.engine import Engine


def gen_phenotype_data(n_rows):
    # Continuous trait T whose mean depends jointly on two ternary,
    # genotype-like variables A and B.
    mus = np.random.randn(3, 3)  # assumption: one cell mean per (a, b) pair
    data = []
    std = 1.
    for i in range(n_rows):
        a = np.random.randint(3)
        b = np.random.randint(3)
        mu = mus[a, b]
        x = np.random.randn()*std + mu

        data.append([x, a, b])

    return pd.DataFrame(data)

n_rows = 100
n_cols = 32

# phenotype data plus noise columns that should be independent of T
da = gen_phenotype_data(n_rows)
db = pd.DataFrame(np.random.randint(3, size=(n_rows, n_cols)))
df = pd.concat([da, db], axis=1)

df.columns = ['T', 'A', 'B'] + ['x_%d' % i for i in range(n_cols)]

engine = Engine(df, n_models=32)
engine.init_models()
engine.run(100)

# Report how much information each column shares with the trait T: a larger
# 1/H(col|T) means less uncertainty about col once T is known.
for col in df.columns:
    if col != 'T':
        print("1/H(%s|T) = %f" % (col, 1/engine.conditional_entropy(col, 'T')))

engine.heatmap('dependence_probability')
plt.show()
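
With 30-plus columns the printout above is hard to scan; a sorted summary
makes the jointly dependent columns A and B stand out. A minimal sketch,
reusing the `engine` and `df` defined above:

# Rank columns by inverse conditional entropy, most informative about T first.
scores = {col: 1/engine.conditional_entropy(col, 'T')
          for col in df.columns if col != 'T'}
for col, score in sorted(scores.items(), key=lambda kv: kv[1], reverse=True):
    print('%-5s 1/H(%s|T) = %.3f' % (col, col, score))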