Example #1
# `df` is presumably a pytest fixture; `inftest_plot` and RESDIR are helpers
# defined elsewhere in the test suite.
import numpy as np
from scipy.stats import norm

from baxcat.engine import Engine  # import path assumed


def test_logp_scaling(df):
    # Initialize 8 models and run for 500 iterations.
    engine = Engine(df)
    engine.init_models(8)
    engine.run(500)

    x = np.linspace(3, 7, 200)

    # The test's ground truth: t | x=1, y=2 should be Normal(5, 0.5).
    p_true = norm.pdf(x, loc=5., scale=.5)
    lp_baxcat = engine.probability(x[:, np.newaxis], ['t'],
                                   given=[('x', 1), ('y', 2)])

    inftest_plot(x, p_true, np.exp(lp_baxcat), 'p_t-xy', RESDIR)

    # The peak of the inferred conditional density should match the true peak.
    assert abs(max(p_true) - max(np.exp(lp_baxcat))) < .05
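
# The real `df` fixture lives elsewhere in the test suite. A minimal sketch of
# what it could look like, assuming t ~ Normal(5, 0.5) independently of x and
# y (the sample size and column distributions are illustrative guesses, not
# from the source):
import pandas as pd
import pytest


@pytest.fixture
def df():
    n = 1000
    rng = np.random.RandomState(0)
    return pd.DataFrame({
        'x': rng.randint(0, 3, size=n),
        'y': rng.randint(0, 3, size=n),
        't': rng.normal(5., .5, size=n),
    })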
Example #2
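# This example picks up after the toy data have been built. A possible prelude,
# assuming two equally sized Gaussian components labelled 0 and 1 (the series
# names, sample size, and component parameters are illustrative guesses, not
# from the source):
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from baxcat.engine import Engine

n = 500  # samples per component (illustrative)

s_a1 = pd.Series(np.zeros(n, dtype=int))        # labels for component a
s_a2 = pd.Series(np.random.normal(-2., 1., n))  # x values for component a
s_b1 = pd.Series(np.ones(n, dtype=int))         # labels for component b
s_b2 = pd.Series(np.random.normal(2., 1., n))   # x values for component b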

df = pd.concat([pd.concat([s_a1, s_a2], axis=1),
                pd.concat([s_b1, s_b2], axis=1)], axis=0)
assert df.shape == (2*n, 2,)

df.columns = ['label', 'x']


engine = Engine(df, n_models=8)
engine.init_models()
engine.run(200)

x = np.linspace(-6., 6., 200)[np.newaxis].T

# Marginal p(x) from the engine, and each label-conditional PDF weighted by the
# label prior p(label) = 1/2, so the two conditionals should sum to p(x).
p_01 = np.exp(engine.probability(x, ['x']))
p_0 = .5*np.exp(engine.probability(x, ['x'], given=[('label', 0,)]))
p_1 = .5*np.exp(engine.probability(x, ['x'], given=[('label', 1,)]))

plt.figure(figsize=(4, 4,))
plt.hist(df['x'], 31, histtype='stepfilled', color='#aaaaaa', edgecolor='None',
         density=True)
plt.plot(x.flatten(), p_0, label='p(x|label=0)/2')
plt.plot(x.flatten(), p_1, label='p(x|label=1)/2')
plt.plot(x.flatten(), p_01, ls='--', label='p(x)')
plt.xlabel('x')
plt.ylabel('PDF')
plt.legend(loc=0)

plt.savefig('exp_condprob.png', dpi=300)
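
# Not in the original script: by the law of total probability the two weighted
# conditionals should reconstruct the marginal, p(x) = .5*p(x|0) + .5*p(x|1).
# The tolerance is loose because the model's label marginal is only
# approximately one half.
assert np.allclose(p_0 + p_1, p_01, atol=.05)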
Example #3
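# This excerpt assumes an Engine already fit to a binary animals-by-features
# table: each row is an animal ('chihuahua', 'wolf', ...) and each column a
# 0/1 feature ('fast', 'lean', 'stripes', ...). A hedged sketch of that setup
# (the file name is illustrative, not from the source):
from math import exp

import pandas as pd

from baxcat.engine import Engine

df = pd.read_csv('animals.csv', index_col=0)  # hypothetical data file

engine = Engine(df, n_models=8)
engine.init_models()
engine.run(500)
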
# The mutual information (reported here on the Linfoot scale) tells us how
# predictive other variables are of whether an animal is fast. Linfoot is
# basically the information-theoretic counterpart to correlation.
linfoot_lean = engine.mutual_information('fast', 'lean', linfoot=True)
linfoot_stripes = engine.mutual_information('fast', 'stripes', linfoot=True)

print('Linfoot(fast, lean) = %f' % (linfoot_lean,))
print('Linfoot(fast, stripes) = %f' % (linfoot_stripes,))

# We can also figure out which animals are more similar. Is a chihuahua more
# similar to a wolf or to a rat?
sim_wolves = engine.row_similarity('chihuahua', 'wolf')
sim_rats = engine.row_similarity('chihuahua', 'rat')

print('Similarity between Chihuahuas and wolves is %f' % (sim_wolves,))
print('Similarity between Chihuahuas and rats is %f' % (sim_rats,))


# Which animals are outliers with respect to being fast? We can find out by
# calculating the surprisal (self-information).
s = engine.surprisal('fast')
s.sort_values(['surprisal'], ascending=False, inplace=True)
print(s.head(10))

# Let's say we're out in the woods and we see a lean, spotted animal with a
# tail. What is the probability that it is fierce and fast?
# Note that Engine.probability returns log values (the log PDF for continuous
# variables), so we exponentiate to recover the probability.
logp = engine.probability([1, 1], ['fierce', 'fast'],
                          given=[('lean', 1,), ('spots', 1,), ('tail', 1,)])
print('p(fierce, fast | lean, spots, tail) = %s' % (exp(logp),))