def test_logp_scaling(df):
    # Fit an ensemble of models to the synthetic data.
    engine = Engine(df)
    engine.init_models(8)
    engine.run(500)

    # Target density for comparison: Normal(loc=5, scale=.5).
    x = np.linspace(3, 7, 200)
    p_true = norm.pdf(x, loc=5., scale=.5)
    lp_baxcat = engine.probability(x[:, np.newaxis], ['t'],
                                   given=[('x', 1), ('y', 2)])

    inftest_plot(x, p_true, np.exp(lp_baxcat), 'p_t-xy', RESDIR)

    # The estimated conditional density should match the target at the mode.
    assert abs(max(p_true) - max(np.exp(lp_baxcat))) < .05
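# For reference, the peak of the Normal(5, .5) target density above is
# 1 / (.5 * sqrt(2 * pi)) ≈ 0.798, so the .05 tolerance on the peak height
# amounts to roughly a 6% relative error at the mode. A quick standalone
# check (using only scipy.stats.norm, which this module already imports):
#
#     norm.pdf(5., loc=5., scale=.5)  # ≈ 0.7979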
# Stack the two labeled samples into a single (2n x 2) data frame.
df = pd.concat([pd.concat([s_a1, s_a2], axis=1),
                pd.concat([s_b1, s_b2], axis=1)], axis=0)
assert df.shape == (2*n, 2,)
df.columns = ['label', 'x']

engine = Engine(df, n_models=8)
engine.init_models()
engine.run(200)

# Evaluate the marginal and label-conditional densities of x on a grid.
x = np.linspace(-6., 6., 200)[:, np.newaxis]
p_01 = np.exp(engine.probability(x, ['x']))
p_0 = .5*np.exp(engine.probability(x, ['x'], given=[('label', 0,)]))
p_1 = .5*np.exp(engine.probability(x, ['x'], given=[('label', 1,)]))

plt.figure(figsize=(4, 4,))
plt.hist(df['x'], 31, histtype='stepfilled', color='#aaaaaa',
         edgecolor='None', density=True)
plt.plot(x.flatten(), p_0, label='p(x|label=0)')
plt.plot(x.flatten(), p_1, label='p(x|label=1)')
plt.plot(x.flatten(), p_01, ls='--', label='p(x)')
plt.xlabel('x')
plt.ylabel('PDF')
plt.legend(loc=0)
plt.savefig('exp_condprob.png', dpi=300)
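# The .5 weights on p_0 and p_1 above follow from the law of total probability
# with two equally represented labels:
#
#     p(x) = .5 * p(x | label=0) + .5 * p(x | label=1)
#
# so in the saved figure p_0 + p_1 should roughly trace the dashed p(x) curve
# (only approximately, since the densities are estimated from a finite model
# ensemble and a finite number of MCMC iterations).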
# determine how predictive variables are of whether an animal is fast. Linfoot
# information is basically the information-theoretic counterpart to
# correlation (see the conversion sketch at the end of this example).
linfoot_lean = engine.mutual_information('fast', 'lean', linfoot=True)
linfoot_stripes = engine.mutual_information('fast', 'stripes', linfoot=True)

print('Linfoot(fast, lean) = %f' % (linfoot_lean,))
print('Linfoot(fast, stripes) = %f' % (linfoot_stripes,))

# We can also figure out which animals are more similar. Is a chihuahua more
# similar to a wolf or to a rat?
sim_wolves = engine.row_similarity('chihuahua', 'wolf')
sim_rats = engine.row_similarity('chihuahua', 'rat')

print('Similarity between Chihuahuas and wolves is %f' % (sim_wolves,))
print('Similarity between Chihuahuas and rats is %f' % (sim_rats,))

# Which animals are outliers with respect to their being fast? We can find out
# by calculating the surprisal (self-information).
s = engine.surprisal('fast')
s.sort_values(['surprisal'], ascending=False, inplace=True)

print(s.head(10))

# Let's say we're out in the woods and we see a lean, spotted animal with a
# tail. What is the probability that it is fierce and fast?
# Note that Engine.probability returns log values (the log PDF for continuous
# variables) of an event given observations.
logp = engine.probability([1, 1], ['fierce', 'fast'],
                          given=[('lean', 1,), ('spots', 1,), ('tail', 1,)])

print('p(fierce, fast | lean, spots, tail) = %s' % (exp(logp),))
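# For reference, Linfoot's informational coefficient of correlation rescales
# mutual information I (in nats) onto [0, 1) so it reads like a correlation:
# L = sqrt(1 - exp(-2 * I)). A minimal conversion sketch (the helper name
# below is hypothetical, not part of the baxcat API):
#
#     from math import exp, sqrt
#
#     def linfoot_from_mi(mi):
#         # Linfoot (1957): L = sqrt(1 - e^(-2 * I))
#         return sqrt(1. - exp(-2. * mi))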