def run(n_models=10, n_iter=200, iter_step=10, n_needles=2, n_distractors=8,
        n_rows=100, pairtype=None, pair_kws=None):
    """Track dependence probabilities of needle and distractor column pairs.

    Generates a dataset with ``_gen_data``, fits an ``Engine`` to it in
    increments of ``iter_step`` iterations, and after every increment records
    the dependence probability of each needle pair and of a random sample of
    (needle column, distractor column) pairs. The traces are then plotted
    against the cumulative iteration count, followed by the engine's
    dependence-probability heatmap.

    Parameters
    ----------
    n_models : int
        Number of models passed to ``Engine``.
    n_iter : int
        Nominal iteration budget; only used to derive the number of steps,
        ``int(n_iter / iter_step)``.
    iter_step : int
        Iterations run between consecutive measurements.
    n_needles : int
        Number of needle pairs. Pair ``i`` is read from column positions
        ``2*i`` and ``2*i + 1`` of the generated frame.
    n_distractors : int
        Number of distractor columns, read from the column positions that
        follow the needle columns.
    n_rows : int
        Number of rows, forwarded to ``_gen_data``.
    pairtype, pair_kws
        Forwarded unchanged to ``_gen_data``.

    Notes
    -----
    The sampler is advanced ``int(n_iter / iter_step) + 1`` times, so the
    total number of iterations run is ``n_iter + iter_step`` when
    ``iter_step`` divides ``n_iter``.
    """
    # Column positions of each pair to monitor.
    # NOTE(review): assumes _gen_data puts the needle pairs in the first
    # 2*n_needles columns and the distractors after them -- confirm.
    needle_idxs = [(2*i, 2*i + 1) for i in range(n_needles)]
    needle_cols = list(range(n_needles*2))
    distractor_cols = list(range(n_needles*2, n_needles*2 + n_distractors))
    combs = list(it.product(needle_cols, distractor_cols))
    # Cap the number of monitored needle/distractor pairs at 32.
    distractor_idxs = random.sample(combs, min(len(combs), 32))

    df = _gen_data(n_needles, n_distractors, n_rows, pairtype, pair_kws)

    engine = Engine(df, n_models=n_models)
    engine.init_models()
    # for model in engine._models:
    #     # XXX: emulates the log grid expected alpha
    #     # e.g. mean(exp(linspace(log(1/n_rows), log(rows))))
    #     # model['state_alpha'] = .5*(n_needles*2. + n_distractors)
    #     model['state_alpha'] = 100.

    # no column_alpha transition
    tlist = [b'row_assignment', b'column_assignment', b'row_alpha',
             b'column_hypers']

    n_steps = int(n_iter/iter_step)
    needle_dps = np.zeros((n_needles, n_steps + 1))
    distractor_dps = np.zeros((len(distractor_idxs), n_steps + 1))

    # Needle pairs are queried before distractor pairs, as in the original.
    tracked = ((needle_idxs, needle_dps), (distractor_idxs, distractor_dps))

    for step in range(n_steps + 1):
        engine.run(iter_step, trans_kwargs={'transition_list': tlist})
        # engine.run(iter_step)
        for pairs, trace in tracked:
            for row, (a, b) in enumerate(pairs):
                trace[row, step] = engine.dependence_probability(
                    df.columns[a], df.columns[b])

    # Measurement `step` is taken after (step + 1) * iter_step iterations,
    # because every pass through the loop above advances the sampler by
    # iter_step. Use that as the x coordinate so the plot is labeled with
    # the iterations actually run.
    iter_count = iter_step * np.arange(1, n_steps + 2)

    for y in distractor_dps:
        plt.plot(iter_count, y, color='gray', alpha=.3)

    for y in needle_dps:
        plt.plot(iter_count, y, color='crimson')

    # plt.gca().set_xscale('log')
    plt.ylim([-.05, 1.05])
    plt.xlim([iter_count[0], iter_count[-1]])
    plt.show()

    engine.heatmap('dependence_probability')
    plt.show()
def test_dependence_probability():
    """Perfectly correlated columns should look more dependent than noise.

    ``c1`` is ``c0`` shifted by a constant, while ``c2`` is drawn
    independently; after a short run the engine should report a higher
    dependence probability for (c0, c1) than for either pair involving c2.
    """
    base = np.random.randn(30)
    # Draw order matters for reproducibility under a fixed seed: the normal
    # sample first, then the uniform sample.
    noise = np.random.rand(30)
    df = pd.DataFrame({'c0': base, 'c1': base + 1.0, 'c2': noise})

    engine = Engine(df, n_models=20, use_mp=False)
    engine.init_models()
    engine.run(10)

    dependent = engine.dependence_probability('c0', 'c1')
    independent = [
        engine.dependence_probability('c0', 'c2'),
        engine.dependence_probability('c1', 'c2'),
    ]

    for depprob in independent:
        assert dependent > depprob