def test_dependence_probability():
    '''Test that Loom correctly recovers a 2-view dataset.'''
    D, Zv, Zc = tu.gen_data_table(
        n_rows=150,
        view_weights=None,
        cluster_weights=[
            [.2, .2, .2, .4],
            [.3, .2, .5],
        ],
        cctypes=['normal'] * 6,
        distargs=[None] * 6,
        separation=[0.95] * 6,
        view_partition=[0, 0, 0, 1, 1, 1],
        rng=gu.gen_rng(12))

    engine = Engine(
        D.T,
        outputs=[7, 2, 12, 80, 129, 98],
        cctypes=['normal'] * len(D),
        distargs=[None] * 6,
        rng=gu.gen_rng(122),
        num_states=20,
    )

    logscore0 = engine.logpdf_score()
    engine.transition_loom(N=100)
    logscore1 = engine.logpdf_score()
    assert numpy.mean(logscore1) > numpy.mean(logscore0)

    dependence_probability = numpy.mean(
        engine.dependence_probability_pairwise(),
        axis=0)

    assert dependence_probability[0, 1] > 0.8
    assert dependence_probability[1, 2] > 0.8
    assert dependence_probability[0, 2] > 0.8

    assert dependence_probability[3, 4] > 0.8
    assert dependence_probability[4, 5] > 0.8
    assert dependence_probability[3, 5] > 0.8

    assert dependence_probability[0, 3] < 0.2
    assert dependence_probability[0, 4] < 0.2
    assert dependence_probability[0, 5] < 0.2

    assert dependence_probability[1, 3] < 0.2
    assert dependence_probability[1, 4] < 0.2
    assert dependence_probability[1, 5] < 0.2

    assert dependence_probability[2, 3] < 0.2
    assert dependence_probability[2, 4] < 0.2
    assert dependence_probability[2, 5] < 0.2

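
# Illustrative sketch (not part of the original suite): the assertion block
# above mirrors the generating view_partition=[0, 0, 0, 1, 1, 1] -- columns
# that share a view should be inferred as dependent (> 0.8) and columns in
# different views as independent (< 0.2). A hypothetical helper that derives
# that expected pattern directly from the partition:
def expected_dependence_pattern(view_partition):
    """Return a boolean matrix that is True where two columns share a view."""
    return numpy.equal.outer(view_partition, view_partition)
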
def test_logpdf_score_crash():
    rng = gen_rng(8)
    # T = rng.choice([0,1], p=[.3,.7], size=250).reshape(-1,1)
    T = rng.normal(size=30).reshape(-1, 1)
    engine = Engine(T, cctypes=['normal'], rng=rng, num_states=4)

    logpdf_likelihood_initial = np.array(engine.logpdf_likelihood())
    logpdf_score_initial = np.array(engine.logpdf_score())
    # The score adds (negative) log prior terms for the latent structure to
    # the data likelihood, so it should always fall below the likelihood.
    assert np.all(logpdf_score_initial < logpdf_likelihood_initial)
    # assert np.all(logpdf_likelihood_initial < logpdf_score_initial)

    engine.transition(N=100)
    engine.transition(kernels=['column_hypers', 'view_alphas'], N=10)

    logpdf_likelihood_final = np.asarray(engine.logpdf_likelihood())
    logpdf_score_final = np.asarray(engine.logpdf_score())
    assert np.all(logpdf_score_final < logpdf_likelihood_final)
    assert np.max(logpdf_score_initial) < np.max(logpdf_score_final)

def test_multiple_stattypes():
    '''Test cgpm statistical types are heuristically converted to Loom types.'''
    cctypes, distargs = cu.parse_distargs([
        'normal',
        'poisson',
        'bernoulli',
        'categorical(k=4)',
        'lognormal',
        'exponential',
        'beta',
        'geometric',
        'vonmises',
    ])

    T, Zv, Zc = tu.gen_data_table(
        200, [1], [[.25, .25, .5]], cctypes, distargs,
        [.95] * len(cctypes), rng=gu.gen_rng(10))

    engine = Engine(
        T.T,
        cctypes=cctypes,
        distargs=distargs,
        rng=gu.gen_rng(15),
        num_states=16,
    )

    logscore0 = engine.logpdf_score()
    engine.transition_loom(N=5)
    logscore1 = engine.logpdf_score()
    assert numpy.mean(logscore1) > numpy.mean(logscore0)

    # Check serialization.
    metadata = engine.to_metadata()
    modname = importlib.import_module(metadata['factory'][0])
    builder = getattr(modname, metadata['factory'][1])
    engine2 = builder.from_metadata(metadata)

    # To JSON.
    json_metadata = json.dumps(engine.to_metadata())
    engine3 = builder.from_metadata(json.loads(json_metadata))

    # Assert all states in engine, engine2, and engine3 have the same loom_path.
    loom_paths = list(itertools.chain.from_iterable(
        [s._loom_path for s in e.states]
        for e in [engine, engine2, engine3]))
    assert all(p == loom_paths[0] for p in loom_paths)

    engine2.transition(S=5)
    dependence_probability = engine2.dependence_probability_pairwise()
    assert numpy.all(dependence_probability > 0.85)

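
# For orientation (an assumption about the heuristic, not the backend's
# actual lookup table): the conversion exercised above pairs each cgpm
# cctype with the closest Loom feature model, roughly along these lines:
#
#     bernoulli        -> bb   (beta-Bernoulli)
#     categorical(k=4) -> dd   (Dirichlet-discrete)
#     poisson          -> gp   (gamma-Poisson)
#     normal           -> nich (normal-inverse-chi-squared)
#
# with the remaining types approximated by the nearest count or
# real-valued model.
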
def state():
    # Create an engine.
    engine = Engine(
        DATA,
        cctypes=['normal', 'categorical'],
        distargs=[None, {'k': 6}],
        num_states=4,
        rng=gu.gen_rng(212))
    engine.transition(N=15)
    # Return the state with the highest marginal log score.
    marginals = engine.logpdf_score()
    ranking = np.argsort(marginals)[::-1]
    return engine.get_state(ranking[0])

def generate_gpmcc_posteriors(cctype, distargs, D_train, iters, seconds):
    """Learn gpmcc on D_train within the iters/seconds budget and simulate
    NUM_TEST samples per state."""
    # Learning and posterior simulation.
    engine = Engine(
        D_train,
        cctypes=[cctype],
        distargs=[distargs],
        num_states=64,
        rng=gu.gen_rng(1))
    engine.transition(N=iters, S=seconds, progress=0)
    if iters:
        kernel = ('column_params'
            if cu.cctype_class(cctype).is_conditional()
            else 'column_hypers')
        engine.transition(N=100, kernels=[kernel], progress=0)
    samples = engine.simulate(-1, [0], N=NUM_TEST)
    marginals = engine.logpdf_score()
    ranking = np.argsort(marginals)[::-1]
    for r in ranking[:5]:
        engine.get_state(r).plot()
    return [samples[i] for i in ranking[:5]]

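
# Illustrative follow-on (not part of the suite): generate_gpmcc_posteriors
# returns, for each of the five highest-scoring states, NUM_TEST simulated
# draws of column 0 (assuming each draw is a dict keyed by output index, as
# engine.simulate produces). A hypothetical goodness-of-fit check could
# compare each set of draws against the training column with a two-sample
# KS test:
def _example_ks_check(D_train, posteriors, alpha=0.05):
    from scipy.stats import ks_2samp
    observed = np.asarray(D_train).ravel()
    for draws in posteriors:
        simulated = np.asarray([d[0] for d in draws])
        _, p_value = ks_2samp(observed, simulated)
        assert p_value > alpha
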