def test_dependence_probability():
    '''Test that Loom correctly recovers a 2-view dataset.'''
    # Synthetic table: six normal columns partitioned into two views of three.
    data, _, _ = tu.gen_data_table(
        n_rows=150,
        view_weights=None,
        cluster_weights=[[.2, .2, .2, .4], [.3, .2, .5]],
        cctypes=['normal'] * 6,
        distargs=[None] * 6,
        separation=[0.95] * 6,
        view_partition=[0, 0, 0, 1, 1, 1],
        rng=gu.gen_rng(12),
    )

    engine = Engine(
        data.T,
        outputs=[7, 2, 12, 80, 129, 98],
        cctypes=['normal'] * len(data),
        distargs=[None] * 6,
        rng=gu.gen_rng(122),
        num_states=20,
    )

    # Loom inference should improve the mean marginal score across states.
    score_before = engine.logpdf_score()
    engine.transition_loom(N=100)
    score_after = engine.logpdf_score()
    assert numpy.mean(score_after) > numpy.mean(score_before)

    dep_prob = numpy.mean(
        engine.dependence_probability_pairwise(), axis=0)

    # Columns within the same view should be detected as dependent.
    for view in ([0, 1, 2], [3, 4, 5]):
        for i, j in itertools.combinations(view, 2):
            assert dep_prob[i, j] > 0.8

    # Columns in different views should be detected as independent.
    for i in (0, 1, 2):
        for j in (3, 4, 5):
            assert dep_prob[i, j] < 0.2
# Example #2 (snippet-site paste artifact, vote count "0" — commented out so the file parses)
def test_logpdf_score_crash():
    """Smoke test: logpdf_score runs and stays below logpdf_likelihood."""
    rng = gen_rng(8)
    data = rng.normal(size=30).reshape(-1, 1)
    engine = Engine(data, cctypes=['normal'], rng=rng, num_states=4)

    likelihood_start = np.array(engine.logpdf_likelihood())
    score_start = np.array(engine.logpdf_score())
    # The score adds (negative) prior log-density terms, so it must sit
    # strictly below the bare likelihood in every state.
    assert np.all(score_start < likelihood_start)

    engine.transition(N=100)
    engine.transition(kernels=['column_hypers', 'view_alphas'], N=10)

    likelihood_end = np.asarray(engine.logpdf_likelihood())
    score_end = np.asarray(engine.logpdf_score())
    assert np.all(score_end < likelihood_end)
    # Inference should improve the best score across states.
    assert np.max(score_start) < np.max(score_end)
def test_multiple_stattypes():
    '''Test cgpm statistical types are heuristically converted to Loom types.'''
    cctypes, distargs = cu.parse_distargs([
        'normal', 'poisson', 'bernoulli', 'categorical(k=4)', 'lognormal',
        'exponential', 'beta', 'geometric', 'vonmises'
    ])

    T, Zv, Zc = tu.gen_data_table(
        200, [1], [[.25, .25, .5]], cctypes, distargs,
        [.95] * len(cctypes), rng=gu.gen_rng(10))

    engine = Engine(
        T.T,
        cctypes=cctypes,
        distargs=distargs,
        rng=gu.gen_rng(15),
        num_states=16,
    )

    # Loom inference should improve the mean marginal score across states.
    score_before = engine.logpdf_score()
    engine.transition_loom(N=5)
    score_after = engine.logpdf_score()
    assert numpy.mean(score_after) > numpy.mean(score_before)

    # Check serialization round-trip through metadata.
    metadata = engine.to_metadata()
    factory_module = importlib.import_module(metadata['factory'][0])
    factory = getattr(factory_module, metadata['factory'][1])
    engine2 = factory.from_metadata(metadata)

    # Round-trip through JSON as well.
    json_metadata = json.dumps(engine.to_metadata())
    engine3 = factory.from_metadata(json.loads(json_metadata))

    # Every state in every engine must point at the same loom project path.
    loom_paths = [
        state._loom_path
        for e in (engine, engine2, engine3)
        for state in e.states
    ]
    assert all(path == loom_paths[0] for path in loom_paths)

    engine2.transition(S=5)
    assert numpy.all(engine2.dependence_probability_pairwise() > 0.85)
def state():
    """Build a small engine, run inference, and return its best-scoring state."""
    engine = Engine(
        DATA,
        cctypes=['normal', 'categorical'],
        distargs=[None, {'k': 6}],
        num_states=4,
        rng=gu.gen_rng(212),
    )
    engine.transition(N=15)
    # Rank states by marginal score, best first, and return the winner.
    scores = engine.logpdf_score()
    ranking = np.argsort(scores)[::-1]
    return engine.get_state(ranking[0])
# Example #5 (snippet-site paste artifact, vote count "0" — commented out so the file parses)
def generate_gpmcc_posteriors(cctype, distargs, D_train, iters, seconds):
    """Learns gpmcc on D_train for seconds and simulates NUM_TEST times."""
    engine = Engine(
        D_train,
        cctypes=[cctype],
        distargs=[distargs],
        num_states=64,
        rng=gu.gen_rng(1),
    )
    # Main inference run, bounded by both iteration count and wall clock.
    engine.transition(N=iters, S=seconds, progress=0)
    if iters:
        # Conditional cctypes transition 'column_params'; others 'column_hypers'.
        if cu.cctype_class(cctype).is_conditional():
            kernel = 'column_params'
        else:
            kernel = 'column_hypers'
        engine.transition(N=100, kernels=[kernel], progress=0)
    samples = engine.simulate(-1, [0], N=NUM_TEST)
    # Rank states by marginal score (best first); plot and keep the top five.
    ranking = np.argsort(engine.logpdf_score())[::-1]
    for idx in ranking[:5]:
        engine.get_state(idx).plot()
    return [samples[idx] for idx in ranking[:5]]