Example #1
0
def test_simulate_bulk__ci_(engine):
    """Check the nesting and sizes of ``Engine.simulate_bulk`` results.

    The engine is rebuilt from the ``engine`` metadata, then queried with
    three (rowid, targets, constraints, N) requests at once, first over all
    states (``statenos=None``) and then over an explicit list of states.
    """
    engine = Engine.from_metadata(engine)

    # Each entry is one bulk request: (rowid, targets, constraints, N).
    queries = [
        (-1, [0, 2, 4, 5], {3: 1}, 7),
        (5, [1, 3], {2: 1}, 3),
        (8, [0], {4: .8}, 3),
    ]
    rowids = [query[0] for query in queries]
    targets_list = [query[1] for query in queries]
    constraints_list = [query[2] for query in queries]
    Ns = [query[3] for query in queries]

    def check_dimensions(statenos):
        samples = engine.simulate_bulk(
            rowids,
            targets_list,
            constraints_list=constraints_list,
            Ns=Ns,
            statenos=statenos,
        )
        # Outer level: one entry per requested state.
        if statenos is None:
            assert len(samples) == engine.num_states()
        else:
            assert len(samples) == len(statenos)
        for per_state in samples:
            # Middle level: one entry per bulk request.
            assert len(per_state) == len(rowids)
            for draws, count, targets in zip(per_state, Ns, targets_list):
                # Inner level: N draws, each keyed by exactly the targets.
                assert len(draws) == count
                for draw in draws:
                    assert set(draw.keys()) == set(targets)
                    assert len(draw) == len(targets)

    check_dimensions(None)
    check_dimensions([4])
Example #2
0
    def _engine(self, bdb, generator_id):
        """Return the engine for `generator_id`, using the cache if present.

        The cache obtained from ``self._cache(bdb)`` is consulted first.  On
        a miss (or when there is no cache) the serialized engine is read
        from the ``bayesdb_cgpm_generator`` table, deserialized, stored in
        the cache when one exists, and returned.

        Raises BQLError if the stored ``engine_json`` is None.
        """
        cache = self._cache(bdb)
        have_cache = cache is not None

        # Fast path: the engine was deserialized earlier.
        if have_cache and generator_id in cache.engine:
            return cache.engine[generator_id]

        # Slow path: fetch the serialized engine from the database.
        sql = '''
            SELECT engine_json FROM bayesdb_cgpm_generator
                WHERE generator_id = ?
        '''
        engine_json = cursor_value(bdb.sql_execute(sql, (generator_id, )))
        if engine_json is None:
            generator = core.bayesdb_generator_name(bdb, generator_id)
            raise BQLError(
                bdb, 'No models initialized for generator: %r' % (generator, ))

        # Rebuild the engine from its JSON form.
        metadata = json.loads(engine_json)
        engine = Engine.from_metadata(
            metadata, rng=bdb.np_prng, multiprocess=self._ncpu)

        # Remember it for next time when caching is available.
        if have_cache:
            cache.engine[generator_id] = engine
        return engine
Example #3
0
def test_logpdf_bulk__ci_(engine):
    """Check the nesting and sizes of ``Engine.logpdf_bulk`` results.

    The engine is rebuilt from the ``engine`` metadata and queried with two
    (rowid, targets, constraints) requests at once, first over all states
    (``statenos=None``) and then over an explicit list of states.
    """
    engine = Engine.from_metadata(engine)
    rowid1, targets1, constraints1 = 5, {0: 0}, {2: 1, 3: .5}
    rowid2, targets2, constraints2 = -1, {1: 0, 4: .8}, {5: .5}
    # Bulk.
    rowids = [rowid1, rowid2]
    targets_list = [targets1, targets2]
    constraints_list = [constraints1, constraints2]

    def test_correct_dimensions(statenos):
        # Invoke
        logpdfs = engine.logpdf_bulk(rowids,
                                     targets_list,
                                     constraints_list=constraints_list,
                                     statenos=statenos)
        # The conditional must be parenthesised: without the parentheses the
        # statement parses as `assert ((a == b) if c else d)`, which for an
        # explicit `statenos` only asserts that len(statenos) is truthy.
        assert len(logpdfs) == (engine.num_states()
                                if statenos is None else len(statenos))
        for state_logpdfs in logpdfs:
            # state_logpdfs should be a list of floats, one float per targets.
            assert len(state_logpdfs) == len(rowids)
            for l in state_logpdfs:
                assert isinstance(l, float)

    test_correct_dimensions(statenos=None)
    test_correct_dimensions(statenos=[0, 1, 4, 5])
Example #4
0
def test_row_similarity__ci_(engine):
    """Check that ``Engine.row_similarity`` returns one result per state.

    Rows 0 and 2 are compared, first over all states and then over an
    explicit selection of three states.
    """
    engine = Engine.from_metadata(engine)
    row_a, row_b = 0, 2

    # All states: expect as many results as the engine has states.
    over_all_states = engine.row_similarity(row_a, row_b, statenos=None)
    assert len(over_all_states) == engine.num_states()

    # Explicit states: expect one result per requested state.
    over_selection = engine.row_similarity(row_a, row_b, statenos=[1, 4, 5])
    assert len(over_selection) == 3
Example #5
0
def test_dependence_probability__ci_(engine):
    """Check that ``Engine.dependence_probability`` returns one result per state.

    Columns 0 and 2 are queried, first over all states and then over an
    explicit selection of two states.
    """
    engine = Engine.from_metadata(engine)
    col_a, col_b = 0, 2

    # All states: expect as many results as the engine has states.
    over_all_states = engine.dependence_probability(
        col_a, col_b, statenos=None)
    assert len(over_all_states) == engine.num_states()

    # Explicit states: expect one result per requested state.
    over_selection = engine.dependence_probability(
        col_a, col_b, statenos=[1, 4])
    assert len(over_selection) == 2
Example #6
0
 def _populate_from_metadata(model, metadata):
     """Fill `model`'s internal fields from `metadata` and return `model`.

     Sets ``model.initialized``, rebuilds ``model.dataset`` as a DataFrame
     from the stored values/index/columns, and restores ``model.engine``
     from ``metadata['engine']`` only when the model is initialized
     (otherwise the engine is set to None).
     """
     model.initialized = metadata['initialized']

     frame = pd.DataFrame(
         metadata['dataset.values'],
         index=metadata['dataset.index'],
         columns=metadata['dataset.columns'],
     )
     model.dataset = frame

     # The 'engine' entry is only read when the model is initialized.
     if model.initialized:
         model.engine = Engine.from_metadata(metadata['engine'])
     else:
         model.engine = None
     return model
Example #7
0
def test_relevance_probability__ci_(engine):
    """Check that ``Engine.relevance_probability`` returns one result per state.

    The same query is issued with ``statenos=None`` and with an explicit
    range covering every state; both must yield ``num_states()`` results.
    """
    engine = Engine.from_metadata(engine)

    # Implicit selection of every state.
    implicit = engine.relevance_probability(0, [2, 14], 0, statenos=None)
    assert len(implicit) == engine.num_states()

    # Explicit selection of every state.
    every_state = range(engine.num_states())
    explicit = engine.relevance_probability(
        0, [2, 14], 0, statenos=every_state)
    assert len(explicit) == engine.num_states()
Example #8
0
 def from_metadata(metadata, seed):
     """Rebuild a TRCRP_Mixture from `metadata`, seeding its RNG with `seed`.

     The constructor receives the stored chains, lag and variables plus a
     fresh ``np.random.RandomState(seed)``.  The internal fields
     (``initialized``, ``dataset``, ``engine``) are then restored; the
     engine is only deserialized when the model is initialized, otherwise
     it is set to None.
     """
     constructor_kwargs = {
         'chains': metadata['chains'],
         'lag': metadata['lag'],
         'variables': metadata['variables'],
         'rng': np.random.RandomState(seed),
     }
     model = TRCRP_Mixture(**constructor_kwargs)

     # Internal fields.
     model.initialized = metadata['initialized']
     model.dataset = pd.DataFrame(
         metadata['dataset.values'],
         index=metadata['dataset.index'],
         columns=metadata['dataset.columns'],
     )
     if model.initialized:
         model.engine = Engine.from_metadata(metadata['engine'])
     else:
         model.engine = None
     return model
Example #9
0
def test_simulate__ci_(engine):
    """Check the nesting and sizes of ``Engine.simulate`` results.

    For several (rowid, targets, constraints) queries and for N in (1, 8),
    simulate over all states and over an explicit pair of states, verify
    the shape of the raw per-state samples, and verify that the
    likelihood-weighted resample has N entries.
    """
    engine = Engine.from_metadata(engine)

    def check_dimensions(rowid, targets, constraints, N, statenos):
        samples = engine.simulate(
            rowid,
            targets,
            constraints=constraints,
            N=N,
            statenos=statenos,
        )
        # One entry per requested state.
        if statenos is None:
            assert len(samples) == engine.num_states()
        else:
            assert len(samples) == len(statenos)
        for per_state in samples:
            # Each state contributes a list of N samples.
            assert len(per_state) == N
            for draw in per_state:
                # Each raw sample is keyed by exactly the targets.
                assert set(draw.keys()) == set(targets)
                assert len(draw) == len(targets)
        resampled = engine._likelihood_weighted_resample(
            samples,
            rowid,
            constraints=constraints,
            statenos=statenos,
        )
        assert len(resampled) == N

    targets1, constraints1 = [0], {2: 0, 3: 6}
    targets2, constraints2 = [1, 2, 5], {0: 3, 3: .8}

    for statenos in (None, (1, 3)):
        targets3, constraints3 = [0, 1, 2], {3: 1}
        cases = (
            (-1, targets1, constraints1),
            (-1, targets2, constraints2),
            (5, targets3, constraints3),
        )
        for rowid, targets, constraints in cases:
            for N in (1, 8):
                check_dimensions(rowid, targets, constraints, N, statenos)
Example #10
0
def test_logpdf__ci_(engine):
    """Check the shape and types of ``Engine.logpdf`` results.

    For several (rowid, targets, constraints) queries, evaluate over all
    states and over an explicit list of states, verify there is one float
    per state, and verify the likelihood-weighted integral is a float.
    """
    engine = Engine.from_metadata(engine)

    def check_dimensions(rowid, targets, constraints, statenos):
        # The result should be a list of floats, one per requested state.
        logpdfs = engine.logpdf(
            rowid,
            targets,
            constraints=constraints,
            statenos=statenos,
        )
        if statenos is None:
            assert len(logpdfs) == engine.num_states()
        else:
            assert len(logpdfs) == len(statenos)
        for value in logpdfs:
            # Each per-state entry is a single float.
            assert isinstance(value, float)
        integrated = engine._likelihood_weighted_integrate(
            logpdfs,
            rowid,
            constraints=constraints,
            statenos=statenos,
        )
        assert isinstance(integrated, float)

    for statenos in (None, [0, 2, 4]):
        cases = (
            (-1, {0: 1}, {2: 1, 3: .5}),
            (-1, {2: 0, 5: 3}, {0: 4, 1: 5}),
            (5, {0: 0, 2: 0}, {3: 3}),
        )
        for rowid, targets, constraints in cases:
            check_dimensions(rowid, targets, constraints, statenos)
Example #11
0
def test_incorporate_engine():
    """Incorporate a dimension with a non-contiguous output, then round-trip.

    An engine is built on the first two columns of T, a third dimension is
    incorporated under output index 10, and the engine is serialized and
    deserialized.  A targeted transition on variable 10 is run on the
    deserialized copy, and the outputs of every state are checked on both
    the original and the deserialized engine.
    """
    engine = Engine(
        T[:,:2],
        cctypes=CCTYPES[:2],
        distargs=DISTARGS[:2],
        num_states=4,
        rng=gu.gen_rng(0),
    )
    engine.transition(N=5)

    # Incorporate a new dim with a non-contiguous output.
    engine.incorporate_dim(
        T[:,2],
        outputs=[10],
        cctype=CCTYPES[2],
        distargs=DISTARGS[2]
    )
    engine.transition(N=2)

    # Serialize the engine, and run a targeted transition on variable 10.
    m = engine.to_metadata()
    engine2 = Engine.from_metadata(m)
    engine2.transition(N=2, cols=[10], multiprocess=0)

    expected_outputs = [0,1,10]
    assert all(s.outputs == expected_outputs for s in engine.states)
    # Also verify the deserialized engine, which is the one that was
    # transitioned after the round-trip; checking only `engine` would leave
    # the serialization path untested.
    assert all(s.outputs == expected_outputs for s in engine2.states)