Ejemplo n.º 1
0
def test_models_pickle():
    """Round-trip every model through pickle and compare it to the original.

    Each model is serialized with ``pickle.dumps`` and restored with
    ``pickle.loads``; the restored object must report the same ``name()``.
    For the models named 'dd', 'niw' and 'dm' one extra size-dependent
    property is compared as well, so that the constructor argument is shown
    to survive the round trip.
    """
    candidates = (bb, bnb, gp, nich, dd(4), bbnc, niw(3), dm(5))

    for original in candidates:
        restored = pickle.loads(pickle.dumps(original))
        assert_equals(original.name(), restored.name())

        kind = original.name()
        if kind == 'dd':
            # compare the number of default alphas
            before = len(original.default_hyperparams()['alphas'])
            after = len(restored.default_hyperparams()['alphas'])
            assert_equals(before, after)
        elif kind == 'niw':
            # compare the length of the default mu
            before = len(original.default_hyperparams()['mu'])
            after = len(restored.default_hyperparams()['mu'])
            assert_equals(before, after)
        elif kind == 'dm':
            # compare the shape of the numpy dtype in the python descriptor
            before = original.py_desc().get_np_dtype().shape
            after = restored.py_desc().get_np_dtype().shape
            assert_equals(before, after)
Ejemplo n.º 2
0
def test_models_pickle():
    """Check that every model keeps its identity across a pickle round trip.

    All models must report an unchanged ``name()`` after
    ``pickle.loads(pickle.dumps(model))``. Models named 'dd', 'niw' and 'dm'
    additionally have one size-dependent property compared between the
    original and the unpickled copy.
    """
    # model name -> function extracting the size-dependent property to compare
    size_probes = {
        'dd': lambda m: len(m.default_hyperparams()['alphas']),
        'niw': lambda m: len(m.default_hyperparams()['mu']),
        'dm': lambda m: m.py_desc().get_np_dtype().shape,
    }

    for model in (bb, bnb, gp, nich, dd(4), bbnc, niw(3), dm(5)):
        clone = pickle.loads(pickle.dumps(model))
        assert_equals(model.name(), clone.name())

        probe = size_probes.get(model.name())
        if probe is not None:
            assert_equals(probe(model), probe(clone))
Ejemplo n.º 3
0
 def score_dataset(counts):
     """Score a matrix of count vectors under a one-group ``dm(K)`` model.

     Args:
         counts: 2-D array; ``counts.shape`` is unpacked as ``(M, K)``, i.e.
             M rows of K counts each.

     Returns:
         Whatever ``s.score_data(None, None, r)`` returns for the state
         initialized with every row assigned to group 0.
     """
     M, K = counts.shape
     # One structured record per row, holding a single field of K integers.
     # Builtin ``int`` is used because the ``np.int`` alias (which was just
     # the builtin) has been removed from NumPy.
     Y = np.array([(y, ) for y in counts], dtype=[('', int, (K, ))])
     view = cxx_numpy_dataview(Y)
     r = rng()
     defn = model_definition(M, [dm(K)])
     prior = {'alphas': [1.] * K}
     s = cxx_initialize(defn,
                        view,
                        r,
                        feature_hps=[prior],
                        assignment=[0] * M)
     # sanity check: the all-zero assignment must yield exactly one group
     assert_equals(s.groups(), [0])
     return s.score_data(None, None, r)
Ejemplo n.º 4
0
 def score_dataset(counts):
     """Compute the data score of ``counts`` with all rows in a single group.

     Args:
         counts: 2-D array whose ``shape`` unpacks to ``(M, K)``: M rows,
             each a vector of K counts.

     Returns:
         The result of ``s.score_data(None, None, r)`` on a state built from
         a ``dm(K)`` feature with all-ones 'alphas' and every row assigned
         to group 0.
     """
     M, K = counts.shape
     # Wrap each row in a 1-tuple so it becomes a structured record with one
     # K-integer field. ``int`` replaces the removed ``np.int`` alias, which
     # was the builtin ``int`` anyway.
     Y = np.array([(y,) for y in counts], dtype=[('', int, (K,))])
     view = cxx_numpy_dataview(Y)
     r = rng()
     defn = model_definition(M, [dm(K)])
     prior = {'alphas': [1.] * K}
     s = cxx_initialize(
         defn,
         view,
         r,
         feature_hps=[prior],
         assignment=[0] * M)
     # the all-zero assignment must produce exactly one group, with id 0
     assert_equals(s.groups(), [0])
     return s.score_data(None, None, r)
Ejemplo n.º 5
0
def test_dm_cxx():
    """Check that the ``dm`` sufficient statistics are the column sums.

    Three length-4 count vectors are loaded into a single group; the
    'counts' entry of that group's sufficient statistics must equal the
    element-wise sum of the three vectors.
    """
    K = 4
    # Builtin ``int`` replaces the ``np.int`` alias, which has been removed
    # from NumPy (it was only ever an alias of the builtin).
    Y = np.array([
        ([0, 1, 2, 5], ),
        ([1, 0, 1, 2], ),
        ([0, 2, 9, 9], ),
    ],
                 dtype=[('', int, (K, ))])
    # np.vstack needs a real sequence; passing a generator is not supported
    # by current NumPy releases.
    Y_np = np.vstack([y[0] for y in Y])

    cxx_view = cxx_numpy_dataview(Y)
    r = rng()
    defn = model_definition(Y.shape[0], [dm(K)])
    prior = {'alphas': [1.] * K}
    cxx_s = cxx_initialize(defn,
                           cxx_view,
                           r,
                           feature_hps=[prior],
                           assignment=[0] * Y.shape[0])

    counts = cxx_s.get_suffstats(0, 0)['counts']
    assert_sequence_equal(counts, list(Y_np.sum(axis=0)))
Ejemplo n.º 6
0
def test_dm_cxx():
    """Verify the 'counts' sufficient statistic of a one-group ``dm`` state.

    With all three rows assigned to group 0, ``get_suffstats(0, 0)['counts']``
    is expected to equal the per-column sum of the input count vectors.
    """
    K = 4
    # ``int`` instead of ``np.int``: the alias was removed from NumPy and
    # always meant the builtin type.
    Y = np.array([
        ([0, 1, 2, 5],),
        ([1, 0, 1, 2],),
        ([0, 2, 9, 9],),
    ], dtype=[('', int, (K,))])
    # Materialize the rows in a list: np.vstack no longer accepts generators.
    Y_np = np.vstack([y[0] for y in Y])

    cxx_view = cxx_numpy_dataview(Y)
    r = rng()
    defn = model_definition(Y.shape[0], [dm(K)])
    prior = {'alphas': [1.] * K}
    cxx_s = cxx_initialize(
        defn,
        cxx_view,
        r,
        feature_hps=[prior],
        assignment=[0] * Y.shape[0])

    counts = cxx_s.get_suffstats(0, 0)['counts']
    assert_sequence_equal(counts, list(Y_np.sum(axis=0)))
Ejemplo n.º 7
0
def test_betabin_equiv():
    """Check that a two-category ``dm`` model scores like a beta-binomial.

    M random (heads, tails) pairs with heads + tails == N are loaded into a
    single group. The score the state assigns to every possible outcome of
    N trials is then compared against a beta-binomial log-likelihood whose
    parameters are the prior (alpha, beta) plus the observed column sums.
    Both sets of scores must exponentiate to a total of ~1 and agree with
    each other to one decimal place.
    """

    # https://github.com/pymc-devs/pymc/blob/
    # a7ab153f2b58d81824a56166747c678d7f421bde/pymc/distributions/discrete.py#L84
    def betabin_like(value, alpha, beta, n):
        return (gammaln(alpha + beta) - gammaln(alpha) - gammaln(beta) +
                gammaln(n + 1) - gammaln(value + 1) - gammaln(n - value + 1) +
                gammaln(alpha + value) + gammaln(n + beta - value) -
                gammaln(beta + alpha + n))

    # this N refers to the number of trials in the binomial distribution
    N = 10

    # this refers to the dataset size
    M = 100

    # hyperparams of the beta dist
    alpha, beta = 1., 2.

    heads = np.random.randint(low=0, high=N + 1, size=M)
    tails = N - heads

    data = np.vstack((heads, tails)).T

    # builtin ``int`` replaces the ``np.int`` alias removed from NumPy
    Y = np.array([(y, ) for y in data], dtype=[('', int, (2, ))])
    view = cxx_numpy_dataview(Y)
    r = rng()
    defn = model_definition(Y.shape[0], [dm(2)])
    prior = {'alphas': [alpha, beta]}
    s = cxx_initialize(defn,
                       view,
                       r,
                       feature_hps=[prior],
                       assignment=[0] * Y.shape[0])

    assert_equals(s.groups(), [0])

    # every (heads, tails) outcome with heads + tails == N, in order of
    # increasing heads; enumerated directly instead of filtering all pairs
    all_data = [([n_heads, N - n_heads], ) for n_heads in range(N + 1)]

    Y_test = np.array(all_data, dtype=[('', int, (2, ))])

    # the actual score is simply a betabin using the updated alpha, beta
    alpha1, beta1 = np.array([alpha, beta]) + data.sum(axis=0)

    def model_score(Y_value):
        # score_value returns a pair; its second item holds the single score
        _, (score, ) = s.score_value(Y_value, r)
        return score

    def test_score(Y_value):
        # Y_value[0][0] is the heads count of the record
        score = betabin_like(Y_value[0][0], alpha1, beta1, N)
        return score

    # Build real lists: on Python 3 ``map`` is lazy and np.array(map(...))
    # would wrap the map object instead of producing an array of scores.
    model_scores = np.array([model_score(y_value) for y_value in Y_test])
    test_scores = np.array([test_score(y_value) for y_value in Y_test])

    assert_almost_equals(np.exp(model_scores).sum(), 1., places=2)
    assert_almost_equals(np.exp(test_scores).sum(), 1., places=2)
    assert_almost_equals(np.abs(model_scores - test_scores).max(),
                         0.,
                         places=1)
Ejemplo n.º 8
0
def test_betabin_equiv():
    """Compare ``dm(2)`` predictive scores against a beta-binomial.

    The state is initialized with M random (heads, tails) rows, each summing
    to N, all assigned to group 0. For every possible outcome of N trials
    the state's ``score_value`` is compared with ``betabin_like`` evaluated
    at the prior (alpha, beta) plus the observed column totals. The test
    asserts that each set of scores exponentiates to a total of ~1 and that
    the two agree to one decimal place.
    """

    # https://github.com/pymc-devs/pymc/blob/
    # a7ab153f2b58d81824a56166747c678d7f421bde/pymc/distributions/discrete.py#L84
    def betabin_like(value, alpha, beta, n):
        return (gammaln(alpha + beta) - gammaln(alpha) - gammaln(beta) +
                gammaln(n + 1) - gammaln(value + 1) - gammaln(n - value + 1) +
                gammaln(alpha + value) + gammaln(n + beta - value) -
                gammaln(beta + alpha + n))

    # this N refers to the number of trials in the binomial distribution
    N = 10

    # this refers to the dataset size
    M = 100

    # hyperparams of the beta dist
    alpha, beta = 1., 2.

    heads = np.random.randint(low=0, high=N + 1, size=M)
    tails = N - heads

    data = np.vstack((heads, tails)).T

    # ``int`` rather than ``np.int``: the alias has been removed from NumPy
    Y = np.array([(y,) for y in data], dtype=[('', int, (2,))])
    view = cxx_numpy_dataview(Y)
    r = rng()
    defn = model_definition(Y.shape[0], [dm(2)])
    prior = {'alphas': [alpha, beta]}
    s = cxx_initialize(
        defn,
        view,
        r,
        feature_hps=[prior],
        assignment=[0] * Y.shape[0])

    assert_equals(s.groups(), [0])

    # all (heads, tails) pairs summing to N, by increasing heads; listed
    # directly rather than by filtering the full (N + 1) x (N + 1) product
    all_data = [([n_heads, N - n_heads],) for n_heads in range(N + 1)]

    Y_test = np.array(all_data, dtype=[('', int, (2,))])

    # the actual score is simply a betabin using the updated alpha, beta
    alpha1, beta1 = np.array([alpha, beta]) + data.sum(axis=0)

    def model_score(Y_value):
        # second item of the returned pair is a 1-tuple holding the score
        _, (score,) = s.score_value(Y_value, r)
        return score

    def test_score(Y_value):
        # the first entry of the record's only field is the heads count
        score = betabin_like(Y_value[0][0], alpha1, beta1, N)
        return score

    # Use list comprehensions: np.array(map(...)) only works where ``map``
    # returns a list; on Python 3 it would wrap the lazy map object.
    model_scores = np.array([model_score(y_value) for y_value in Y_test])
    test_scores = np.array([test_score(y_value) for y_value in Y_test])

    assert_almost_equals(np.exp(model_scores).sum(), 1., places=2)
    assert_almost_equals(np.exp(test_scores).sum(), 1., places=2)
    assert_almost_equals(
        np.abs(model_scores - test_scores).max(), 0., places=1)