Ejemplo n.º 1
0
def fit_model(data_matrix,
              isotropic_w=True,
              isotropic_b=True,
              num_iter=NUM_ITER):
    """Fit the CRP model to ``data_matrix`` with collapsed Gibbs sweeps.

    The state is initialised from a k-means clustering of ``init_X(data_matrix)``.
    Each iteration resamples the latent values around the current cluster
    centers and then runs one collapsed Gibbs sweep.  Sampling stops after
    ``num_iter`` iterations or one hour of wall-clock time, whichever comes
    first, and the cluster centers are sampled once more before returning.

    Args:
        data_matrix: object providing ``observations``, ``fixed_variance()``
            and ``sample_latent_values(pred, variance)``.
        isotropic_w: forwarded to ``CRPModel``; when the variance is fixed it
            also selects a scalar (True) or length-D vector (False) for
            ``state.sigma_sq_w``.
        isotropic_b: forwarded to ``CRPModel``; when False the initial
            between-cluster variance is computed per column.
        num_iter: maximum number of Gibbs iterations.

    Returns:
        The ``CollapsedCRPState`` after the final sweep and center update.
    """
    X_init = init_X(data_matrix)
    N, D = X_init.shape

    model = CRPModel(1., D,
                     distributions.InverseGammaDistribution(0.01, 0.01),
                     distributions.InverseGammaDistribution(0.01, 0.01),
                     isotropic_w, isotropic_b)

    # N // 4 is 0 for fewer than four rows, and KMeans rejects zero clusters,
    # so always ask for at least one.
    k_init = max(1, min(N // 4, 40))
    km = sklearn.cluster.KMeans(n_clusters=k_init)
    km.fit(X_init)
    init_assignments = km.labels_

    # Split the overall variance evenly between the two variance parameters.
    sigma_sq_f = sigma_sq_n = X_init.var() / 2.
    if not model.isotropic_b:
        sigma_sq_f = X_init.var(0) / 2.
    state = CollapsedCRPState(X_init, init_assignments, sigma_sq_n, sigma_sq_f)
    state.centers = km.cluster_centers_

    fixed_variance = data_matrix.fixed_variance()

    data = data_matrix.observations

    if fixed_variance:
        if isotropic_w:
            state.sigma_sq_w = 1.
        else:
            state.sigma_sq_w = np.ones(D)

    pbar = misc.pbar(num_iter)

    max_seconds = 3600.  # 1 hour
    t0 = time.time()
    for it in range(num_iter):
        pred = state.centers[state.assignments, :]
        state.X = data_matrix.sample_latent_values(pred, state.sigma_sq_w)
        gibbs_sweep_collapsed(model, data, state, fixed_variance)

        if time.time() - t0 > max_seconds:
            break

        pbar.update(it)
    pbar.finish()

    # sample the centers
    cache = CollapsedCRPCache.from_state(model, data, state)
    gibbs_step_centers(model, data, state, cache)

    return state
Ejemplo n.º 2
0
def p_star(state, X, obs):
    """Sum the log-probability terms of ``state`` and the observed entries of ``X``.

    Terms are added in this order, with a finiteness check after each of the
    last four:
      1. ``log_poisson`` of the number of columns of ``state.U`` with rate 1;
      2. inverse-gamma(A, B) log-likelihood of ``state.ssq_U``;
      3. Gaussian log-likelihood of ``state.U`` with mean 0 and ``state.ssq_U``
         broadcast along the rows;
      4. Gaussian log-likelihood of ``state.V`` with parameters (0, 1);
      5. Gaussian log-likelihood of ``X`` around ``U . V`` with
         ``state.ssq_N``, restricted to the entries selected by ``obs``.

    Returns:
        The accumulated log-probability.
    """
    log_joint = log_poisson(state.U.shape[1], 1.)

    # Prior on the per-column values in ssq_U.
    log_joint += distributions.InverseGammaDistribution(A, B).loglik(state.ssq_U).sum()
    assert np.isfinite(log_joint)

    # Entries of U, using ssq_U broadcast across rows.
    u_prior = distributions.GaussianDistribution(0., state.ssq_U[nax, :])
    log_joint += u_prior.loglik(state.U).sum()
    assert np.isfinite(log_joint)

    # Entries of V.
    log_joint += distributions.GaussianDistribution(0., 1.).loglik(state.V).sum()
    assert np.isfinite(log_joint)

    # Observed entries of X around the low-rank prediction.
    reconstruction = np.dot(state.U, state.V)
    likelihood = distributions.GaussianDistribution(reconstruction, state.ssq_N)
    log_joint += likelihood.loglik(X)[obs].sum()
    assert np.isfinite(log_joint)

    return log_joint
Ejemplo n.º 3
0
def cond_sigma_sq_w(model, data, state):
    """Build the inverse-gamma conditional for ``sigma_sq_w``.

    Starting from ``model.within_var_prior``, ``a`` grows by half the sum of
    ``data.mask`` and ``b`` by half the masked squared distance between
    ``state.X`` and each row's assigned center.  With ``model.isotropic_w``
    the sums run over every entry; otherwise they run over axis 0 only,
    giving one parameter pair per column.

    Returns:
        ``distributions.InverseGammaDistribution(a, b)``.
    """
    # axis=None sums over all entries; axis=0 keeps one value per column.
    axis = None if model.isotropic_w else 0
    prior = model.within_var_prior

    residual = state.X - state.centers[state.assignments, :]
    mask_total = np.sum(data.mask, axis=axis)
    sq_error = np.sum(data.mask * residual**2, axis=axis)

    return distributions.InverseGammaDistribution(prior.a + 0.5 * mask_total,
                                                  prior.b + 0.5 * sq_error)
Ejemplo n.º 4
0
def cond_sigma_sq_b(model, data, state):
    """Build the inverse-gamma conditional for ``sigma_sq_b``.

    Only centers of clusters with at least one assigned row contribute.
    With ``model.isotropic_b`` a single parameter pair is produced from all
    entries of those centers; otherwise one pair per dimension is produced
    by summing over axis 0.

    Returns:
        ``distributions.InverseGammaDistribution(a, b)``.
    """
    prior = model.between_var_prior

    # Indices of clusters that currently have members.
    cluster_sizes = np.bincount(state.assignments)
    occupied = np.where(cluster_sizes > 0)[0]
    sq_centers = state.centers[occupied, :] ** 2

    if not model.isotropic_b:
        post_a = prior.a + 0.5 * occupied.size * np.ones(model.ndim)
        post_b = prior.b + 0.5 * np.sum(sq_centers, axis=0)
        return distributions.InverseGammaDistribution(post_a, post_b)

    post_a = prior.a + 0.5 * occupied.size * model.ndim
    post_b = prior.b + 0.5 * np.sum(sq_centers)
    return distributions.InverseGammaDistribution(post_a, post_b)
Ejemplo n.º 5
0
def fit_model(data_matrix, num_iter=NUM_ITER):
    """Fit the IBP model to ``data_matrix`` by Gibbs sampling.

    The state comes from ``sequential_init``.  Each iteration runs one
    ``gibbs_sweep`` and then resamples the latent values around ``Z . A``.
    Sampling stops after ``num_iter`` iterations or once ``TIME_LIMIT``
    seconds have elapsed.

    Args:
        data_matrix: object providing ``observations`` and ``fixed_variance()``.
        num_iter: maximum number of Gibbs iterations.

    Returns:
        The sampler state after the last completed iteration.
    """
    feature_var_prior = distributions.InverseGammaDistribution(1., 1.)
    noise_var_prior = distributions.InverseGammaDistribution(1., 1.)
    model = IBPModel(1., feature_var_prior, noise_var_prior)

    fixed_variance = data_matrix.fixed_variance()
    data = data_matrix.observations
    state = sequential_init(model, data, fixed_variance)

    progress = misc.pbar(num_iter)
    started = time.time()

    for sweep in range(num_iter):
        gibbs_sweep(model, data, state, True, True, fixed_variance)

        # Resample latent values around the current reconstruction.
        state.X = data.sample_latent_values(np.dot(state.Z, state.A),
                                            state.sigma_sq_n)

        elapsed = time.time() - started
        if elapsed > TIME_LIMIT:
            break

        progress.update(sweep)

    progress.finish()
    return state
def cond_sigma_sq_Z(state):
    """Build the inverse-gamma conditional for ``sigma_sq_Z``.

    Uses fixed base parameters of 1 for both ``a`` and ``b``: ``a`` grows by
    half the number of entries of ``state.Z`` and ``b`` by half the summed
    squared deviation of ``state.Z`` from ``state.mu_Z``.

    Returns:
        ``distributions.InverseGammaDistribution(a, b)``.
    """
    deviation = state.Z - state.mu_Z
    return distributions.InverseGammaDistribution(
        1. + 0.5 * state.Z.size,
        1. + 0.5 * np.sum(deviation ** 2))
Ejemplo n.º 7
0
 def cond_ssq_u(self):
     """Return the inverse-gamma conditional built from this object's counts.

     The module-level constants ``A`` and ``B`` are incremented by half of
     ``self.num_assigned`` and half of ``self.sum_u_sq`` respectively.
     """
     return distributions.InverseGammaDistribution(
         A + 0.5 * self.num_assigned,
         B + 0.5 * self.sum_u_sq)
Ejemplo n.º 8
0
def cond_sigma_sq_n(model, data, state):
    """Build the inverse-gamma conditional for ``sigma_sq_n``.

    The update is driven by the masked residual ``X - Z . A``, so it starts
    from ``model.noise_var_prior`` (the feature-variance prior belongs to the
    update driven by ``state.A``).  ``a`` grows by half the sum of
    ``data.mask`` and ``b`` by half the masked sum of squared residuals.

    Args:
        model: provides ``noise_var_prior`` with attributes ``a`` and ``b``.
        data: provides ``mask``, multiplied elementwise with the residual.
        state: provides ``X``, ``Z`` and ``A``.

    Returns:
        ``distributions.InverseGammaDistribution(a, b)``.
    """
    prior = model.noise_var_prior
    residual = state.X - np.dot(state.Z, state.A)
    a = prior.a + 0.5 * np.sum(data.mask)
    b = prior.b + 0.5 * np.sum(data.mask * residual**2)
    return distributions.InverseGammaDistribution(a, b)
Ejemplo n.º 9
0
def cond_sigma_sq_f(model, data, state):
    """Build the inverse-gamma conditional for ``sigma_sq_f``.

    The update is driven by the entries of ``state.A``, so it starts from
    ``model.feature_var_prior`` (the noise-variance prior belongs to the
    residual-based update).  ``a`` grows by half the number of entries of
    ``state.A`` and ``b`` by half their summed squares.

    Args:
        model: provides ``feature_var_prior`` with attributes ``a`` and ``b``.
        data: unused; kept so the signature matches the other conditionals.
        state: provides ``A``.

    Returns:
        ``distributions.InverseGammaDistribution(a, b)``.
    """
    prior = model.feature_var_prior
    a = prior.a + 0.5 * state.A.size
    b = prior.b + 0.5 * np.sum(state.A**2)
    return distributions.InverseGammaDistribution(a, b)