Beispiel #1
def generate_smooth_gp_re_a(out_fname='data.csv', country_variation=True):
    """ Generate random data based on a nested gaussian process random
    effects model with age, with covariates that vary smoothly over
    time (where unexplained variation in time does not interact with
    unexplained variation in age)

    This function generates data for all countries in all regions, and
    all age groups based on the model::

        Y_r,c,t,a = beta * X_r,c,t + f_r(t) + g_r(a) + f_c(t)

        beta = [30., -.5, .1, .1, -.1, 0., 0., 0., 0., 0.]
        g   ~ GP(0, C(1.))   (a single draw shared by every region)
        f_r ~ GP(0, C(1.))
        g_r ~ GP(g, C(1.))
        f_c ~ GP(0, C(.1)), added only when country_variation is true
        C(amp) = Matern(amp, scale=25., diff_degree=2)

        X_r,c,t[0] = 1
        X_r,c,t[1] = index of t within time_range
        X_r,c,t[k] ~ GP(t; 0, C(1.)) for k >= 2

    Parameters
    ----------
    out_fname : str
      passed, together with the generated rows, to ``write``
    country_variation : bool
      when true, the country-level term f_c(t) is added to Y

    Results
    -------
    Returns None; one row ``[r, c, t, a, y, se] + x`` is appended to
    the list returned by ``col_names()`` for every region, country,
    time and age, and the result is handed to ``write``.
    """
    c4 = countries_by_region()

    data = col_names()

    beta = [30., -.5, .1, .1, -.1, 0., 0., 0., 0., 0.]
    C0 = gp.matern.euclidean(time_range, time_range, amp=1., scale=25., diff_degree=2)
    C1 = gp.matern.euclidean(age_range, age_range, amp=1., scale=25., diff_degree=2)
    C2 = gp.matern.euclidean(time_range, time_range, amp=.1, scale=25., diff_degree=2)
    C3 = gp.matern.euclidean(time_range, time_range, amp=1., scale=25., diff_degree=2)

    # age pattern shared by all regions; each region deviates from it
    g = mc.rmv_normal_cov(pl.zeros_like(age_range), C1)
    for r in c4:
        f_r = mc.rmv_normal_cov(pl.zeros_like(time_range), C0)
        g_r = mc.rmv_normal_cov(g, C1)
        for c in c4[r]:
            # drawn even when country_variation is false, so the sequence
            # of random draws does not depend on the flag
            f_c = mc.rmv_normal_cov(pl.zeros_like(time_range), C2)

            x_gp = {}
            for k in range(2,10):
                x_gp[k] = mc.rmv_normal_cov(pl.zeros_like(time_range), C3)

            for j, t in enumerate(time_range):
                # the covariates and the linear predictor depend on time
                # only, so compute them once per time point
                x = [1] + [j] + [x_gp[k][j] for k in range(2,10)]
                y_t = float(pl.dot(beta, x)) + f_r[j]
                for i, a in enumerate(age_range):
                    y = y_t + g_r[i]
                    if country_variation:
                        y += f_c[j]
                    se = 0.
                    data.append([r, c, t, a, y, se] + x)
    write(data, out_fname)
Beispiel #2
Beispiel #3
    def propose(self):
        """Draw a new value for ``self.stochastic`` block by block.

        For each column ``j >= 1`` of ``Y`` a Gaussian linear regression is
        formed: the scaled residual of column ``j`` is the response and the
        negated, equally scaled residuals of columns ``0..j-1`` are the
        regressors.  The matching slice of ``self.b_bar`` is used as prior
        mean and the matching diagonal block of ``self.Pb_bar`` is inverted
        to give the prior precision.  One multivariate normal draw from the
        resulting posterior fills the corresponding slice of the new value.
        """
        resid = np.asarray(self.Y.value) - np.asarray(self.muY.value)
        # exp(LH)**(-0.5) with its first row dropped
        scale = (np.exp(self.LH.value)**(-0.5))[1:]
        n_cols = np.asarray(self.Y.value).shape[1]
        draw = np.empty_like(self.stochastic.value)

        # [start, stop) selects the coefficients of the current equation;
        # equation j owns j coefficients
        start, stop = 0, 1

        for j in range(1, n_cols):
            # response of the regression
            response = np.expand_dims(scale[:, j], 1)*np.expand_dims(resid[:, j], 1)
            # regressors of the regression
            design = np.expand_dims(-scale[:, j], 1)*resid[:, :j]

            prior_mean = np.asarray([self.b_bar[start:stop]])
            prior_prec = inv(self.Pb_bar[start:stop, start:stop])

            post_cov = inv(prior_prec + design.T @ design)
            post_mean = post_cov @ (prior_prec @ prior_mean.T + design.T @ response)

            draw[start:stop] = pm.rmv_normal_cov(post_mean.ravel(), post_cov)
            start, stop = stop, stop + j + 1

        self.stochastic.value = draw
Beispiel #4
    def _set_initial_values(self, alpha0, nu0, Phi0, mu0, Sigma0, weights0,
                            alpha_a0, alpha_b0):
        """Store starting values and prior settings, filling in defaults.

        Any of ``nu0``, ``Phi0``, ``Sigma0``, ``mu0`` and ``weights0`` that
        is None is replaced:

        - ``nu0`` defaults to 3
        - ``Phi0`` defaults to ``(nu0 - 1) * I`` for every component
        - ``Sigma0[j]`` is drawn with ``pm.rinverse_wishart_prec`` using
          ``nu0 + 2 + ncomp`` and ``Phi0[j]``
        - ``mu0[j]`` is drawn from a normal centred on
          ``self.mu_prior_mean`` with covariance ``gamma[j] * Sigma0[j]``
        - ``weights0`` is the second value returned by
          ``stick_break_proc(1, 1, size=ncomp - 1)``

        ``alpha0``, ``alpha_a0`` and ``alpha_b0`` are stored unchanged.
        Results are saved on underscore-prefixed attributes of ``self``.
        """
        if nu0 is None:
            nu0 = 3

        if Phi0 is None:
            Phi0 = np.empty((self.ncomp, self.ndim, self.ndim))
            Phi0[:] = np.eye(self.ndim) * (nu0 - 1)

        if Sigma0 is None:
            # draw from prior
            Sigma0 = np.empty((self.ncomp, self.ndim, self.ndim))
            # range (not the Python-2-only xrange) runs on Python 2 and 3
            for j in range(self.ncomp):
                Sigma0[j] = pm.rinverse_wishart_prec(nu0 + 2 + self.ncomp, Phi0[j])

        # starting values, are these sensible?
        if mu0 is None:
            mu0 = np.empty((self.ncomp, self.ndim))
            for j in range(self.ncomp):
                mu0[j] = pm.rmv_normal_cov(self.mu_prior_mean,
                                           self.gamma[j] * Sigma0[j])

        if weights0 is None:
            _, weights0 = stick_break_proc(1, 1, size=self.ncomp - 1)

        self._alpha0 = alpha0
        self._alpha_a0 = alpha_a0
        self._alpha_b0 = alpha_b0

        self._weights0 = weights0
        self._mu0 = mu0
        self._Sigma0 = Sigma0
        self._nu0 = nu0 # prior degrees of freedom
        self._Phi0 = Phi0 # prior location for Sigma_j's
Beispiel #5
def complex_hierarchical_data(n):
    """ Generate data based on the much more complicated model
    given in section 3.2.1::

        y_ij ~ N(mu_j - exp(beta_j)t_ij - exp(gamma_j)t_ij^2, sigma_j^2)
        gamma_j | sigma^2, xi, X_j ~ N(eta_0 + eta_1 X_j + eta_2 X_j^2, omega^2)
        beta_j | gamma_j, sigma^2, xi, X_j ~ N(delta_beta_0 + delta_beta_1 X_j + delta_beta_2 X_j^2 + delta_beta_3 gamma_j, omega_beta^2)
        mu_j | gamma_j, beta_j, sigma^2, xi, X_j ~ N(delta_mu_0 + delta_mu_1 X_j + delta_mu_2 X_j^2 + delta_mu_3 gamma_j + delta_mu_4 beta_j, omega_mu^2)

        eta = (eta_0, eta_1, eta_2, log(omega))'
        delta_beta = (delta_beta_0, delta_beta_1, delta_beta_2, delta_beta_3, log(omega_beta))'
        delta_mu = (delta_mu_0, delta_mu_1, delta_mu_2, delta_mu_3, log(omega_mu))'
        xi = (eta, delta_beta, delta_mu)
        eta ~ MVNormal(M, C)
        delta_beta, delta_mu ~ Normal(m, s)

    In this implementation omega, omega_beta and omega_mu are fixed at
    .0001 rather than derived from the last entries of eta, delta_beta
    and delta_mu, and sigma_j is fixed at .01 for every group.

    Parameters
    ----------
    n : list, len(n) = J, n[j] = num observations in group j

    Results
    -------
    Returns a dict of every local variable of this function (via
    ``vars()``), so the local names below are part of the interface.
    """

    J = len(n)
    # covariate data, not entirely specified in paper
    # (second argument of mc.rnormal is written as sd**-2 throughout)
    X = mc.rnormal(0, .1**-2, size=J)
    t = [pl.arange(n[j]) for j in range(J)]

    # hyper-priors, not specified in detail in paper
    m = 0.
    s = 1.
    M = pl.zeros(4)
    r = [[  1, .57, .18, .56],
         [.57,   1, .72, .16],
         [.18, .72,   1, .14],
         [.56, .16, .14,   1]]

    eta = mc.rmv_normal_cov(M, r)
    omega = .0001 #pl.exp(eta[-1])

    delta_beta = mc.rnormal(m, s**-2, size=5)
    omega_beta = .0001 #pl.exp(delta_beta[-1])

    delta_mu = mc.rnormal(m, s**-2, size=5)
    omega_mu = .0001 #pl.exp(delta_mu[-1])

    gamma = mc.rnormal(eta[0] + eta[1]*X + eta[2]*X**2, omega**-2.)
    beta = mc.rnormal(delta_beta[0] + delta_beta[1]*X + delta_beta[2]*X**2 + delta_beta[3]*gamma, omega_beta**-2)
    mu = mc.rnormal(delta_mu[0] + delta_mu[1]*X + delta_mu[2]*X**2 + delta_mu[3]*gamma + delta_mu[4]*beta, omega_mu**-2)

    # stochastic error, not specified in paper
    sigma = .01*pl.ones(J)
    y = [mc.rnormal(mu[j] - pl.exp(beta[j])*t[j] - pl.exp(gamma[j])*t[j]**2, sigma[j]**-2) for j in range(J)]

    # all six pairwise products of the four entries of eta:
    # (0,1), (0,2), (0,3), (1,2), (1,3), (2,3)
    eta_cross_eta = [eta[0]*eta[1], eta[0]*eta[2], eta[0]*eta[3], eta[1]*eta[2], eta[1]*eta[3], eta[2]*eta[3]]

    return vars()
Beispiel #6
def simulate_data(M_pri, C_pri, N_samp, V, N_exam, N_age_samps, correction_factor_array, age_lims):
    """Simulate one dataset for the outer loop of pred_samps.

    A sample is drawn from a multivariate normal with mean ``M_pri`` and
    covariance ``C_pri + V*I``, turned into ages and positive counts by
    ``ages_and_data``, and the marginal log-likelihood functions are built
    on a 500-point grid spanning five prior standard deviations either
    side of ``M_pri``.

    Returns the pair ``(marginal_log_likelihoods, positives)``.
    ``N_age_samps`` is accepted but not used here.
    """
    # Prior draw of P', with the nugget V added on the diagonal.
    nugget_cov = C_pri + eye(N_samp)*V
    f_samp = pm.rmv_normal_cov(M_pri, nugget_cov)

    # Ages and number positive for prediction; the normalized age
    # distribution is returned as well but is not needed below.
    ages, positives, _age_distribution = ages_and_data(N_exam, f_samp, correction_factor_array, age_lims)

    # Evaluation grid: smallest lower bound to largest upper bound.
    prior_sd = sqrt(diag(C_pri))
    lower = M_pri - prior_sd*5
    upper = M_pri + prior_sd*5
    grid = linspace(lower.min(),upper.max(),500)

    # Log-likelihood functions on that grid.
    log_likes = known_age_corr_likelihoods_f(positives, ages, correction_factor_array, grid, 0)
    return log_likes, positives
Beispiel #7
    def step(self):
        """Draw new values for ``self.beta`` from a Gaussian conditional.

        With ``L = self.sig.value`` used as a lower-triangular factor, the
        precision is ``X L^-T L^-1 X^T`` (``X = self.x``), the covariance is
        its inverse and the mean is ``covariance * X L^-T L^-1 d``
        (``d = self.d``).  Everything is computed with triangular solves
        instead of explicit inverses.

        NOTE(review): the call targets on the right-hand sides were missing
        in this copy of the file and have been restored as
        ``pm.gp.trisolve`` / ``np.dot`` from the surrounding algebra and
        keyword arguments -- confirm against the upstream source.
        """
        pri_sig = np.asarray(self.sig.value)

        # lo = X L^-T, so lo lo^T is the posterior precision
        lo = pm.gp.trisolve(pri_sig, self.x.T, uplo='L').T
        post_tau = np.dot(lo,lo.T)
        l = np.linalg.cholesky(post_tau)

        # post_C = (l l^T)^-1 through two triangular solves
        post_C = pm.gp.trisolve(l, np.eye(l.shape[0]),uplo='L')
        post_C = pm.gp.trisolve(l.T, post_C, uplo='U')

        # post_mean = post_C * lo * L^-1 d
        post_mean = np.dot(lo, pm.gp.trisolve(pri_sig, self.d, uplo='L'))
        post_mean = pm.gp.trisolve(l, post_mean, uplo='L')
        post_mean = pm.gp.trisolve(l.T, post_mean, uplo='U')

        new_val = pm.rmv_normal_cov(post_mean, post_C).squeeze()
        # plain loop: set_value is called for its side effect only
        for b, nv in zip(self.beta, new_val):
            b.set_value(nv)
Beispiel #8
Beispiel #9
    def _update_mu_Sigma(self, Sigma, component_mask):
        """Draw a new mean and covariance for every mixture component.

        Parameters
        ----------
        Sigma : indexable by component; ``Sigma[j]`` is the current
          covariance of component j
        component_mask : indexable by component; ``component_mask[j]``
          selects the observations assigned to component j

        Results
        -------
        (mu_output, Sigma_output) with shapes ``(ncomp, ndim)`` and
        ``(ncomp, ndim, ndim)``.

        NOTE(review): ``self.data`` and the ``np.dot`` call computing
        ``data_SS`` were missing in this copy of the file and have been
        restored from context -- confirm the attribute name upstream.
        """
        mu_output = np.zeros((self.ncomp, self.ndim))
        Sigma_output = np.zeros((self.ncomp, self.ndim, self.ndim))

        # range (not the Python-2-only xrange) runs on Python 2 and 3
        for j in range(self.ncomp):
            mask = component_mask[j]
            Xj = self.data[mask]
            nj = len(Xj)

            # TODO: sample from prior if nj == 0
            sumxj = Xj.sum(0)

            gam = self.gamma[j]
            mu_hyper = self.mu_prior_mean

            post_mean = (mu_hyper / gam + sumxj) / (1 / gam + nj)
            post_cov = 1 / (1 / gam + nj) * Sigma[j]

            new_mu = pm.rmv_normal_cov(post_mean, post_cov)

            Xj_demeaned = Xj - new_mu

            mu_SS = np.outer(new_mu - mu_hyper, new_mu - mu_hyper) / gam
            # scatter matrix of the observations around the new mean
            data_SS = np.dot(Xj_demeaned.T, Xj_demeaned)
            post_Phi = data_SS + mu_SS + self._nu0 * self._Phi0[j]

            # symmetrize
            post_Phi = (post_Phi + post_Phi.T) / 2

            # P(Sigma) ~ IW(nu + 2, nu * Phi)
            # P(Sigma | theta, Y) ~
            post_nu = nj + self.ncomp + self._nu0 + 3

            # pymc rinverse_wishart takes
            new_Sigma = pm.rinverse_wishart_prec(post_nu, post_Phi)

            mu_output[j] = new_mu
            Sigma_output[j] = new_Sigma

        return mu_output, Sigma_output
Beispiel #10
def generate_data(n=1e5, k=2, ncomps=3, seed=1):
    """Simulate labelled draws from a mixture of multivariate normals.

    Component j has mean ``gen_mean[j]``, standard deviations
    ``gen_sd[j]`` and a common pairwise correlation ``gen_corr[j]``;
    it contributes ``int(n * group_weights[j])`` observations.

    Parameters
    ----------
    n : total number of observations before rounding per component
    k : dimension of each observation
    ncomps : number of mixture components to simulate
    seed : accepted for interface compatibility
      NOTE(review): not used anywhere in this function -- confirm
      whether the random generator was meant to be seeded here.

    Results
    -------
    (labels, data): the concatenated component labels and the
    concatenated draws, stacked along axis 0 in component order.
    """
    data_concat = []
    labels_concat = []

    # range (not the Python-2-only xrange) runs on Python 2 and 3
    for j in range(ncomps):
        mean = gen_mean[j]
        sd = gen_sd[j]
        corr = gen_corr[j]

        # correlation matrix: corr off the diagonal, 1 on it; np.empty
        # alone would leave the off-diagonal entries uninitialised
        cov = np.empty((k, k))
        cov.fill(corr)
        cov[np.diag_indices(k)] = 1
        # scale the correlation matrix into a covariance matrix
        cov *= np.outer(sd, sd)

        num = int(n * group_weights[j])
        rvs = pm.rmv_normal_cov(mean, cov, size=num)

        # keep the draws; without this the final concatenate sees an
        # empty list
        data_concat.append(rvs)
        labels_concat.append(np.repeat(j, num))

    return (np.concatenate(labels_concat), np.concatenate(data_concat, axis=0))
Beispiel #11
Beispiel #12
def sample_covariates(covariate_dict, C_eval, d):
    """
    Samples covariates back in when they have been marginalized away.
        - covariate_dict : {name : value-on-input, prior-variance}
        - C_eval : covariance of d | covariates, m
        - d : current deviation from mean of covariates' immediate child.

    Returns a dict mapping each name in covariate_dict to its sampled
    coefficient.

    NOTE(review): the np.dot / pm.gp.trisolve call targets were missing
    in this copy of the file and have been restored from the Gaussian
    conditioning algebra and the surviving keyword arguments -- confirm
    against the upstream source.
    """
    # Extract keys to list to preserve order.
    n = list(covariate_dict.keys())
    cvv = [covariate_dict[k] for k in n]
    x = np.asarray([v[0] for v in cvv])
    prior_var = np.diag([v[1] for v in cvv])

    # Cross-covariance between d and the coefficients: x^T V
    prior_offdiag = np.dot(prior_var,x).T
    # Cholesky factor of the marginal covariance of d: C_eval + x^T V x
    prior_S = np.linalg.cholesky(np.asarray(C_eval) + np.dot(prior_offdiag, x))
    # Overwrite prior_offdiag with S^-1 x^T V
    pm.gp.trisolve(prior_S, prior_offdiag, uplo='L', transa='N', inplace=True)

    # Conditional covariance and mean of the coefficients given d
    post_C = prior_var - np.dot(prior_offdiag.T, prior_offdiag)
    post_mean = np.dot(prior_offdiag.T, pm.gp.trisolve(prior_S, d, uplo='L', transa='N'))
    new_val = pm.rmv_normal_cov(post_mean, post_C).squeeze()

    return dict(zip(n, new_val))
Beispiel #13
        [ \rho\sigma_2\sigma_1       \sigma_2\sigma_2 ]

Then, knowing the covariance matrix C and given the random samples,
we want to estimate the posterior distribution of mu. Since the prior
for mu is uniform, the posterior distribution of mu is simply a bivariate
normal with the same correlation coefficient rho, but with variances
divided by N (i.e. standard deviations divided by sqrt(N)), where N is
the number of samples drawn.

We can check that the sampler works correctly by making sure that
after a while, the covariance matrix of the samples for mu tends to C/N.

# Number of observations drawn from the true distribution.
N = 50
# True mean and covariance: sigma_1 = 1, sigma_2 = sqrt(2), rho = .8.
mu = np.array([-2., 3.])
C = np.array([[1, .8 * np.sqrt(2)], [.8 * np.sqrt(2), 2.]])
# Draw N observations; reusing N keeps the sample size and the C/N
# reference covariance from drifting apart.
r = pymc.rmv_normal_cov(mu, C, size=N)

# NOTE(review): the decorator was missing in this copy of the file; it is
# required for `mean` to be usable as a parent of `obs` and as an MCMC
# node below -- confirm against the upstream source.
@pymc.stochastic
def mean(value=np.array([0., 0.])):
    """The mean of the samples (mu). """
    # constant log-probability, i.e. a uniform prior on mu
    return 0.

# Observed draws `r`, modelled as multivariate normal around `mean` with
# the known covariance C.
obs = pymc.MvNormalCov('obs', mean, C, value=r, observed=True)

class TestAM(TestCase):
    """Checks the AdaptiveMetropolis step method on a bivariate normal mean."""

    def test_convergence(self):
        """The covariance of the sampled means should approach C / N."""
        S = pymc.MCMC([mean, obs])
        S.use_step_method(pymc.AdaptiveMetropolis, mean, delay=200)
        # NOTE(review): iteration and burn-in counts are a judgement call;
        # adjust if the check proves too slow or too noisy.
        S.sample(6000, burn=1000)
        # rows of the trace are draws, so transpose for np.cov
        Cs = np.cov(S.trace('mean')[:].T)
        np.testing.assert_array_almost_equal(Cs, C / N, 2)
Beispiel #14

# Build simulated covariates, draw a latent field and binomial counts from
# it, pack the results into record arrays and launch the external `infer`
# command.  Relies on names defined elsewhere (names, vals, n_data, n_pred,
# on, t, lon, lat, V).

# Covariate values keyed by covariate name.
cv = {}
if len(names)>2:
    # every name except the last two gets independent normal draws scaled
    # by `on` (presumably the last two names are 'm' and 't' -- confirm)
    for name in names[:-2]:
        cv[name] = np.random.normal(size=n_data+n_pred)*on#np.ones(n_data)
cv['m'] = np.ones(n_data+n_pred)*on
cv['t'] = t*on
# NOTE(review): the right-hand side below has lost its callable and leading
# argument(s) and is not valid Python as it stands.  `C` is called as
# C(dm, dm) further down, so it must be a callable covariance object --
# restore the expression from the upstream source.
C =, amp=1, scale=1, inc=np.pi/4, ecc=.3,st=.1, sd=.5, tlc=.2, sf = .1)

# One row per location: (lon, lat, t).
dm = np.vstack((lon,lat,t)).T

# Covariance evaluated between all pairs of rows of dm.
C_eval = C(dm,dm)

# Latent field: multivariate normal draw whose mean is the sum of
# cv[name]*vals[name] over all names, plus independent normal noise with
# standard deviation sqrt(V).
f = pm.rmv_normal_cov(np.sum([cv[name]*vals[name] for name in names],axis=0), C_eval) + np.random.normal(size=n_data+n_pred)*np.sqrt(V)
p = pm.flib.invlogit(f)
# Binomial counts with ns trials per location.
ns = 100
pos = pm.rbinomial(ns, p)
neg = ns - pos

# Python 2 print statement.
print p

# First n_data rows: counts, coordinates and covariates.
ra_data = np.rec.fromarrays((pos[:n_data], neg[:n_data], lon[:n_data], lat[:n_data]) + tuple([cv[name][:n_data] for name in names]), names=['pos','neg','lon','lat']+names)

# Remaining rows (from index n_data onward), same columns.
ra_pred = np.rec.fromarrays((pos[n_data:], neg[n_data:], lon[n_data:], lat[n_data:]) + tuple([cv[name][n_data:] for name in names]), names=['pos','neg','lon','lat']+names)

# NOTE(review): ra_data / ra_pred are not written to disk anywhere in this
# block, yet the command below names test_data.csv -- confirm where that
# file is produced.
os.system('infer cov_test test_db test_data.csv -t 10 -n 8 -i 100000')
# os.system('cov-test-predict test test_pred.csv 1000 100')
Beispiel #15
    # Hyper-parameters for the simulated data.
    # dims is used as an array shape below (np.zeros, np.eye), so it must
    # be an int: current NumPy rejects a float shape.
    dims = 2
    beta_0 = 1.
    nu_0 = dims
    m_0 = np.zeros(dims)
    W_0 = np.eye(dims)*1
    N_points = 50
    similarity = 1.

    # Generate some data for two distributions
    same = False
    prior_deg_freedom = nu_0 + similarity  # must be >= dims
    prior_mu = m_0
    prior_cov = W_0/similarity
    # Wishart draw shared by both groups as the scale for their covariances
    prior_cov_wish = np.array(pymc.rwishart_cov(nu_0, W_0))
    true_mu1 = pymc.rmv_normal_cov(prior_mu,prior_cov)
    true_cov1 = np.array(pymc.rwishart_cov(prior_deg_freedom, prior_cov_wish))
    true_mu2 = pymc.rmv_normal_cov(prior_mu,prior_cov)
    true_cov2 = np.array(pymc.rwishart_cov(prior_deg_freedom, prior_cov_wish))
    if same:
        # both groups share one set of parameters; the second draws above
        # are still made, so the random stream does not depend on `same`
        true_mu2 = true_mu1
        true_cov2 = true_cov1
    obs1 = pymc.rmv_normal_cov(true_mu1, true_cov1, size = N_points)
    obs2 = pymc.rmv_normal_cov(true_mu2, true_cov2, size = N_points)

    # Stack both groups; label rows of obs1 with 0 and rows of obs2 with 1
    all_obs = np.vstack((obs1,obs2))
    all_labels = np.hstack((np.zeros(len(obs1)),np.ones(len(obs2))))

Beispiel #16
    # Scatter plot of column 0 of obs1 against column 1.
    pylab.scatter(obs1[:, 0], obs1[:, 1])
Beispiel #18
