Code Example #1
File: data.py Project: aflaxman/pymc-cod-correct
def logit_normal_draw(cf_mean, std, N, J):
    std = pl.array(std)
    if mc.__version__ == '2.0rc2': # version on Omak 
        X = [mc.invlogit(mc.rnormal(mu=cf_mean, tau=std**-2)) for n in range(N)]
        Y = pl.array(X)
    else: 
        X = mc.rnormal(mu=cf_mean, tau=std**-2, size=(N,J))
        Y = mc.invlogit(X)
    return Y
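A minimal usage sketch (illustrative values only; assumes the snippet's imports, pylab as pl and pymc as mc):

# Hypothetical call: N=100 draws of J=2 cause fractions whose means
# are supplied on the logit scale, as the function expects.
cf_mean = mc.logit(pl.array([.3, .7]))
draws = logit_normal_draw(cf_mean, std=[.1, .1], N=100, J=2)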
Code Example #2
def main():
    x_t = pm.rnormal(0, 1, 200)
    x_t[0] = 0
    y_t = np.zeros(200)
    for i in range(1, 200):
        y_t[i] = pm.rnormal(y_t[i - 1], 1)

    plt.plot(y_t, label="$y_t$", lw=3)
    plt.plot(x_t, label="$x_t$", lw=3)
    plt.xlabel("time, $t$")
    plt.legend()
    plt.show()

    colors = ["#348ABD", "#A60628", "#7A68A6"]

    x = np.arange(1, 200)
    plt.bar(x, autocorr(y_t)[1:], width=1, label="$y_t$",
            edgecolor=colors[0], color=colors[0])
    plt.bar(x, autocorr(x_t)[1:], width=1, label="$x_t$",
            color=colors[1], edgecolor=colors[1])

    plt.legend(title="Autocorrelation")
    plt.ylabel("measured correlation \nbetween $y_t$ and $y_{t-k}$.")
    plt.xlabel("k (lag)")
    plt.title("Autocorrelation plot of $y_t$ and $x_t$ for differing $k$ lags.")
    plt.show()
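The autocorr helper used by this and the later thinning examples is not shown in the listings; a minimal sketch of a normalized sample autocorrelation, assuming numpy is imported as np:

def autocorr(x):
    # Normalized sample autocorrelation at lags 0..len(x)-1.
    x = x - np.mean(x)
    acf = np.correlate(x, x, mode='full')[len(x) - 1:]
    return acf / acf[0]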
Code Example #3
def simple_hierarchical_data(n):
    """ Generate data based on the simple one-way hierarchical model
    given in section 3.1.1::

        y[i,j] | alpha[j], sigma^2 ~ N(alpha[j], sigma^2) i = 1, ..., n_j, j = 1, ..., J;
        alpha[j] | mu, tau^2 ~ N(mu, tau^2) j = 1, ..., J.

        sigma^2 ~ Inv-Chi^2(5, 20)
        mu ~ N(5, 5^2)
        tau^2 ~ Inv-Chi^2(2, 10)

    Parameters
    ----------
    n : list, len(n) = J, n[j] = num observations in group j
    """

    inv_sigma_sq = mc.rgamma(alpha=2.5, beta=50.0)
    mu = mc.rnormal(mu=5.0, tau=5.0 ** -2.0)
    inv_tau_sq = mc.rgamma(alpha=1.0, beta=10.0)

    J = len(n)
    alpha = mc.rnormal(mu=mu, tau=inv_tau_sq, size=J)
    y = [mc.rnormal(mu=alpha[j], tau=inv_sigma_sq, size=n[j]) for j in range(J)]

    mu_by_tau = mu * pl.sqrt(inv_tau_sq)
    alpha_by_sigma = alpha * pl.sqrt(inv_sigma_sq)
    alpha_bar = alpha.sum()
    alpha_bar_by_sigma = alpha_bar * pl.sqrt(inv_sigma_sq)

    return vars()
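The Gamma draws above encode the scaled inverse-chi^2 priors from the docstring: if sigma^2 ~ Inv-Chi^2(nu, s^2), then 1/sigma^2 ~ Gamma(alpha=nu/2, beta=nu*s^2/2), with beta a rate (the convention rgamma appears to use here, judging by the match). A small sketch of the mapping:

def inv_chi2_to_gamma(nu, s2):
    # Gamma(shape, rate) parameters of 1/sigma^2 when sigma^2 ~ Inv-Chi^2(nu, s2).
    return dict(alpha=nu / 2., beta=nu * s2 / 2.)

inv_chi2_to_gamma(5, 20)   # {'alpha': 2.5, 'beta': 50.0} -- matches inv_sigma_sq above
inv_chi2_to_gamma(2, 10)   # {'alpha': 1.0, 'beta': 10.0} -- matches inv_tau_sq above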
Code Example #4
def main():
    x_t = pm.rnormal(0, 1, 200)
    x_t[0] = 0
    y_t = np.zeros(200)
    for i in range(1, 200):
        y_t[i] = pm.rnormal(y_t[i - 1], 1)

    plt.plot(y_t, label="$y_t$", lw=3)
    plt.plot(x_t, label="$x_t$", lw=3)
    plt.xlabel("time, $t$")
    plt.legend()
    plt.show()

    colors = ["#348ABD", "#A60628", "#7A68A6"]

    x = np.arange(1, 200)
    plt.bar(x,
            autocorr(y_t)[1:],
            width=1,
            label="$y_t$",
            edgecolor=colors[0],
            color=colors[0])
    plt.bar(x,
            autocorr(x_t)[1:],
            width=1,
            label="$x_t$",
            color=colors[1],
            edgecolor=colors[1])

    plt.legend(title="Autocorrelation")
    plt.ylabel("measured correlation \nbetween $y_t$ and $y_{t-k}$.")
    plt.xlabel("k (lag)")
    plt.title(
        "Autocorrelation plot of $y_t$ and $x_t$ for differing $k$ lags.")
    plt.show()
Code Example #5
def simple_hierarchical_data(n):
    """ Generate data based on the simple one-way hierarchical model
    given in section 3.1.1::

        y[i,j] | alpha[j], sigma^2 ~ N(alpha[j], sigma^2) i = 1, ..., n_j, j = 1, ..., J;
        alpha[j] | mu, tau^2 ~ N(mu, tau^2) j = 1, ..., J.

        sigma^2 ~ Inv-Chi^2(5, 20)
        mu ~ N(5, 5^2)
        tau^2 ~ Inv-Chi^2(2, 10)

    Parameters
    ----------
    n : list, len(n) = J, n[j] = num observations in group j
    """

    inv_sigma_sq = mc.rgamma(alpha=2.5, beta=50.)
    mu = mc.rnormal(mu=5., tau=5.**-2.)
    inv_tau_sq = mc.rgamma(alpha=1., beta=10.)

    J = len(n)
    alpha = mc.rnormal(mu=mu, tau=inv_tau_sq, size=J)
    y = [mc.rnormal(mu=alpha[j], tau=inv_sigma_sq, size=n[j]) for j in range(J)]

    mu_by_tau = mu * pl.sqrt(inv_tau_sq)
    alpha_by_sigma = alpha * pl.sqrt(inv_sigma_sq)
    alpha_bar = alpha.sum()
    alpha_bar_by_sigma = alpha_bar * pl.sqrt(inv_sigma_sq)

    return vars()
Code Example #6
File: test_covariates.py Project: aflaxman/gbd
def test_covariate_model_sim_no_hierarchy():
    # simulate normal data
    model = data.ModelData()
    model.hierarchy, model.output_template = data_simulation.small_output()

    X = mc.rnormal(0., 1.**2, size=(128,3))

    beta_true = [-.1, .1, .2]
    Y_true = pl.dot(X, beta_true)

    pi_true = pl.exp(Y_true)
    sigma_true = .01*pl.ones_like(pi_true)

    p = mc.rnormal(pi_true, 1./sigma_true**2.)

    model.input_data = pandas.DataFrame(dict(value=p, x_0=X[:,0], x_1=X[:,1], x_2=X[:,2]))
    model.input_data['area'] = 'all'
    model.input_data['sex'] = 'total'
    model.input_data['year_start'] = 2000
    model.input_data['year_end'] = 2000

    # create model and priors
    vars = {}
    vars.update(covariate_model.mean_covariate_model('test', 1, model.input_data, {}, model, 'all', 'total', 'all'))
    vars.update(rate_model.normal_model('test', vars['pi'], 0., p, sigma_true))

    # fit model
    m = mc.MCMC(vars)
    m.sample(2)
Code Example #7
def test_covariate_model_sim_no_hierarchy():
    # simulate normal data
    model = dismod_mr.data.ModelData()
    model.hierarchy, model.output_template = data_simulation.small_output()

    X = mc.rnormal(0., 1.**2, size=(128, 3))

    beta_true = [-.1, .1, .2]
    Y_true = np.dot(X, beta_true)

    pi_true = np.exp(Y_true)
    sigma_true = .01 * np.ones_like(pi_true)

    p = mc.rnormal(pi_true, 1. / sigma_true**2.)

    model.input_data = pd.DataFrame(
        dict(value=p, x_0=X[:, 0], x_1=X[:, 1], x_2=X[:, 2]))
    model.input_data['area'] = 'all'
    model.input_data['sex'] = 'total'
    model.input_data['year_start'] = 2000
    model.input_data['year_end'] = 2000

    # create model and priors
    vars = {}
    vars.update(
        dismod_mr.model.covariates.mean_covariate_model(
            'test', 1, model.input_data, {}, model, 'all', 'total', 'all'))
    vars.update(
        dismod_mr.model.likelihood.normal('test', vars['pi'], 0., p,
                                          sigma_true))

    # fit model
    m = mc.MCMC(vars)
    m.sample(2)
Code Example #8
def sim_data(N,
             true_cf=[[.3, .6, .1], [.3, .5, .2]],
             true_std=[[.2, .05, .05], [.3, 0.1, 0.1]],
             sum_to_one=True):
    """ 
    Create an NxTxJ matrix of simulated data (T is determined by the length 
    of true_cf, J by the length of the elements of true_cf). 

    true_cf - a list of lists of true cause fractions (each must sum to one)
    true_std - a list of lists of the standard deviations corresponding to the true csmf's 
             for each time point. Can either be a list of length J inside a list of length
             1 (in this case, the same standard deviation is used for all time points) or 
             can be T lists of length J (in this case, a separate standard deviation
             is specified and used for each time point). 
    """

    if sum_to_one:
        assert pl.allclose(pl.sum(true_cf, 1),
                           1), 'The sum of elements of true_cf must equal 1'
    T = len(true_cf)
    J = len(true_cf[0])

    ## if only one std provided, duplicate for all time points
    if len(true_std) == 1 and len(true_cf) > 1:
        true_std = [true_std[0] for i in range(len(true_cf))]

    ## transform the mean and std to logit space
    transformed_std = []
    for t in range(T):
        pi_i = pl.array(true_cf[t])
        sigma_pi_i = pl.array(true_std[t])
        transformed_std.append(
            ((1 / (pi_i * (pi_i - 1)))**2 * sigma_pi_i**2)**0.5)

    ## find minimum standard deviation (by cause across time) and draw from this
    min_std = pl.array(transformed_std).min(0)
    common_perturbation = [
        pl.ones([T, J]) * mc.rnormal(mu=0, tau=min_std**-2) for n in range(N)
    ]

    ## draw from remaining variation
    tau = pl.array(transformed_std)**2 - min_std**2
    tau[tau == 0] = 0.000001
    additional_perturbation = [
        [mc.rnormal(mu=0, tau=tau[t]**-1) for t in range(T)] for n in range(N)
    ]

    result = pl.zeros([N, T, J])
    for n in range(N):
        result[n, :, :] = [
            mc.invlogit(
                mc.logit(true_cf[t]) + common_perturbation[n][t] +
                additional_perturbation[n][t]) for t in range(T)
        ]

    return result
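A minimal usage sketch with the default arguments (two time points, three causes):

sims = sim_data(5)    # shape (N, T, J) = (5, 2, 3)
print(sims.shape)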
Code Example #9
File: kq1.py Project: fonnesbeck/PKUMetaAnalysis
    def pred(a1=alpha1, mu_int=mu_int, tau_int=tau_int, mu_slope=mu_slope, tau_slope=tau_slope, tau_iq=tau_iq, values=(70,75,80,85)):
        """Estimate the probability of IQ<85 for different covariate values"""
        b0 = rnormal(mu_int, tau_int, size=len(phe_pred))
        a0 = rnormal(mu_slope, tau_slope, size=len(phe_pred))

        b1 = a0 + a1*crit_pred

        iq = rnormal(b0 + b1*phe_pred, tau_iq)

        return [iq<v for v in values]
Code Example #10
def logit_normal_draw(cf_mean, std, N, J):
    std = pl.array(std)
    if mc.__version__ == '2.0rc2':  # version on Omak
        X = [
            mc.invlogit(mc.rnormal(mu=cf_mean, tau=std**-2)) for n in range(N)
        ]
        Y = pl.array(X)
    else:
        X = mc.rnormal(mu=cf_mean, tau=std**-2, size=(N, J))
        Y = mc.invlogit(X)
    return Y
Code Example #11
File: survival.py Project: calebamiles/survival
 def propose(self):
     tau = 1./(self.adaptive_scale_factor * self.proposal_sd)**2
     time = pymc.rnormal(self.stochastic.value.time, tau)
     n = int(round(pymc.rnormal(len(self.stochastic.value), tau)))
     if n <= 0:
         n = 0
     times = [rand.random() for _ in range(n)]
     total = float(sum(times))
     times = [item*time/total for item in times]
     events = [Event(time=item, censored=False) for item in times]
     self.stochastic.value = MultiEvent(events)
Code Example #12
File: data_simulation.py Project: blue442/dismod_mr
def simulated_age_intervals(data_type, n, a, pi_age_true, sigma_true):
    # choose age intervals to measure
    age_start = np.array(mc.runiform(0, 100, n), dtype=int)
    age_start.sort()  # sort to make it easy to discard the edges when testing
    age_end = np.array(mc.runiform(age_start + 1,
                                   np.minimum(age_start + 10, 100)),
                       dtype=int)

    # find truth for the integral across the age intervals
    import scipy.integrate
    pi_interval_true = [
        scipy.integrate.trapz(pi_age_true[a_0i:(a_1i + 1)]) / (a_1i - a_0i)
        for a_0i, a_1i in zip(age_start, age_end)
    ]

    # generate covariates that add explained variation
    X = mc.rnormal(0., 1.**2, size=(n, 3))
    beta_true = [-.1, .1, .2]
    beta_true = [0, 0, 0]
    Y_true = np.dot(X, beta_true)

    # calculate the true value of the rate in each interval
    pi_true = pi_interval_true * np.exp(Y_true)

    # simulate the noisy measurement of the rate in each interval
    p = np.maximum(0., mc.rnormal(pi_true, 1. / sigma_true**2.))

    # store the simulated data in a pandas DataFrame
    data = pandas.DataFrame(
        dict(value=p,
             age_start=age_start,
             age_end=age_end,
             x_0=X[:, 0],
             x_1=X[:, 1],
             x_2=X[:, 2]))
    data['effective_sample_size'] = np.maximum(p * (1 - p) / sigma_true**2, 1.)

    data['standard_error'] = np.nan
    data['upper_ci'] = np.nan
    data['lower_ci'] = np.nan

    data['year_start'] = 2005.  # TODO: make these vary
    data['year_end'] = 2005.
    data['sex'] = 'total'
    data['area'] = 'all'
    data['data_type'] = data_type

    return data
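A minimal usage sketch with made-up inputs (a flat true rate of .01 over ages 0-100; assumes the snippet's imports, numpy as np, pymc as mc, and pandas):

pi_age_true = .01 * np.ones(101)    # hypothetical age pattern
df = simulated_age_intervals('p', n=20, a=np.arange(101),
                             pi_age_true=pi_age_true, sigma_true=.001)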
Code Example #13
File: test_covariates.py Project: aflaxman/gbd
def test_fixed_effect_priors():
    model = data.ModelData()

    # set prior on sex
    parameters = dict(fixed_effects={'x_sex': dict(dist='TruncatedNormal', mu=1., sigma=.5, lower=-10, upper=10)})

    # simulate normal data
    n = 32
    sex_list = pl.array(['male', 'female', 'total'])
    sex = sex_list[mc.rcategorical([.3, .3, .4], n)]
    beta_true = dict(male=-1., total=0., female=1.)
    pi_true = pl.exp([beta_true[s] for s in sex])
    sigma_true = .05
    p = mc.rnormal(pi_true, 1./sigma_true**2.)

    model.input_data = pandas.DataFrame(dict(value=p, sex=sex))
    model.input_data['area'] = 'all'
    model.input_data['year_start'] = 2010
    model.input_data['year_end'] = 2010

    # create model and priors
    vars = {}
    vars.update(covariate_model.mean_covariate_model('test', 1, model.input_data, parameters, model,
                                                     'all', 'total', 'all'))

    print(vars['beta'])
    assert vars['beta'][0].parents['mu'] == 1.
Code Example #14
def main():
    """ Demonstrating thinning of two autocorrelated inputs (representing
        posterior probabilities). The key point is the thinned - every 2nd / 3rd
        point - functions approach zero quicker. More thinning is better (but
        expensive)
    """

    # x_t = pm.rnormal(0, 1, 200)
    # x_t[0] = 0
    y_t = np.zeros(200)
    for i in range(1, 200):
        y_t[i] = pm.rnormal(y_t[i - 1], 1)

    max_x = 200 // 3 + 1
    x = np.arange(1, max_x)

    colors = ["#348ABD", "#A60628", "#7A68A6"]
    plt.bar(x, autocorr(y_t)[1:max_x], edgecolor=colors[0],
            label="no thinning", color=colors[0], width=1)
    plt.bar(x, autocorr(y_t[::2])[1:max_x], edgecolor=colors[1],
            label="keeping every 2nd sample", color=colors[1], width=1)
    plt.bar(x, autocorr(y_t[::3])[1:max_x], width=1, edgecolor=colors[2],
            label="keeping every 3rd sample", color=colors[2])

    plt.autoscale(tight=True)
    plt.legend(title="Autocorrelation plot for $y_t$", loc="lower left")
    plt.ylabel("measured correlation \nbetween $y_t$ and $y_{t-k}$.")
    plt.xlabel("k (lag)")
    plt.title("Autocorrelation of $y_t$ (no thinning vs. thinning) \
            at differing $k$ lags.")
    plt.show()
Code Example #15
File: test_covariates.py Project: aflaxman/gbd
def test_random_effect_priors():
    model = data.ModelData()

    # set prior on sex
    parameters = dict(random_effects={'USA': dict(dist='TruncatedNormal', mu=.1, sigma=.5, lower=-10, upper=10)})


    # simulate normal data
    n = 32
    area_list = pl.array(['all', 'USA', 'CAN'])
    area = area_list[mc.rcategorical([.3, .3, .4], n)]
    alpha_true = dict(all=0., USA=.1, CAN=-.2)
    pi_true = pl.exp([alpha_true[a] for a in area])
    sigma_true = .05
    p = mc.rnormal(pi_true, 1./sigma_true**2.)

    model.input_data = pandas.DataFrame(dict(value=p, area=area))
    model.input_data['sex'] = 'male'
    model.input_data['year_start'] = 2010
    model.input_data['year_end'] = 2010

    model.hierarchy.add_edge('all', 'USA')
    model.hierarchy.add_edge('all', 'CAN')

    # create model and priors
    vars = {}
    vars.update(covariate_model.mean_covariate_model('test', 1, model.input_data, parameters, model,
                                                     'all', 'total', 'all'))

    print(vars['alpha'])
    print(vars['alpha'][1].parents['mu'])
    assert vars['alpha'][1].parents['mu'] == .1
Code Example #16
File: talk_neg_binom.py Project: studentmicky/gbd
def plot_funnel(pi_true, delta_str):
    delta = float(delta_str)
    n = pl.exp(mc.rnormal(10, 2**-2, size=10000))
    p = pi_true * pl.ones_like(n)

    # old way:
    #delta = delta * p * n

    nb = rate_model.neg_binom_model('funnel', pi_true, delta, p, n)
    r = nb['p_pred'].value

    pl.vlines([pi_true],
              .1 * n.min(),
              10 * n.max(),
              linewidth=5,
              linestyle='--',
              color='black',
              zorder=10)
    pl.plot(r, n, 'o', color=colors[0], ms=10, mew=0, alpha=.25)

    pl.semilogy(schiz['r'],
                schiz['n'],
                's',
                mew=1,
                mec='white',
                ms=15,
                color=colors[1],
                label='Observed Values')

    pl.xlabel('Rate (Per 1000 PY)', size=32)
    pl.ylabel('Study Size (PY)', size=32)
    pl.axis([-.0001, .0101, 50., 15000000])
    pl.title(r'$\delta = %s$' % delta_str, size=48)
    pl.xticks([0, .005, .01], [0, 5, 10], size=30)
    pl.yticks(size=30)
Code Example #17
    def step(self):
        x0 = np.copy(self.stochastic.value)
        dx = pymc.rnormal(np.zeros(np.shape(x0)), self.proposal_tau)

        logp = [self.logp_plus_loglike]
        x_prime = [x0]

        for direction in [-1, 1]:
            for i in range(25):
                delta = direction * np.exp(.1 * i) * dx
                try:
                    self.stochastic.value = x0 + delta
                    logp.append(self.logp_plus_loglike)
                    x_prime.append(x0 + delta)
                except pymc.ZeroProbability:
                    self.stochastic.value = x0

        i = pymc.rcategorical(np.exp(np.array(logp) - pymc.flib.logsum(logp)))
        self.stochastic.value = x_prime[i]

        if i == 0:
            self.rejected += 1
            if self.verbose > 2:
                print(self._id + ' rejecting')
        else:
            self.accepted += 1
            if self.verbose > 2:
                print(self._id + ' accepting')
Code Example #18
File: talk_neg_binom.py Project: aflaxman/gbd
def plot_funnel(pi_true, delta_str):
    delta = float(delta_str)
    n = pl.exp(mc.rnormal(10, 2**-2, size=10000))
    p = pi_true*pl.ones_like(n)

    # old way:
    #delta = delta * p * n

    nb = rate_model.neg_binom_model('funnel', pi_true, delta, p, n)
    r = nb['p_pred'].value

    pl.vlines([pi_true], .1*n.min(), 10*n.max(),
              linewidth=5, linestyle='--', color='black', zorder=10)
    pl.plot(r, n, 'o', color=colors[0], ms=10,
            mew=0, alpha=.25)

    pl.semilogy(schiz['r'], schiz['n'], 's', mew=1, mec='white', ms=15,
                color=colors[1],
                label='Observed Values')

    pl.xlabel('Rate (Per 1000 PY)', size=32)
    pl.ylabel('Study Size (PY)', size=32)
    pl.axis([-.0001, .0101, 50., 15000000])
    pl.title(r'$\delta = %s$'%delta_str, size=48)
    pl.xticks([0, .005, .01], [0, 5, 10], size=30)
    pl.yticks(size=30)
Code Example #19
File: kq1.py Project: fonnesbeck/PKUMetaAnalysis
    def pred(a1=alpha1,
             mu_int=mu_int,
             tau_int=tau_int,
             mu_slope=mu_slope,
             tau_slope=tau_slope,
             tau_iq=tau_iq,
             values=(70, 75, 80, 85)):
        """Estimate the probability of IQ<85 for different covariate values"""
        b0 = rnormal(mu_int, tau_int, size=len(phe_pred))
        a0 = rnormal(mu_slope, tau_slope, size=len(phe_pred))

        b1 = a0 + a1 * crit_pred

        iq = rnormal(b0 + b1 * phe_pred, tau_iq)

        return [iq < v for v in values]
Code Example #20
def test_fixed_effect_priors():
    model = dismod_mr.data.ModelData()

    # set prior on sex
    parameters = dict(
        fixed_effects={
            'x_sex':
            dict(dist='TruncatedNormal', mu=1., sigma=.5, lower=-10, upper=10)
        })

    # simulate normal data
    n = 32
    sex_list = np.array(['male', 'female', 'total'])
    sex = sex_list[mc.rcategorical([.3, .3, .4], n)]
    beta_true = dict(male=-1., total=0., female=1.)
    pi_true = np.exp([beta_true[s] for s in sex])
    sigma_true = .05
    p = mc.rnormal(pi_true, 1. / sigma_true**2.)

    model.input_data = pd.DataFrame(dict(value=p, sex=sex))
    model.input_data['area'] = 'all'
    model.input_data['year_start'] = 2010
    model.input_data['year_end'] = 2010

    # create model and priors
    vars = {}
    vars.update(
        dismod_mr.model.covariates.mean_covariate_model(
            'test', 1, model.input_data, parameters, model, 'all', 'total',
            'all'))

    print(vars['beta'])
    assert vars['beta'][0].parents['mu'] == 1.
Code Example #21
def test_random_effect_priors():
    model = dismod_mr.data.ModelData()

    # set prior on sex
    parameters = dict(random_effects={
        'USA':
        dict(dist='TruncatedNormal', mu=.1, sigma=.5, lower=-10, upper=10)
    })

    # simulate normal data
    n = 32
    area_list = np.array(['all', 'USA', 'CAN'])
    area = area_list[mc.rcategorical([.3, .3, .4], n)]
    alpha_true = dict(all=0., USA=.1, CAN=-.2)
    pi_true = np.exp([alpha_true[a] for a in area])
    sigma_true = .05
    p = mc.rnormal(pi_true, 1. / sigma_true**2.)

    model.input_data = pd.DataFrame(dict(value=p, area=area))
    model.input_data['sex'] = 'male'
    model.input_data['year_start'] = 2010
    model.input_data['year_end'] = 2010

    model.hierarchy.add_edge('all', 'USA')
    model.hierarchy.add_edge('all', 'CAN')

    # create model and priors
    vars = {}
    vars.update(
        dismod_mr.model.covariates.mean_covariate_model(
            'test', 1, model.input_data, parameters, model, 'all', 'total',
            'all'))

    print(vars['alpha'])
    print(vars['alpha'][1].parents['mu'])
Code Example #22
File: dMCMC_ACSET_restFit.py Project: bdyer8/CaPaper
 def __init__(self, stochastic, proposal_sd=None, verbose=None):
     pm.Metropolis.__init__(self, stochastic, proposal_sd=proposal_sd,
                         verbose=verbose, tally=False)
     self.proposal_tau = self.proposal_sd**-2.
     self.n = 0
     self.N = 11
     self.value = pm.rnormal(self.stochastic.value, self.proposal_tau, size=tuple([self.N] + list(self.stochastic.value.shape)))
Code Example #23
File: dMCMC_ACSET_restFit.py Project: bdyer8/CaPaper
    def step(self):
        x0 = self.value[self.n]
        u = pm.rnormal(np.zeros(self.N), 1.)
        dx = np.dot(u, self.value)
 
        self.stochastic.value = x0
        logp = [self.logp_plus_loglike]
        x_prime = [x0]
 
        for direction in [-1, 1]:
            for i in range(25):
                delta = direction*np.exp(.1*i)*dx
                try:
                    self.stochastic.value = x0 + delta
                    logp.append(self.logp_plus_loglike)
                    x_prime.append(x0 + delta)
                except pm.ZeroProbability:
                    self.stochastic.value = x0
 
        i = pm.rcategorical(np.exp(np.array(logp) - pm.flib.logsum(logp)))
        self.value[self.n] = x_prime[i]
        self.stochastic.value = x_prime[i]
 
        if i == 0:
            self.rejected += 1
        else:
            self.accepted += 1
 
        self.n += 1
        if self.n == self.N:
            self.n = 0    
Code Example #24
File: steppers.py Project: AtomyChan/JLU-python-code
    def step(self):
        x0 = np.copy(self.stochastic.value)
        dx = pymc.rnormal(np.zeros(np.shape(x0)), self.proposal_tau)

        logp = [self.logp_plus_loglike]
        x_prime = [x0]

        for direction in [-1, 1]:
            for i in range(25):
                delta = direction*np.exp(.1*i)*dx
                try:
                    self.stochastic.value = x0 + delta
                    logp.append(self.logp_plus_loglike)
                    x_prime.append(x0 + delta)
                except pymc.ZeroProbability:
                    self.stochastic.value = x0
        
        i = pymc.rcategorical(np.exp(np.array(logp) - pymc.flib.logsum(logp)))
        self.stochastic.value = x_prime[i]

        if i == 0:
            self.rejected += 1
            if self.verbose > 2:
                print(self._id + ' rejecting')
        else:
            self.accepted += 1
            if self.verbose > 2:
                print(self._id + ' accepting')
Code Example #25
File: test_spline.py Project: blue442/dismod_mr
def test_age_pattern_model_sim():
    # simulate normal data
    a = np.arange(0, 100, 5)
    pi_true = .0001 * (a * (100. - a) + 100.)
    sigma_true = .025 * np.ones_like(pi_true)

    p = np.maximum(0., mc.rnormal(pi_true, 1. / sigma_true**2.))

    # create model and priors
    vars = {}

    vars.update(
        dismod_mr.model.spline.spline('test',
                                      ages=np.arange(101),
                                      knots=np.arange(0, 101, 5),
                                      smoothing=.1))

    vars['pi'] = mc.Lambda('pi', lambda mu=vars['mu_age'], a=a: mu[a])
    vars.update(
        dismod_mr.model.likelihood.normal('test', vars['pi'], 0., p,
                                          sigma_true))

    # fit model
    m = mc.MCMC(vars)
    m.sample(2)
Code Example #26
    def step(self):
        x0 = self.value[self.n]
        u = pymc.rnormal(np.zeros(self.N), 1.)
        dx = np.dot(u, self.value)

        self.stochastic.value = x0
        logp = [self.logp_plus_loglike]
        x_prime = [x0]

        for direction in [-1, 1]:
            for i in range(25):
                delta = direction * np.exp(.1 * i) * dx
                try:
                    self.stochastic.value = x0 + delta
                    logp.append(self.logp_plus_loglike)
                    x_prime.append(x0 + delta)
                except pymc.ZeroProbability:
                    self.stochastic.value = x0

        i = pymc.rcategorical(np.exp(np.array(logp) - pymc.flib.logsum(logp)))
        self.value[self.n] = x_prime[i]
        self.stochastic.value = x_prime[i]

        if i == 0:
            self.rejected += 1
            if self.verbose > 2:
                print(self._id + ' rejecting')
        else:
            self.accepted += 1
            if self.verbose > 2:
                print(self._id + ' accepting')

        self.n += 1
        if self.n == self.N:
            self.n = 0
Code Example #27
File: data.py Project: aflaxman/pymc-cod-correct
def sim_data(N, true_cf=[[.3, .6, .1],
                           [.3, .5, .2]],
             true_std=[[.2, .05, .05], 
                       [.3, 0.1, 0.1]],
             sum_to_one=True):
    """ 
    Create an NxTxJ matrix of simulated data (T is determined by the length 
    of true_cf, J by the length of the elements of true_cf). 

    true_cf - a list of lists of true cause fractions (each must sum to one)
    true_std - a list of lists of the standard deviations corresponding to the true csmf's 
             for each time point. Can either be a list of length J inside a list of length
             1 (in this case, the same standard deviation is used for all time points) or 
             can be T lists of length J (in this case, a separate standard deviation
             is specified and used for each time point). 
    """

    if sum_to_one:
        assert pl.allclose(pl.sum(true_cf, 1), 1), 'The sum of elements of true_cf must equal 1' 
    T = len(true_cf)
    J = len(true_cf[0])
    
    ## if only one std provided, duplicate for all time points 
    if len(true_std)==1 and len(true_cf)>1: 
        true_std = [true_std[0] for i in range(len(true_cf))]    

    ## transform the mean and std to logit space
    transformed_std = []
    for t in range(T): 
        pi_i = pl.array(true_cf[t])
        sigma_pi_i = pl.array(true_std[t])
        transformed_std.append( ((1/(pi_i*(pi_i-1)))**2 * sigma_pi_i**2)**0.5 )
            
    ## find minimum standard deviation (by cause across time) and draw from this
    min_std = pl.array(transformed_std).min(0)
    common_perturbation = [pl.ones([T,J])*mc.rnormal(mu=0, tau=min_std**-2) for n in range(N)]

    ## draw from remaining variation
    tau = pl.array(transformed_std)**2 - min_std**2
    tau[tau==0] = 0.000001
    additional_perturbation = [[mc.rnormal(mu=0, tau=tau[t]**-1) for t in range(T)] for n in range(N)]

    result = pl.zeros([N, T, J])
    for n in range(N):
        result[n, :, :] = [mc.invlogit(mc.logit(true_cf[t]) + common_perturbation[n][t] + additional_perturbation[n][t]) for t in range(T)]

    return result
Code Example #28
def test_log_normal_model_sim(N=16):
    # simulate log-normal data
    pi_true = 2.
    sigma_true = .1

    n = pl.array(pl.exp(mc.rnormal(10, 1**-2, size=N)), dtype=int)
    p = pl.exp(mc.rnormal(pl.log(pi_true), 1./(sigma_true**2 + 1./n), size=N))

    # create model and priors
    vars = dict(mu_age=mc.Uniform('mu_age', 0., 1000., value=.01),
                sigma=mc.Uniform('sigma', 0., 10000., value=1000.))
    vars['mu_interval'] = mc.Lambda('mu_interval', lambda mu=vars['mu_age']: mu*pl.ones(N))
    vars.update(rate_model.log_normal_model('sim', vars['mu_interval'], vars['sigma'], p, 1./pl.sqrt(n)))

    # fit model
    m = mc.MCMC(vars)
    m.sample(1)
Code Example #29
File: PyMCmodel.py Project: AsymmetricHuang/pymc
def make_model(n_fmesh=11, fmesh_is_obsmesh=False):
    x = np.arange(-1., 1., .1)

    # Prior parameters of C
    nu = pm.Uniform('nu', 1., 3, value=1.5)
    phi = pm.Lognormal('phi', mu=.4, tau=1, value=1)
    theta = pm.Lognormal('theta', mu=.5, tau=1, value=1)

    # The covariance dtrm C is valued as a Covariance object.
    @pm.deterministic
    def C(eval_fun=gp.matern.euclidean,
          diff_degree=nu, amp=phi, scale=theta):
        return gp.NearlyFullRankCovariance(eval_fun, diff_degree=diff_degree, amp=amp, scale=scale)

    # Prior parameters of M
    a = pm.Normal('a', mu=1., tau=1., value=1)
    b = pm.Normal('b', mu=.5, tau=1., value=0)
    c = pm.Normal('c', mu=2., tau=1., value=0)

    # The mean M is valued as a Mean object.
    def linfun(x, a, b, c):
        return a * x ** 2 + b * x + c

    @pm.deterministic
    def M(eval_fun=linfun, a=a, b=b, c=c):
        return gp.Mean(eval_fun, a=a, b=b, c=c)

    # The actual observation locations
    actual_obs_locs = np.linspace(-.8, .8, 4)

    if fmesh_is_obsmesh:
        o = actual_obs_locs
        fmesh = o
    else:
        # The unknown observation locations
        o = pm.Normal('o', actual_obs_locs, 1000., value=actual_obs_locs)
        fmesh = np.linspace(-1, 1, n_fmesh)

    # The GP submodel
    sm = gp.GPSubmodel('sm', M, C, fmesh)

    # Observation variance
    V = pm.Lognormal('V', mu=-1, tau=1, value=.0001)
    observed_values = pm.rnormal(actual_obs_locs ** 2, 10000)

    # The data d is just array-valued. It's normally distributed about
    # GP.f(obs_x).
    d = pm.Normal(
        'd',
        mu=sm.f(o),
        tau=1. / V,
        value=observed_values,
        observed=True)

    return locals()
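A minimal sketch of sampling from this model (a short illustrative run; assumes pymc as pm and pymc.gp as gp are imported, as the snippet requires):

m = pm.MCMC(make_model())
m.sample(100)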
Code Example #30
def make_model(n_fmesh=11, fmesh_is_obsmesh=False):
    x = np.arange(-1., 1., .1)

    # Prior parameters of C
    nu = pm.Uniform('nu', 1., 3, value=1.5)
    phi = pm.Lognormal('phi', mu=.4, tau=1, value=1)
    theta = pm.Lognormal('theta', mu=.5, tau=1, value=1)

    # The covariance dtrm C is valued as a Covariance object.
    @pm.deterministic
    def C(eval_fun=gp.matern.euclidean, diff_degree=nu, amp=phi, scale=theta):
        return gp.NearlyFullRankCovariance(eval_fun,
                                           diff_degree=diff_degree,
                                           amp=amp,
                                           scale=scale)

    # Prior parameters of M
    a = pm.Normal('a', mu=1., tau=1., value=1)
    b = pm.Normal('b', mu=.5, tau=1., value=0)
    c = pm.Normal('c', mu=2., tau=1., value=0)

    # The mean M is valued as a Mean object.
    def linfun(x, a, b, c):
        return a * x**2 + b * x + c

    @pm.deterministic
    def M(eval_fun=linfun, a=a, b=b, c=c):
        return gp.Mean(eval_fun, a=a, b=b, c=c)

    # The actual observation locations
    actual_obs_locs = np.linspace(-.8, .8, 4)

    if fmesh_is_obsmesh:
        o = actual_obs_locs
        fmesh = o
    else:
        # The unknown observation locations
        o = pm.Normal('o', actual_obs_locs, 1000., value=actual_obs_locs)
        fmesh = np.linspace(-1, 1, n_fmesh)

    # The GP submodel
    sm = gp.GPSubmodel('sm', M, C, fmesh)

    # Observation variance
    V = pm.Lognormal('V', mu=-1, tau=1, value=.0001)
    observed_values = pm.rnormal(actual_obs_locs**2, 10000)

    # The data d is just array-valued. It's normally distributed about GP.f(obs_x).
    d = pm.Normal('d',
                  mu=sm.f(o),
                  tau=1. / V,
                  value=observed_values,
                  observed=True)

    return locals()
Code Example #31
def complex_hierarchical_data(n):
    """ Generate data based on the much more complicated model
    given in section 3.2.1::

        y_ij ~ N(mu_j - exp(beta_j)t_ij - exp(gamma_j)t_ij^2, sigma_j^2)
        gamma_j | sigma^2, xi, X_j ~ N(eta_0 + eta_1 X_j + eta_2 X_j^2, omega^2)
        beta_j | gamma_j, sigma^2, xi, X_j ~ N(delta_beta_0 + delta_beta_1 X_j + delta_beta_2 X_j^2 + delta_beta_3 gamma_j, omega_beta^2)
        mu_j | gamma_j, beta_j, sigma^2, xi, X_j ~ N(delta_mu_0 + delta_mu_1 X_j + delta_mu_2 X_j^2 + delta_mu_3 gamma_j + delta_mu_4 beta_j, omega_mu^2)

        eta = (eta_0, eta_1, eta_2, log(omega))'
        delta_beta = (delta_beta_0, delta_beta_1, delta_beta_2, delta_beta_3, log(omega_beta))'
        delta_mu = (delta_mu_0, delta_mu_1, delta_mu_2, delta_mu_3, log(omega_mu))'
        xi = (eta, delta_beta, delta_mu)
        eta ~ MVNormal(M, C)
        delta_beta, delta_mu ~ Normal(m, s)

    Parameters
    ----------
    n : list, len(n) = J, n[j] = num observations in group j
    """

    J = len(n)
    
    # covariate data, not entirely specified in paper
    X = mc.rnormal(0, .1**-2, size=J)
    t = [pl.arange(n[j]) for j in range(J)]

    # hyper-priors, not specified in detail in paper
    m = 0.
    s = 1.
    M = pl.zeros(4)
    r = [[  1, .57, .18, .56],
         [.57,   1, .72, .16],
         [.18, .72,   1, .14],
         [.56, .16, .14,   1]]

    eta = mc.rmv_normal_cov(M, r)
    omega = .0001 #pl.exp(eta[-1])

    delta_beta = mc.rnormal(m, s**-2, size=5)
    omega_beta = .0001 #pl.exp(delta_beta[-1])

    delta_mu = mc.rnormal(m, s**-2, size=5)
    omega_mu = .0001 #pl.exp(delta_mu[-1])

    gamma = mc.rnormal(eta[0] + eta[1]*X + eta[2]*X**2, omega**-2.)
    beta = mc.rnormal(delta_beta[0] + delta_beta[1]*X + delta_beta[2]*X**2 + delta_beta[3]*gamma, omega_beta**-2)
    mu = mc.rnormal(delta_mu[0] + delta_mu[1]*X + delta_mu[2]*X**2 + delta_mu[3]*gamma + delta_mu[4]*beta, omega_mu**-2)

    # stochastic error, not specified in paper
    sigma = .01*pl.ones(J)
    y = [mc.rnormal(mu[j] - pl.exp(beta[j])*t[j] - pl.exp(gamma[j])*t[j]**2, sigma[j]**-2) for j in range(J)]

    eta_cross_eta = [eta[0]*eta[1], eta[0]*eta[2], eta[0]*eta[3], eta[1]*eta[2], eta[1]*eta[3], eta[2]*eta[3]]

    return vars()
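A minimal usage sketch (hypothetical group sizes; assumes pylab as pl and pymc as mc):

d = complex_hierarchical_data([5, 10, 15])    # J=3 groups
print(len(d['y']), len(d['y'][0]))            # 3 groups; the first has 5 observations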
Code Example #32
def simulate_age_group_data(N=50, delta_true=150, pi_true=true_rate_function):
    """ generate simulated data
    """
    # start with a simple model with N rows of data
    model = data_simulation.simple_model(N)

    # record the true age-specific rates
    model.ages = pl.arange(0, 101, 1)
    model.pi_age_true = pi_true(model.ages)

    # choose age groups randomly
    age_width = mc.runiform(1, 100, size=N)
    age_mid = mc.runiform(age_width / 2, 100 - age_width / 2, size=N)
    age_width[:10] = 10
    age_mid[:10] = pl.arange(5, 105, 10)
    #age_width[10:20] = 10
    #age_mid[10:20] = pl.arange(5, 105, 10)

    age_start = pl.array(age_mid - age_width / 2, dtype=int)
    age_end = pl.array(age_mid + age_width / 2, dtype=int)

    model.input_data['age_start'] = age_start
    model.input_data['age_end'] = age_end

    # choose effective sample size uniformly at random
    n = mc.runiform(100, 10000, size=N)
    model.input_data['effective_sample_size'] = n

    # integrate true age-specific rate across age groups to find true group rate
    model.input_data['true'] = pl.nan
    model.input_data['age_weights'] = ''

    for i in range(N):
        beta = mc.rnormal(0., .025**-2)

        # TODO: clean this up, it is computing more than is necessary
        age_weights = pl.exp(beta * model.ages)
        sum_pi_wt = pl.cumsum(model.pi_age_true * age_weights)
        sum_wt = pl.cumsum(age_weights)
        p = (sum_pi_wt[age_end] - sum_pi_wt[age_start]) / (sum_wt[age_end] -
                                                           sum_wt[age_start])

        model.input_data.loc[i, 'true'] = p[i]
        model.input_data.loc[i, 'age_weights'] = ';'.join(
            ['%.4f' % w for w in age_weights[age_start[i]:(age_end[i] + 1)]])

    # sample observed rate values from negative binomial distribution
    model.input_data['value'] = mc.rnegative_binomial(
        n * model.input_data['true'], delta_true) / n

    print(model.input_data.drop(['standard_error', 'upper_ci', 'lower_ci'],
                                axis=1))
    return model
Code Example #33
File: validate_age_group.py Project: aflaxman/gbd
def simulate_age_group_data(N=50, delta_true=150, pi_true=true_rate_function):
    """ generate simulated data
    """
    # start with a simple model with N rows of data
    model = data_simulation.simple_model(N)


    # record the true age-specific rates
    model.ages = pl.arange(0, 101, 1)
    model.pi_age_true = pi_true(model.ages)


    # choose age groups randomly
    age_width = mc.runiform(1, 100, size=N)
    age_mid = mc.runiform(age_width/2, 100-age_width/2, size=N)
    age_width[:10] = 10
    age_mid[:10] = pl.arange(5, 105, 10)
    #age_width[10:20] = 10
    #age_mid[10:20] = pl.arange(5, 105, 10)

    age_start = pl.array(age_mid - age_width/2, dtype=int)
    age_end = pl.array(age_mid + age_width/2, dtype=int)

    model.input_data['age_start'] = age_start
    model.input_data['age_end'] = age_end


    # choose effective sample size uniformly at random
    n = mc.runiform(100, 10000, size=N)
    model.input_data['effective_sample_size'] = n


    # integrate true age-specific rate across age groups to find true group rate
    model.input_data['true'] = pl.nan
    model.input_data['age_weights'] = ''

    for i in range(N):
        beta = mc.rnormal(0., .025**-2)

        # TODO: clean this up, it is computing more than is necessary
        age_weights = pl.exp(beta*model.ages)
        sum_pi_wt = pl.cumsum(model.pi_age_true*age_weights)
        sum_wt = pl.cumsum(age_weights)
        p = (sum_pi_wt[age_end] - sum_pi_wt[age_start]) / (sum_wt[age_end] - sum_wt[age_start])

        model.input_data.loc[i, 'true'] = p[i]
        model.input_data.loc[i, 'age_weights'] = ';'.join(['%.4f'%w for w in age_weights[age_start[i]:(age_end[i]+1)]])

    # sample observed rate values from negative binomial distribution
    model.input_data['value'] = mc.rnegative_binomial(n*model.input_data['true'], delta_true) / n

    print(model.input_data.drop(['standard_error', 'upper_ci', 'lower_ci'], axis=1))
    return model
Code Example #34
def alpha_true_sim(model, area_list, sigma_true):
    # choose alpha^true
    alpha = dict(all=0.)
    sum_sr = 0.
    last_sr = -1
    for sr in model.hierarchy['all']:
        if sr not in area_list:
            continue

        sum_r = 0.
        last_r = -1
        for r in model.hierarchy[sr]:
            if r not in area_list:
                continue

            sum_c = 0.
            last_c = -1
            for c in model.hierarchy[r]:
                if c not in area_list:
                    continue

                alpha[c] = mc.rnormal(0., sigma_true[3]**-2.)
                sum_c += alpha[c]
                last_c = c
            if last_c >= 0:
                alpha[last_c] -= sum_c

            alpha[r] = mc.rnormal(0., sigma_true[2]**-2.)
            sum_r += alpha[r]
            last_r = r
        if last_r >= 0:
            alpha[last_r] -= sum_r

        alpha[sr] = mc.rnormal(0., sigma_true[1]**-2.)
        sum_sr += alpha[sr]
        last_sr = sr
    if last_sr >= 0:
        alpha[last_sr] -= sum_sr

    return alpha
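The alpha[last] -= sum lines above enforce a sum-to-zero constraint on the random effects at each level of the hierarchy. An isolated sketch of the trick (assuming pymc as mc):

draws = mc.rnormal(0., .1**-2., size=4)    # sibling effects at one level
draws[-1] -= draws.sum()                   # recenter so the siblings sum to zero
assert abs(draws.sum()) < 1e-12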
Code Example #35
File: validate_covariates.py Project: aflaxman/gbd
def alpha_true_sim(model, area_list, sigma_true):
    # choose alpha^true
    alpha = dict(all=0.)
    sum_sr = 0.
    last_sr = -1
    for sr in model.hierarchy['all']:
        if sr not in area_list:
            continue

        sum_r = 0.
        last_r = -1
        for r in model.hierarchy[sr]:
            if r not in area_list:
                continue

            sum_c = 0.
            last_c = -1
            for c in model.hierarchy[r]:
                if c not in area_list:
                    continue

                alpha[c] = mc.rnormal(0., sigma_true[3]**-2.)
                sum_c += alpha[c]
                last_c = c
            if last_c >= 0:
                alpha[last_c] -= sum_c

            alpha[r] = mc.rnormal(0., sigma_true[2]**-2.)
            sum_r += alpha[r]
            last_r = r
        if last_r >= 0:
            alpha[last_r] -= sum_r

        alpha[sr] = mc.rnormal(0., sigma_true[1]**-2.)
        sum_sr += alpha[sr]
        last_sr = sr
    if last_sr >= 0:
        alpha[last_sr] -= sum_sr

    return alpha
Code Example #36
File: data_simulation.py Project: ngraetz/dismod_mr
def simulated_age_intervals(data_type, n, a, pi_age_true, sigma_true):
    # choose age intervals to measure
    age_start = np.array(mc.runiform(0, 100, n), dtype=int)
    age_start.sort()  # sort to make it easy to discard the edges when testing
    age_end = np.array(mc.runiform(age_start+1, np.minimum(age_start+10,100)), dtype=int)

    # find truth for the integral across the age intervals
    import scipy.integrate
    pi_interval_true = [scipy.integrate.trapz(pi_age_true[a_0i:(a_1i+1)]) / (a_1i - a_0i) 
                        for a_0i, a_1i in zip(age_start, age_end)]

    # generate covariates that add explained variation
    X = mc.rnormal(0., 1.**2, size=(n,3))
    beta_true = [-.1, .1, .2]
    beta_true = [0, 0, 0]
    Y_true = np.dot(X, beta_true)

    # calculate the true value of the rate in each interval
    pi_true = pi_interval_true*np.exp(Y_true)

    # simulate the noisy measurement of the rate in each interval
    p = np.maximum(0., mc.rnormal(pi_true, 1./sigma_true**2.))

    # store the simulated data in a pandas DataFrame
    data = pandas.DataFrame(dict(value=p, age_start=age_start, age_end=age_end,
                                 x_0=X[:,0], x_1=X[:,1], x_2=X[:,2]))
    data['effective_sample_size'] = np.maximum(p*(1-p)/sigma_true**2, 1.)

    data['standard_error'] = np.nan
    data['upper_ci'] = np.nan
    data['lower_ci'] = np.nan

    data['year_start'] = 2005.  # TODO: make these vary
    data['year_end'] = 2005.
    data['sex'] = 'total'
    data['area'] = 'all'
    data['data_type'] = data_type
    
    return data
Code Example #37
 def __init__(self, stochastic, proposal_sd=None, verbose=None):
     pymc.Metropolis.__init__(self,
                              stochastic,
                              proposal_sd=proposal_sd,
                              verbose=verbose,
                              tally=False)
     self.proposal_tau = self.proposal_sd**-2.
     self.n = 0
     self.N = 11
     self.value = pymc.rnormal(
         self.stochastic.value,
         self.proposal_tau,
         size=tuple([self.N] + list(self.stochastic.value.shape)))
Code Example #38
def data_gen_for_rnn(samples_n=1, tau_start=75, tau_end=100, gamma=0.01, var=5):
    alpha = 1.0 / gamma
    lam = alpha
    for i in range(samples_n):
        con = []
        tau = pm.rdiscrete_uniform(tau_start, tau_end)
        for j in range(tau):
            if j == 0:
                val = round(pm.rnormal(lam, var), 2)
                con.append(val)
            elif j == 1:
                val = con[0] + pm.rnormal(0, var)
                val = round(val, 2)
                con.append(val)

            else:
                # n = len(con)
                # lam_n = float(np.array(con).sum())/n
                val = 0.7 * con[-1] + 0.3 * con[-2] + pm.rnormal(0, var)
                val = round(val, 2)
                con.append(val)
                # print val, lam_n
        yield con
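A minimal usage sketch; each yielded list is one simulated series whose length tau is drawn uniformly from [tau_start, tau_end] (assumes pymc as pm):

for series in data_gen_for_rnn(samples_n=2):
    print(len(series), series[:5])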
Code Example #39
    def step(self):
        # We're going to do this in a way that allows easy extension
        # to multivariate beta (and even y with non-diagonal covariances,
        # for whatever that's worth).

        y = np.atleast_1d(np.squeeze(self.y_obs.value))

        if len(y) == 0:
            self.stochastic.random()
            return

        X = getattr(self.X, 'value', self.X)
        # Gotta broadcast when the parameters are scalars.
        bcast_beta = np.ones_like(self.stochastic.value)
        a_beta = bcast_beta * getattr(self.a_beta, 'value', self.a_beta)
        tau_beta = bcast_beta * np.atleast_1d(getattr(self.tau_beta, 'value',
                                                      self.tau_beta))

        tau_y = getattr(self.tau_y, 'value', self.tau_y)

        #
        # This is how we get the posterior mean:
        # C^{-1} m = R^{-1} a + F V^{-1} y
        #
        rhs = np.dot(tau_beta, a_beta) + np.dot(X.T * tau_y, y)

        tau_post = np.diag(tau_beta) + np.dot(X.T * tau_y, X)

        a_post = np.linalg.solve(tau_post, rhs)
        tau_post = np.diag(tau_post)

        # TODO: These could be symbolic/Deterministic, no?
        parents_post = {'mu': a_post, 'tau': tau_post}
        self.stochastic.parents_post = parents_post

        # TODO: If self.V_inv, sample normal-gamma dist

        if self.post_a is not None and self.post_b is not None:
            parents_post['a'] = self.post_a
            parents_post['b'] = self.post_b
            res = pymc.rtruncated_normal(**parents_post)

            # pymc's truncated distribution(s) doesn't handle
            # the limit values correctly, so we have to clip
            # the values.
            self.stochastic.value = res.clip(self.post_a, self.post_b)
        else:
            self.stochastic.value = pymc.rnormal(**parents_post)
Code Example #40
File: models.py Project: aflaxman/pymc-mixing-movies
def setup_and_sample(vars, step, iters=5000):
    mod = mc.MCMC(vars)
    if step == 'AdaptiveMetropolis':
        mod.use_step_method(mc.AdaptiveMetropolis, mod.X)
    elif step == 'Hit-and-Run':
        mod.use_step_method(steppers.HitAndRun, mod.X, proposal_sd=.1)
    elif step == 'H-RAM':
        #mod.use_step_method(steppers.HRAM, mod.X, proposal_sd=.01)
        mod.use_step_method(history_steps.HRAM, mod.X, init_history=mc.rnormal(mod.X.value, 1., size=(20, len(mod.X.value))), xprime_sds=2, xprime_n=51)
    elif step == 'Metropolis':
        mod.use_step_method(mc.Metropolis, mod.X, proposal_sd=.1)
    else:
        raise Exception('Unrecognized Step Method')
    mod.sample(iters)

    return mod
Code Example #41
def test_neg_binom_model_sim(N=16):
    # simulate negative binomial data
    pi_true = .01
    delta_true = 50

    n = pl.array(pl.exp(mc.rnormal(10, 1**-2, size=N)), dtype=int)
    k = pl.array(mc.rnegative_binomial(n*pi_true, delta_true, size=N), dtype=float)
    p = k/n

    # create log-normal model and priors (fit to the simulated NB data)
    vars = dict(mu_age=mc.Uniform('mu_age', 0., 1000., value=.01),
                sigma=mc.Uniform('sigma', 0., 10000., value=1000.))
    vars['mu_interval'] = mc.Lambda('mu_interval', lambda mu=vars['mu_age']: mu*pl.ones(N))
    vars.update(rate_model.log_normal_model('sim', vars['mu_interval'], vars['sigma'], p, 1./pl.sqrt(n)))

    # fit model
    m = mc.MCMC(vars)
    m.sample(1)
Code Example #42
def test_covariate_model_sim_w_hierarchy():
    n = 50

    # setup hierarchy
    hierarchy, output_template = data_simulation.small_output()

    # simulate normal data
    area_list = np.array(['all', 'USA', 'CAN'])
    area = area_list[mc.rcategorical([.3, .3, .4], n)]

    sex_list = np.array(['male', 'female', 'total'])
    sex = sex_list[mc.rcategorical([.3, .3, .4], n)]

    year = np.array(mc.runiform(1990, 2010, n), dtype=int)

    alpha_true = dict(all=0., USA=.1, CAN=-.2)

    pi_true = np.exp([alpha_true[a] for a in area])
    sigma_true = .05 * np.ones_like(pi_true)

    p = mc.rnormal(pi_true, 1. / sigma_true**2.)

    model = dismod_mr.data.ModelData()
    model.input_data = pd.DataFrame(
        dict(value=p, area=area, sex=sex, year_start=year, year_end=year))
    model.hierarchy, model.output_template = hierarchy, output_template

    # create model and priors
    vars = {}
    vars.update(
        dismod_mr.model.covariates.mean_covariate_model(
            'test', 1, model.input_data, {}, model, 'all', 'total', 'all'))
    vars.update(
        dismod_mr.model.likelihood.normal('test', vars['pi'], 0., p,
                                          sigma_true))

    # fit model
    m = mc.MCMC(vars)
    m.sample(2)

    assert 'sex' not in vars['U']
    assert 'x_sex' in vars['X']
    assert len(vars['beta']) == 1
Code Example #43
File: test_age_pattern.py Project: aflaxman/gbd
def test_age_pattern_model_sim():
    # simulate normal data
    a = pl.arange(0, 100, 5)
    pi_true = .0001 * (a * (100. - a) + 100.)
    sigma_true = .025*pl.ones_like(pi_true)

    p = pl.maximum(0., mc.rnormal(pi_true, 1./sigma_true**2.))

    # create model and priors
    vars = {}

    vars.update(age_pattern.age_pattern('test', ages=pl.arange(101), knots=pl.arange(0,101,5), smoothing=.1))

    vars['pi'] = mc.Lambda('pi', lambda mu=vars['mu_age'], a=a: mu[a])
    vars.update(rate_model.normal_model('test', vars['pi'], 0., p, sigma_true))

    # fit model
    m = mc.MCMC(vars)
    m.sample(2)
Code Example #44
def main():
    """ Demonstrating thinning of two autocorrelated inputs (representing
        posterior probabilities). The key point is the thinned - every 2nd / 3rd
        point - functions approach zero quicker. More thinning is better (but
        expensive)
    """

    # x_t = pm.rnormal(0, 1, 200)
    # x_t[0] = 0
    y_t = np.zeros(200)
    for i in range(1, 200):
        y_t[i] = pm.rnormal(y_t[i - 1], 1)

    max_x = 200 // 3 + 1
    x = np.arange(1, max_x)

    colors = ["#348ABD", "#A60628", "#7A68A6"]
    plt.bar(x,
            autocorr(y_t)[1:max_x],
            edgecolor=colors[0],
            label="no thinning",
            color=colors[0],
            width=1)
    plt.bar(x,
            autocorr(y_t[::2])[1:max_x],
            edgecolor=colors[1],
            label="keeping every 2nd sample",
            color=colors[1],
            width=1)
    plt.bar(x,
            autocorr(y_t[::3])[1:max_x],
            width=1,
            edgecolor=colors[2],
            label="keeping every 3rd sample",
            color=colors[2])

    plt.autoscale(tight=True)
    plt.legend(title="Autocorrelation plot for $y_t$", loc="lower left")
    plt.ylabel("measured correlation \nbetween $y_t$ and $y_{t-k}$.")
    plt.xlabel("k (lag)")
    plt.title("Autocorrelation of $y_t$ (no thinning vs. thinning) \
            at differing $k$ lags.")
    plt.show()
Code Example #45
def plot_funnel(pi_true, delta_str):
    n = pl.exp(mc.rnormal(10, 2**-2, size=10000))
    delta = float(delta_str) * pl.ones_like(n)
    p = pi_true * pl.ones_like(n)

    # old way:
    #delta = delta * p * n

    nb = rate_model.neg_binom('funnel', p, delta, p, n)
    r = nb['p_pred'].value

    pl.vlines([pi_true],
              .1 * n.min(),
              10 * n.max(),
              linewidth=2,
              linestyle='-',
              color='w',
              zorder=9)
    pl.vlines([pi_true],
              .1 * n.min(),
              10 * n.max(),
              linewidth=1,
              linestyle='--',
              color='black',
              zorder=10)
    pl.plot(r, n, 'ko', mew=0, alpha=.25)

    pl.semilogy(schiz['r'],
                schiz['n'],
                'ks',
                mew=1,
                mec='white',
                ms=4,
                label='Observed values')

    pl.xlabel('Rate (per PY)')
    pl.ylabel('Study size (PY)')
    pl.xticks([0, .005, .01])
    pl.axis([-.0001, .0101, 50., 15000000])
    pl.title(r'$\delta = %s$' % delta_str)
Code Example #46
File: neg_binomial_model.py Project: aflaxman/gbd
def plot_funnel(pi_true, delta_str):
    n = pl.exp(mc.rnormal(10, 2 ** -2, size=10000))
    delta = float(delta_str) * pl.ones_like(n)
    p = pi_true * pl.ones_like(n)

    # old way:
    # delta = delta * p * n

    nb = rate_model.neg_binom("funnel", p, delta, p, n)
    r = nb["p_pred"].value

    pl.vlines([pi_true], 0.1 * n.min(), 10 * n.max(), linewidth=2, linestyle="-", color="w", zorder=9)
    pl.vlines([pi_true], 0.1 * n.min(), 10 * n.max(), linewidth=1, linestyle="--", color="black", zorder=10)
    pl.plot(r, n, "ko", mew=0, alpha=0.25)

    pl.semilogy(schiz["r"], schiz["n"], "ks", mew=1, mec="white", ms=4, label="Observed values")

    pl.xlabel("Rate (per PY)")
    pl.ylabel("Study size (PY)")
    pl.xticks([0, 0.005, 0.01])
    pl.axis([-0.0001, 0.0101, 50.0, 15000000])
    pl.title(r"$\delta = %s$" % delta_str)
Code Example #47
File: beta_binomial_model.py Project: aflaxman/gbd
def plot_beta_binomial_funnel(alpha, beta):
    pi_true = alpha/(alpha+beta)
    pi = mc.rbeta(alpha, beta, size=10000)

    n = pl.exp(mc.rnormal(10, 2**-2, size=10000))
    k = mc.rbinomial(pl.array(n, dtype=int), pi)
    r = k/n
    pl.vlines([pi_true], .1*n.min(), 10*n.max(),
              linewidth=2, linestyle='-', color='w', zorder=9)
    pl.vlines([pi_true], .1*n.min(), 10*n.max(),
              linewidth=1, linestyle='--', color='black', zorder=10)
    pl.plot(r, n, 'ko',
            mew=0, alpha=.25)

    pl.semilogy(schiz['r'], schiz['n'], 'ks', mew=1, mec='white', ms=4,
                label='Observed values')

    pl.xlabel('Rate (per PY)')
    pl.ylabel('Study size (PY)')
    pl.xticks([0, .005, .01])
    pl.axis([-.0001, .0101, 50., 1500000])
    pl.title(r'$\alpha=%d$, $\beta=%d$' % (alpha, beta))
Code Example #48
File: test_covariates.py Project: aflaxman/gbd
def test_covariate_model_sim_w_hierarchy():
    n = 50

    # setup hierarchy
    hierarchy, output_template = data_simulation.small_output()

    # simulate normal data
    area_list = pl.array(['all', 'USA', 'CAN'])
    area = area_list[mc.rcategorical([.3, .3, .4], n)]

    sex_list = pl.array(['male', 'female', 'total'])
    sex = sex_list[mc.rcategorical([.3, .3, .4], n)]

    year = pl.array(mc.runiform(1990, 2010, n), dtype=int)
        
    alpha_true = dict(all=0., USA=.1, CAN=-.2)

    pi_true = pl.exp([alpha_true[a] for a in area])
    sigma_true = .05*pl.ones_like(pi_true)

    p = mc.rnormal(pi_true, 1./sigma_true**2.)

    model = data.ModelData()
    model.input_data = pandas.DataFrame(dict(value=p, area=area, sex=sex, year_start=year, year_end=year))
    model.hierarchy, model.output_template = hierarchy, output_template

    # create model and priors
    vars = {}
    vars.update(covariate_model.mean_covariate_model('test', 1, model.input_data, {}, model,
                                                     'all', 'total', 'all'))
    vars.update(rate_model.normal_model('test', vars['pi'], 0., p, sigma_true))

    # fit model
    m = mc.MCMC(vars)
    m.sample(2)

    assert 'sex' not in vars['U']
    assert 'x_sex' in vars['X']
    assert len(vars['beta']) == 1
Code Example #49
File: perturber.py Project: amcknight/Combatizer
    def normal(s):
        cur_var = 1.0

        while True:
            result = []
            for utility in s.utilities:
                while True:
                    cur_result = utility + pymc.rnormal(0, 1/cur_var)
                    if s.h_utilities_within_range([cur_result]):
                        break
                result.append(cur_result)
            if comparitor(s.utilities, result) >= s.threshold:
                break
            cur_var /= 2
            
            # DEBUG
            print('cur_var: ' + str(cur_var))
            print('s.utilities: ' + str(s.utilities))
            print('result: ' + str(result))
            print('similarity: ' + str(comparitor(s.utilities, result)))
            print()
        
        return result
Code Example #50
def setup_and_sample(vars, step, iters=5000):
    mod = mc.MCMC(vars)
    if step == 'AdaptiveMetropolis':
        mod.use_step_method(mc.AdaptiveMetropolis, mod.X)
    elif step == 'Hit-and-Run':
        mod.use_step_method(steppers.HitAndRun, mod.X, proposal_sd=.1)
    elif step == 'H-RAM':
        #mod.use_step_method(steppers.HRAM, mod.X, proposal_sd=.01)
        mod.use_step_method(history_steps.HRAM,
                            mod.X,
                            init_history=mc.rnormal(mod.X.value,
                                                    1.,
                                                    size=(20,
                                                          len(mod.X.value))),
                            xprime_sds=2,
                            xprime_n=51)
    elif step == 'Metropolis':
        mod.use_step_method(mc.Metropolis, mod.X, proposal_sd=.1)
    else:
        raise Exception('Unrecognized Step Method')
    mod.sample(iters)

    return mod
Code Example #51
def plot_funnel(pi_true, sigma_str):
    n = pl.exp(mc.rnormal(10, 2**-2, size=10000))
    sigma = float(sigma_str)*pl.ones_like(n)
    p = pi_true*pl.ones_like(n)

    oln = rate_model.offset_log_normal('funnel', p, sigma, p, pl.sqrt(p*(1-p)/n))
    r = oln['p_pred'].value

    pl.vlines([pi_true], .1*n.min(), 10*n.max(),
              linewidth=2, linestyle='-', color='w', zorder=9)
    pl.vlines([pi_true], .1*n.min(), 10*n.max(),
              linewidth=1, linestyle='--', color='black', zorder=10)
    pl.plot(r, n, 'ko',
            mew=0, alpha=.25)

    pl.semilogy(schiz['r'], schiz['n'], 'ks', mew=1, mec='white', ms=4,
                label='Observed values')

    pl.xlabel('Rate (per PY)')
    pl.ylabel('Study size (PY)')
    pl.xticks([0, .005, .01])
    pl.axis([-.0001, .0101, 50., 15000000])
    pl.title(r'$\sigma = %s$'%sigma_str)
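Here offset_log_normal supplies posterior-predictive rates r for simulated study sizes n, and the plot overlays them (black dots) with the observed schizophrenia data (white-edged squares) and the true rate pi_true (dashed vertical line); the scatter narrows as study size grows, producing the characteristic funnel shape.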
Code example #53
def plot_beta_binomial_funnel(alpha, beta):
    pi_true = alpha / (alpha + beta)
    pi = mc.rbeta(alpha, beta, size=10000)

    n = pl.exp(mc.rnormal(10, 2**-2, size=10000))
    k = mc.rbinomial(pl.array(n, dtype=int), pi)
    r = k / n
    pl.vlines([pi_true],
              .1 * n.min(),
              10 * n.max(),
              linewidth=2,
              linestyle='-',
              color='w',
              zorder=9)
    pl.vlines([pi_true],
              .1 * n.min(),
              10 * n.max(),
              linewidth=1,
              linestyle='--',
              color='black',
              zorder=10)
    pl.plot(r, n, 'ko', mew=0, alpha=.25)

    pl.semilogy(schiz['r'],
                schiz['n'],
                'ks',
                mew=1,
                mec='white',
                ms=4,
                label='Observed values')

    pl.xlabel('Rate (per PY)')
    pl.ylabel('Study size (PY)')
    pl.xticks([0, .005, .01])
    pl.axis([-.0001, .0101, 50., 1500000])
    pl.title(r'$\alpha=%d$, $\beta=%d$' % (alpha, beta))
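A hedged usage note: since pi_true = alpha / (alpha + beta), hypothetical calls like the following (not from the original script) draw funnels around the same rate 0.004 with different amounts of beta-binomial overdispersion:

plot_beta_binomial_funnel(4., 996.)     # small pseudo-counts: wide funnel
plot_beta_binomial_funnel(40., 9960.)   # larger pseudo-counts: tighter funnel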
Code example #54
File: test_missing.py Project: AsymmetricHuang/pymc
    def test_non_missing(self):
        """
        Test to ensure that masks without any missing values are not imputed.
        """

        fake_data = rnormal(0, 1, size=10)
        m = ma.masked_array(fake_data, fake_data == -999)

        # Priors
        mu = Normal('mu', mu=0, tau=0.0001)
        s = Uniform('s', lower=0, upper=100, value=10)
        tau = s ** -2

        # Likelihood with missing data
        x = Normal('x', mu=mu, tau=tau, value=m, observed=True)

        # Instantiate sampler
        M = MCMC([mu, s, tau, x])

        # Run sampler
        M.sample(20000, 19000, progress_bar=0)

        # Ensure likelihood does not have a trace
        assert_raises(AttributeError, x.__getattribute__, 'trace')
Code example #55
File: kq1.py Project: fonnesbeck/PKUMetaAnalysis
def iq_pred(mu=mu_iq, tau=tau_iq):
    """Simulated data for posterior predictive checks"""
    return rnormal(mu, tau, size=len(obs_indiv['iq']))
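In PyMC2 such a simulator is typically wrapped as a Deterministic so its draws are recorded alongside the model parameters; a minimal hedged sketch of that wiring, assuming mu_iq, tau_iq, and obs_indiv exist in the surrounding file (the decorator and sampler call are assumptions, not shown in the original):

from pymc import deterministic, rnormal, MCMC

@deterministic
def iq_pred(mu=mu_iq, tau=tau_iq):
    """Simulated data for posterior predictive checks"""
    return rnormal(mu, tau, size=len(obs_indiv['iq']))

M = MCMC([mu_iq, tau_iq, iq_pred])
M.sample(10000, 5000)
ppc_draws = iq_pred.trace()  # one simulated data set per retained iteration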
Code example #56
File: straightlinefit.py Project: jhsa26/pymc
"""
Straight-line fit: calibrate the slope and intercept
regression parameters from noisy data.
"""
from pymc import stochastic, observed, deterministic, uniform_like, runiform, rnormal, Sampler, Normal, Uniform
from numpy import inf, log, cos, array
import pylab

# ------------------------------------------------------------------------------
# Synthetic values
# Replace by real data
# ------------------------------------------------------------------------------
slope = 1.5
intercept = 4
N = 30
true_x = runiform(0, 50, N)
true_y = slope * true_x + intercept
data_y = rnormal(true_y, 2)
data_x = rnormal(true_x, 2)

# ------------------------------------------------------------------------------
# Calibration of straight line parameters from data
# ------------------------------------------------------------------------------


@stochastic
def theta(value=array([2., 5.])):
    """Slope and intercept parameters for a straight line.
    The likelihood corresponds to the prior probability of the parameters."""
    slope, intercept = value
    prob_intercept = uniform_like(intercept, -10, 10)
    prob_slope = log(1. / cos(slope)**2)
    return prob_intercept + prob_slope
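The snippet stops at the prior. A hedged sketch of how the calibration might continue, ignoring the noise on x for simplicity (this completion is an assumption, not the original file's code): predict y from theta at the measured x values and attach a normal likelihood to the observed data.

# Hypothetical completion for illustration only.
@deterministic
def modelled_y(x=data_x, theta=theta):
    """Straight line evaluated at the measured x values."""
    slope, intercept = theta
    return slope * x + intercept

# tau = 1/2**2 matches the sd=2 noise used to simulate data_y above.
y = Normal('y', mu=modelled_y, tau=1. / 2**2, value=data_y, observed=True)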
Code example #57
File: talk_splines.py Project: studentmicky/gbd
ages = pl.arange(101)
knots = [0, 15, 60, 100]
import scipy.interpolate

Y_true = pl.exp(
    scipy.interpolate.interp1d(knots,
                               pl.log([1.2, .3, .6, 1.5]),
                               kind='linear')(ages))

N = 50
tau = .1**-2
X = pl.array(mc.runiform(pl.arange(0., 100., 100. / N),
                         100. / N + pl.arange(0., 100., 100. / N),
                         size=N),
             dtype=int)
Y = mc.rnormal(Y_true[X], tau)

### @export 'initial-rates'
pl.figure(figsize=(17., 11), dpi=72)

dismod3.graphics.plot_data_bars(df, 'talk')
pl.semilogy([0], [.1], '-')

pl.title(
    'All-cause mortality rate\nin 1990 for females\nin sub-Saharan Africa, Southern.',
    size=55)
pl.ylabel('Rate (Per PY)', size=48)
pl.xlabel('Age (Years)', size=48)

pl.subplots_adjust(.1, .175, .98, .7)
pl.axis([-5, 105, 2.e-4, .8])
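Note that Y_true interpolates the knot values linearly in log space, so the true curve is piecewise log-linear; the observations Y then add normal noise with standard deviation 0.1 (tau = 0.1**-2) at ages X, with one age drawn from each bin of width 100/N.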
Code example #58
File: covariates.py Project: ngraetz/dismod_mr
def predict_for(model, parameters,
                root_area, root_sex, root_year,
                area, sex, year,
                population_weighted,
                vars,
                lower, upper):
    """ Generate draws from posterior predicted distribution for a
    specific (area, sex, year)

    :Parameters:
      - `model` : data.DataModel
      - `root_area` : str, area for which this model was fit consistently
      - `root_sex` : str, sex for which this model was fit consistently
      - `root_year` : str, year for which this model was fit consistently
      - `area` : str, area to predict for
      - `sex` : str, sex to predict for
      - `year` : str, year to predict for
      - `population_weighted` : bool, whether the prediction should be population weighted when it aggregates units of the area RE hierarchy
      - `vars` : dict, including entries for alpha, beta, mu_age, U, and X
      - `lower, upper` : float, bounds on predictions from expert priors

    :Results:
      - Returns array of draws from posterior predicted distribution

    """
    area_hierarchy = model.hierarchy
    output_template = model.output_template.copy()

    # find number of samples from posterior
    len_trace = len(vars['mu_age'].trace())

    # compile array of draws from posterior distribution of alpha (random effect covariate values)
    # a row for each draw from the posterior distribution
    # a column for each random effect (e.g. countries with data, regions with countries with data, etc)
    #
    # there are several cases to handle, or at least at one time there were:
    #   vars['alpha'] is a pymc Stochastic with an array for its value (no longer used?)
    #   vars['alpha'] is a list of pymc Nodes
    #   vars['alpha'] is a list of floats
    #   vars['alpha'] is a list of some floats and some pymc Nodes
    #   'alpha' is not in vars
    #
    # when vars['alpha'][i] is a float, there is also information on the uncertainty in this value, stored in
    # vars['const_alpha_sigma'][i], which is not used when fitting the model, but should be incorporated in
    # the prediction
    
    if 'alpha' in vars and isinstance(vars['alpha'], mc.Node):
        assert 0, 'No longer used'
        alpha_trace = vars['alpha'].trace()
    elif 'alpha' in vars and isinstance(vars['alpha'], list):
        alpha_trace = []
        for n, sigma in zip(vars['alpha'], vars['const_alpha_sigma']):
            if isinstance(n, mc.Node):
                alpha_trace.append(n.trace())
            else:
                # uncertainty of constant alpha incorporated here
                sigma = max(sigma, 1.e-9) # make sure sigma is non-zero
                assert not np.isnan(sigma)
                alpha_trace.append(mc.rnormal(float(n), sigma**-2, size=len_trace))
        alpha_trace = np.vstack(alpha_trace).T
    else:
        alpha_trace = np.array([])


    # compile array of draws from posterior distribution of beta (fixed effect covariate values)
    # a row for each draw from the posterior distribution
    # a column for each fixed effect
    #
    # there are several cases to handle, or at least at one time there were:
    #   vars['beta'] is a pymc Stochastic with an array for its value (no longer used?)
    #   vars['beta'] is a list of pymc Nodes
    #   vars['beta'] is a list of floats
    #   vars['beta'] is a list of some floats and some pymc Nodes
    #   'beta' is not in vars
    #
    # when vars['beta'][i] is a float, there is also information on the uncertainty in this value, stored in
    # vars['const_beta_sigma'][i], which is not used when fitting the model, but should be incorporated in
    # the prediction
    #
    # TODO: refactor to reduce duplicate code (this is very similar to code for alpha above)

    if 'beta' in vars and isinstance(vars['beta'], mc.Node):
        assert 0, 'No longer used'
        beta_trace = vars['beta'].trace()
    elif 'beta' in vars and isinstance(vars['beta'], list):
        beta_trace = []
        for n, sigma in zip(vars['beta'], vars['const_beta_sigma']):
            if isinstance(n, mc.Node):
                beta_trace.append(n.trace())
            else:
                # uncertainty of constant beta incorporated here
                sigma = max(sigma, 1.e-9) # make sure sigma is non-zero
                assert not np.isnan(sigma)
                beta_trace.append(mc.rnormal(float(n), sigma**-2., size=len_trace))
        beta_trace = np.vstack(beta_trace).T
    else:
        beta_trace = np.array([])

    # the prediction for the requested area is produced by aggregating predictions for all of the children
    # of that area in the area_hierarchy (a networkx.DiGraph)

    leaves = [n for n in nx.traversal.bfs_tree(area_hierarchy, area) if area_hierarchy.successors(n) == []]
    if len(leaves) == 0:
        # networkx returns an empty list when the bfs tree is a single node
        leaves = [area]


    # initialize covariate_shift and total_population
    covariate_shift = np.zeros(len_trace)
    total_population = 0.

    # group output_template for easy access
    output_template = output_template.groupby(['area', 'sex', 'year']).mean()

    # if there are fixed effects, the effect coefficients are stored as an array in vars['X']
    # use this to put together a covariate matrix for the predictions, according to the output_template
    # covariate values
    #
    # the resulting array is covs
    if 'X' in vars:
        covs = output_template.filter(vars['X'].columns)
        if 'x_sex' in vars['X'].columns:
            covs['x_sex'] = sex_value[sex]
        assert np.all(covs.columns == vars['X_shift'].index), 'covariate columns and unshift index should match up'
        for x_i in vars['X_shift'].index:
            covs[x_i] -= vars['X_shift'][x_i] # shift covariates so that the root node has X_ar,sr,yr == 0
    else:
        covs = pd.DataFrame(index=output_template.index)

    # if there are random effects, put together an indicator based on
    # their hierarchical relationships
    #
    if 'U' in vars:
        p_U = area_hierarchy.number_of_nodes()  # random effects for area
        U_l = pd.DataFrame(np.zeros((1, p_U)), columns=area_hierarchy.nodes())
        U_l = U_l.filter(vars['U'].columns)
    else:
        U_l = pd.DataFrame(index=[0])

    # loop through leaves of area_hierarchy subtree rooted at 'area',
    # make prediction for each using appropriate random
    # effects and appropriate fixed effect covariates
    #
    for l in leaves:
        log_shift_l = np.zeros(len_trace)
        U_l.ix[0,:] = 0.

        root_to_leaf = nx.shortest_path(area_hierarchy, root_area, l)
        for node in root_to_leaf[1:]:
            if node not in U_l.columns:
                ## add a column U_l[node] = rnormal(0, appropriate_tau)
                level = len(nx.shortest_path(area_hierarchy, 'all', node))-1
                if 'sigma_alpha' in vars:
                    tau_l = vars['sigma_alpha'][level].trace()**-2
                    
                U_l[node] = 0.

                # if this node was not already included in the alpha_trace array, add it
                # there are several cases for adding:
                #  if the random effect has a distribution of Constant
                #    add it, using a sigma as well
                #  otherwise, sample from a normal with mean zero and precision tau_l
                if parameters.get('random_effects', {}).get(node, {}).get('dist') == 'Constant':
                    mu = parameters['random_effects'][node]['mu']
                    sigma = parameters['random_effects'][node]['sigma']
                    sigma = max(sigma, 1.e-9) # make sure sigma is non-zero

                    alpha_node = mc.rnormal(mu,
                                            sigma**-2,
                                            size=len_trace)
                else:
                    if 'sigma_alpha' in vars:
                        alpha_node = mc.rnormal(0., tau_l)
                    else:
                        alpha_node = np.zeros(len_trace)

                if len(alpha_trace) > 0:
                    alpha_trace = np.vstack((alpha_trace.T, alpha_node)).T
                else:
                    alpha_trace = np.atleast_2d(alpha_node).T

            # TODO: implement a more robust way to align alpha_trace and U_l
            U_l.ix[0, node] = 1.

        # 'shift' the random effects matrix to have the intended
        # level of the hierarchy as the reference value
        if 'U_shift' in vars:
            for node in vars['U_shift']:
                U_l -= vars['U_shift'][node]

        # add the random effect intercept shift (len_trace draws)
        log_shift_l += np.dot(alpha_trace, U_l.T).flatten()
            
        # make X_l
        if len(beta_trace) > 0:
            X_l = covs.ix[l, sex, year]
            log_shift_l += np.dot(beta_trace, X_l.T).flatten()

        if population_weighted:
            # combine in linear-space with population weights
            shift_l = np.exp(log_shift_l)
            covariate_shift += shift_l * output_template['pop'][l,sex,year]
            total_population += output_template['pop'][l,sex,year]
        else:
            # combine in log-space without weights
            covariate_shift += log_shift_l
            total_population += 1.

    if population_weighted:
        covariate_shift /= total_population
    else:
        covariate_shift = np.exp(covariate_shift / total_population)
        
    parameter_prediction = (vars['mu_age'].trace().T * covariate_shift).T
        
    # clip predictions to bounds from expert priors
    parameter_prediction = parameter_prediction.clip(lower, upper)
    
    return parameter_prediction
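Finally, a hedged usage sketch of predict_for. The 'p' keys and the attribute names on the fitted model are assumptions for illustration; in practice the vars dict comes from a previously fitted dismod_mr model:

# Hypothetical call: draws for USA males in 2005 from a model
# fit consistently at the 'all'/'total'/'all' level.
pred = predict_for(model, model.parameters['p'],
                   'all', 'total', 'all',
                   'USA', 'male', 2005,
                   population_weighted=True,
                   vars=model.vars['p'],
                   lower=0., upper=1.)
print(pred.shape)  # (number of posterior draws, number of ages)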