Example #1
    def step(self):

        self.compute_y_logp()

        t_last = 0
        for stoch in self.state_seq:
            t_end = t_last + np.alen(stoch.value)
            time_range = xrange(t_last, t_end)

            trans_mat = stoch.parents['trans_mat']
            trans_mat = getattr(trans_mat, 'value', trans_mat)

            P = np.column_stack((trans_mat, 1. - trans_mat.sum(axis=1)))

            p0 = stoch.parents['p0']
            p0 = getattr(p0, 'value', p0)
            if p0 is None:
                p0 = compute_steady_state(trans_mat)

            p_run = p0
            # Very inefficient forward pass:
            for t in time_range:
                logp_k_t = self.logp_filtered[t]
                for k in xrange(stoch.K):
                    # This is the forward step (in log scale):
                    # p(S_t=k \mid y_{1:t}) \propto p(y_t \mid S_t=k) *
                    #   p(S_t=k \mid y_{1:t-1})
                    logp_k_t[k] = self.y_logp_vals[k, t] +\
                        pymc.categorical_like(k, p_run)

                # Here we normalize across k
                logp_k_t -= reduce(np.logaddexp, logp_k_t)

                # This computes p(S_{t+1} \mid y_{1:t})
                p_run = np.dot(np.exp(logp_k_t), P)

            np.exp(self.logp_filtered, out=self.p_filtered)

            # An inefficient backward pass:
            # Sample p(S_T \mid y_{1:T})
            new_values = np.empty_like(stoch.value, dtype=stoch.value.dtype)
            new_values[t_end-1] = pymc.rcategorical(self.p_filtered[t_end-1][:-1])
            for t in xrange(t_end-2, t_last-1, -1):
                # Now, sample p(S_t \mid S_{t+1}, y_{1:T}) via the relation
                # p(S_t=j \mid S_{t+1}=k, y_{1:T}) \propto
                #   p(S_t=j \mid S_{t+1}=k, y_{1:t}) \propto
                #   p(S_{t+1}=k \mid S_t=j, y_{1:t}) * p(S_t=j \mid y_{1:t})
                p_back = P[:, int(new_values[t + 1])] * self.p_filtered[t]
                p_back /= p_back.sum()

                new_values[t-t_last] = pymc.rcategorical(p_back[:-1])

            stoch.value = new_values

            t_last += np.alen(stoch.value)
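The forward pass above keeps the filtered state probabilities in log space and normalizes with logaddexp. A minimal, self-contained sketch of that recursion, using a toy two-state transition matrix and emission likelihoods (illustrative values, not taken from the model above):

import numpy as np

P = np.array([[0.9, 0.1],
              [0.2, 0.8]])                    # full K x K transition matrix
y_logp = np.log(np.array([[0.7, 0.3],         # log p(y_t | S_t=k), shape (K, T)
                          [0.2, 0.8]]))
p_run = np.array([0.5, 0.5])                  # p(S_0)

filtered = []
for t in range(y_logp.shape[1]):
    logp_k = y_logp[:, t] + np.log(p_run)     # log p(y_t|S_t=k) + log p(S_t=k|y_{1:t-1})
    logp_k -= np.logaddexp.reduce(logp_k)     # normalize across k in log space
    filtered.append(np.exp(logp_k))           # p(S_t=k | y_{1:t})
    p_run = np.dot(np.exp(logp_k), P)         # p(S_{t+1} | y_{1:t})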
Example #2
def states_random(Ptrans=Ptrans, N_chain=N_chain):
    P = np.column_stack((Ptrans, 1. - Ptrans.sum(axis=1)))

    Pinit = unconditionalProbability(Ptrans)

    states = np.empty(N_chain, dtype=np.uint8)

    states[0] = pymc.rcategorical(Pinit)

    for i in range(1, N_chain):
        states[i] = pymc.rcategorical(P[states[i - 1]])

    return states
Example #3
def ages_and_data(N_exam, f_samp, correction_factor_array, age_lims):
    """Called by pred_samps. Simulates ages of survey participants and data given f."""

    N_samp = len(f_samp)
    N_age_samps = correction_factor_array.shape[1]

    # Get samples for the age distribution at the observation points.
    age_distribution = []
    for i in xrange(N_samp):
        l = age_lims[i]
        age_distribution.append(S_trace[np.random.randint(S_trace.shape[0]), 0,
                                        l[0]:l[1] + 1])
        age_distribution[-1] /= np.sum(age_distribution[-1])

    # Draw age for each individual, draw an age-correction profile for each location,
    # compute probability of positive for each individual, see how many individuals are
    # positive.
    A = []
    pos = []
    for s in xrange(N_samp):
        A.append(
            np.array(pm.rcategorical(age_distribution[s], size=N_exam[s]),
                     dtype=int) + age_lims[s][0])
        P_samp = pm.invlogit(f_samp[s].ravel(
        )) * correction_factor_array[:, np.random.randint(N_age_samps)][A[-1]]
        pos.append(pm.rbernoulli(P_samp))

    return A, pos, age_distribution
Example #4
def test_fixed_effect_priors():
    model = data.ModelData()

    # set prior on sex
    parameters = dict(fixed_effects={'x_sex': dict(dist='TruncatedNormal', mu=1., sigma=.5, lower=-10, upper=10)})

    # simulate normal data
    n = 32.
    sex_list = pl.array(['male', 'female', 'total'])
    sex = sex_list[mc.rcategorical([.3, .3, .4], n)]
    beta_true = dict(male=-1., total=0., female=1.)
    pi_true = pl.exp([beta_true[s] for s in sex])
    sigma_true = .05
    p = mc.rnormal(pi_true, 1./sigma_true**2.)

    model.input_data = pandas.DataFrame(dict(value=p, sex=sex))
    model.input_data['area'] = 'all'
    model.input_data['year_start'] = 2010
    model.input_data['year_end'] = 2010



    # create model and priors
    vars = {}
    vars.update(covariate_model.mean_covariate_model('test', 1, model.input_data, parameters, model,
                                                     'all', 'total', 'all'))

    print vars['beta']
    assert vars['beta'][0].parents['mu'] == 1.
Example #5
def test_covariate_model_dispersion():
    # simulate normal data
    n = 100

    model = data.ModelData()
    model.hierarchy, model.output_template = data_simulation.small_output()

    Z = mc.rcategorical([.5, .5], n)
    zeta_true = -.2

    pi_true = .1
    ess = 10000.*pl.ones(n)
    eta_true = pl.log(50)
    delta_true = 50 + pl.exp(eta_true)

    p = mc.rnegative_binomial(pi_true*ess, delta_true*pl.exp(Z*zeta_true)) / ess

    
    model.input_data = pandas.DataFrame(dict(value=p, z_0=Z))
    model.input_data['area'] = 'all'
    model.input_data['sex'] = 'total'
    model.input_data['year_start'] = 2000
    model.input_data['year_end'] = 2000



    # create model and priors
    vars = dict(mu=mc.Uninformative('mu_test', value=pi_true))
    vars.update(covariate_model.mean_covariate_model('test', vars['mu'], model.input_data, {}, model, 'all', 'total', 'all'))
    vars.update(covariate_model.dispersion_covariate_model('test', model.input_data, .1, 10.))
    vars.update(rate_model.neg_binom_model('test', vars['pi'], vars['delta'], p, ess))

    # fit model
    m = mc.MCMC(vars)
    m.sample(2)
Example #6
 def step(self):
     
     # The right-hand sides for the linear constraints
     self.rhs = dict(zip(self.constraint_offdiags, 
                         [np.asarray(np.dot(pm.utils.value(od), self.g.value)).squeeze() for od in self.constraint_offdiags]))
     
     for i in xrange(self.n):
         
         try:
             lb, ub, rhs = self.get_bounds(i)
         except ConstraintError:
             warnings.warn('Bounds could not be set, this element is very highly constrained')
             continue
         
         newgs = np.hstack((self.g.value[i], pm.rtruncnorm(0,1,lb,ub,size=self.n_draws)))
         lpls = np.hstack((self.get_likelihood_only(), np.empty(self.n_draws)))
         for j, newg in enumerate(newgs[1:]):
             self.set_g_value(newg, i)
             # The newgs are drawn from the prior, taking the constraints into account, so
             # accept them based on the 'likelihood children' only.
             try:
                 lpls[j+1] = self.get_likelihood_only()
             except pm.ZeroProbability:
                 lpls[j+1] = -np.inf
         
         lpls -= pm.flib.logsum(lpls)
         newg = newgs[pm.rcategorical(np.exp(lpls))]
         self.set_g_value(newg, i)
                 
         for od in self.constraint_offdiags:
             rhs[od] += np.asarray(pm.utils.value(od))[:,i].squeeze() * newg
             self.rhs = rhs
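The core move in the step above is choosing one of several candidate values with probability proportional to its likelihood. A stripped-down sketch of that resampling step, with hypothetical candidate values and log-likelihoods (assumes the classic PyMC 2 pm.flib.logsum and pm.rcategorical):

import numpy as np
import pymc as pm

candidates = np.array([0.1, 0.5, 0.9])             # hypothetical proposed values
loglik = np.array([-3.2, -1.0, -2.5])              # hypothetical log-likelihoods
weights = np.exp(loglik - pm.flib.logsum(loglik))  # normalized selection probabilities
chosen = candidates[int(pm.rcategorical(weights))]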
Example #7
def test_fixed_effect_priors():
    model = dismod_mr.data.ModelData()

    # set prior on sex
    parameters = dict(
        fixed_effects={
            'x_sex':
            dict(dist='TruncatedNormal', mu=1., sigma=.5, lower=-10, upper=10)
        })

    # simulate normal data
    n = 32
    sex_list = np.array(['male', 'female', 'total'])
    sex = sex_list[mc.rcategorical([.3, .3, .4], n)]
    beta_true = dict(male=-1., total=0., female=1.)
    pi_true = np.exp([beta_true[s] for s in sex])
    sigma_true = .05
    p = mc.rnormal(pi_true, 1. / sigma_true**2.)

    model.input_data = pd.DataFrame(dict(value=p, sex=sex))
    model.input_data['area'] = 'all'
    model.input_data['year_start'] = 2010
    model.input_data['year_end'] = 2010

    # create model and priors
    vars = {}
    vars.update(
        dismod_mr.model.covariates.mean_covariate_model(
            'test', 1, model.input_data, parameters, model, 'all', 'total',
            'all'))

    print(vars['beta'])
    assert vars['beta'][0].parents['mu'] == 1.
Example #8
    def step(self):
        x0 = np.copy(self.stochastic.value)
        dx = pymc.rnormal(np.zeros(np.shape(x0)), self.proposal_tau)

        logp = [self.logp_plus_loglike]
        x_prime = [x0]

        for direction in [-1, 1]:
            for i in xrange(25):
                delta = direction * np.exp(.1 * i) * dx
                try:
                    self.stochastic.value = x0 + delta
                    logp.append(self.logp_plus_loglike)
                    x_prime.append(x0 + delta)
                except pymc.ZeroProbability:
                    self.stochastic.value = x0

        i = pymc.rcategorical(np.exp(np.array(logp) - pymc.flib.logsum(logp)))
        self.stochastic.value = x_prime[i]

        if i == 0:
            self.rejected += 1
            if self.verbose > 2:
                print self._id + ' rejecting'
        else:
            self.accepted += 1
            if self.verbose > 2:
                print self._id + ' accepting'
Example #9
def test_random_effect_priors():
    model = dismod_mr.data.ModelData()

    # set prior on sex
    parameters = dict(random_effects={
        'USA':
        dict(dist='TruncatedNormal', mu=.1, sigma=.5, lower=-10, upper=10)
    })

    # simulate normal data
    n = 32
    area_list = np.array(['all', 'USA', 'CAN'])
    area = area_list[mc.rcategorical([.3, .3, .4], n)]
    alpha_true = dict(all=0., USA=.1, CAN=-.2)
    pi_true = np.exp([alpha_true[a] for a in area])
    sigma_true = .05
    p = mc.rnormal(pi_true, 1. / sigma_true**2.)

    model.input_data = pd.DataFrame(dict(value=p, area=area))
    model.input_data['sex'] = 'male'
    model.input_data['year_start'] = 2010
    model.input_data['year_end'] = 2010

    model.hierarchy.add_edge('all', 'USA')
    model.hierarchy.add_edge('all', 'CAN')

    # create model and priors
    vars = {}
    vars.update(
        dismod_mr.model.covariates.mean_covariate_model(
            'test', 1, model.input_data, parameters, model, 'all', 'total',
            'all'))

    print(vars['alpha'])
    print(vars['alpha'][1].parents['mu'])
Example #10
    def step(self):
        x0 = np.copy(self.stochastic.value)
        dx = pymc.rnormal(np.zeros(np.shape(x0)), self.proposal_tau)

        logp = [self.logp_plus_loglike]
        x_prime = [x0]

        for direction in [-1, 1]:
            for i in xrange(25):
                delta = direction*np.exp(.1*i)*dx
                try:
                    self.stochastic.value = x0 + delta
                    logp.append(self.logp_plus_loglike)
                    x_prime.append(x0 + delta)
                except pymc.ZeroProbability:
                    self.stochastic.value = x0
        
        i = pymc.rcategorical(np.exp(np.array(logp) - pymc.flib.logsum(logp)))
        self.stochastic.value = x_prime[i]

        if i == 0:
            self.rejected += 1
            if self.verbose > 2:
                print self._id + ' rejecting'
        else:
            self.accepted += 1
            if self.verbose > 2:
                print self._id + ' accepting'
Example #11
    def step(self):
        x0 = self.value[self.n]
        u = pymc.rnormal(np.zeros(self.N), 1.)
        dx = np.dot(u, self.value)

        self.stochastic.value = x0
        logp = [self.logp_plus_loglike]
        x_prime = [x0]

        for direction in [-1, 1]:
            for i in xrange(25):
                delta = direction * np.exp(.1 * i) * dx
                try:
                    self.stochastic.value = x0 + delta
                    logp.append(self.logp_plus_loglike)
                    x_prime.append(x0 + delta)
                except pymc.ZeroProbability:
                    self.stochastic.value = x0

        i = pymc.rcategorical(np.exp(np.array(logp) - pymc.flib.logsum(logp)))
        self.value[self.n] = x_prime[i]
        self.stochastic.value = x_prime[i]

        if i == 0:
            self.rejected += 1
            if self.verbose > 2:
                print self._id + ' rejecting'
        else:
            self.accepted += 1
            if self.verbose > 2:
                print self._id + ' accepting'

        self.n += 1
        if self.n == self.N:
            self.n = 0
Example #12
def test_random_effect_priors():
    model = data.ModelData()

    # set prior on sex
    parameters = dict(random_effects={'USA': dict(dist='TruncatedNormal', mu=.1, sigma=.5, lower=-10, upper=10)})


    # simulate normal data
    n = 32.
    area_list = pl.array(['all', 'USA', 'CAN'])
    area = area_list[mc.rcategorical([.3, .3, .4], n)]
    alpha_true = dict(all=0., USA=.1, CAN=-.2)
    pi_true = pl.exp([alpha_true[a] for a in area])
    sigma_true = .05
    p = mc.rnormal(pi_true, 1./sigma_true**2.)

    model.input_data = pandas.DataFrame(dict(value=p, area=area))
    model.input_data['sex'] = 'male'
    model.input_data['year_start'] = 2010
    model.input_data['year_end'] = 2010

    model.hierarchy.add_edge('all', 'USA')
    model.hierarchy.add_edge('all', 'CAN')

    # create model and priors
    vars = {}
    vars.update(covariate_model.mean_covariate_model('test', 1, model.input_data, parameters, model,
                                                     'all', 'total', 'all'))

    print vars['alpha']
    print vars['alpha'][1].parents['mu']
    assert vars['alpha'][1].parents['mu'] == .1
Example #13
    def step(self):
        x0 = self.value[self.n]
        u = pm.rnormal(np.zeros(self.N), 1.)
        dx = np.dot(u, self.value)
 
        self.stochastic.value = x0
        logp = [self.logp_plus_loglike]
        x_prime = [x0]
 
        for direction in [-1, 1]:
            for i in xrange(25):
                delta = direction*np.exp(.1*i)*dx
                try:
                    self.stochastic.value = x0 + delta
                    logp.append(self.logp_plus_loglike)
                    x_prime.append(x0 + delta)
                except pm.ZeroProbability:
                    self.stochastic.value = x0
 
        i = pm.rcategorical(np.exp(np.array(logp) - pm.flib.logsum(logp)))
        self.value[self.n] = x_prime[i]
        self.stochastic.value = x_prime[i]
 
        if i == 0:
            self.rejected += 1
        else:
            self.accepted += 1
 
        self.n += 1
        if self.n == self.N:
            self.n = 0    
Example #14
def test_covariate_model_dispersion():
    # simulate normal data
    n = 100

    model = dismod_mr.data.ModelData()
    model.hierarchy, model.output_template = dismod_mr.testing.data_simulation.small_output()

    Z = mc.rcategorical([.5, .5], n)
    zeta_true = -.2

    pi_true = .1
    ess = 10000.*np.ones(n)
    eta_true = np.log(50)
    delta_true = 50 + np.exp(eta_true)

    p = mc.rnegative_binomial(pi_true*ess, delta_true*np.exp(Z*zeta_true)) / ess

    model.input_data = pd.DataFrame(dict(value=p, z_0=Z))
    model.input_data['area'] = 'all'
    model.input_data['sex'] = 'total'
    model.input_data['year_start'] = 2000
    model.input_data['year_end'] = 2000

    # create model and priors
    variables = dict(mu=mc.Uninformative('mu_test', value=pi_true))
    variables.update(dismod_mr.model.covariates.mean_covariate_model('test', variables['mu'], model.input_data, {},
                                                                     model, 'all', 'total', 'all'))
    variables.update(dismod_mr.model.covariates.dispersion_covariate_model('test', model.input_data, .1, 10.))
    variables.update(dismod_mr.model.likelihood.neg_binom('test', variables['pi'], variables['delta'], p, ess))

    # fit model
    m = mc.MCMC(variables)
    m.sample(2)
Example #15
def sim_ordinal(I, J, K, alpha=None, beta=None):

    # test input params here

    Is = range(I)
    Js = range(J)
    Ks = range(K)
    N = I * J
    Ns = range(N)

    if alpha is None:
        alpha = alloc_mat(K, K)
        for k1 in Ks:
            for k2 in Ks:
                alpha[k1][k2] = max(1,(K + (0.5 if k1 == k2 else 0) \
                                       - abs(k1 - k2))**4)

    if beta is None:
        beta = alloc_vec(K, 2.0)

    # simulated params
    beta = alloc_vec(K, 2.0)

    prevalence = pymc.rdirichlet(beta).tolist()
    prevalence.append(1.0 - sum(prevalence))  # complete
    category = []
    for i in Is:
        category.append(pymc.rcategorical(prevalence).tolist())

    accuracy = alloc_tens(J, K, K)
    for j in Js:
        for k in Ks:
            accuracy[j][k] = pymc.rdirichlet(alpha[k]).tolist()
            accuracy[j][k].append(1.0 - sum(accuracy[j][k]))

    # simulated data
    item = []
    anno = []
    label = []
    for i in Is:
        for j in Js:
            item.append(i)
            anno.append(j)
            label.append(pymc.rcategorical(accuracy[j][category[i]]).tolist())
    N = len(item)

    return (prevalence, category, accuracy, item, anno, label)
Example #16
    def step(self):
        self._index += 1
        if self._index % self.sleep_interval == 0:
            
            v = pm.value(self.v)
            m = pm.value(self.m)
            val = self.stochastic.value
            lp = pm.logp_of_set(self.other_children)
        
            # Choose a direction along which to step.
            dirvec = np.random.normal(size=self.n)
            dirvec /= np.sqrt(np.sum(dirvec**2))
        
            # Orthogonalize
            orthoproj = gramschmidt(dirvec)
            scaled_orthoproj = v*orthoproj.T
            pck = np.dot(dirvec, scaled_orthoproj.T)
            kck = np.linalg.inv(np.dot(scaled_orthoproj,orthoproj))
            pckkck = np.dot(pck,kck)

            # Figure out conditional variance
            condvar = np.dot(dirvec, dirvec*v) - np.dot(pck, pckkck)
            # condmean = np.dot(dirvec, m) + np.dot(pckkck, np.dot(orthoproj.T, (val-m)))
        
            # Compute slice of log-probability surface
            tries = np.linspace(-4*np.sqrt(condvar), 4*np.sqrt(condvar), 501)
            lps = 0*tries
        
            for i in xrange(len(tries)):
                new_val = val + tries[i]*dirvec
                self.stochastic.value = new_val
                try:
                    lps[i] = self.f_fr.logp + self.stochastic.logp
                except:
                    lps[i] = -np.inf              
            if np.all(np.isinf(lps)):
                raise ValueError, 'All -inf.'
            lps -= pm.flib.logsum(lps[True-np.isinf(lps)])          
            ps = np.exp(lps)
        
            index = pm.rcategorical(ps)
            new_val = val + tries[index]*dirvec
            self.stochastic.value = new_val
            
            try:
                lpp = pm.logp_of_set(self.other_children)
                if np.log(np.random.random()) < lpp - lp:
                    self.accepted += 1
                else:
                    self.stochastic.value = val
                    self.rejected += 1
                    
            except pm.ZeroProbability:
                self.stochastic.value = val
                self.rejected += 1
        self.logp_plus_loglike
Example #17
def test_covariate_model_sim_w_hierarchy():
    n = 50

    # setup hierarchy
    hierarchy, output_template = data_simulation.small_output()

    # simulate normal data
    area_list = np.array(['all', 'USA', 'CAN'])
    area = area_list[mc.rcategorical([.3, .3, .4], n)]

    sex_list = np.array(['male', 'female', 'total'])
    sex = sex_list[mc.rcategorical([.3, .3, .4], n)]

    year = np.array(mc.runiform(1990, 2010, n), dtype=int)

    alpha_true = dict(all=0., USA=.1, CAN=-.2)

    pi_true = np.exp([alpha_true[a] for a in area])
    sigma_true = .05 * np.ones_like(pi_true)

    p = mc.rnormal(pi_true, 1. / sigma_true**2.)

    model = dismod_mr.data.ModelData()
    model.input_data = pd.DataFrame(
        dict(value=p, area=area, sex=sex, year_start=year, year_end=year))
    model.hierarchy, model.output_template = hierarchy, output_template

    # create model and priors
    vars = {}
    vars.update(
        dismod_mr.model.covariates.mean_covariate_model(
            'test', 1, model.input_data, {}, model, 'all', 'total', 'all'))
    vars.update(
        dismod_mr.model.likelihood.normal('test', vars['pi'], 0., p,
                                          sigma_true))

    # fit model
    m = mc.MCMC(vars)
    m.sample(2)

    assert 'sex' not in vars['U']
    assert 'x_sex' in vars['X']
    assert len(vars['beta']) == 1
Example #18
 def step(self):
     direction = self.choose_direction(norm=False)
     current_value = self.get_current_value()
     x_prime = np.vstack((current_value, np.outer(np.linspace(-self.xprime_sds,self.xprime_sds,self.xprime_n),direction) + current_value))
     lps = np.empty(self.xprime_n+1)
     lps[0] = self.logp_plus_loglike
     for i in xrange(self.xprime_n):
         self.set_current_value(x_prime[i+1])
         lps[i+1] = self.logp_plus_loglike
     next_value = x_prime[pm.rcategorical(np.exp(lps-pm.flib.logsum(lps)))]
     self.set_current_value(next_value)
     self.store(next_value)
Example #19
def test_covariate_model_sim_w_hierarchy():
    n = 50

    # setup hierarchy
    hierarchy, output_template = data_simulation.small_output()

    # simulate normal data
    area_list = pl.array(['all', 'USA', 'CAN'])
    area = area_list[mc.rcategorical([.3, .3, .4], n)]

    sex_list = pl.array(['male', 'female', 'total'])
    sex = sex_list[mc.rcategorical([.3, .3, .4], n)]

    year = pl.array(mc.runiform(1990, 2010, n), dtype=int)
        
    alpha_true = dict(all=0., USA=.1, CAN=-.2)

    pi_true = pl.exp([alpha_true[a] for a in area])
    sigma_true = .05*pl.ones_like(pi_true)

    p = mc.rnormal(pi_true, 1./sigma_true**2.)

    model = data.ModelData()
    model.input_data = pandas.DataFrame(dict(value=p, area=area, sex=sex, year_start=year, year_end=year))
    model.hierarchy, model.output_template = hierarchy, output_template

    # create model and priors
    vars = {}
    vars.update(covariate_model.mean_covariate_model('test', 1, model.input_data, {}, model,
                                                     'all', 'total', 'all'))
    vars.update(rate_model.normal_model('test', vars['pi'], 0., p, sigma_true))

    # fit model
    m = mc.MCMC(vars)
    m.sample(2)

    assert 'sex' not in vars['U']
    assert 'x_sex' in vars['X']
    assert len(vars['beta']) == 1
Example #20
def states_random(trans_mat, N_obs, p0, size=None):
    """ Samples states from an HMM.

    Parameters
    ==========

    trans_mat: ndarray
        A transition probability matrix for K-many states with
        shape (K, K-1).

    N_obs: int
        Number of observations.

    p0: ndarray
        Initial state probabilities.  If `None`, the steady state
        is computed and used.

    size: int
        Not used.

    Returns
    =======

    An ndarray of length N_obs containing the sampled state indices.

    """

    P = np.column_stack((trans_mat, 1. - trans_mat.sum(axis=1)))
    p = p0
    if p is None:
        p = compute_steady_state(trans_mat)

    states = np.empty(N_obs, dtype=np.uint8)

    states[0] = pymc.rcategorical(p)
    for i in range(1, N_obs):
        states[i] = pymc.rcategorical(P[int(states[i - 1])])

    return states
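A hedged usage sketch for states_random above, with a toy two-state transition matrix of shape (K, K-1); the last column of each row is implied, and an explicit p0 is passed so the compute_steady_state helper is not needed:

import numpy as np

trans_mat = np.array([[0.9],    # P(0 -> 0); P(0 -> 1) = 0.1 is implied
                      [0.2]])   # P(1 -> 0); P(1 -> 1) = 0.8 is implied
states = states_random(trans_mat, N_obs=200, p0=np.array([0.5, 0.5]))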
Example #21
 def step(self):
     direction = self.choose_direction(norm=False)
     current_value = self.get_current_value()
     x_prime = np.vstack(
         (current_value,
          np.outer(
              np.linspace(-self.xprime_sds, self.xprime_sds, self.xprime_n),
              direction) + current_value))
     lps = np.empty(self.xprime_n + 1)
     lps[0] = self.logp_plus_loglike
     for i in xrange(self.xprime_n):
         self.set_current_value(x_prime[i + 1])
         lps[i + 1] = self.logp_plus_loglike
     next_value = x_prime[pm.rcategorical(np.exp(lps -
                                                 pm.flib.logsum(lps)))]
     self.set_current_value(next_value)
     self.store(next_value)
Example #22
def SIR_simplex_sample(mu, tau, cutoff, sum_val, N, N_proposals=1000, N_samps=1000):
    """
    Returns raw log-weights, indices chosen and SIR samples for sets of N draws
    from a truncated lognormal distribution, conditioned so that their sum is
    equal to sum_val.
    
    This SIR algorithm will fail miserably unless sum_val is relatively likely
    given N and the parameters of the lognormal distribution.
    
    :Parameters:
      - mu : float
        The mean parameter.
      - tau : float
        The precision parameter.
      - cutoff : float
        The truncation value.
      - sum_val : float
        The sum that is being conditioned on.
      - N : integer
        The number of variates in each vector
      - N_proposals : integer
        The number of vectors to propose.
      - N_samps : integer
        The number of vectors to return.
    """
    # Draw samples, compute missing values, evaluate log-weights.
    samps = np.exp(geto_truncnorm(mu, tau, log(cutoff), (N-1,N_proposals)))
    last_vals = sum_val - np.sum(samps,axis=0)
    weights = np.array([pm.lognormal_like(last_val_now, mu, tau) for last_val_now in last_vals])

    # Check that there are at least some positive weights.
    where_pos = np.where(weights>-1e308)
    if len(where_pos[0])==0:
        raise RuntimeError, 'No weights are positive; sum_val is too unlikely for this N and these lognormal parameters.'

    # Normalize and exponentiate log-weights.
    weights[where(last_vals>cutoff)]=-np.Inf
    weights -= log_sum(weights)
    
    # Append missing values to samples.
    samps = np.vstack((samps,last_vals))
    
    # Slice and return.
    ind=np.array(pm.rcategorical(p=np.exp(weights),size=N_samps),dtype=int)
    return weights, ind, samps[:,ind]
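A hypothetical call (the geto_truncnorm and log_sum helpers are assumed to be defined in the surrounding module): resample 100 vectors of 5 truncated-lognormal variates conditioned to sum to 2.

weights, ind, samps = SIR_simplex_sample(mu=-1., tau=4., cutoff=1., sum_val=2.,
                                         N=5, N_proposals=5000, N_samps=100)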
Example #23
def gmm_model(data, K, mu_0=0.0, alpha_0=0.1, beta_0=0.1, alpha=1.0):
    """
    K: number of mixture components
    n_samples: number of samples
    n_features: number of features

    mu_0: prior mean of mu_k
    alpha_0: alpha of the Inverse-Gamma prior on tau_k
    beta_0: beta of the Inverse-Gamma prior on tau_k
    alpha: parameter of the Dirichlet prior phi_0

    latent variables:
    phi_0: shape = (K-1, ), Dirichlet distribution
    phi: shape = (K, ), phi_0 with the K-th value appended
    z: shape = (n_samples, ), categorical distribution; z[i] is the component indicator of sample i
    mu_k: shape = (K, n_features), normal distribution; mu_k[k] is the mean of the k-th component
    tau_k: shape = (K, n_features), inverse-gamma distribution; tau_k[k] is the variance of the k-th component
    """

    n_samples, n_features = data.shape

    # latent variables
    tau_k = pm.InverseGamma('tau_k',
                            alpha_0 * np.ones((K, n_features)),
                            beta_0 * np.ones((K, n_features)),
                            value=beta_0 * np.ones((K, n_features)))
    mu_k = pm.Normal('mu_k',
                     np.ones((K, n_features)) * mu_0,
                     tau_k,
                     value=np.ones((K, n_features)) * mu_0)
    phi_0 = pm.Dirichlet('phi_0', theta=np.ones(K) * alpha)

    @pm.deterministic(dtype=float)
    def phi(value=np.ones(K) / K, phi_0=phi_0):
        val = np.hstack((phi_0, (1 - np.sum(phi_0))))
        return val

    z = pm.Categorical('z',
                       p=phi,
                       value=pm.rcategorical(np.ones(K) / K, size=n_samples))

    # observed variables
    x = pm.Normal('x', mu=mu_k[z], tau=tau_k[z], value=data, observed=True)

    return pm.Model([mu_k, tau_k, phi_0, phi, z, x])
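A minimal usage sketch (PyMC 2 API; toy one-dimensional data from two well-separated components, values chosen for illustration):

import numpy as np
import pymc as pm

np.random.seed(0)
data = np.vstack((np.random.normal(-2., 1., size=(100, 1)),
                  np.random.normal(3., 1., size=(100, 1))))

model = gmm_model(data, K=2)
mcmc = pm.MCMC(model)
mcmc.sample(iter=2000, burn=1000)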
Example #24
def gmm_model(data, K, mu_0=0.0, alpha_0=0.1, beta_0=0.1, alpha=1.0):
    """
    K: number of mixture components
    n_samples: number of samples
    n_features: number of features

    mu_0: prior mean of mu_k
    alpha_0: alpha of the Inverse-Gamma prior on tau_k
    beta_0: beta of the Inverse-Gamma prior on tau_k
    alpha: parameter of the Dirichlet prior phi_0

    latent variables:
    phi_0: shape = (K-1, ), Dirichlet distribution
    phi: shape = (K, ), phi_0 with the K-th value appended
    z: shape = (n_samples, ), categorical distribution; z[i] is the component indicator of sample i
    mu_k: shape = (K, n_features), normal distribution; mu_k[k] is the mean of the k-th component
    tau_k: shape = (K, n_features), inverse-gamma distribution; tau_k[k] is the variance of the k-th component
    """

    n_samples, n_features = data.shape

    # latent variables
    tau_k = pm.InverseGamma(
        "tau_k",
        alpha_0 * np.ones((K, n_features)),
        beta_0 * np.ones((K, n_features)),
        value=beta_0 * np.ones((K, n_features)),
    )
    mu_k = pm.Normal("mu_k", np.ones((K, n_features)) * mu_0, tau_k, value=np.ones((K, n_features)) * mu_0)
    phi_0 = pm.Dirichlet("phi_0", theta=np.ones(K) * alpha)

    @pm.deterministic(dtype=float)
    def phi(value=np.ones(K) / K, phi_0=phi_0):
        val = np.hstack((phi_0, (1 - np.sum(phi_0))))
        return val

    z = pm.Categorical("z", p=phi, value=pm.rcategorical(np.ones(K) / K, size=n_samples))

    # observed variables
    x = pm.Normal("x", mu=mu_k[z], tau=tau_k[z], value=data, observed=True)

    return pm.Model([mu_k, tau_k, phi_0, phi, z, x])
Example #25
def ages_and_data(N_exam, f_samp, correction_factor_array, age_lims):
    """Called by pred_samps. Simulates ages of survey participants and data given f."""
    
    N_samp = len(f_samp)
    N_age_samps = correction_factor_array.shape[1]
    
    # Get samples for the age distribution at the observation points.
    age_distribution = []
    for i in xrange(N_samp):
        l = age_lims[i]
        age_distribution.append(S_trace[np.random.randint(S_trace.shape[0]),0,l[0]:l[1]+1])
        age_distribution[-1] /= np.sum(age_distribution[-1])
    
    # Draw age for each individual, draw an age-correction profile for each location,
    # compute probability of positive for each individual, see how many individuals are
    # positive.
    A = []
    pos = []
    for s in xrange(N_samp):
        A.append(np.array(pm.rcategorical(age_distribution[s], size=N_exam[s]),dtype=int) + age_lims[s][0])
        P_samp = pm.invlogit(f_samp[s].ravel())*correction_factor_array[:,np.random.randint(N_age_samps)][A[-1]]
        pos.append(pm.rbernoulli(P_samp))
    
    return A, pos, age_distribution
Example #26
reload(dismod3)

# set font
book_graphics.set_font()

pi_true = scipy.interpolate.interp1d([0, 20, 40, 60, 100], [.4, .425, .6, .5, .4])
beta_true = .3
delta_true = 50.
N = 30

# start with a simple model with N rows of data
model = data_simulation.simple_model(N)


# set covariate to 0/1 values randomly
model.input_data['x_cov'] = 1. * mc.rcategorical([.5, .5], size=N)

# record the true age-specific rates
model.ages = pl.arange(0, 101, 1)
model.pi_age_true = pi_true(model.ages)


# choose age groups randomly
age_width = pl.zeros(N)
age_mid = mc.runiform(age_width/2, 100-age_width/2, size=N)
age_start = pl.array(age_mid - age_width/2, dtype=int)
age_end = pl.array(age_mid + age_width/2, dtype=int)

model.input_data['age_start'] = age_start
model.input_data['age_end'] = age_end
Example #27
def validate_covariate_model_re(N=500, delta_true=.15, pi_true=.01, sigma_true = [.1,.1,.1,.1,.1], ess=1000):
    ## set simulation parameters
    import dismod3
    import simplejson as json
    model = data.ModelData.from_gbd_jsons(json.loads(dismod3.disease_json.DiseaseJson().to_json()))
    model.parameters['p']['parameter_age_mesh'] = [0, 100]
    model.parameters['p']['heterogeneity'] = 'Slightly'  # ensure heterogeneity is slightly

    area_list = []
    for sr in sorted(model.hierarchy.successors('all')):
        area_list.append(sr)
        for r in sorted(model.hierarchy.successors(sr)):
            area_list.append(r)
            area_list += sorted(model.hierarchy.successors(r))[:5]
    area_list = pl.array(area_list)


    ## generate simulation data
    model.input_data = pandas.DataFrame(index=range(N))
    initialize_input_data(model.input_data)

    alpha = alpha_true_sim(model, area_list, sigma_true)

    # choose observed prevalence values
    model.input_data['effective_sample_size'] = ess

    model.input_data['area'] = area_list[mc.rcategorical(pl.ones(len(area_list)) / float(len(area_list)), N)]

    model.input_data['true'] = pl.nan
    for i, a in model.input_data['area'].iteritems():
        model.input_data['true'][i] = pi_true * pl.exp(pl.sum([alpha[n] for n in nx.shortest_path(model.hierarchy, 'all', a) if n in alpha]))

    n = model.input_data['effective_sample_size']
    p = model.input_data['true']
    model.input_data['value'] = mc.rnegative_binomial(n*p, delta_true*n*p) / n



    ## Then fit the model and compare the estimates to the truth
    model.vars = {}
    model.vars['p'] = data_model.data_model('p', model, 'p', 'all', 'total', 'all', None, None, None)
    model.map, model.mcmc = fit_model.fit_data_model(model.vars['p'], iter=20000, burn=10000, thin=10, tune_interval=100)

    graphics.plot_one_ppc(model.vars['p'], 'p')
    graphics.plot_convergence_diag(model.vars)

    pl.show()

    model.input_data['mu_pred'] = model.vars['p']['p_pred'].stats()['mean']
    model.input_data['sigma_pred'] = model.vars['p']['p_pred'].stats()['standard deviation']
    add_quality_metrics(model.input_data)


    model.alpha = pandas.DataFrame(index=[n for n in nx.traversal.dfs_preorder_nodes(model.hierarchy)])
    model.alpha['true'] = pandas.Series(dict(alpha))
    model.alpha['mu_pred'] = pandas.Series([n.stats()['mean'] for n in model.vars['p']['alpha']], index=model.vars['p']['U'].columns)
    model.alpha['sigma_pred'] = pandas.Series([n.stats()['standard deviation'] for n in model.vars['p']['alpha']], index=model.vars['p']['U'].columns)
    add_quality_metrics(model.alpha)

    print '\nalpha'
    print model.alpha.dropna()


    model.sigma = pandas.DataFrame(dict(true=sigma_true))
    model.sigma['mu_pred'] = [n.stats()['mean'] for n in model.vars['p']['sigma_alpha']]
    model.sigma['sigma_pred']=[n.stats()['standard deviation'] for n in model.vars['p']['sigma_alpha']]
    add_quality_metrics(model.sigma)

    print 'sigma_alpha'
    print model.sigma

    
    model.results = dict(param=[], bias=[], mare=[], mae=[], pc=[])
    add_to_results(model, 'sigma')

    model.delta = pandas.DataFrame(dict(true=[delta_true]))
    model.delta['mu_pred'] = pl.exp(model.vars['p']['eta'].trace()).mean()
    model.delta['sigma_pred'] = pl.exp(model.vars['p']['eta'].trace()).std()
    add_quality_metrics(model.delta)

    print 'delta'
    print model.delta
    add_to_results(model, 'delta')

    print '\ndata prediction bias: %.5f, MARE: %.3f, coverage: %.2f' % (model.input_data['abs_err'].mean(),
                                                     pl.median(pl.absolute(model.input_data['rel_err'].dropna())),
                                                                       model.input_data['covered?'].mean())
    print 'effect prediction MAE: %.3f, coverage: %.2f' % (pl.median(pl.absolute(model.alpha['abs_err'].dropna())),
                                                          model.alpha.dropna()['covered?'].mean())
    add_to_results(model, 'input_data')
    add_to_results(model, 'alpha')

    model.results = pandas.DataFrame(model.results)
    return model
Example #28
def validate_ai_re(N=500, delta_true=.15, sigma_true=[.1,.1,.1,.1,.1], pi_true=quadratic, smoothness='Moderately', heterogeneity='Slightly'):
    ## generate simulated data
    a = pl.arange(0, 101, 1)
    pi_age_true = pi_true(a)


    import dismod3
    import simplejson as json
    model = data.ModelData.from_gbd_jsons(json.loads(dismod3.disease_json.DiseaseJson().to_json()))
    gbd_hierarchy = model.hierarchy

    model = data_simulation.simple_model(N)
    model.hierarchy = gbd_hierarchy

    model.parameters['p']['parameter_age_mesh'] = range(0, 101, 10)
    model.parameters['p']['smoothness'] = dict(amount=smoothness)
    model.parameters['p']['heterogeneity'] = heterogeneity

    age_start = pl.array(mc.runiform(0, 100, size=N), dtype=int)
    age_end = pl.array(mc.runiform(age_start, 100, size=N), dtype=int)

    age_weights = pl.ones_like(a)
    sum_pi_wt = pl.cumsum(pi_age_true*age_weights)
    sum_wt = pl.cumsum(age_weights*1.)
    p = (sum_pi_wt[age_end] - sum_pi_wt[age_start]) / (sum_wt[age_end] - sum_wt[age_start])

    # correct cases where age_start == age_end
    i = age_start == age_end
    if pl.any(i):
        p[i] = pi_age_true[age_start[i]]

    model.input_data['age_start'] = age_start
    model.input_data['age_end'] = age_end
    model.input_data['effective_sample_size'] = mc.runiform(100, 10000, size=N)


    from validate_covariates import alpha_true_sim
    area_list = pl.array(['all', 'super-region_3', 'north_africa_middle_east', 'EGY', 'KWT', 'IRN', 'IRQ', 'JOR', 'SYR'])
    alpha = alpha_true_sim(model, area_list, sigma_true)
    print alpha

    model.input_data['true'] = pl.nan

    model.input_data['area'] = area_list[mc.rcategorical(pl.ones(len(area_list)) / float(len(area_list)), N)]
    
    for i, a in model.input_data['area'].iteritems():
        model.input_data['true'][i] = p[i] * pl.exp(pl.sum([alpha[n] for n in nx.shortest_path(model.hierarchy, 'all', a) if n in alpha]))
    p = model.input_data['true']

    n = model.input_data['effective_sample_size']
    model.input_data['value'] = mc.rnegative_binomial(n*p, delta_true*n*p) / n

    ## Then fit the model and compare the estimates to the truth
    model.vars = {}
    model.vars['p'] = data_model.data_model('p', model, 'p', 'north_africa_middle_east', 'total', 'all', None, None, None)
    #model.map, model.mcmc = fit_model.fit_data_model(model.vars['p'], iter=1005, burn=500, thin=5, tune_interval=100)
    model.map, model.mcmc = fit_model.fit_data_model(model.vars['p'], iter=10000, burn=5000, thin=25, tune_interval=100)

    graphics.plot_one_ppc(model.vars['p'], 'p')
    graphics.plot_convergence_diag(model.vars)
    graphics.plot_one_type(model, model.vars['p'], {}, 'p')
    pl.plot(range(101), pi_age_true, 'r:', label='Truth')
    pl.legend(fancybox=True, shadow=True, loc='upper left')

    pl.show()

    model.input_data['mu_pred'] = model.vars['p']['p_pred'].stats()['mean']
    model.input_data['sigma_pred'] = model.vars['p']['p_pred'].stats()['standard deviation']
    data_simulation.add_quality_metrics(model.input_data)

    model.delta = pandas.DataFrame(dict(true=[delta_true]))
    model.delta['mu_pred'] = pl.exp(model.vars['p']['eta'].trace()).mean()
    model.delta['sigma_pred'] = pl.exp(model.vars['p']['eta'].trace()).std()
    data_simulation.add_quality_metrics(model.delta)

    model.alpha = pandas.DataFrame(index=[n for n in nx.traversal.dfs_preorder_nodes(model.hierarchy)])
    model.alpha['true'] = pandas.Series(dict(alpha))
    model.alpha['mu_pred'] = pandas.Series([n.stats()['mean'] for n in model.vars['p']['alpha']], index=model.vars['p']['U'].columns)
    model.alpha['sigma_pred'] = pandas.Series([n.stats()['standard deviation'] for n in model.vars['p']['alpha']], index=model.vars['p']['U'].columns)
    model.alpha = model.alpha.dropna()
    data_simulation.add_quality_metrics(model.alpha)

    model.sigma = pandas.DataFrame(dict(true=sigma_true))
    model.sigma['mu_pred'] = [n.stats()['mean'] for n in model.vars['p']['sigma_alpha']]
    model.sigma['sigma_pred']=[n.stats()['standard deviation'] for n in model.vars['p']['sigma_alpha']]
    data_simulation.add_quality_metrics(model.sigma)

    print 'delta'
    print model.delta

    print '\ndata prediction bias: %.5f, MARE: %.3f, coverage: %.2f' % (model.input_data['abs_err'].mean(),
                                                     pl.median(pl.absolute(model.input_data['rel_err'].dropna())),
                                                                       model.input_data['covered?'].mean())

    model.mu = pandas.DataFrame(dict(true=pi_age_true,
                                     mu_pred=model.vars['p']['mu_age'].stats()['mean'],
                                     sigma_pred=model.vars['p']['mu_age'].stats()['standard deviation']))
    data_simulation.add_quality_metrics(model.mu)

    data_simulation.initialize_results(model)
    data_simulation.add_to_results(model, 'delta')
    data_simulation.add_to_results(model, 'mu')
    data_simulation.add_to_results(model, 'input_data')
    data_simulation.add_to_results(model, 'alpha')
    data_simulation.add_to_results(model, 'sigma')
    data_simulation.finalize_results(model)

    print model.results

    return model
Example #29
def validate_consistent_model_sim(N=500,
                                  delta_true=.5,
                                  true=dict(i=quadratic,
                                            f=constant,
                                            r=constant)):
    types = pl.array(['i', 'r', 'f', 'p'])

    ## generate simulated data
    model = data_simulation.simple_model(N)
    model.input_data['effective_sample_size'] = 1.
    model.input_data['value'] = 0.

    for t in types:
        model.parameters[t]['parameter_age_mesh'] = range(0, 101, 20)

    sim = consistent_model.consistent_model(model, 'all', 'total', 'all', {})
    for t in 'irf':
        for i, k_i in enumerate(sim[t]['knots']):
            sim[t]['gamma'][i].value = pl.log(true[t](k_i))

    age_start = pl.array(mc.runiform(0, 100, size=N), dtype=int)
    age_end = pl.array(mc.runiform(age_start, 100, size=N), dtype=int)

    data_type = types[mc.rcategorical(pl.ones(len(types), dtype=float) /
                                      float(len(types)),
                                      size=N)]

    a = pl.arange(101)
    age_weights = pl.ones_like(a)
    sum_wt = pl.cumsum(age_weights)

    p = pl.zeros(N)
    for t in types:
        mu_t = sim[t]['mu_age'].value
        sum_mu_wt = pl.cumsum(mu_t * age_weights)

        p_t = (sum_mu_wt[age_end] - sum_mu_wt[age_start]) / (sum_wt[age_end] -
                                                             sum_wt[age_start])

        # correct cases where age_start == age_end
        i = age_start == age_end
        if pl.any(i):
            p_t[i] = mu_t[age_start[i]]

        # copy part into p
        p[data_type == t] = p_t[data_type == t]
    n = mc.runiform(100, 10000, size=N)

    model.input_data['data_type'] = data_type
    model.input_data['age_start'] = age_start
    model.input_data['age_end'] = age_end
    model.input_data['effective_sample_size'] = n
    model.input_data['true'] = p
    model.input_data['value'] = mc.rnegative_binomial(n * p,
                                                      delta_true * n * p) / n

    # coarse knot spacing for fast testing
    for t in types:
        model.parameters[t]['parameter_age_mesh'] = range(0, 101, 20)

    ## Then fit the model and compare the estimates to the truth
    model.vars = {}
    model.vars = consistent_model.consistent_model(model, 'all', 'total',
                                                   'all', {})
    model.map, model.mcmc = fit_model.fit_consistent_model(model.vars,
                                                           iter=10000,
                                                           burn=5000,
                                                           thin=25,
                                                           tune_interval=100)

    graphics.plot_convergence_diag(model.vars)

    graphics.plot_fit(model, model.vars, {}, {})
    for i, t in enumerate('i r f p rr pf'.split()):
        pl.subplot(2, 3, i + 1)
        pl.plot(a, sim[t]['mu_age'].value, 'w-', label='Truth', linewidth=2)
        pl.plot(a, sim[t]['mu_age'].value, 'r-', label='Truth', linewidth=1)

    #graphics.plot_one_type(model, model.vars['p'], {}, 'p')
    #pl.legend(fancybox=True, shadow=True, loc='upper left')

    pl.show()

    model.input_data['mu_pred'] = 0.
    model.input_data['sigma_pred'] = 0.
    for t in types:
        model.input_data['mu_pred'][
            data_type == t] = model.vars[t]['p_pred'].stats()['mean']
        model.input_data['sigma_pred'][data_type == t] = model.vars[t][
            'p_pred'].stats()['standard deviation']
    data_simulation.add_quality_metrics(model.input_data)

    model.delta = pandas.DataFrame(
        dict(true=[delta_true for t in types if t != 'rr']))
    model.delta['mu_pred'] = [
        pl.exp(model.vars[t]['eta'].trace()).mean() for t in types if t != 'rr'
    ]
    model.delta['sigma_pred'] = [
        pl.exp(model.vars[t]['eta'].trace()).std() for t in types if t != 'rr'
    ]
    data_simulation.add_quality_metrics(model.delta)

    print 'delta'
    print model.delta

    print '\ndata prediction bias: %.5f, MARE: %.3f, coverage: %.2f' % (
        model.input_data['abs_err'].mean(),
        pl.median(pl.absolute(model.input_data['rel_err'].dropna())),
        model.input_data['covered?'].mean())

    model.mu = pandas.DataFrame()
    for t in types:
        model.mu = model.mu.append(pandas.DataFrame(
            dict(true=sim[t]['mu_age'].value,
                 mu_pred=model.vars[t]['mu_age'].stats()['mean'],
                 sigma_pred=model.vars[t]['mu_age'].stats()
                 ['standard deviation'])),
                                   ignore_index=True)
    data_simulation.add_quality_metrics(model.mu)
    print '\nparam prediction bias: %.5f, MARE: %.3f, coverage: %.2f' % (
        model.mu['abs_err'].mean(),
        pl.median(pl.absolute(
            model.mu['rel_err'].dropna())), model.mu['covered?'].mean())
    print

    data_simulation.initialize_results(model)
    data_simulation.add_to_results(model, 'delta')
    data_simulation.add_to_results(model, 'mu')
    data_simulation.add_to_results(model, 'input_data')
    data_simulation.finalize_results(model)

    print model.results

    return model
Example #30
def validate_consistent_re(N=500, delta_true=.15, sigma_true=[.1,.1,.1,.1,.1], 
                           true=dict(i=quadratic, f=constant, r=constant)):
    types = pl.array(['i', 'r', 'f', 'p'])

    ## generate simulated data
    model = data_simulation.simple_model(N)
    model.input_data['effective_sample_size'] = 1.
    model.input_data['value'] = 0.
    # coarse knot spacing for fast testing
    for t in types:
        model.parameters[t]['parameter_age_mesh'] = range(0, 101, 20)

    sim = consistent_model.consistent_model(model, 'all', 'total', 'all', {})
    for t in 'irf':
        for i, k_i in enumerate(sim[t]['knots']):
            sim[t]['gamma'][i].value = pl.log(true[t](k_i))

    age_start = pl.array(mc.runiform(0, 100, size=N), dtype=int)
    age_end = pl.array(mc.runiform(age_start, 100, size=N), dtype=int)

    data_type = types[mc.rcategorical(pl.ones(len(types), dtype=float) / float(len(types)), size=N)]


    a = pl.arange(101)
    age_weights = pl.ones_like(a)
    sum_wt = pl.cumsum(age_weights)

    p = pl.zeros(N)
    for t in types:
        mu_t = sim[t]['mu_age'].value
        sum_mu_wt = pl.cumsum(mu_t*age_weights)
    
        p_t = (sum_mu_wt[age_end] - sum_mu_wt[age_start]) / (sum_wt[age_end] - sum_wt[age_start])

        # correct cases where age_start == age_end
        i = age_start == age_end
        if pl.any(i):
            p_t[i] = mu_t[age_start[i]]

        # copy part into p
        p[data_type==t] = p_t[data_type==t]


    # add covariate shifts
    import dismod3
    import simplejson as json
    gbd_model = data.ModelData.from_gbd_jsons(json.loads(dismod3.disease_json.DiseaseJson().to_json()))
    model.hierarchy = gbd_model.hierarchy

    from validate_covariates import alpha_true_sim
    area_list = pl.array(['all', 'super-region_3', 'north_africa_middle_east', 'EGY', 'KWT', 'IRN', 'IRQ', 'JOR', 'SYR'])
    alpha = {}
    for t in types:
        alpha[t] = alpha_true_sim(model, area_list, sigma_true)
    print json.dumps(alpha, indent=2)

    model.input_data['area'] = area_list[mc.rcategorical(pl.ones(len(area_list)) / float(len(area_list)), N)]
    
    for i, a in model.input_data['area'].iteritems():
        t = data_type[i]
        p[i] = p[i] * pl.exp(pl.sum([alpha[t][n] for n in nx.shortest_path(model.hierarchy, 'all', a) if n in alpha]))

    n = mc.runiform(100, 10000, size=N)

    model.input_data['data_type'] = data_type
    model.input_data['age_start'] = age_start
    model.input_data['age_end'] = age_end
    model.input_data['effective_sample_size'] = n
    model.input_data['true'] = p
    model.input_data['value'] = mc.rnegative_binomial(n*p, delta_true) / n

    # coarse knot spacing for fast testing
    for t in types:
        model.parameters[t]['parameter_age_mesh'] = range(0, 101, 20)

    ## Then fit the model and compare the estimates to the truth
    model.vars = {}
    model.vars = consistent_model.consistent_model(model, 'all', 'total', 'all', {})
    #model.map, model.mcmc = fit_model.fit_consistent_model(model.vars, iter=101, burn=0, thin=1, tune_interval=100)
    model.map, model.mcmc = fit_model.fit_consistent_model(model.vars, iter=10000, burn=5000, thin=25, tune_interval=100)

    graphics.plot_convergence_diag(model.vars)

    graphics.plot_fit(model, model.vars, {}, {})
    for i, t in enumerate('i r f p rr pf'.split()):
        pl.subplot(2, 3, i+1)
        pl.plot(range(101), sim[t]['mu_age'].value, 'w-', label='Truth', linewidth=2)
        pl.plot(range(101), sim[t]['mu_age'].value, 'r-', label='Truth', linewidth=1)

    pl.show()

    model.input_data['mu_pred'] = 0.
    model.input_data['sigma_pred'] = 0.
    for t in types:
        model.input_data['mu_pred'][data_type==t] = model.vars[t]['p_pred'].stats()['mean']
        model.input_data['sigma_pred'][data_type==t] = model.vars[t]['p_pred'].stats()['standard deviation']
    data_simulation.add_quality_metrics(model.input_data)

    model.delta = pandas.DataFrame(dict(true=[delta_true for t in types if t != 'rr']))
    model.delta['mu_pred'] = [pl.exp(model.vars[t]['eta'].trace()).mean() for t in types if t != 'rr']
    model.delta['sigma_pred'] = [pl.exp(model.vars[t]['eta'].trace()).std() for t in types if t != 'rr']
    data_simulation.add_quality_metrics(model.delta)

    model.alpha = pandas.DataFrame()
    model.sigma = pandas.DataFrame()
    for t in types:
        alpha_t = pandas.DataFrame(index=[n for n in nx.traversal.dfs_preorder_nodes(model.hierarchy)])
        alpha_t['true'] = pandas.Series(dict(alpha[t]))
        alpha_t['mu_pred'] = pandas.Series([n.stats()['mean'] for n in model.vars[t]['alpha']], index=model.vars[t]['U'].columns)
        alpha_t['sigma_pred'] = pandas.Series([n.stats()['standard deviation'] for n in model.vars[t]['alpha']], index=model.vars[t]['U'].columns)
        alpha_t['type'] = t
        model.alpha = model.alpha.append(alpha_t.dropna(), ignore_index=True)

        sigma_t = pandas.DataFrame(dict(true=sigma_true))
        sigma_t['mu_pred'] = [n.stats()['mean'] for n in model.vars[t]['sigma_alpha']]
        sigma_t['sigma_pred'] = [n.stats()['standard deviation'] for n in model.vars[t]['sigma_alpha']]
        model.sigma = model.sigma.append(sigma_t.dropna(), ignore_index=True)

    data_simulation.add_quality_metrics(model.alpha)
    data_simulation.add_quality_metrics(model.sigma)


    print 'delta'
    print model.delta

    print '\ndata prediction bias: %.5f, MARE: %.3f, coverage: %.2f' % (model.input_data['abs_err'].mean(),
                                                     pl.median(pl.absolute(model.input_data['rel_err'].dropna())),
                                                                       model.input_data['covered?'].mean())

    model.mu = pandas.DataFrame()
    for t in types:
        model.mu = model.mu.append(pandas.DataFrame(dict(true=sim[t]['mu_age'].value,
                                                         mu_pred=model.vars[t]['mu_age'].stats()['mean'],
                                                         sigma_pred=model.vars[t]['mu_age'].stats()['standard deviation'])),
                                   ignore_index=True)
    data_simulation.add_quality_metrics(model.mu)
    print '\nparam prediction bias: %.5f, MARE: %.3f, coverage: %.2f' % (model.mu['abs_err'].mean(),
                                                                         pl.median(pl.absolute(model.mu['rel_err'].dropna())),
                                                                         model.mu['covered?'].mean())
    print


    data_simulation.initialize_results(model)
    data_simulation.add_to_results(model, 'delta')
    data_simulation.add_to_results(model, 'mu')
    data_simulation.add_to_results(model, 'input_data')
    data_simulation.add_to_results(model, 'alpha')
    data_simulation.add_to_results(model, 'sigma')
    data_simulation.finalize_results(model)

    print model.results

    return model
Example #31
def validate_ai_re(N=500,
                   delta_true=.15,
                   sigma_true=[.1, .1, .1, .1, .1],
                   pi_true=quadratic,
                   smoothness='Moderately',
                   heterogeneity='Slightly'):
    ## generate simulated data
    a = pl.arange(0, 101, 1)
    pi_age_true = pi_true(a)

    import dismod3
    import simplejson as json
    model = data.ModelData.from_gbd_jsons(
        json.loads(dismod3.disease_json.DiseaseJson().to_json()))
    gbd_hierarchy = model.hierarchy

    model = data_simulation.simple_model(N)
    model.hierarchy = gbd_hierarchy

    model.parameters['p']['parameter_age_mesh'] = range(0, 101, 10)
    model.parameters['p']['smoothness'] = dict(amount=smoothness)
    model.parameters['p']['heterogeneity'] = heterogeneity

    age_start = pl.array(mc.runiform(0, 100, size=N), dtype=int)
    age_end = pl.array(mc.runiform(age_start, 100, size=N), dtype=int)

    age_weights = pl.ones_like(a)
    sum_pi_wt = pl.cumsum(pi_age_true * age_weights)
    sum_wt = pl.cumsum(age_weights * 1.)
    p = (sum_pi_wt[age_end] - sum_pi_wt[age_start]) / (sum_wt[age_end] -
                                                       sum_wt[age_start])

    # correct cases where age_start == age_end
    i = age_start == age_end
    if pl.any(i):
        p[i] = pi_age_true[age_start[i]]

    model.input_data['age_start'] = age_start
    model.input_data['age_end'] = age_end
    model.input_data['effective_sample_size'] = mc.runiform(100, 10000, size=N)

    from validate_covariates import alpha_true_sim
    area_list = pl.array([
        'all', 'super-region_3', 'north_africa_middle_east', 'EGY', 'KWT',
        'IRN', 'IRQ', 'JOR', 'SYR'
    ])
    alpha = alpha_true_sim(model, area_list, sigma_true)
    print alpha

    model.input_data['true'] = pl.nan

    model.input_data['area'] = area_list[mc.rcategorical(
        pl.ones(len(area_list)) / float(len(area_list)), N)]

    for i, a in model.input_data['area'].iteritems():
        model.input_data['true'][i] = p[i] * pl.exp(
            pl.sum([
                alpha[n] for n in nx.shortest_path(model.hierarchy, 'all', a)
                if n in alpha
            ]))
    p = model.input_data['true']

    n = model.input_data['effective_sample_size']
    model.input_data['value'] = mc.rnegative_binomial(n * p,
                                                      delta_true * n * p) / n

    ## Then fit the model and compare the estimates to the truth
    model.vars = {}
    model.vars['p'] = data_model.data_model('p', model, 'p',
                                            'north_africa_middle_east',
                                            'total', 'all', None, None, None)
    #model.map, model.mcmc = fit_model.fit_data_model(model.vars['p'], iter=1005, burn=500, thin=5, tune_interval=100)
    model.map, model.mcmc = fit_model.fit_data_model(model.vars['p'],
                                                     iter=10000,
                                                     burn=5000,
                                                     thin=25,
                                                     tune_interval=100)

    graphics.plot_one_ppc(model.vars['p'], 'p')
    graphics.plot_convergence_diag(model.vars)
    graphics.plot_one_type(model, model.vars['p'], {}, 'p')
    pl.plot(range(101), pi_age_true, 'r:', label='Truth')
    pl.legend(fancybox=True, shadow=True, loc='upper left')

    pl.show()

    model.input_data['mu_pred'] = model.vars['p']['p_pred'].stats()['mean']
    model.input_data['sigma_pred'] = model.vars['p']['p_pred'].stats(
    )['standard deviation']
    data_simulation.add_quality_metrics(model.input_data)

    model.delta = pandas.DataFrame(dict(true=[delta_true]))
    model.delta['mu_pred'] = pl.exp(model.vars['p']['eta'].trace()).mean()
    model.delta['sigma_pred'] = pl.exp(model.vars['p']['eta'].trace()).std()
    data_simulation.add_quality_metrics(model.delta)

    model.alpha = pandas.DataFrame(
        index=[n for n in nx.traversal.dfs_preorder_nodes(model.hierarchy)])
    model.alpha['true'] = pandas.Series(dict(alpha))
    model.alpha['mu_pred'] = pandas.Series(
        [n.stats()['mean'] for n in model.vars['p']['alpha']],
        index=model.vars['p']['U'].columns)
    model.alpha['sigma_pred'] = pandas.Series(
        [n.stats()['standard deviation'] for n in model.vars['p']['alpha']],
        index=model.vars['p']['U'].columns)
    model.alpha = model.alpha.dropna()
    data_simulation.add_quality_metrics(model.alpha)

    model.sigma = pandas.DataFrame(dict(true=sigma_true))
    model.sigma['mu_pred'] = [
        n.stats()['mean'] for n in model.vars['p']['sigma_alpha']
    ]
    model.sigma['sigma_pred'] = [
        n.stats()['standard deviation'] for n in model.vars['p']['sigma_alpha']
    ]
    data_simulation.add_quality_metrics(model.sigma)

    print 'delta'
    print model.delta

    print '\ndata prediction bias: %.5f, MARE: %.3f, coverage: %.2f' % (
        model.input_data['abs_err'].mean(),
        pl.median(pl.absolute(model.input_data['rel_err'].dropna())),
        model.input_data['covered?'].mean())
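    # Here MARE is the median absolute relative error of the posterior
    # predictions, and coverage is the mean of the 'covered?' indicator
    # column added by data_simulation.add_quality_metrics.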

    model.mu = pandas.DataFrame(
        dict(true=pi_age_true,
             mu_pred=model.vars['p']['mu_age'].stats()['mean'],
             sigma_pred=model.vars['p']['mu_age'].stats()
             ['standard deviation']))
    data_simulation.add_quality_metrics(model.mu)

    data_simulation.initialize_results(model)
    data_simulation.add_to_results(model, 'delta')
    data_simulation.add_to_results(model, 'mu')
    data_simulation.add_to_results(model, 'input_data')
    data_simulation.add_to_results(model, 'alpha')
    data_simulation.add_to_results(model, 'sigma')
    data_simulation.finalize_results(model)

    print model.results

    return model
Example #32
def validate_covariate_model_re(N=500,
                                delta_true=.15,
                                pi_true=.01,
                                sigma_true=[.1, .1, .1, .1, .1],
                                ess=1000):
    ## set simulation parameters
    import dismod3
    import simplejson as json
    model = data.ModelData.from_gbd_jsons(
        json.loads(dismod3.disease_json.DiseaseJson().to_json()))
    model.parameters['p']['parameter_age_mesh'] = [0, 100]
    # ensure heterogeneity is set to 'Slightly'
    model.parameters['p']['heterogeneity'] = 'Slightly'

    area_list = []
    for sr in sorted(model.hierarchy.successors('all')):
        area_list.append(sr)
        for r in sorted(model.hierarchy.successors(sr)):
            area_list.append(r)
            area_list += sorted(model.hierarchy.successors(r))[:5]
    area_list = pl.array(area_list)
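    # area_list now contains every super-region, every region, and up to the
    # first five child areas (in sorted order) of each region in the hierarchy.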

    ## generate simulation data
    model.input_data = pandas.DataFrame(index=range(N))
    initialize_input_data(model.input_data)

    alpha = alpha_true_sim(model, area_list, sigma_true)

    # choose observed prevalence values
    model.input_data['effective_sample_size'] = ess

    model.input_data['area'] = area_list[mc.rcategorical(
        pl.ones(len(area_list)) / float(len(area_list)), N)]

    model.input_data['true'] = pl.nan
    for i, a in model.input_data['area'].iteritems():
        model.input_data['true'][i] = pi_true * pl.exp(
            pl.sum([
                alpha[n] for n in nx.shortest_path(model.hierarchy, 'all', a)
                if n in alpha
            ]))

    n = model.input_data['effective_sample_size']
    p = model.input_data['true']
    model.input_data['value'] = mc.rnegative_binomial(n * p,
                                                      delta_true * n * p) / n

    ## Then fit the model and compare the estimates to the truth
    model.vars = {}
    model.vars['p'] = data_model.data_model('p', model, 'p', 'all', 'total',
                                            'all', None, None, None)
    model.map, model.mcmc = fit_model.fit_data_model(model.vars['p'],
                                                     iter=20000,
                                                     burn=10000,
                                                     thin=10,
                                                     tune_interval=100)

    graphics.plot_one_ppc(model.vars['p'], 'p')
    graphics.plot_convergence_diag(model.vars)

    pl.show()

    model.input_data['mu_pred'] = model.vars['p']['p_pred'].stats()['mean']
    model.input_data['sigma_pred'] = model.vars['p']['p_pred'].stats(
    )['standard deviation']
    add_quality_metrics(model.input_data)

    model.alpha = pandas.DataFrame(
        index=[n for n in nx.traversal.dfs_preorder_nodes(model.hierarchy)])
    model.alpha['true'] = pandas.Series(dict(alpha))
    model.alpha['mu_pred'] = pandas.Series(
        [n.stats()['mean'] for n in model.vars['p']['alpha']],
        index=model.vars['p']['U'].columns)
    model.alpha['sigma_pred'] = pandas.Series(
        [n.stats()['standard deviation'] for n in model.vars['p']['alpha']],
        index=model.vars['p']['U'].columns)
    add_quality_metrics(model.alpha)

    print '\nalpha'
    print model.alpha.dropna()

    model.sigma = pandas.DataFrame(dict(true=sigma_true))
    model.sigma['mu_pred'] = [
        n.stats()['mean'] for n in model.vars['p']['sigma_alpha']
    ]
    model.sigma['sigma_pred'] = [
        n.stats()['standard deviation'] for n in model.vars['p']['sigma_alpha']
    ]
    add_quality_metrics(model.sigma)

    print 'sigma_alpha'
    print model.sigma

    model.results = dict(param=[], bias=[], mare=[], mae=[], pc=[])
    add_to_results(model, 'sigma')

    model.delta = pandas.DataFrame(dict(true=[delta_true]))
    model.delta['mu_pred'] = pl.exp(model.vars['p']['eta'].trace()).mean()
    model.delta['sigma_pred'] = pl.exp(model.vars['p']['eta'].trace()).std()
    add_quality_metrics(model.delta)

    print 'delta'
    print model.delta
    add_to_results(model, 'delta')

    print '\ndata prediction bias: %.5f, MARE: %.3f, coverage: %.2f' % (
        model.input_data['abs_err'].mean(),
        pl.median(pl.absolute(model.input_data['rel_err'].dropna())),
        model.input_data['covered?'].mean())
    print 'effect prediction MAE: %.3f, coverage: %.2f' % (
        pl.median(pl.absolute(model.alpha['abs_err'].dropna())),
        model.alpha.dropna()['covered?'].mean())
    add_to_results(model, 'input_data')
    add_to_results(model, 'alpha')

    model.results = pandas.DataFrame(model.results)
    return model
Example #33
def _untilt_sample(x, i, s, a):
    if isinstance(i, int):  # make random sample here (this might be a bad idea)
        i = pm.rcategorical(np.ones(x.shape[0]) / x.shape[0], size=i)
    return ba.untilt(x[i, :], s, a)
Example #34
def bic_norm_hmm_init_params(y, X_matrices):
    """ Initialize a normal HMM regression mixture with a GMM mixture
    of a BIC determined number of states.  Starting with an initial
    set of design matrices, this function searches for the best number
    of additional constant states to add to the model.


    Parameters
    ==========
    y: pandas.DataFrame or pandas.Series
        Time-indexed vector of observations.
    X_matrices: list of pandas.DataFrame
        Collection of design matrices for each initial state.

    Returns
    =======
    init_params:
        A `NormalHMMInitialParams` object.
    """

    N_states = len(X_matrices)

    from sklearn import mixture
    lowest_bic = np.infty
    bic = []
    for n_components in range(N_states, 10):
        gmm = mixture.GMM(n_components=n_components, covariance_type="diag")
        _ = gmm.fit(y.dropna())
        bic.append(gmm.bic(y.dropna()))
        if bic[-1] < lowest_bic:
            lowest_bic = bic[-1]
            best_gmm = gmm

    from operator import itemgetter
    gmm_order = sorted(enumerate(best_gmm.means_), key=itemgetter(1))
    gmm_order = np.asarray(map(itemgetter(0), gmm_order))
    gmm_order_map = dict(zip(gmm_order, range(len(gmm_order))))

    gmm_states = pd.DataFrame(None,
                              index=y.index,
                              columns=['state'],
                              dtype=np.int)
    gmm_raw_predicted = best_gmm.predict(y.dropna()).astype(np.int)
    gmm_states[~y.isnull().values.ravel()] = gmm_raw_predicted[:, None]

    from functools import partial
    gmm_lam = partial(lambda x: gmm_order_map.get(x, np.nan))
    states_ordered = gmm_states['state'].map(gmm_lam)

    # When best_gmm.n_components > N_states we need to map multiple
    # GMM estimated states to a single state (the last, really) in
    # the model.  Below we create the map that says which states
    # in GMM map to which states in the model.
    from itertools import izip_longest
    from collections import defaultdict
    gmm_to_state_map = dict(
        izip_longest(range(best_gmm.n_components),
                     range(N_states),
                     fillvalue=N_states - 1))
    state_to_gmm_map = defaultdict(list)
    for i, v in zip(gmm_to_state_map.values(), gmm_to_state_map.keys()):
        state_to_gmm_map[i].append(v)
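    # For example, if best_gmm.n_components == 5 and N_states == 3, then
    #   gmm_to_state_map == {0: 0, 1: 1, 2: 2, 3: 2, 4: 2}
    #   state_to_gmm_map == {0: [0], 1: [1], 2: [2, 3, 4]}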

    gmm_to_state_lam = partial(lambda x: gmm_to_state_map.get(x, np.nan))

    states_initial = states_ordered.map(gmm_to_state_lam)

    trans_freqs = compute_trans_freqs(states_initial, N_states)
    alpha_trans_0 = calc_alpha_prior(states_initial,
                                     N_states,
                                     trans_freqs=trans_freqs)

    if any(y.isnull()):
        # Now, let's sample values for the missing observations.
        # TODO: Would be better if we did this according to the
        # initial transition probabilities, no?
        for t in np.arange(y.size)[y.isnull().values.ravel()]:
            if t == 0:
                p0 = compute_steady_state(trans_freqs[:, :-1])
                state = pymc.rcategorical(p0)
            else:
                state = pymc.rcategorical(trans_freqs[int(states_initial[t -
                                                                         1])])
            states_initial[t] = state

    beta_prior_means = []
    beta_prior_covars = []
    obs_prior_vars = np.empty(N_states)
    for i, gmm_states in state_to_gmm_map.items():

        this_order = gmm_order[gmm_states]
        these_weights = best_gmm.weights_[this_order]
        these_weights /= these_weights.sum()
        these_means = best_gmm.means_[this_order]
        these_covars = best_gmm.covars_[this_order]

        # Use the exact mixture variance when we have two
        # states to combine; otherwise, use a crude estimate.
        # TODO: We can get an expression for len(gmm_states) > 2.
        if len(gmm_states) == 2:
            pi_, pi_n = these_weights
            sigma_1, sigma_2 = these_covars
            mu_diff = np.ediff1d(these_means)
            this_cov = pi_ * (sigma_1**2 + mu_diff**2 * pi_n**2) +\
                pi_n * (sigma_2**2 + mu_diff**2 * pi_**2)
            this_cov = float(this_cov)
        else:
            this_cov = these_covars.sum()
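        # A sketch of the general mixture-variance expression hinted at in the
        # TODO above (valid for any number of combined states, assuming
        # these_covars hold the per-state variances); not used here:
        #   mix_mean = np.dot(these_weights, these_means.ravel())
        #   this_cov = float(np.dot(these_weights,
        #                           these_covars.ravel() + these_means.ravel()**2)
        #                    - mix_mean**2)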

        # TODO: How should/could we use this?
        # this_mean = np.dot(these_weights, best_gmm.means_[this_order])

        # Get the data conditional on this [estimated] state.
        states_cond = np.asarray(
            map(lambda x: True if x in gmm_states else False, states_ordered))
        from sklearn import linear_model
        reg_model = linear_model.ElasticNetCV(fit_intercept=False)

        N_beta = X_matrices[i].shape[1]

        X_cond = X_matrices[i][states_cond]
        y_cond = y[states_cond].get_values().ravel()

        if not X_cond.empty:
            # TODO: Could ask how this compares to the intercept-only model above.
            reg_model_fit = reg_model.fit(X_cond, y_cond)
            reg_model_err = np.atleast_1d(
                np.var(reg_model_fit.predict(X_cond) - y_cond))
            beta_prior_means += [np.atleast_1d(reg_model_fit.coef_)]
            beta_prior_covars += [np.repeat(reg_model_err, N_beta)]
        else:
            beta_prior_means += [np.zeros(N_beta)]
            # TODO: A better default for an "uninformed" initial value?
            # This is really a job for a prior distribution.
            beta_prior_covars += [100. * np.ones(N_beta)]

        obs_prior_vars[i] = this_cov

    init_params = NormalHMMInitialParams(alpha_trans_0, None, states_initial,
                                         beta_prior_means, beta_prior_covars,
                                         obs_prior_vars, None)
    return init_params
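
A minimal usage sketch for the initializer above. The data are hypothetical, and it assumes the call runs in the same module, so that bic_norm_hmm_init_params and the helpers it relies on (compute_trans_freqs, calc_alpha_prior, compute_steady_state, NormalHMMInitialParams) are in scope:

import numpy as np
import pandas as pd

T = 300
index = pd.date_range('2000-01-01', periods=T, freq='D')

# A noisy two-level series with a few missing observations.
y = pd.DataFrame(np.random.normal(loc=np.repeat([0., 3.], T // 2), scale=0.5),
                 index=index, columns=['y'])
y.iloc[::37] = np.nan

# One intercept-only design matrix per initial state.
X_matrices = [pd.DataFrame(np.ones((T, 1)), index=index) for _ in range(2)]

init_params = bic_norm_hmm_init_params(y, X_matrices)
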
Example #35
def gmm_norm_hmm_init_params(y, X_matrices):
    """ Generates initial parameters for the univariate normal-emissions HMM
    with normal mean priors.

    Parameters
    ==========
    y: pandas.DataFrame or pandas.Series
        Time-indexed vector of observations.
    X_matrices: list of pandas.DataFrame
        Collection of design matrices for each hidden state's mean.

    Returns
    =======
    init_params:
        A `NormalHMMInitialParams` object.
    """

    # initialize with simple gaussian mixture
    from sklearn.mixture import GMM

    N_states = len(X_matrices)

    gmm_model = GMM(N_states, covariance_type='diag')
    gmm_model_fit = gmm_model.fit(y.dropna())

    from operator import itemgetter
    gmm_order = sorted(enumerate(gmm_model_fit.means_), key=itemgetter(1))
    gmm_order = map(itemgetter(0), gmm_order)
    gmm_order_map = dict(zip(gmm_order, range(len(gmm_order))))
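    # For example, if gmm_model_fit.means_ == [[2.], [0.], [1.]], then
    # gmm_order == [1, 2, 0] (components sorted by mean) and
    # gmm_order_map == {1: 0, 2: 1, 0: 2}, i.e. the component with the
    # smallest mean is relabeled as state 0.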
    # gmm_ord_weights = np.asarray([gmm_model_fit.weights_[x] for x in
    #                               gmm_order])

    # TODO: attempt conditional regression when X matrices tell us
    # that we'll be fitting regression terms?
    # For now we just set those terms to zero.
    gmm_ord_means = np.asarray([
        np.append(gmm_model_fit.means_[x], [0.] * (X_matrices[i].shape[1] - 1))
        for i, x in enumerate(gmm_order)
    ])
    gmm_ord_obs_covars = np.asarray(
        [gmm_model_fit.covars_[x, 0] for x in gmm_order])
    gmm_states = pd.DataFrame(None,
                              index=y.index,
                              columns=['state'],
                              dtype=np.int)
    gmm_raw_predicted = gmm_model_fit.predict(y.dropna()).astype(np.int)
    gmm_states[~y.isnull().values] = gmm_raw_predicted[:, None]

    from functools import partial
    gmm_lam = partial(lambda x: gmm_order_map.get(x, np.nan))
    gmm_ord_states = gmm_states['state'].map(gmm_lam)

    beta_prior_covars = [
        np.ones(X_matrices[i].shape[1]) * 10 for i in range(len(X_matrices))
    ]

    trans_freqs = compute_trans_freqs(gmm_ord_states, N_states)
    alpha_trans_0 = calc_alpha_prior(gmm_ord_states,
                                     N_states,
                                     trans_freqs=trans_freqs)

    if any(y.isnull()):
        # Now, let's sample values for the missing observations.
        # TODO: Would be better if we did this according to the
        # initial transition probabilities, no?
        for t in np.arange(y.size)[y.isnull().values.ravel()]:
            if t == 0:
                p0 = compute_steady_state(trans_freqs[:, :-1])
                state = pymc.rcategorical(p0)
            else:
                state = pymc.rcategorical(trans_freqs[int(gmm_ord_states[t -
                                                                         1])])
            gmm_ord_states[t] = state

    init_params = NormalHMMInitialParams(alpha_trans_0, None, gmm_ord_states,
                                         gmm_ord_means, beta_prior_covars,
                                         gmm_ord_obs_covars, None)

    return init_params
Example #36
reload(dismod3)

# set font
book_graphics.set_font()

pi_true = scipy.interpolate.interp1d([0, 20, 40, 60, 100],
                                     [.4, .425, .6, .5, .4])
beta_true = .3
delta_true = 50.
N = 30

# start with a simple model with N rows of data
model = data_simulation.simple_model(N)

# set covariate to 0/1 values randomly
model.input_data['x_cov'] = 1. * mc.rcategorical([.5, .5], size=N)

# record the true age-specific rates
model.ages = pl.arange(0, 101, 1)
model.pi_age_true = pi_true(model.ages)

# choose age groups randomly
age_width = pl.zeros(N)
age_mid = mc.runiform(age_width / 2, 100 - age_width / 2, size=N)
age_start = pl.array(age_mid - age_width / 2, dtype=int)
age_end = pl.array(age_mid + age_width / 2, dtype=int)

model.input_data['age_start'] = age_start
model.input_data['age_end'] = age_end

# choose effective sample size uniformly at random
Example #37
def settlement_size_samples(mu, tau, cutoff, sum_mu, sum_tau, pop_accounted, N):
    """
    Returns N samples from the distribution of unsampled settlement sizes.
    Settlement sizes are drawn from a truncated lognormal distribution,
    conditional on their sum equaling a total population value drawn from a
    lognormal distribution with parameters (sum_mu, sum_tau), less the
    population already accounted for (pop_accounted).

    At the SIR stage, 1000 samples are proposed per total and 10 are retained.
    
    :Parameters:
    - mu : float
      The mean parameter.
    - tau : float
      The precision parameter.
    - cutoff : float
      The truncation value.
    - sum_mu : float
      The mean of the lognormal distribution of total population.
    - sum_tau : float
      The precision parameter of the lognormal distribution of total population.
    - pop_accounted : integer
      The total population accounted for by the GRUMP urban extents.
    - N : integer
      The number of samples to return.    
    """

    N_sum_vals = N/10
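    # 10 SIR samples are retained per total-population draw, so N/10 draws
    # of the total yield roughly N settlement-size samples overall.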
    
    # Compute moments and characteristic function for single population size,
    # to be used in all posterior evaluations.
    lnorms = np.exp(geto_truncnorm(mu, tau, log(cutoff), 10000))        
    sum_moments = np.mean(lnorms), np.var(lnorms)
    oFT = robust_CF(mu, tau, cutoff)
    
    # Generate values for total population in region not accounted for by
    # GRUMP urban extents.
    sum_vals = pm.rlognormal(sum_mu, sum_tau, size=N_sum_vals)-pop_accounted
    where_not_OK = np.where(sum_vals < 0)
    while len(where_not_OK[0]) > 0:
        sum_vals[where_not_OK] = pm.rlognormal(sum_mu, sum_tau, size=len(where_not_OK[0]))-pop_accounted
        where_not_OK = np.where(sum_vals < 0)        
    
    # Create 10 samples using SIR for each sum.
    samps = []
    for sum_val in sum_vals:
        
        tries = 0
        while tries < 10:
            try:
                # Find posterior of N given this sum, and draw a single sample from it.
                Nmesh, p = robust_posterior(mu, tau, cutoff, sum_val, prior_fun=None, sum_moments=sum_moments, oFT=oFT)
                N = Nmesh[int(pm.rcategorical(p))]

                # Draw 10 samples for the sizes of the constituent populations given their number and
                # the total population size.
                w,i,s = SIR_simplex_sample(mu, tau, cutoff, sum_val, N, N_proposals=1000, N_samps=10)
                break
            except RuntimeError:
                print 'Failed at N=%f, Nmesh=%s, p=%s. Trying again' % (N, Nmesh, p)
                tries += 1
                if tries == 10:
                    import sys
                    a, b, c = sys.exc_info()
                    raise a, b, c
                
        samps.extend(list(s.T))

    # Return, you're done!
    return samps
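
A minimal call sketch for settlement_size_samples. All parameter values below are hypothetical placeholders, and the function also relies on module-level helpers such as geto_truncnorm, robust_CF, robust_posterior, and SIR_simplex_sample being defined alongside it:

# Hypothetical log-scale parameters and totals, for illustration only.
samples = settlement_size_samples(mu=7.0, tau=1.0, cutoff=5000.,
                                  sum_mu=13.0, sum_tau=4.0,
                                  pop_accounted=200000, N=100)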