Example #1
    def inference(self, iter_=5000, burn=1000):
        theta = pm.Container([
            pm.CompletedDirichlet(
                "theta_%s" % d, pm.Dirichlet("ptheta_%s" % d,
                                             theta=self.alpha))
            for d in range(self.D)
        ])
        phi = pm.Container([
            pm.CompletedDirichlet("phi_%s" % k,
                                  pm.Dirichlet("pphi_%s" % k, theta=self.beta))
            for k in range(self.K)
        ])
        z_d = pm.Container([
            pm.Categorical("z_%s" % d,
                           p=theta[d],
                           value=np.random.randint(self.K,
                                                   size=len(self.bw[d])),
                           size=len(self.bw[d])) for d in range(self.D)
        ])
        w_z = pm.Container([
            pm.Categorical("w_%s_%s" % (d, w),
                           p=phi[z_d[d][w].get_value()],
                           value=self.bw[d][w],
                           observed=True) for d in range(self.D)
            for w in range(len(self.bw[d]))
        ])

        model = pm.Model([theta, phi, z_d, w_z])
        self.mcmc = pm.MCMC(model)
        self.mcmc.sample(iter=iter_, burn=burn)
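A minimal usage sketch (not part of the original source): treating inference() above as a plain function and supplying a stand-in object with the attributes the method body expects. The attribute names come from the method itself; the toy corpus is an assumption.

import numpy as np
import pymc as pm
from types import SimpleNamespace

# Stand-in for the enclosing instance: D documents, K topics, and bw holding
# one array of word indices per document (all values are illustrative).
self_ = SimpleNamespace(D=2, K=3)
self_.alpha = np.ones(self_.K)    # Dirichlet prior over topics
self_.beta = np.ones(10)          # Dirichlet prior over a 10-word vocabulary
self_.bw = [np.array([0, 1, 4]), np.array([2, 2, 7, 9])]

inference(self_, iter_=2000, burn=500)
topic_mix_doc0 = self_.mcmc.trace("theta_0")[:].mean(axis=0)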
Example #2
def create_pymc_model(x_mat, y_vec, prior_sigma=5.0):
    tau_mc = 1. / (prior_sigma**2)
    (n, d) = np.shape(x_mat)
    w_mc = []
    x_mc = []
    for i in np.arange(0, d):
        w_mc.append(pymc.Normal('w' + str(i + 1) + '_mc', 0.0, tau_mc))
        x_mc.append(
            pymc.Normal('x' + str(i + 1) + '_mc',
                        0.0,
                        1.0,
                        value=x_mat[:, i],
                        observed=True))
    w_mc = np.array(w_mc, dtype=object)
    x_mc = np.array(x_mc, dtype=object)

    @pymc.deterministic
    def pred_mu(w_mc=w_mc, x_mc=x_mc):
        return sigmoid(np.dot(x_mc, np.transpose(w_mc)))

    y_mc = pymc.Bernoulli('y_mc',
                          p=pred_mu,
                          value=np.array([0 if val == -1 else 1
                                          for val in y_vec]),
                          observed=True)
    return pymc.Model(
        [pred_mu, pymc.Container(w_mc),
         pymc.Container(x_mc), y_mc])
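create_pymc_model assumes a sigmoid helper is already in scope. A sketch of that helper plus a hypothetical invocation on toy data (both are assumptions, not part of the original source):

import numpy as np
import pymc

def sigmoid(a):
    # logistic link assumed by create_pymc_model above
    return 1.0 / (1.0 + np.exp(-a))

x_mat = np.random.randn(5, 2)          # 5 points, 2 features
y_vec = np.array([-1, 1, 1, -1, 1])    # labels in {-1, +1}
model = create_pymc_model(x_mat, y_vec, prior_sigma=5.0)
mcmc = pymc.MCMC(model)
mcmc.sample(iter=2000, burn=500)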
Example #3
    def get_z_data(self, p, p_pos, q):
        K = 2  # Num topics
        M = p  # Num documents
        N = q  # Total num of unique words across all documents

        alpha = 1.0  # Concentration parameter for the distributions over
        # topics (one for each document)
        beta = 1.0  # Concentration parameter for the distributions over
        # words (one for each topic)

        phi = pymc.Container([
            pymc.CompletedDirichlet(
                name="phi_" + str(k),
                D=pymc.Dirichlet(name="phi_temp_" + str(k),
                                 theta=beta * numpy.ones(N)),
            ) for k in range(K)
        ])

        theta = pymc.Container([
            pymc.CompletedDirichlet(
                name="theta_" + str(m),
                D=pymc.Dirichlet(name="theta_temp_" + str(m),
                                 theta=alpha * numpy.ones(K)),
            ) for m in range(M)
        ])

        z = pymc.Container([
            pymc.Categorical(name="z_" + str(m), p=theta[m], size=N)
            for m in range(M)
        ])

        w = pymc.Container([
            pymc.Categorical(
                name="w_" + str(m) + "_" + str(n),
                p=pymc.Lambda(
                    "phi_z_" + str(m) + str(n),
                    lambda z_in=z[m][n], phi_in=phi: phi_in[z_in],
                ),
            ) for m in range(M) for n in range(N)
        ])
        lda = pymc.Model([w, z, theta, phi])

        z_rvs = []
        for m in range(M):
            metadata = {"doc_idx": m, "num_unique_words": N}
            rv = WordCountVecRV(
                model=lda, name="w_0_0",
                metadata=metadata)  # Note: w_0_0 is just a dummy
            # argument that must be present in
            # the pymc.Model
            z_rvs += [rv]
        return z_rvs
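The w container above relies on pymc.Lambda so that each word's category probabilities track the current topic assignment. A standalone sketch of that indexing pattern with toy sizes (the names here are illustrative, not from the original source):

import numpy
import pymc

K, N = 2, 5   # topics, vocabulary size
phi = pymc.Container([
    pymc.CompletedDirichlet(
        "phi_%d" % k,
        pymc.Dirichlet("phi_temp_%d" % k, theta=numpy.ones(N)))
    for k in range(K)
])
z = pymc.Categorical("z", p=numpy.ones(K) / K)
# w's probability vector is re-evaluated whenever z changes value
w = pymc.Categorical("w",
                     p=pymc.Lambda("phi_z", lambda z=z, phi=phi: phi[z]))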
Example #4
    def __init__(self, corpus, K=10, iterations=1000, burn=100):
        print("Building model ...")
        self.K = K
        self.V = corpus.wordCount + 1
        self.M = corpus.documentCount
        self.alpha = np.ones(self.K)
        self.beta = np.ones(self.V)
        self.corpus = corpus
        self.observations = np.array(corpus.observations)

        self.phi = np.empty(self.K, dtype=object)
        for i in range(self.K):
            self.phi[i] = pm.CompletedDirichlet(
                "Phi[%i]" % i, pm.Dirichlet("phi[%i]" % i, theta=self.beta))
        self.phi = pm.Container(self.phi)

        self.theta = np.empty(self.M, dtype=object)
        for i in range(self.M):
            self.theta[i] = pm.CompletedDirichlet(
                "Theta[%i]" % i, pm.Dirichlet("theta[%i]" % i,
                                              theta=self.alpha))
        self.theta = pm.Container(self.theta)

        self.z = np.empty(self.observations.shape, dtype=object)
        for i in range(self.M):
            self.z[i] = pm.Categorical("z[%i]" % i,
                                       size=len(self.observations[i]),
                                       p=self.theta[i],
                                       value=np.random.randint(
                                           self.K,
                                           size=len(self.observations[i])))
        self.z = pm.Container(self.z)

        self.w = []
        for i in range(self.M):
            self.w.append([])
            for j in range(len(self.observations[i])):
                self.w[i].append(
                    pm.Categorical(
                        "w[%i][%i]" % (i, j),
                        p=pm.Lambda(
                            "phi[z[%i][%i]]" % (i, j),
                            lambda z=self.z[i][j], phi=self.phi: phi[z]),
                        value=self.observations[i][j],
                        observed=True))
        self.w = pm.Container(self.w)

        self.mcmc = pm.MCMC(pm.Model([self.theta, self.phi, self.z, self.w]))

        print("Fitting model ...")
        self.mcmc.sample(iter=iterations, burn=burn)
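Once sampling finishes, posterior summaries come straight off the traces. A minimal sketch (not from the original source), assuming model is an instance of the class above:

import numpy as np

def topic_word_means(model):
    # Posterior mean word distribution per topic; trace names follow the
    # "Phi[%i]" convention used in __init__ above.
    return [np.mean(model.mcmc.trace("Phi[%i]" % i)[:], axis=0)
            for i in range(model.K)]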
Example #5
    def setup_inference(self):
        #depending on the number of wavelengths
        #self.wavelengths = [self.wavelengths[len(self.wavelengths)-1]]
        wavelength_number = len(self.wavelengths)
        
        t = 1. / 5.**2
        #C_sigs = pymc.Container([pymc.HalfCauchy("c_sigs_%i_%i" % (i, x), beta = 10, alpha=1) \
        #                         for i in range(1+2*self.N) for x in range(wavelength_number)])
        C = pymc.Container([pymc.Normal("c_%i_%i" % (i, x), mu=0, tau=t)
                            for i in range(1 + 2 * self.N)
                            for x in range(wavelength_number)])
        #i_ = pymc.Container([pymc.DiscreteUniform('i_%i' %i,lower=0,upper=1) for i in range(len(self.xdata))])
        @pymc.stochastic(observed=False)
        def sigma(value=1):
            return -np.log(abs(value))
        @pymc.stochastic(observed=False)
        def sigma3(value=1):
            return -np.log(abs(value))
        
        qw_sigs = pymc.Container([pymc.HalfCauchy("qw_sigs_%i" % x, beta=10, alpha=1)
                                  for x in range(wavelength_number)])
        if self.wavelength_sd_defined:
            qw = pymc.Container([pymc.distributions.Lognormal('qw_%i' % x,
                                                              mu=self.wavelengths[x],
                                                              tau=1. / self.wavelength_sd[x] ** 2)
                                 for x in range(wavelength_number)])
        else:
            qw = pymc.Container([pymc.distributions.Normal('qw_%i' % x,
                                                           mu=self.wavelengths[x],
                                                           tau=1. / self.wavelengths[x] * .125)
                                 for x in range(wavelength_number)])



        def fourier_series(C,N,QW,x,wavelength_number):
            v = np.array(x)
            v.fill(0.0)
            v = v.astype('float')
            for ii in range(len(x)):
                for w in range(wavelength_number):
                    v += C[w]
                    for i in range(1,N+1):
                        v[ii] = (v[ii]
                                 + C[(2*i-1)*wavelength_number+w]*np.cos(2*np.pi/QW[w] * i * (x[ii]))
                                 + C[(2*i)*wavelength_number+w]*np.sin(2*np.pi/QW[w] * i * (x[ii])))
                #if i_[ii] == 0:
                #    v[ii] = -v[ii]
            return v  # np.sum(v)
        self.vector_fourier_series = np.vectorize(fourier_series)
        # Define the form of the model and likelihood
        @pymc.deterministic
        def y_model(C=C,x=self.xdata,qw=qw,nn=self.N,wavelength_number=wavelength_number):
            return fourier_series(C,nn,qw,x,wavelength_number) 
        y = pymc.Normal('y', mu=y_model, tau=1. / sigma ** 2, observed=True, value=self.ydata)
        # package the full model in a dictionary
        self.model1 = dict(C=C, qw=qw, sigma=sigma,qw_sigs=qw_sigs,
                      y_model=y_model, y=y,x_values=self.xdata,y_values=self.ydata)
        self.setup = True
        self.mcmc_uptodate = False
        return self.model1
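The returned dictionary can be handed directly to pymc.MCMC, which accepts a dict of nodes. A hypothetical continuation, assuming obj is an instance of the enclosing class with its data attributes already set:

import pymc

model1 = obj.setup_inference()
mcmc = pymc.MCMC(model1)
mcmc.sample(iter=5000, burn=1000)
posterior_c00 = mcmc.trace('c_0_0')[:]   # trace of the first Fourier coefficient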
Example #6
    def __init__(self,
                 input,
                 sampler,
                 db='ram',
                 eps=.001,
                 diff_order=5,
                 verbose=0,
                 tune_interval=10):

        Q = pm.Container(input)
        new_input = (Q.nodes | sampler.nodes) - sampler.stochastics

        MAP.__init__(self, input=new_input, eps=eps, diff_order=diff_order)

        self.tune_interval = tune_interval
        self.verbose = verbose
        self.sampler = sampler

        # Figure out which stochastics' log-probabilities need to be averaged.
        self.stochastics_to_integrate = set()

        for s in self.stochastics:
            mb = s.markov_blanket
            if any([other_s in mb for other_s in sampler.stochastics]):
                self.stochastics_to_integrate.add(s)
Example #7
def linear_setup(df, ind_cols, dep_col):
    '''
        Inputs: pandas DataFrame, list of strings for the independent variables,
        single string for the dependent variable
        Output: PyMC Model
    '''

    # model our intercept and error term as above
    b0 = pymc.Normal("b0", 0, 0.0001)
    err = pymc.Uniform("err", 0, 500)

    # initialize a NumPy array to hold our betas
    # and our observed x values
    b = np.empty(len(ind_cols), dtype=object)
    x = np.empty(len(ind_cols), dtype=object)

    # loop through b, and make our ith beta
    # a normal random variable, as in the single variable case
    for i in range(len(b)):
        b[i] = pymc.Normal("b" + str(i + 1), 0, 0.0001)

    # loop through x, and inform our model about the observed
    # x values that correspond to the ith position
    for i, col in enumerate(ind_cols):
        x[i] = pymc.Normal("x" + str(i + 1),
                           0,
                           1,
                           value=np.array(df[col]),
                           observed=True)

    # as above, but use .dot() for 2D array (i.e., matrix) multiplication
    @pymc.deterministic
    def y_pred(b0=b0, b=b, x=x):
        return b0 + b.dot(x)

    # finally, "model" our observed y values as above
    y = pymc.Normal("y",
                    y_pred,
                    err,
                    value=np.array(df[dep_col]),
                    observed=True)

    return pymc.Model(
        [b0, pymc.Container(b), err,
         pymc.Container(x), y, y_pred])
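A hypothetical invocation with a toy DataFrame (illustrative data, not from the original source):

import numpy as np
import pandas as pd
import pymc

# Toy dataset: two predictors and a noisy linear response.
rng = np.random.RandomState(0)
df = pd.DataFrame({'x1': rng.randn(50), 'x2': rng.randn(50)})
df['y'] = 1.5 + 2.0 * df['x1'] - 0.5 * df['x2'] + 0.1 * rng.randn(50)

model = linear_setup(df, ['x1', 'x2'], 'y')
mcmc = pymc.MCMC(model)
mcmc.sample(iter=10000, burn=5000)
print(mcmc.trace('b1')[:].mean(), mcmc.trace('b2')[:].mean())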
Example #8
 def __setup_sigma(self):
     """Populates the self.sigma list with RVs corresponding to sigma param
     of the Logit-normal distribution, one for each equivalence class.
     """
     self.sigma = pymc.Container([pymc.Uniform('sigma_%s' % j,
                                               lower=0.01,
                                               upper=0.3,
                                               value=0.15)
                                  for j in xrange(0, self.num_equiv)])
Example #9
 def __setup_eqv(self):
     """Populates the self.eqv list for each classifier by assigning it
     a categorical distribution.
     """
     # per_class = self.num_classifiers / self.num_equiv
     self.eqv = pymc.Container(
         [pymc.Categorical('categ_%s' % i,
                           p=self.theta[i],
                           value=numpy.random.randint(0, self.num_equiv))
          # value=min(i / per_class, self.num_equiv - 1))
          for i in xrange(0, self.num_classifiers)])
Example #10
 def __setup_obs(self):
     self.obs = pymc.Container(
         [pymc.Normal('obs_%s' % i,
                      mu=pymc.Lambda('omu_%s' % i,
                                     lambda cls=self.eqv[i]: self.mu[cls]),
                      tau=pymc.Lambda('otau_%s' % i,
                                      lambda cls=self.eqv[i]:
                                      1.0 / (self.sigma[cls]**2)),
                      value=self.logit(acc),
                      observed=True)
          for i, acc in enumerate(self.observations)])
Example #11
 def __setup_mu(self):
     """Populates the self.mu list with RVs corresponding to mu param
     of the Logit-Normal distribution, one for each equivalence class.
     """
     # self.mu = pymc.Container([pymc.Uniform('mu_%s' % j,
     #                                     lower = -4,
     #                                     upper = 4,
     #                                     value = 0)
     #                         for j in xrange(0, self.num_equiv)])
     self.mu = pymc.Container([pymc.Normal('mu_%s' % j,
                                           mu=self.mu_star,
                                           tau=1.0 / (self.sigma_star**2),
                                           value=0.6)
                               for j in xrange(0, self.num_equiv)])
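The four __setup_* fragments above (Examples #8 to #11) appear to come from a single hierarchical model over classifier accuracies. Below is a minimal sketch of a constructor that could wire them together; the class layout, hyperparameter values, and the logit helper are all assumptions inferred from the method bodies, not confirmed source.

 def __init__(self, observations):
     # Hypothetical constructor; every default below is an assumption.
     self.observations = observations           # one accuracy per classifier
     self.num_classifiers = len(observations)
     self.num_equiv = 3                         # assumed equivalence classes
     self.mu_star, self.sigma_star = 0.0, 1.0   # assumed hyperparameters
     # assumed uniform prior over equivalence classes for each classifier
     self.theta = [numpy.ones(self.num_equiv) / self.num_equiv
                   for _ in xrange(self.num_classifiers)]
     self.logit = lambda a: numpy.log(a / (1.0 - a))  # assumed helper
     # name mangling: these calls only resolve inside the class body
     self.__setup_mu()
     self.__setup_sigma()
     self.__setup_eqv()
     self.__setup_obs()
     self.mcmc = pymc.MCMC([self.mu, self.sigma, self.eqv, self.obs])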
Example #12
def linear_setup(df, ind_cols, dep_col):
    '''
    Inputs: pandas Data Frame, list of independent features, outcome var
    Output: PyMC Model
    '''

    # Non-informative priors for parameters: intercept and error
    b0 = pm.Normal('b0', 0, 0.0001)
    err = pm.Normal('err', 0, 0.0001)

    # initialize NumPy arrays for b and x with same size as no of covariates
    b = np.empty(len(ind_cols), dtype=object)
    x = np.empty(len(ind_cols), dtype=object)

    # Non-informative priors for each coefficient
    for i in range(len(b)):
        b[i] = pm.Normal('b' + str(i + 1), 0, 0.0001)

    # Equating x with normal distribution for each data point
    for i, col in enumerate(ind_cols):
        x[i] = pm.Normal('x' + str(i + 1),
                         0,
                         1,
                         value=np.array(df[col]),
                         observed=True)

    # For deterministic equations, need to define the function in this format
    # .dot() for 2D array (i.e., matrix) multiplication since it's a multi-variable regression
    @pm.deterministic
    def y_pred(b0=b0, b=b, x=x):
        return b0 + b.dot(x)

    # Modeling observed y values
    y = pm.Normal('y', y_pred, err, value=np.array(df[dep_col]), observed=True)

    # Returning the required model
    return pm.Model([b0, pm.Container(b), err, pm.Container(x), y, y_pred])
Example #13
def wrapper(priorname='', low=[], up=[], other_args={}, optimized=False):

    if priorname in priors:
        priormethod = priors[priorname]
    elif hasattr(pymc, priorname):
        priormethod = getattr(pymc, priorname)
    else:
        print('WARNING: prior name not found! Falling back to DiscreteUniform...')
        priormethod = pymc.DiscreteUniform

    truthprior = []
    for bin, (l, u) in enumerate(zip(low, up)):
        name = 'truth%d' % bin
        default_args = dict(name=name, value=l + (u - l) / 2, lower=l, upper=u)
        args = dict(default_args, **other_args)
        prior = priormethod(**args)
        truthprior.append(prior)

    return pymc.Container(truthprior)
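A hypothetical call building one Uniform truth prior per bin (priors is the externally defined dict of custom constructors referenced above; the bin edges are illustrative):

truth = wrapper(priorname='Uniform', low=[0., 0., 0.], up=[10., 20., 30.])
for t in truth:
    print(t.__name__, t.value)   # truth0 .. truth2 with midpoint start values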
Example #14
    def __init__(self, F, G, V, W, m_0, C_0, Y_vals = None):
        """
        D = DLM(F, G, V, W, m_0, C_0[, Y_vals])

        Returns special NormalSubmodel instance representing the dynamic
        linear model formed by F, G, V and W.

        Resulting probability model:

            theta[0] | m_0, C_0 ~ N(m_0, C_0)

            theta[t] | theta[t-1], G[t], W[t] ~ N(G[t] theta[t-1], W[t]), t = 1..T

            Y[t] | theta[t], F[t], V[t] ~ N(F[t] theta[t], V[t]), t = 0..T


        Arguments F, G, V should be dictionaries keyed by name of component.
            F[comp], G[comp], V[comp] should be lists.
                F[comp][t] should be the design vector of component 'comp' at time t.
                G[comp][t] should be the system matrix.

        Argument W should be either a number between 0 and 1 or a dictionary of lists
        like V.
            If a dictionary of lists, W[comp][t] should be the system covariance or
            variance at time t.
            If a scalar, W should be the discount factor for the DLM.

        Arguments V and Y_vals, if given, should be lists.
            V[t] should be the observation covariance or variance at time t.
            Y_vals[t] should give the value of output Y at time t.

        Arguments m_0 and C_0 should be dictionaries keyed by name of component.
            m_0[comp] should be the mean of theta[comp][0].
            C_0[comp] should be the covariance or variance of theta[comp][0].

        Note: if multiple components are correlated in W or V, they should be made into
        a single component.

        D.comp is a handle to a list.
            D.comp[t] is a Stochastic representing the value of system state 'theta'
            sliced according to component 'comp' at time t.

        D.theta is a dictionary of lists analogous to F, G, V and W.

        D.Y is a list. D.Y[t] is a Stochastic representing the value of the output
        'Y' at time t.
        """

        self.comps = F.keys()

        self.F = dict_to_recarray(F)
        self.G = dict_to_recarray(G)
        self.V = pymc.ListContainer(V)
        if np.isscalar(W):
            self.discount = True
            self.delta = W
        else:
            self.W = dict_to_recarray(W)
            self.discount = False
            self.delta = None
        if self.discount:
            raise NotImplementedError("Have yet to code up the discount factor.")
        self.m_0 = dict_to_recarray(m_0)
        self.C_0 = dict_to_recarray(C_0)
        self.T = len(self.V)

        theta = {}
        theta_mean = {}

        Y_mean = []
        Y = []

        # ==============
        # = Make theta =
        # ==============
        for comp in self.comps:
            # Is diagonal the covariance or variance?
            if isinstance(self.W[comp][0], pymc.Variable):
                diag = isvector(self.W[comp][0].value)
            else:
                diag = isvector(self.W[comp][0])

            if diag:
                # Normal variates if diagonal.
                theta[comp] = [pymc.Normal('%s[0]'%comp, m_0[comp], C_0[comp])]
            else:
                # MV normal otherwise.
                theta[comp] = [pymc.MvNormal('%s[0]'%comp, m_0[comp], C_0[comp])]

            theta_mean[comp] = []

            for t in xrange(1,self.T):

                theta_mean[comp].append(pymc.LinearCombination('%s_mean[%i]'%(comp, t), [G[comp][t-1]], [theta[comp][t-1]]))

                if diag:
                    # Normal variates if diagonal.
                    theta[comp].append(pymc.Normal('%s[%i]'%(comp,t), theta_mean[comp][t-1], W[comp][t-1]))
                else:
                    # MV normal otherwise.
                    theta[comp].append(pymc.MvNormal('%s[%i]'%(comp,t), theta_mean[comp][t-1], W[comp][t-1]))


        self.theta = dict_to_recarray(theta)
        self.theta_mean = dict_to_recarray(theta_mean)


        # ==========
        # = Make Y =
        # ==========
        Y_diag = isvector(self.V.value[0])

        for t in xrange(self.T):
            x_coef = []
            y_coef = []

            for comp in self.comps:
                x_coef.append(self.F[comp][t])
                y_coef.append(theta[comp][t])

            Y_mean.append(pymc.LinearCombination('Y_mean[%i]'%t, x_coef, y_coef))
            if Y_diag:
                # Normal variates if diagonal.
                Y.append(pymc.Normal('Y[%i]'%t, Y_mean[t], V[t]))
            else:
                # MV normal otherwise.
                Y.append(pymc.MvNormal('Y[%i]'%t, Y_mean[t], V[t]))

            # If data provided, use it.
            if Y_vals is not None:
                Y[t].value = Y_vals[t]
                Y[t].observed = True

        self.Y_mean = pymc.Container(np.array(Y_mean))
        self.Y = pymc.Container(np.array(Y))

        # No sense creating a NormalSubmodel here... just stay a ListContainer.
        NormalSubmodel.__init__(self, [F,G,W,V,m_0,C_0,Y,theta,theta_mean,Y_mean])
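A minimal sketch of the documented argument structure for a single-component, local-level DLM with T = 3; the component name 'level' and the scalar variances are illustrative assumptions based only on the docstring above.

import numpy as np

T = 3
F = {'level': [np.array([1.0]) for t in range(T)]}    # design vectors
G = {'level': [np.array([[1.0]]) for t in range(T)]}  # system matrices
V = [0.25 for t in range(T)]                          # observation variances
W = {'level': [0.1 for t in range(T)]}                # system variances
m_0 = {'level': np.array([0.0])}                      # mean of theta[0]
C_0 = {'level': np.array([1.0])}                      # variance of theta[0]
# D = DLM(F, G, V, W, m_0, C_0, Y_vals=[0.1, 0.3, 0.2])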
Example #15
beta = pm.Normal("beta", 0, 0.0001)

alpha = np.empty(d, dtype=object)
for i in range(d):
    alpha[i] = pm.Normal('alpha_%i' % i, 0, 0.0001)


@pm.deterministic
def linear_regress(x=x_data, alpha=alpha, beta=beta):
    return x.dot(alpha) + beta


y = pm.Normal('y', linear_regress, prec, value=y_data, observed=True)

model = pm.Model([y, std, prec, pm.Container(alpha), beta])
mcmc = pm.MCMC(model)
mcmc.sample(iter=100000, burn=50000, thin=10)

ae = np.empty(d)
for i in range(d):
    ae[i] = np.mean(mcmc.trace('alpha_%i' % i)[:], axis=0)

be = np.mean(mcmc.trace('beta')[:], axis=0)
print()
print()

yh = xt.dot(ae) + be
print('Yh             Yt    MSE')
for i in range(yt.shape[0]):
    print(yh[i], yt[i], (yh[i] - yt[i])**2)
Example #16
def main(mcmc_args=None):

    print('Setting up parameters and priors...')

    params = Params()
    # Set up location here with command line arguments in a list.
    params.cmd_line_chg(['--kalbar'])
    assert params.site_name + 'fields.txt' == 'data/kalbarfields.txt'
    # Set parameters specific to Bayesian runs
    params.PLOT = False
    params.OUTPUT = False

    # This sends a message to CalcSol on whether or not to use CUDA
    if params.CUDA:
        globalvars.cuda = True
    else:
        globalvars.cuda = False
    # get wind data and day labels
    wind_data, days = PM.get_wind_data(*params.get_wind_params())
    params.ndays = len(days)

    # reduce domain
    params.domain_info = (10000.0, 400)  #25 m sided cells
    domain_res = params.domain_info[0] / params.domain_info[1]
    cell_area = domain_res**2

    locinfo = LocInfo(params.dataset, params.coord, params.domain_info)

    ######################################################################
    #####                        Model Priors                        #####
    ######################################################################
    lam = pm.Beta("lam", 5, 1, value=0.95)
    f_a1 = pm.TruncatedNormal("f_a1", 6, 0.3, 0, 9, value=6)
    f_a2 = pm.TruncatedNormal("f_a2", 20, 0.3, 15, 24, value=20)
    f_b1_p = pm.Gamma("fb1_p", 2, 1, value=1.5, trace=False,
                      plot=False)  #alpha,beta parameterization

    @pm.deterministic(trace=True, plot=True)
    def f_b1(f_b1_p=f_b1_p):
        return f_b1_p + 1

    f_b2_p = pm.Gamma("fb2_p", 2, 1, value=1.5, trace=False, plot=False)

    @pm.deterministic(trace=True, plot=True)
    def f_b2(f_b2_p=f_b2_p):
        return f_b2_p + 1

    g_aw = pm.Gamma("g_aw", 2.2, 1, value=1.0)
    g_bw = pm.Gamma("g_bw", 5, 1, value=3.8)
    # flight diffusion parameters. note: mean is average over flight advection
    sig_x = pm.Gamma("sig_x", 26, 0.15, value=180)
    sig_y = pm.Gamma("sig_y", 15, 0.15, value=150)
    corr_p = pm.Beta("corr_p", 5, 5, value=0.5, trace=False, plot=False)

    @pm.deterministic(trace=True, plot=True)
    def corr(corr_p=corr_p):
        return corr_p * 2 - 1

    # local spread parameters
    sig_x_l = pm.Gamma("sig_xl", 2, 0.08, value=10)
    sig_y_l = pm.Gamma("sig_yl", 2, 0.14, value=10)
    corr_l_p = pm.Beta("corr_l_p", 5, 5, value=0.5, trace=False, plot=False)

    @pm.deterministic(trace=True, plot=True)
    def corr_l(corr_l_p=corr_l_p):
        return corr_l_p * 2 - 1

    mu_r = pm.Normal("mu_r", 1., 1, value=1)
    n_periods = pm.Poisson("n_periods", 30, value=30)
    #alpha_pow = prev. time exponent in ParasitoidModel.h_flight_prob
    xi = pm.Gamma("xi", 1, 1,
                  value=0.75)  # presence to oviposition/emergence factor
    em_obs_prob = pm.Beta("em_obs_prob", 1, 1, value=0.05)  # per-wasp prob of
    # observing emergence in release field grid given max leaf collection
    # this is dependent on the size of the cell surrounding the grid point
    # ...not much to be done about this.
    grid_obs_prob = pm.Beta("grid_obs_prob", 1, 1,
                            value=0.005)  # probability of
    # observing a wasp present in the grid cell given max leaf sampling

    #card_obs_prob = pm.Beta("card_obs_prob",1,1,value=0.5) # probability of
    # observing a wasp present in the grid cell given max leaf sampling

    #### Data collection model background for sentinel fields ####
    # Need to fix linear units for area. Meters would be best.
    # Effective collection area (constant between fields) is very uncertain
    with warnings.catch_warnings():
        # squelch a warning based on pymc coding we don't need to worry about
        warnings.simplefilter("ignore", RuntimeWarning)
        A_collected = pm.TruncatedNormal("A_collected",
                                         2500,
                                         1 / 2500,
                                         0,
                                         min(locinfo.field_sizes.values()) *
                                         cell_area,
                                         value=2500)  # in m**2
    # Each field has its own binomial probability.
    # Probabilities are likely to be small, and pm.Beta cannot handle small
    #   parameter values. So we will use TruncatedNormal again.
    N = len(locinfo.sent_ids)
    sent_obs_probs = np.empty(N, dtype=object)
    # fix beta for the Beta distribution
    sent_beta = 40
    # mean of Beta distribution will be A_collected/field size
    for n, key in enumerate(locinfo.sent_ids):
        sent_obs_probs[n] = pm.Beta(
            "sent_obs_probs_{}".format(key),
            A_collected / (locinfo.field_sizes[key] * cell_area) * sent_beta /
            (1 - A_collected / (locinfo.field_sizes[key] * cell_area)),
            sent_beta,
            value=0.1 * 3600 / (locinfo.field_sizes[key] * cell_area))

    sent_obs_probs = pm.Container(sent_obs_probs)
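    # A quick sanity check of the Beta parameterization above (illustrative
    # numbers, not project data): fixing beta and taking
    # alpha = m*beta/(1 - m) gives a Beta(alpha, beta) mean of exactly m.
    _m, _b = 0.02, 40.0
    _a = _m * _b / (1.0 - _m)
    assert abs(_a / (_a + _b) - _m) < 1e-12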

    # Maximum a posteriori estimates have consistently returned a value near
    #   zero for sprd_factor. So we will comment these sections.
    # if params.dataset == 'kalbar':
    #     # factor for kalbar initial spread
    #     sprd_factor = pm.Uniform("sprd_factor",0,1,value=0.3)
    # else:
    #     sprd_factor = None
    sprd_factor = None

    #### Collect variables and setup block update ####
    params_ary = pm.Container(
        np.array([
            g_aw, g_bw, f_a1, f_b1, f_a2, f_b2, sig_x, sig_y, corr, sig_x_l,
            sig_y_l, corr_l, lam, n_periods, mu_r
        ],
                 dtype=object))
    # The stochastic variables in this list (and the stochastics behind the
    #   deterministic ones) should be block updated in order to avoid the large
    #   computational expense of evaluating the model multiple times for each
    #   MCMC iteration. To do this, starting step variances must be defined
    #   for each variable. This is done via a scaling dict.
    stoc_vars = [
        g_aw, g_bw, f_a1, f_b1_p, f_a2, f_b2_p, sig_x, sig_y, corr_p, sig_x_l,
        sig_y_l, corr_l_p, lam, n_periods, mu_r
    ]
    step_scales = {
        g_aw: 0.04,
        g_bw: 0.08,
        f_a1: 0.25,
        f_b1_p: 0.05,
        f_a2: 0.25,
        f_b2_p: 0.05,
        sig_x: 2,
        sig_y: 2,
        corr_p: 0.0005,
        sig_x_l: 2,
        sig_y_l: 2,
        corr_l_p: 0.0005,
        lam: 0.0005,
        n_periods: 1,
        mu_r: 0.005
    }

    print('Getting initial model values...')

    ######################################################################
    #####                          Run Model                         #####
    ######################################################################
    @pm.deterministic(plot=False, trace=False)
    def pop_model(params=params,
                  params_ary=params_ary,
                  locinfo=locinfo,
                  wind_data=wind_data,
                  days=days,
                  sprd_factor=sprd_factor):
        '''This function acts as an interface between PyMC and the model.
        Not only does it run the model, but it provides an emergence potential
        based on the population model result projected forward from feasible
        oviposition dates. To modify how this projection happens, edit
        popdensity_to_emergence. Returned values from this function should be
        nearly ready to compare to data.
        '''
        modeltic = time.time()
        ### Alter params with stochastic variables ###

        # g wind function parameters
        params.g_params = tuple(params_ary[0:2])
        # f time of day function parameters
        params.f_params = tuple(params_ary[2:6])
        # Diffusion coefficients
        params.Dparams = tuple(params_ary[6:9])
        params.Dlparams = tuple(params_ary[9:12])
        # Probability of any flight during the day under ideal circumstances
        params.lam = params_ary[12]

        # TRY BOTH SCALINGS - VARYING mu_r and n_periods
        # scaling flight advection to wind advection
        # number of time periods (based on interp_num) in one flight
        params.n_periods = params_ary[
            13]  # if interp_num = 30, this is # of minutes
        params.mu_r = params_ary[14]

        ### PHASE ONE ###
        # First, get spread probability for each day as a coo sparse matrix
        max_shape = np.array([0, 0])
        pm_args = [(days[0], wind_data, *params.get_model_params(),
                    params.r_start)]
        pm_args.extend([(day, wind_data, *params.get_model_params())
                        for day in days[1:params.ndays]])

        ##### Kalbar wind started recording a day late. Spread the population
        #####   locally before running full model.
        if sprd_factor is not None:
            res = params.domain_info[0] / params.domain_info[1]
            mean_drift = np.array([-25., 15.])
            xdrift_int = int(mean_drift[0] // res)
            xdrift_r = mean_drift[0] % res
            ydrift_int = int(mean_drift[1] // res)
            ydrift_r = mean_drift[1] % res
            longsprd = PM.get_mvn_cdf_values(
                res, np.array([xdrift_r, ydrift_r]),
                PM.Dmat(params_ary[6], params_ary[7], params_ary[8]))
            shrtsprd = PM.get_mvn_cdf_values(
                res, np.array([0., 0.]),
                PM.Dmat(params_ary[9], params_ary[10], params_ary[11]))

            mlen = int(
                max(longsprd.shape[0], shrtsprd.shape[0]) +
                max(abs(xdrift_int), abs(ydrift_int)) * 2)
            sprd = np.zeros((mlen, mlen))
            lbds = [
                int(mlen // 2 - longsprd.shape[0] // 2),
                int(mlen // 2 + longsprd.shape[0] // 2 + 1)
            ]
            sprd[lbds[0] - ydrift_int:lbds[1] - ydrift_int, lbds[0] +
                 xdrift_int:lbds[1] + xdrift_int] = longsprd * sprd_factor
            sbds = [
                int(mlen // 2 - shrtsprd.shape[0] // 2),
                int(mlen // 2 + shrtsprd.shape[0] // 2 + 1)
            ]
            sprd[sbds[0]:sbds[1],
                 sbds[0]:sbds[1]] += shrtsprd * (1 - sprd_factor)

            sprd[int(sprd.shape[0] // 2),
                 int(sprd.shape[0] // 2)] += max(0, 1 - sprd.sum())
            pmf_list = [sparse.coo_matrix(sprd)]
        else:
            pmf_list = []

        ###################### Get pmf_list from multiprocessing
        pmf_list.extend(pool.starmap(PM.prob_mass, pm_args))

        for pmf in pmf_list:
            for dim in range(2):
                if pmf.shape[dim] > max_shape[dim]:
                    max_shape[dim] = pmf.shape[dim]

        r_spread = []  # holds the one-day spread for each release day.

        # Reshape the prob. mass function of each release day into solution form
        for ii in range(params.r_dur):
            offset = params.domain_info[1] - pmf_list[ii].shape[0] // 2
            dom_len = params.domain_info[1] * 2 + 1
            r_spread.append(
                sparse.coo_matrix(
                    (pmf_list[ii].data,
                     (pmf_list[ii].row + offset, pmf_list[ii].col + offset)),
                    shape=(dom_len, dom_len)).tocsr())

        ### PHASE TWO ###
        # Pass the probability list, pmf_list, and other info to convolution solver.
        #   This will return the finished population model.
        with Capturing() as output:
            if sprd_factor is not None:
                # extend day count by one
                days_ext = [days[0] - 1]
                days_ext.extend(days)
                modelsol = get_populations(r_spread, pmf_list, days_ext,
                                           params.ndays + 1, dom_len,
                                           max_shape, params.r_dur,
                                           params.r_number, params.r_mthd())
                # remove the first one and start where wind started.
                modelsol = modelsol[1:]
            else:
                modelsol = get_populations(r_spread, pmf_list, days,
                                           params.ndays, dom_len, max_shape,
                                           params.r_dur, params.r_number,
                                           params.r_mthd())

        # modelsol now holds the model results for this run as CSR sparse arrays

        # get emergence potential (measured in expected number of wasps previously
        #   present whose oviposition would result in emergence on the given date)
        #   from the model result
        release_emerg, sentinel_emerg = popdensity_to_emergence(
            modelsol, locinfo)

        # get the expected wasp populations at grid points on sample days
        grid_counts = popdensity_grid(modelsol, locinfo)

        # get the expected wasp populations in cardinal directions
        '''card_counts = popdensity_card(modelsol,locinfo,params.domain_info)'''

        ## For the lists release_emerg and sentinel_emerg:
        ##    Each list entry corresponds to a data collection day (one array)
        ##    In each array:
        ##    Each column corresponds to an emergence observation day (as in data)
        ##    Each row corresponds to a grid point or sentinel field, respectively
        ## For the array grid_counts:
        ##    Each column corresponds to an observation day
        ##    Each row corresponds to a grid point
        ## For the list card_counts:
        ##    Each list entry corresponds to a sampling day (one array)
        ##    Each column corresponds to a step in a cardinal direction
        ##    Each row corresponds to a cardinal direction
        # print('{:03.1f} sec./model at {}'.format(time.time() - modeltic,
        #     time.strftime("%H:%M:%S %d/%m/%Y")),end='\r')
        # sys.stdout.flush()
        return (release_emerg, sentinel_emerg, grid_counts)  #,card_counts)

    print('Parsing model output and connecting to Bayesian model...')

    ######################################################################
    #####                   Connect Model to Data                    #####
    ######################################################################

    ### Parse the results of pop_model into separate deterministic variables ###
    '''Get Poisson probabilities for sentinel field emergence. Parameters:
        xi is constant, emerg is a list of ndarrays, betas is a 1D array of
        field probabilities'''
    Ncollections = len(locinfo.sent_DataFrames)
    sent_poi_rates = []
    for ii in range(Ncollections):
        s_ndays = len(locinfo.sent_DataFrames[ii]['datePR'].unique())
        sent_poi_rates.append(
            pm.Lambda('sent_poi_rate_{}'.format(ii),
                      lambda xi=xi, ndays=s_ndays, betas=sent_obs_probs,
                      emerg_model=pop_model[1][ii]: xi * emerg_model * np.tile(
                          betas, (ndays, 1)).T,
                      trace=False))
    sent_poi_rates = pm.Container(sent_poi_rates)
    '''Return Poisson probabilities for release field grid emergence. Parameters:
        xi is constant, emerg is a list of ndarrays. collection effort is
        specified in locinfo.'''
    Ncollections = len(locinfo.release_DataFrames)
    rel_poi_rates = []
    for ii in range(Ncollections):
        r_effort = locinfo.release_collection[ii]  #fraction of max collection
        r_ndays = len(locinfo.release_DataFrames[ii]['datePR'].unique())
        rel_poi_rates.append(
            pm.Lambda('rel_poi_rate_{}'.format(ii),
                      lambda xi=xi, ndays=r_ndays, r_effort=r_effort, beta=
                      em_obs_prob, emerg_model=pop_model[0][ii]: xi *
                      emerg_model * np.tile(r_effort * beta, (ndays, 1)).T,
                      trace=False))
    rel_poi_rates = pm.Container(rel_poi_rates)

    @pm.deterministic(plot=False, trace=False)
    def grid_poi_rates(locinfo=locinfo,
                       beta=grid_obs_prob,
                       obs_model=pop_model[2]):
        '''Return Poisson probabilities for grid sampling
        obs_model is an ndarray, sampling effort is specified in locinfo.'''
        return beta * locinfo.grid_samples * obs_model

    '''Return Poisson probabilities for cardinal direction sampling
        obs_model is a list of ndarrays, sampling effort is assumed constant'''
    '''
    card_poi_rates = []
    for ii,obs in enumerate(pop_model[3]):
        card_poi_rates.append(pm.Lambda('card_poi_rate_{}'.format(ii),
            lambda beta=card_obs_prob, obs=obs: beta*obs))
    card_poi_rates = pm.Container(card_poi_rates)
    '''

    # Given the expected wasp densities from pop_model, actual wasp densities
    #   are modeled as a thinned Poisson random variable about that mean.
    # Each wasp in the area then has a small probability of being seen.

    ### Connect sentinel emergence data to model ###
    N_sent_collections = len(locinfo.sent_DataFrames)
    # Create list of collection variables
    sent_collections = []
    for ii in range(N_sent_collections):
        # Apparently, pymc does not play well with 2D array parameters
        sent_collections.append(
            np.empty(sent_poi_rates[ii].value.shape, dtype=object))
        for n in range(sent_collections[ii].shape[0]):
            for m in range(sent_collections[ii].shape[1]):
                sent_collections[ii][n, m] = pm.Poisson(
                    "sent_em_obs_{}_{}_{}".format(ii, n, m),
                    sent_poi_rates[ii][n, m],
                    value=float(locinfo.sentinel_emerg[ii][n, m]),
                    observed=True)
    sent_collections = pm.Container(sent_collections)

    ### Connect release-field emergence data to model ###
    N_release_collections = len(locinfo.release_DataFrames)
    # Create list of collection variables
    rel_collections = []
    for ii in range(N_release_collections):
        rel_collections.append(
            np.empty(rel_poi_rates[ii].value.shape, dtype=object))
        for n in range(rel_collections[ii].shape[0]):
            for m in range(rel_collections[ii].shape[1]):
                rel_collections[ii][n, m] = pm.Poisson(
                    "rel_em_obs_{}_{}_{}".format(ii, n, m),
                    rel_poi_rates[ii][n, m],
                    value=float(locinfo.release_emerg[ii][n, m]),
                    observed=True)
    rel_collections = pm.Container(rel_collections)

    ### Connect grid sampling data to model ###
    grid_obs = np.empty(grid_poi_rates.value.shape, dtype=object)
    for n in range(grid_obs.shape[0]):
        for m in range(grid_obs.shape[1]):
            grid_obs[n, m] = pm.Poisson("grid_obs_{}_{}".format(n, m),
                                        grid_poi_rates[n, m],
                                        value=float(locinfo.grid_obs[n, m]),
                                        observed=True)
    grid_obs = pm.Container(grid_obs)

    ### Connect cardinal direction data to model ###
    '''
    N_card_collections = len(locinfo.card_obs_DataFrames)
    # Create list of sampling variables
    card_collections = []
    for ii in range(N_card_collections):
        card_collections.append(np.empty(card_poi_rates[ii].value.shape,
                                         dtype=object))
        for n in range(card_collections[ii].shape[0]):
            for m in range(card_collections[ii].shape[1]):
                card_collections[ii][n,m] = pm.Poisson(
                    "card_obs_{}_{}_{}".format(ii,n,m),
                    card_poi_rates[ii][n,m],
                    value=locinfo.card_obs[ii][n,m],
                    observed=True, plot=False)
    card_collections = pm.Container(card_collections)
    '''

    ######################################################################
    #####                   Collect Model and Run                    #####
    ######################################################################

    ### Collect model ###
    if sprd_factor is not None:
        Bayes_model = pm.Model([
            lam, f_a1, f_a2, f_b1_p, f_b2_p, f_b1, f_b2, g_aw, g_bw, sig_x,
            sig_y, corr_p, corr, sig_x_l, sig_y_l, corr_l_p, corr_l, n_periods,
            mu_r, sprd_factor, grid_obs_prob, xi, em_obs_prob, A_collected,
            sent_obs_probs, params_ary, pop_model, grid_poi_rates,
            rel_poi_rates, sent_poi_rates, grid_obs, rel_collections,
            sent_collections
        ])
    else:
        Bayes_model = pm.Model([
            lam, f_a1, f_a2, f_b1_p, f_b2_p, f_b1, f_b2, g_aw, g_bw, sig_x,
            sig_y, corr_p, corr, sig_x_l, sig_y_l, corr_l_p, corr_l, n_periods,
            mu_r, grid_obs_prob, xi, em_obs_prob, A_collected, sent_obs_probs,
            params_ary, pop_model, grid_poi_rates, rel_poi_rates,
            sent_poi_rates, grid_obs, rel_collections, sent_collections
        ])

    ### Run if parameters were passed in ###
    if mcmc_args is not None:
        if len(mcmc_args) == 3:
            # New run
            nsamples = int(mcmc_args[0])
            burn = int(mcmc_args[1])
            fname = mcmc_args[2]
            if fname[-3:] != '.h5':
                fname += '.h5'
            mcmc = pm.MCMC(Bayes_model,
                           db='hdf5',
                           dbname=fname,
                           dbmode='a',
                           dbcomplevel=0)
            mcmc.use_step_method(pm.AdaptiveMetropolis,
                                 stoc_vars,
                                 scales=step_scales,
                                 interval=500,
                                 shrink_if_necessary=True)
            try:
                tic = time.time()
                print('Sampling...')
                mcmc.sample(nsamples, burn)
                # sampling finished. commit to database and continue
                print('Sampling finished.')
                print('Time elapsed: {}'.format(time.time() - tic))
                print('Saving...')
                #mcmc.save_state()
                mcmc.commit()
                print('Closing...')
                mcmc.db.close()
            except:
                print('Exception: database closing...')
                mcmc.db.close()
                raise
            return
        elif len(mcmc_args) == 2:
            # Resume run
            fname = mcmc_args[0]
            nsamples = int(mcmc_args[1])
            fname = fname.strip()
            if fname[-3:] != '.h5':
                fname += '.h5'
            if os.path.isfile(fname):
                db = pm.database.hdf5.load(fname)
                mcmc = pm.MCMC(Bayes_model, db=db)
                mcmc.use_step_method(pm.AdaptiveMetropolis,
                                     stoc_vars,
                                     scales=step_scales,
                                     interval=500,
                                     shrink_if_necessary=True)
                # database loaded.
            else:
                print('File not found: {}'.format(fname))
                return
            try:
                tic = time.time()
                print('Sampling...')
                mcmc.sample(nsamples)
                # sampling finished. commit to database and continue
                print('Sampling finished.')
                print('Time elapsed: {}'.format(time.time() - tic))
                print('Saving...')
                #mcmc.save_state()
                mcmc.commit()
                print('Closing...')
                mcmc.db.close()
            except:
                print('Exception: database closing...')
                mcmc.db.close()
                raise
            return

    ######################################################################
    #####                   Start Interactive Menu                   #####
    ######################################################################
    print('--------------- MCMC MAIN MENU ---------------')
    print(" 'new': Start a new MCMC chain from the beginning.")
    print("'cont': Continue a previous MCMC chain from an hdf5 file.")
    #print("'plot': Plot traces/distribution from an hdf5 file.")
    print("'quit': Quit.")
    cmd = input('Enter: ')
    cmd = cmd.strip().lower()
    if cmd == 'new':
        print('\n\n')
        print('--------------- New MCMC Chain ---------------')
        while True:
            val = input("Enter number of realizations or 'quit' to quit:")
            val = val.strip()
            if val == 'q' or val == 'quit':
                return
            else:
                try:
                    nsamples = int(val)
                    val2 = input("Enter number of realizations to discard:")
                    val2 = val2.strip()
                    if val2 == 'q' or val2 == 'quit':
                        return
                    else:
                        burn = int(val2)
                    fname = input(
                        "Enter filename to save or 'back' to cancel:")
                    fname = fname.strip()
                    if fname == 'q' or fname == 'quit':
                        return
                    elif fname == 'b' or fname == 'back':
                        continue
                    else:
                        fname = fname + '.h5'
                        break  # BREAK LOOP AND RUN MCMC WITH GIVEN VALUES
                except ValueError:
                    print('Unrecognized input.')
                    continue
        ##### RUN FIRST MCMC HERE #####
        mcmc = pm.MCMC(Bayes_model,
                       db='hdf5',
                       dbname=fname,
                       dbmode='a',
                       dbcomplevel=0)
        mcmc.use_step_method(pm.AdaptiveMetropolis,
                             stoc_vars,
                             scales=step_scales,
                             interval=500,
                             shrink_if_necessary=True)
        try:
            tic = time.time()
            print('Sampling...')
            mcmc.sample(nsamples, burn)
            # sampling finished. commit to database and continue
            print('Sampling finished.')
            print('Time elapsed: {}'.format(time.time() - tic))
            print('Saving...')
            #mcmc.save_state()
            mcmc.commit()
        except:
            print('Exception: database closing...')
            mcmc.db.close()
            raise

    elif cmd == 'cont':
        # Load db and continue
        print('\n')
        while True:
            fname = input("Enter path to database to load, or 'q' to quit:")
            fname = fname.strip()
            if fname.lower() == 'q' or fname.lower() == 'quit':
                return
            else:
                if fname[-3:] != '.h5':
                    fname += '.h5'
                if os.path.isfile(fname):
                    db = pm.database.hdf5.load(fname)
                    mcmc = pm.MCMC(Bayes_model, db=db)
                    mcmc.use_step_method(pm.AdaptiveMetropolis,
                                         stoc_vars,
                                         scales=step_scales,
                                         interval=500,
                                         shrink_if_necessary=True)
                    break  # database loaded
                else:
                    print('File not found.')
                    #continue

    elif cmd == 'plot':
        # Get filename and pass to plotting routine.
        pass
        # return
    elif cmd == 'quit' or cmd == 'q':
        return
    else:
        print('Command not recognized.')
        print('Quitting....')
        return

    ##### MCMC Loop #####
    # This should be reached only by cmd == 'new' or 'cont' with a database.
    # It resumes sampling of a previously sampled chain.
    print('\n')
    while True:
        print('--------------- MCMC ---------------')
        print(" 'report': generate report on traces")
        print("'inspect': launch IPython to inspect state")
        print("    'run': conduct further sampling")
        print("   'quit': Quit")
        cmd = input('Enter: ')
        cmd = cmd.strip()
        cmd = cmd.lower()
        if cmd == 'inspect':
            try:
                import IPython
                IPython.embed()
            except ImportError:
                print('IPython not found.')
            except:
                print('Exception: database closing...')
                mcmc.db.close()
                raise
        elif cmd == 'run':
            val = input("Enter number of realizations or 'back':")
            val = val.strip()
            if val == 'back' or val == 'b':
                continue
            else:
                try:
                    nsamples = int(val)
                except ValueError:
                    print('Unrecognized input.')
                    continue
            # Run chain
            try:
                tic = time.time()
                print('Sampling...')
                mcmc.sample(nsamples)
                # sampling finished. commit to database and continue
                print('Sampling finished.')
                print('Time elapsed: {}'.format(time.time() - tic))
                print('Saving...')
                #mcmc.save_state()
                mcmc.commit()
            except:
                print('Exception: database closing...')
                mcmc.db.close()
                raise
        elif cmd == 'report':
            try:
                import Bayes_Plot
                Bayes_Plot.plot_traces(db=db)
                print('Gelman-Rubin statistics')
                gr = pm.gelman_rubin(mcmc)
                print(gr)
                with open('./diagnostics/gelman-rubin.txt', 'w') as f:
                    f.write('Variable        R_hat\n')
                    f.write('---------------------\n')
                    for key, val in gr.items():
                        f.write(key + ': {}\n'.format(val))
            except:
                print('Exception: database closing...')
                mcmc.db.close()
                raise
        elif cmd == 'quit' or cmd == 'q':
            mcmc.db.close()
            print('Database closed.')
            break
        else:
            print('Command not recognized.')
Example #17
%matplotlib inline
from pymc.Matplot import plot as mcplot
mcplot(mcmc.trace("p"),common_scale=False)

# a simple demo of Dirichlet-Multinomial conjugacy
N = 5 # dimension
beta = np.ones(N)
mu=pm.Dirichlet("mu", theta=beta)
cmu = pm.CompletedDirichlet("cmu", D=mu)

n = pm.Multinomial('n', n=D, p=cmu, value=n_class, observed=True)

alpha = np.ones(N)

theta = pm.Container([pm.Dirichlet("theta_%s" % i,theta=alpha) \
                      for i in range(N)])
ctheta = pm.Container([pm.CompletedDirichlet("ctheta_%s" % i, D=theta[i]) for i in range(N)])
c = pm.Container([pm.Multinomial("c_%s" % i, n=n_class[i], p=theta[i]\
                                ,value = data[i], observed=True)\
                 for i in range(N)])

@pm.deterministic
def precision(mu=cmu, theta=ctheta):
    return np.sum([mu[0][i]*theta[i][0][i] for i in range(N)])


mcmc = pm.MCMC([n,mu,theta,c,precision])
mcmc.sample(25000)
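The demo above assumes D, n_class, and data are already defined. A hypothetical setup consistent with how they are used (toy numbers, not from the original source):

import numpy as np
import pymc as pm

N = 5                                      # dimension, as in the demo
D = 100                                    # total count for the Multinomial n
n_class = np.array([30, 25, 20, 15, 10])   # per-class counts, sums to D
# one vector of within-class counts for each of the N classes
data = np.array([np.random.multinomial(n_class[i], np.ones(N) / N)
                 for i in range(N)])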

Example #18
%matplotlib inline
from pymc.Matplot import plot as mcplot
# and now make the beta matrix, which stacks beta
# coefficients (one per doctor) -- the n_clusters is
# because this is how many parameters we have (we
# subtract one, but then add an intercept)
Bdr = []
for provider_i in xrange(num_providers):
    Bdr.append(pymc.Normal('beta-dr-%s' % provider_i, mu=beta, tau=inv_var))

# construct a vector of betas |sessions| long
session_betas = []
for session_num, session_provider in enumerate(data.dr_id):
    session_betas.append(Bdr[int(session_provider)])

# the Betas to use for each session (which correspond to the
# dr that participated in them).
SB = pymc.Container(session_betas)

###
# setup the cut-off point parameters (lambda's)
# for this we will use truncated normals
#lambda_inv_var = 1e-5
lambdas = [pymc.Normal('lambda_0', 0, inv_var)]
for i in xrange(3):
    lambdas.append(
        pymc.TruncatedNormal('lambda_%s' % (i + 1), (i + 1), inv_var,
                             lambdas[i], numpy.inf))
lambdas = pymc.Container(lambdas)


#-------------------- model ------------------#
@deterministic()
Example #19
def make_model(data, mi_mean_min, mi_mean_max, GF_mean_min, GF_mean_max, constant_proliferation = False):
    values_SOX2 = {}
    values_m = {}
    values_nonPCNA = {}
    switchpoint = {}
    mi_left = {}
    GF_left = {}
    SOX2_mean_left = {}
    mi_right = {}
    GF_right = {}
    SOX2_mean_right = {}
    cells_SOX2_float = {}
    cells_nonPCNA = {}
    cells_m = {}

    
    ls = 50.0 # length of section
    l = pd.read_csv('../../data/cell_length_data.csv')['cell_length'].mean()  # length of cell
    
    def step_function(x, switchpoint, left_value, right_value):
        '''This function should return something in the same format as the passed array.

        Specifically, it produces an array of the same size as the experimental data
        whose contents are the lower average up to the switchpoint and the upper average
        past the switchpoint. For all purposes, this builds the model to which we want
        to compare the data.
        '''
        return sp.where(x<=switchpoint, left_value, right_value)

    def ma(array, fill_value):
        return sp.ma.masked_array(array, sp.isnan(array), fill_value = fill_value)
  


    #data = data.dropna(how='all', subset = ['m', 'PCNA', 'SOX2'])
    
    # I'll drop all nan because of the potential bug with the binomials (see my question on stackoverflow)
    data = data.dropna(how='all', subset = ['m', 'PCNA', 'SOX2'])
    data = data.sort_values(['ID', 'pos'])
    
    # priors for global mean values
    
    # define priors for left side of step function
    mi_left_pop= pymc.Uniform('mi_left_pop', lower = mi_mean_min, upper = mi_mean_max, value = 0.02)
    GF_left_pop = pymc.Uniform('GF_left_pop', lower = GF_mean_min, upper = GF_mean_max, value = 0.8)

    # define priors for right side of step function
    if constant_proliferation:
        mi_right_pop = mi_left_pop
        GF_right_pop = GF_left_pop
    else:
        mi_right_pop = pymc.Uniform('mi_right_pop', lower = mi_mean_min, upper = mi_mean_max, value = 0.04)
        GF_right_pop = pymc.Uniform('GF_right_pop', lower = GF_mean_min, upper = GF_mean_max, value = 0.9)
        # stepsizes
        @pymc.deterministic(name='step_mi', plot=True)
        def step_mi(mi_left = mi_left_pop, mi_right = mi_right_pop):
            return mi_right - mi_left

        @pymc.deterministic(name='step_GF', plot=True)
        def step_GF(GF_left = GF_left_pop, GF_right = GF_right_pop):
            return GF_right - GF_left

    
    # prior distributions for the sigmas, uniformly distributed
    GF_sigma_inter = pymc.Uniform('GF_sigma_inter', lower = 0.001, upper = 0.2)
    mi_sigma_inter = pymc.Uniform('mi_sigma_inter', lower = 0.001, upper = 0.2)

    
    # switchpoint
    if not constant_proliferation:
        switchpoint_pop = pymc.Uniform('switchpoint_pop',
                                       lower = -2000,
                                       upper = outgrowth[data['time'].iloc[0]], 
                                       value = -500)
        switchpoint_sigma_inter = pymc.Uniform('switchpoint_sigma_inter', lower=1.0, upper=400.0, value = 50)
    
    
    for ID, IDdata in data.groupby('ID'):
        values_SOX2[ID] = ma(IDdata['SOX2'], 35.5)
        values_nonPCNA[ID] = ma(IDdata['SOX2'] - IDdata['PCNA'], 3.5)
        values_m[ID] = ma(IDdata['m'], 1.5)
        
        # Model definition

        #priors
        # switchpoint[ID]: for all observables
        
        if constant_proliferation:
            switchpoint[ID] = 0.0
        else:
            switchpoint[ID] = pymc.Normal('switchpoint_{0}'.format(ID),
                                          mu=switchpoint_pop,
                                          tau=1 / switchpoint_sigma_inter**2,
                                          value=-500, plot=False)
            

        # number of SOX2 cells
        SOX2_mean = sp.mean(values_SOX2[ID])
        SOX2_std = sp.std(values_SOX2[ID])


        # define priors for left side of step function
        mi_left[ID] = pymc.TruncatedNormal('mi_left_{0}'.format(ID), mu = mi_left_pop, tau = 1.0 / mi_sigma_inter**2,
                                           a = 0.0, b = 1.0,
                                  value = 0.02, plot = False)
        GF_left[ID] = pymc.TruncatedNormal('GF_left_{0}'.format(ID), mu = GF_left_pop, tau = 1.0 / GF_sigma_inter**2,
                                           a = 0.0, b = 1.0,
                                  value = 0.5, plot = False)
        

        # define priors for right side of step function
        mi_right[ID] = pymc.TruncatedNormal('mi_right_{0}'.format(ID), mu = mi_right_pop, tau = 1.0 / mi_sigma_inter**2,
                                            a = 0.0, b = 1.0,
                                            value = 0.02, plot = False)
        GF_right[ID] = pymc.TruncatedNormal('GF_right_{0}'.format(ID), mu = GF_right_pop, tau = 1.0 / GF_sigma_inter**2,
                                            a = 0.0, b = 1.0,
                                            value = 0.5, plot = False)
    
        
        # step functions
        @pymc.deterministic(name='mi_{}'.format(ID))
        def mi(positions = sp.array(IDdata['pos']), switchpoint = switchpoint[ID],
               left_value = mi_left[ID], right_value = mi_right[ID]):
            return step_function(positions, switchpoint, left_value, right_value)

        @pymc.deterministic(name='GF_{}'.format(ID))
        def GF(positions = sp.array(IDdata['pos']), switchpoint = switchpoint[ID],
               left_value = GF_left[ID], right_value = GF_right[ID]):
            return step_function(positions, switchpoint, left_value, right_value)

        @pymc.deterministic(name='SOX2_mean_{}'.format(ID))
        def SOX2_mean(positions = sp.array(IDdata['pos']), switchpoint = switchpoint[ID],
                      left_value = SOX2_mean , right_value = SOX2_mean):
            return step_function(positions, switchpoint, left_value, right_value)

        #likelihoods
        cells_SOX2_float[ID] = pymc.Normal('cells_SOX2_float_{0}'.format(ID),
                                           mu=SOX2_mean, tau=1 / SOX2_std**2,
                                           value=values_SOX2[ID],
                                           plot=False, observed=True)


        @pymc.deterministic(name='cells_SOX2_{}'.format(ID))
        def cells_SOX2(csf = cells_SOX2_float[ID]):
            return sp.around(csf)




        cells_nonPCNA[ID] = pymc.Binomial('cells_nonPCNA_{0}'.format(ID),
                                          n=cells_SOX2, p=(1.0 - GF),
                                          value=values_nonPCNA[ID],
                                          observed=True, plot=False)

        @pymc.deterministic(name='cells_PCNA_{}'.format(ID))
        def cells_PCNA(cnp = cells_nonPCNA[ID], cs = cells_SOX2):
            return  cs - cnp



        @pymc.deterministic(name='cells_PCNA_section_{}'.format(ID))
        def cells_PCNA_section(cp = cells_PCNA, ls = ls, l = l):
            return cp * ls / l



        cells_m[ID] = pymc.Binomial('cells_m_{0}'.format(ID),
                                    n=cells_PCNA_section, p=mi,
                                    value=values_m[ID],
                                    observed=True, plot=False)



    
    values_SOX2 = pymc.Container(values_SOX2)
    values_m = pymc.Container(values_m)
    values_nonPCNA = pymc.Container(values_nonPCNA)
    switchpoint = pymc.Container(switchpoint)
    mi_left = pymc.Container(mi_left)
    GF_left = pymc.Container(GF_left)
    SOX2_mean_left = pymc.Container(SOX2_mean_left)
    mi_right = pymc.Container(mi_right)
    GF_right = pymc.Container(GF_right)
    SOX2_mean_right = pymc.Container(SOX2_mean_right)
    cells_SOX2_float = pymc.Container(cells_SOX2_float)
    cells_nonPCNA = pymc.Container(cells_nonPCNA)
    cells_m = pymc.Container(cells_m)

    return locals()
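# Hedged usage sketch: make_model() returns locals(), the usual PyMC2 idiom,
# so the dictionary can be handed straight to pymc.MCMC. 'df' and the prior
# bounds below are placeholders, not values from the source.
M = pymc.MCMC(make_model(df, 0.0, 0.1, 0.0, 1.0))
M.sample(iter=20000, burn=5000)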
Example #20
0
data_tp, data_sp = [], []
for line in fileinput.input("../../data/stationary.txt"):
	part = line.strip().split("\t")
	uid, items = part[0], part[1:]
	if uid == "460029901722027":
		for item in items:
			fields = item.split(" ")
			tm = [int(i) for i in fields[0:2]]
			poi = [int(i) for i in fields[4].split(",")]
			data_tp.append(tm)
			data_sp.append(poi)
fileinput.close()
data_tp, data_sp = np.array(data_tp), np.array(data_sp)
print data_tp
print data_sp

prior = pm.Dirichlet('prior', np.array([50.0,50.0]))
state = pm.Container([pm.Categorical('state_%i' % i, p=prior) for i in range(len(data_tp))])
stime = pm.Container([pm.DiscreteUniform('stime_%i' % i, lower=0, upper=23) for i in range(2)])
ftime = pm.Container([pm.DiscreteUniform('ftime_%i' % i, lower=0, upper=23) for i in range(2)])
@pm.deterministic(plot=False)
def mu_s(state=state, stime=stime):
	return np.array([stime[0] if state[i] == 0 else stime[1] for i in xrange(len(data_tp))])
@pm.deterministic(plot=False)
def mu_f(state=state, ftime=ftime):
	return np.array([ftime[0] if state[i] == 0 else ftime[1] for i in xrange(len(data_tp))])
obs_s = pm.Normal('obs_s', mu=mu_s, tau=0.1, value=data_tp[:,0], observed=True)
obs_f = pm.Normal('obs_f', mu=mu_f, tau=0.1, value=data_tp[:,1], observed=True)
model = pm.Model([prior, state, stime, ftime, obs_s, obs_f])
mcmc = pm.MCMC(model)
mcmc.sample(100)
print state.value
print stime[0].value, ftime[0].value
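# Hedged post-processing sketch: averaging the traces is more meaningful than
# printing .value, which is only the last accepted draw of each variable.
print np.mean(mcmc.trace('stime_0')[:]), np.mean(mcmc.trace('ftime_0')[:])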
Example #21
0

y = [5,1,5,14,3,19,1,1,4,22]  # number of failures
t = [94,16,63,126,5,31,1,1,2,10]  # observation time lengths


# Define hyperparameters
alpha = 1.8
gam = 0.01
delta = 1.0
Nobs = len(y)

beta = pymc.Gamma('beta',alpha=delta, beta=gam, value=1.0)
# lamb = pymc.Gamma('lamb',alpha=alpha, beta=beta, value=np.ones(Nobs))
lamb = np.asarray([pymc.Gamma('lamb_%i'%i,alpha=alpha, beta=beta, value=1.0) for i in range(Nobs)])
lamb = pymc.Container(lamb)
# print lamb
# lamb = np.empty(Nobs,dtype=object)

# for i in range(Nobs):
#     lamb[i] = pymc.Gamma('lamb_%i' %(i+1), alpha = alpha, beta = beta, value=0.5)

@pymc.deterministic
def poi_mu(lamb = lamb, t = t):
    return lamb*t

# @pymc.stochastic
# def data_gen(poi_mu,y):
#     return -np.sum(poi_mu) + np.sum(np.log(poi_mu)*y)
#
# # @pymc.stochastic
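# The likelihood above was left commented out. A minimal hedged completion,
# assuming each count is y[i] ~ Poisson(lamb[i] * t[i]):
obs = pymc.Poisson('obs', mu=poi_mu, value=y, observed=True)

model = pymc.Model([beta, lamb, poi_mu, obs])
mcmc = pymc.MCMC(model)
mcmc.sample(iter=10000, burn=2000)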
Example #22
0
def dict_to_recarray(d):
    # Despite the name, this simply wraps the dict in a pymc.Container so
    # PyMC can traverse the nodes inside; no numpy recarray is constructed.
    return pymc.Container(d)
Example #23
0
    def run(self):
        self.validateinput()
        data = self.data
        data = self.fluctuate(data) if self.rndseed >= 0 else data

        # unpack background dictionaries
        backgroundkeys = self.backgroundsyst.keys()
        backgrounds = array([self.background[key] for key in backgroundkeys])
        backgroundnormsysts = array(
            [self.backgroundsyst[key] for key in backgroundkeys])

        # unpack object systematics dictionary
        objsystkeys = self.objsyst['signal'].keys()
        signalobjsysts = array(
            [self.objsyst['signal'][key] for key in objsystkeys])
        backgroundobjsysts = array([])
        if len(objsystkeys) > 0 and len(backgroundkeys) > 0:
            backgroundobjsysts = array([[
                self.objsyst['background'][syst][bckg] for syst in objsystkeys
            ] for bckg in backgroundkeys])
        recodim = len(data)
        resmat = self.response
        truthdim = len(resmat)

        import priors
        truth = priors.wrapper(priorname=self.prior,
                               low=self.lower,
                               up=self.upper,
                               other_args=self.priorparams)

        bckgnuisances = []
        for name, err in zip(backgroundkeys, backgroundnormsysts):
            if err < 0.:
                bckgnuisances.append(
                    mc.Uniform('norm_%s' % name, value=1., lower=0., upper=3.))
            else:
                bckgnuisances.append(
                    mc.TruncatedNormal(
                        'gaus_%s' % name,
                        value=0.,
                        mu=0.,
                        tau=1.0,
                        a=(-1.0 / err if err > 0.0 else -inf),
                        b=inf,
                        observed=(False if err > 0.0 else True)))
        bckgnuisances = mc.Container(bckgnuisances)

        objnuisances = [
            mc.Normal('gaus_%s' % name,
                      value=self.systfixsigma,
                      mu=0.,
                      tau=1.0,
                      observed=(True if self.systfixsigma != 0 else False))
            for name in objsystkeys
        ]
        objnuisances = mc.Container(objnuisances)

        # define potential to constrain truth spectrum
        if self.regularization:
            truthpot = self.regularization.getpotential(truth)

        #This is where the FBU method is actually implemented
        @mc.deterministic(plot=False)
        def unfold(truth=truth,
                   bckgnuisances=bckgnuisances,
                   objnuisances=objnuisances):
            smearbckg = 1.
            if len(backgroundobjsysts) > 0:
                smearbckg = smearbckg + dot(objnuisances, backgroundobjsysts)
            smearedbackgrounds = backgrounds * smearbckg
            bckgnormerr = array([
                (-1. + nuis) / nuis if berr < 0. else berr
                for berr, nuis in zip(backgroundnormsysts, bckgnuisances)
            ])
            bckg = dot(1. + bckgnuisances * bckgnormerr, smearedbackgrounds)
            reco = dot(truth, resmat)
            smear = 1. + dot(objnuisances, signalobjsysts)
            out = bckg + reco * smear
            return out

        unfolded = mc.Poisson('unfolded',
                              mu=unfold,
                              value=data,
                              observed=True,
                              size=recodim)
        allnuisances = mc.Container(bckgnuisances + objnuisances)
        modelelements = [unfolded, unfold, truth, allnuisances]
        if self.regularization: modelelements += [truthpot]
        model = mc.Model(modelelements)

        if self.use_emcee:
            from emcee_sampler import sample_emcee
            mcmc = sample_emcee(model,
                                nwalkers=self.nwalkers,
                                samples=self.nMCMC / self.nwalkers,
                                burn=self.nBurn / self.nwalkers,
                                thin=self.nThin)
        else:
            map_ = mc.MAP(model)
            map_.fit()
            mcmc = mc.MCMC(model)
            mcmc.use_step_method(mc.AdaptiveMetropolis, truth + allnuisances)
            mcmc.sample(self.nMCMC, burn=self.nBurn, thin=self.nThin)

#        mc.Matplot.plot(mcmc)

        self.trace = [
            mcmc.trace('truth%d' % bin)[:] for bin in xrange(truthdim)
        ]
        self.nuisancestrace = {}
        for name, err in zip(backgroundkeys, backgroundnormsysts):
            if err < 0.:
                self.nuisancestrace[name] = mcmc.trace('norm_%s' % name)[:]
            if err > 0.:
                self.nuisancestrace[name] = mcmc.trace('gaus_%s' % name)[:]
        for name in objsystkeys:
            if self.systfixsigma == 0.:
                self.nuisancestrace[name] = mcmc.trace('gaus_%s' % name)[:]

        if self.monitoring:
            import monitoring
            monitoring.plot(self.name + '_monitoring', data, backgrounds,
                            resmat, self.trace, self.nuisancestrace,
                            self.lower, self.upper)
Example #24
0
def generate_MCMC_model(specobj,
                        templates,
                        offset=False,
                        shiftout='velocity',
                        v0=0,
                        multitemps=False,
                        copy=False):
    """
    Makes a PyMC model for the given data and x-axis to scale and offset from a
    template:
    
    offset can specify an offset, or be True to have it be a free variate
    
    shiftout can be 'vel','z',or 'pix'
    
    v0 determines the initial value of the pixshift (random if None)
    
    multitemps determines if a linear combination of templates should be used
    
    data=Normal(tau=ivar,center=A*template(x-shift)+offset)
    """
    import pymc

    x, flux, ivar = specobj.x.copy(), specobj.flux.copy(), specobj.ivar.copy()
    templates = array(templates, copy=copy)

    if any([t.shape != templates[0].shape for t in templates]):
        raise ValueError("templates don't match")
    ntempix = templates[0].shape[0]
    npix = x.shape[0]

    #TODO: match pixels with alignment instead of just assuming middle
    tx = arange(ntempix)
    sx = arange(npix) + (
        ntempix - npix) / 2  #spectrum x-value in terms of template coordinates

    tmax, tmin = templates.max(), templates.min()
    fmax, fmin = flux.max(), flux.min()
    imax, imin = ivar.max(), ivar.min()
    xmax, xmin = x.max(), x.min()
    ivar0 = imin / npix / 100  #A small value to un-weight bad data points

    maxlshift = maxrshift = (ntempix - npix) / 2  #TODO:fix for non-symmetric

    if offset:
        loff, uoff = (fmin + tmin, fmax + tmax)  #TODO:fix
    else:
        loff = uoff = float(offset)

    offset = pymc.Uniform('offset',
                          loff,
                          uoff,
                          trace=bool(offset),
                          plot=bool(offset))

    pixshift = pymc.Uniform('pixshift',
                            -round(npix / 2),
                            round(npix / 2),
                            trace=True,
                            plot=False)
    if v0 is not None:
        pixshift.value = v0
    svar = None
    if 'vel' in shiftout:
        svar = pymc.Lambda(
            'vel',
            lambda pixshift=pixshift: pixshift_to_vel(
                pixshift, x, zout=False, logify=True, lincheck=False))
    elif shiftout == 'z':
        svar = pymc.Lambda(
            'z',
            lambda pixshift=pixshift: pixshift_to_vel(
                pixshift, x, zout=True, logify=True, lincheck=False))
    elif 'pix' in shiftout:
        svar = pixshift
    else:
        raise ValueError('unrecognized shiftout')
    svar.plot = True
    svar.trace = True

    elems = {'offset': offset, 'pixshift': pixshift}
    if svar is not pixshift:
        elems[svar.__name__] = svar

    if multitemps:
        #TODO:smarter initial
        A = pymc.Container([
            pymc.Uniform('A%i' % i, 0, fmax / np.max(t))
            for i, t in enumerate(templates)
        ])
        elems['A'] = A
        for e in A:
            e.plot = False
            e.value = 0.1
        A[0].value = 1

        @pymc.deterministic(trace=True, plot=False)
        def modelflux(A=A, offset=offset, pixshift=pixshift):
            #TODO:caching of some kind ?

            temp = sum((A * templates.T), 1)  #TODO:test

            #r = int(round(pixshift))

            shifted = interp(sx - pixshift + 1, tx, temp)  #TODO:why +1 ?
            #shifted = roll(temp,r)

            #            if r > 0:
            #            #    shifted[:r]=flux[:r] #do something smarter here for the edges
            #                shifted[:r]=shifted[r]
            #            elif r < 0:
            #            #    shifted[r:]=flux[r:] #do something smarter here for the edges
            #                shifted[r:]=shifted[r]

            return shifted + offset

        elems['modelflux'] = modelflux
    else:
        A = pymc.Uniform('A', 1, 1, value=1)  #TODO:smarter setting
        elems['A'] = A
        templatei = pymc.DiscreteUniform('templatei',
                                         0,
                                         len(templates) - 1,
                                         trace=True,
                                         plot=False)
        templatei.value = 0
        elems['templatei'] = templatei

        @pymc.deterministic(trace=True, plot=False)
        def modelflux(A=A,
                      offset=offset,
                      pixshift=pixshift,
                      templatei=templatei,
                      templates=templates):
            """
            The flux expected from the template parameters
            """
            #TODO:caching of some kind ?

            temp = templates[templatei]
            tx = arange(len(temp)) + 1
            #r = int(round(pixshift))

            shifted = interp(sx - pixshift + 1, tx, temp)  #TODO:why +1 ?

            #shifted = roll(temp,r)

            #Rendered unnecessary by ivar variable
            #            if r > 0:
            #            #    shifted[:r]=flux[:r] #do something smarter here for the edges
            #                shifted[:r]=shifted[r]
            #            elif r < 0:
            #            #    shifted[r:]=flux[r:] #do something smarter here for the edges
            #                shifted[r:]=shifted[r]

            return A * shifted + offset

        elems['modelflux'] = modelflux

        @pymc.potential
        def pixelcutoff(pixshift=pixshift):
            #TODO:rethink
            lcut = np.exp(-(pixshift - maxlshift) / npix)
            rcut = np.exp((pixshift - maxrshift) / npix)
            return lcut * rcut

        #elems['pixelcutoff']=pixelcutoff


#    @pymc.deterministic(trace=True,plot=False)
#    def ivar(pixshift=pixshift,ivararr=ivar,ivar0=ivar0):
#        """
#        The inverse variance is adjusted to be very small for points that are
#        off the edge
#        """
#        r = int(round(pixshift))
#        if r > 0:
#            ivars = ivararr.copy()
#            ivars[:r] = ivar0
#        elif r < 0:
#            ivars = ivararr.copy()
#            ivars[r:] = ivar0
#        else:
#            ivars = ivararr #leave alone

#        return ivars
#    elems['ivar']=ivar

#fluxvar = pymc.Poisson('flux',mu=modelflux,observed=True,value=flux)
    dmask = isfinite(ivar) & (ivar > 0)
    ivar[~dmask] = np.min(ivar[dmask]) / 1000  #TODO:test

    fluxvar = pymc.Normal('flux',
                          mu=modelflux,
                          tau=ivar,
                          observed=True,
                          value=flux)
    elems['fluxvar'] = fluxvar

    m = pymc.MCMC(elems)
    m.ivar = ivar
    return m
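# Hedged usage sketch; 'spec' and 'temps' are placeholders for a spectrum
# object (with .x, .flux and .ivar arrays) and a list of template arrays.
m = generate_MCMC_model(spec, temps, shiftout='velocity', multitemps=True)
m.sample(iter=20000, burn=5000)
print(m.stats()['vel']['mean'])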
Example #25
0
    def __init__(self, snobj, filters=None, inc_var=False, **args):
        '''Create an MCMC sampler based on a sn object. The specified filters
      are fit using the model that is currently selected. Uniform
      priors are assumed for the parameters unless overridden by assigning
      pymc Stochastics through **args.'''

        self.sn = snobj
        if filters is None:
            filters = list(self.sn.data.keys())

        self.model = snobj.model
        self.model.args = {}
        self.model._fbands = filters
        self.model.setup()
        params = []
        paramnames = list(self.model.parameters.keys())
        # First, setup stochastics for our parameters
        for param in paramnames:
            if param in args:
                params.append(args[param])
                del args[param]
                continue
            if param == 'dm15':
                params.append(pymc.Uniform('dm15', 0.7, 2.0))
            elif param == 'st':
                params.append(pymc.Uniform('st', 0.25, 1.22))
            elif param == 'Tmax':
                t0 = min([self.sn.data[f].MJD.min() for f in self.sn.data])
                t1 = max([self.sn.data[f].MJD.max() for f in self.sn.data])
                params.append(pymc.Uniform('Tmax', t0 - 30, t1 + 30))
            elif param == 'EBVhost':
                params.append(pymc.Uniform('EBVhost', 0, 10.))
            elif param == 'DM':
                params.append(pymc.Uniform('DM', 0, 100))
            elif param.find('max') > 0:
                params.append(pymc.Uniform(str(param), 10., 30.))
            else:
                raise AttributeError(
                    "Error, parameter %s not recognized. Update MCMC package" %
                    (param))
            if self.model.parameters[param] is None:
                params[-1].value = self.model.guess(param)
            else:
                params[-1].value = self.model.parameters[param]
        params = pymc.Container(params)

        # now setup intrinsic variances for each filter
        if inc_var:
            vars = pymc.InverseGamma('taus',
                                     alpha=0.5,
                                     beta=0.1**2,
                                     value=np.random.uniform(
                                         0, 0.1**2, size=len(filters)))
        else:
            vars = np.array([0.0] * len(filters))

        # The data stochastic that maps parameters to observations
        @pymc.data
        @pymc.stochastic
        def model(params=params,
                  vars=vars,
                  paramnames=paramnames,
                  filters=filters,
                  value=1.0):
            # Set the parameters in the model
            for i, param in enumerate(paramnames):
                if debug:
                    print("setting ", param, " to ", params[i])
                self.model.parameters[param] = params[i]

            logp = 0
            numpts = 0
            for i, f in enumerate(filters):
                mod, err, mask = self.model(f, self.sn.data[f].MJD)
                m = mask * self.sn.data[f].mask
                if not np.sometrue(m):
                    continue
                numpts += np.sum(m)
                tau = np.power(vars[i] + np.power(self.sn.data[f].e_mag, 2),
                               -1)
                logp += pymc.normal_like(self.sn.data[f].mag[m], mod[m],
                                         tau[m])
            #if numpts < len(paramnames):
            #   return -np.inf
            return logp

        pymc.MCMC.__init__(self, locals(), **args)

        # Setup the step methods
        # 1) params will be AdaptiveMetropolis, so we need to setup initial
        #    scales. If the model has been fit, use error, otherwise guess.
        def_scales = {
            'Tmax': 0.5**2,
            'st': 0.001**2,
            'dm15': 0.001**2,
            'max': 0.01**2,
            'DM': 0.01**2,
            'EBVhost': 0.01**2
        }
        scales = {}
        for i, par in enumerate(self.paramnames):
            if par in self.model.errors and self.model.errors[par] > 0:
                scales[self.params[i]] = self.model.errors[par]
            else:
                if par in def_scales:
                    scales[self.params[i]] = def_scales[par]
                elif par[0] == "T" and par[-3:] == "max":
                    scales[self.params[i]] = def_scales['Tmax']
                elif par[-3:] == "max":
                    scales[self.params[i]] = def_scales['max']
                else:
                    scales[self.params[i]] = self.params[i].value / 10.
        self.use_step_method(pymc.AdaptiveMetropolis,
                             self.params,
                             scales=scales,
                             delay=1000,
                             interval=1000)

        if inc_var:
            self.use_step_method(
                pymc.AdaptiveMetropolis, [self.vars],
                scales={self.vars: self.vars.value * 0 + 0.005**2})
Example #26
0
    def setup_inference(self):
        #depending on the number of wavelengths
        wavelength_number = len(self.wavelengths)
        l = []
        i = 0
        #add c0
        t = 1. / 5.**2
        #mu_ = np.mean(self.ydata)
        l.append(pymc.Normal("c_%i" % (i), mu=0, tau=t))
        i += 1
        for x in range(wavelength_number):
            for _ in range(2 * self.N):
                t = 1. / 5.**2
                mu_ = 0
                l.append(pymc.Normal("c_%i" % (i), mu=mu_, tau=t))
                i += 1
        C = pymc.Container(l)
        @pymc.stochastic(observed=False)
        def sigma(value=1):
            return -np.log(abs(value))

        @pymc.stochastic(observed=False)
        def sigma3(value=1):
            return -np.log(abs(value))

        qw_sigs = pymc.Container([
            pymc.HalfCauchy("qw_sigs_%i" % x, beta=10, alpha=1)
            for x in range(wavelength_number)
        ])
        if self.wavelength_sd_defined:
            qw = pymc.Container([
                pymc.distributions.Lognormal(
                    'qw_%i' % x, mu=self.wavelengths[x],
                    tau=1. / self.wavelength_sd[x]**2)
                for x in range(wavelength_number)
            ])
        else:
            qw = pymc.Container([
                pymc.distributions.TruncatedNormal(
                    'qw_%i' % x, mu=self.wavelengths[x],
                    tau=1. / self.wavelengths[x] / 3., a=0, b=np.inf)
                for x in range(wavelength_number)
            ])

        def fourier_series(C, N, QW, x, wavelength_number):
            v = np.array(x)
            v.fill(0.0)
            v = v.astype('float')

            for ii in range(len(x)):
                v[ii] += C[0]
                for w in range(wavelength_number):
                    for i in range(1, N + 1):
                        v[ii] = v[ii] + C[(2*i-1)+2*N*w]*np.cos(2*np.pi/QW[w] * i * (x[ii])) + \
                        C[(2*i)+2*N*w]*np.sin(2*np.pi/QW[w] * i * (x[ii]))
            return v
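        # In words: C[0] is the constant term; for wavelength index w and
        # harmonic i, C[(2*i-1)+2*N*w] and C[(2*i)+2*N*w] weight the cosine
        # and sine at frequency i/QW[w], so every wavelength contributes N
        # harmonics to the summed series.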

        self.vector_fourier_series = np.vectorize(fourier_series)
        # Define the form of the model and likelihood
        @pymc.deterministic
        def y_model(C=C,
                    x=self.xdata,
                    qw=qw,
                    nn=self.N,
                    wavelength_number=wavelength_number):
            return fourier_series(C, nn, qw, x, wavelength_number)

        y = pymc.Normal('y',
                        mu=y_model,
                        tau=1. / sigma**2,
                        observed=True,
                        value=self.ydata)
        # package the full model in a dictionary
        self.model1 = dict(C=C,
                           qw=qw,
                           sigma=sigma,
                           qw_sigs=qw_sigs,
                           y_model=y_model,
                           y=y,
                           x_values=self.xdata,
                           y_values=self.ydata)
        self.model_e = pymc.Model([C, qw, sigma, y])
        if len(self.vergence) > 0:

            @pymc.deterministic
            def vergence_values(c=C, qw=qw, y=np.array(self.vergence)[:, 0]):
                return np.sign(fourier_series2(c, qw, y))

            @pymc.stochastic(observed=True)
            def vergence(value=np.array(self.vergence)[:, 1],
                         mu=vergence_values):
                loglike = 0.
                loglike += pymc.distributions.normal_like((mu[value == 1]),
                                                          mu=1,
                                                          tau=1.)
                loglike += pymc.distributions.normal_like((mu[value == -1]),
                                                          mu=-1,
                                                          tau=1.)
                if loglike < float(-1.7876931348623157e+308):
                    return float(-1.7876931348623157e+308)
                return loglike

            self.model1.update({'vergence': vergence})
        if len(self.asymmetry_likelihoods) > 0:

            @pymc.deterministic
            def y_model_asym(c=C, qw=qw):
                x = np.linspace(-np.max(qw), np.max(qw))
                v = np.rad2deg(np.arctan(fourier_series2(c, qw, x)))
                m = np.median(v)  #-np.min(v)
                return m  #np.max(v)-np.min(v)

            @pymc.stochastic(observed=True)
            def y_asym(mu=y_model_asym,
                       value=self.asymmetry_likelihoods[0],
                       tau=1. / self.asymmetry_sigma**2):
                loglike = pymc.distributions.normal_like(x=value,
                                                         mu=mu,
                                                         tau=tau)
                return loglike * 10

            #y_interlimb = pymc.Normal('y_interlimb',mu=y_model_interlimb,value=self.interlimb_likelihoods[0],
            #tau = 1. / self.interlimb_sigma**2 )
            self.model1.update({'y_asym': y_asym})
        if len(self.interlimb_likelihoods) > 0:

            @pymc.deterministic
            def y_model_interlimb(c=C, qw=qw):
                x = np.linspace(-np.max(qw), np.max(qw))
                v = np.rad2deg(np.arctan(fourier_series2(c, qw, x)))
                d = np.max(v) - np.min(v)
                return d  #np.max(v)-np.min(v)

            @pymc.stochastic(observed=True)
            def y_interlimb(mu=y_model_interlimb,
                            value=self.interlimb_likelihoods[0],
                            tau=1. / self.interlimb_sigma**2):
                loglike = pymc.distributions.normal_like(x=value,
                                                         mu=y_model_interlimb,
                                                         tau=tau)
                return loglike * 10

            #y_interlimb = pymc.Normal('y_interlimb',mu=y_model_interlimb,value=self.interlimb_likelihoods[0],
            #tau = 1. / self.interlimb_sigma**2 )
            self.model1.update({'y_interlimb': y_interlimb})
        if len(self.axial_trace_likelihoods) > 0:
            d = self.wavelengths[0]  #np.max(self.axial_trace_likelihoods_limb) - np.min(self.axial_trace_likelihoods_limb)
            x_at = np.linspace(
                np.min(self.axial_trace_likelihoods) - d,
                np.max(self.axial_trace_likelihoods) + d, 300)

            @pymc.stochastic(observed=False)
            def at_sigma(value=1):
                return -np.log(abs(value))

            @pymc.deterministic
            def z_model_axial_t(c=C, wl=qw, z_at=x_at):
                return np.array(fourier_series_x_intercepts(c, wl, z_at))

            @pymc.stochastic(observed=True)
            def z_at(mu=z_model_axial_t,
                     sigma=at_sigma,
                     value=self.axial_trace_likelihoods):
                loglike = 0.
                mu = np.array(mu)
                #print mu
                if not np.array(mu).size:
                    return float(-1.7876931348623157e+308)  #-99999#-np.2inf
                for v in value:
                    m = 0.
                    if mu.shape:
                        dif = np.sort(np.abs(mu - v))
                        #if there are two hinges for the same axial trace penalise this!
                        if dif[1] < sigma:
                            loglike += -99999
                        m = mu[(np.abs(mu - v)).argmin()]

                        #m = mu[(np.abs(mu-v)).argmin()]
                    else:
                        m = mu
                    #print 'm', m
                    loglike += pymc.distributions.normal_like(x=v,
                                                              mu=m,
                                                              tau=1. /
                                                              sigma**2)
                if loglike < float(-1.7876931348623157e+308):
                    return float(-1.7876931348623157e+308)
                return loglike

            #z_at = pymc.Normal('z_at',mu=z_model_axial_t,tau = 1. / self.axial_trace_limb_sigma,value=self.axial_trace_likelihoods_limb)
            self.model1.update({'z_at': z_at, 'at_sigma': at_sigma})
        self.setup = True
        self.mcmc_uptodate = False
        return True
Example #27
0
import spacepy.plot as spp  # for the styles
import numpy as np
import pymc as pm

K = 2  # number of topics
V = 4  # number of words
D = 3  # number of documents

data = np.array([[1, 1, 1, 1], [1, 1, 1, 1], [0, 0, 0, 0]])

alpha = np.ones(K)
beta = np.ones(V)

theta = pm.Container([
    pm.CompletedDirichlet("theta_%s" % i,
                          pm.Dirichlet("ptheta_%s" % i, theta=alpha))
    for i in range(D)
])
phi = pm.Container([
    pm.CompletedDirichlet("phi_%s" % k, pm.Dirichlet("pphi_%s" % k,
                                                     theta=beta))
    for k in range(K)
])
Wd = [len(doc) for doc in data]

z = pm.Container([
    pm.Categorical('z_%i' % d,
                   p=theta[d],
                   size=Wd[d],
                   value=np.random.randint(K, size=Wd[d])) for d in range(D)
])
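# The observed words are never wired in above. A hedged completion in the
# same style: one Categorical per word slot, whose row of phi is selected by
# that slot's topic assignment via a pm.Lambda, followed by a short run.
w = pm.Container([
    pm.Categorical('w_%i_%i' % (d, i),
                   p=pm.Lambda('phi_z_%i_%i' % (d, i),
                               lambda z=z[d], phi=phi, i=i:
                               phi[int(z[i])].flatten()),
                   value=data[d][i],
                   observed=True)
    for d in range(D) for i in range(Wd[d])
])

model = pm.Model([theta, phi, z, w])
mcmc = pm.MCMC(model)
mcmc.sample(iter=2000, burn=500)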
Example #28
0

# numpy_array, word_in_dict and list_lists are defined earlier in the source
# script (not shown); nr_assoc_word holds each document as a list of word ids.
nr_assoc_word = numpy_array(word_in_dict, list_lists)
print(nr_assoc_word)

nr_doc = len(nr_assoc_word)
nr_words_doc = [len(doc) for doc in nr_assoc_word]
nr_words = len(word_in_dict)
nr_topics = 3

alpha = np.ones(nr_topics)
beta = np.ones(nr_words)

theta = pm.Container([
    pm.CompletedDirichlet("theta_%s" % i,
                          pm.Dirichlet("theta1_%s" % i, theta=alpha))
    for i in range(nr_doc)
])

for d in range(nr_doc):
    print(theta[d].value)

phi = pm.Container([
    pm.CompletedDirichlet("phi_%s" % j, pm.Dirichlet("phi1_%s" % j,
                                                     theta=beta))
    for j in range(nr_topics)
])

for i in range(nr_topics):
    print(phi[i].value)
Example #29
0
    def setup_inference_mixture(self):
        #depending on the number of wavelengths
        wavelength_number = len(self.wavelengths)
        l = []
        i = 0
        #add c0
        t = 1. / 5.**2
        mu_ = np.mean(self.ydata)
        l.append(pymc.Normal("c_%i" % (i), mu=mu_, tau=t))
        i += 1
        for x in range(wavelength_number):
            for _ in range(2 * self.N):
                t = 1. / 5.**2
                mu_ = 0
                l.append(pymc.Normal("c_%i" % (i), mu=mu_, tau=t))
                i += 1
        C = pymc.Container(l)
        i_ = pymc.Container([
            pymc.DiscreteUniform('i_%i' % i, lower=0, upper=1)
            for i in range(len(self.xdata))
        ])

        @pymc.stochastic(observed=False)
        def sigma(value=1):
            return -np.log(abs(value))

        @pymc.stochastic(observed=False)
        def sigma3(value=1):
            return -np.log(abs(value))

        qw_sigs = pymc.Container([
            pymc.HalfCauchy("qw_sigs_%i" % x, beta=10, alpha=1)
            for x in range(wavelength_number)
        ])
        if self.wavelength_sd_defined:
            qw = pymc.Container([
                pymc.distributions.Lognormal(
                    'qw_%i' % x, mu=self.wavelengths[x],
                    tau=1. / self.wavelength_sd[x]**2)
                for x in range(wavelength_number)
            ])
        else:
            qw = pymc.Container([
                pymc.distributions.Normal(
                    'qw_%i' % x, mu=self.wavelengths[x],
                    tau=1. / self.wavelengths[x] / 3.)
                for x in range(wavelength_number)
            ])

        def fourier_series(C, N, QW, x, wavelength_number, i_):
            v = np.array(x)
            v.fill(0.0)
            v = v.astype('float')

            for ii in range(len(x)):
                v[ii] += C[0]
                for w in range(wavelength_number):
                    for i in range(1, N + 1):
                        v[ii] = v[ii] + C[(2*i-1)+2*N*w]*np.cos(2*np.pi/QW[w] * i * (x[ii])) + \
                        C[(2*i)+2*N*w]*np.sin(2*np.pi/QW[w] * i * (x[ii]))
                if i_[ii] == 0:
                    v[ii] = -v[ii]
            return v
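        # The extra i_ indicator negates the series pointwise, letting the
        # sampler choose, per data point, between the curve and its mirror
        # image: a simple two-component sign mixture.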

        self.vector_fourier_series = np.vectorize(fourier_series)
        # Define the form of the model and likelihood
        @pymc.deterministic
        def y_model(C=C,
                    x=self.xdata,
                    qw=qw,
                    nn=self.N,
                    wavelength_number=wavelength_number,
                    i_=i_):
            return fourier_series(C, nn, qw, x, wavelength_number, i_)

        y = pymc.Normal('y',
                        mu=y_model,
                        tau=1. / sigma**2,
                        observed=True,
                        value=self.ydata)
        # package the full model in a dictionary
        self.model1 = dict(C=C,
                           qw=qw,
                           sigma=sigma,
                           qw_sigs=qw_sigs,
                           y_model=y_model,
                           y=y,
                           x_values=self.xdata,
                           y_values=self.ydata,
                           i_=i_)
        self.model_e = pymc.Model([C, qw, sigma, y])

        self.setup = True
        self.mcmc_uptodate = False
        return True
Example #30
0
def main(RUNFLAG, outname):

    print('Setting up parameters and priors...')

    params = Params()
    # Set up location here with command line arguments in a list.
    params.cmd_line_chg(['--kalbar'])
    assert params.site_name + 'fields.txt' == 'data/kalbarfields.txt'
    # Set parameters specific to Bayesian runs
    params.PLOT = False
    params.OUTPUT = False

    # This sends a message to CalcSol on whether or not to use CUDA
    if params.CUDA:
        globalvars.cuda = True
    else:
        globalvars.cuda = False
    # get wind data and day labels
    wind_data, days = PM.get_wind_data(*params.get_wind_params())
    params.ndays = len(days)

    # reduce domain
    params.domain_info = (10000.0, 200)  #50 m sided cells
    domain_res = params.domain_info[0] / params.domain_info[1]
    cell_area = domain_res**2

    locinfo = LocInfo(params.dataset, params.coord, params.domain_info)

    prior_eps = {}

    #### Model priors ####
    lam = pm.Beta("lam", 5, 1, value=0.95)
    prior_eps[lam] = 0.01
    f_a1 = pm.TruncatedNormal("f_a1", 6, 0.3, 0, 9, value=6)
    prior_eps[f_a1] = 0.1
    f_a2 = pm.TruncatedNormal("f_a2", 20, 0.3, 15, 24, value=20)
    prior_eps[f_a2] = 0.1
    f_b1_p = pm.Gamma("fb1_p", 2, 1, value=1.5, trace=False,
                      plot=False)  #alpha,beta parameterization
    prior_eps[f_b1_p] = 0.05

    @pm.deterministic(trace=True, plot=True)
    def f_b1(f_b1_p=f_b1_p):
        return f_b1_p + 1

    f_b2_p = pm.Gamma("fb2_p", 2, 1, value=1.5, trace=False, plot=False)
    prior_eps[f_b2_p] = 0.05

    @pm.deterministic(trace=True, plot=True)
    def f_b2(f_b2_p=f_b2_p):
        return f_b2_p + 1

    g_aw = pm.Gamma("g_aw", 2.2, 1, value=1.0)
    prior_eps[g_aw] = 0.05
    g_bw = pm.Gamma("g_bw", 5, 1, value=3.8)
    prior_eps[g_bw] = 0.1
    # flight diffusion parameters. note: mean is average over flight advection
    sig_x = pm.Gamma("sig_x", 26, 0.15, value=180)
    prior_eps[sig_x] = 1
    sig_y = pm.Gamma("sig_y", 15, 0.15, value=150)
    prior_eps[sig_y] = 1
    corr_p = pm.Beta("corr_p", 5, 5, value=0.5, trace=False, plot=False)
    prior_eps[corr_p] = 0.01

    @pm.deterministic(trace=True, plot=True)
    def corr(corr_p=corr_p):
        return corr_p * 2 - 1

    # local spread parameters
    sig_x_l = pm.Gamma("sig_xl", 2, 0.08, value=10)
    prior_eps[sig_x_l] = 1
    sig_y_l = pm.Gamma("sig_yl", 2, 0.14, value=10)
    prior_eps[sig_y_l] = 1
    corr_l_p = pm.Beta("corr_l_p", 5, 5, value=0.5, trace=False, plot=False)
    prior_eps[corr_l_p] = 0.005

    @pm.deterministic(trace=True, plot=True)
    def corr_l(corr_l_p=corr_l_p):
        return corr_l_p * 2 - 1

    #pymc.MAP can only take float values, so we vary mu_r and set n_periods.
    mu_r = pm.Normal("mu_r", 1., 1, value=1)
    prior_eps[mu_r] = 0.05
    params.n_periods = 30
    #alpha_pow = prev. time exponent in ParasitoidModel.h_flight_prob
    xi = pm.Gamma("xi", 1, 1,
                  value=0.75)  # presence to oviposition/emergence factor
    prior_eps[xi] = 0.05

    #### Observation probabilities. ####
    em_obs_prob = pm.Beta("em_obs_prob", 1, 1, value=0.05)  # per-wasp prob of
    # observing emergence in release field grid given max leaf collection.
    # This is dependent on the size of the cell surrounding the grid point,
    # but there's not much to be done about this. Just remember to
    # interpret this number based on grid coarseness.
    prior_eps[em_obs_prob] = 0.0005
    grid_obs_prob = pm.Beta("grid_obs_prob", 1, 1,
                            value=0.005)  # probability of
    # observing a wasp present in the grid cell given max leaf sampling
    prior_eps[grid_obs_prob] = 0.0005

    #card_obs_prob = pm.Beta("card_obs_prob",1,1,value=0.5) # probability of
    # observing a wasp present in the grid cell given max leaf sampling

    #### Data collection model background for sentinel fields ####
    # Need to fix linear units for area. Meters would be best.
    # Effective collection area (constant between fields) is very uncertain
    with warnings.catch_warnings():
        # squelch a warning based on pymc coding we don't need to worry about
        warnings.simplefilter("ignore", RuntimeWarning)
        A_collected = pm.TruncatedNormal("A_collected",
                                         2500,
                                         1 / 2500,
                                         0,
                                         min(locinfo.field_sizes.values()) *
                                         cell_area,
                                         value=2500)  # in m**2
    prior_eps[A_collected] = 10
    # Each field has its own binomial probability.
    # Probabilities are likely to be small, and pm.Beta cannot handle small
    #   parameter values. So we will use TruncatedNormal again.
    N = len(locinfo.sent_ids)
    sent_obs_probs = np.empty(N, dtype=object)
    # fix beta for the Beta distribution
    sent_beta = 40
    # mean of Beta distribution will be A_collected/field size

    ## Loop over fields ##
    for n, key in enumerate(locinfo.sent_ids):
        sent_obs_probs[n] = pm.Beta(
            "sent_obs_probs_{}".format(key),
            A_collected / (locinfo.field_sizes[key] * cell_area) * sent_beta /
            (1 - A_collected / (locinfo.field_sizes[key] * cell_area)),
            sent_beta,
            value=0.1 * 3600 / (locinfo.field_sizes[key] * cell_area))
        prior_eps[sent_obs_probs[n]] = 0.0005

    sent_obs_probs = pm.Container(sent_obs_probs)
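    # For a Beta(alpha, beta) with beta fixed, the mean mu = alpha/(alpha+beta)
    # rearranges to alpha = beta*mu/(1-mu); the first argument in the loop
    # above is exactly that, with mu = A_collected / (field size * cell area).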

    #### Collect variables ####
    params_ary = pm.Container(
        np.array([
            g_aw, g_bw, f_a1, f_b1, f_a2, f_b2, sig_x, sig_y, corr, sig_x_l,
            sig_y_l, corr_l, lam, mu_r
        ],
                 dtype=object))

    if params.dataset == 'kalbar':
        # factor for kalbar initial spread
        sprd_factor = pm.Uniform("sprd_factor", 0, 1, value=0.1)
        prior_eps[sprd_factor] = 0.01
    else:
        sprd_factor = None

    print('Getting initial model values...')

    #### Run model ####
    @pm.deterministic(plot=False, trace=False)
    def pop_model(params=params,
                  params_ary=params_ary,
                  locinfo=locinfo,
                  wind_data=wind_data,
                  days=days,
                  sprd_factor=sprd_factor):
        '''This function acts as an interface between PyMC and the model.
        Not only does it run the model, but it provides an emergence potential
        based on the population model result projected forward from feasible
        oviposition dates. To modify how this projection happens, edit
        popdensity_to_emergence. Returned values from this function should be
        nearly ready to compare to data.
        '''
        modeltic = time.time()
        ### Alter params with stochastic variables ###

        # g wind function parameters
        params.g_params = tuple(params_ary[0:2])
        # f time of day function parameters
        params.f_params = tuple(params_ary[2:6])
        # Diffusion coefficients
        params.Dparams = tuple(params_ary[6:9])
        params.Dlparams = tuple(params_ary[9:12])
        # Probability of any flight during the day under ideal circumstances
        params.lam = params_ary[12]

        # scaling flight advection to wind advection
        params.mu_r = params_ary[13]

        ### PHASE ONE ###
        # First, get spread probability for each day as a coo sparse matrix
        max_shape = np.array([0, 0])
        pm_args = [(days[0], wind_data, *params.get_model_params(),
                    params.r_start)]
        pm_args.extend([(day, wind_data, *params.get_model_params())
                        for day in days[1:params.ndays]])

        ##### Kalbar wind started recording a day late. Spread the population
        #####   locally before running full model.
        if params.dataset == 'kalbar':
            res = params.domain_info[0] / params.domain_info[1]
            mean_drift = np.array([-25., 15.])
            xdrift_int = int(mean_drift[0] // res)
            xdrift_r = mean_drift[0] % res
            ydrift_int = int(mean_drift[1] // res)
            ydrift_r = mean_drift[1] % res
            longsprd = PM.get_mvn_cdf_values(
                res, np.array([xdrift_r, ydrift_r]),
                PM.Dmat(params_ary[6], params_ary[7], params_ary[8]))
            shrtsprd = PM.get_mvn_cdf_values(
                res, np.array([0., 0.]),
                PM.Dmat(params_ary[9], params_ary[10], params_ary[11]))

            mlen = int(
                max(longsprd.shape[0], shrtsprd.shape[0]) +
                max(abs(xdrift_int), abs(ydrift_int)) * 2)
            sprd = np.zeros((mlen, mlen))
            lbds = [
                int(mlen // 2 - longsprd.shape[0] // 2),
                int(mlen // 2 + longsprd.shape[0] // 2 + 1)
            ]
            sprd[lbds[0] - ydrift_int:lbds[1] - ydrift_int, lbds[0] +
                 xdrift_int:lbds[1] + xdrift_int] = longsprd * sprd_factor
            sbds = [
                int(mlen // 2 - shrtsprd.shape[0] // 2),
                int(mlen // 2 + shrtsprd.shape[0] // 2 + 1)
            ]
            sprd[sbds[0]:sbds[1],
                 sbds[0]:sbds[1]] += shrtsprd * (1 - sprd_factor)
            '''
            pmf_list = [sparse.coo_matrix(PM.get_mvn_cdf_values(
                        params.domain_info[0]/params.domain_info[1],
                        np.array([0.,0.]),
                        PM.Dmat(sprd_factor*params_ary[9],
                                sprd_factor*params_ary[10],params_ary[11])))]
            '''
            sprd[int(sprd.shape[0] // 2),
                 int(sprd.shape[0] // 2)] += max(0, 1 - sprd.sum())
            pmf_list = [sparse.coo_matrix(sprd)]
        else:
            pmf_list = []

        ###################### Get pmf_list from multiprocessing
        pmf_list.extend(pool.starmap(PM.prob_mass, pm_args))

        ######################
        for pmf in pmf_list:
            for dim in range(2):
                if pmf.shape[dim] > max_shape[dim]:
                    max_shape[dim] = pmf.shape[dim]

        r_spread = []  # holds the one-day spread for each release day.

        # Reshape the prob. mass function of each release day into solution form
        for ii in range(params.r_dur):
            offset = params.domain_info[1] - pmf_list[ii].shape[0] // 2
            dom_len = params.domain_info[1] * 2 + 1
            r_spread.append(
                sparse.coo_matrix(
                    (pmf_list[ii].data,
                     (pmf_list[ii].row + offset, pmf_list[ii].col + offset)),
                    shape=(dom_len, dom_len)).tocsr())

        ### PHASE TWO ###
        # Pass the probability list, pmf_list, and other info to convolution solver.
        #   This will return the finished population model.
        with Capturing() as output:
            if params.dataset == 'kalbar':
                # extend day count by one
                days_ext = [days[0] - 1]
                days_ext.extend(days)
                modelsol = get_populations(r_spread, pmf_list, days_ext,
                                           params.ndays + 1, dom_len,
                                           max_shape, params.r_dur,
                                           params.r_number, params.r_mthd())
                # remove the first one and start where wind started.
                modelsol = modelsol[1:]
            else:
                modelsol = get_populations(r_spread, pmf_list, days,
                                           params.ndays, dom_len, max_shape,
                                           params.r_dur, params.r_number,
                                           params.r_mthd())

        # modelsol now holds the model results for this run as CSR sparse arrays

        # get emergence potential (measured in expected number of wasps previously
        #   present whose oviposition would result in emergence on the given date)
        #   from the model result
        release_emerg, sentinel_emerg = popdensity_to_emergence(
            modelsol, locinfo)

        # get the expected wasp populations at grid points on sample days
        grid_counts = popdensity_grid(modelsol, locinfo)

        # get the expected wasp populations in cardinal directions
        '''card_counts = popdensity_card(modelsol,locinfo,params.domain_info)'''

        ## For the lists release_emerg and sentinel_emerg:
        ##    Each list entry corresponds to a data collection day (one array)
        ##    In each array:
        ##    Each column corresponds to an emergence observation day (as in data)
        ##    Each row corresponds to a grid point or sentinel field, respectively
        ## For the array grid_counts:
        ##    Each column corresponds to an observation day
        ##    Each row corresponds to a grid point
        ## For the list card_counts:
        ##    Each list entry corresponds to a sampling day (one array)
        ##    Each column corresponds to a step in a cardinal direction
        ##    Each row corresponds to a cardinal direction
        print('{:03.1f} sec./model at {}'.format(
            time.time() - modeltic, time.strftime("%H:%M:%S %d/%m/%Y")),
              end='\r')
        sys.stdout.flush()
        return (release_emerg, sentinel_emerg, grid_counts)  #,card_counts)

    print('Parsing model output and connecting to Bayesian model...')

    ### Parse the results of pop_model into separate deterministic variables ###
    '''Get Poisson probabilities for sentinal field emergence. Parameters:
        xi is constant, emerg is a list of ndarrays, betas is a 1D array of
        field probabilities'''
    Ncollections = len(locinfo.sent_DataFrames)
    sent_poi_rates = []
    for ii in range(Ncollections):
        s_ndays = len(locinfo.sent_DataFrames[ii]['datePR'].unique())
        sent_poi_rates.append(
            pm.Lambda('sent_poi_rate_{}'.format(ii),
                      lambda xi=xi, ndays=s_ndays, betas=sent_obs_probs,
                      emerg_model=pop_model[1][ii]: xi * emerg_model * np.tile(
                          betas, (ndays, 1)).T,
                      trace=False))
    sent_poi_rates = pm.Container(sent_poi_rates)
    '''Return Poisson probabilities for release field grid emergence. Parameters:
        xi is constant, emerg is a list of ndarrays. collection effort is
        specified in locinfo.'''
    Ncollections = len(locinfo.release_DataFrames)
    rel_poi_rates = []
    for ii in range(Ncollections):
        r_effort = locinfo.release_collection[ii]  #fraction of max collection
        r_ndays = len(locinfo.release_DataFrames[ii]['datePR'].unique())
        rel_poi_rates.append(
            pm.Lambda('rel_poi_rate_{}'.format(ii),
                      lambda xi=xi, ndays=r_ndays, r_effort=r_effort, beta=
                      em_obs_prob, emerg_model=pop_model[0][ii]: xi *
                      emerg_model * np.tile(r_effort * beta, (ndays, 1)).T,
                      trace=False))
    rel_poi_rates = pm.Container(rel_poi_rates)

    @pm.deterministic(plot=False, trace=False)
    def grid_poi_rates(locinfo=locinfo,
                       beta=grid_obs_prob,
                       obs_model=pop_model[2]):
        '''Return Poisson probabilities for grid sampling
        obs_model is an ndarray, sampling effort is specified in locinfo.'''
        return beta * locinfo.grid_samples * obs_model

    '''Return Poisson probabilities for cardinal direction sampling
        obs_model is a list of ndarrays, sampling effort is assumed constant'''
    '''
    card_poi_rates = []
    for ii,obs in enumerate(pop_model[3]):
        card_poi_rates.append(pm.Lambda('card_poi_rate_{}'.format(ii),
            lambda beta=card_obs_prob, obs=obs: beta*obs))
    card_poi_rates = pm.Container(card_poi_rates)
    '''

    # Given the expected wasp densities from pop_model, actual wasp densities
    #   are modeled as a thinned Poisson random variable about that mean.
    # Each wasp in the area then has a small probability of being seen.

    ### Connect sentinel emergence data to model ###
    N_sent_collections = len(locinfo.sent_DataFrames)
    # Create list of collection variables
    sent_collections = []
    for ii in range(N_sent_collections):
        # Apparently, pymc does not play well with 2D array parameters
        sent_collections.append(
            np.empty(sent_poi_rates[ii].value.shape, dtype=object))
        for n in range(sent_collections[ii].shape[0]):
            for m in range(sent_collections[ii].shape[1]):
                sent_collections[ii][n, m] = pm.Poisson(
                    "sent_em_obs_{}_{}_{}".format(ii, n, m),
                    sent_poi_rates[ii][n, m],
                    value=float(locinfo.sentinel_emerg[ii][n, m]),
                    observed=True)
    sent_collections = pm.Container(sent_collections)

    ### Connect release-field emergence data to model ###
    N_release_collections = len(locinfo.release_DataFrames)
    # Create list of collection variables
    rel_collections = []
    for ii in range(N_release_collections):
        rel_collections.append(
            np.empty(rel_poi_rates[ii].value.shape, dtype=object))
        for n in range(rel_collections[ii].shape[0]):
            for m in range(rel_collections[ii].shape[1]):
                rel_collections[ii][n, m] = pm.Poisson(
                    "rel_em_obs_{}_{}_{}".format(ii, n, m),
                    rel_poi_rates[ii][n, m],
                    value=float(locinfo.release_emerg[ii][n, m]),
                    observed=True)
    rel_collections = pm.Container(rel_collections)

    ### Connect grid sampling data to model ###
    grid_obs = np.empty(grid_poi_rates.value.shape, dtype=object)
    for n in range(grid_obs.shape[0]):
        for m in range(grid_obs.shape[1]):
            grid_obs[n, m] = pm.Poisson("grid_obs_{}_{}".format(n, m),
                                        grid_poi_rates[n, m],
                                        value=float(locinfo.grid_obs[n, m]),
                                        observed=True)
    grid_obs = pm.Container(grid_obs)

    ### Connect cardinal direction data to model ###
    '''
    N_card_collections = len(locinfo.card_obs_DataFrames)
    # Create list of sampling variables
    card_collections = []
    for ii in range(N_card_collections):
        card_collections.append(np.empty(card_poi_rates[ii].value.shape,
                                         dtype=object))
        for n in range(card_collections[ii].shape[0]):
            for m in range(card_collections[ii].shape[1]):
                card_collections[ii][n,m] = pm.Poisson(
                    "card_obs_{}_{}_{}".format(ii,n,m),
                    card_poi_rates[ii][n,m],
                    value=locinfo.card_obs[ii][n,m],
                    observed=True, plot=False)
    card_collections = pm.Container(card_collections)
    '''

    ### Collect model ###
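    # The two variable lists below are identical except that the 'kalbar'
    #   dataset also fits sprd_factor; the commented-out cardinal-direction
    #   variables are omitted from both.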
    if params.dataset == 'kalbar':
        Bayes_model = pm.Model([
            lam, f_a1, f_a2, f_b1_p, f_b2_p, f_b1, f_b2, g_aw, g_bw, sig_x,
            sig_y, corr_p, corr, sig_x_l, sig_y_l, corr_l_p, corr_l, mu_r,
            sprd_factor, grid_obs_prob, xi, em_obs_prob, A_collected,
            sent_obs_probs, params_ary, pop_model, grid_poi_rates,
            rel_poi_rates, sent_poi_rates, grid_obs, rel_collections,
            sent_collections
        ])
    else:
        Bayes_model = pm.Model([
            lam, f_a1, f_a2, f_b1_p, f_b2_p, f_b1, f_b2, g_aw, g_bw, sig_x,
            sig_y, corr_p, corr, sig_x_l, sig_y_l, corr_l_p, corr_l, mu_r,
            grid_obs_prob, xi, em_obs_prob, A_collected, sent_obs_probs,
            params_ary, pop_model, grid_poi_rates, rel_poi_rates,
            sent_poi_rates, grid_obs, rel_collections, sent_collections
        ])

    ######################################################################
    #####              Run Methods and Interactive Menu              #####
    ######################################################################

    def MAP_run(outname=None):
        '''Compute the maximum a posteriori (MAP) point estimate of the model
        parameters and report fit statistics.'''
        tic = time.time()
        M = pm.MAP(Bayes_model, prior_eps)
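        # prior_eps is passed as pm.MAP's eps argument: the step size used
        #   when numerically approximating derivatives of the log-probability.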
        print('Fitting....')
        M.fit()
        # Return statistics
        print('Estimate complete. Time elapsed: {}'.format(time.time() - tic))
        print('Free stochastic variables: {}'.format(M.len))
        print('Joint log-probability of model: {}'.format(M.logp))
        print('Max joint log-probability of model: {}'.format(M.logp_at_max))
        print('Maximum log-likelihood: {}'.format(M.lnL))
        print("Akaike's Information Criterion {}".format(M.AIC), flush=True)
        print('---------------Variable estimates---------------')
        for var in Bayes_model.stochastics:
            print('{} = {}'.format(var, var.value))
        # Save result to file
        if outname is None:
            outname = 'Max_aPosteriori_Estimate.txt'
        with open(outname, 'w') as fobj:
            fobj.write('Time elapsed: {}\n'.format(time.time() - tic))
            fobj.write('Free stochastic variables: {}\n'.format(M.len))
            fobj.write('Joint log-probability of model: {}\n'.format(M.logp))
            fobj.write('Max joint log-probability of model: {}\n'.format(
                M.logp_at_max))
            fobj.write('Maximum log-likelihood: {}\n'.format(M.lnL))
            fobj.write("Akaike's Information Criterion {}\n".format(M.AIC))
            fobj.write('---------------Variable estimates---------------\n')
            for var in Bayes_model.stochastics:
                fobj.write('{} = {}\n'.format(var, var.value))
        print('Result saved to {}.'.format(outname))
        return M

    def norm_run(fname, outname=None):
        '''Fit a normal approximation to the posterior (a MAP fit plus a
        multivariate normal around the mode; means in M.mu, covariances in
        M.C), backed by an hdf5 database at fname.'''
        try:
            tic = time.time()
            M = pm.NormApprox(Bayes_model,
                              eps=prior_eps,
                              db='hdf5',
                              dbname=fname,
                              dbmode='a',
                              dbcomplevel=0)
            print('Fitting....')
            M.fit()
            # Return statistics
            print('Estimate complete. Time elapsed: {}'.format(time.time() -
                                                               tic))
            print('Free stochastic variables: {}'.format(M.len))
            print('Joint log-probability of model: {}'.format(M.logp))
            print('Max joint log-probability of model: {}'.format(
                M.logp_at_max))
            print("Akaike's Information Criterion {}".format(M.AIC),
                  flush=True)
            print('---------------Variable estimates---------------')
            print('Estimated means: ')
            for var in Bayes_model.stochastics:
                print('{} = {}'.format(var, M.mu[var]))
            print('Estimated variances: ')
            for var in Bayes_model.stochastics:
                print('{} = {}'.format(var, M.C[var]))
            # Save result to file
            if outname is None:
                outname = "Normal_approx.txt"
            with open(outname, 'w') as fobj:
                fobj.write('Time elapsed: {}\n'.format(time.time() - tic))
                fobj.write('Free stochastic variables: {}\n'.format(M.len))
                fobj.write('Joint log-probability of model: {}\n'.format(
                    M.logp))
                fobj.write('Max joint log-probability of model: {}\n'.format(
                    M.logp_at_max))
                fobj.write("Akaike's Information Criterion {}\n".format(M.AIC))
                fobj.write(
                    '---------------Variable estimates---------------\n')
                fobj.write('Estimated means: \n')
                for var in Bayes_model.stochastics:
                    fobj.write('{} = {}\n'.format(var, M.mu[var]))
                fobj.write('Estimated variances: \n')
                for var in Bayes_model.stochastics:
                    fobj.write('{} = {}\n'.format(var, M.C[var]))
            print('These results have been saved to {}.'.format(outname))
        except Exception as e:
            print(e)
            print('Exception: database closing...')
            M.db.close()
            print('Database closed.')
            raise
        return M
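    # A sketch of typical interactive use ('approx.h5' is a placeholder name):
    #   M = norm_run('approx.h5')
    #   M.mu[f_a1]         # posterior mean of f_a1
    #   M.C[f_a1, f_a2]    # covariance matrix of f_a1 and f_a2
    #   M.db.close()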

    # Parse run type
    if RUNFLAG == 'MAP_RUN':
        M = MAP_run(outname)
    elif RUNFLAG is not None:
        M = norm_run(RUNFLAG, outname)
        M.db.close()
    else:
        print(
            '----- Maximum a posteriori estimates & Normal approximations -----'
        )
        while True:
            print(" 'map': Calculate maximum a posteriori estimate")
            print("'norm': Calculate normal approximation")
            print("'quit': Quit.")
            cmd = input('Enter: ').strip().lower()
            if cmd == 'map':
                M = MAP_run(outname)
                # Option to enter IPython
                cmd_py = input('Enter IPython? y/[n]: ').strip().lower()
                if cmd_py in ('y', 'yes'):
                    import IPython
                    IPython.embed()
            elif cmd == 'norm':
                fname = input("Enter database name or 'back' to cancel:")
                fname = fname.strip()
                if fname == 'q' or fname == 'quit':
                    return
                elif fname == 'b' or fname == 'back':
                    continue
                elif fname[-3:] != '.h5':
                    fname = fname + '.h5'
                M = norm_run(fname, outname)
                try:
                    print("For covariances, enter IPython and request a "
                          "covariance matrix by passing variables in the "
                          "following syntax:\n"
                          "M.C[var1,var2,...,varn]\n"
                          "Example: M.C[f_a1,f_a2] gives the covariance "
                          "matrix of f_a1 and f_a2.")
                    # Option to enter IPython
                    cmd_py = input('Enter IPython? y/[n]: ').strip().lower()
                    if cmd_py in ('y', 'yes'):
                        import IPython
                        IPython.embed()
                    M.db.close()
                    print('Database closed.')
                except Exception as e:
                    print(e)
                    print('Exception: database closing...')
                    M.db.close()
                    print('Database closed.')
                    raise
            elif cmd in ('quit', 'q'):
                return
            else:
                print('Command not recognized.')