Example 1
def model_gen():
    
    varlist = []
    
    stdev = pymc.TruncatedNormal('stdev', mu=400, tau=1.0 / (400**2), a=0, b=Inf)
    varlist.append(stdev)
    
    
    @pymc.deterministic
    def precision(stdev=stdev):
        return 1.0 / (stdev**2)
    

    
    
    a = pymc.TruncatedNormal('a', mu=1, tau=1.0 / (30**2), a=0, b=Inf)
    b = pymc.Uniform('b', lower=0.05, upper=2.0)
    varlist.append(a)
    varlist.append(b)
    
    
    @pymc.deterministic
    def nonlinear(Re=ReData, a=a, b=b):
        # use the Re parent rather than the module-level ReData;
        # 'value' and 'observed' are not valid parents for a deterministic
        return a * Re**b
    
    results = pymc.Normal('results', mu=nonlinear, tau=precision, value=measured, observed=True)
    varlist.append(results)
    
    return varlist
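
A minimal usage sketch (not part of the original source; it assumes pymc 2.x and that the module-level ReData, measured and Inf are in scope): the returned node list can be fed straight to pymc.MCMC.

import pymc

model_vars = model_gen()
mcmc = pymc.MCMC(model_vars)        # MCMC accepts any iterable of nodes
mcmc.sample(iter=20000, burn=5000)
a_samples = mcmc.trace('a')[:]      # posterior draws for the prefactor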
Example 2
    def model(x, f): 
        # PRIORS
        y_err = sigma0
        # print (_t_initial,_t_final, one_x_offset_init)
        one_x_offset = pymc.Uniform("one_x_offset", _t_initial, time[np.argmax(signal)], value=_t_initial)
        two_x_offset = pymc.Uniform("two_x_offset", _t_initial, _t_final, value=_t_final)
        sum_of_amps = pymc.TruncatedNormal("sum_amps", 
                                           mu=sum_mu, 
                                           tau=sum_tau, 
                                           a=sum_a, 
                                           b=sum_b, 
                                           value=sum_mu) #sigma/mu is the n=1 std deviation in units of n=1 amplitude
        diff_of_amps = pymc.TruncatedNormal("diff_amps", 
                                            mu=0, 
                                            tau=diff_tau, 
                                            a=diff_a, 
                                            b=diff_b, 
                                            value=0)
        one_x_amplitude = (sum_of_amps+diff_of_amps)/2
        two_x_amplitude = (sum_of_amps-diff_of_amps)/2
        # MODEL
        @pymc.deterministic(plot=False)
        def mod_two_pulse(x=time, 
                          one_x_offset=one_x_offset, 
                          two_x_offset=two_x_offset, 
                          one_x_amplitude=one_x_amplitude, 
                          two_x_amplitude=two_x_amplitude):
            return (one_pulse(x, x_offset=one_x_offset, amplitude=one_x_amplitude) +
                    one_pulse(x, x_offset=two_x_offset, amplitude=two_x_amplitude))

        # likelihood
        y = pymc.Normal("y", mu=mod_two_pulse, tau= 1.0/y_err**2, value=signal, observed=True)
        return locals()
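
one_pulse is defined elsewhere in this project. For orientation only, a plausible stand-in is a Gaussian pulse template; the actual shape used by the source may differ.

import numpy as np

def one_pulse(x, x_offset=0.0, amplitude=1.0, width=1.0):
    # hypothetical pulse template: a Gaussian centered at x_offset
    return amplitude * np.exp(-0.5 * ((x - x_offset) / width) ** 2)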
Example 3
    def test_aesara_switch_broadcast_edge_cases_2(self):
        # Known issue 2: https://github.com/pymc-devs/pymc/issues/4417
        # fmt: off
        data = np.array([
            1.35202174,
            -0.83690274,
            1.11175166,
            1.29000367,
            0.21282749,
            0.84430966,
            0.24841369,
            0.81803141,
            0.20550244,
            -0.45016253,
        ])
        # fmt: on
        with pm.Model() as m:
            mu = pm.Normal("mu", 0, 5)
            obs = pm.TruncatedNormal("obs",
                                     mu=mu,
                                     sigma=1,
                                     lower=-1,
                                     upper=2,
                                     observed=data)

        npt.assert_allclose(
            m.dlogp([m.rvs_to_values[mu]])({"mu": 0}),
            2.499424682024436,
            rtol=1e-5,
        )
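
The asserted gradient can be cross-checked outside PyMC with a central finite difference. A sketch, assuming SciPy and the data array above (the Normal(0, 5) prior on mu contributes zero slope at mu = 0):

import numpy as np
from scipy.stats import norm, truncnorm

def logp(mu, data, lower=-1.0, upper=2.0, sigma=1.0):
    a, b = (lower - mu) / sigma, (upper - mu) / sigma
    return (norm.logpdf(mu, 0, 5) +
            truncnorm.logpdf(data, a, b, loc=mu, scale=sigma).sum())

eps = 1e-6
grad = (logp(eps, data) - logp(-eps, data)) / (2 * eps)
# grad should be close to the asserted 2.499424682024436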
Example 4
    def make_model(self):
        
        scores, team_idx = self.get_data()

        n_teams = self.n_teams
        n_games = len(scores)
        prob_func = self.prob_func
        scale = pymc.TruncatedNormal('scale',
                                     mu=self.scale,
                                     tau=np.power(1 / 5.0, 2),
                                     value=self.scale,
                                     a=0, b=10)
        
        #expo = pymc.Uniform( 'expo',0.45,1,value=0.5)
        expo = 0.5

        #need to put in initial seeding stuff
        theta_i = pymc.Normal('theta_i',
                              mu=0, tau=np.power(1 / 3.0, 2),
                              value=self.inital_theta)

        @pymc.deterministic()
        def theta(beta=theta_i):
            return beta - sum(beta)/(1.0*n_teams)

        @pymc.stochastic(observed=True)
        def games_played(value=scores, sp=scale, alpha=theta, pow=expo):
            # note: 'pow' shadows the builtin of the same name
            return sum(prob_func(dot(team_idx, alpha),
                                 value[:, 0], value[:, 1], sp, pow))
            
        @pymc.deterministic()
        def marginal_delta(beta=theta):
            return np.dot(self.marginal_mat, beta)
        
        return pymc.Model(locals())
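
A hedged usage sketch (ranker is a hypothetical instance of the surrounding class): the returned pymc.Model can be optimized or sampled directly.

model = ranker.make_model()
M = pymc.MAP(model)
M.fit()                        # sets every stochastic to its MAP value
print(model.theta_i.value)     # per-team strengths at the optimum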
Example 5
    def makeModelPrior(self, manager, parts):

        options = manager.options

        if options.concentration is None:
            parts.log10concentration = pymc.TruncatedNormal(
                'log10concentration', mu=0.6, tau=1. / 0.116**2,
                a=np.log10(1.), b=np.log10(10.))  # note: tau, not sigma

            @pymc.deterministic
            def cdelta(log10concentration=parts.log10concentration):
                return 10**log10concentration

            parts.cdelta = cdelta
        else:
            parts.cdelta = options.concentration

        manager.massdelta = options.delta
        parts.massdelta = options.delta
        parts.scaledmdelta = pymc.Uniform('scaledmdelta',
                                          options.masslow / massscale,
                                          options.masshigh / massscale)

        @pymc.deterministic
        def mdelta(scaledmdelta=parts.scaledmdelta):
            return massscale * scaledmdelta

        parts.mdelta = mdelta
Example 6
class likelihood_model:

    # Stochastic variables for signal, background, and total event rates
    #signal_rate     = pymc.Normal('signal_rate',     mu=s*muT,  tau=1/sigmas**2)
    #background_rate = pymc.Normal('background_rate', mu=b,      tau=1/sigmab**2)
    # Doh, need to use truncated normal to prevent negative values
    signal_rate     = pymc.TruncatedNormal('signal_rate',     mu=s*muT, tau=1/sigmas**2, a=0, b=np.inf)
    background_rate = pymc.TruncatedNormal('background_rate', mu=b,     tau=1/sigmab**2, a=0, b=np.inf)

    # Deterministic variable (simply the sum of the signal and background rates)
    total_rate = pymc.LinearCombination('total_rate', [1, 1], [signal_rate, background_rate])
    # Stochastic variable for number of observed events
    observed_events = pymc.Poisson('observed_events', mu=total_rate)

    # Deterministic variable for the test statistic
    @pymc.deterministic()
    def qCLs(n=observed_events):
        q, chi2B = self.QCLs(n, s)  # 'self' and 's' come from the enclosing scope
        return q
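
PyMC2 can assemble a sampler directly from an object namespace such as this class. A sketch, assuming s, muT, sigmas, sigmab and b are defined in the enclosing scope:

mcmc = pymc.MCMC(likelihood_model)
mcmc.sample(iter=10000, burn=2000)
q_samples = mcmc.trace('qCLs')[:]   # sampled distribution of the test statistic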
Example 7
    def model(x, f):
        """ priors distributions
        """
        one_x_offset = pymc.Uniform("one_x_offset",
                                    _t_initial,
                                    _t_peak,
                                    value=_t_initial)
        two_x_offset = pymc.Uniform("two_x_offset",
                                    _t_initial,
                                    _t_final,
                                    value=_t_final)

        one_amplitude = pymc.TruncatedNormal("one_amplitude",
                                             mu=1,
                                             tau=tau,
                                             a=1 - ampl_limit * sigma_ampl,
                                             b=1 + ampl_limit * sigma_ampl,
                                             value=1)
        two_amplitude = pymc.TruncatedNormal("two_amplitude",
                                             mu=1,
                                             tau=tau,
                                             a=1 - ampl_limit * sigma_ampl,
                                             b=1 + ampl_limit * sigma_ampl,
                                             value=1)

        @pymc.deterministic(plot=False)
        def mod_two_pulse(x=time,
                          one_x_offset=one_x_offset,
                          two_x_offset=two_x_offset,
                          one_amplitude=one_amplitude,
                          two_amplitude=two_amplitude):
            return f_model(x, x_offset=one_x_offset,
                           amplitude=one_amplitude) +\
                f_model(x, x_offset=two_x_offset,
                        amplitude=two_amplitude)

        y = pymc.Normal("y",
                        mu=mod_two_pulse,
                        tau=tau_rms,
                        value=signal,
                        observed=True)
        return locals()
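
Because model returns locals(), the whole dictionary can be handed to the sampler. A hedged sketch, assuming time, signal, f_model and the prior constants exist in scope:

fit = pymc.MCMC(model(x, f))
fit.sample(iter=40000, burn=10000)
one_off = fit.trace('one_x_offset')[:]   # posterior arrival time of pulse 1
two_off = fit.trace('two_x_offset')[:]   # posterior arrival time of pulse 2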
Example 8
def model_gen():

    varlist = []

    stdev = pymc.TruncatedNormal('stdev',
                                 mu=400,
                                 tau=1.0 / (400**2),
                                 a=0,
                                 b=Inf)
    varlist.append(stdev)

    @pymc.deterministic
    def precision(stdev=stdev):
        return 1.0 / (stdev**2)

    fakeA = pymc.TruncatedNormal('a', mu=1, tau=1.0 / (50**2), a=0, b=Inf)
    b = pymc.Uniform('b', lower=.05, upper=2.0)
    a = fakeA * maxRe**(-b)
    z = pymc.Normal('zero', mu=0, tau=1.0 / (400**2))

    varlist.append(fakeA)
    varlist.append(a)
    varlist.append(b)
    varlist.append(z)

    @pymc.deterministic
    def nonlinear(Re=ReData, a=a, b=b, z=z):
        # 'value'/'observed' removed: they are not valid parents for a deterministic
        return a * Re**b + z

    results = pymc.Normal('results',
                          mu=nonlinear,
                          tau=precision,
                          value=measured,
                          observed=True)
    varlist.append(results)

    return varlist
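
Note that a = fakeA * maxRe**(-b) is not a plain number: arithmetic on PyMC2 nodes builds an automatically named Deterministic, which is why a can be appended to varlist and traced like the explicit nodes. A quick check:

import pymc
print(isinstance(a, pymc.Deterministic))   # True: the product is itself a node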
Example 9
def model_gen():

    variables = []

    factors = pymc.Normal(
        "factormagnitudes",
        mu=zeros(observations),
        tau=ones(observations),
    )
    limits = ones(dimensions) * -Inf
    limits[0] = 0.0  # pin the first loading's lower bound at zero to fix the factor sign ambiguity
    loadings = pymc.TruncatedNormal("factorloadings",
                                    mu=ones(dimensions),
                                    tau=ones(dimensions) * (1**-2),
                                    a=limits,
                                    b=Inf)
    returnSDs = pymc.Gamma("residualsds",
                           alpha=ones(dimensions) * 1,
                           beta=ones(dimensions) * .5)

    variables.append(loadings)
    variables.append(returnSDs)
    variables.append(factors)

    @pymc.deterministic
    def returnPrecisions(stdev=returnSDs):
        precisions = (ones(shape) * (stdev**-2)[:, newaxis]).ravel()

        return precisions

    @pymc.deterministic
    def meanReturns(factors=factors, loadings=loadings):

        means = factors[newaxis, :] * loadings[:, newaxis]

        return means.ravel()

    returns = pymc.Normal("returns",
                          mu=meanReturns,
                          tau=returnPrecisions,
                          observed=True,
                          value=data.ravel())

    variables.append(returns)

    return variables
Example 10
    def generate_pymc_(self, params, q0=None):
        '''
        Creates PyMC objects for each param in the params dictionary

        NOTE: the second argument for normal distributions is VARIANCE

        Prior option:
            An arbitrary prior distribution derived from a set of samples (e.g.,
            a previous mcmc run) can be passed with the following syntax:

            params = {<name> : ['KDE', <pymc_database>, <param_names>]}

            where <name> is the name of the distribution (e.g., 'prior' or
            'joint_dist'), <pymc_database> is the pymc database containing the
            samples from which the prior distribution will be estimated, and
            <param_names> are the children parameter names corresponding to the
            dimension of the desired sample array. This method will use all
            samples of the Markov chain contained in <pymc_database> for all
            traces named in <param_names>. Gaussian kernel-density estimation
            is used to derive the joint parameter distribution, which is then
            treated as a prior in subsequent mcmc analyses using the current
            class instance. The parameters named in <param_names> will be
            traced as will the multivariate distribution named <name>.
        '''
        pymc_mod = []
        pymc_mod_order = []
        parents = dict()

        # Iterate through params, assign prior distributions
        for key, args in self.params.iteritems():
            # Distribution name should be first entry in [key]
            dist = args[0].lower()

            if dist == 'normal':
                if q0 == None:
                    RV = [pymc.Normal(key, mu=args[1], tau=1. / args[2])]
                else:
                    RV = [
                        pymc.Normal(key,
                                    mu=args[1],
                                    tau=1. / args[2],
                                    value=q0[key])
                    ]
            elif dist == 'uniform':
                if q0 == None:
                    RV = [pymc.Uniform(key, lower=args[1], upper=args[2])]
                else:
                    RV = [
                        pymc.Uniform(key,
                                     lower=args[1],
                                     upper=args[2],
                                     value=q0[key])
                    ]
            elif dist == 'discreteuniform':
                if q0 == None:
                    RV = [
                        pymc.DiscreteUniform(key, lower=args[1], upper=args[2])
                    ]
                else:
                    RV = [
                        pymc.DiscreteUniform(key,
                                             lower=args[1],
                                             upper=args[2],
                                             value=q0[key])
                    ]
            elif dist == 'truncatednormal':
                if q0 == None:
                    RV = [
                        pymc.TruncatedNormal(key,
                                             mu=args[1],
                                             tau=1. / args[2],
                                             a=args[3],
                                             b=args[4])
                    ]
                else:
                    RV = [
                        pymc.TruncatedNormal(key,
                                             mu=args[1],
                                             tau=1. / args[2],
                                             a=args[3],
                                             b=args[4],
                                             value=q0[key])
                    ]
            elif dist == 'kde':
                kde = multivariate_kde_from_samples(args[1], args[2])
                kde_rv, rvs = self._create_kde_stochastic(kde, key, args[2])
                if q0 != None:
                    kde_rv.value = q0
                RV = [kde_rv]
                for rv_key, rv_value in rvs.iteritems():
                    parents[rv_key] = rv_value
                    RV.append(rv_value)
            else:
                raise KeyError('The distribution "' + dist +
                               '" is not supported.')

            parents[key] = RV[0]
            pymc_mod_order.append(key)
            pymc_mod += RV

        return parents, pymc_mod, pymc_mod_order
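
A hedged sketch of the expected params layout, inferred from the parsing above (the distribution name comes first; recall that the second argument of the normal entries is a variance). Note the loop reads self.params, so the dictionary is presumably stored on the instance first; spec is a hypothetical instance:

spec.params = {
    'mu': ['normal', 0.0, 4.0],                         # mean, variance
    'width': ['uniform', 0.0, 10.0],                    # lower, upper
    'scale': ['truncatednormal', 1.0, 0.25, 0.0, 5.0],  # mu, variance, a, b
}
parents, pymc_mod, pymc_mod_order = spec.generate_pymc_(spec.params)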
Example 11
    def makeModelPrior(self, manager, parts):

        options = manager.options

        if options.concentration is None:
            parts.log10concentration = pymc.TruncatedNormal(
                'log10concentration', mu=0.6, tau=1. / 0.116**2,
                a=np.log10(1.), b=np.log10(10.))  # note: tau, not sigma

            @pymc.deterministic
            def concentration(log10concentration=parts.log10concentration):
                return 10**log10concentration

            parts.concentration = concentration
        else:
            parts.concentration = options.concentration

        if options.logprior:

            parts.logmass_15mpc = pymc.Uniform('logmass_15mpc',
                                               np.log10(options.masslow),
                                               np.log10(options.masshigh))

            @pymc.deterministic(name='mass_15mpc')
            def mass_15mpc(logmass=parts.logmass_15mpc):
                return 10**logmass

            parts.mass_15mpc = mass_15mpc

        else:

            parts.mass_15mpc = pymc.Uniform('mass_15mpc', options.masslow,
                                            options.masshigh)

        @pymc.deterministic
        def r_scale(mass=parts.mass_15mpc,
                    concentration=parts.concentration,
                    zcluster=parts.zcluster):

            try:
                rs = nfwutils.RsMassInsideR(mass, concentration, zcluster, 1.5)
            except ValueError:
                raise pymc.ZeroProbability

            return rs

#            parts.m200 = pymc.Uniform('m200', options.masslow, options.masshigh)
#
#        @pymc.deterministic
#        def r_scale(mass = parts.m200,
#                    concentration = parts.concentration,
#                    zcluster = parts.zcluster):
#
#            try:
#                rs = nfwutils.rscaleConstM(mass, concentration, zcluster, 200.)
#            except ValueError:
#                raise pymc.ZeroProbability
#
#            return rs
#
#

        parts.r_scale = r_scale
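
Raising pymc.ZeroProbability inside r_scale is the standard PyMC2 idiom for vetoing unphysical parameter combinations: the exception propagates to the step method, which treats the proposal as having zero posterior probability and rejects it. A minimal illustration with hypothetical names:

@pymc.deterministic
def guarded_scale(mass=parts.mass_15mpc):
    if mass <= 0:                  # reject unphysical draws
        raise pymc.ZeroProbability
    return mass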
Example 12

session_betas = []
for session_num, session_provider in enumerate(data.dr_id):
    session_betas.append(Bdr[int(session_provider)])

# the Betas to use for each session (which correspond to the
# dr that participated in them).
SB = pymc.Container(session_betas)

###
# setup the cut-off point parameters (lambda's)
# for this we will use truncated normals
#lambda_inv_var = 1e-5
lambdas = [pymc.Normal('lambda_0', 0, inv_var)]
for i in xrange(3):
    lambdas.append(
        pymc.TruncatedNormal('lambda_%s' % (i + 1), (i + 1), inv_var,
                             lambdas[i], numpy.inf))
lambdas = pymc.Container(lambdas)


#-------------------- model ------------------#
@deterministic()
def y_hat(X=X_mat, session_betas=SB):
    # y_hat = x_i * beta_i
    # where beta_i are coefficients corresponding
    # to the dr who participated in session i.
    out = numpy.zeros(num_sessions)
    for i, x_i in enumerate(X):
        beta_i = session_betas[i]
        #out = out + numpy.dot(x_i, beta_i)
        #print numpy.dot(x_i, beta_i)[0][0]
        out[i] = numpy.dot(x_i, beta_i)[0]
    return out
Example 13
def pdf(trace,
        keys,
        labels=None,
        color='0.2',
        facecolor='C0',
        line_alpha=1.0,
        face_alpha=0.9,
        plot_prior=False,
        params=None,
        xylim=None,
        figsize=None,
        ylabel=None,
        nbins_x=3,
        nbins_y=6,
        fname='pdfs.png',
        truth=None):
    '''
    Plots the probability distribution function of the parameters defined by
    "trace", "keys" and "labels" along with their associated chains obtained
    from MCMC sampling. 
    '''
    print 'plotting parameter chains/pdfs...'

    # set up labels if not provided
    if labels is None:
        labels = keys
    label_dict = {key: lab for key, lab in zip(keys, labels)}

    # handle extra keys that are not in trace
    i_keep = []
    for i, k in enumerate(keys):
        try:
            trace(k)
            i_keep.append(i)
        except KeyError:
            print 'param <%s> is not in trace; skipping this pdf plot.' % k
    keys = [keys[i] for i in i_keep]
    labels = [labels[i] for i in i_keep]

    # plot
    if figsize == None:
        fig = plt.figure(figsize=[10, 10 * len(keys) / 3])
    else:
        fig = plt.figure(figsize=figsize)

    ax_right = []
    ax_left = []
    for i, key in enumerate(keys):

        # TODO: add check for length of lists
        # (use per-iteration locals so list-valued arguments are not overwritten)
        fc_i = facecolor[i] if type(facecolor) == list else facecolor
        col_i = color[i] if type(color) == list else color
        fa_i = face_alpha[i] if type(face_alpha) == list else face_alpha
        la_i = line_alpha[i] if type(line_alpha) == list else line_alpha

        # define left and right axes (left = chains, right = pdfs)
        ax_left += [fig.add_subplot(len(keys), 2, i * 2 + 1)]
        ax_right += [fig.add_subplot(len(keys), 2, i * 2 + 2)]

        # plot left
        ax_left[i].plot(trace(key)[:],
                        color=col_i,
                        alpha=la_i,
                        linewidth=1)
        ax_left[i].set_ylabel(labels[i])
        ax_left[i].set_xlabel('Chain iteration')
        ax_left[i].locator_params(nbins=nbins_x, axis='x')
        ax_left[i].locator_params(nbins=nbins_y, axis='y')

        # plot right
        x = np.linspace(min(trace(key)[:]), max(trace(key)[:]), 1000)
        y = gaussian_kde(trace(key)[:]).pdf(x)
        ax_right[i].fill_between(x,
                                 np.tile(0, y.shape),
                                 y,
                                 facecolor=fc_i,
                                 alpha=fa_i)
        ax_right[i].plot(x, y, col_i)
        ax_right[i].set_xlabel(labels[i])
        if ylabel == None:
            ax_right[i].set_ylabel('Probability density')
        else:
            ax_right[i].set_ylabel(ylabel)
        ax_right[i].locator_params(nbins=nbins_x, axis='x')
        ax_right[i].locator_params(nbins=nbins_y, axis='y')

        # plot prior as dotted line if requested
        if plot_prior == True:
            if params != None:
                if params[key][0] == 'TruncatedNormal':
                    predictive = pymc.TruncatedNormal('predictive',
                                                      params[key][1],
                                                      params[key][2],
                                                      params[key][3],
                                                      params[key][4])
                    model = pymc.Model({"pred": predictive})
                    mcmc = pymc.MCMC(model)
                    mcmc.sample(10000, 1000)
                    samples = mcmc.trace('predictive')[:]

                    kde = sm.nonparametric.KDEUnivariate(samples)
                    kde.fit()
                    x_prior = kde.support
                    y_prior = kde.density
                    ax_right[i].plot(x_prior, y_prior, '--',
                                     color='k')  #color)

        if truth != None:
            if type(truth) == dict:
                ax_right[i].plot(truth[key], 0., 'k^')
            else:
                raise TypeError('truth must be dictionary w/ params as keys')

        # set parameter axis limits if provided
        if xylim != None:
            if key in xylim:
                ax_right[i].set_xlim(xylim[key])
                ax_left[i].set_ylim(xylim[key])
        else:
            ax_right[i].set_ylim(ymin=0)
            ax_left[i].set_xlim([0, len(trace(key)[:])])

    fig.tight_layout()
    plt.savefig(fname, dpi=300)

    return plt.gcf()
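
A hedged usage sketch (mcmc is a fitted pymc.MCMC object): mcmc.trace is callable with a key, which is exactly the interface pdf expects, and each params entry mirrors the positional (mu, tau, a, b) arguments replayed for the prior above:

fig = pdf(mcmc.trace, ['mu', 'scale'],
          labels=['mu', 'scale'],
          plot_prior=True,
          params={'mu': ['TruncatedNormal', 0.0, 1.0, -2.0, 2.0]},
          fname='mu_scale_pdfs.png')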
Example 14
    def complete_model(self):

        # TODO Priors data should go into configuration file

        # Gas parameters
        ne = pymc2.TruncatedNormal('ne',
                                   self.obj_data['nSII'],
                                   self.obj_data['nSII_error']**-2,
                                   a=50.0,
                                   b=1000.0)
        cHbeta = pymc2.TruncatedNormal('cHbeta', 0.15, 0.05**-2, a=0.0, b=3.0)
        T_low = pymc2.TruncatedNormal('T_low',
                                      self.obj_data['TSIII'],
                                      self.obj_data['TSIII_error']**-2,
                                      a=7000.0,
                                      b=20000.0)

        # Metals abundances
        S2_abund = pymc2.Uniform('S2_abund', 0.000001, 0.001)
        S3_abund = pymc2.Uniform('S3_abund', 0.000001, 0.001)
        O2_abund = pymc2.Uniform('O2_abund', 0.000001, 0.001)
        O3_abund = pymc2.Uniform('O3_abund', 0.000001, 0.001)
        N2_abund = pymc2.Uniform('N2_abund', 0.000001, 0.001)
        Ar3_abund = pymc2.Uniform('Ar3_abund', 0.000001, 0.001)
        Ar4_abund = pymc2.Uniform('Ar4_abund', 0.000001, 0.001)

        # Helium parameters
        He1_abund = pymc2.Uniform('He1_abund', 0.050, 0.15)
        tau = pymc2.TruncatedNormal('tau', 0.75, 0.5**-2, a=0.0, b=7.0)
        # (cHbeta is already defined above with the gas parameters)
        T_He = pymc2.TruncatedNormal('T_He',
                                     self.obj_data['TSIII'],
                                     self.obj_data['TSIII_error']**-2,
                                     a=7000.0,
                                     b=20000.0,
                                     value=14500.0)

        #Stellar parameters
        Av_star = pymc2.Uniform('Av_star', 0.0, 5.00)
        sigma_star = pymc2.Uniform('sigma_star', 0.0, 5.00)
        # z_star    = pymc2.Uniform('z_star', self.z_min_ssp_limit, self.z_max_ssp_limit)
        ssp_coefs = [
            pymc2.Uniform('ssp_coefs_%i' % i, self.sspPrefit_Limits[i][0],
                          self.sspPrefit_Limits[i][1])
            for i in self.range_bases
        ]

        @pymc2.deterministic()
        def calc_Thigh(Te=T_low):
            return (1.0807 * Te / 10000.0 - 0.0846) * 10000.0

        @pymc2.deterministic()
        def calc_abund_dict(He1_abund=He1_abund,
                            S2_abund=S2_abund,
                            S3_abund=S3_abund,
                            O2_abund=O2_abund,
                            O3_abund=O3_abund,
                            N2_abund=N2_abund,
                            Ar3_abund=Ar3_abund,
                            Ar4_abund=Ar4_abund):

            self.abund_iter_dict['H1'] = He1_abund
            self.abund_iter_dict['He1'] = He1_abund
            self.abund_iter_dict['S2'] = S2_abund
            self.abund_iter_dict['S3'] = S3_abund
            self.abund_iter_dict['O2'] = O2_abund
            self.abund_iter_dict['O3'] = O3_abund
            self.abund_iter_dict['N2'] = N2_abund
            self.abund_iter_dict['Ar3'] = Ar3_abund
            self.abund_iter_dict['Ar4'] = Ar4_abund

            return self.abund_iter_dict

        @pymc2.deterministic
        def calc_colExcit_fluxes(abund_dict=calc_abund_dict,
                                 T_low=T_low,
                                 T_High=calc_Thigh,
                                 ne=ne,
                                 cHbeta=cHbeta):

            colExcit_fluxes = self.calculate_colExcit_flux(
                T_low, T_High, ne, cHbeta, abund_dict,
                self.obj_data['colLine_waves'], self.obj_data['colLine_ions'],
                self.obj_data['colLine_flambda'])

            return colExcit_fluxes

        @pymc2.deterministic
        def calc_nebular_cont(z_star=self.z_object,
                              cHbeta=self.cHbeta,
                              Te=self.TSIII,
                              He1_abund=He1_abund,
                              He2_abund=0.0,
                              Halpha_Flux=self.f_HalphaNorm):

            neb_flux_norm = self.nebular_Cont(self.input_wave, z_star, cHbeta,
                                              Te, He1_abund, He2_abund,
                                              Halpha_Flux)

            return neb_flux_norm

        @pymc2.deterministic
        def calc_continuum(z_star=self.z_object,
                           Av_star=Av_star,
                           sigma_star=sigma_star,
                           ssp_coefs=ssp_coefs,
                           nebular_flux=calc_nebular_cont):

            ssp_grid_i = self.physical_SED_model(self.onBasesWave,
                                                 self.input_wave,
                                                 self.onBasesFluxNorm, Av_star,
                                                 z_star, sigma_star,
                                                 self.Rv_model)

            fit_continuum = ssp_grid_i.dot(ssp_coefs) + nebular_flux

            return fit_continuum

        @pymc2.deterministic
        def calc_recomb_fluxes(abund_dict=calc_abund_dict,
                               T_He=T_He,
                               ne=ne,
                               cHbeta=cHbeta,
                               tau=tau):

            recomb_fluxes = self.calculate_recomb_fluxes(
                T_He, ne, cHbeta, tau, abund_dict,
                self.obj_data['recombLine_labes'],
                self.obj_data['recombLine_ions'],
                self.obj_data['recombLine_flambda'])

            return recomb_fluxes

        # QUESTION: Issues with more than one likelihood?
        # (PyMC2 sums the log-probabilities of all observed stochastics,
        # so the three likelihood terms below combine additively.)
        @pymc2.stochastic(observed=True)  # Likelihood
        def likelihood_ssp(value=self.input_continuum,
                           fit_continuum=calc_continuum,
                           sigmaContinuum=self.input_continuum_er):
            calc_continuum_masked = fit_continuum * self.obj_data['int_mask']
            chi_F = sum(
                square(calc_continuum_masked - value) / square(sigmaContinuum))
            return -chi_F / 2

        @pymc2.stochastic(observed=True)  # Likelihood
        def likelihood_recomb(value=self.recomb_fluxes,
                              H_He_TheoFlux=calc_recomb_fluxes,
                              sigmaLines=self.recomb_err):
            chi_F = sum(square(H_He_TheoFlux - value) / square(sigmaLines))
            return -chi_F / 2

        @pymc2.stochastic(observed=True)  # Likelihood
        def likelihood_colExcited(value=self.colExc_fluxes,
                                  theo_metal_fluxes=calc_colExcit_fluxes,
                                  sigmaLines=self.colExc_fluxes):
            # note: sigmaLines is set to the fluxes themselves; the flux
            # uncertainties may have been intended here
            chi_F = sum(square(theo_metal_fluxes - value) / square(sigmaLines))
            return -chi_F / 2

        return locals()
Example 15
import numpy as np
from scipy.stats import truncnorm as tn

import pymc
#mu = 25.0
#sigma = 11.25
#a = 1.0
#b = 650.0
#vals = tn(a=a, b=b, loc=mu, scale=sigma)
#plt.hist(vals.rvs(100000), bins=50)
#plt.xlim(0, 100)
#plt.show()


N = 10000
Vcmax = [pymc.TruncatedNormal('Vcmax25', \
          mu=100.0, tau=1.0/61.25**2, a=0.0, b=650.0).value \
          for i in xrange(N)]

Jfac = [pymc.TruncatedNormal('Jfac', mu=1.8, tau=1.0/0.5**2, \
        a=0.0, b=5.0).value for i in xrange(N)]
        
        
Rdfac = [pymc.Uniform('Rdfac', lower=0.005, upper=0.05).value \
         for i in xrange(N)]
        
Eaj = [pymc.TruncatedNormal('Eaj', mu=40000.0, tau=1.0/10000.0**2, a=0.0, 
       b=199999.9).value for i in xrange(N)]
        
Eav = [pymc.TruncatedNormal('Eav', mu=60000.0, tau=1.0/10000.0**2, a=0.0, 
       b=199999.9).value for i in xrange(N)]
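
Building a fresh stochastic per draw works but is slow. SciPy's truncnorm (already imported as tn) gives a vectorized equivalent; caution: tn takes its bounds in standardized units, (bound - loc) / scale, which the commented-out block at the top overlooks. A sketch for Vcmax:

mu, sigma = 100.0, 61.25
a_std = (0.0 - mu) / sigma      # standardized lower bound
b_std = (650.0 - mu) / sigma    # standardized upper bound
Vcmax = tn.rvs(a=a_std, b=b_std, loc=mu, scale=sigma, size=N)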
        
Example 16
def make_model(data, mi_mean_min, mi_mean_max, GF_mean_min, GF_mean_max, constant_proliferation = False):
    values_SOX2 = {}
    values_m = {}
    values_nonPCNA = {}
    switchpoint = {}
    mi_left = {}
    GF_left = {}
    SOX2_mean_left = {}
    mi_right = {}
    GF_right = {}
    SOX2_mean_right = {}
    cells_SOX2_float = {}
    cells_nonPCNA = {}
    cells_m = {}

    
    ls = 50.0 # length of section
    l = pd.read_csv('../../data/cell_length_data.csv')['cell_length'].mean()  # length of cell
    
    def step_function(x, switchpoint, left_value, right_value):
        '''Return an array in the same format as the passed positions array.

        The output has the same size as the experimental data, but holds the
        left-hand average up to the switchpoint and the right-hand average
        past it. In effect, this builds the step model against which we
        compare the data.
        '''
        return sp.where(x<=switchpoint, left_value, right_value)

    def ma(array, fill_value):
        return sp.ma.masked_array(array, sp.isnan(array), fill_value = fill_value)
  


    #data = data.dropna(how='all', subset = ['m', 'PCNA', 'SOX2'])
    
    # I'll drop all nan because of the potential bug with the binomials (see my question on stackoverflow)
    data = data.dropna(how='all', subset = ['m', 'PCNA', 'SOX2'])
    data = data.sort_values(['ID', 'pos'])
    
    # priors for global mean values
    
    # define priors for left side of step function
    mi_left_pop = pymc.Uniform('mi_left_pop', lower=mi_mean_min, upper=mi_mean_max, value=0.02)
    GF_left_pop = pymc.Uniform('GF_left_pop', lower=GF_mean_min, upper=GF_mean_max, value=0.8)

    # define priors for right side of step function
    if constant_proliferation:
        mi_right_pop = mi_left_pop
        GF_right_pop = GF_left_pop
    else:
        mi_right_pop = pymc.Uniform('mi_right_pop', lower=mi_mean_min, upper=mi_mean_max, value=0.04)
        GF_right_pop = pymc.Uniform('GF_right_pop', lower=GF_mean_min, upper=GF_mean_max, value=0.9)
        # stepsizes
        @pymc.deterministic(name='step_mi', plot=True)
        def step_mi(mi_left = mi_left_pop, mi_right = mi_right_pop):
            return mi_right - mi_left

        @pymc.deterministic(name='step_GF', plot=True)
        def step_GF(GF_left = GF_left_pop, GF_right = GF_right_pop):
            return GF_right - GF_left

    
    # prior distribution for sigma being uniformly distributed
    GF_sigma_inter = pymc.Uniform('GF_sigma_inter', lower = 0.001, upper = 0.2)
    mi_sigma_inter = pymc.Uniform('mi_sigma_inter', lower = 0.001, upper = 0.2)

    
    # switchpoint
    if not constant_proliferation:
        switchpoint_pop = pymc.Uniform('switchpoint_pop',
                                       lower = -2000,
                                       upper = outgrowth[data['time'].iloc[0]], 
                                       value = -500)
        switchpoint_sigma_inter = pymc.Uniform('switchpoint_sigma_inter', lower=1.0, upper=400.0, value = 50)
    
    
    for ID, IDdata in data.groupby('ID'):
        values_SOX2[ID] = ma(IDdata['SOX2'], 35.5)
        values_nonPCNA[ID] = ma(IDdata['SOX2'] - IDdata['PCNA'], 3.5)
        values_m[ID] = ma(IDdata['m'], 1.5)
        
        # Model definition

        #priors
        # switchpoint[ID]: for all observables
        
        if constant_proliferation:
            switchpoint[ID] = 0.0
        else:
            switchpoint[ID] = pymc.Normal('switchpoint_{0}'.format(ID),
                                          mu=switchpoint_pop,
                                          tau=1 / switchpoint_sigma_inter**2,
                                          value=-500, plot=False)
            

        # number of SOX2 cells
        SOX2_mean = sp.mean(values_SOX2[ID])
        SOX2_std = sp.std(values_SOX2[ID])


        # define priors for left side of step function
        mi_left[ID] = pymc.TruncatedNormal('mi_left_{0}'.format(ID),
                                           mu=mi_left_pop,
                                           tau=1.0 / mi_sigma_inter**2,
                                           a=0.0, b=1.0,
                                           value=0.02, plot=False)
        GF_left[ID] = pymc.TruncatedNormal('GF_left_{0}'.format(ID),
                                           mu=GF_left_pop,
                                           tau=1.0 / GF_sigma_inter**2,
                                           a=0.0, b=1.0,
                                           value=0.5, plot=False)
        

        # define priors for right side of step function
        mi_right[ID] = pymc.TruncatedNormal('mi_right_{0}'.format(ID),
                                            mu=mi_right_pop,
                                            tau=1.0 / mi_sigma_inter**2,
                                            a=0.0, b=1.0,
                                            value=0.02, plot=False)
        GF_right[ID] = pymc.TruncatedNormal('GF_right_{0}'.format(ID),
                                            mu=GF_right_pop,
                                            tau=1.0 / GF_sigma_inter**2,
                                            a=0.0, b=1.0,
                                            value=0.5, plot=False)
    
        
        # step functions
        @pymc.deterministic(name='mi_{}'.format(ID))
        def mi(positions = sp.array(IDdata['pos']), switchpoint = switchpoint[ID],
               left_value = mi_left[ID], right_value = mi_right[ID]):
            return step_function(positions, switchpoint, left_value, right_value)

        @pymc.deterministic(name='GF_{}'.format(ID))
        def GF(positions = sp.array(IDdata['pos']), switchpoint = switchpoint[ID],
               left_value = GF_left[ID], right_value = GF_right[ID]):
            return step_function(positions, switchpoint, left_value, right_value)

        @pymc.deterministic(name='SOX2_mean_{}'.format(ID))
        def SOX2_mean(positions = sp.array(IDdata['pos']), switchpoint = switchpoint[ID],
                      left_value = SOX2_mean , right_value = SOX2_mean):
            return step_function(positions, switchpoint, left_value, right_value)

        #likelihoods
        cells_SOX2_float[ID] = pymc.Normal('cells_SOX2_float_{0}'.format(ID),
                                           mu=SOX2_mean,
                                           tau=1 / SOX2_std**2,
                                           value=values_SOX2[ID],
                                           plot=False, observed=True)


        @pymc.deterministic(name='cells_SOX2_{}'.format(ID))
        def cells_SOX2(csf = cells_SOX2_float[ID]):
            return sp.around(csf)




        cells_nonPCNA[ID] = pymc.Binomial('cells_nonPCNA_{0}'.format(ID),
                                          n=cells_SOX2,
                                          p=(1.0 - GF),
                                          value=values_nonPCNA[ID],
                                          observed=True, plot=False)

        @pymc.deterministic(name='cells_PCNA_{}'.format(ID))
        def cells_PCNA(cnp = cells_nonPCNA[ID], cs = cells_SOX2):
            return  cs - cnp



        @pymc.deterministic(name='cells_PCNA_section_{}'.format(ID))
        def cells_PCNA_section(cp = cells_PCNA, ls = ls, l = l):
            return cp * ls / l



        cells_m[ID] = pymc.Binomial('cells_m_{0}'.format(ID),
                                    n=cells_PCNA_section,
                                    p=mi,
                                    value=values_m[ID],
                                    observed=True, plot=False)



    
    values_SOX2 = pymc.Container(values_SOX2)
    values_m = pymc.Container(values_m)
    values_nonPCNA = pymc.Container(values_nonPCNA)
    switchpoint = pymc.Container(switchpoint)
    mi_left = pymc.Container(mi_left)
    GF_left = pymc.Container(GF_left)
    SOX2_mean_left = pymc.Container(SOX2_mean_left)
    mi_right = pymc.Container(mi_right)
    GF_right = pymc.Container(GF_right)
    SOX2_mean_right = pymc.Container(SOX2_mean_right)
    cells_SOX2_float = pymc.Container(cells_SOX2_float)
    cells_nonPCNA = pymc.Container(cells_nonPCNA)
    cells_m = pymc.Container(cells_m)

    return locals()
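
A hedged sketch of driving this hierarchical model (the bounds are illustrative, not from the original source):

model_dict = make_model(data, 0.0, 0.1, 0.3, 1.0)
M = pymc.MCMC(model_dict)
M.sample(iter=100000, burn=20000, thin=10)
step_mi = M.trace('step_mi')[:]   # posterior step in mitotic index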
Example 17
def main(mcmc_args=None):

    print('Setting up parameters and priors...')

    params = Params()
    # Set up location here with command line arguments in a list.
    params.cmd_line_chg(['--kalbar'])
    assert params.site_name + 'fields.txt' == 'data/kalbarfields.txt'
    # Set parameters specific to Bayesian runs
    params.PLOT = False
    params.OUTPUT = False

    # This sends a message to CalcSol on whether or not to use CUDA
    if params.CUDA:
        globalvars.cuda = True
    else:
        globalvars.cuda = False
    # get wind data and day labels
    wind_data, days = PM.get_wind_data(*params.get_wind_params())
    params.ndays = len(days)

    # reduce domain
    params.domain_info = (10000.0, 400)  #25 m sided cells
    domain_res = params.domain_info[0] / params.domain_info[1]
    cell_area = domain_res**2

    locinfo = LocInfo(params.dataset, params.coord, params.domain_info)

    ######################################################################
    #####                        Model Priors                        #####
    ######################################################################
    lam = pm.Beta("lam", 5, 1, value=0.95)
    f_a1 = pm.TruncatedNormal("f_a1", 6, 0.3, 0, 9, value=6)
    f_a2 = pm.TruncatedNormal("f_a2", 20, 0.3, 15, 24, value=20)
    f_b1_p = pm.Gamma("fb1_p", 2, 1, value=1.5, trace=False,
                      plot=False)  #alpha,beta parameterization

    @pm.deterministic(trace=True, plot=True)
    def f_b1(f_b1_p=f_b1_p):
        return f_b1_p + 1

    f_b2_p = pm.Gamma("fb2_p", 2, 1, value=1.5, trace=False, plot=False)

    @pm.deterministic(trace=True, plot=True)
    def f_b2(f_b2_p=f_b2_p):
        return f_b2_p + 1

    g_aw = pm.Gamma("g_aw", 2.2, 1, value=1.0)
    g_bw = pm.Gamma("g_bw", 5, 1, value=3.8)
    # flight diffusion parameters. note: mean is average over flight advection
    sig_x = pm.Gamma("sig_x", 26, 0.15, value=180)
    sig_y = pm.Gamma("sig_y", 15, 0.15, value=150)
    corr_p = pm.Beta("corr_p", 5, 5, value=0.5, trace=False, plot=False)

    @pm.deterministic(trace=True, plot=True)
    def corr(corr_p=corr_p):
        return corr_p * 2 - 1

    # local spread parameters
    sig_x_l = pm.Gamma("sig_xl", 2, 0.08, value=10)
    sig_y_l = pm.Gamma("sig_yl", 2, 0.14, value=10)
    corr_l_p = pm.Beta("corr_l_p", 5, 5, value=0.5, trace=False, plot=False)

    @pm.deterministic(trace=True, plot=True)
    def corr_l(corr_l_p=corr_l_p):
        return corr_l_p * 2 - 1

    mu_r = pm.Normal("mu_r", 1., 1, value=1)
    n_periods = pm.Poisson("n_periods", 30, value=30)
    #alpha_pow = prev. time exponent in ParasitoidModel.h_flight_prob
    xi = pm.Gamma("xi", 1, 1,
                  value=0.75)  # presence to oviposition/emergence factor
    em_obs_prob = pm.Beta("em_obs_prob", 1, 1, value=0.05)  # per-wasp prob of
    # observing emergence in release field grid given max leaf collection
    # this is dependent on the size of the cell surrounding the grid point
    # ...not much to be done about this.
    grid_obs_prob = pm.Beta("grid_obs_prob", 1, 1,
                            value=0.005)  # probability of
    # observing a wasp present in the grid cell given max leaf sampling

    #card_obs_prob = pm.Beta("card_obs_prob",1,1,value=0.5) # probability of
    # observing a wasp present in the grid cell given max leaf sampling

    #### Data collection model background for sentinel fields ####
    # Need to fix linear units for area. Meters would be best.
    # Effective collection area (constant between fields) is very uncertain
    with warnings.catch_warnings():
        # squelch a warning based on pymc coding we don't need to worry about
        warnings.simplefilter("ignore", RuntimeWarning)
        A_collected = pm.TruncatedNormal("A_collected",
                                         2500,
                                         1 / 2500,
                                         0,
                                         min(locinfo.field_sizes.values()) *
                                         cell_area,
                                         value=2500)  # in m**2
    # Each field has its own binomial probability.
    # Probabilities are likely to be small, and pm.Beta cannot handle small
    #   parameter values. So we will use TruncatedNormal again.
    N = len(locinfo.sent_ids)
    sent_obs_probs = np.empty(N, dtype=object)
    # fix beta for the Beta distribution
    sent_beta = 40
    # mean of Beta distribution will be A_collected/field size
    for n, key in enumerate(locinfo.sent_ids):
        sent_obs_probs[n] = pm.Beta(
            "sent_obs_probs_{}".format(key),
            A_collected / (locinfo.field_sizes[key] * cell_area) * sent_beta /
            (1 - A_collected / (locinfo.field_sizes[key] * cell_area)),
            sent_beta,
            value=0.1 * 3600 / (locinfo.field_sizes[key] * cell_area))

    sent_obs_probs = pm.Container(sent_obs_probs)

    # Max a Posteriori estimates have consistently returned a value near zero
    #   for sprd_factor, so we will comment these sections out.
    # if params.dataset == 'kalbar':
    #     # factor for kalbar initial spread
    #     sprd_factor = pm.Uniform("sprd_factor",0,1,value=0.3)
    # else:
    #     sprd_factor = None
    sprd_factor = None

    #### Collect variables and setup block update ####
    params_ary = pm.Container(
        np.array([
            g_aw, g_bw, f_a1, f_b1, f_a2, f_b2, sig_x, sig_y, corr, sig_x_l,
            sig_y_l, corr_l, lam, n_periods, mu_r
        ],
                 dtype=object))
    # The stochastic variables in this list (and the stochastics behind the
    #   deterministic ones) should be block updated in order to avoid the large
    #   computational expense of evaluating the model multiple times for each
    #   MCMC iteration. To do this, starting step variances must be defined
    #   for each variable. This is done via a scaling dict.
    stoc_vars = [
        g_aw, g_bw, f_a1, f_b1_p, f_a2, f_b2_p, sig_x, sig_y, corr_p, sig_x_l,
        sig_y_l, corr_l_p, lam, n_periods, mu_r
    ]
    step_scales = {
        g_aw: 0.04,
        g_bw: 0.08,
        f_a1: 0.25,
        f_b1_p: 0.05,
        f_a2: 0.25,
        f_b2_p: 0.05,
        sig_x: 2,
        sig_y: 2,
        corr_p: 0.0005,
        sig_x_l: 2,
        sig_y_l: 2,
        corr_l_p: 0.0005,
        lam: 0.0005,
        n_periods: 1,
        mu_r: 0.005
    }

    print('Getting initial model values...')

    ######################################################################
    #####                          Run Model                         #####
    ######################################################################
    @pm.deterministic(plot=False, trace=False)
    def pop_model(params=params,
                  params_ary=params_ary,
                  locinfo=locinfo,
                  wind_data=wind_data,
                  days=days,
                  sprd_factor=sprd_factor):
        '''This function acts as an interface between PyMC and the model.
        Not only does it run the model, but it provides an emergence potential
        based on the population model result projected forward from feasible
        oviposition dates. To modify how this projection happens, edit
        popdensity_to_emergence. Returned values from this function should be
        nearly ready to compare to data.
        '''
        modeltic = time.time()
        ### Alter params with stochastic variables ###

        # g wind function parameters
        params.g_params = tuple(params_ary[0:2])
        # f time of day function parameters
        params.f_params = tuple(params_ary[2:6])
        # Diffusion coefficients
        params.Dparams = tuple(params_ary[6:9])
        params.Dlparams = tuple(params_ary[9:12])
        # Probability of any flight during the day under ideal circumstances
        params.lam = params_ary[12]

        # TRY BOTH SCALINGS - VARYING mu_r and n_periods
        # scaling flight advection to wind advection
        # number of time periods (based on interp_num) in one flight
        params.n_periods = params_ary[
            13]  # if interp_num = 30, this is # of minutes
        params.mu_r = params_ary[14]

        ### PHASE ONE ###
        # First, get spread probability for each day as a coo sparse matrix
        max_shape = np.array([0, 0])
        pm_args = [(days[0], wind_data, *params.get_model_params(),
                    params.r_start)]
        pm_args.extend([(day, wind_data, *params.get_model_params())
                        for day in days[1:params.ndays]])

        ##### Kalbar wind started recording a day late. Spread the population
        #####   locally before running full model.
        if sprd_factor is not None:
            res = params.domain_info[0] / params.domain_info[1]
            mean_drift = np.array([-25., 15.])
            xdrift_int = int(mean_drift[0] // res)
            xdrift_r = mean_drift[0] % res
            ydrift_int = int(mean_drift[1] // res)
            ydrift_r = mean_drift[1] % res
            longsprd = PM.get_mvn_cdf_values(
                res, np.array([xdrift_r, ydrift_r]),
                PM.Dmat(params_ary[6], params_ary[7], params_ary[8]))
            shrtsprd = PM.get_mvn_cdf_values(
                res, np.array([0., 0.]),
                PM.Dmat(params_ary[9], params_ary[10], params_ary[11]))

            mlen = int(
                max(longsprd.shape[0], shrtsprd.shape[0]) +
                max(abs(xdrift_int), abs(ydrift_int)) * 2)
            sprd = np.zeros((mlen, mlen))
            lbds = [
                int(mlen // 2 - longsprd.shape[0] // 2),
                int(mlen // 2 + longsprd.shape[0] // 2 + 1)
            ]
            sprd[lbds[0] - ydrift_int:lbds[1] - ydrift_int, lbds[0] +
                 xdrift_int:lbds[1] + xdrift_int] = longsprd * sprd_factor
            sbds = [
                int(mlen // 2 - shrtsprd.shape[0] // 2),
                int(mlen // 2 + shrtsprd.shape[0] // 2 + 1)
            ]
            sprd[sbds[0]:sbds[1],
                 sbds[0]:sbds[1]] += shrtsprd * (1 - sprd_factor)

            sprd[int(sprd.shape[0] // 2),
                 int(sprd.shape[0] // 2)] += max(0, 1 - sprd.sum())
            pmf_list = [sparse.coo_matrix(sprd)]
        else:
            pmf_list = []

        ###################### Get pmf_list from multiprocessing
        pmf_list.extend(pool.starmap(PM.prob_mass, pm_args))

        for pmf in pmf_list:
            for dim in range(2):
                if pmf.shape[dim] > max_shape[dim]:
                    max_shape[dim] = pmf.shape[dim]

        r_spread = []  # holds the one-day spread for each release day.

        # Reshape the prob. mass function of each release day into solution form
        for ii in range(params.r_dur):
            offset = params.domain_info[1] - pmf_list[ii].shape[0] // 2
            dom_len = params.domain_info[1] * 2 + 1
            r_spread.append(
                sparse.coo_matrix(
                    (pmf_list[ii].data,
                     (pmf_list[ii].row + offset, pmf_list[ii].col + offset)),
                    shape=(dom_len, dom_len)).tocsr())

        ### PHASE TWO ###
        # Pass the probability list, pmf_list, and other info to convolution solver.
        #   This will return the finished population model.
        with Capturing() as output:
            if sprd_factor is not None:
                # extend day count by one
                days_ext = [days[0] - 1]
                days_ext.extend(days)
                modelsol = get_populations(r_spread, pmf_list, days_ext,
                                           params.ndays + 1, dom_len,
                                           max_shape, params.r_dur,
                                           params.r_number, params.r_mthd())
                # remove the first one and start where wind started.
                modelsol = modelsol[1:]
            else:
                modelsol = get_populations(r_spread, pmf_list, days,
                                           params.ndays, dom_len, max_shape,
                                           params.r_dur, params.r_number,
                                           params.r_mthd())

        # modelsol now holds the model results for this run as CSR sparse arrays

        # get emergence potential (measured in expected number of wasps previously
        #   present whose oviposition would result in emergence on the given date)
        #   from the model result
        release_emerg, sentinel_emerg = popdensity_to_emergence(
            modelsol, locinfo)

        # get the expected wasp populations at grid points on sample days
        grid_counts = popdensity_grid(modelsol, locinfo)

        # get the expected wasp populations in cardinal directions
        '''card_counts = popdensity_card(modelsol,locinfo,params.domain_info)'''

        ## For the lists release_emerg and sentinel_emerg:
        ##    Each list entry corresponds to a data collection day (one array)
        ##    In each array:
        ##    Each column corresponds to an emergence observation day (as in data)
        ##    Each row corresponds to a grid point or sentinel field, respectively
        ## For the array grid_counts:
        ##    Each column corresponds to an observation day
        ##    Each row corresponds to a grid point
        ## For the list card_counts:
        ##    Each list entry corresponds to a sampling day (one array)
        ##    Each column corresponds to a step in a cardinal direction
        ##    Each row corresponds to a cardinal direction
        # print('{:03.1f} sec./model at {}'.format(time.time() - modeltic,
        #     time.strftime("%H:%M:%S %d/%m/%Y")),end='\r')
        # sys.stdout.flush()
        return (release_emerg, sentinel_emerg, grid_counts)  #,card_counts)

    print('Parsing model output and connecting to Bayesian model...')

    ######################################################################
    #####                   Connect Model to Data                    #####
    ######################################################################

    ### Parse the results of pop_model into separate deterministic variables ###
    '''Get Poisson probabilities for sentinel field emergence. Parameters:
        xi is constant, emerg is a list of ndarrays, betas is a 1D array of
        field probabilities'''
    Ncollections = len(locinfo.sent_DataFrames)
    sent_poi_rates = []
    for ii in range(Ncollections):
        s_ndays = len(locinfo.sent_DataFrames[ii]['datePR'].unique())
        sent_poi_rates.append(
            pm.Lambda('sent_poi_rate_{}'.format(ii),
                      lambda xi=xi, ndays=s_ndays, betas=sent_obs_probs,
                      emerg_model=pop_model[1][ii]: xi * emerg_model * np.tile(
                          betas, (ndays, 1)).T,
                      trace=False))
    sent_poi_rates = pm.Container(sent_poi_rates)
    '''Return Poisson probabilities for release field grid emergence. Parameters:
        xi is constant, emerg is a list of ndarrays. collection effort is
        specified in locinfo.'''
    Ncollections = len(locinfo.release_DataFrames)
    rel_poi_rates = []
    for ii in range(Ncollections):
        r_effort = locinfo.release_collection[ii]  #fraction of max collection
        r_ndays = len(locinfo.release_DataFrames[ii]['datePR'].unique())
        rel_poi_rates.append(
            pm.Lambda('rel_poi_rate_{}'.format(ii),
                      lambda xi=xi, ndays=r_ndays, r_effort=r_effort, beta=
                      em_obs_prob, emerg_model=pop_model[0][ii]: xi *
                      emerg_model * np.tile(r_effort * beta, (ndays, 1)).T,
                      trace=False))
    rel_poi_rates = pm.Container(rel_poi_rates)

    @pm.deterministic(plot=False, trace=False)
    def grid_poi_rates(locinfo=locinfo,
                       beta=grid_obs_prob,
                       obs_model=pop_model[2]):
        '''Return Poisson probabilities for grid sampling
        obs_model is an ndarray, sampling effort is specified in locinfo.'''
        return beta * locinfo.grid_samples * obs_model

    '''Return Poisson probabilities for cardinal direction sampling
        obs_model is a list of ndarrays, sampling effort is assumed constant'''
    '''
    card_poi_rates = []
    for ii,obs in enumerate(pop_model[3]):
        card_poi_rates.append(pm.Lambda('card_poi_rate_{}'.format(ii),
            lambda beta=card_obs_prob, obs=obs: beta*obs))
    card_poi_rates = pm.Container(card_poi_rates)
    '''

    # Given the expected wasp densities from pop_model, actual wasp densities
    #   are modeled as a thinned Poisson random variable about that mean.
    # Each wasp in the area then has a small probability of being seen.

    ### Connect sentinel emergence data to model ###
    N_sent_collections = len(locinfo.sent_DataFrames)
    # Create list of collection variables
    sent_collections = []
    for ii in range(N_sent_collections):
        # Apparently, pymc does not play well with 2D array parameters
        sent_collections.append(
            np.empty(sent_poi_rates[ii].value.shape, dtype=object))
        for n in range(sent_collections[ii].shape[0]):
            for m in range(sent_collections[ii].shape[1]):
                sent_collections[ii][n, m] = pm.Poisson(
                    "sent_em_obs_{}_{}_{}".format(ii, n, m),
                    sent_poi_rates[ii][n, m],
                    value=float(locinfo.sentinel_emerg[ii][n, m]),
                    observed=True)
    sent_collections = pm.Container(sent_collections)

    ### Connect release-field emergence data to model ###
    N_release_collections = len(locinfo.release_DataFrames)
    # Create list of collection variables
    rel_collections = []
    for ii in range(N_release_collections):
        rel_collections.append(
            np.empty(rel_poi_rates[ii].value.shape, dtype=object))
        for n in range(rel_collections[ii].shape[0]):
            for m in range(rel_collections[ii].shape[1]):
                rel_collections[ii][n, m] = pm.Poisson(
                    "rel_em_obs_{}_{}_{}".format(ii, n, m),
                    rel_poi_rates[ii][n, m],
                    value=float(locinfo.release_emerg[ii][n, m]),
                    observed=True)
    rel_collections = pm.Container(rel_collections)

    ### Connect grid sampling data to model ###
    grid_obs = np.empty(grid_poi_rates.value.shape, dtype=object)
    for n in range(grid_obs.shape[0]):
        for m in range(grid_obs.shape[1]):
            grid_obs[n, m] = pm.Poisson("grid_obs_{}_{}".format(n, m),
                                        grid_poi_rates[n, m],
                                        value=float(locinfo.grid_obs[n, m]),
                                        observed=True)
    grid_obs = pm.Container(grid_obs)

    ### Connect cardinal direction data to model ###
    '''
    N_card_collections = len(locinfo.card_obs_DataFrames)
    # Create list of sampling variables
    card_collections = []
    for ii in range(N_card_collections):
        card_collections.append(np.empty(card_poi_rates[ii].value.shape,
                                         dtype=object))
        for n in range(card_collections[ii].shape[0]):
            for m in range(card_collections[ii].shape[1]):
                card_collections[ii][n,m] = pm.Poisson(
                    "card_obs_{}_{}_{}".format(ii,n,m),
                    card_poi_rates[ii][n,m],
                    value=locinfo.card_obs[ii][n,m],
                    observed=True, plot=False)
    card_collections = pm.Container(card_collections)
    '''

    ######################################################################
    #####                   Collect Model and Run                    #####
    ######################################################################

    ### Collect model ###
    if sprd_factor is not None:
        Bayes_model = pm.Model([
            lam, f_a1, f_a2, f_b1_p, f_b2_p, f_b1, f_b2, g_aw, g_bw, sig_x,
            sig_y, corr_p, corr, sig_x_l, sig_y_l, corr_l_p, corr_l, n_periods,
            mu_r, sprd_factor, grid_obs_prob, xi, em_obs_prob, A_collected,
            sent_obs_probs, params_ary, pop_model, grid_poi_rates,
            rel_poi_rates, sent_poi_rates, grid_obs, rel_collections,
            sent_collections
        ])
    else:
        Bayes_model = pm.Model([
            lam, f_a1, f_a2, f_b1_p, f_b2_p, f_b1, f_b2, g_aw, g_bw, sig_x,
            sig_y, corr_p, corr, sig_x_l, sig_y_l, corr_l_p, corr_l, n_periods,
            mu_r, grid_obs_prob, xi, em_obs_prob, A_collected, sent_obs_probs,
            params_ary, pop_model, grid_poi_rates, rel_poi_rates,
            sent_poi_rates, grid_obs, rel_collections, sent_collections
        ])

    ### Run if parameters were passed in ###
    if mcmc_args is not None:
        if len(mcmc_args) == 3:
            # New run
            nsamples = int(mcmc_args[0])
            burn = int(mcmc_args[1])
            fname = mcmc_args[2]
            if fname[-3:] != '.h5':
                fname += '.h5'
            mcmc = pm.MCMC(Bayes_model,
                           db='hdf5',
                           dbname=fname,
                           dbmode='a',
                           dbcomplevel=0)
            mcmc.use_step_method(pm.AdaptiveMetropolis,
                                 stoc_vars,
                                 scales=step_scales,
                                 interval=500,
                                 shrink_if_necessary=True)
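            # AdaptiveMetropolis proposes stoc_vars jointly, seeding its
            # proposal covariance from `scales` and re-estimating it from
            # the chain history every `interval` iterations.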
            try:
                tic = time.time()
                print('Sampling...')
                mcmc.sample(nsamples, burn)
                # sampling finished. commit to database and continue
                print('Sampling finished.')
                print('Time elapsed: {}'.format(time.time() - tic))
                print('Saving...')
                #mcmc.save_state()
                mcmc.commit()
                print('Closing...')
                mcmc.db.close()
            except:
                print('Exception: database closing...')
                mcmc.db.close()
                raise
            return
        elif len(mcmc_args) == 2:
            # Resume run
            fname = mcmc_args[0]
            nsamples = int(mcmc_args[1])
            fname = fname.strip()
            if fname[-3:] != '.h5':
                fname += '.h5'
            if os.path.isfile(fname):
                db = pm.database.hdf5.load(fname)
                mcmc = pm.MCMC(Bayes_model, db=db)
                mcmc.use_step_method(pm.AdaptiveMetropolis,
                                     stoc_vars,
                                     scales=step_scales,
                                     interval=500,
                                     shrink_if_necessary=True)
                # database loaded.
            else:
                print('File not found: {}'.format(fname))
                return
            try:
                tic = time.time()
                print('Sampling...')
                mcmc.sample(nsamples)
                # sampling finished. commit to database and continue
                print('Sampling finished.')
                print('Time elapsed: {}'.format(time.time() - tic))
                print('Saving...')
                #mcmc.save_state()
                mcmc.commit()
                print('Closing...')
                mcmc.db.close()
            except:
                print('Exception: database closing...')
                mcmc.db.close()
                raise
            return

    ######################################################################
    #####                   Start Interactive Menu                   #####
    ######################################################################
    print('--------------- MCMC MAIN MENU ---------------')
    print(" 'new': Start a new MCMC chain from the beginning.")
    print("'cont': Continue a previous MCMC chain from an hdf5 file.")
    #print("'plot': Plot traces/distribution from an hdf5 file.")
    print("'quit': Quit.")
    cmd = input('Enter: ')
    cmd = cmd.strip().lower()
    if cmd == 'new':
        print('\n\n')
        print('--------------- New MCMC Chain ---------------')
        while True:
            val = input("Enter number of realizations or 'quit' to quit:")
            val = val.strip()
            if val == 'q' or val == 'quit':
                return
            else:
                try:
                    nsamples = int(val)
                    val2 = input("Enter number of realizations to discard:")
                    val2 = val2.strip()
                    if val2 == 'q' or val2 == 'quit':
                        return
                    else:
                        burn = int(val2)
                    fname = input(
                        "Enter filename to save or 'back' to cancel:")
                    fname = fname.strip()
                    if fname == 'q' or fname == 'quit':
                        return
                    elif fname == 'b' or fname == 'back':
                        continue
                    else:
                        fname = fname + '.h5'
                        break  # BREAK LOOP AND RUN MCMC WITH GIVEN VALUES
                except ValueError:
                    print('Unrecognized input.')
                    continue
        ##### RUN FIRST MCMC HERE #####
        mcmc = pm.MCMC(Bayes_model,
                       db='hdf5',
                       dbname=fname,
                       dbmode='a',
                       dbcomplevel=0)
        mcmc.use_step_method(pm.AdaptiveMetropolis,
                             stoc_vars,
                             scales=step_scales,
                             interval=500,
                             shrink_if_necessary=True)
        try:
            tic = time.time()
            print('Sampling...')
            mcmc.sample(nsamples, burn)
            # sampling finished. commit to database and continue
            print('Sampling finished.')
            print('Time elapsed: {}'.format(time.time() - tic))
            print('Saving...')
            #mcmc.save_state()
            mcmc.commit()
        except:
            print('Exception: database closing...')
            mcmc.db.close()
            raise

    elif cmd == 'cont':
        # Load db and continue
        print('\n')
        while True:
            fname = input("Enter path to database to load, or 'q' to quit:")
            fname = fname.strip()
            if fname.lower() == 'q' or fname.lower() == 'quit':
                return
            else:
                if fname[-3:] != '.h5':
                    fname += '.h5'
                if os.path.isfile(fname):
                    db = pm.database.hdf5.load(fname)
                    mcmc = pm.MCMC(Bayes_model, db=db)
                    mcmc.use_step_method(pm.AdaptiveMetropolis,
                                         stoc_vars,
                                         scales=step_scales,
                                         interval=500,
                                         shrink_if_necessary=True)
                    break  # database loaded
                else:
                    print('File not found.')
                    #continue

    elif cmd == 'plot':
        # Get filename and pass to plotting routine.
        pass
        # return
    elif cmd == 'quit' or cmd == 'q':
        return
    else:
        print('Command not recognized.')
        print('Quitting....')
        return

    ##### MCMC Loop #####
    # This should be reached only by cmd == 'new' or 'cont' with a database.
    # It resumes sampling of a previously sampled chain.
    print('\n')
    while True:
        print('--------------- MCMC ---------------')
        print(" 'report': generate report on traces")
        print("'inspect': launch IPython to inspect state")
        print("    'run': conduct further sampling")
        print("   'quit': Quit")
        cmd = input('Enter: ')
        cmd = cmd.strip()
        cmd = cmd.lower()
        if cmd == 'inspect':
            try:
                import IPython
                IPython.embed()
            except ImportError:
                print('IPython not found.')
            except:
                print('Exception: database closing...')
                mcmc.db.close()
                raise
        elif cmd == 'run':
            val = input("Enter number of realizations or 'back':")
            val = val.strip()
            if val == 'back' or val == 'b':
                continue
            else:
                try:
                    nsamples = int(val)
                except ValueError:
                    print('Unrecognized input.')
                    continue
            # Run chain
            try:
                tic = time.time()
                print('Sampling...')
                mcmc.sample(nsamples)
                # sampling finished. commit to database and continue
                print('Sampling finished.')
                print('Time elapsed: {}'.format(time.time() - tic))
                print('Saving...')
                #mcmc.save_state()
                mcmc.commit()
            except:
                print('Exception: database closing...')
                mcmc.db.close()
                raise
        elif cmd == 'report':
            try:
                import Bayes_Plot
                Bayes_Plot.plot_traces(db=mcmc.db)
                print('Gelman-Rubin statistics')
                gr = pm.gelman_rubin(mcmc)
                print(gr)
                with open('./diagnostics/gelman-rubin.txt', 'w') as f:
                    f.write('Variable        R_hat\n')
                    f.write('---------------------\n')
                    for key, val in gr.items():
                        f.write(key + ': {}\n'.format(val))
            except:
                print('Exception: database closing...')
                mcmc.db.close()
                raise
        elif cmd == 'quit' or cmd == 'q':
            mcmc.db.close()
            print('Database closed.')
            break
        else:
            print('Command not recognized.')
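
The elementwise-Poisson pattern above (an object array of scalar pm.Poisson nodes, used because pymc 2 does not accept 2D array-valued parameters) reduces to a minimal, self-contained sketch; the shapes and values here are illustrative only:

import numpy as np
import pymc as pm

rates = np.array([[1.0, 2.0], [3.0, 4.0]])   # stand-in for a poi_rate value
counts = np.array([[1., 3.], [2., 5.]])      # stand-in for observed counts
obs = np.empty(rates.shape, dtype=object)
for n in range(rates.shape[0]):
    for m in range(rates.shape[1]):
        # one scalar Poisson node per matrix entry, clamped to its datum
        obs[n, m] = pm.Poisson('obs_{}_{}'.format(n, m), rates[n, m],
                               value=counts[n, m], observed=True)
obs = pm.Container(obs)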
Example n. 18
import numpy as np
import pymc as pm

# Observed data. The opening of this example (including the head of this
# array) was lost in extraction; the imports and assignment line are
# reconstructed here so the snippet below is runnable.
data = np.array([
    -0.8762523, 0.47377688, 0.76516415, 0.27890419, -0.07819642, -0.13399348,
    0.82877293, 0.22308624, 0.7485783, -0.14700254, -1.03145657, 0.85641097,
    0.43396285, 0.47901653, 0.80137086, 0.33566812, 0.71443253, -1.57590815,
    -0.24090179, -2.0128344, 0.34503324, 0.12944091, -1.5327008, 0.06363034,
    0.21042021, -0.81425636, 0.20209279, -1.48130423, -1.04983523, 0.16001774,
    -0.75239072, 0.33427956, -0.10224921, 0.26463561, -1.09374674, -0.72749811,
    -0.54892116, -1.89631844, -0.94393545, -0.2521341, 0.26840341, 0.23563219,
    0.35333094
])

# Model: the data are truncated-normally distributed with unknown upper bound.
mu = pm.Normal('mu', 0, .01, value=0)
tau = pm.Exponential('tau', .01, value=1)
cutoff = pm.Exponential('cutoff', 1, value=1.3)
D = pm.TruncatedNormal('D',
                       mu,
                       tau,
                       -np.inf,
                       cutoff,
                       value=data,
                       observed=True)

M = pm.MCMC([mu, tau, cutoff, D])

# Use a TruncatedMetropolis step method that will never propose jumps below D's maximum value.
M.use_step_method(TruncatedMetropolis, cutoff, D.value.max(), np.inf)
# Get a handle to the step method handling cutoff to investigate its behavior.
S = M.step_method_dict[cutoff][0]

M.isample(10000, 0, 10)
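
The TruncatedMetropolis step method used above is not defined in this snippet. A minimal sketch in the style of the custom step methods from the PyMC2 user guide, assuming pymc2's truncated-normal helpers (rtruncated_normal, truncated_normal_like):

import pymc as pm

class TruncatedMetropolis(pm.Metropolis):
    '''Metropolis sampler whose proposals stay inside (low_bound, up_bound).'''
    def __init__(self, stochastic, low_bound, up_bound, *args, **kwargs):
        self.low_bound = low_bound
        self.up_bound = up_bound
        pm.Metropolis.__init__(self, stochastic, *args, **kwargs)

    def propose(self):
        # propose from a truncated normal centered on the current value
        tau = 1. / (self.adaptive_scale_factor * self.proposal_sd)**2
        self.stochastic.value = pm.rtruncated_normal(
            self.stochastic.value, tau, self.low_bound, self.up_bound)

    def hastings_factor(self):
        # asymmetric proposals require a Hastings correction
        tau = 1. / (self.adaptive_scale_factor * self.proposal_sd)**2
        cur_val = self.stochastic.value
        last_val = self.stochastic.last_value
        lp_for = pm.truncated_normal_like(cur_val, last_val, tau,
                                          self.low_bound, self.up_bound)
        lp_bak = pm.truncated_normal_like(last_val, cur_val, tau,
                                          self.low_bound, self.up_bound)
        return lp_bak - lp_for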
Example n. 19
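# (Module-level imports assumed by this method but not shown in the snippet:
#  from numpy import array, dot, inf; import pymc as mc)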
    def run(self):
        self.validateinput()
        data = self.data
        data = self.fluctuate(data) if self.rndseed >= 0 else data

        # unpack background dictionaries
        backgroundkeys = self.backgroundsyst.keys()
        backgrounds = array([self.background[key] for key in backgroundkeys])
        backgroundnormsysts = array(
            [self.backgroundsyst[key] for key in backgroundkeys])

        # unpack object systematics dictionary
        objsystkeys = self.objsyst['signal'].keys()
        signalobjsysts = array(
            [self.objsyst['signal'][key] for key in objsystkeys])
        backgroundobjsysts = array([])
        if len(objsystkeys) > 0 and len(backgroundkeys) > 0:
            backgroundobjsysts = array([[
                self.objsyst['background'][syst][bckg] for syst in objsystkeys
            ] for bckg in backgroundkeys])
        recodim = len(data)
        resmat = self.response
        truthdim = len(resmat)

        import priors
        truth = priors.wrapper(priorname=self.prior,
                               low=self.lower,
                               up=self.upper,
                               other_args=self.priorparams)

        bckgnuisances = []
        for name, err in zip(backgroundkeys, backgroundnormsysts):
            if err < 0.:
                bckgnuisances.append(
                    mc.Uniform('norm_%s' % name, value=1., lower=0., upper=3.))
            else:
                bckgnuisances.append(
                    mc.TruncatedNormal(
                        'gaus_%s' % name,
                        value=0.,
                        mu=0.,
                        tau=1.0,
                        a=(-1.0 / err if err > 0.0 else -inf),
                        b=inf,
                        observed=(False if err > 0.0 else True)))
        bckgnuisances = mc.Container(bckgnuisances)

        objnuisances = [
            mc.Normal('gaus_%s' % name,
                      value=self.systfixsigma,
                      mu=0.,
                      tau=1.0,
                      observed=(True if self.systfixsigma != 0 else False))
            for name in objsystkeys
        ]
        objnuisances = mc.Container(objnuisances)

        # define potential to constrain truth spectrum
        if self.regularization:
            truthpot = self.regularization.getpotential(truth)

        # This is where the FBU (Fully Bayesian Unfolding) method is implemented
        @mc.deterministic(plot=False)
        def unfold(truth=truth,
                   bckgnuisances=bckgnuisances,
                   objnuisances=objnuisances):
            smearbckg = 1.
            if len(backgroundobjsysts) > 0:
                smearbckg = smearbckg + dot(objnuisances, backgroundobjsysts)
            smearedbackgrounds = backgrounds * smearbckg
            bckgnormerr = array([
                (-1. + nuis) / nuis if berr < 0. else berr
                for berr, nuis in zip(backgroundnormsysts, bckgnuisances)
            ])
            bckg = dot(1. + bckgnuisances * bckgnormerr, smearedbackgrounds)
            reco = dot(truth, resmat)
            smear = 1. + dot(objnuisances, signalobjsysts)
            out = bckg + reco * smear
            return out

        unfolded = mc.Poisson('unfolded',
                              mu=unfold,
                              value=data,
                              observed=True,
                              size=recodim)
        allnuisances = mc.Container(bckgnuisances + objnuisances)
        modelelements = [unfolded, unfold, truth, allnuisances]
        if self.regularization: modelelements += [truthpot]
        model = mc.Model(modelelements)

        if self.use_emcee:
            from emcee_sampler import sample_emcee
            mcmc = sample_emcee(model,
                                nwalkers=self.nwalkers,
                                samples=self.nMCMC / self.nwalkers,
                                burn=self.nBurn / self.nwalkers,
                                thin=self.nThin)
        else:
            map_ = mc.MAP(model)
            map_.fit()
            mcmc = mc.MCMC(model)
            mcmc.use_step_method(mc.AdaptiveMetropolis, truth + allnuisances)
            mcmc.sample(self.nMCMC, burn=self.nBurn, thin=self.nThin)

#        mc.Matplot.plot(mcmc)

        self.trace = [
            mcmc.trace('truth%d' % bin)[:] for bin in range(truthdim)
        ]
        self.nuisancestrace = {}
        for name, err in zip(backgroundkeys, backgroundnormsysts):
            if err < 0.:
                self.nuisancestrace[name] = mcmc.trace('norm_%s' % name)[:]
            if err > 0.:
                self.nuisancestrace[name] = mcmc.trace('gaus_%s' % name)[:]
        for name in objsystkeys:
            if self.systfixsigma == 0.:
                self.nuisancestrace[name] = mcmc.trace('gaus_%s' % name)[:]

        if self.monitoring:
            import monitoring
            monitoring.plot(self.name + '_monitoring', data, backgrounds,
                            resmat, self.trace, self.nuisancestrace,
                            self.lower, self.upper)
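
A short post-processing sketch for the traces collected above (hypothetical, not part of the class; `trace` stands for the per-bin list stored in self.trace):

import numpy as np

means = [np.mean(t) for t in trace]                      # posterior mean per truth bin
intervals = [np.percentile(t, [16, 84]) for t in trace]  # central 68% interval per bin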
Example n. 20
def GenerateTrace(emulators,
                  exp_Ys,
                  exp_Yerrs,
                  prior,
                  id_,
                  iter,
                  output_filename,
                  burnin=1000):
    """
    The main function to generate pandas trace file after comparing the emulator with experimental value
    Uses pymc2 as it is found to be faster
    """
    pymc.numpy.random.seed(random.randint(0, 1000) + id_)
    n_models = len(emulators)
    emulators_list = []
    id_to_model_names = []
    parameters = []
    for i, ename in enumerate(sorted(emulators.keys())):
        id_to_model_names.append(ename)
        emulators_list.append(emulators[ename])
        ind_parameters = []
        for name, row in prior.iterrows():
            if row["Type"] == "Uniform":
                ind_parameters.append(
                    pymc.Uniform(
                        name if i == 0 else '%s_%d' % (name, i),
                        float(row["Min"]),
                        float(row["Max"]),
                        value=0.5 * (float(row["Min"]) + float(row["Max"])),
                    ))
            else:
                ind_parameters.append(
                    pymc.TruncatedNormal(
                        name if i == 0 else '%s_%d' % (name, i),
                        mu=float(row["Mean"]),
                        tau=1.0 / float(row["SD"])**2,
                        a=float(row["Min"]),
                        b=float(row["Max"]),
                        value=float(row["Mean"]),
                    ))
        parameters.append(ind_parameters)

    # transpose emulator_list
    emulators_list = list(map(list, zip(*emulators_list)))

    if n_models == 1:
        model_choice = 0
    else:
        model_choice = pymc.DiscreteUniform('ModelChoice',
                                            lower=0,
                                            upper=n_models - 1)

    for emu, exp_Y, exp_Yerr in zip(emulators_list, exp_Ys, exp_Yerrs):
        exp_cov = np.diag(np.square(exp_Yerr))

        @pymc.stochastic(observed=True)
        def emulator_result(value=exp_Y,
                            x=parameters,
                            exp_cov=exp_cov,
                            emulator=emu,
                            mc=model_choice):
            mean, var = emulator[mc].Predict(np.array(x[mc]).reshape(1, -1))
            return np.array(
                mvn.logpdf(value, np.squeeze(mean),
                           np.squeeze(var) + exp_cov))

    # model = pymc.Model(parameters)
    # prepare for MCMC
    new_output_filename = "%s_%d.h5" % (output_filename, id_)
    mcmc = pymc.MCMC(parameters if model_choice == 0 else parameters +
                     [model_choice],
                     dbname=new_output_filename,
                     db="hdf5",
                     dbmode="w")

    # sample `iter` times from the posterior, discarding the first `burnin`
    # draws so that only samples from the (approximately) stationary
    # posterior distribution are kept
    mcmc.sample(iter, burn=burnin)
    mcmc.db.close()

    return new_output_filename, id_to_model_names  # pd.DataFrame.from_dict(trace_dict)
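
The @pymc.stochastic(observed=True) decorator above supplies an arbitrary log-likelihood; mvn is presumably scipy.stats.multivariate_normal. A minimal self-contained sketch of the same pattern:

import numpy as np
import pymc
from scipy.stats import multivariate_normal as mvn

exp_Y = np.array([1.0, 2.0])      # illustrative observed values
exp_cov = np.diag([0.1, 0.1])     # illustrative observation covariance
theta = pymc.Uniform('theta', -5., 5., value=0.)

@pymc.stochastic(observed=True)
def likelihood(value=exp_Y, theta=theta, exp_cov=exp_cov):
    # log-probability of the observations given the current theta
    return mvn.logpdf(value, np.repeat(theta, len(value)), exp_cov)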
Example n. 21
def make_model(observed_sed, e_observed_sed):
    """ This function returns all prior and likelihood objects """

    # Prior: mass (Kroupa 2001)
    @pymc.stochastic()
    def logM(value=np.array([np.log10(0.5)]), a=np.log10(0.1), b=np.log10(7)):

        def logp(value, a, b):
            if value > b or value < a:
                return -np.Inf  # Stay within the model limits (a,b).
            else:
                mass = 10 ** value
                if mass < 0.5:
                    return np.log(mass ** -1.3)  # Kroupa (2001)
                else:
                    return np.log(0.5 * mass ** -2.3)  # Kroupa (2001)

        def random(a, b):
            val = (b - a) * np.random.rand() + a
            return np.array([val])

    # Prior: age (uniform in the logarithm)
    logT = pymc.Uniform("logT", np.array([5]), np.array([8]))

    # Prior: accretion rate (uniform in the logarithm)
    logMacc = pymc.Uniform("logMacc", np.array([-15]), np.array([-2]))

    # Prior: disc truncation radius (Rin = 5 +/- 2 R, Gullbring et al. 1998)
    Rin = pymc.TruncatedNormal("Rin", mu=np.array([5.0]), tau=2.0 ** -2,
                               a=1.01, b=9e99)

    # Prior: distance (d = 760 +/- 5 pc, Sung 1997)
    d = pymc.TruncatedNormal("d", mu=np.array([760.0]), tau=5.0 ** -2,
                             a=700, b=9e99)

    # Prior: extinction (logA0 = -0.27 +/- 0.46, Rebull et al. 2002)
    logA0 = pymc.Normal("logA0", mu=np.array([-0.27]), tau=0.46 ** -2)

    # Likelihood: intrinsic SED
    @pymc.deterministic()
    def SED_intrinsic(logM=logM, logT=logT):
        r = siess_Mr(logM, logT)  # IPHAS r' as a function of (mass, age)
        i = siess_Mi(logM, logT)  # IPHAS i
        j = siess_Mj(logM, logT)  # 2MASS J
        ha = r - rminHa_intrinsic(r - i)  # IPHAS H-alpha
        return np.array([r[0], ha[0], i[0], j[0]])

    # Likelihood: H-alpha excess luminosity
    @pymc.deterministic()
    def logLacc(logM=logM, logT=logT, logMacc=logMacc, Rin=Rin):
        logR = siess_logR(logM, logT)  # Radius as a function of (mass, age)
        return 7.496 + logM + logMacc - logR + np.log10(1 - 1 / Rin)
    logLha = pymc.Normal("logLha", mu=(0.64 * logLacc - 2.12), tau=0.43 ** -2)

    # Likelihood: H-alpha equivalent width (EW).
    @pymc.deterministic()
    def logEW(logLha=logLha, SED_intrinsic=SED_intrinsic):
        Lha = 10 ** logLha  # Excess luminosity
        Lha_con = 0.316 * 10 ** (-0.4 * (SED_intrinsic[1] + 0.03))  # Continuum
        ew = -95.0 * Lha / Lha_con  # Equivalent width.
        return np.log10(-ew)

    # Likelihood: apparent SED
    @pymc.deterministic()
    def SED_apparent(d=d, logA0=logA0, SED_intr=SED_intrinsic, logEW=logEW):
        dismod = 5.0 * np.log10(d) - 5.0  # Distance modulus.
        A0 = 10.0 ** logA0  # Extinction parameter
        ri_intr = np.array([SED_intr[0] - SED_intr[2]])  # Intrinsic (r'-i')
        # Correct the intrinsic magnitudes for extinction and H-alpha emission:
        r = SED_intr[0] + dismod + r_offset(ri_intr, A0, logEW)
        ha = SED_intr[1] + dismod + ha_offset(ri_intr, A0, logEW)
        i = SED_intr[2] + dismod + i_offset(ri_intr, A0, logEW)
        j = SED_intr[3] + dismod + 0.276 * A0
        return np.array([r[0], ha[0], i[0], j[0]])

    # Likelihood: observed SED
    @pymc.stochastic(observed=True)
    def SED_observed(value=observed_sed, SED_apparent=SED_apparent):
        e_calib = np.array([0.1, 0.1, 0.1, 0.1])  # Absolute uncertainty term
        D2 = sum((observed_sed - SED_apparent) ** 2 /
                 (e_observed_sed ** 2 + e_calib ** 2))
        logp = -D2 / 2.0
        return logp

    return locals()  # Return all model components defined above
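
A usage sketch, assuming the siess_* interpolators and photometric offset functions referenced above are defined elsewhere (the input arrays here are hypothetical):

import numpy as np
import pymc

obs_sed = np.array([15.0, 14.7, 14.2, 13.5])    # r', Halpha, i', J
e_obs_sed = np.array([0.05, 0.05, 0.05, 0.05])
M = pymc.MCMC(make_model(obs_sed, e_obs_sed))
M.sample(iter=50000, burn=10000)
logM_trace = M.trace('logM')[:]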
Example n. 22
import pymc
import numpy as np

# Priors on unknown parameters
init = 0.5
minv = 0.
maxv = 1.
theta = pymc.TruncatedNormal('theta', value=init, mu=0, tau=1., a=minv, b=maxv)

# Binomial likelihood for data
d = pymc.Binomial('d', n=100, p=theta, value=75, observed=True)
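
A minimal sketch of sampling the two-node model above and summarizing the posterior (the iteration counts are illustrative):

M = pymc.MCMC([theta, d])
M.sample(iter=20000, burn=5000)
print('posterior mean of theta: {}'.format(np.mean(M.trace('theta')[:])))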
Example n. 23
    def set_priors(self, df):
        """ default priors
        
        When setting normals I am assuming that sigma = range / 4 to set these 
        priors
        """
        # mu=25, range=(5-50)
        Vcvals = [pymc.TruncatedNormal('Vcmax25_%d' % (i), \
                  mu=25.0, tau=1.0/11.25**2, a=0.0, b=650.0) \
                  for i in np.unique(df["Leaf"])]

        # mu=1.8, range=(0.8-2.8)
        Jfac = pymc.TruncatedNormal('Jfac', mu=1.8, tau=1.0/0.5**2, \
                                    a=0.0, b=5.0)

        # broad prior
        Rdfac = pymc.Uniform('Rdfac', lower=0.005, upper=0.05)

        # mu=40000, range=(20000-60000)
        Eaj = pymc.TruncatedNormal('Eaj',
                                   mu=40000.0,
                                   tau=1.0 / 10000.0**2,
                                   a=0.0,
                                   b=199999.9)

        # mu=60000, range=(40000-80000)
        Eav = pymc.TruncatedNormal('Eav',
                                   mu=60000.0,
                                   tau=1.0 / 10000.0**2,
                                   a=0.0,
                                   b=199999.9)

        # mu=34000, range=(20000-60000)
        Ear = pymc.TruncatedNormal('Ear',
                                   mu=34000.0,
                                   tau=1.0 / 10000.0**2,
                                   a=0.0,
                                   b=199999.9)

        # mu=640, range=(620-660)
        delSj = pymc.TruncatedNormal('delSj', mu=640.0, tau=1.0/10.0**2, \
                                      a=300.0, b=800.0)

        # mu=640, range=(620-660)
        delSv = pymc.TruncatedNormal('delSv', mu=640.0, tau=1.0/10.0**2, \
                                      a=300.0, b=800.0)
        """
        log_mu = np.log(25.0)
        log_sigma = np.log(11.25)
        log_tau = 1.0/log_sigma**2
        Vcvals = [pymc.Lognormal('Vcmax25_%d' % (i), mu=log_mu, tau=log_tau)\
                  for i in np.unique(df["Leaf"])]
        
        log_mu = np.log(1.8)
        log_sigma = np.log(0.5)
        log_tau = 1.0/log_sigma**2
        Jfac = pymc.Lognormal('Jfac', mu=log_mu, tau=log_tau)
        
        Rdfac = pymc.Uniform('Rdfac', lower=0.005, upper=0.05)
        
        log_mu = np.log(40000.0)
        log_sigma = np.log(20000.0)
        log_tau = 1.0/log_sigma**2
        Eaj = pymc.Lognormal('Eaj', mu=log_mu, tau=log_tau)
        
        log_mu = np.log(60000.0)
        log_sigma = np.log(20000.0)
        log_tau = 1.0/log_sigma**2
        Eav = pymc.Lognormal('Eav', mu=log_mu, tau=log_tau)
        
        log_mu = np.log(34000)
        log_sigma = np.log(15000.0)
        log_tau = 1.0/log_sigma**2
        Ear = pymc.Lognormal('Ear', mu=log_mu, tau=log_tau)
        
        log_mu = np.log(640.0)
        log_sigma = np.log(50.0)
        log_tau = 1.0/log_sigma**2
        delSj = pymc.Lognormal('delSj', mu=log_mu, tau=log_tau)
        
        log_mu = np.log(640.0)
        log_sigma = np.log(50.0)
        log_tau = 1.0/log_sigma**2
        delSv = pymc.Lognormal('delSv', mu=log_mu, tau=log_tau)
        """

        return Vcvals, Jfac, Rdfac, Eaj, Eav, Ear, delSj, delSv
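
The sigma = range / 4 convention from the docstring is what produces the tau values above; a small hypothetical helper makes the conversion explicit:

def range_to_tau(lo, hi):
    # precision for a normal whose sigma is a quarter of the plausible range
    sigma = (hi - lo) / 4.0
    return 1.0 / sigma**2

# e.g. Vcmax25 with range (5, 50): range_to_tau(5.0, 50.0) == 1.0/11.25**2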
Example n. 24
def main(RUNFLAG, outname):

    print('Setting up parameters and priors...')

    params = Params()
    # Set up location here with command line arguments in a list.
    params.cmd_line_chg(['--kalbar'])
    assert params.site_name + 'fields.txt' == 'data/kalbarfields.txt'
    # Set parameters specific to Bayesian runs
    params.PLOT = False
    params.OUTPUT = False

    # This sends a message to CalcSol on whether or not to use CUDA
    if params.CUDA:
        globalvars.cuda = True
    else:
        globalvars.cuda = False
    # get wind data and day labels
    wind_data, days = PM.get_wind_data(*params.get_wind_params())
    params.ndays = len(days)

    # reduce domain
    params.domain_info = (10000.0, 200)  #50 m sided cells
    domain_res = params.domain_info[0] / params.domain_info[1]
    cell_area = domain_res**2

    locinfo = LocInfo(params.dataset, params.coord, params.domain_info)

    prior_eps = {}

    #### Model priors ####
    lam = pm.Beta("lam", 5, 1, value=0.95)
    prior_eps[lam] = 0.01
    f_a1 = pm.TruncatedNormal("f_a1", 6, 0.3, 0, 9, value=6)
    prior_eps[f_a1] = 0.1
    f_a2 = pm.TruncatedNormal("f_a2", 20, 0.3, 15, 24, value=20)
    prior_eps[f_a2] = 0.1
    f_b1_p = pm.Gamma("fb1_p", 2, 1, value=1.5, trace=False,
                      plot=False)  #alpha,beta parameterization
    prior_eps[f_b1_p] = 0.05

    @pm.deterministic(trace=True, plot=True)
    def f_b1(f_b1_p=f_b1_p):
        return f_b1_p + 1

    f_b2_p = pm.Gamma("fb2_p", 2, 1, value=1.5, trace=False, plot=False)
    prior_eps[f_b2_p] = 0.05

    @pm.deterministic(trace=True, plot=True)
    def f_b2(f_b2_p=f_b2_p):
        return f_b2_p + 1

    g_aw = pm.Gamma("g_aw", 2.2, 1, value=1.0)
    prior_eps[g_aw] = 0.05
    g_bw = pm.Gamma("g_bw", 5, 1, value=3.8)
    prior_eps[g_bw] = 0.1
    # flight diffusion parameters. note: mean is average over flight advection
    sig_x = pm.Gamma("sig_x", 26, 0.15, value=180)
    prior_eps[sig_x] = 1
    sig_y = pm.Gamma("sig_y", 15, 0.15, value=150)
    prior_eps[sig_y] = 1
    corr_p = pm.Beta("corr_p", 5, 5, value=0.5, trace=False, plot=False)
    prior_eps[corr_p] = 0.01

    @pm.deterministic(trace=True, plot=True)
    def corr(corr_p=corr_p):
        return corr_p * 2 - 1

    # local spread parameters
    sig_x_l = pm.Gamma("sig_xl", 2, 0.08, value=10)
    prior_eps[sig_x_l] = 1
    sig_y_l = pm.Gamma("sig_yl", 2, 0.14, value=10)
    prior_eps[sig_y_l] = 1
    corr_l_p = pm.Beta("corr_l_p", 5, 5, value=0.5, trace=False, plot=False)
    prior_eps[corr_l_p] = 0.005

    @pm.deterministic(trace=True, plot=True)
    def corr_l(corr_l_p=corr_l_p):
        return corr_l_p * 2 - 1

    #pymc.MAP can only take float values, so we vary mu_r and set n_periods.
    mu_r = pm.Normal("mu_r", 1., 1, value=1)
    prior_eps[mu_r] = 0.05
    params.n_periods = 30
    #alpha_pow = prev. time exponent in ParasitoidModel.h_flight_prob
    xi = pm.Gamma("xi", 1, 1,
                  value=0.75)  # presence to oviposition/emergence factor
    prior_eps[xi] = 0.05

    #### Observation probabilities. ####
    em_obs_prob = pm.Beta("em_obs_prob", 1, 1, value=0.05)  # per-wasp prob of
    # observing emergence in release field grid given max leaf collection.
    # This is dependent on the size of the cell surrounding the grid point,
    # but there's not much to be done about this. Just remember to
    # interpret this number based on grid coarseness.
    prior_eps[em_obs_prob] = 0.0005
    grid_obs_prob = pm.Beta("grid_obs_prob", 1, 1,
                            value=0.005)  # probability of
    # observing a wasp present in the grid cell given max leaf sampling
    prior_eps[grid_obs_prob] = 0.0005

    #card_obs_prob = pm.Beta("card_obs_prob",1,1,value=0.5) # probability of
    # observing a wasp present in the grid cell given max leaf sampling

    #### Data collection model background for sentinel fields ####
    # Need to fix linear units for area. Meters would be best.
    # Effective collection area (constant between fields) is very uncertain
    with warnings.catch_warnings():
        # squelch a warning arising from pymc internals that we don't need to worry about
        warnings.simplefilter("ignore", RuntimeWarning)
        A_collected = pm.TruncatedNormal("A_collected",
                                         2500,
                                         1 / 2500,
                                         0,
                                         min(locinfo.field_sizes.values()) *
                                         cell_area,
                                         value=2500)  # in m**2
    prior_eps[A_collected] = 10
    # Each field has its own binomial probability.
    # Probabilities are likely to be small, and pm.Beta cannot handle small
    #   parameter values. So we will use TruncatedNormal again.
    N = len(locinfo.sent_ids)
    sent_obs_probs = np.empty(N, dtype=object)
    # fix beta for the Beta distribution
    sent_beta = 40
    # mean of Beta distribution will be A_collected/field size

    ## Loop over fields ##
    for n, key in enumerate(locinfo.sent_ids):
        sent_obs_probs[n] = pm.Beta(
            "sent_obs_probs_{}".format(key),
            A_collected / (locinfo.field_sizes[key] * cell_area) * sent_beta /
            (1 - A_collected / (locinfo.field_sizes[key] * cell_area)),
            sent_beta,
            value=0.1 * 3600 / (locinfo.field_sizes[key] * cell_area))
        prior_eps[sent_obs_probs[n]] = 0.0005

    sent_obs_probs = pm.Container(sent_obs_probs)
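    # Note on the Beta parameterization above: with beta fixed and desired
    # mean m = A_collected/(field size * cell_area), the mean relation
    # m = alpha/(alpha + beta) inverts to alpha = m*beta/(1 - m), which is
    # the expression passed as the first argument.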

    #### Collect variables ####
    params_ary = pm.Container(
        np.array([
            g_aw, g_bw, f_a1, f_b1, f_a2, f_b2, sig_x, sig_y, corr, sig_x_l,
            sig_y_l, corr_l, lam, mu_r
        ],
                 dtype=object))

    if params.dataset == 'kalbar':
        # factor for kalbar initial spread
        sprd_factor = pm.Uniform("sprd_factor", 0, 1, value=0.1)
        prior_eps[sprd_factor] = 0.01
    else:
        sprd_factor = None

    print('Getting initial model values...')

    #### Run model ####
    @pm.deterministic(plot=False, trace=False)
    def pop_model(params=params,
                  params_ary=params_ary,
                  locinfo=locinfo,
                  wind_data=wind_data,
                  days=days,
                  sprd_factor=sprd_factor):
        '''This function acts as an interface between PyMC and the model.
        Not only does it run the model, but it provides an emergence potential
        based on the population model result projected forward from feasible
        oviposition dates. To modify how this projection happens, edit
        popdensity_to_emergence. Returned values from this function should be
        nearly ready to compare to data.
        '''
        modeltic = time.time()
        ### Alter params with stochastic variables ###

        # g wind function parameters
        params.g_params = tuple(params_ary[0:2])
        # f time of day function parameters
        params.f_params = tuple(params_ary[2:6])
        # Diffusion coefficients
        params.Dparams = tuple(params_ary[6:9])
        params.Dlparams = tuple(params_ary[9:12])
        # Probability of any flight during the day under ideal circumstances
        params.lam = params_ary[12]

        # scaling flight advection to wind advection
        params.mu_r = params_ary[13]

        ### PHASE ONE ###
        # First, get spread probability for each day as a coo sparse matrix
        max_shape = np.array([0, 0])
        pm_args = [(days[0], wind_data, *params.get_model_params(),
                    params.r_start)]
        pm_args.extend([(day, wind_data, *params.get_model_params())
                        for day in days[1:params.ndays]])

        ##### Kalbar wind started recording a day late. Spread the population
        #####   locally before running full model.
        if params.dataset == 'kalbar':
            res = params.domain_info[0] / params.domain_info[1]
            mean_drift = np.array([-25., 15.])
            xdrift_int = int(mean_drift[0] // res)
            xdrift_r = mean_drift[0] % res
            ydrift_int = int(mean_drift[1] // res)
            ydrift_r = mean_drift[1] % res
            longsprd = PM.get_mvn_cdf_values(
                res, np.array([xdrift_r, ydrift_r]),
                PM.Dmat(params_ary[6], params_ary[7], params_ary[8]))
            shrtsprd = PM.get_mvn_cdf_values(
                res, np.array([0., 0.]),
                PM.Dmat(params_ary[9], params_ary[10], params_ary[11]))

            mlen = int(
                max(longsprd.shape[0], shrtsprd.shape[0]) +
                max(abs(xdrift_int), abs(ydrift_int)) * 2)
            sprd = np.zeros((mlen, mlen))
            lbds = [
                int(mlen // 2 - longsprd.shape[0] // 2),
                int(mlen // 2 + longsprd.shape[0] // 2 + 1)
            ]
            sprd[lbds[0] - ydrift_int:lbds[1] - ydrift_int, lbds[0] +
                 xdrift_int:lbds[1] + xdrift_int] = longsprd * sprd_factor
            sbds = [
                int(mlen // 2 - shrtsprd.shape[0] // 2),
                int(mlen // 2 + shrtsprd.shape[0] // 2 + 1)
            ]
            sprd[sbds[0]:sbds[1],
                 sbds[0]:sbds[1]] += shrtsprd * (1 - sprd_factor)
            '''
            pmf_list = [sparse.coo_matrix(PM.get_mvn_cdf_values(
                        params.domain_info[0]/params.domain_info[1],
                        np.array([0.,0.]),
                        PM.Dmat(sprd_factor*params_ary[9],
                                sprd_factor*params_ary[10],params_ary[11])))]
            '''
            sprd[int(sprd.shape[0] // 2),
                 int(sprd.shape[0] // 2)] += max(0, 1 - sprd.sum())
            pmf_list = [sparse.coo_matrix(sprd)]
        else:
            pmf_list = []

        ###################### Get pmf_list from multiprocessing
        pmf_list.extend(pool.starmap(PM.prob_mass, pm_args))

        ######################
        for pmf in pmf_list:
            for dim in range(2):
                if pmf.shape[dim] > max_shape[dim]:
                    max_shape[dim] = pmf.shape[dim]

        r_spread = []  # holds the one-day spread for each release day.

        # Reshape the prob. mass function of each release day into solution form
        for ii in range(params.r_dur):
            offset = params.domain_info[1] - pmf_list[ii].shape[0] // 2
            dom_len = params.domain_info[1] * 2 + 1
            r_spread.append(
                sparse.coo_matrix(
                    (pmf_list[ii].data,
                     (pmf_list[ii].row + offset, pmf_list[ii].col + offset)),
                    shape=(dom_len, dom_len)).tocsr())

        ### PHASE TWO ###
        # Pass the probability list, pmf_list, and other info to convolution solver.
        #   This will return the finished population model.
        with Capturing() as output:
            if params.dataset == 'kalbar':
                # extend day count by one
                days_ext = [days[0] - 1]
                days_ext.extend(days)
                modelsol = get_populations(r_spread, pmf_list, days_ext,
                                           params.ndays + 1, dom_len,
                                           max_shape, params.r_dur,
                                           params.r_number, params.r_mthd())
                # remove the first one and start where wind started.
                modelsol = modelsol[1:]
            else:
                modelsol = get_populations(r_spread, pmf_list, days,
                                           params.ndays, dom_len, max_shape,
                                           params.r_dur, params.r_number,
                                           params.r_mthd())

        # modelsol now holds the model results for this run as CSR sparse arrays

        # get emergence potential (measured in expected number of wasps previously
        #   present whose oviposition would result in emergence on the given date)
        #   from the model result
        release_emerg, sentinel_emerg = popdensity_to_emergence(
            modelsol, locinfo)

        # get the expected wasp populations at grid points on sample days
        grid_counts = popdensity_grid(modelsol, locinfo)

        # get the expected wasp populations in cardinal directions
        '''card_counts = popdensity_card(modelsol,locinfo,params.domain_info)'''

        ## For the lists release_emerg and sentinel_emerg:
        ##    Each list entry corresponds to a data collection day (one array)
        ##    In each array:
        ##    Each column corresponds to an emergence observation day (as in data)
        ##    Each row corresponds to a grid point or sentinel field, respectively
        ## For the array grid_counts:
        ##    Each column corresponds to an observation day
        ##    Each row corresponds to a grid point
        ## For the list card_counts:
        ##    Each list entry corresponds to a sampling day (one array)
        ##    Each column corresponds to a step in a cardinal direction
        ##    Each row corresponds to a cardinal direction
        print('{:03.1f} sec./model at {}'.format(
            time.time() - modeltic, time.strftime("%H:%M:%S %d/%m/%Y")),
              end='\r')
        sys.stdout.flush()
        return (release_emerg, sentinel_emerg, grid_counts)  #,card_counts)

    print('Parsing model output and connecting to Bayesian model...')

    ### Parse the results of pop_model into separate deterministic variables ###
    '''Get Poisson rates for sentinel field emergence. Parameters:
        xi is constant, emerg is a list of ndarrays, betas is a 1D array of
        field probabilities.'''
    Ncollections = len(locinfo.sent_DataFrames)
    sent_poi_rates = []
    for ii in range(Ncollections):
        s_ndays = len(locinfo.sent_DataFrames[ii]['datePR'].unique())
        sent_poi_rates.append(
            pm.Lambda('sent_poi_rate_{}'.format(ii),
                      lambda xi=xi, ndays=s_ndays, betas=sent_obs_probs,
                      emerg_model=pop_model[1][ii]: xi * emerg_model * np.tile(
                          betas, (ndays, 1)).T,
                      trace=False))
    sent_poi_rates = pm.Container(sent_poi_rates)
    '''Get Poisson rates for release field grid emergence. Parameters:
        xi is constant, emerg is a list of ndarrays. Collection effort is
        specified in locinfo.'''
    Ncollections = len(locinfo.release_DataFrames)
    rel_poi_rates = []
    for ii in range(Ncollections):
        r_effort = locinfo.release_collection[ii]  #fraction of max collection
        r_ndays = len(locinfo.release_DataFrames[ii]['datePR'].unique())
        rel_poi_rates.append(
            pm.Lambda('rel_poi_rate_{}'.format(ii),
                      lambda xi=xi, ndays=r_ndays, r_effort=r_effort,
                             beta=em_obs_prob, emerg_model=pop_model[0][ii]:
                      xi * emerg_model * np.tile(r_effort * beta, (ndays, 1)).T,
                      trace=False))
    rel_poi_rates = pm.Container(rel_poi_rates)

    @pm.deterministic(plot=False, trace=False)
    def grid_poi_rates(locinfo=locinfo,
                       beta=grid_obs_prob,
                       obs_model=pop_model[2]):
        '''Return Poisson rates for grid sampling.
        obs_model is an ndarray; sampling effort is specified in locinfo.'''
        return beta * locinfo.grid_samples * obs_model

    '''Return Poisson rates for cardinal direction sampling.
        obs_model is a list of ndarrays; sampling effort is assumed constant.'''
    '''
    card_poi_rates = []
    for ii,obs in enumerate(pop_model[3]):
        card_poi_rates.append(pm.Lambda('card_poi_rate_{}'.format(ii),
            lambda beta=card_obs_prob, obs=obs: beta*obs))
    card_poi_rates = pm.Container(card_poi_rates)
    '''

    # Given the expected wasp densities from pop_model, actual wasp densities
    #   are modeled as a thinned Poisson random variable about that mean.
    # Each wasp in the area then has a small probability of being seen.

    ### Connect sentinel emergence data to model ###
    N_sent_collections = len(locinfo.sent_DataFrames)
    # Create list of collection variables
    sent_collections = []
    for ii in range(N_sent_collections):
        # Apparently, pymc does not play well with 2D array parameters
        sent_collections.append(
            np.empty(sent_poi_rates[ii].value.shape, dtype=object))
        for n in range(sent_collections[ii].shape[0]):
            for m in range(sent_collections[ii].shape[1]):
                sent_collections[ii][n, m] = pm.Poisson(
                    "sent_em_obs_{}_{}_{}".format(ii, n, m),
                    sent_poi_rates[ii][n, m],
                    value=float(locinfo.sentinel_emerg[ii][n, m]),
                    observed=True)
    sent_collections = pm.Container(sent_collections)

    ### Connect release-field emergence data to model ###
    N_release_collections = len(locinfo.release_DataFrames)
    # Create list of collection variables
    rel_collections = []
    for ii in range(N_release_collections):
        rel_collections.append(
            np.empty(rel_poi_rates[ii].value.shape, dtype=object))
        for n in range(rel_collections[ii].shape[0]):
            for m in range(rel_collections[ii].shape[1]):
                rel_collections[ii][n, m] = pm.Poisson(
                    "rel_em_obs_{}_{}_{}".format(ii, n, m),
                    rel_poi_rates[ii][n, m],
                    value=float(locinfo.release_emerg[ii][n, m]),
                    observed=True)
    rel_collections = pm.Container(rel_collections)

    ### Connect grid sampling data to model ###
    grid_obs = np.empty(grid_poi_rates.value.shape, dtype=object)
    for n in range(grid_obs.shape[0]):
        for m in range(grid_obs.shape[1]):
            grid_obs[n, m] = pm.Poisson("grid_obs_{}_{}".format(n, m),
                                        grid_poi_rates[n, m],
                                        value=float(locinfo.grid_obs[n, m]),
                                        observed=True)
    grid_obs = pm.Container(grid_obs)

    ### Connect cardinal direction data to model ###
    '''
    N_card_collections = len(locinfo.card_obs_DataFrames)
    # Create list of sampling variables
    card_collections = []
    for ii in range(N_card_collections):
        card_collections.append(np.empty(card_poi_rates[ii].value.shape,
                                         dtype=object))
        for n in range(card_collections[ii].shape[0]):
            for m in range(card_collections[ii].shape[1]):
                card_collections[ii][n,m] = pm.Poisson(
                    "card_obs_{}_{}_{}".format(ii,n,m),
                    card_poi_rates[ii][n,m],
                    value=locinfo.card_obs[ii][n,m],
                    observed=True, plot=False)
    card_collections = pm.Container(card_collections)
    '''

    ### Collect model ###
    if params.dataset == 'kalbar':
        Bayes_model = pm.Model([
            lam, f_a1, f_a2, f_b1_p, f_b2_p, f_b1, f_b2, g_aw, g_bw, sig_x,
            sig_y, corr_p, corr, sig_x_l, sig_y_l, corr_l_p, corr_l, mu_r,
            sprd_factor, grid_obs_prob, xi, em_obs_prob, A_collected,
            sent_obs_probs, params_ary, pop_model, grid_poi_rates,
            rel_poi_rates, sent_poi_rates, grid_obs, rel_collections,
            sent_collections
        ])
    else:
        Bayes_model = pm.Model([
            lam, f_a1, f_a2, f_b1_p, f_b2_p, f_b1, f_b2, g_aw, g_bw, sig_x,
            sig_y, corr_p, corr, sig_x_l, sig_y_l, corr_l_p, corr_l, mu_r,
            grid_obs_prob, xi, em_obs_prob, A_collected, sent_obs_probs,
            params_ary, pop_model, grid_poi_rates, rel_poi_rates,
            sent_poi_rates, grid_obs, rel_collections, sent_collections
        ])

    ######################################################################
    #####              Run Methods and Interactive Menu              #####
    ######################################################################

    def MAP_run(outname=None):
        '''Find Maximum a posteriori distribution'''
        tic = time.time()
        M = pm.MAP(Bayes_model, prior_eps)
        print('Fitting....')
        M.fit()
        # Return statistics
        print('Estimate complete. Time elapsed: {}'.format(time.time() - tic))
        print('Free stochastic variables: {}'.format(M.len))
        print('Joint log-probability of model: {}'.format(M.logp))
        print('Max joint log-probability of model: {}'.format(M.logp_at_max))
        print('Maximum log-likelihood: {}'.format(M.lnL))
        print("Akaike's Information Criterion {}".format(M.AIC), flush=True)
        print('---------------Variable estimates---------------')
        for var in Bayes_model.stochastics:
            print('{} = {}'.format(var, var.value))
        # Save result to file
        if outname is None:
            outname = 'Max_aPosteriori_Estimate.txt'
        with open(outname, 'w') as fobj:
            fobj.write('Time elapsed: {}\n'.format(time.time() - tic))
            fobj.write('Free stochastic variables: {}\n'.format(M.len))
            fobj.write('Joint log-probability of model: {}\n'.format(M.logp))
            fobj.write('Max joint log-probability of model: {}\n'.format(
                M.logp_at_max))
            fobj.write('Maximum log-likelihood: {}\n'.format(M.lnL))
            fobj.write("Akaike's Information Criterion {}\n".format(M.AIC))
            fobj.write('---------------Variable estimates---------------\n')
            for var in Bayes_model.stochastics:
                fobj.write('{} = {}\n'.format(var, var.value))
        print('Result saved to {}.'.format(outname))
        return M

    def norm_run(fname, outname=None):
        '''Find normal approximation'''
        try:
            tic = time.time()
            M = pm.NormApprox(Bayes_model,
                              eps=prior_eps,
                              db='hdf5',
                              dbname=fname,
                              dbmode='a',
                              dbcomplevel=0)
            print('Fitting....')
            M.fit()
            # Return statistics
            print('Estimate complete. Time elapsed: {}'.format(time.time() -
                                                               tic))
            print('Free stochastic variables: {}'.format(M.len))
            print('Joint log-probability of model: {}'.format(M.logp))
            print('Max joint log-probability of model: {}'.format(
                M.logp_at_max))
            print("Akaike's Information Criterion {}".format(M.AIC),
                  flush=True)
            print('---------------Variable estimates---------------')
            print('Estimated means: ')
            for var in Bayes_model.stochastics:
                print('{} = {}'.format(var, M.mu[var]))
            print('Estimated variances: ')
            for var in Bayes_model.stochastics:
                print('{} = {}'.format(var, M.C[var]))
            # Save result to file
            if outname is None:
                outname = "Normal_approx.txt"
            with open(outname, 'w') as fobj:
                fobj.write('Time elapsed: {}\n'.format(time.time() - tic))
                fobj.write('Free stochastic variables: {}\n'.format(M.len))
                fobj.write('Joint log-probability of model: {}\n'.format(
                    M.logp))
                fobj.write('Max joint log-probability of model: {}\n'.format(
                    M.logp_at_max))
                fobj.write("Akaike's Information Criterion {}\n".format(M.AIC))
                fobj.write(
                    '---------------Variable estimates---------------\n')
                fobj.write('Estimated means: \n')
                for var in Bayes_model.stochastics:
                    fobj.write('{} = {}\n'.format(var, M.mu[var]))
                fobj.write('Estimated variances: \n')
                for var in Bayes_model.stochastics:
                    fobj.write('{} = {}\n'.format(var, M.C[var]))
            print('These results have been saved to {}.'.format(outname))
        except Exception as e:
            print(e)
            print('Exception: database closing...')
            M.db.close()
            print('Database closed.')
            raise
        return M

    # Parse run type
    if RUNFLAG == 'MAP_RUN':
        M = MAP_run(outname)
    elif RUNFLAG is not None:
        M = norm_run(RUNFLAG, outname)
        M.db.close()
    else:
        print(
            '----- Maximum a posteriori estimates & Normal approximations -----'
        )
        while True:
            print(" 'map': Calculate maximum a posteriori estimate")
            print("'norm': Calculate normal approximation")
            print("'quit': Quit.")
            cmd = input('Enter: ')
            cmd = cmd.strip()
            cmd = cmd.lower()
            if cmd == 'map':
                M = MAP_run(outname)
                # Option to enter IPython
                cmd_py = input('Enter IPython y/[n]:')
                cmd_py = cmd_py.strip()
                cmd_py = cmd_py.lower()
                if cmd_py == 'y' or cmd_py == 'yes':
                    import IPython
                    IPython.embed()
            elif cmd == 'norm':
                fname = input("Enter database name or 'back' to cancel:")
                fname = fname.strip()
                if fname == 'q' or fname == 'quit':
                    return
                elif fname == 'b' or fname == 'back':
                    continue
                elif fname[-3:] != '.h5':
                    fname = fname + '.h5'
                M = norm_run(fname, outname)
                try:
                    print(
                        'For covariances, enter IPython and request a covariance'
                        +
                        ' matrix by passing variables in the following syntax:\n'
                        + 'M.C[var1,var2,...,varn]\n' +
                        'Example: M.C[f_a1,f_a2] gives the covariance matrix of\n'
                        + ' f_a1 and f_a2.')
                    # Option to enter IPython
                    cmd_py = input('Enter IPython y/[n]:')
                    cmd_py = cmd_py.strip()
                    cmd_py = cmd_py.lower()
                    if cmd_py == 'y' or cmd_py == 'yes':
                        import IPython
                        IPython.embed()
                    M.db.close()
                    print('Database closed.')
                except Exception as e:
                    print(e)
                    print('Exception: database closing...')
                    M.db.close()
                    print('Database closed.')
                    raise
            elif cmd == 'quit' or cmd == 'q':
                return
            else:
                print('Command not recognized.')