Example #1
def three_model_comparison(p_df):

    a_n = len(p_df)
    t_lam = pm.Uniform('t_lam', 0, 1)
    #d_lam = 1.0 / np.mean(p_df)
    t_lambda_1 = pm.Exponential("t_lambda_1", t_lam)
    #t_lambda_1 = pm.Uniform("t_lambda_1", min(p_df), max(p_df))
    t_lambda_2 = pm.Exponential("t_lambda_2", t_lam)
    #t_lambda_2 = pm.Uniform("t_lambda_2",min(p_df), max(p_df))
    t_lambda_3 = pm.Exponential("t_lambda_3", t_lam)
    #t_lambda_3 = pm.Uniform("t_lambda_3", min(p_df), max(p_df))

    #tau = pm.DiscreteUniform("tau", lower=min(p_df), upper=max(p_df) )
    # tau_1 and tau_2 index into the series (length a_n), not into its values
    t_tau_1 = pm.DiscreteUniform("tau_1", lower=0, upper=a_n - 1)
    t_tau_2 = pm.DiscreteUniform("tau_2", lower=t_tau_1, upper=a_n - 1)

    @pm.deterministic
    def lambda_(tau_1=t_tau_1,
                tau_2=t_tau_2,
                lambda_1=t_lambda_1,
                lambda_2=t_lambda_2,
                lambda_3=t_lambda_3):
        out = np.zeros(a_n)
        out[:tau_1] = lambda_1  # lambda before tau_1 is lambda1
        out[tau_1:tau_2] = lambda_2  # lambda_2 between tau_1 and tau_2
        out[tau_2:] = lambda_3  # lambda after (and including) tau is lambda_3
        return out

    t_obs = pm.Poisson('t_observed', mu=lambda_, value=p_df, observed=True)

    t_model = pm.Model(
        [t_obs, t_lam, t_lambda_1, t_lambda_2, t_lambda_3, t_tau_1, t_tau_2])
    #d_model = pm.Model([d_obs,  t_lambda_1, t_lambda_2, tau])

    return t_model, t_lam, t_lambda_1, t_lambda_2, t_lambda_3, t_tau_1, t_tau_2
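
# A minimal usage sketch (an assumption, not part of the original source):
# build the three-regime model from a 1-D array of counts, sample it with
# PyMC2's MCMC, then pull the posterior traces by node name.
import numpy as np
import pymc as pm

p_df = np.random.poisson(15, 80)  # hypothetical count data
t_model, t_lam, l1, l2, l3, tau_1, tau_2 = three_model_comparison(p_df)
mcmc = pm.MCMC(t_model)
mcmc.sample(40000, 10000, 1)  # iterations, burn-in, thinning
tau_1_samples = mcmc.trace('tau_1')[:]
tau_2_samples = mcmc.trace('tau_2')[:]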
Example #2
def all_matches(matches, match_evaluator):
    match_vars = []
    
    for i, match in enumerate(matches):
        match_name = 'match_%i' % i

        if match.order == "unordered":
            order = pm.DiscreteUniform('match_%i_order' % i,
                                       lower=0,
                                       upper=len(match.players)*2 - 1)
        else:
            observed = match.order == "total"
            order = pm.DiscreteUniform('match_%i_order' % i,
                                       value=0,
                                       lower=0,
                                       observed=observed,
                                       upper=len(match.players) - 1)

        eval_func = match_evaluator.eval_with_order
        parents = {'players': match.players,
                   'winning_team': match.winning_team,
                   'order': order,
                   'foul_end': match.foul_end}
        match_var = pm.Deterministic(eval=eval_func,
                                     doc=match_name,
                                     name=match_name,
                                     parents=parents,
                                     plot=False,
                                     dtype=float)
        
        match_vars.append(match_var)

    return match_vars
Example #3
def two_model_comparison(p_df):

    a_n = len(p_df)
    d_lam = pm.Uniform('d_lam', 0, 1)
    #d_lam = 1.0 / np.mean(p_df)
    lambda_1 = pm.Exponential("lambda_1", d_lam)
    #lambda_1 = pm.Uniform("lambda_1", min(p_df), max(p_df))
    lambda_2 = pm.Exponential("lambda_2", d_lam)
    #lambda_2 = pm.Uniform("lambda_2",min(p_df), max(p_df))

    #tau = pm.DiscreteUniform("tau", lower=min(p_df), upper=max(p_df) )
    # tau indexes into the series (length a_n), not into its values
    tau = pm.DiscreteUniform("tau", lower=0, upper=a_n - 1)

    @pm.deterministic
    def lambda_(tau=tau, lambda_1=lambda_1, lambda_2=lambda_2):
        out = np.zeros(a_n)
        out[:tau] = lambda_1  # lambda before tau is lambda1
        out[tau:] = lambda_2  # lambda after (and including) tau is lambda2
        return out

    d_obs = pm.Poisson('d_observed', mu=lambda_, value=p_df, observed=True)

    d_model = pm.Model([d_obs, d_lam, lambda_1, lambda_2, tau])
    #d_model = pm.Model([d_obs,  lambda_1, lambda_2, tau])

    return d_model, d_obs, d_lam, lambda_1, lambda_2, tau
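
# A usage sketch (assumed, mirroring the usual switchpoint-model recipe):
# after sampling, the posterior expected rate per day can be reconstructed
# from the tau and lambda traces.
import numpy as np
import pymc as pm

p_df = np.random.poisson(10, 60)  # hypothetical count data
d_model = two_model_comparison(p_df)[0]
mcmc = pm.MCMC(d_model)
mcmc.sample(40000, 10000, 1)
tau_s = mcmc.trace('tau')[:]
l1_s = mcmc.trace('lambda_1')[:]
l2_s = mcmc.trace('lambda_2')[:]
expected = np.zeros(len(p_df))
for day in range(len(p_df)):
    before = day < tau_s  # samples where the switch has not yet happened
    expected[day] = (l1_s[before].sum() + l2_s[~before].sum()) / len(tau_s)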
Example #4
def compute_n_sat_prior(informative=False,
                        poisson_mu=None,
                        uniform_lower=None,
                        uniform_upper=None):
    """
    Compute n_sat prior.

    Note:
    There are two options for modelling n_sat:
    - uninformative: discrete uniform distribution
    - informative: Poisson distribution

    Parameters
    ----------
    informative : bool, optional (default: False)
        If True, n_sat is modelled by a
        Poisson distribution. Else, n_sat
        is modelled by a discrete uniform
        distribution.
    poisson_mu : int, optional (default: None)
        Parameter mu (i.e. mean) of
        the Poisson distribution used to
        model n_sat. Must be specified if
        `informative` is True.
    uniform_lower : int, optional (default: None)
        Lower bound of the discrete uniform
        distribution used to model n_sat.
        Must be specified if `informative`
        is False.
    uniform_upper : int, optional (default: None)
        Upper bound of the discrete uniform
        distribution used to model n_sat.
        Must be specified if `informative`
        is False.

    Returns
    -------
    pymc distribution
        Prior distribution for n_sat.
    """
    if informative:
        if poisson_mu is None:
            error_msg = ("If you want to use a Poisson prior for n_sat, "
                         "please specify the parameter `poisson_mu`.")
            sys.exit(error_msg)

        return pymc.Poisson("n_sat", mu=poisson_mu)

    if (uniform_lower is None or uniform_upper is None):
        error_msg = ("If you want to use an uniform prior for n_sat, "
                     "please specify the parameters `uniform_lower` "
                     "and `uniform_upper`.")
        sys.exit(error_msg)

    return pymc.DiscreteUniform("n_sat",
                                lower=uniform_lower,
                                upper=uniform_upper)
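
# Usage sketch (hypothetical values): an informative Poisson prior versus
# an uninformative discrete-uniform prior for n_sat.
n_sat_informative = compute_n_sat_prior(informative=True, poisson_mu=3)
n_sat_uninformative = compute_n_sat_prior(informative=False,
                                          uniform_lower=0,
                                          uniform_upper=10)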
Example #5
 def priors_sample(self, n):
     """Generate n samples from each prior."""
     self.prior_samples = {}
     for key in self.priors:
         prior = self.priors[key]
         if prior["type"] == "Normal":
             # pymc.Normal takes a precision tau = 1 / variance
             dist = pymc.Normal(key, prior["mean"],
                                1. / np.square(prior["stdev"]))
             self.prior_samples[key] = [dist.random() for _ in range(n)]
         elif prior["type"] == "DiscreteUniform":
             dist = pymc.DiscreteUniform(key, prior["lower"],
                                         prior["upper"])
             self.prior_samples[key] = [dist.random() for _ in range(n)]
         else:
             print("Distribution type not supported.")
             break
Example #6
    def make_model(data):
        switchpoint = pm.DiscreteUniform('switchpoint', 0, len(data))
        early_rate = pm.Beta('early_rate', 0.5, 0.5)
        late_rate = pm.Beta('late_rate', 0.5, 0.5)

        @pm.deterministic(plot=False)
        def rate(s=switchpoint, early=early_rate, late=late_rate):
            out = np.empty(len(data))
            out[:s] = early
            out[s:] = late
            return out

        phredscore = pm.Bernoulli('phredscore', p=rate, value=data,
                                  observed=True)
        return locals()
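
# Usage sketch (an assumption, with make_model available at module scope):
# make_model returns locals(), and pm.MCMC accepts such a node dictionary
# directly.
import numpy as np
import pymc as pm

data = np.random.binomial(1, 0.3, 100)  # hypothetical 0/1 outcome data
mcmc = pm.MCMC(make_model(data))
mcmc.sample(20000, 5000)
switch_samples = mcmc.trace('switchpoint')[:]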
Example #7
    def makeSampledPrior(self, manager, parts):


        parts.shape_sample_index = [pymc.DiscreteUniform('shape_index_%d' % i, 0, len(x)-1) \
                                        for i, x in enumerate(self.shapedistro_params)]

        parts.shape_params = np.empty(self.nshapebins, dtype=object)
        for i, index, samples in zip(range(self.nshapebins),
                                     parts.shape_sample_index,
                                     self.shapedistro_params):

            @pymc.deterministic(name='shape_params_%d' % i)
            def shape_param_func(index=index, samples=samples):
                return np.ascontiguousarray(samples[:, index])

            parts.shape_params[i] = shape_param_func
Example #8
    def setup_inference_mixture(self):
        #depending on the number of wavelengths
        #self.wavelengths = [self.wavelengths[len(self.wavelengths)-1]]
        wavelength_number = len(self.wavelengths)
        
        t = 1. / 2.5**2
        C_sigs = pymc.Container([pymc.HalfCauchy("c_sigs_%i_%i" % (i, x), beta=10, alpha=1)
                                 for i in range(1 + 2 * self.N) for x in range(wavelength_number)])
        C = pymc.Container([pymc.Normal("c_%i_%i" % (i, x), mu=0, tau=1. / C_sigs[i * wavelength_number + x] ** 2)
                            for i in range(1 + 2 * self.N) for x in range(wavelength_number)])
        i_ = pymc.Container([pymc.DiscreteUniform('i_%i' %i,lower=0,upper=1) for i in range(len(self.xdata))])
        @pymc.stochastic(observed=False)
        def sigma(value=1):
            return -np.log(abs(value))
        @pymc.stochastic(observed=False)
        def sigma3(value=1):
            return -np.log(abs(value))
        
        qw_sigs = pymc.Container([pymc.HalfCauchy("qw_sigs_%i" % x, beta=10, alpha=1)
                                  for x in range(wavelength_number)])
        if self.wavelength_sd_defined:
            qw = pymc.Container([pymc.distributions.Lognormal('qw_%i' % x, mu=self.wavelengths[x],
                                                              tau=1. / self.wavelength_sd[x] ** 2)
                                 for x in range(wavelength_number)])
        else:
            qw = pymc.Container([pymc.distributions.Uniform('qw_%i' % x, lower=0., upper=self.wavelengths[x] * 2)
                                 for x in range(wavelength_number)])


        def fourier_series(C,N,QW,x,wavelength_number,i_):
            v = np.array(x)
            v.fill(0.0)
            v = v.astype('float')
            for ii in range(len(x)):
                for w in range(wavelength_number):
                    v += C[w]
                    for i in range(1, N + 1):
                        v[ii] = (v[ii]
                                 + C[(2 * i - 1) * wavelength_number + w] * np.cos(2 * np.pi / QW[w] * i * x[ii])
                                 + C[(2 * i) * wavelength_number + w] * np.sin(2 * np.pi / QW[w] * i * x[ii]))
                #if i_[ii] == 0:
                #    v[ii] = -v[ii]
            return v  # np.sum(v)
        self.vector_fourier_series = np.vectorize(fourier_series)
        # Define the form of the model and likelihood
        @pymc.deterministic
        def y_model(C=C,x=self.xdata,qw=qw,nn=self.N,wavelength_number=wavelength_number,i_=i_):
            return fourier_series(C,nn,qw,x,wavelength_number,i_) 
        y = pymc.Normal('y', mu=y_model, tau=1. / sigma ** 2, observed=True, value=self.ydata)
        # package the full model in a dictionary
        self.model1 = dict(C=C, qw=qw, sigma=sigma,qw_sigs=qw_sigs,
                      y_model=y_model, y=y,x_values=self.xdata,y_values=self.ydata,i_=i_)
        self.setup = True
        self.mcmc_uptodate = False
        return self.model1
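
# Usage sketch (assumed): the dictionary returned by setup_inference_mixture
# can be passed straight to pymc.MCMC for sampling, e.g.
#   mcmc = pymc.MCMC(obj.setup_inference_mixture())
#   mcmc.sample(20000, 5000)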
Example #9
def main():
    lambda_1 = pm.Exponential("lambda_1", 1)  # prior on first behaviour
    lambda_2 = pm.Exponential("lambda_2", 1)  # prior on second behaviour
    tau = pm.DiscreteUniform("tau", lower=0,
                             upper=10)  # prior on behaviour change

    print "lambda_1.value = %.3f" % lambda_1.value
    print "lambda_2.value = %.3f" % lambda_2.value
    print "tau.value = %.3f" % tau.value
    print

    lambda_1.random(), lambda_2.random(), tau.random()

    print "After calling random() on the variables..."
    print "lambda_1.value = %.3f" % lambda_1.value
    print "lambda_2.value = %.3f" % lambda_2.value
    print "tau.value = %.3f" % tau.value

    samples = [lambda_1.random() for i in range(20000)]
    plt.hist(samples, bins=70, normed=True, histtype="stepfilled")
    plt.title("Prior distribution for $\lambda_1$")
    plt.xlim(0, 8)
    plt.show()

    data = np.array([10, 5])
    fixed_variable = pm.Poisson("fxd", 1, value=data, observed=True)
    print "value: ", fixed_variable.value
    print "calling .random()"
    fixed_variable.random()
    print "value: ", fixed_variable.value

    n_data_points = 5  # in CH1 we had ~70 data points

    @pm.deterministic
    def lambda_(tau=tau, lambda_1=lambda_1, lambda_2=lambda_2):
        out = np.zeros(n_data_points)
        out[:tau] = lambda_1  # lambda before tau is lambda1
        out[tau:] = lambda_2  # lambda after tau is lambda2
        return out

    data = np.array([10, 25, 15, 20, 35])
    obs = pm.Poisson("obs", lambda_, value=data, observed=True)

    model = pm.Model([obs, lambda_, lambda_1, lambda_2, tau])
Example #10
def ML_NFW_Model(r_mpc, ghats, betas, pdz, concentration, zcluster,
                 likelihood_func, shapedistro_samples):

    # r_mpc, ghats, pdz, and shapedistro_samples may be either arrays or lists of arrays.
    # If lists of arrays, then each entry is associated across the lists and will be passed
    # to likelihood_func separately, with the logProbs summed together.

    #######
    # Data Prep
    #######

    if not isinstance(r_mpc, list):
        r_mpc = [r_mpc]
        ghats = [ghats]
        pdz = [pdz]
        shapedistro_samples = [shapedistro_samples]

    r_mpc = [np.ascontiguousarray(x) for x in r_mpc]
    pdz = [np.ascontiguousarray(x) for x in pdz]

    shapedistro_samples = [
        np.ascontiguousarray(x) for x in shapedistro_samples
    ]

    D_lens = sp.angulardist(zcluster)

    nzbins = len(betas)
    ghats = [np.ascontiguousarray(x) for x in ghats]

    betas = np.ascontiguousarray(betas)

    nshapebins = len(shapedistro_samples)

    #######
    # Model
    #######

    ## shape parameter priors

    shape_sample_index = [
        pymc.DiscreteUniform('shape_index_%d' % i, 0, len(x) - 1)
        for i, x in enumerate(shapedistro_samples)
    ]

    shape_params = np.empty(nshapebins, dtype=object)
    for i, index, samples in zip(range(len(shapedistro_samples)),
                                 shape_sample_index, shapedistro_samples):

        @pymc.deterministic(name='shape_params_%d' % i)
        def shape_param_func(index=index, samples=samples):
            return samples[:, index]

        shape_params[i] = shape_param_func

    ## r_scale is log-uniform
    log_r_scale = pymc.Uniform('log_r_scale', np.log(.01), np.log(1.))

    @pymc.stochastic(observed=True)
    def data(value=ghats, log_r_scale=log_r_scale, shape_params=shape_params):

        logprobs = np.array([likelihood_func(log_r_scale, cur_r_mpc,
                               cur_ghats, betas,
                               cur_pdz, cur_shapedistro_samples,
                               concentration, zcluster, D_lens) \
                                       for (cur_r_mpc, cur_ghats, cur_pdz, cur_shapedistro_samples) \
                                       in zip(r_mpc, ghats, pdz, shape_params)])

        return np.sum(logprobs)

    ########

    return locals()
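
# The custom-likelihood pattern above, reduced to a minimal self-contained
# sketch (a toy example, not the author's model): an observed stochastic
# whose log-probability is returned directly by the decorated function.
import numpy as np
import pymc

mu = pymc.Uniform('mu', -10., 10.)


@pymc.stochastic(observed=True)
def toy_data(value=np.array([1.1, 0.9, 1.3]), mu=mu):
    # Gaussian log-likelihood with unit variance, up to a constant
    return -0.5 * np.sum((value - mu) ** 2)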
Example #11
    def generate_pymc_(self, params, q0=None):
        '''
        Create PyMC objects for each param in the params dictionary

        NOTE: the second argument for normal distributions is VARIANCE

        Prior option:
            An arbitrary prior distribution derived from a set of samples (e.g.,
            a previous mcmc run) can be passed with the following syntax:

                 params = {<name> : ['KDE', <pymc_database>, <param_names>]}

            where <name> is the name of the distribution (e.g., 'prior' or
            'joint_dist'), <pymc_database> is the pymc database containing the
            samples from which the prior distribution will be estimated, and
            <param_names> are the children parameter names corresponding to the
            dimension of the desired sample array. This method will use all
            samples of the Markov chain contained in <pymc_database> for all
            traces named in <param_names>. Gaussian kernel-density estimation
            is used to derive the joint parameter distribution, which is then
            treated as a prior in subsequent mcmc analyses using the current
            class instance. The parameters named in <param_names> will be
            traced as will the multivariate distribution named <name>.
        '''
        pymc_mod = []
        pymc_mod_order = []
        parents = dict()

        # Iterate through params, assign prior distributions
        for key, args in self.params.items():
            # Distribution name should be first entry in [key]
            dist = args[0].lower()

            if dist == 'normal':
                if q0 is None:
                    RV = [pymc.Normal(key, mu=args[1], tau=1. / args[2])]
                else:
                    RV = [
                        pymc.Normal(key,
                                    mu=args[1],
                                    tau=1. / args[2],
                                    value=q0[key])
                    ]
            elif dist == 'uniform':
                if q0 is None:
                    RV = [pymc.Uniform(key, lower=args[1], upper=args[2])]
                else:
                    RV = [
                        pymc.Uniform(key,
                                     lower=args[1],
                                     upper=args[2],
                                     value=q0[key])
                    ]
            elif dist == 'discreteuniform':
                if q0 is None:
                    RV = [
                        pymc.DiscreteUniform(key, lower=args[1], upper=args[2])
                    ]
                else:
                    RV = [
                        pymc.DiscreteUniform(key,
                                             lower=args[1],
                                             upper=args[2],
                                             value=q0[key])
                    ]
            elif dist == 'truncatednormal':
                if q0 is None:
                    RV = [
                        pymc.TruncatedNormal(key,
                                             mu=args[1],
                                             tau=1. / args[2],
                                             a=args[3],
                                             b=args[4])
                    ]
                else:
                    RV = [
                        pymc.TruncatedNormal(key,
                                             mu=args[1],
                                             tau=1. / args[2],
                                             a=args[3],
                                             b=args[4],
                                             value=q0[key])
                    ]
            elif dist == 'kde':
                kde = multivariate_kde_from_samples(args[1], args[2])
                kde_rv, rvs = self._create_kde_stochastic(kde, key, args[2])
                if q0 is not None:
                    kde_rv.value = q0
                RV = [kde_rv]
                for rv_key, rv_value in rvs.items():
                    parents[rv_key] = rv_value
                    RV.append(rv_value)
            else:
                raise KeyError('The distribution "' + dist +
                               '" is not supported.')

            parents[key] = RV[0]
            pymc_mod_order.append(key)
            pymc_mod += RV

        return parents, pymc_mod, pymc_mod_order
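
# As the NOTE above says, the second entry of a 'normal' spec is a
# variance, while pymc.Normal takes a precision tau = 1 / variance.
# A hypothetical spec and the node it produces:
import pymc

variance = 4.0
x = pymc.Normal('x', mu=0.0, tau=1. / variance)  # i.e. stdev = 2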
Example #12
Prows = np.empty(Nnodes, dtype=object)
for i in range(Nnodes):
    t = np.ones(Nnodes) * 10
    t[i] = 0.5
    Prows[i] = pymc.Dirichlet('Dir_%i' % i, theta=t)

#Cardinality / Sparsity of the transition matrices
Vk = pymc.DiscreteUniform('Sparsity',
                          lower=1,
                          upper=min(4, Nnodes),
                          size=Nnodes)

#Vk = pymc.Geometric('Sparsity', p = 0.7, size=Nnodes)

Nsamples_multi = Nsamples // Nnodes  # integer division: samples per node
# s_tp1 = np.array([pymc.Multinomial('multi_%i'%i, p=P_s_tp1[i], n=Nsamples_multi, plot=False) for i in range(Nnodes)])
#frac = np.linspace(0, 1, Nsamples_multi)
frac = np.random.rand(Nsamples_multi)


@pymc.deterministic
def adjMatrix(Prows=Prows, Vk=Vk):
    P = np.empty((Nnodes, Nnodes))

    for (row, s_o) in enumerate(np.arange(Nnodes)):
Example #13
    # for python 2.7
    # xLabel = "Рік".decode('utf8')
    # yLabel = "Кількість штормів".decode('utf8')

    # for python 3.5
    xLabel = "Рік"  # "Year"
    yLabel = "Кількість штормів"  # "Number of storms"

    plt.xlabel(xLabel)
    plt.ylabel(yLabel)
    general.set_grid_to_plot()
    plt.savefig(general.folderPath1 + "exp1_storms.png")
    plt.clf()

    switchpoint = pymc.DiscreteUniform('switchpoint', lower=0, upper=len(arr) - 1)
    early_mean = pymc.Exponential('early_mean', beta=1)
    late_mean = pymc.Exponential('late_mean', beta=1)

    @pymc.deterministic(plot=False)
    def rate(s=switchpoint, e=early_mean, l=late_mean):
        out = np.empty(len(arr))
        out[:s] = e
        out[s:] = l
        return out

    storms = pymc.Poisson('storms', mu=rate, value=arr, observed=True)

    model = pymc.Model([switchpoint, early_mean, late_mean, rate, storms])

    mcmc = pymc.MCMC(model)
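    # Continuation sketch (assumed): draw samples and summarize the
    # inferred switchpoint.
    mcmc.sample(20000, 5000)
    print(np.mean(mcmc.trace('switchpoint')[:]))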
Example #14
import pymc as pm

car_door = pm.DiscreteUniform("car_door", lower=1, upper=3)
picked_door = pm.DiscreteUniform("picked_door", lower=1, upper=3)
preference = pm.DiscreteUniform("preference", lower=0, upper=1)


@pm.deterministic
def host_choice(car_door=car_door,
                picked_door=picked_door,
                preference=preference):
    if car_door != picked_door: return 6 - car_door - picked_door
    if car_door == 1:
        left = 2
        right = 3
    else:
        left = 1
        if car_door == 2:
            right = 3
        else:
            right = 2
    out = right if preference else left
    return out


@pm.deterministic
def changed_door(picked_door=picked_door, host_choice=host_choice):
    return 6 - host_choice - picked_door
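

# A forward-simulation sketch (an assumption, not in the original source):
# redraw the parent stochastics and let the deterministic nodes recompute,
# counting how often switching wins. The estimate should approach 2/3.
n_sim = 10000
switch_wins = 0
for _ in range(n_sim):
    car_door.random()
    picked_door.random()
    preference.random()
    switch_wins += int(changed_door.value == car_door.value)
print("P(win by switching) ~ %.3f" % (switch_wins / float(n_sim)))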

Example #15
def compare_groups(list1, list2):

    data = list1 + list2
    count_data = np.array(data)
    n_count_data = len(count_data)
    plt.bar(np.arange(n_count_data), count_data, color="#348ABD")
    plt.xlabel("Time (days)")
    plt.ylabel("count of text-msgs received")
    plt.title(
        "Did the viewers' ad viewing increase with the number of ads shown?")
    plt.xlim(0, n_count_data)
    #plt.show()

    alpha = 1.0 / count_data.mean()  # Recall count_data is the
    # variable that holds our txt counts
    print(alpha)
    lambda_1 = pm.Exponential("lambda_1", alpha)
    lambda_2 = pm.Exponential("lambda_2", alpha)

    tau = pm.DiscreteUniform("tau", lower=0, upper=n_count_data)

    @pm.deterministic
    def lambda_(tau=tau, lambda_1=lambda_1, lambda_2=lambda_2):
        out = np.zeros(n_count_data)
        out[:tau] = lambda_1  # lambda before tau is lambda1
        out[tau:] = lambda_2  # lambda after (and including) tau is lambda2
        return out

    observation = pm.Poisson("obs", lambda_, value=count_data, observed=True)

    model = pm.Model([observation, lambda_1, lambda_2, tau])

    mcmc = pm.MCMC(model)
    mcmc.sample(40000, 10000, 1)

    lambda_1_samples = mcmc.trace('lambda_1')[:]
    lambda_2_samples = mcmc.trace('lambda_2')[:]
    tau_samples = mcmc.trace('tau')[:]

    print(tau_samples)
    # histogram of the samples:

    ax = plt.subplot(311)
    ax.set_autoscaley_on(False)

    plt.hist(lambda_1_samples,
             histtype='stepfilled',
             bins=30,
             alpha=0.85,
             label="posterior of $\lambda_1$",
             color="#A60628",
             normed=True)
    plt.legend(loc="upper left")
    plt.title(r"""Posterior distributions of the variables
        $\lambda_1,\;\lambda_2,\;\tau$""")
    plt.xlim([0, 6])
    plt.ylim([0, 7])
    plt.xlabel("$\lambda_1$ value")

    ax = plt.subplot(312)
    ax.set_autoscaley_on(False)
    plt.hist(lambda_2_samples,
             histtype='stepfilled',
             bins=30,
             alpha=0.85,
             label="posterior of $\lambda_2$",
             color="#7A68A6",
             normed=True)
    plt.legend(loc="upper left")
    plt.xlim([0, 6])
    plt.ylim([0, 7])
    plt.xlabel("$\lambda_2$ value")

    plt.subplot(313)
    w = 1.0 / tau_samples.shape[0] * np.ones_like(tau_samples)
    plt.hist(tau_samples,
             bins=n_count_data,
             alpha=1,
             label=r"posterior of $\tau$",
             color="#467821",
             weights=w,
             rwidth=2.)
    plt.xticks(np.arange(n_count_data))

    plt.legend(loc="upper left")
    plt.ylim([0, .75])
    plt.xlim([0, len(count_data)])
    plt.xlabel(r"$\tau$ (iterations)")
    plt.ylabel("probability")

    plt.show()
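Example #16
# The notes below refer to parameter, data_generator, and data_plus_one;
# a plausible set of definitions (assumed here, following the usual
# parent/child demonstration) would be:
import pymc as pm

parameter = pm.Exponential("poisson_param", 1)
data_generator = pm.Poisson("data_generator", parameter)
data_plus_one = data_generator + 1
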
data_generator.parents
data_generator.children

# 'value' attribute
parameter.value
data_generator.value
data_plus_one.value

# 'stochastic' vars - still random even if parents are known
# 'deterministic' vars - not random if parents are known
# Initializing variables
#   * name argument - retrieves posterior dist 
#   * class specific arguments
#   * size - multivariate indp array of stochastic vars

some_var = pm.DiscreteUniform( "discrete_uni_var", 0, 4 )

betas = pm.Uniform( "betas", 0, 1, size=10 )
betas.value

# var.random() - generates new value
# var.value - returns new value
lambda_1 = pm.Exponential( "lambda_1", 1 )
lambda_2 = pm.Exponential( "lambda_2", 2 )
tau      = pm.DiscreteUniform( "tau", lower = 0, upper = 10 )

lambda_1.value
lambda_2.value
tau.value

lambda_1.random()
Example #17
def make_normal_baseline_hmm(y_data, X_data, baseline_end, initial_params):
    """ Construct a PyMC2 scalar normal-emmisions HMM with a
    stochastic reporting period start time parameter and baseline, reporting
    parameters for all other stochastics/estimated terms in the model.
    The reporting period start time parameter is given a discrete uniform
    distribution starting from the first observation after the baseline to the
    end of the series.

    Parameters
    ==========
    y_data: pandas.DataFrame
        Usage/response observations.
    X_data: list of pandas.DataFrame
        List of design matrices for each state.  Each must
        span the entire length of observations (i.e. `y_data`).
    baseline_end: pandas.tslib.Timestamp
        End of baseline period (inclusive), beginning of reporting period.
    initial_params: NormalHMMInitialParams
        An object containing the following fields/members:
    Returns
    =======
    A pymc.Model object used for sampling.
    """

    N_states = len(X_data)
    N_obs = X_data[0].shape[0]

    alpha_trans = initial_params.alpha_trans

    # TODO: If we wanted a distribution over the time
    # when a renovation becomes effective...
    baseline_idx = X_data[0].index.get_loc(baseline_end)
    reporting_start = pymc.DiscreteUniform("reporting_start",
                                           baseline_idx + 1,
                                           N_obs,
                                           value=baseline_idx + 1)

    trans_mat_baseline = TransProbMatrix("trans_mat_baseline",
                                         alpha_trans,
                                         value=initial_params.trans_mat)
    trans_mat_reporting = TransProbMatrix("trans_mat_reporting",
                                          alpha_trans,
                                          value=initial_params.trans_mat)

    @pymc.deterministic(trace=True, plot=False)
    def N_baseline(rs_=reporting_start):
        return rs_ - 1

    states_baseline_0 = initial_params.states[slice(0, baseline_idx)]
    states_baseline = HMMStateSeq("states_baseline",
                                  trans_mat_baseline,
                                  N_baseline,
                                  p0=initial_params.p0,
                                  value=states_baseline_0)

    @pymc.deterministic(trace=True, plot=False)
    def N_reporting(rs_=reporting_start):
        return N_obs - rs_

    states_reporting_0 = initial_params.states[slice(baseline_idx, N_obs)]
    # TODO, FIXME: p0 should depend on states_baseline and trans_mat_baseline,
    # no?
    states_reporting = HMMStateSeq("states_reporting",
                                   trans_mat_reporting,
                                   N_reporting,
                                   p0=initial_params.p0,
                                   value=states_reporting_0)

    @pymc.deterministic(trace=True, plot=False)
    def states(sb_=states_baseline, sr_=states_reporting):
        return np.concatenate([sb_, sr_])

    Ws = initial_params.Ws
    betas = [[], []]
    for s in range(N_states):
        size_s = len(initial_params.betas[s])
        baseline_beta_s = pymc.Cauchy('base-beta-{}'.format(s),
                                      initial_params.betas[s],
                                      Ws[s],
                                      value=initial_params.betas[s],
                                      size=size_s if size_s > 1 else None)
        betas[0] += [baseline_beta_s]

        reporting_beta_s = pymc.Cauchy('rep-beta-{}'.format(s),
                                       initial_params.betas[s],
                                       Ws[s],
                                       value=initial_params.betas[s],
                                       size=size_s if size_s > 1 else None)
        betas[1] += [reporting_beta_s]

    del s, baseline_beta_s, reporting_beta_s, size_s

    Vs = initial_params.Vs

    mu = HMMLinearCombination('mu', X_data, betas, states)

    @pymc.deterministic(trace=False, plot=False)
    def V(states_=states, V_=Vs):
        return V_[states_]

    if y_data is not None:
        y_data = np.ma.masked_invalid(y_data).astype(np.object)
        y_data.set_fill_value(None)

    y_rv = pymc.Normal('y',
                       mu,
                       1. / V,
                       value=y_data,
                       observed=True if y_data is not None else False)

    del initial_params

    return pymc.Model(locals())
Example #18
import pymc as pm
import numpy
from pymc.examples.DisasterModel import *
s = pm.DiscreteUniform('s', 1851, 1962, value=1900)


@pm.stochastic(dtype=int)
def s(value=1900, t_l=1851, t_h=1962):
    """The switchpoint for the rate of disaster occurrence."""
    if value > t_h or value < t_l:
        # Invalid values
        return -numpy.inf
    else:
        # Uniform log-likelihood
        return -numpy.log(t_h - t_l + 1)


@pm.stochastic(dtype=int)
def s(value=1900, t_l=1851, t_h=1962):
    """The switchpoint for the rate of disaster occurrence."""
    def logp(value, t_l, t_h):
        if value > t_h or value < t_l:
            return -numpy.inf
        else:
            return -numpy.log(t_h - t_l + 1)

    def random(t_l, t_h):
        # draw uniformly from [t_l, t_h]
        return numpy.round((t_h - t_l) * numpy.random.random()) + t_l


def s_logp(value, t_l, t_h):
    if value > t_h or value < t_l:
        return -numpy.inf
    else:
        return -numpy.log(t_h - t_l + 1)
Example #19
import pymc as mc

count_data = np.loadtxt("../../Chapter1_Introduction/data/txtdata.csv")
n_count_data = len(count_data)

alpha = 1.0 / count_data.mean()  #recall count_data is
#the variable that holds our txt counts

lambda_1 = mc.Exponential("lambda_1", alpha)
lambda_2 = mc.Exponential("lambda_2", alpha)

tau = mc.DiscreteUniform("tau", lower=0, upper=n_count_data)


@mc.deterministic
def lambda_(tau=tau, lambda_1=lambda_1, lambda_2=lambda_2):
    out = np.zeros(n_count_data)
    out[:tau] = lambda_1  #lambda before tau is lambda1
    out[tau:] = lambda_2  #lambda after tau is lambda2
    return out


observation = mc.Poisson("obs", lambda_, value=count_data, observed=True)
model = mc.Model([observation, lambda_1, lambda_2, tau])

mcmc = mc.MCMC(model)
mcmc.sample(100000, 50000, 1)
Example #20
import numpy as np
import pymc as pm
from matplotlib import pyplot as plt

true_N = 500
D = pm.rdiscrete_uniform(1, true_N, size=10)

N = pm.DiscreteUniform("N", lower=D.max(), upper=10000)

observation = pm.DiscreteUniform("obs",
                                 lower=0,
                                 upper=N,
                                 value=D,
                                 observed=True)

model = pm.Model([observation, N])

mcmc = pm.MCMC(model)
mcmc.sample(40000, 10000, 1)

N_samples = mcmc.trace('N')[:]

# histogram of the samples:

plt.hist(N_samples, normed=True)
plt.show()
Example #21
# @author: Usamahk

# Testing the PyMC function. Learning about stochastic and deterministic
# variables. From Cam Pilon's Book

import pymc as pm
import numpy as np

# Determining a stochastic value - random with no influences from 
# parent variables

lambda_1 = pm.Exponential("lambda_1", 1)  # prior on first behaviour
lambda_2 = pm.Exponential("lambda_2", 1)  # prior on second behaviour
tau = pm.DiscreteUniform("tau", lower=0, upper=10)  # prior on behaviour change

print ("lambda_1.value = %.3f" % lambda_1.value)
print ("lambda_2.value = %.3f" % lambda_2.value)
print ("tau.value = %.3f" % tau.value)

lambda_1.random(), lambda_2.random(), tau.random()

print ("After calling random() on the variables...")
print ("lambda_1.value = %.3f" % lambda_1.value)
print ("lambda_2.value = %.3f" % lambda_2.value)
print ("tau.value = %.3f" % tau.value)

# Note: - Don't change values in-place. It messes with PyMCs caching

# Defining a deterministic value - Values dependent on lambda_1 and lambda_2
Example #22
    plt.scatter(stormsYears, stormsNumbers, s=stormsNumbers)
    plt.xlabel("Рік")
    plt.ylabel("Кількість штормів")
    plt.savefig(general.folderPath2 + "exp2_storms1.png")
    plt.clf()

    plt.plot(stormsYears, stormsNumbers, '-ok')
    plt.xlim(year0, year1)
    plt.xlabel("Рік")
    plt.ylabel("Кількість штормів")
    general.set_grid_to_plot()
    plt.savefig(general.folderPath2 + "exp2_storms2.png")
    plt.clf()

    switchpoint = pm.DiscreteUniform('switchpoint',
                                     lower=0,
                                     upper=len(stormsNumbers) - 1,
                                     doc='Switchpoint[year]')

    avg = np.mean(stormsNumbers)
    early_mean = pm.Exponential('early_mean', beta=1./avg)
    late_mean = pm.Exponential('late_mean', beta=1./avg)

    @pm.deterministic(plot=False)
    def rate(s=switchpoint, e=early_mean, l=late_mean):
        # Concatenate Poisson means
        out = np.zeros(len(stormsNumbers))
        out[:s] = e
        out[s:] = l
        return out

    storms = pm.Poisson('storms',
Example #23
"""
from __future__ import division
import numpy as np
import pymc as pm
import matplotlib.pyplot as plt
from plot_post import plot_post

# THE DATA.
N = 30
z = 8
y = np.repeat([1, 0], [z, N - z])

# THE MODEL.
with pm.Model() as model:
    # Hyperprior on model index:
    model_index = pm.DiscreteUniform('model_index', lower=0, upper=1)
    # Prior
    nu = pm.Normal('nu', mu=0, tau=0.1)  # it is possible to use tau or sd
    eta = pm.Gamma('eta', .1, .1)
    theta0 = 1 / (1 + pm.exp(-nu))  # theta from model index 0
    theta1 = pm.exp(-eta)  # theta from model index 1
    theta = pm.switch(pm.eq(model_index, 0), theta0, theta1)
    # Likelihood
    y = pm.Bernoulli('y', p=theta, observed=y)
    # Sampling
    start = pm.find_MAP()
    steps = [pm.Metropolis([i]) for i in model.unobserved_RVs[1:]]
    steps.append(pm.ElemwiseCategoricalStep(var=model_index, values=[0, 1]))
    trace = pm.sample(10000, steps, start=start, progressbar=False)

# EXAMINE THE RESULTS.
Example #24
File: model_1_gof.py  Project: wqren/pymc
import pymc as pm
from numpy import array

__all__ = [
    'disasters_array', 'switchpoint', 'early_mean', 'late_mean', 'disasters'
]

disasters_array = array([
    4, 5, 4, 0, 1, 4, 3, 4, 0, 6, 3, 3, 4, 0, 2, 6, 3, 3, 5, 4, 5, 3, 1, 4, 4,
    1, 5, 5, 3, 4, 2, 5, 2, 2, 3, 4, 2, 1, 3, 2, 2, 1, 1, 1, 1, 3, 0, 0, 1, 0,
    1, 1, 0, 0, 3, 1, 0, 3, 2, 2, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 2, 1, 0, 0,
    0, 1, 1, 0, 2, 3, 3, 1, 1, 2, 1, 1, 1, 1, 2, 4, 2, 0, 0, 1, 4, 0, 0, 0, 1,
    0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1
])

n = len(disasters_array)

# Define data and stochastics

switchpoint = pm.DiscreteUniform('switchpoint', lower=0, upper=110)
early_mean = pm.Exponential('early_mean', beta=1.)
late_mean = pm.Exponential('late_mean', beta=1.)


@pm.stochastic(observed=True, dtype=int)
def disasters(value=disasters_array,
              early_mean=early_mean,
              late_mean=late_mean,
              switchpoint=switchpoint):
    """Annual occurences of coal mining disasters."""
    return pm.poisson_like(value[:switchpoint], early_mean) + pm.poisson_like(
        value[switchpoint:], late_mean)


@pm.deterministic
Example #25
neighbors, triangles, trimap, b = spherical.triangulate_sphere(X)
# spherical.plot_triangulation(X,neighbors)

# Matrix generation
triangle_areas = [spherical.triangle_area(X, t) for t in triangles]
Ctilde = spherical.Ctilde(X, triangles, triangle_areas)
C = spherical.C(X, triangles, triangle_areas)
G = spherical.G(X, triangles, triangle_areas)

# Operator generation
Ctilde = cholmod.into_matrix_type(Ctilde)
G = cholmod.into_matrix_type(G)
M = np.zeros(n)

kappa = pm.Exponential('kappa', 1, value=3)
alpha = pm.DiscreteUniform('alpha', 1, 10, value=2)


@pm.deterministic
def Q(kappa=kappa, alpha=alpha):
    out = operators.mod_frac_laplacian_precision(Ctilde, G, kappa, alpha,
                                                 cholmod)
    return out


# Nailing this ahead of time reduces time to compute logp from .18 to .13s for n=25000.
pattern_products = cholmod.pattern_to_products(Q.value)
# @pm.deterministic
# def pattern_products(Q=Q):
#     return cholmod.pattern_to_products(Q)
Example #26
    def __init__(self, param, frags, stream=None,  platform=None, param_to_opt=None, rj=False, sample_n5=False,
                 continuous_phase=False, sample_phase=False, init_random=True):
        """

        Parameters
        ----------
        param : Parmed CharmmParameterSet
        frags : list of torsionfit.QMDataBase
        stream : str
            Path to CHARMM stream file. Default None.
        platform : openmm.Platform
            Default None.
        param_to_opt : list of tuples of torsions.
            Default None.
        rj : bool
            If True, will use reversible jump to sample Fourier terms. If False, will sample all Ks. Default False
        sample_n5 : bool
            If True, will also sample n=5. Default False
        sample_phase : bool
            If False, phase is not sampled and Ks will be able to take on negative values. Default False. If False,
            make sure continuous_phase is also False.
        continuous_phase : bool
            If True, will allow phases to take on any value between 0-180. If False, phase will be discrete and only
            sample 0 or 180
        init_random: bool
            Randomize starting condition. Default is True. If False, will resort to whatever value is in the parameter set.


        Returns
        -------
        pymc model

        """

        if not isinstance(frags, list):
            frags = [frags]

        self.pymc_parameters = dict()
        self.frags = frags
        self.platform = platform
        self.rj = rj
        self.sample_n5 = sample_n5
        self.continuous_phase = continuous_phase
        self.sample_phase = sample_phase
        if param_to_opt:
            self.parameters_to_optimize = param_to_opt
        else:
            self.parameters_to_optimize = TorsionScan.to_optimize(param, stream)

        # Check that options are reasonable
        if not sample_phase and continuous_phase:
            warnings.warn("You can't eliminate phase but have continuous phase. Changing continuous phase to False")
            self.continuous_phase = False

        # set all phases to 0 if eliminate phase is True
        if not self.sample_phase:
            par.set_phase_0(self.parameters_to_optimize, param)

        multiplicities = [1, 2, 3, 4, 6]
        if self.sample_n5:
            multiplicities = [1, 2, 3, 4, 5, 6]
        multiplicity_bitstrings = dict()

        # offset
        for frag in self.frags:
            name = '%s_offset' % frag.topology._residues[0]
            offset = pymc.Uniform(name, lower=-50, upper=50, value=0)
            self.pymc_parameters[name] = offset

        # self.pymc_parameters['log_sigma_k'] = pymc.Uniform('log_sigma_k', lower=-4.6052, upper=3.453, value=np.log(0.01))
        # self.pymc_parameters['sigma_k'] = pymc.Lambda('sigma_k',
        #                                             lambda log_sigma_k=self.pymc_parameters['log_sigma_k']: np.exp(
        #                                                log_sigma_k))
        # self.pymc_parameters['precision_k'] = pymc.Lambda('precision_k',
        #                                                lambda log_sigma_k=self.pymc_parameters['log_sigma_k']: np.exp(
        #                                                     -2 * log_sigma_k))

        for p in self.parameters_to_optimize:
            torsion_name = p[0] + '_' + p[1] + '_' + p[2] + '_' + p[3]

            self.pymc_parameters['log_sigma_k_{}'.format(torsion_name)] = pymc.Uniform('log_sigma_k_{}'.format(torsion_name), lower=-4.6052, upper=3.453, value=np.log(0.01))
            self.pymc_parameters['sigma_k_{}'.format(torsion_name)] = pymc.Lambda('sigma_k_{}'.format(torsion_name),
                                                    lambda log_sigma_k=self.pymc_parameters['log_sigma_k_{}'.format(torsion_name)]: np.exp(
                                                       log_sigma_k))
            self.pymc_parameters['precision_k_{}'.format(torsion_name)] = pymc.Lambda('precision_k_{}'.format(torsion_name),
                                                       lambda log_sigma_k=self.pymc_parameters['log_sigma_k_{}'.format(torsion_name)]: np.exp(
                                                            -2 * log_sigma_k))


            if torsion_name not in multiplicity_bitstrings.keys():
                multiplicity_bitstrings[torsion_name] = 0

            for m in multiplicities:
                name = p[0] + '_' + p[1] + '_' + p[2] + '_' + p[3] + '_' + str(m) + '_K'
                if not self.sample_phase:
                    k = pymc.Normal(name, mu=0, tau=self.pymc_parameters['precision_k_{}'.format(torsion_name)], value=0)
                else:
                    k = pymc.Uniform(name, lower=0, upper=20, value=0)

                for i in range(len(param.dihedral_types[p])):
                    if param.dihedral_types[p][i].per == m:
                        multiplicity_bitstrings[torsion_name] += 2 ** (m - 1)
                        if not self.sample_phase:
                            k = pymc.Normal(name, mu=0, tau=self.pymc_parameters['precision_k_{}'.format(torsion_name)],
                                            value=param.dihedral_types[p][i].phi_k)
                        else:
                            k = pymc.Uniform(name, lower=0, upper=20, value=param.dihedral_types[p][i].phi_k)
                        break

                self.pymc_parameters[name] = k

                if self.sample_phase:
                    name = p[0] + '_' + p[1] + '_' + p[2] + '_' + p[3] + '_' + str(m) + '_Phase'
                    for i in range(len(param.dihedral_types[p])):
                        if param.dihedral_types[p][i].per == m:
                            if self.continuous_phase:
                                phase = pymc.Uniform(name, lower=0, upper=180.0, value=param.dihedral_types[p][i].phase)
                            else:
                                if param.dihedral_types[p][i].phase == 0:
                                    phase = pymc.DiscreteUniform(name, lower=0, upper=1, value=0)
                                    break

                                if param.dihedral_types[p][i].phase == 180.0:
                                    phase = pymc.DiscreteUniform(name, lower=0, upper=1, value=1)
                                    break
                        else:
                            if self.continuous_phase:
                                phase = pymc.Uniform(name, lower=0, upper=180.0, value=0)
                            else:
                                phase = pymc.DiscreteUniform(name, lower=0, upper=1, value=0)

                    self.pymc_parameters[name] = phase

        if self.rj:
            for torsion_name in multiplicity_bitstrings.keys():
                name = torsion_name + '_multiplicity_bitstring'
                bitstring = pymc.DiscreteUniform(name, lower=0, upper=63, value=multiplicity_bitstrings[torsion_name])
                self.pymc_parameters[name] = bitstring

        if init_random:
            # randomize initial value
            for parameter in self.pymc_parameters:
                if type(self.pymc_parameters[parameter]) != pymc.CommonDeterministics.Lambda: # and parameter[:11] != 'log_sigma_k':
                    self.pymc_parameters[parameter].random()
                    logger().info('initial value for {} is {}'.format(parameter, self.pymc_parameters[parameter].value))


        self.pymc_parameters['log_sigma'] = pymc.Uniform('log_sigma', lower=-10, upper=3, value=np.log(0.01))
        self.pymc_parameters['sigma'] = pymc.Lambda('sigma',
                                                    lambda log_sigma=self.pymc_parameters['log_sigma']: np.exp(
                                                        log_sigma))
        self.pymc_parameters['precision'] = pymc.Lambda('precision',
                                                        lambda log_sigma=self.pymc_parameters['log_sigma']: np.exp(
                                                            -2 * log_sigma))

        # add missing multiplicity terms to parameterSet so that the system has the same number of parameters
        par.add_missing(self.parameters_to_optimize, param, sample_n5=self.sample_n5)

        @pymc.deterministic
        def mm_energy(pymc_parameters=self.pymc_parameters, param=param):
            mm = np.ndarray(0)
            par.update_param_from_sample(self.parameters_to_optimize, param, model=self, rj=self.rj,
                                         phase=self.sample_phase, n_5=self.sample_n5, continuous=self.continuous_phase,
                                         model_type='openmm')
            for mol in self.frags:
                mol.compute_energy(param, offset=self.pymc_parameters['%s_offset' % mol.topology._residues[0]],
                                   platform=self.platform)
                mm = np.append(mm, mol.mm_energy / kilojoules_per_mole)
            return mm

        size = sum([len(i.qm_energy) for i in self.frags])
        qm_energy = np.ndarray(0)
        for i in range(len(frags)):
             qm_energy = np.append(qm_energy, frags[i].qm_energy)
        #diff_energy = np.ndarray(0)
        #for i in range(len(frags)):
        #    diff_energy = np.append(diff_energy, frags[i].delta_energy)
        self.pymc_parameters['mm_energy'] = mm_energy
        self.pymc_parameters['qm_fit'] = pymc.Normal('qm_fit', mu=self.pymc_parameters['mm_energy'],
                                                     tau=self.pymc_parameters['precision'], size=size, observed=True,
                                                     value=qm_energy)
Example #27
from matplotlib import pyplot as plt

#count_data = np.loadtxt("txtdata.csv")
count_data = np.loadtxt("txtdata_sim.csv")

n_count_data = len(count_data)

print(count_data.mean())

alpha = 1.0 / count_data.mean()  # Recall count_data is the
# variable that holds our txt counts

lambda_1 = pm.Exponential("lambda_1", alpha)
lambda_2 = pm.Exponential("lambda_2", alpha)

tau = pm.DiscreteUniform("tau", lower=0, upper=n_count_data)


@pm.deterministic
def lambda_(tau=tau, lambda_1=lambda_1, lambda_2=lambda_2):
    out = np.zeros(n_count_data)
    out[:tau] = lambda_1  # lambda before tau is lambda1
    out[tau:] = lambda_2  # lambda after (and including) tau is lambda2
    return out


observation = pm.Poisson("obs", lambda_, value=count_data, observed=True)

model = pm.Model([observation, lambda_1, lambda_2, tau])

mcmc = pm.MCMC(model)
Example #28
def mult_voigts(velocity, fluxv, fluxv_err, f, gamma, l0, nvoigts, RES, velo_range):
    '''
    Fitting a number of Voigt profiles to a spectrum in velocity space,
    given the restframe wavelength l0 (Angstrom), the oscillator
    strength f, damping constant gamma (km/s), and spectral resolution
    RES (km/s)
    '''

    #low_b = 2  
    low_b = round(0.354*RES/(2*np.sqrt(np.log(2))),2)

    print "\n Components with ~ b >", low_b, \
        "km/s can be resolved \n"

    tau_s = []   
    for i in [0, 1]:
        tau_s.append(1 / np.array(fluxv_err[i])**2)

    #@pymc.stochastic(dtype=float)
    # def a(value=1.0, mu=1.0, sig=0.1, doc="B"):
    # pp = 0.0
    # #if 0.85 <= value < 1.15:
    # pp = gauss(value, mu, sig)
    # #else:
    # # pp = -np.inf
    # return pp

    # Continuum model (up to quadratic polynomial)
    mu_bg_1 = np.nansum(fluxv[0]) / (len(fluxv[0]) - fluxv[0].count(np.nan))
    mu_bg_2 = np.nansum(fluxv[1]) / (len(fluxv[1]) - fluxv[1].count(np.nan))

    print(mu_bg_1, mu_bg_2)

    @pymc.stochastic(dtype=float)
    def a_1(value=mu_bg_1, mu=mu_bg_1, sig=0.5 * mu_bg_1, doc="a"):
        if mu_bg_1 / 2.0 < value < mu_bg_1 * 2.0:
            pp = gauss(value, mu, sig)
        else:
            pp = -np.inf
        return pp

    @pymc.stochastic(dtype=float)
    def a1_1(value=0.0, mu=0.0, sig=0.5, doc="a1"):
        if -0.3 < value < 0.3:
            pp = gauss(value, mu, sig)
        else:
            pp = -np.inf
        return pp

    @pymc.stochastic(dtype=float)
    def a2_1(value=0.0, mu=0.0, sig=0.5, doc="a2"):
        if -0.3 < value < 0.3:
            pp = gauss(value, mu, sig)
        else:
            pp = -np.inf
        return pp


    @pymc.stochastic(dtype=float)
    def a_2(value=mu_bg_2, mu=mu_bg_2, sig=0.5 * mu_bg_2, doc="a"):
        if mu_bg_2 / 2.0 < value < mu_bg_2 * 2.0:
            pp = gauss(value, mu, sig)
        else:
            pp = -np.inf
        return pp

    @pymc.stochastic(dtype=float)
    def a1_2(value=0.0, mu=0.0, sig=0.5, doc="a1"):
        if -0.3 < value < 0.3:
            pp = gauss(value, mu, sig)
        else:
            pp = -np.inf
        return pp

    @pymc.stochastic(dtype=float)
    def a2_2(value=0.0, mu=0.0, sig=0.5, doc="a2"):
        if -0.3 < value < 0.3:
            pp = gauss(value, mu, sig)
        else:
            pp = -np.inf
        return pp


    vars_dic = {}

    for i in range(1, nvoigts + 1):

        v0 = pymc.Uniform('v0' + str(i), lower=-velo_range, upper=velo_range, doc='v0' + str(i))
        b = pymc.DiscreteUniform('b' + str(i), lower=round(low_b, 0), upper=30, value=low_b+20, doc='b' + str(i))
        N = pymc.Uniform('N' + str(i), lower=0.0, upper=20, value=15, doc='N' + str(i))

        vars_dic['v0' + str(i)] = v0
        vars_dic['b' + str(i)] = b
        vars_dic['N' + str(i)] = N

    print "\n Starting MCMC " + '(pymc version:', pymc.__version__, ")"
    print "\n This might take a while ..."

    @pymc.deterministic(plot=False)
    def multVoigt(vv=velocity, a_1=a_1, a1_1=a1_1, a2_1=a2_1, a_2=a_2, a1_2=a1_2, a2_2=a2_2,
                  f=f, gamma=gamma, l0=l0,
                  nvoigts=nvoigts, vars_dic=vars_dic):

        model_matrix = []

        for i in [0, 1]:
          conv_val = RES / (2 * np.sqrt(2 * np.log(2)) * tf[i])
          gauss_k = Gaussian1DKernel(stddev=conv_val, mode="oversample")
  
          if i == 0:
              flux = np.ones(len(vv[i])) * a_1 #(a_1 + a1_1 * vv[i] + a2_1 * (power_lst(vv[i], 2)))
          if i == 1:
              flux = np.ones(len(vv[i])) * a_2 # (a_2 + a1_2 * vv[i] + a2_2 * (power_lst(vv[i], 2)))

          for j in range(1, nvoigts + 1):
              v = vv[i] - vars_dic["v0" + str(j)]
              flux *= add_abs_velo(v, vars_dic["N" + str(j)],
                                   vars_dic["b" + str(j)], gamma[i], f[i], l0[i])
  
          #model_matrix.append(flux)
          model_matrix.append(np.convolve(flux, gauss_k, mode='same'))


        #print a_1, a1_1, a2_1, a_2, a1_2, a2_2
        #print vars_dic
        #print model_matrix
        return model_matrix

    y_val = pymc.Normal('y_val', mu=multVoigt, tau=tau_s, value=fluxv, observed=True)

    return locals()
Example #29
    def __init__(self,
                 param,
                 frags,
                 stream=None,
                 param_to_opt=None,
                 rj=False,
                 init_random=True,
                 tau='mult'):
        """

        Parameters
        ----------
        param : Parmed CharmmParameterSet
        frags : list of torsionfit.QMDataBase
        stream : str
            Path to CHARMM stream file. Default None. If None, param_to_opt list must be given. When a stream file is
            specified, param_to_opt is generated if the penalty of the parameters are greater than a threshold.
        param_to_opt : list of tuples of torsions.
            Default None.
        rj : bool
            If True, will use reversible jump to sample Fourier terms. If False, will sample all Ks. Default False
        init_random: bool
            Randomize starting condition. Default is True. If false, will resort to whatever value is in the parameter set.
            Default True
        tau: string.
            options are 'mult' or 'single'. When 'mult', every element in K_m will have its own 'tau', when 'single',
            each K_m will have one tau.
            Default 'mult'

        Returns
        -------
        pymc model

        """

        if not isinstance(frags, list):
            frags = [frags]

        self.pymc_parameters = dict()
        self.frags = frags
        self.rj = rj
        if param_to_opt:
            self.parameters_to_optimize = param_to_opt
        else:
            self.parameters_to_optimize = TorsionScan.to_optimize(
                param, stream)

        multiplicity_bitstrings = dict()

        # offset
        for frag in self.frags:
            name = '%s_offset' % frag.topology._residues[0]
            offset = pymc.Uniform(name, lower=-50, upper=50, value=0)
            self.pymc_parameters[name] = offset

        if tau == 'mult':
            value = np.log(np.ones(6) * 0.01)
        elif tau == 'single':
            value = np.log(0.01)
        else:
            raise Exception(
                "Only 'mult' and 'single' are allowed options for tau")

        for p in self.parameters_to_optimize:
            torsion_name = p[0] + '_' + p[1] + '_' + p[2] + '_' + p[3]

            # lower and upper for this distribution are based on empirical data that below this amount the prior is too
            # biased and above the moves are usually rejected.
            self.pymc_parameters['log_sigma_k_{}'.format(
                torsion_name)] = pymc.Uniform(
                    'log_sigma_k_{}'.format(torsion_name),
                    lower=-4.6052,
                    upper=3.453,
                    value=value)
            self.pymc_parameters['sigma_k_{}'.format(
                torsion_name)] = pymc.Lambda(
                    'sigma_k_{}'.format(torsion_name),
                    lambda log_sigma_k=self.pymc_parameters[
                        'log_sigma_k_{}'.format(torsion_name)]: np.exp(
                            log_sigma_k))
            self.pymc_parameters['precision_k_{}'.format(
                torsion_name)] = pymc.Lambda(
                    'precision_k_{}'.format(torsion_name),
                    lambda log_sigma_k=self.pymc_parameters[
                        'log_sigma_k_{}'.format(torsion_name)]: np.exp(
                            -2 * log_sigma_k))

            self.pymc_parameters['{}_K'.format(torsion_name)] = pymc.Normal(
                '{}_K'.format(torsion_name),
                value=np.zeros(6),
                mu=0,
                tau=self.pymc_parameters['precision_k_{}'.format(
                    torsion_name)])

            if torsion_name not in multiplicity_bitstrings.keys():
                multiplicity_bitstrings[torsion_name] = 0

        if self.rj:
            for torsion_name in multiplicity_bitstrings.keys():
                name = torsion_name + '_multiplicity_bitstring'
                bitstring = pymc.DiscreteUniform(
                    name,
                    lower=0,
                    upper=63,
                    value=multiplicity_bitstrings[torsion_name])
                self.pymc_parameters[name] = bitstring

        if init_random:
            # randomize initial value
            for parameter in self.pymc_parameters:
                if (type(self.pymc_parameters[parameter]) !=
                        pymc.CommonDeterministics.Lambda
                        and parameter[:11] != 'log_sigma_k'):
                    self.pymc_parameters[parameter].random()
                    logger().info('initial value for {} is {}'.format(
                        parameter, self.pymc_parameters[parameter].value))

        self.pymc_parameters['log_sigma'] = pymc.Uniform('log_sigma',
                                                         lower=-10,
                                                         upper=3,
                                                         value=np.log(0.01))
        self.pymc_parameters['sigma'] = pymc.Lambda(
            'sigma',
            lambda log_sigma=self.pymc_parameters['log_sigma']: np.exp(
                log_sigma))
        self.pymc_parameters['precision'] = pymc.Lambda(
            'precision',
            lambda log_sigma=self.pymc_parameters['log_sigma']: np.exp(
                -2 * log_sigma))

        # Precalculate phis
        n = np.array([1., 2., 3., 4., 5., 6.])
        self.models = []
        for i in itertools.product((0, 1), repeat=6):
            self.models.append(i)

        inner_sum = []
        for i, frag in enumerate(frags):
            inner_sum.append(OrderedDict())
            for t in frag.phis:
                inner_sum[i][t] = (1 + np.cos(
                    frag.phis[t][:, np.newaxis] * n[:, np.newaxis])).sum(-1)
        self.inner_sum = inner_sum

        @pymc.deterministic
        def torsion_energy(pymc_parameters=self.pymc_parameters):
            mm = np.ndarray(0)

            for i, mol in enumerate(self.frags):
                Fourier_sum = np.zeros((mol.n_frames))
                for t in inner_sum[i]:
                    name = t[0] + '_' + t[1] + '_' + t[2] + '_' + t[3]
                    if self.rj:
                        K = pymc_parameters['{}_K'.format(name)] * self.models[
                            pymc_parameters['{}_multiplicity_bitstring'.format(
                                name)]]
                    else:
                        K = pymc_parameters['{}_K'.format(name)]
                    Fourier_sum += (K * inner_sum[i][t]).sum(1)
                Fourier_sum_rel = Fourier_sum - min(Fourier_sum)
                Fourier_sum_rel += pymc_parameters['{}_offset'.format(
                    mol.topology._residues[0])]
                mm = np.append(mm, Fourier_sum_rel)
            return mm

        size = sum([len(i.qm_energy) for i in self.frags])
        residual_energy = np.ndarray(0)
        for i in range(len(frags)):
            residual_energy = np.append(residual_energy, frags[i].delta_energy)

        self.pymc_parameters['torsion_energy'] = torsion_energy
        self.pymc_parameters['qm_fit'] = pymc.Normal(
            'qm_fit',
            mu=self.pymc_parameters['torsion_energy'],
            tau=self.pymc_parameters['precision'],
            size=size,
            observed=True,
            value=residual_energy)
Example #30
def create_multi_mk_model(tree, chars, Qtype, pi, nregime=2):
    """
    Create an mk model with multiple regimes to be sampled from with MCMC.

    Regime number is fixed and the location of the regime shift is allowed
    to change
    """
    if type(chars) == dict:
        chars = [chars[l] for l in [n.label for n in tree.leaves()]]
    # Preparations
    nchar = len(set(chars))
    if Qtype=="ER":
        N = 1
    elif Qtype=="Sym":
        N = int(binom(nchar, 2))
    elif Qtype=="ARD":
        N = int((nchar ** 2 - nchar))
    else:
        raise ValueError("Qtype must be one of: ER, Sym, ARD")
    # This model has 2 components: Q parameters and a switchpoint
    # They are combined in a custom likelihood function

    ###########################################################################
    # Switchpoint:
    ###########################################################################
    # Modeling the movement of the regime shift(s) is the tricky part
    # Regime shifts will only be allowed to happen at a node
    # Regime shift: Uniform categorical distribution
    valid_switches = [i.ni for i in tree if not (i.isleaf or i.isroot)]
    # Uniform
    switch_ind = pymc.DiscreteUniform("switch_ind",lower=0, upper=len(valid_switches)-1)
    @pymc.deterministic(dtype=int)
    def switch(name="switch",switch_ind=switch_ind):
        return valid_switches[switch_ind]
    ###########################################################################
    # Qparams:
    ###########################################################################
    # Unscaled Q param: Dirichlet distribution
    # Setting a Dirichlet prior with Jeffrey's hyperprior of 1/2
    theta = [1.0/2.0]*N

    # One set of Q-parameters per regime
    allQparams_init = np.empty(nregime, dtype=object)
    allQparams_init_full = np.empty(nregime, dtype=object)
    allScaling_factors = np.empty(nregime, dtype=object)
    for i in range(nregime):
        if N != 1:
            allQparams_init[i] = pymc.Dirichlet("allQparams_init"+str(i), theta)
            allQparams_init_full[i] = pymc.CompletedDirichlet("allQparams_init_full"+str(i), allQparams_init[i])
        else: # Dirichlet function does not like creating a distribution
              # with only 1 state. Set it to 1 by hand
            allQparams_init_full[i] = [[1.0]]
        # Exponential scaling factor for Qparams
        allScaling_factors[i] = pymc.Exponential(name="allScaling_factors"+str(i), beta=1.0)
        # Scaled Qparams; we would not expect them to necessarily add
        # to 1 as would be the case in a Dirichlet distribution

    # Regimes are grouped by rows. Each row is a regime.
    @pymc.deterministic(plot=False)
    def Qparams(q=allQparams_init_full, s=allScaling_factors):
        Qs = np.empty([nregime,N])
        for n in range(N):
            for i in range(nregime):
                Qs[i][n] = q[i][0][n]*s[i]
        return Qs
    ###########################################################################
    # Likelihood
    ###########################################################################
    # The likelihood function

    # Pre-allocating arrays
    qarray = np.zeros([nregime,N])
    locsarray = np.empty([2], dtype=object)
    l = mk_mr.create_likelihood_function_multimk(tree=tree, chars=chars,
        Qtype=Qtype,
        pi=pi, findmin=False, nregime=nregime)

    @pymc.potential
    def multi_mklik(q = Qparams, switch=switch, name="multi_mklik"):

        locs = mk_mr.locs_from_switchpoint(tree,tree[int(switch)],locsarray)

        np.copyto(qarray, q)
        return l(qarray, locs=locs)
    return locals()
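
# Usage sketch (assumed; tree is an ivy tree object and chars the character
# list): create_multi_mk_model returns locals(), which pymc.MCMC accepts
# directly as a node dictionary.
# mc = pymc.MCMC(create_multi_mk_model(tree, chars, Qtype="ER", pi="Equal"))
# mc.sample(10000, 2000)
# switch_trace = mc.trace('switch')[:]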