Ejemplo n.º 1
0
def MCMCPoissonPosteriorRatio(sample_number, burn, count1, count2):
    """Sample the log2-ratio of two posterior Poisson rates with MCMC.

    Each observed count is modelled as a Poisson draw whose rate has a
    Uniform(0, 10000) prior. The sampled quantity is
    log2(lambda1) - log2(lambda2).

    sample_number: total number of MCMC iterations. Must be greater than burn.
    burn: number of initial iterations to discard.
    count1: observed counts of condition 1
    count2: observed counts of condition 2

    return: the trace of the log2-ratio node, as returned by ``ratio.trace()``

    raises ValueError: if sample_number is not greater than burn.
    """
    # Fail fast with a clear message instead of sampling a run that keeps
    # no iterations.
    if sample_number <= burn:
        raise ValueError(
            "sample_number (%r) must be greater than burn (%r)" %
            (sample_number, burn))

    lam1 = pymc.Uniform('U1', 0,
                        10000)  # prior of lambda is uniform distribution
    lam2 = pymc.Uniform('U2', 0,
                        10000)  # prior of lambda is uniform distribution
    poi1 = pymc.Poisson('P1', lam1, value=count1,
                        observed=True)  # Poisson with observed value count1
    poi2 = pymc.Poisson('P2', lam2, value=count2,
                        observed=True)  # Poisson with observed value count2

    @deterministic
    def ratio(l1=lam1, l2=lam2):
        # log2 of the rate ratio, written as a difference of logs
        return log(l1, 2) - log(l2, 2)

    mcmcmodel = pymc.MCMC([ratio, lam1, poi1, lam2, poi2])
    mcmcmodel.use_step_method(pymc.AdaptiveMetropolis,
                              [ratio, lam1, lam2, poi1, poi2],
                              delay=20000)
    # NOTE(review): PROGRESS_BAR_ENABLED presumably signals that this PyMC
    # version accepts the ``progress_bar`` keyword (it is then switched off)
    # -- confirm against where the flag is defined.
    if PROGRESS_BAR_ENABLED:
        mcmcmodel.sample(iter=sample_number, progress_bar=False, burn=burn)
    else:
        mcmcmodel.sample(iter=sample_number, burn=burn)
    return ratio.trace()
Ejemplo n.º 2
0
def simple_mcmc_model(p_df):
    """Build a single-rate Poisson model for the counts in ``p_df``.

    A Normal node centred on ``np.mean(p_df)`` (precision 0.00001) is used
    as the mean of two Poisson nodes: one observed at ``p_df`` and one
    unobserved.

    Returns the tuple ``(mu_node, observed_node, estimated_node, model)``.
    """
    rate = pm.Normal('mu', mu=np.mean(p_df), tau=0.00001)
    observed_counts = pm.Poisson('observed',
                                 mu=rate,
                                 value=p_df,
                                 observed=True)
    estimated_counts = pm.Poisson('estimated', mu=rate, observed=False)

    nodes = [rate, observed_counts, estimated_counts]
    return rate, observed_counts, estimated_counts, pm.Model(nodes)
Ejemplo n.º 3
0
def gaussian_plus_constant(bins, observed_counts_per_bin, init=None):
    """Build the PyMC nodes for a constant-plus-Gaussian emission line model.

    Four Uniform priors (constant, amplitude, position, width) feed a
    deterministic node that evaluates ``integral_across_all_bins`` over
    ``bins``. That node is the mean of a Poisson likelihood observed at
    ``observed_counts_per_bin`` and of an unobserved "predictive" Poisson.

    Parameters
    ----------
    bins
        Passed unchanged to ``integral_across_all_bins``.
    observed_counts_per_bin
        Observed value of the Poisson likelihood node.
    init
        Must be None; any other value raises ValueError.

    Returns
    -------
    dict
        ``locals()`` of this function: the arguments plus every node
        defined below, keyed by local variable name.

    Raises
    ------
    ValueError
        If ``init`` is not None (custom initialisation is not implemented).
    """
    if init is None:
        constant = pymc.Uniform('constant',
                                lower=0.0,
                                upper=6.0,
                                doc='constant')

        amplitude = pymc.Uniform('amplitude',
                                 lower=0.0,
                                 upper=10.0,
                                 doc='amplitude')

        position = pymc.Uniform('position',
                                lower=-20.0,
                                upper=10.0,
                                doc='position')

        # NOTE(review): width shares the [-20, 10] range of position, so it
        # may be negative -- confirm the parameterisation used by
        # integral_across_all_bins.
        width = pymc.Uniform('width', lower=-20.0, upper=10.0, doc='width')
    else:
        raise ValueError('Not implemented yet')

    # Model for the emission line
    @pymc.deterministic(plot=False)
    def modeled_emission(c=constant,
                         a=amplitude,
                         p=position,
                         w=width,
                         bins=bins):
        # Evaluate the model over the bins for parameters (c, a, p, w)
        out = integral_across_all_bins(bins, (c, a, p, w))
        return out

    # NOTE(review): this potential looks unfinished -- it computes both
    # totals but returns None, and it calls the deterministic node like a
    # function. Confirm the intended constraint before relying on it.
    @pymc.potential
    def constrain_total_emission():
        total_observed_emission = np.sum(observed_counts_per_bin)
        total_fit_emission = np.sum(
            modeled_emission(c=constant,
                             a=amplitude,
                             p=position,
                             w=width,
                             bins=bins))
        return

    # Poisson likelihood of the observed counts given the modeled emission
    spectrum = pymc.Poisson('emission',
                            mu=modeled_emission,
                            value=observed_counts_per_bin,
                            observed=True)

    # Need to add in the potential constraint

    # Unobserved Poisson node with the same mean as the likelihood
    predictive = pymc.Poisson('predictive', mu=modeled_emission)
    # Every local name (arguments and nodes) is handed back to the caller
    return locals()
def gamma_poisson(x, t):
    """Build the PyMC nodes of a hierarchical gamma-Poisson model.

    x: number of failures (N vector), or None to leave the node unobserved
    t: operation time, thousands of hours (N vector), or None to leave
       the node unobserved

    Returns ``locals()``: every local name defined here (N, t, alpha, beta,
    theta, mu, x), where ``x`` and ``t`` have been rebound to PyMC nodes.
    """

    # Node size comes from the data when present, otherwise from the
    # module-level ``num_points``.
    if x is not None:
        N = x.shape
    else:
        N = num_points

    # place an exponential prior on t, for when it is unknown
    t = pymc.Exponential('t',
                         beta=1.0 / 50.0,
                         value=t,
                         size=N,
                         observed=(t is not None))

    # Hyper-priors for the per-unit rate theta
    alpha = pymc.Exponential('alpha', beta=1.0, value=1.0)
    beta = pymc.Gamma('beta', alpha=0.1, beta=1.0, value=1.0)

    theta = pymc.Gamma('theta', alpha=alpha, beta=beta, size=N)

    # Poisson mean: rate times operation time
    @pymc.deterministic
    def mu(theta=theta, t=t):
        return theta * t

    # The argument ``x`` is replaced by the Poisson node built from it
    x = pymc.Poisson('x', mu=mu, value=x, observed=(x is not None))

    return locals()
Ejemplo n.º 5
0
    def likelihood_model1(self, nObs, yObs, Slope, Norm, Sig, dx, xp=14, deg=3):
        """Build a Poisson number-count likelihood with a polynomial exponent.

        The expected count is ``dx * exp(P(mu)) * Slope * exp(-Sig**2 * P'(mu))``
        where ``mu = Slope * yObs + Norm``, ``P`` is a polynomial of degree
        ``deg`` whose coefficients are Normal(0, tau=0.0001) nodes, and
        ``P'`` is its derivative.

        ``xp`` is accepted but not used in this method.

        Returns ``locals()``: every local name defined here (including
        ``self`` and the arguments), keyed by name.
        """

        # (1) Calculate the expected Mass -> MCR
        # (2) Calculate the slope and scatter parameter in -> MOR
        # (3) Calculate Number Count
        # (4) Write the likelihood

        mu = Slope * yObs + Norm
        # alpha = 1.0 / Slope # First Order Approximation
        # sigma = Sig / Slope # First Order Approximation

        # [beta_n, beta_n-1, beta_n-2, ...]
        # Highest-order coefficient first, the order np.poly1d expects.
        beta = [pymc.Normal('beta_%i'%i, mu=0., tau=0.0001, value=0.0, observed=False) for i in range(deg+1)]

        @pymc.deterministic(plot=False)
        def exp_n(beta=beta, mu=mu, deg=deg, slope=Slope, sig=Sig, dx=dx):
            # It returns the normalization and the first order approximation for the scatter
            # MF = A x exp(beta1 x mu)

            p = np.poly1d(beta)
            A = dx * np.exp(p(mu))

            # Coefficients of the derivative of p, evaluated at mu
            c = [beta[j] * (deg - j) for j in range(deg)]
            p = np.poly1d(c)
            beta1 = p(mu)

            return A * slope * np.exp(- sig**2 * beta1)

        # Observed counts are Poisson-distributed around exp_n
        likelihood = pymc.Poisson('n_obs', mu=exp_n, value=nObs, observed=True)

        return locals()
Ejemplo n.º 6
0
def three_model_comparison(p_df):
    """Build a three-regime Poisson switch-point model for ``p_df``.

    Three Exponential rates share one Uniform(0, 1) hyper-parameter. Two
    DiscreteUniform switch points split the series into three consecutive
    segments, and the resulting piecewise rate is the mean of a Poisson
    likelihood observed at ``p_df``.

    NOTE(review): the switch points are used as positions in the series,
    yet their upper bounds come from ``max(p_df)`` rather than
    ``len(p_df)`` -- confirm this is intended.

    Returns the tuple
    ``(model, hyper, rate_1, rate_2, rate_3, switch_1, switch_2)``.
    """
    n_points = len(p_df)

    # Shared hyper-parameter and the three segment rates
    rate_hyper = pm.Uniform('d_lam', 0, 1)
    rate_first = pm.Exponential("t_lambda_1", rate_hyper)
    rate_second = pm.Exponential("t_lambda_2", rate_hyper)
    rate_third = pm.Exponential("t_lambda_3", rate_hyper)

    # The second switch point is bounded below by the first
    first_switch = pm.DiscreteUniform("tau1", lower=0, upper=max(p_df) - 1)
    second_switch = pm.DiscreteUniform("tau",
                                       lower=first_switch,
                                       upper=max(p_df))

    @pm.deterministic
    def lambda_(tau_1=first_switch,
                tau_2=second_switch,
                lambda_1=rate_first,
                lambda_2=rate_second,
                lambda_3=rate_third):
        rates = np.zeros(n_points)
        rates[:tau_1] = lambda_1  # before the first switch point
        rates[tau_1:tau_2] = lambda_2  # between the two switch points
        rates[tau_2:] = lambda_3  # from the second switch point onwards
        return rates

    likelihood = pm.Poisson('t_observed',
                            mu=lambda_,
                            value=p_df,
                            observed=True)

    nodes = [
        likelihood, rate_hyper, rate_first, rate_second, rate_third,
        first_switch, second_switch
    ]
    # The likelihood node is part of the model but not of the returned tuple
    return (pm.Model(nodes),) + tuple(nodes[1:])
Ejemplo n.º 7
0
    def likelihood_model_obs2(self, nObs, yObs, dObs, dV, expMu, slope_pri, sig_pri, deg=3):
        """Build a Poisson number-count likelihood with fixed slope and scatter.

        The expected count is
        ``dV * dObs * exp(P(expMu)) * slope_pri * exp(sig_pri**2 * P'(expMu))``
        where ``P`` is a polynomial of degree ``deg`` whose coefficients are
        Normal(0, tau=0.0001) nodes and ``P'`` is its derivative.

        ``yObs`` is accepted but not used in this method.

        Returns ``locals()``: every local name defined here (including
        ``self`` and the arguments), keyed by name.
        """

        # Slope and scatter are taken as given; the commented-out priors show
        # the stochastic alternative.
        slope = slope_pri #pymc.Normal('slope', mu=1.0, tau=1600.0, value=1.0, observed=False)
        sig = sig_pri #pymc.Normal('slope', mu=1.0, tau=1600.0, value=1.0, observed=False)

        # [beta_n, beta_n-1, beta_n-2, ...]
        # Highest-order coefficient first, the order np.poly1d expects.
        beta = [pymc.Normal('beta_%i'%i, mu=0., tau=0.0001, value=0.0, observed=False) for i in range(deg+1)]

        @pymc.deterministic(plot=False)
        def exp_n(beta=beta, mu=expMu, deg=deg, slope=slope, sig=sig, dx=dObs, dV=dV):
            # It returns the normalization and the first order approximation for the scatter
            # MF = A x exp(beta1 x mu)

            p = np.poly1d(beta)
            A = dx * np.exp(p(mu))

            # Coefficients of the derivative of p, evaluated at mu
            c = [beta[j] * (deg - j) for j in range(deg)]
            p = np.poly1d(c)
            beta1 = p(mu)

            # NOTE(review): the exponent here is +sig**2 * beta1; confirm the
            # sign is intended.
            return dV * A * slope * np.exp(np.array(sig)**2 * beta1)

        # Observed counts are Poisson-distributed around exp_n
        likelihood = pymc.Poisson('n_obs', mu=exp_n, value=nObs, observed=True)

        return locals()
Ejemplo n.º 8
0
def two_model_comparison(p_df):
    """Build a two-regime Poisson switch-point model for ``p_df``.

    Two Exponential rates share one Uniform(0, 1) hyper-parameter. A
    DiscreteUniform switch point splits the series in two, and the
    resulting piecewise rate is the mean of a Poisson likelihood observed
    at ``p_df``.

    NOTE(review): the switch point is used as a position in the series,
    yet its upper bound is ``max(p_df)`` rather than ``len(p_df)`` --
    confirm this is intended.

    Returns the tuple ``(model, observed, hyper, rate_1, rate_2, switch)``.
    """
    n_points = len(p_df)

    # Shared hyper-parameter and the two segment rates
    rate_hyper = pm.Uniform('d_lam', 0, 1)
    rate_before = pm.Exponential("lambda_1", rate_hyper)
    rate_after = pm.Exponential("lambda_2", rate_hyper)

    switch_point = pm.DiscreteUniform("tau", lower=0, upper=max(p_df))

    @pm.deterministic
    def lambda_(tau=switch_point, lambda_1=rate_before, lambda_2=rate_after):
        rates = np.zeros(n_points)
        rates[:tau] = lambda_1  # before the switch point
        rates[tau:] = lambda_2  # from the switch point onwards
        return rates

    likelihood = pm.Poisson('d_observed',
                            mu=lambda_,
                            value=p_df,
                            observed=True)

    nodes = [likelihood, rate_hyper, rate_before, rate_after, switch_point]
    return (pm.Model(nodes),) + tuple(nodes)
Ejemplo n.º 9
0
def compute_n_sat_prior(informative=False,
                        poisson_mu=None,
                        uniform_lower=None,
                        uniform_upper=None):
    """
    Return the prior distribution used for n_sat.

    Two priors are supported:
    - uninformative: discrete uniform distribution
    - informative: Poisson distribution

    Parameters
    ----------
    informative : bool, optional (default: False)
        Selects the Poisson prior when true and the
        discrete uniform prior otherwise.
    poisson_mu : int, optional (default: None)
        Mean of the Poisson prior. Required when
        `informative` is true.
    uniform_lower : int, optional (default: None)
        Lower bound of the discrete uniform prior.
        Required when `informative` is false.
    uniform_upper : int, optional (default: None)
        Upper bound of the discrete uniform prior.
        Required when `informative` is false.

    Returns
    -------
    pymc distribution
        Prior distribution for n_sat.

    Notes
    -----
    When a required parameter is missing the process is terminated through
    `sys.exit` with an explanatory message.
    """
    if not informative:
        bounds_missing = uniform_lower is None or uniform_upper is None
        if bounds_missing:
            sys.exit("If you want to use an uniform prior for n_sat, "
                     "please specify the parameters `uniform_lower` "
                     "and `uniform_upper`.")
        return pymc.DiscreteUniform("n_sat",
                                    lower=uniform_lower,
                                    upper=uniform_upper)

    if poisson_mu is None:
        sys.exit("If you want to use a Poisson prior for n_sat, "
                 "please specify the parameter `poisson_mu`.")
    return pymc.Poisson("n_sat", mu=poisson_mu)
Ejemplo n.º 10
0
def main():
    """Walk through basic PyMC stochastic, observed and deterministic nodes.

    Prints the values of three prior nodes before and after ``random()``,
    plots a histogram of draws from one prior, shows that ``random()`` does
    not change an observed node, and finally assembles a small switch-point
    model.

    Every print is a single-argument ``print(...)`` call so the output is
    identical on Python 2 and Python 3.
    """
    lambda_1 = pm.Exponential("lambda_1", 1)  # prior on first behaviour
    lambda_2 = pm.Exponential("lambda_2", 1)  # prior on second behaviour
    tau = pm.DiscreteUniform("tau", lower=0,
                             upper=10)  # prior on behaviour change

    print("lambda_1.value = %.3f" % lambda_1.value)
    print("lambda_2.value = %.3f" % lambda_2.value)
    print("tau.value = %.3f" % tau.value)
    print("")

    # Redraw each prior node; the values printed below should differ
    lambda_1.random()
    lambda_2.random()
    tau.random()

    print("After calling random() on the variables...")
    print("lambda_1.value = %.3f" % lambda_1.value)
    print("lambda_2.value = %.3f" % lambda_2.value)
    print("tau.value = %.3f" % tau.value)

    samples = [lambda_1.random() for _ in range(20000)]
    # NOTE(review): ``normed`` was replaced by ``density`` in newer
    # matplotlib releases -- confirm which version this must run on.
    plt.hist(samples, bins=70, normed=True, histtype="stepfilled")
    # Raw string keeps the LaTeX backslash literal
    plt.title(r"Prior distribution for $\lambda_1$")
    plt.xlim(0, 8)
    plt.show()

    data = np.array([10, 5])
    fixed_variable = pm.Poisson("fxd", 1, value=data, observed=True)
    # Two spaces reproduce the separator the comma-form print statement
    # inserted after "value: ".
    print("value:  %s" % (fixed_variable.value,))
    print("calling .random()")
    fixed_variable.random()
    print("value:  %s" % (fixed_variable.value,))

    n_data_points = 5  # in CH1 we had ~70 data points

    @pm.deterministic
    def lambda_(tau=tau, lambda_1=lambda_1, lambda_2=lambda_2):
        out = np.zeros(n_data_points)
        out[:tau] = lambda_1  # lambda before tau is lambda1
        out[tau:] = lambda_2  # lambda after tau is lambda2
        return out

    data = np.array([10, 25, 15, 20, 35])
    obs = pm.Poisson("obs", lambda_, value=data, observed=True)

    model = pm.Model([obs, lambda_, lambda_1, lambda_2, tau])
Ejemplo n.º 11
0
 def test_start(self):
     """Run ``pm.sample_smc`` with explicit per-draw start values."""
     draws = 500
     with pm.Model():
         rate = pm.Poisson("a", 5)
         scale = pm.HalfNormal("b", 10)
         pm.Normal("y", rate, scale, observed=[1, 2, 3, 4])

         # One start value per draw, for "a" and for the key "b_log__"
         a_start = np.random.poisson(5, size=draws)
         b_start = np.abs(np.random.normal(0, 10, size=draws))
         pm.sample_smc(draws,
                       chains=1,
                       start={"a": a_start, "b_log__": b_start})
Ejemplo n.º 12
0
    def test_variable_type(self):
        """Posterior predictive draws keep float/int dtypes per variable."""
        with pm.Model() as model:
            rate = pm.HalfNormal("mu", 1)
            pm.Normal("a", mu=rate, sigma=2, observed=np.array([1, 2]))
            pm.Poisson("b", rate, observed=np.array([1, 2]))
            trace = pm.sample(compute_convergence_checks=False,
                              return_inferencedata=False)

        with model:
            ppc = pm.sample_posterior_predictive(trace,
                                                 return_inferencedata=False,
                                                 samples=1)
            # Normal draws must be floating point, Poisson draws integer
            for var_name, expected_kind in (("a", "f"), ("b", "i")):
                assert ppc[var_name].dtype.kind == expected_kind
Ejemplo n.º 13
0
def model_factory():
    """Build the PyMC nodes of the model and return them as a dict.

    The dict holds the three Uniform priors (``x``, ``y``, ``I``), the two
    observed Poisson nodes (``detector_response``, ``background``) and the
    sum ``observed_response``.

    NOTE(review): ``detector_response`` uses ``data`` as both its mean and
    its observed value, and ``model_pred`` only reaches the returned dict
    through ``observed_response`` -- confirm this is the intended model.
    """
    src_x = pymc.Uniform("x", value=S0[0], lower=XMIN, upper=XMAX)
    src_y = pymc.Uniform("y", value=S0[1], lower=YMIN, upper=YMAX)
    intensity = pymc.Uniform("I", value=I0, lower=IMIN, upper=IMAX)

    @pymc.deterministic(plot=False)
    def model_pred(x=src_x, y=src_y, I=intensity):
        return P([x, y], I)

    detector = pymc.Poisson("d", data, value=data, observed=True, plot=False)

    expected_background = pymc.Poisson("background",
                                       DWELL * BG,
                                       value=DWELL * BG,
                                       observed=True,
                                       plot=False)

    total_response = model_pred + expected_background

    return dict(
        x=src_x,
        y=src_y,
        I=intensity,
        detector_response=detector,
        background=expected_background,
        observed_response=total_response,
    )
Ejemplo n.º 14
0
def make_on_off(n_off, expo_off, n_on, expo_on, mean0):
    """
    Make a PyMC model for inferring a Poisson signal rate parameter, `s`, for
    'on-off' observations with uncertain background rate, `b`.

    Parameters
    ----------

    n_off, n_on : int
        Event counts off-source and on-source

    expo_off, expo_on : float
        Exposures off-source and on-source

    mean0 : float
        Prior mean for both background and signal rates

    Returns
    -------
    dict
        ``locals()`` of this function: the arguments, the initial estimates
        and every PyMC node defined below, keyed by local variable name.
    """

    # PyMC's exponential dist'n uses beta = 1/scale = 1/mean.
    # Here we initialize rates to good guesses.
    # float() forces true division even for integer counts.
    b_est = float(n_off)/expo_off
    # Signal estimate is floored at 10% of the background estimate so the
    # starting value stays positive.
    s_est = max(float(n_on)/expo_on - b_est, .1*b_est)
    b = pymc.Exponential('b', beta=1./mean0, value=b_est)
    s = pymc.Exponential('s', beta=1./mean0, value=s_est)

    # The expected number of counts on and off source, as deterministic functions.
    @pymc.deterministic
    def mu_off(b=b):
        return b*expo_off

    @pymc.deterministic
    def mu_on(s=s, b=b):
        return (s+b)*expo_on

    # Poisson likelihood functions:
    off_count = pymc.Poisson('off_count', mu=mu_off, value=n_off, observed=True)
    on_count = pymc.Poisson('on_count', mu=mu_on, value=n_on, observed=True)

    return locals()
Ejemplo n.º 15
0
    def test_model_not_drawable_prior(self):
        """Prior predictive sampling fails for a HalfFlat prior while
        posterior predictive sampling still works."""
        n_obs = 200
        n_ppc = 40
        counts = np.random.poisson(lam=10, size=n_obs)

        with pm.Model() as model:
            flat_rate = pm.HalfFlat("sigma")
            pm.Poisson("foo", mu=flat_rate, observed=counts)
            idata = pm.sample(tune=1000)

        with model:
            # The prior cannot be drawn from
            with pytest.raises(NotImplementedError) as excinfo:
                pm.sample_prior_predictive(50)
            assert "Cannot sample" in str(excinfo.value)

            # The posterior predictive is still available
            ppc = pm.sample_posterior_predictive(idata,
                                                 n_ppc,
                                                 return_inferencedata=False)
            assert ppc["foo"].shape == (n_ppc, n_obs)
Ejemplo n.º 16
0
 def test_respects_shape(self):
     """Prior predictive draws of ``goals`` have shape (draws,) + size."""
     n_draws = 10
     for shape in (2, (2,), (10, 2), (10, 10)):
         with pm.Model():
             mu = pm.Gamma("mu", 3, 1, size=1)
             pm.Poisson("goals", mu, size=shape)
             # "mu" is listed twice in the first call on purpose
             with_duplicate = pm.sample_prior_predictive(
                 n_draws,
                 return_inferencedata=False,
                 var_names=["mu", "mu", "goals"])
             without_duplicate = pm.sample_prior_predictive(
                 n_draws,
                 return_inferencedata=False,
                 var_names=["mu", "goals"])

         # An integer size is compared as the equivalent 1-tuple
         size_tuple = (shape,) if isinstance(shape, int) else shape
         expected = (n_draws,) + size_tuple
         for draws in (with_duplicate, without_duplicate):
             assert draws["goals"].shape == expected
Ejemplo n.º 17
0
    def likelihood_model3(self, nObs, yObs, Slope, Norm, Sig, dx, xp=14, deg=3):
        """Build a Poisson number-count likelihood with a fitted scaling relation.

        ``mu = (slope + slope_mu * yObs) * yObs + norm`` is a deterministic
        node built from three Normal priors. The expected count is
        ``dx * exp(P(mu)) * slope * exp(-sig**2 * P'(mu))`` where ``P`` is a
        polynomial of degree ``deg`` with Normal(0, tau=0.0001) coefficients,
        ``P'`` is its derivative and ``sig`` is fixed at 0.15.

        ``Slope``, ``Norm`` and ``Sig`` are only used by the diagnostic
        prints at the top, which index elements 4 and 10 and therefore need
        at least 11 entries. ``xp`` is not used.

        Returns ``locals()``: every local name defined here (including
        ``self`` and the arguments), keyed by name.
        """

        # (1) Calculate the expected Mass -> MCR
        # (2) Calculate the slope and scatter parameter in -> MOR
        # (3) Calculate Number Count
        # (4) Write the likelihood

        # alpha = 1.0 / Slope # First Order Approximation
        # sigma = Sig / Slope # First Order Approximation

        # Diagnostic output. Single-argument print(...) calls behave the same
        # on Python 2 and Python 3.
        print(np.mean(Slope))
        print(Slope)
        print(Norm)
        print(Slope * yObs + Norm)
        print(nObs)
        # The whole quotient is wrapped so Python 3 does not parse it as
        # print(numerator) / denominator.
        print((Sig[10] - Sig[4]) / (Slope[10]*yObs[10] + Norm[10] - Slope[4]*yObs[4] - Norm[4]))
        # exit()

        slope = pymc.Normal('slope', mu=0.7, tau=100.0, value=0.7, observed=False)
        slope_mu = pymc.Normal('slope_mu', mu=0.0, tau=100.0, value=0.0, observed=False)
        norm = pymc.Normal('norm', mu=-9.0, tau=100.0, value=-9.0, observed=False)
        sig = 0.15 #pymc.Uniform('sig', 0.001, 0.4, value=np.mean(Sig), observed=False)

        # [beta_n, beta_n-1, beta_n-2, ...]
        # Highest-order coefficient first, the order np.poly1d expects.
        beta = [pymc.Normal('beta_%i'%i, mu=0., tau=0.0001, value=0.0, observed=False) for i in range(deg+1)]

        @pymc.deterministic(plot=False)
        def mu(yObs=yObs, slope=slope, norm=norm, slope_mu=slope_mu):
            return (slope + slope_mu*yObs) * yObs + norm

        @pymc.deterministic(plot=False)
        def exp_n(beta=beta, mu=mu, deg=deg, slope=slope, sig=sig, dx=dx):
            # It returns the normalization and the first order approximation for the scatter
            # MF = A x exp(beta1 x mu)

            p = np.poly1d(beta)
            A = dx * np.exp(p(mu))

            # Coefficients of the derivative of p, evaluated at mu
            c = [beta[j] * (deg - j) for j in range(deg)]
            p = np.poly1d(c)
            beta1 = p(mu)

            return A * slope * np.exp(- np.array(sig)**2 * beta1)

        # Observed counts are Poisson-distributed around exp_n
        likelihood = pymc.Poisson('n_obs', mu=exp_n, value=nObs, observed=True)

        return locals()
Ejemplo n.º 18
0
 class likelihood_model: 
    # Container class whose attributes are the PyMC nodes of the model.
    # The body reads s, muT, sigmas, b, sigmab and self from the enclosing
    # scope, so it is presumably defined inside a method -- confirm against
    # the surrounding code.
     
    # Stochastic variables for signal, background, and total event rates
    #signal_rate     = pymc.Normal('signal_rate',     mu=s*muT,  tau=1/sigmas**2)
    #background_rate = pymc.Normal('background_rate', mu=b,      tau=1/sigmab**2)
    # Doh, need to use truncated normal to prevent negative values
    # (in the calls below, keyword ``b`` is the upper truncation bound while
    # ``mu=b`` reads the enclosing-scope variable ``b``)
    signal_rate     = pymc.TruncatedNormal('signal_rate',     mu=s*muT, tau=1/sigmas**2, a=0, b=np.inf)
    background_rate = pymc.TruncatedNormal('background_rate', mu=b,     tau=1/sigmab**2, a=0, b=np.inf)
   
    # Deterministic variable (simply the sum of the signal and background rates)
    total_rate = pymc.LinearCombination('total_rate', [1,1], [signal_rate, background_rate])
    # Stochastic variable for number of observed events
    observed_events = pymc.Poisson('observed_events', mu=total_rate)
   
    # Deterministic variable for the test statistic
    # (only the first value returned by self.QCLs is kept)
    @pymc.deterministic()
    def qCLs(n=observed_events):
       q,chi2B = self.QCLs(n,s) 
       return q
Ejemplo n.º 19
0
def make_poisson(n, intvl, mean0):
    """
    Make a PyMC model for inferring a Poisson distribution rate parameter,
    for a datum consisting of `n` counts observed in an interval of size
    `intvl`.  The inference will use an exponential prior for the rate,
    with prior mean `mean0`.

    Returns ``locals()``: the three arguments plus the nodes ``rate``,
    ``mu`` and ``count``, keyed by local variable name.
    """

    # PyMC's exponential dist'n uses beta = 1/scale = 1/mean.
    # Here we initialize rate to n/intvl.
    # float() forces true division even when n and intvl are integers.
    rate = pymc.Exponential('rate', beta=1./mean0, value=float(n)/intvl)

    # The expected number of counts, mu=rate*intvl, is a deterministic function
    # of the rate RV (and the constant intvl).
    @pymc.deterministic
    def mu(rate=rate):
        return rate*intvl

    # Poisson likelihood function:
    count = pymc.Poisson('count', mu=mu, value=n, observed=True)

    return locals()
def Main():
    """Fit the switch-point model with MCMC, then plot and run the exercises.

    Relies on the module-level nodes ``lambda_``, ``lambda_1``, ``lambda_2``
    and ``tau`` and on the observed ``count_data``.
    """
    # Observed Poisson node tying the data to the piecewise rate
    observation = pm.Poisson("obs", lambda_, value=count_data, observed=True)

    # Assemble the model and sample it (explained in Chapter 3)
    mcmc = pm.MCMC(pm.Model([observation, lambda_1, lambda_2, tau]))
    mcmc.sample(40000, 10000, 1)

    # Collect the traces in the order plot_data expects them
    traces = [mcmc.trace(name)[:] for name in ('lambda_1', 'lambda_2', 'tau')]
    plot_data(*traces)

    solve_exercises(mcmc)
Ejemplo n.º 21
0
    def run(self):
        """Build the unfolding model, sample it and store the traces.

        Steps, in order:
        1. validate the inputs and optionally fluctuate the data;
        2. unpack background and object-systematic dictionaries into arrays;
        3. create the truth prior and the nuisance-parameter nodes;
        4. define the deterministic folding node and its Poisson likelihood;
        5. sample with emcee or with PyMC's AdaptiveMetropolis;
        6. store the results in ``self.trace`` and ``self.nuisancestrace``,
           and optionally produce monitoring plots.

        Returns None; results are stored on ``self``.
        """
        self.validateinput()
        data = self.data
        # A non-negative seed requests fluctuated data
        data = self.fluctuate(data) if self.rndseed >= 0 else data

        # unpack background dictionaries
        backgroundkeys = self.backgroundsyst.keys()
        backgrounds = array([self.background[key] for key in backgroundkeys])
        backgroundnormsysts = array(
            [self.backgroundsyst[key] for key in backgroundkeys])

        # unpack object systematics dictionary
        objsystkeys = self.objsyst['signal'].keys()
        signalobjsysts = array(
            [self.objsyst['signal'][key] for key in objsystkeys])
        backgroundobjsysts = array([])
        # Indexed [background][systematic]; left empty when either set is empty
        if len(objsystkeys) > 0 and len(backgroundkeys) > 0:
            backgroundobjsysts = array([[
                self.objsyst['background'][syst][bckg] for syst in objsystkeys
            ] for bckg in backgroundkeys])
        recodim = len(data)
        resmat = self.response
        truthdim = len(resmat)

        import priors
        truth = priors.wrapper(priorname=self.prior,
                               low=self.lower,
                               up=self.upper,
                               other_args=self.priorparams)

        # One nuisance node per background:
        #   err < 0  -> free Uniform(0, 3) normalisation named 'norm_<name>'
        #   err > 0  -> TruncatedNormal 'gaus_<name>' bounded below at -1/err
        #   err == 0 -> the same node, but marked observed (fixed at 0)
        bckgnuisances = []
        for name, err in zip(backgroundkeys, backgroundnormsysts):
            if err < 0.:
                bckgnuisances.append(
                    mc.Uniform('norm_%s' % name, value=1., lower=0., upper=3.))
            else:
                bckgnuisances.append(
                    mc.TruncatedNormal(
                        'gaus_%s' % name,
                        value=0.,
                        mu=0.,
                        tau=1.0,
                        a=(-1.0 / err if err > 0.0 else -inf),
                        b=inf,
                        observed=(False if err > 0.0 else True)))
        bckgnuisances = mc.Container(bckgnuisances)

        # One unit-Normal nuisance per object systematic; a non-zero
        # systfixsigma fixes the node at that value by marking it observed.
        objnuisances = [
            mc.Normal('gaus_%s' % name,
                      value=self.systfixsigma,
                      mu=0.,
                      tau=1.0,
                      observed=(True if self.systfixsigma != 0 else False))
            for name in objsystkeys
        ]
        objnuisances = mc.Container(objnuisances)

        # define potential to constrain truth spectrum
        if self.regularization:
            truthpot = self.regularization.getpotential(truth)

        #This is where the FBU method is actually implemented
        @mc.deterministic(plot=False)
        def unfold(truth=truth,
                   bckgnuisances=bckgnuisances,
                   objnuisances=objnuisances):
            # Object systematics scale each background
            smearbckg = 1.
            if len(backgroundobjsysts) > 0:
                smearbckg = smearbckg + dot(objnuisances, backgroundobjsysts)
            smearedbackgrounds = backgrounds * smearbckg
            # For a free normalisation (berr < 0) the factor (nuis - 1) / nuis
            # makes 1 + nuis * factor equal to nuis below.
            bckgnormerr = array([
                (-1. + nuis) / nuis if berr < 0. else berr
                for berr, nuis in zip(backgroundnormsysts, bckgnuisances)
            ])
            bckg = dot(1. + bckgnuisances * bckgnormerr, smearedbackgrounds)
            # Fold the truth spectrum through the response matrix
            reco = dot(truth, resmat)
            smear = 1. + dot(objnuisances, signalobjsysts)
            out = bckg + reco * smear
            return out

        # Poisson likelihood of the data given the folded prediction
        unfolded = mc.Poisson('unfolded',
                              mu=unfold,
                              value=data,
                              observed=True,
                              size=recodim)
        allnuisances = mc.Container(bckgnuisances + objnuisances)
        modelelements = [unfolded, unfold, truth, allnuisances]
        if self.regularization: modelelements += [truthpot]
        model = mc.Model(modelelements)

        if self.use_emcee:
            from emcee_sampler import sample_emcee
            # NOTE(review): the divisions below are integer divisions only
            # under Python 2 semantics -- confirm before running on Python 3.
            mcmc = sample_emcee(model,
                                nwalkers=self.nwalkers,
                                samples=self.nMCMC / self.nwalkers,
                                burn=self.nBurn / self.nwalkers,
                                thin=self.nThin)
        else:
            # Fit a MAP estimate first, then sample with AdaptiveMetropolis
            map_ = mc.MAP(model)
            map_.fit()
            mcmc = mc.MCMC(model)
            mcmc.use_step_method(mc.AdaptiveMetropolis, truth + allnuisances)
            mcmc.sample(self.nMCMC, burn=self.nBurn, thin=self.nThin)

#        mc.Matplot.plot(mcmc)

        # One trace per truth bin, looked up by the node name 'truth<bin>'
        self.trace = [
            mcmc.trace('truth%d' % bin)[:] for bin in xrange(truthdim)
        ]
        # Traces exist only for nuisances that were actually sampled
        # (err == 0 and fixed object systematics are skipped)
        self.nuisancestrace = {}
        for name, err in zip(backgroundkeys, backgroundnormsysts):
            if err < 0.:
                self.nuisancestrace[name] = mcmc.trace('norm_%s' % name)[:]
            if err > 0.:
                self.nuisancestrace[name] = mcmc.trace('gaus_%s' % name)[:]
        for name in objsystkeys:
            if self.systfixsigma == 0.:
                self.nuisancestrace[name] = mcmc.trace('gaus_%s' % name)[:]

        if self.monitoring:
            import monitoring
            monitoring.plot(self.name + '_monitoring', data, backgrounds,
                            resmat, self.trace, self.nuisancestrace,
                            self.lower, self.upper)
Ejemplo n.º 22
0
def turnover_piecewise_exponential_model():
    """Build the PyMC nodes of a piecewise-exponential turnover model.

    The Poisson mean of each observation is
    ``observed_exposures * baseline_hazards[piece_i] * exp(linear predictor)``
    where the linear predictor combines four situational coefficients with
    a sum-to-zero effect for the attacking team.

    All data (``num_pieces``, ``num_teams``, ``attacking_team``,
    ``observed_exposures``, ``observed_drive_deaths_turnover``, ...) are
    read from module-level names.

    Returns ``locals()``: every node defined below, keyed by local name.
    """
    # hyperpriors for team-level distributions
    std_dev_att = pm.Uniform('std_dev_att', lower=0, upper=50)

    # priors on coefficients
    baseline_hazards = pm.Normal('baseline_hazards',
                                 0,
                                 .0001,
                                 size=num_pieces,
                                 value=baseline_starting_vals.values)
    two_minute_drill = pm.Normal('two_minute_drill', 0, .0001, value=-.01)
    offense_losing_badly = pm.Normal('offense_losing_badly',
                                     0,
                                     .0001,
                                     value=-.01)
    offense_winning_greatly = pm.Normal('offense_winning_greatly',
                                        0,
                                        .0001,
                                        value=.01)
    home = pm.Normal('home', 0, .0001, value=-.01)

    # Precision corresponding to the team-level standard deviation
    @pm.deterministic(plot=False)
    def tau_att(std_dev_att=std_dev_att):
        return std_dev_att**-2

    # team-specific parameters
    atts_star = pm.Normal("atts_star",
                          mu=0,
                          tau=tau_att,
                          size=num_teams,
                          value=np.zeros(num_teams))

    # trick to code the sum to zero constraint
    @pm.deterministic
    def atts(atts_star=atts_star):
        # Centre the raw team effects on their mean
        atts = atts_star.copy()
        atts = atts - np.mean(atts_star)
        return atts

    # NOTE(review): defending_team is accepted as a parent but is not used
    # in the expression below -- confirm whether a defensive effect is
    # missing.
    @pm.deterministic
    def lambdas(attacking_team=attacking_team,
                defending_team=defending_team,
                defending_team_is_home=defending_team_is_home,
                two_minute_drill=two_minute_drill,
                drive_is_two_minute_drill=drive_is_two_minute_drill,
                offense_losing_badly=offense_losing_badly,
                offense_is_losing_badly=offense_is_losing_badly,
                offense_winning_greatly=offense_winning_greatly,
                offense_is_winning_greatly=offense_is_winning_greatly,
                home=home,
                atts=atts,
                baseline_hazards=baseline_hazards,
                observed_exposures=observed_exposures,
                piece_i=piece_i):
        return observed_exposures * baseline_hazards[piece_i] * \
               np.exp(home * defending_team_is_home + \
                      two_minute_drill * drive_is_two_minute_drill + \
                      offense_losing_badly * offense_is_losing_badly + \
                      offense_winning_greatly * offense_is_winning_greatly + \
                      atts[attacking_team])

    # Observed turnover events are Poisson-distributed around lambdas
    drive_deaths = pm.Poisson("drive_deaths",
                              lambdas,
                              value=observed_drive_deaths_turnover,
                              observed=True)

    # Potentials returning -inf rule out the offending parameter values
    @pm.potential
    def limit_sd(std_dev_att=std_dev_att):
        if std_dev_att < 0:
            return -np.inf
        return 0

    @pm.potential
    def limit_tau(tau_att=tau_att):
        if tau_att > 10000:
            return -np.inf
        return 0

    return locals()
Ejemplo n.º 23
0
# lamb = np.empty(Nobs,dtype=object)

# for i in range(Nobs):
#     lamb[i] = pymc.Gamma('lamb_%i' %(i+1), alpha = alpha, beta = beta, value=0.5)

# Poisson mean for each observation: the product lamb * t.
# ``lamb``, ``t``, ``y``, ``beta`` and ``Nobs`` are defined earlier in the script.
@pymc.deterministic
def poi_mu(lamb = lamb, t = t):
    return lamb*t

# @pymc.stochastic
# def data_gen(poi_mu,y):
#     return -np.sum(poi_mu) + np.sum(np.log(poi_mu)*y)
#
# # @pymc.stochastic
# # def data_gen(poi_mu, y):
# #     return pymc.Poisson('data',mu=poi_mu, value = y, observed=True)
# #
# Observed counts y are Poisson-distributed around poi_mu
data = pymc.Poisson('data',mu=poi_mu, value = y, observed=True)
sampler = pymc.MCMC([lamb,beta,data,y,t])
# NOTE(review): only lamb[0] and beta are handed to the Gibbs step method --
# confirm that the remaining lamb entries are meant to use the default.
sampler.use_step_method(pymc.Gibbs,lamb[0],beta)
sampler.sample(iter=10000,burn=3000,thin=10)

# Posterior means. Single-argument print(...) calls produce the same output
# on Python 2 and Python 3.
print(np.mean(beta.trace()))
# print np.mean(lamb.trace())
for i in range(Nobs):
    print(np.mean(lamb[i].trace()))



# MCMC
## Define prior distribution with the initial value
Ejemplo n.º 24
0
def make_poisson_hmm(y_data, X_data, initial_params):
    r""" Construct a PyMC2 scalar Poisson-emissions HMM model.

    TODO: Update to match normal model design.

    The model takes the following form:

    .. math::

        y_t &\sim \operatorname{Poisson}(\exp(x_t^{(S_t)\top} \beta^{(S_t)})) \\
        \beta^{(S_t)}_i &\sim \operatorname{N}(m^{(S_t)}, C^{(S_t)}),
        \quad i \in \{1,\dots,M\} \\
        S_t \mid S_{t-1} &\sim \operatorname{Categorical}(\pi^{(S_{t-1})}) \\
        \pi^{(S_{t-1})} &\sim \operatorname{Dirichlet}(\alpha^{(S_{t-1})})

    where :math:`C^{(S_t)} = \lambda_i^{(S_t) 2} \tau^{(S_t) 2}` and

    .. math::

        \lambda^{(S_t)}_i &\sim \operatorname{Cauchy}^{+}(0, 1) \\
        \tau^{(S_t)} &\sim \operatorname{Cauchy}^{+}(0, 1)

    for observations :math:`y_t` in :math:`t \in \{0, \dots, T\}`,
    features :math:`x_t^{(S_t)} \in \mathbb{R}^M`,
    regression parameters :math:`\beta^{(S_t)}`, state sequences
    :math:`\{S_t\}^T_{t=1}` and
    state transition probabilities :math:`\pi \in [0, 1]^{K}`.
    :math:`\operatorname{Cauchy}^{+}` is the standard half-Cauchy distribution
    and :math:`\operatorname{N}` is the normal/Gaussian distribution.

    The set of random variables,
    :math:`\mathcal{S} = \{\{\beta^{(k)}, \lambda^{(k)}, \tau^{(k)}, \pi^{(k)}\}_{k=1}^K, \{S_t\}^T_{t=1}\}`,
    are referred to as "stochastics" throughout the code.

    Note that the implementation currently uses a prior mean of zero for
    every :math:`\beta^{(S_t)}` rather than :math:`m^{(S_t)}`, and that the
    :math:`\tau^{(k)}` variables are held in the local list ``etas`` (their
    PyMC names are ``'tau-<k>'``).

    The returned model is built from this function's ``locals()``, so the
    local variable names used below are effectively part of the model's
    contents; rename them with care.


    Parameters
    ==========
    y_data: pandas.DataFrame
        Usage/response observations :math:`y_t`.  May be ``None``, in which
        case the response variable is created unobserved.
    X_data: list of pandas.DataFrame
        List of design matrices for each state, i.e. :math:`x_t^{(S_t)}`.  Each
        must span the entire length of observations (i.e. `y_data`).
    initial_params: NormalHMMInitialParams
        The initial parameters, which include
        :math:`\pi_0, m^{(k)}, \alpha^{(k)}, V^{(k)}`.
        Ignores `V` parameters.
        FIXME: using the "Normal" initial params objects is only temporary.

    Returns
    =======
    A ``pymc.Model`` object used for sampling.
    """

    N_states = len(X_data)
    N_obs = X_data[0].shape[0]

    alpha_trans = initial_params.alpha_trans

    trans_mat = TransProbMatrix("trans_mat",
                                alpha_trans,
                                value=initial_params.trans_mat)

    states = HMMStateSeq("states",
                         trans_mat,
                         N_obs,
                         p0=initial_params.p0,
                         value=initial_params.states)

    betas = []
    etas = []
    lambdas = []
    for s in range(N_states):

        initial_beta = None
        if initial_params.betas is not None:
            initial_beta = initial_params.betas[s]

        # A single-column design matrix gets size=None (presumably so that
        # PyMC creates scalar variables -- TODO confirm).
        size_s = X_data[s].shape[1]
        size_s = size_s if size_s > 1 else None

        lambda_s = pymc.HalfCauchy('lambda-{}'.format(s), 0., 1., size=size_s)

        eta_s = pymc.HalfCauchy('tau-{}'.format(s), 0., 1.)

        # Second positional argument is (lambda * tau)**-2, i.e. the inverse
        # of the variance C given in the docstring.
        beta_s = pymc.Normal('beta-{}'.format(s),
                             0., (lambda_s * eta_s)**(-2),
                             value=initial_beta,
                             size=size_s)

        betas.append(beta_s)
        etas.append(eta_s)
        lambdas.append(lambda_s)

    mu_reg = HMMLinearCombination('mu', X_data, betas, states, trace=False)

    @pymc.deterministic(trace=True, plot=False)
    def mu(mu_reg_=mu_reg):
        return np.exp(mu_reg_)

    if y_data is not None:
        # Mask NaN/inf observations.  The builtin `object` is used because the
        # `np.object` alias was deprecated in NumPy 1.20 and removed in 1.24;
        # both spell the same dtype.
        y_data = np.ma.masked_invalid(y_data).astype(object)
        y_data.set_fill_value(None)

    y_rv = pymc.Poisson('y',
                        mu,
                        value=y_data,
                        observed=y_data is not None)

    # Drop the params object and the loop temporaries so they are not part of
    # the namespace handed to pymc.Model below.
    del initial_params, s, beta_s, size_s, lambda_s, eta_s

    return pymc.Model(locals())
Ejemplo n.º 25
0
    def model_factory():
        """Build a PyMC model and return it as a dict.

        The model consists of uniform priors on ``x``, ``y`` and ``I``,
        uniform priors on the interstitial and per-material cross sections
        (within a relative ``XS_DELTA`` of their nominal ``Sigma_T``), a
        deterministic prediction obtained by calling ``P``, a Poisson
        background term, and an observed multivariate-normal likelihood
        evaluated on ``data`` from the enclosing scope.

        Returns a dict mapping short names to the PyMC nodes created here.
        """

        # Uniform priors, started at S0 / I0 and bounded by the *MIN / *MAX
        # constants from the enclosing scope.
        x = pymc.Uniform("x", value=S0[0], lower=XMIN, upper=XMAX)
        y = pymc.Uniform("y", value=S0[1], lower=YMIN, upper=YMAX)
        I = pymc.Uniform("I", value=I0, lower=IMIN, upper=IMAX)

        # Distributions for the cross sections

        # Just the interstitial material: uniform within +/- XS_DELTA
        # (relative) of its nominal Sigma_T, started at the nominal value.
        s_i_xs = P.interstitial_material.Sigma_T
        interstitial_xs = pymc.Uniform(
            "Sigma_inter",
            s_i_xs * (1 - XS_DELTA),
            s_i_xs * (1 + XS_DELTA),
            value=s_i_xs,
            observed=False,
        )

        # All the rest: one nominal Sigma_T per entry of P.materials,
        # with the same relative bounds applied element-wise.
        mu_xs = np.array([M.Sigma_T for M in P.materials])

        building_xs = pymc.Uniform(
            "Sigma",
            mu_xs * (1 - XS_DELTA),
            mu_xs * (1 + XS_DELTA),
            value=mu_xs,
            observed=False,
        )

        # Predictions

        @pymc.deterministic(plot=False)
        def model_pred(x=x,
                       y=y,
                       I=I,
                       interstitial_xs_p=interstitial_xs,
                       building_xs_p=building_xs):
            # The _p suffix keeps the parameter names distinct from the
            # stochastics in the enclosing scope, so the stochastic objects
            # themselves stay reachable a few lines below, where they are
            # resampled.

            # Materials are built from the cross-section values passed in.
            inter_mat = gefry3.Material(1.0, interstitial_xs_p)
            building_mats = [gefry3.Material(1.0, s) for s in building_xs_p]

            # Force the cross sections to be resampled
            # (this mutates the enclosing-scope stochastics on every call).
            interstitial_xs.set_value(interstitial_xs.random(), force=True)
            building_xs.set_value(building_xs.random(), force=True)

            # P is called with the position [x, y], I and the materials.
            return P(
                [x, y],
                I,
                inter_mat,
                building_mats,
            )

        # Unobserved Poisson background with mean DWELL * BG, started at
        # that same value.
        background = pymc.Poisson(
            "b",
            DWELL * BG,
            value=DWELL * BG,
            observed=False,
            plot=False,
        )

        @pymc.stochastic(plot=False, observed=True)
        def observed_response(value=[],
                              model_pred=model_pred,
                              background=background):
            # `value` is not used in the body; the log-density is evaluated
            # on `data` from the enclosing scope.
            # Mean and (diagonal) covariance are both prediction + background.
            resp = model_pred + background
            return multivariate_normal.logpdf(data,
                                              mean=resp,
                                              cov=np.diag(resp))

        return {
            "x": x,
            "y": y,
            "I": I,
            "interstitial_xs": interstitial_xs,
            "building_xs": building_xs,
            "model_pred": model_pred,
            "background": background,
            "observed_response": observed_response,
        }
Ejemplo n.º 26
0
# -*- coding: utf-8 -*-
"""
Created on Sun Aug 17 15:34:47 2014

@author: Koangel
"""

import pymc as pm

a = [1, 2, 3, 4, 5]

# Observed Poisson node: print its value before and after calling random().
# NOTE(review): presumably meant to show that random() leaves an observed
# value unchanged -- confirm against the PyMC documentation.
# print(...) with one argument behaves the same on Python 2 and 3, whereas
# the bare `print x` statement is a SyntaxError on Python 3.
val = pm.Poisson("obs", 0.1, value=a, observed=True)
print(val.value)
val.random()
print(val.value)

# Same experiment with an unobserved Poisson node.
val1 = pm.Poisson("obs1", 0.1)
print(val1.value)
val1.random()
print(val1.value)
import pymc as pm
import numpy  as np
import matplotlib.pyplot as plt

# An exponential variable feeding a Poisson variable, plus a derived value.
parameter = pm.Exponential("poisson_param", beta=1)
data_generator = pm.Poisson("data_generator", mu=parameter)
data_plus_one = data_generator + 1

# Graph relationships:
#   parents  - the variables that influence a given variable
#   children - the variables that a given variable influences
parameter.children
data_generator.parents
data_generator.children

# Every variable exposes its current value through `.value`.
parameter.value
data_generator.value
data_plus_one.value

# Stochastic variables stay random even when their parents are known;
# deterministic variables are fixed once their parents are known.
#
# Arguments when initializing a variable:
#   * name  - string used to retrieve the posterior distribution
#   * class-specific arguments
#   * size  - creates an array of independent stochastic variables

some_var = pm.DiscreteUniform("discrete_uni_var", lower=0, upper=4)

betas = pm.Uniform("betas", lower=0, upper=1, size=10)
betas.value
Ejemplo n.º 28
0
# Both Poisson rates get the same exponential prior.
lambda_1 = pm.Exponential("lambda_1", beta=alpha)
lambda_2 = pm.Exponential("lambda_2", beta=alpha)

# Switch point: the index at which the rate changes.
tau = pm.DiscreteUniform("tau", upper=n_count_data, lower=0)


@pm.deterministic
def lambda_(tau=tau, lambda_1=lambda_1, lambda_2=lambda_2):
    # Start with lambda_2 everywhere, then overwrite the entries before tau
    # with lambda_1 (tau itself and everything after keep lambda_2).
    rates = np.full(n_count_data, lambda_2, dtype=float)
    rates[:tau] = lambda_1
    return rates


# Observed counts, Poisson-distributed around the piecewise-constant rate.
observation = pm.Poisson("obs", mu=lambda_, value=count_data, observed=True)

model = pm.Model([observation, lambda_1, lambda_2, tau])

mcmc = pm.MCMC(model)
mcmc.sample(iter=40000, burn=10000, thin=1)

# Pull the retained samples out of the traces.
lambda_1_samples = mcmc.trace('lambda_1')[:]
lambda_2_samples = mcmc.trace('lambda_2')[:]
tau_samples = mcmc.trace('tau')[:]

# Histogram of the samples: first of three stacked panels.

ax = plt.subplot(311)
ax.set_autoscaley_on(False)
Ejemplo n.º 29
0
                                     doc='Switchpoint[year]')

    avg = np.mean(stormsNumbers)
    early_mean = pm.Exponential('early_mean', beta=1./avg)
    late_mean = pm.Exponential('late_mean', beta=1./avg)

    @pm.deterministic(plot=False)
    def rate(s=switchpoint, e=early_mean, l=late_mean):
        # Piecewise-constant Poisson mean: `e` before index `s`, `l` from
        # `s` onwards.  Fill with `l` first, then overwrite the early part.
        means = np.full(len(stormsNumbers), l, dtype=float)
        means[:s] = e
        return means

    storms = pm.Poisson('storms',
                        mu=rate,
                        value=stormsNumbers,
                        observed=True)

    storms_model = pm.Model([storms,
                             early_mean,
                             late_mean, rate])

    strmsM = pm.MCMC(storms_model)
    strmsM.sample(iter=40000, burn=1000, thin=20)

    plt.hist(strmsM.trace('late_mean')[:], edgecolor="k")
    general.set_grid_to_plot()
    plt.savefig(general.folderPath2 + "exp2_late_mean.png")
    plt.clf()

    plt.hist(strmsM.trace('early_mean')[:], edgecolor="k")
Ejemplo n.º 30
0
def compare_groups(list1, list2):
    """Fit a Poisson switch-point model to two concatenated series and plot it.

    The two inputs are joined with ``+`` into a single count series, which is
    drawn as a bar chart.  A model with two exponential-prior Poisson rates
    (``lambda_1`` before the switch point ``tau``, ``lambda_2`` from ``tau``
    onwards) is then sampled with MCMC, and the posterior samples of
    ``lambda_1``, ``lambda_2`` and ``tau`` are shown as three stacked
    histograms.

    Compatibility notes:
      * ``print(...)`` with one argument is used so the function parses on
        both Python 2 and Python 3.
      * ``density=True`` replaces ``normed=True``, which was removed from
        ``hist`` in Matplotlib 3.1; it requires Matplotlib >= 2.1.
      * Labels containing ``\\lambda`` are raw strings; their value is
        unchanged, but the invalid ``\\l`` escape is no longer relied upon.

    Parameters
    ----------
    list1, list2 : list
        Count series to concatenate (in this order).  They are joined with
        ``+``, so they should be plain lists -- NumPy arrays would be added
        element-wise instead.

    Returns
    -------
    None
        The alpha value and the tau samples are printed and the figure is
        displayed with ``plt.show()``.
    """
    count_data = np.array(list1 + list2)
    n_count_data = len(count_data)
    plt.bar(np.arange(n_count_data), count_data, color="#348ABD")
    plt.xlabel("Time (days)")
    plt.ylabel("count of text-msgs received")
    plt.title(
        "Did the viewers' ad viewing increase with the number of ads shown?")
    plt.xlim(0, n_count_data)
    #plt.show()

    # Both rates share an exponential prior whose parameter is the inverse
    # of the sample mean of the counts.
    alpha = 1.0 / count_data.mean()
    print(alpha)
    lambda_1 = pm.Exponential("lambda_1", alpha)
    lambda_2 = pm.Exponential("lambda_2", alpha)

    tau = pm.DiscreteUniform("tau", lower=0, upper=n_count_data)

    @pm.deterministic
    def lambda_(tau=tau, lambda_1=lambda_1, lambda_2=lambda_2):
        out = np.zeros(n_count_data)
        out[:tau] = lambda_1  # lambda before tau is lambda1
        out[tau:] = lambda_2  # lambda after (and including) tau is lambda2
        return out

    observation = pm.Poisson("obs", lambda_, value=count_data, observed=True)

    model = pm.Model([observation, lambda_1, lambda_2, tau])

    mcmc = pm.MCMC(model)
    mcmc.sample(iter=40000, burn=10000, thin=1)

    lambda_1_samples = mcmc.trace('lambda_1')[:]
    lambda_2_samples = mcmc.trace('lambda_2')[:]
    tau_samples = mcmc.trace('tau')[:]

    print(tau_samples)

    # Histograms of the samples, one panel per variable.

    # Panel 1: posterior of lambda_1.
    ax = plt.subplot(311)
    ax.set_autoscaley_on(False)

    plt.hist(lambda_1_samples,
             histtype='stepfilled',
             bins=30,
             alpha=0.85,
             label=r"posterior of $\lambda_1$",
             color="#A60628",
             density=True)
    plt.legend(loc="upper left")
    plt.title(r"""Posterior distributions of the variables
        $\lambda_1,\;\lambda_2,\;\tau$""")
    plt.xlim([0, 6])
    plt.ylim([0, 7])
    plt.xlabel(r"$\lambda_1$ value")

    # Panel 2: posterior of lambda_2.
    ax = plt.subplot(312)
    ax.set_autoscaley_on(False)
    plt.hist(lambda_2_samples,
             histtype='stepfilled',
             bins=30,
             alpha=0.85,
             label=r"posterior of $\lambda_2$",
             color="#7A68A6",
             density=True)
    plt.legend(loc="upper left")
    plt.xlim([0, 6])
    plt.ylim([0, 7])
    plt.xlabel(r"$\lambda_2$ value")

    # Panel 3: posterior of tau, weighted so the bar heights sum to one.
    plt.subplot(313)
    w = 1.0 / tau_samples.shape[0] * np.ones_like(tau_samples)
    plt.hist(tau_samples,
             bins=n_count_data,
             alpha=1,
             label=r"posterior of $\tau$",
             color="#467821",
             weights=w,
             rwidth=2.)
    plt.xticks(np.arange(n_count_data))

    plt.legend(loc="upper left")
    plt.ylim([0, .75])
    plt.xlim([0, n_count_data])
    plt.xlabel(r"$\tau$ (iterations)")
    plt.ylabel("probability")

    plt.show()