Example #1
    def _sample_pymc3(cls, dist, size):
        """Sample from PyMC3."""

        import pymc3
        pymc3_rv_map = {
            'GeometricDistribution': lambda dist: pymc3.Geometric('X', p=float(dist.p)),
            'PoissonDistribution': lambda dist: pymc3.Poisson('X', mu=float(dist.lamda)),
            'NegativeBinomialDistribution': lambda dist: pymc3.NegativeBinomial(
                'X', mu=float((dist.p * dist.r) / (1 - dist.p)), alpha=float(dist.r))
        }

        dist_list = pymc3_rv_map.keys()

        if dist.__class__.__name__ not in dist_list:
            return None

        with pymc3.Model():
            pymc3_rv_map[dist.__class__.__name__](dist)
            return pymc3.sample(size, chains=1, progressbar=False)[:]['X']
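# A minimal standalone sketch of the dispatch pattern above, assuming only that
# pymc3 is installed: build a throwaway Model, declare the mapped distribution
# under the fixed name 'X', and pull the sampled values back out of the trace.
import pymc3

with pymc3.Model():
    pymc3.Geometric('X', p=0.5)
    samples = pymc3.sample(100, chains=1, progressbar=False)[:]['X']
print(samples[:10])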
Example #2
def baysian_latency(count_data):
    import numpy as np
    import pymc3 as pm

    n_count_data = len(count_data)
    with pm.Model() as model:
        alpha = 1.0 / count_data.mean()  # count_data holds the observed message counts
        lambda_1 = pm.Exponential("lambda_1", alpha)
        lambda_2 = pm.Exponential("lambda_2", alpha)

        tau = pm.DiscreteUniform("tau", lower=0, upper=n_count_data - 1)

    with model:
        idx = np.arange(n_count_data)  # Index
        lambda_ = pm.math.switch(tau >= idx, lambda_1, lambda_2)
        observation = pm.Poisson("obs", lambda_, observed=count_data)
        step = pm.Metropolis()
        trace = pm.sample(10000, tune=5000, step=step)
    return trace
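# A hypothetical usage sketch for baysian_latency: simulate counts with a rate
# jump at day 30, then read the posterior samples back off the trace.
import numpy as np

count_data = np.concatenate([np.random.poisson(15, 30), np.random.poisson(25, 40)])
trace = baysian_latency(count_data)
lambda_1_samples = trace['lambda_1']
lambda_2_samples = trace['lambda_2']
tau_samples = trace['tau']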
Example #3
def make_switchpoint_model(counts: ndarray, prior_lambda: float):
    """
    A model that assumes counts are generated by two different Poisson processes:
    * counts up to the switchpoint (exclusive) ~ Poisson(early_rate)
    * counts from the switchpoint on (inclusive) ~ Poisson(late_rate)

    Parameters
    ----------
    counts :
        1-dimensional array of counts
    prior_lambda :
        rate parameter for the exponential prior; 1 / prior_lambda is the mean of the exponential

    Returns
    -------
    pm.Model :
        the model instance

    Based on https://docs.pymc.io/notebooks/getting_started.html#Case-study-2:-Coal-mining-disasters
    """
    model = pm.Model()
    with model:
        idxs = np.arange(len(counts))
        lower_idx = idxs[1]
        upper_idx = idxs[-1]
        mid = (upper_idx - lower_idx) // 2

        switchpoint = pm.DiscreteUniform("switchpoint",
                                         lower=lower_idx,
                                         upper=upper_idx,
                                         testval=mid)

        early_rate = pm.Exponential("early_rate", prior_lambda)
        late_rate = pm.Exponential("late_rate", prior_lambda)

        rate = pm.math.switch(switchpoint > idxs, early_rate, late_rate)

        pm.Poisson("counted", rate, observed=counts)
    return model
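# A usage sketch under assumed synthetic data: two Poisson regimes switching at
# index 30. The sampler settings are illustrative, not from the original source.
import numpy as np
import pymc3 as pm

counts = np.concatenate([np.random.poisson(2.0, 30), np.random.poisson(6.0, 30)])
with make_switchpoint_model(counts, prior_lambda=1.0):
    trace = pm.sample(2000, tune=1000, cores=1)
print(trace['switchpoint'].mean())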
Example #4
def ppmf_core(matrix, n_team):
    alpha_u = alpha_v = 1 / np.var(matrix)
    alpha = np.ones(
        (n_team, n_team)) * 2  # fixed precision for the commented-out Normal likelihood
    dim = 10  # dimensionality
    #num_sample = 200

    with pm.Model() as pmf:
        pmf_U = pm.MvNormal('U',
                            mu=0,
                            tau=alpha_u * np.eye(dim),
                            shape=(n_team, dim),
                            testval=np.random.randn(n_team, dim) * .01)
        pmf_V = pm.MvNormal('V',
                            mu=0,
                            tau=alpha_v * np.eye(dim),
                            shape=(n_team, dim),
                            testval=np.random.randn(n_team, dim) * .01)
        #   pmf_R = pm.Normal('R', mu=theano.tensor.dot(pmf_U, pmf_V.T),
        #                    tau=alpha, observed=matrix)
        pmf_R = pm.Poisson('R',
                           mu=theano.tensor.dot(pmf_U, pmf_V.T),
                           observed=matrix)
        # Find mode of posterior using optimization
        start = pm.find_MAP(fmin=sp.optimize.fmin_powell
                            )  # Find starting values by optimization

    #    step = pm.NUTS(scaling=start)
    #    trace = pm.sample(num_sample, step, start=start)
    #
    #U_all = trace['U']
    #U = sum(U_all,0)/num_sample
    #V_all = trace['V']
    #V = sum(V_all,0)/num_sample
    #
    #R = U.dot(V.T)

    U = start['U']
    V = start['V']
    R = U.dot(V.T)
    return R
Example #5
def testMCMCTrace():
    with pm.Model() as model:
        mu = pm.Uniform('mu', lower=0, upper=60)
        likelihood = pm.Poisson('likelihood',
                                mu=mu,
                                observed=msg['time_delay_seconds'].values)
        start = pm.find_MAP()
        step = pm.Metropolis()
        trace = pm.sample(20000, step, start=start, progressbar=True)
        pm.traceplot(trace, varnames=['mu'], lines={'mu': freq_results['x']})
        # fig = plt.figure(figsize=(11, 3))
        # plt.subplot(121)
        # plt.title('Burnin trace')
        # plt.ylim(ymin=16.5, ymax=19.5)
        # plt.plot(trace.get_values('mu')[:1000])
        # fig = plt.subplot(122)
        # plt.title('Full trace')
        # plt.ylim(ymin=16.5, ymax=19.5)
        # plt.plot(trace.get_values('mu'))
        # pm.autocorrplot(trace[:2000], varnames=['mu'])
        # plt.show()
        return trace
Example #6
def create_pymc3_coal_mining_disaster_model(
        modelname='pymc3_coal_mining_disaster_model', fit=True):
    if fit:
        modelname = modelname + '_fitted'

    disasters = np.array([
        4, 5, 4, 0, 1, 4, 3, 4, 0, 6, 3, 3, 4, 0, 2, 6, 3, 3, 5, 4, 5, 3, 1, 4,
        4, 1, 5, 5, 3, 4, 2, 5, 2, 2, 3, 4, 2, 1, 3, 3, 2, 1, 1, 1, 1, 3, 0, 0,
        1, 0, 1, 1, 0, 0, 3, 1, 0, 3, 2, 2, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 2,
        1, 0, 0, 0, 1, 1, 0, 2, 3, 3, 1, 2, 2, 1, 1, 1, 1, 2, 4, 2, 0, 0, 1, 4,
        0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1
    ])
    years = np.arange(1851, 1962)

    data = pd.DataFrame({'years': years, 'disasters': disasters})
    years = theano.shared(years)
    with pm.Model() as disaster_model:

        switchpoint = pm.DiscreteUniform('switchpoint',
                                         lower=years.min(),
                                         upper=years.max(),
                                         testval=1900)

        # Priors for the pre- and post-switch disaster rates
        early_rate = pm.Exponential('early_rate', 1.0)
        late_rate = pm.Exponential('late_rate', 1.0)

        # Allocate the appropriate Poisson rate to years before and after the switchpoint
        rate = pm.math.switch(switchpoint >= years, early_rate, late_rate)

        disasters = pm.Poisson('disasters', rate, observed=data['disasters'])
        #years = pm.Normal('years', mu=data['years'], sd=0.1, observed=data['years'])

    m = ProbabilisticPymc3Model(modelname,
                                disaster_model,
                                shared_vars={'years': years})
    if fit:
        m.fit(data)
    return data, m
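# Usage sketch: build the data and an unfitted model. ProbabilisticPymc3Model is
# assumed to be provided by the surrounding project.
data, m = create_pymc3_coal_mining_disaster_model(fit=False)
print(data.head())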
Example #7
def parametric_scheme_mcmc(daily_cases, CI = 0.95, gamma = 0.2, chains = 4, tune = 1000, draws = 1000, **kwargs):
    if isinstance(daily_cases, (pd.DataFrame, pd.Series)):
        case_values = daily_cases.values
    else: 
        case_values = np.array(daily_cases)
    with pm.Model() as mcmc_model:
        # lag new case counts
        dT_lag0 = case_values[1:]
        dT_lag1 = case_values[:-1]
        n = len(dT_lag0)

        # set up distributions 
        # alpha   = 3 + dT_lag0.cumsum()
        # beta_L  = 2 + np.array(range(len(dT_lag0)))
        # beta_b  = 2 + dT_lag1.cumsum()

        dT = pm.Poisson("dT", mu = dT_lag0, shape = (n,))
        bt = pm.Gamma("bt", alpha = dT_lag0.cumsum(), beta = 0.0001 + dT_lag1.cumsum(), shape = (n,))
        Rt = pm.Deterministic("Rt", 1 + pm.math.log(bt)/gamma)
    
        trace = pm.sample(model = mcmc_model, chains = chains, tune = tune, draws = draws, cores = 1, **kwargs)
        return (mcmc_model, trace, pm.summary(trace, hdi_prob = CI))
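# Usage sketch, assuming daily_cases is a pandas Series of new daily case
# counts; the function returns the model, the trace, and a summary table.
import numpy as np
import pandas as pd

daily_cases = pd.Series(np.random.poisson(100, 60))
model, trace, summary = parametric_scheme_mcmc(daily_cases, chains=2, tune=500, draws=500)
print(summary.filter(like="Rt", axis=0).head())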
Example #8
    def build_model(*, read_depth, reg_topics, expr_data, lambda_prior,
                    std_mult):

        #expr_factor_shape = (reg_topics.shape[1], expr_data.shape[1])

        beta = 1 / std_mult
        alpha = lambda_prior * beta

        logging.info('Building model')
        with pm.Model() as model:

            expr_topics = pm.Gamma('gamma',
                                   alpha=alpha,
                                   beta=beta,
                                   shape=lambda_prior.shape)

            response = pm.Poisson('response',
                                  read_depth *
                                  pm.math.dot(reg_topics, expr_topics),
                                  observed=expr_data)
        logging.info('Done building model')
        return model
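# A hypothetical smoke test for build_model, with illustrative shapes only
# (100 cells, 5 topics, 200 genes); assumes build_model is in scope.
import numpy as np

reg_topics = np.random.dirichlet(np.ones(5), size=100)      # (100, 5)
lambda_prior = np.ones((5, 200))                            # (5, 200)
expr_data = np.random.poisson(1.0, size=(100, 200))         # (100, 200)
model = build_model(read_depth=1000, reg_topics=reg_topics, expr_data=expr_data,
                    lambda_prior=lambda_prior, std_mult=2.0)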
Example #9
def poisson_model():
    with pm.Model() as disaster_model:

        switchpoint = pm.DiscreteUniform('switchpoint',
                                         lower=year.min(),
                                         upper=year.max(),
                                         testval=1900)

        # Priors for the pre- and post-switch disaster rates
        early_rate = pm.Exponential('early_rate', 1)
        late_rate = pm.Exponential('late_rate', 1)

        # Allocate the appropriate Poisson rate to years before and after the switchpoint
        rate = pm.math.switch(switchpoint >= year, early_rate, late_rate)

        disasters = pm.Poisson('disasters', rate, observed=disaster_data)

        trace = pm.sample(10000)

        pm.traceplot(trace)

    plt.show()
Example #10
    def run(self, chains=1, tune=3000, draws=1000, cores=4, target_accept=.95):

        with pm.Model() as model:

            # Random walk magnitude
            step_size = pm.HalfNormal('step_size', sigma=.03)

            # Theta random walk
            theta_raw_init = pm.Normal('theta_raw_init', 0.1, 0.1)
            theta_raw_steps = pm.Normal('theta_raw_steps',
                                        shape=len(self.onset) - 2) * step_size
            theta_raw = tt.concatenate([[theta_raw_init], theta_raw_steps])
            theta = pm.Deterministic('theta', theta_raw.cumsum())

            # Let the serial interval be a random variable and calculate r_t
            serial_interval = pm.Gamma('serial_interval', alpha=6, beta=1.5)
            gamma = 1.0 / serial_interval
            r_t = pm.Deterministic('r_t', theta / gamma + 1)

            inferred_yesterday = (self.onset.values[:-1] /
                                  self.cumulative_p_delay[:-1])

            expected_today = (inferred_yesterday *
                              self.cumulative_p_delay[1:] *
                              pm.math.exp(theta))

            # Ensure cases stay above zero for poisson
            mu = pm.math.maximum(.1, expected_today)
            observed = self.onset.round().values[1:]
            cases = pm.Poisson('cases', mu=mu, observed=observed)

            self.trace = pm.sample(chains=chains,
                                   tune=tune,
                                   draws=draws,
                                   cores=cores,
                                   target_accept=target_accept)

            return self
Example #11
    def infer_lambda(self):
        """
        Ci ~ Poisson(lambda)

        Is there a day ("tau") where the lambda suddenly jumps to a higher value?
        We are looking for a 'switchpoint' s.t. lambda
            (1) (lambda_1 if t < tau) and (lambda_2 if t > tau)
            (2) lambda_2 > lambda_1

        lambda_1 ~ Exponential(alpha)
        lambda_2 ~ Exponential(alpha)

        tau ~ Discrete_uniform(1/n_count_data)
        """
        print("Infer with PyMC3...")
        with pm.Model() as model:
            ## assign lambdas and tau to stochastic variables
            alpha = 1.0 / self.count_data.mean()
            lambda_1 = pm.Exponential("lambda_1", alpha)
            lambda_2 = pm.Exponential("lambda_2", alpha)
            tau = pm.DiscreteUniform("tau", lower=0, upper=self.n_count_data)

            ## create a combined function for lambda (it is still a random variable)
            idx = np.arange(self.n_count_data)  # Index
            lambda_ = pm.math.switch(tau >= idx, lambda_1, lambda_2)

            ## combine the data with our proposed data generation scheme
            observation = pm.Poisson("obs", lambda_, observed=self.count_data)

            ## inference
            step = pm.Metropolis()
            self.trace = pm.sample(10000, tune=5000, step=step)

            ## get the variables we want to plot from our trace
            self.lambda_1_samples = self.trace['lambda_1']
            self.lambda_2_samples = self.trace['lambda_2']
            self.tau_samples = self.trace['tau']
Example #12
def mob_th_mcmcm_model(mt, onset, poly_id, path_to_save_trace=None):
    with pm.Model() as Rt_mobility_model:            
        # Create the alpha and beta parameters
        # Assume a uninformed distribution
        beta  = pm.Uniform('beta', lower=-100, upper=100)
        Ro    = pm.Uniform('R0', lower=2, upper=5)

        # The effective reproductive number is given by:
        Rt              = pm.Deterministic('Rt', Ro*pm.math.exp(-beta*(1+mt[:-1].values)))
        serial_interval = pm.Gamma('serial_interval', alpha=6, beta=1.5)
        GAMMA           = 1 / serial_interval
        lam             = onset[:-1].values * pm.math.exp( GAMMA * (Rt- 1))
        observed        = onset.round().values[1:]

        # Likelihood
        cases = pm.Poisson('cases', mu=lam, observed=observed)

        with Rt_mobility_model:
            # Draw the samples using Metropolis-Hastings sampling
            step     = pm.Metropolis(vars=[ Rt_mobility_model.beta, Rt_mobility_model.R0 ], S = np.array([ (100+100)**2 , (5-2)**2 ]), target_accept=.85,)
            Rt_trace = pm.sample( 5000, chains=2, tune=2000, step=step, progressbar=False, compute_convergence_checks=False)


        BURN_IN = 2000
        rt_info = statistics_from_trace_model(Rt_trace.get_values(burn=BURN_IN,varname='Rt'))

        R0_dist   = Rt_trace.get_values(burn=BURN_IN, varname='R0')
        beta_dist = Rt_trace.get_values(burn=BURN_IN,varname='beta')
        mb_th     = calculate_threshold(beta_dist.mean(), R0_dist.mean())

        if path_to_save_trace:
            with open(path_to_save_trace, 'wb') as buff:
                pickle.dump({'model': Rt_mobility_model, 'trace': Rt_trace }, buff)

    return {'poly_id': poly_id, 'R0':R0_dist.mean(), 'beta':beta_dist.mean(), 'mob_th':mb_th }
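# Usage sketch with synthetic inputs (mt: mobility series, onset: onset counts).
# statistics_from_trace_model and calculate_threshold are assumed to be defined
# in the surrounding module.
import numpy as np
import pandas as pd

mt = pd.Series(np.random.uniform(-0.5, 0.0, 60))
onset = pd.Series(np.random.poisson(50, 60).astype(float))
result = mob_th_mcmcm_model(mt, onset, poly_id=1)
print(result['R0'], result['beta'], result['mob_th'])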
Example #13
def branching_random_walk(daily_cases, CI = 0.95, gamma = 0.2, chains = 4, tune = 1000, draws = 1000, **kwargs):
    """ estimate Rt using a random walk for branch parameter, adapted from old Rt.live code """
    if isinstance(daily_cases, (pd.DataFrame, pd.Series)):
        case_values = daily_cases.values
    else: 
        case_values = np.array(daily_cases)
    with pm.Model() as mcmc_model:
        # lag new case counts
        dT_lag0 = case_values[1:]
        dT_lag1 = case_values[:-1]
        n = len(dT_lag0)
        
        # Random walk magnitude
        step_size = pm.HalfNormal('step_size', sigma = 0.03)
        theta_raw_init = pm.Normal('theta_raw_init', 0.1, 0.1)
        theta_raw_steps = pm.Normal('theta_raw_steps', shape = n - 1) * step_size
        theta_raw = tt.concatenate([[theta_raw_init], theta_raw_steps])
        theta = pm.Deterministic('theta', theta_raw.cumsum())

        Rt = pm.Deterministic("Rt", 1 + theta/gamma)
        expected_cases = pm.Poisson('dT', mu = dT_lag1 * pm.math.exp(theta), observed = dT_lag0)
    
        trace = pm.sample(model = mcmc_model, chains = chains, tune = tune, draws = draws, cores = 1, **kwargs)
        return (mcmc_model, trace, pm.summary(trace, hdi_prob = CI))
Example #14
    def run_gp(self):
        with pm.Model() as model:
            gp_shape = len(self.onset) - 1

            length_scale = pm.Gamma("length_scale", alpha=3, beta=.4)

            eta = .05
            cov_func = eta**2 * pm.gp.cov.ExpQuad(1, length_scale)

            gp = pm.gp.Latent(mean_func=pm.gp.mean.Constant(c=0),
                              cov_func=cov_func)

            # Place a GP prior over the function f.
            theta = gp.prior("theta", X=np.arange(gp_shape)[:, None])

            # Let the serial interval be a random variable and calculate r_t
            serial_interval = pm.Gamma('serial_interval', alpha=6, beta=1.5)
            gamma = 1.0 / serial_interval
            r_t = pm.Deterministic('r_t', theta / gamma + 1)

            inferred_yesterday = (self.onset.values[:-1] /
                                  self.cumulative_p_delay[:-1])
            expected_today = (inferred_yesterday *
                              self.cumulative_p_delay[1:] *
                              pm.math.exp(theta))

            # Ensure cases stay above zero for poisson
            mu = pm.math.maximum(.1, expected_today)
            observed = self.onset.round().values[1:]
            cases = pm.Poisson('cases', mu=mu, observed=observed)

            self.trace = pm.sample(chains=1,
                                   tune=1000,
                                   draws=1000,
                                   target_accept=.8)
        return self
Example #15
            theta_cumulative = pm.Deterministic('theta_c', theta.cumsum())

            # Let the serial interval be a random variable and calculate r_t
            serial_interval = pm.Gamma('serial_interval', alpha=6, beta=1.5)
            gamma = 1.0 / serial_interval
            r_t = pm.Deterministic('r_t', theta / gamma + 1)

            # Up until here is fine.
            N_t = 100_000 * np.ones_like(
                self.num_positive
            )  # Some large numbers, think of them as candidates

            I_t_mu = self.I_0 * pm.math.exp(theta_cumulative)
            # Ensure cases stay above zero for poisson
            I_t = pm.math.maximum(.1, I_t_mu)
            cases = pm.Poisson('cases', mu=I_t, shape=self.num_positive.shape)

            # Random walk magnitude
            step_size = pm.HalfNormal('step_size', sigma=.03)

            # Theta random walk
            theta_raw_init = pm.Normal('theta_raw_init', 0.1, 0.1)
            theta_raw_steps = pm.Normal(
                'theta_raw_steps',
                shape=len(self.num_positive) - 2) * step_size
            theta_raw = tt.concatenate([[0., theta_raw_init], theta_raw_steps])
            theta = pm.Deterministic('theta', theta_raw.cumsum())
            theta_cumulative = pm.Deterministic('theta_c', theta.cumsum())

            # Let the serial interval be a random variable and calculate r_t
            serial_interval = pm.Gamma('serial_interval', alpha=6, beta=1.5)
Example #16
        # Where we store the results
        self.trace = None

    def run(self, chains=1, tune=3_000, draws=500, target_accept=.9, cores=1):

        with pm.Model() as model:
            # Figure out the new R_t
            R_t = pm.Normal('R_t', mu=self.R_t_mu, sigma=self.R_t_sigma)
            R_t_drift = pm.Normal('R_t_drift', mu=0, sigma=self.R_t_drift)
            R_t_1 = pm.Deterministic('R_t_1', R_t + R_t_drift)

            # Now, take the new I_t_1
            # Effective serial_interval is basically 9, from empirical tests.
            serial_interval = 9.
            gamma = 1 / serial_interval
            dI_t = pm.Poisson('dI_t', mu=self.dI_t_mu)
            exp_rate = pm.Deterministic('exp_rate',
                                        pm.math.exp((R_t_1 - 1) * gamma))
            # Restrict I_t to be nonzero
            dI_t_1_mu = pm.math.minimum(pm.math.maximum(0.1, dI_t * exp_rate),
                                        self.num_positive)
            dI_t_1 = pm.Poisson('dI_t_1', mu=dI_t_1_mu)

            # From here, find the expected number of positive cases
            N_t_1 = 100_000 if self.N_t == -1 else self.N_t  # For now, assume random tests among a large set.
            positives = HyperGeometric(name='positives',
                                       N=N_t_1,
                                       n=self.num_tests,
                                       k=dI_t_1,
                                       observed=self.num_positive)
Example #17
    def set_likelihood(self):
        """
        Convert any bilby likelihoods to PyMC3 distributions.
        """

        # create theano Op for the log likelihood if not using a predefined model
        pymc3, STEP_METHODS, floatX = self._import_external_sampler()
        theano, tt, as_op = self._import_theano()

        class LogLike(tt.Op):

            itypes = [tt.dvector]
            otypes = [tt.dscalar]

            def __init__(self, parameters, loglike, priors):
                self.parameters = parameters
                self.likelihood = loglike
                self.priors = priors

                # set the fixed parameters
                for key in self.priors.keys():
                    if isinstance(self.priors[key], float):
                        self.likelihood.parameters[key] = self.priors[key]

                self.logpgrad = LogLikeGrad(self.parameters, self.likelihood, self.priors)

            def perform(self, node, inputs, outputs):
                theta, = inputs
                for i, key in enumerate(self.parameters):
                    self.likelihood.parameters[key] = theta[i]

                outputs[0][0] = np.array(self.likelihood.log_likelihood())

            def grad(self, inputs, g):
                theta, = inputs
                return [g[0] * self.logpgrad(theta)]

        # create theano Op for calculating the gradient of the log likelihood
        class LogLikeGrad(tt.Op):

            itypes = [tt.dvector]
            otypes = [tt.dvector]

            def __init__(self, parameters, loglike, priors):
                self.parameters = parameters
                self.Nparams = len(parameters)
                self.likelihood = loglike
                self.priors = priors

                # set the fixed parameters
                for key in self.priors.keys():
                    if isinstance(self.priors[key], float):
                        self.likelihood.parameters[key] = self.priors[key]

            def perform(self, node, inputs, outputs):
                theta, = inputs

                # define version of likelihood function to pass to derivative function
                def lnlike(values):
                    for i, key in enumerate(self.parameters):
                        self.likelihood.parameters[key] = values[i]
                    return self.likelihood.log_likelihood()

                # calculate gradients
                grads = derivatives(theta, lnlike, abseps=1e-5, mineps=1e-12, reltol=1e-2)

                outputs[0][0] = grads

        with self.pymc3_model:
            # check if it is a predefined likelihood function
            if isinstance(self.likelihood, GaussianLikelihood):
                # check required attributes exist
                if (not hasattr(self.likelihood, 'sigma') or
                        not hasattr(self.likelihood, 'x') or
                        not hasattr(self.likelihood, 'y')):
                    raise ValueError("Gaussian Likelihood does not have all the correct attributes!")

                if 'sigma' in self.pymc3_priors:
                    # if sigma is supplied use that value
                    if self.likelihood.sigma is None:
                        self.likelihood.sigma = self.pymc3_priors.pop('sigma')
                    else:
                        del self.pymc3_priors['sigma']

                for key in self.pymc3_priors:
                    if key not in self.likelihood.function_keys:
                        raise ValueError("Prior key '{}' is not a function key!".format(key))

                model = self.likelihood.func(self.likelihood.x, **self.pymc3_priors)

                # set the distribution
                pymc3.Normal('likelihood', mu=model, sd=self.likelihood.sigma,
                             observed=self.likelihood.y)
            elif isinstance(self.likelihood, PoissonLikelihood):
                # check required attributes exist
                if (not hasattr(self.likelihood, 'x') or
                        not hasattr(self.likelihood, 'y')):
                    raise ValueError("Poisson Likelihood does not have all the correct attributes!")

                for key in self.pymc3_priors:
                    if key not in self.likelihood.function_keys:
                        raise ValueError("Prior key '{}' is not a function key!".format(key))

                # get rate function
                model = self.likelihood.func(self.likelihood.x, **self.pymc3_priors)

                # set the distribution
                pymc3.Poisson('likelihood', mu=model, observed=self.likelihood.y)
            elif isinstance(self.likelihood, ExponentialLikelihood):
                # check required attributes exist
                if (not hasattr(self.likelihood, 'x') or
                        not hasattr(self.likelihood, 'y')):
                    raise ValueError("Exponential Likelihood does not have all the correct attributes!")

                for key in self.pymc3_priors:
                    if key not in self.likelihood.function_keys:
                        raise ValueError("Prior key '{}' is not a function key!".format(key))

                # get mean function
                model = self.likelihood.func(self.likelihood.x, **self.pymc3_priors)

                # set the distribution
                pymc3.Exponential('likelihood', lam=1. / model, observed=self.likelihood.y)
            elif isinstance(self.likelihood, StudentTLikelihood):
                # check required attributes exist
                if (not hasattr(self.likelihood, 'x') or
                        not hasattr(self.likelihood, 'y') or
                        not hasattr(self.likelihood, 'nu') or
                        not hasattr(self.likelihood, 'sigma')):
                    raise ValueError("StudentT Likelihood does not have all the correct attributes!")

                if 'nu' in self.pymc3_priors:
                    # if nu is supplied use that value
                    if self.likelihood.nu is None:
                        self.likelihood.nu = self.pymc3_priors.pop('nu')
                    else:
                        del self.pymc3_priors['nu']

                for key in self.pymc3_priors:
                    if key not in self.likelihood.function_keys:
                        raise ValueError("Prior key '{}' is not a function key!".format(key))

                model = self.likelihood.func(self.likelihood.x, **self.pymc3_priors)

                # set the distribution
                pymc3.StudentT('likelihood', nu=self.likelihood.nu, mu=model, sd=self.likelihood.sigma,
                               observed=self.likelihood.y)
            elif isinstance(self.likelihood, (GravitationalWaveTransient, BasicGravitationalWaveTransient)):
                # set theano Op - pass _search_parameter_keys, which only contains non-fixed variables
                logl = LogLike(self._search_parameter_keys, self.likelihood, self.pymc3_priors)

                parameters = OrderedDict()
                for key in self._search_parameter_keys:
                    try:
                        parameters[key] = self.pymc3_priors[key]
                    except KeyError:
                        raise KeyError(
                            "Unknown key '{}' when setting GravitationalWaveTransient likelihood".format(key))

                # convert to theano tensor variable
                values = tt.as_tensor_variable(list(parameters.values()))

                pymc3.DensityDist('likelihood', lambda v: logl(v), observed={'v': values})
            else:
                raise ValueError("Unknown likelihood has been provided")
Example #18
@as_op(itypes=[tt.lscalar, tt.dscalar, tt.dscalar], otypes=[tt.dvector])
def rate_(switchpoint, early_mean, late_mean):
    # early mean before the switchpoint, late mean from it onwards
    out = empty(years)
    out[:switchpoint] = early_mean
    out[switchpoint:] = late_mean
    return out


with pm.Model() as model:

    # Prior for distribution of switchpoint location
    switchpoint = pm.DiscreteUniform('switchpoint', lower=0, upper=years)
    # Priors for pre- and post-switch mean number of disasters
    early_mean = pm.Exponential('early_mean', lam=1.)
    late_mean = pm.Exponential('late_mean', lam=1.)

    # Allocate appropriate Poisson rates to years before and after current
    # switchpoint location
    idx = arange(years)
    rate = rate_(switchpoint, early_mean, late_mean)

    # Data likelihood
    disasters = pm.Poisson('disasters', rate, observed=disasters_data)

    # Use slice sampler for means
    step1 = pm.Slice([early_mean, late_mean])
    # Use Metropolis for switchpoint, since it accommodates discrete variables
    step2 = pm.Metropolis([switchpoint])

    # Initial values for stochastic nodes
    start = {'early_mean': 2., 'late_mean': 3.}

    tr = pm.sample(1000, tune=500, start=start, step=[step1, step2], njobs=2)
    pm.traceplot(tr)
Example #19
    def test_discrete_continuous(self):
        with pm.Model() as model:
            a = pm.Poisson("a", 5)
            b = pm.HalfNormal("b", 10)
            y = pm.Normal("y", a, b, observed=[1, 2, 3, 4])
            trace = pm.sample_smc()
Example #20
n_count_data = len(count_data)

with pm.Model() as model:
    alpha = 1.0 / count_data.mean()

    lambda_1 = pm.Exponential('lambda_1', alpha)
    lambda_2 = pm.Exponential('lambda_2', alpha)

    tau = pm.DiscreteUniform('tau', lower=0, upper=n_count_data - 1)

with model:
    idx = np.arange(n_count_data)
    lambda_ = pm.math.switch(tau > idx, lambda_1, lambda_2)

with model:
    observation = pm.Poisson('obs', lambda_, observed=count_data)

with model:
    step = pm.Metropolis()
    trace = pm.sample(10000, tune=5000, step=step)

lambda_1_samples = trace['lambda_1']
lambda_2_samples = trace['lambda_2']
tau_samples = trace['tau']
tau_count = collections.Counter(tau_samples)

plt.subplot(311)
plt.hist(lambda_1_samples, bins=30)
plt.subplot(312)
plt.hist(lambda_2_samples, bins=30)
plt.subplot(313)
Example #21
n_count_data = len(count_data)

with pm.Model() as model:
    alpha = 1.0 / count_data.mean()  # count_data holds the observed message counts
    lambda_1 = pm.Exponential("lambda_1", alpha)
    lambda_2 = pm.Exponential("lambda_2", alpha)

    tau = pm.DiscreteUniform("tau", lower=0, upper=n_count_data - 1)

with model:
    idx = np.arange(n_count_data)  # Index
    lambda_ = pm.math.switch(tau > idx, lambda_1, lambda_2)

with model:
    observation = pm.Poisson("obs", lambda_, observed=count_data)

with model:
    step = pm.Metropolis()
    trace = pm.sample(10000, tune=5000, step=step)

lambda_1_samples = trace['lambda_1']
lambda_2_samples = trace['lambda_2']
tau_samples = trace['tau']

ax = plt.subplot(311)
ax.set_autoscaley_on(False)

plt.hist(lambda_1_samples,
         histtype='stepfilled',
         bins=30,
Example #22
        df = pd.DataFrame(scratch_chain, columns=['y0', 'y1'])
        df.to_csv('./data/linear_regression_scratch_trace.csv', index=False)

if run_pymc3:
    ####################################
    ########### Using pymc3 ############
    ####################################
    print('Running linear regression using pymc3 example.')

    with pymc3.Model() as model:
        # Define priors
        y0 = pymc3.Uniform('y0', 250, 600)
        y1 = pymc3.Uniform('y1', 0, 20)

        # Define likelihood
        likelihood = pymc3.Poisson('f(y)', mu=y0 + y1*x, observed=y_obs_noise)

        # Inference
        pymc_chain = pymc3.sample(draws=niter, cores=None, tune=nburn)

####################################
######## Plot the results ##########
####################################
if run_scratch:
    fig = plt.figure(constrained_layout=True, figsize=(8, 5))
    gs = gridspec.GridSpec(nrows=2, ncols=4, figure=fig)
    ax = np.array([
                    [fig.add_subplot(gs[0, 0]), fig.add_subplot(gs[0, 1])],
                    [fig.add_subplot(gs[1, 0]), fig.add_subplot(gs[1, 1])]
                ])
    bx = fig.add_subplot(gs[:, 2:])
dk.loc[:, "society"] = np.arange(Nsociety)
Dmat_ = Dmat.values
Dmatsq = np.power(Dmat_, 2)

# %%
with pm.Model() as m_13_7:
    etasq = pm.HalfCauchy("etasq", 1)
    rhosq = pm.HalfCauchy("rhosq", 1)
    Kij = etasq * (tt.exp(-rhosq * Dmatsq) + np.diag([0.01] * Nsociety))

    g = pm.MvNormal("g", mu=np.zeros(Nsociety), cov=Kij, shape=Nsociety)

    a = pm.Normal("a", 0, 10)
    bp = pm.Normal("bp", 0, 1)
    lam = pm.math.exp(a + g[dk.society.values] + bp * dk.logpop)
    obs = pm.Poisson("total_tools", lam, observed=dk.total_tools)
    trace_13_7 = pm.sample(1000, tune=1000)

# %%
az.plot_trace(trace_13_7,
              var_names=["g", "a", "bp", "etasq", "rhosq"],
              compact=True)

# %%
az.summary(trace_13_7,
           var_names=["g", "a", "bp", "etasq", "rhosq"],
           round_to=2)

# %%
post = pm.trace_to_dataframe(trace_13_7,
                             varnames=["g", "a", "bp", "etasq", "rhosq"])
Example #24
def main(args):
    print("Loading data...")
    teams, df = load_data()
    nt = len(teams)
    train = df[df["split"] == "train"]

    print("Starting inference...")
    with pm.Model() as model:
        # priors
        alpha = pm.Normal("alpha", mu=0, sigma=1)
        sd_att = pm.HalfStudentT("sd_att", nu=3, sigma=2.5)
        sd_def = pm.HalfStudentT("sd_def", nu=3, sigma=2.5)

        home = pm.Normal("home", mu=0, sigma=1)  # home advantage

        # team-specific model parameters
        attack = pm.Normal("attack", mu=0, sigma=sd_att, shape=nt)
        defend = pm.Normal("defend", mu=0, sigma=sd_def, shape=nt)

        # data
        home_id = pm.Data("home_data", train["Home_id"])
        away_id = pm.Data("away_data", train["Away_id"])

        # likelihood
        theta1 = tt.exp(alpha + home + attack[home_id] - defend[away_id])
        theta2 = tt.exp(alpha + attack[away_id] - defend[home_id])

        pm.Poisson("s1", mu=theta1, observed=train["score1"])
        pm.Poisson("s2", mu=theta2, observed=train["score2"])

    with model:
        fit = pm.sample(
            draws=args.num_samples,
            tune=args.num_warmup,
            chains=args.num_chains,
            cores=args.num_cores,
            random_seed=args.rng_seed,
        )

    print("Analyse posterior...")
    az.plot_forest(
        fit,
        var_names=("alpha", "home", "sd_att", "sd_def"),
        backend="bokeh",
    )

    az.plot_trace(
        fit,
        var_names=("alpha", "home", "sd_att", "sd_def"),
        backend="bokeh",
    )

    # Attack and defence
    quality = teams.copy()
    quality = quality.assign(
        attack=fit["attack"].mean(axis=0),
        attacksd=fit["attack"].std(axis=0),
        defend=fit["defend"].mean(axis=0),
        defendsd=fit["defend"].std(axis=0),
    )
    quality = quality.assign(
        attack_low=quality["attack"] - quality["attacksd"],
        attack_high=quality["attack"] + quality["attacksd"],
        defend_low=quality["defend"] - quality["defendsd"],
        defend_high=quality["defend"] + quality["defendsd"],
    )

    plot_quality(quality)

    # Predicted goals and table
    predict = df[df["split"] == "predict"]

    with model:
        pm.set_data({"home_data": predict["Home_id"]})
        pm.set_data({"away_data": predict["Away_id"]})

        predicted_score = pm.sample_posterior_predictive(
            fit, var_names=["s1", "s2"], random_seed=1)

    predicted_full = predict.copy()
    predicted_full = predicted_full.assign(
        score1=predicted_score["s1"].mean(axis=0).round(),
        score1error=predicted_score["s1"].std(axis=0),
        score2=predicted_score["s2"].mean(axis=0).round(),
        score2error=predicted_score["s2"].std(axis=0),
    )

    predicted_full = train.append(
        predicted_full.drop(columns=["score1error", "score2error"]))

    print(score_table(df))
    print(score_table(predicted_full))
Example #25
def main(input_dir, output_dir, dataset, model_type, n_samples, n_tune, target_accept, n_cores, seed, init, profile):
    '''Fit log-parabola model to DATASET. 

    Parameters
    ----------
    input_dir : [type]
        input directory containing subdirs for each instrument with dl3 data
    output_dir : [type]
        where to save the results. traces and two plots
    dataset : string
        telescope name
    model_type : string
        whether to use the profile likelihood ('wstat' or 'profile') or not ('full')
    n_samples : int
        number of samples to draw
    n_tune : int
        number of tuning steps
    target_accept : float
        target accept fraction for the pymc sampler
    n_cores : int
        number of cpu cores to use
    seed : int
        random seed
    init : string
        pymc init string
    profile : bool
        whether to output debugging/profiling information to the console
    Raises
    ------
    NotImplementedError
        This does not yet work on the joint dataset, but that's good enough for me.
    '''
    np.random.seed(seed)

    if dataset == 'joint':
        #TODO need to calculate mu_b for each observation independently.
        raise NotImplementedError('This is not implemented for the joint dataset yet.')
        # observations, lo, hi = load_joint_spectrum_observation(input_dir)
    else:
        p = os.path.join(input_dir, dataset)
        observations, lo, hi = load_spectrum_observations(p)

    prepare_output(output_dir)

    # TODO: this has to happen for every observation independently
    exposure_ratio = observations[0].alpha[0]
    # print(exposure_ratio)
    on_data, off_data = get_observed_counts(observations)

    integrator = init_integrators(observations)

    print('On Data')
    display_data(on_data)

    print('Off Data')
    display_data(off_data)
    
    print('--' * 30)
    print(f'Fitting data for {dataset} in {len(observations)} observations.  ')
    print(f'Using {len(on_data)} bins with { on_data.sum()} counts in on region and {off_data.sum()} counts in off region.')
    print(f'Fit range is: {(lo, hi) * u.TeV}.')
    model = pm.Model(theano_config={'compute_test_value': 'ignore'})
    with model:
        # amplitude = pm.TruncatedNormal('amplitude', mu=4, sd=1, lower=0.01, testval=4)
        # alpha = pm.TruncatedNormal('alpha', mu=2.5, sd=1, lower=0.00, testval=2.5)
        # beta = pm.TruncatedNormal('beta', mu=0.5, sd=0.5, lower=0.00000, testval=0.5)
        amplitude = pm.HalfFlat('amplitude', testval=4)
        alpha = pm.HalfFlat('alpha', testval=2.5)
        beta = pm.HalfFlat('beta', testval=0.5)

        mu_s = forward_fold_log_parabola_symbolic(integrator, amplitude, alpha, beta, observations)
        # mu_s = forward_fold_log_parabola_analytic(amplitude, alpha, beta, observations)

        if model_type == 'wstat':
            print('Building profiled likelihood model')
            mu_b = pm.Deterministic('mu_b', calc_mu_b(mu_s, on_data, off_data, exposure_ratio))
        else:
            print('Building full likelihood model')
            mu_b = pm.HalfFlat('mu_b', shape=len(off_data))

        pm.Poisson('background', mu=mu_b, observed=off_data, shape=len(off_data))
        pm.Poisson('signal', mu=mu_s + exposure_ratio * mu_b, observed=on_data, shape=len(on_data))


    print('--' * 30)
    print('Model debug information:')
    for RV in model.basic_RVs:
        print(RV.name, RV.logp(model.test_point))

    if profile:
        model.profile(model.logpt).summary()

    print(model.check_test_point())

    print('--' * 30)
    print('Plotting landscape:')
    fig, _ = plot_landscape(model, off_data)
    fig.savefig(os.path.join(output_dir, 'landscape.pdf'))

    print('--' * 30)
    print('Printing  graphs:')
    theano.printing.pydotprint(mu_s, outfile=os.path.join(output_dir, 'graph_mu_s.pdf'), format='pdf', var_with_name_simple=True)  
    theano.printing.pydotprint(mu_s + exposure_ratio * mu_b, outfile=os.path.join(output_dir, 'graph_n_on.pdf'), format='pdf', var_with_name_simple=True)  


    print('--' * 30)
    print('Sampling likelihood:')
    with model:
        trace = pm.sample(n_samples, cores=n_cores, tune=n_tune, init=init, seed=[seed] * n_cores)

    print('--' * 30)
    print(f'Fit results for {dataset}')
    print(trace['amplitude'].mean(), trace['alpha'].mean(), trace['beta'].mean())
    print(np.median(trace['amplitude']), np.median(trace['alpha']), np.median(trace['beta']))

    print('--' * 30)
    # print('Plotting traces')
    # plt.figure()
    # varnames = ['amplitude', 'alpha', 'beta'] if model_type != 'full' else ['amplitude', 'alpha', 'beta', 'mu_b']
    # pm.traceplot(trace, varnames=varnames)
    # plt.savefig(os.path.join(output_dir, 'traces.pdf'))

    p = os.path.join(output_dir, 'num_samples.txt')
    with open(p, "w") as text_file:
        text_file.write(f'\\num{{{n_samples}}}')

    p = os.path.join(output_dir, 'num_chains.txt')
    with open(p, "w") as text_file:
        text_file.write(f'\\num{{{n_cores}}}')
    
    p = os.path.join(output_dir, 'num_tune.txt')
    with open(p, "w") as text_file:
        text_file.write(f'\\num{{{n_tune}}}')

    plt.figure()
    pm.energyplot(trace)
    plt.savefig(os.path.join(output_dir, 'energy.pdf'))

    # plt.figure()
    # pm.autocorrplot(trace, burn=n_tune)
    # plt.savefig(os.path.join(output_dir, 'autocorr.pdf'))
    
    plt.figure()
    pm.forestplot(trace, varnames=['amplitude', 'alpha', 'beta'])
    plt.savefig(os.path.join(output_dir, 'forest.pdf'))
    

    trace_output = os.path.join(output_dir, 'traces')
    print(f'Saving traces to {trace_output}')
    with model:
        pm.save_trace(trace, trace_output, overwrite=True)
Example #26
    tau_att = pm3.Gamma('tau_att', .1, .1)
    tau_def = pm3.Gamma('tau_def', .1, .1)
    intercept = pm3.Normal('intercept', 0, .0001)

    # team-specific model parameters
    atts_star = pm3.Normal("atts_star", mu=0, tau=tau_att, shape=num_teams)
    defs_star = pm3.Normal("defs_star", mu=0, tau=tau_def, shape=num_teams)

    atts = pm3.Deterministic('atts', atts_star - tt.mean(atts_star))
    defs = pm3.Deterministic('defs', defs_star - tt.mean(defs_star))
    home_theta = tt.exp(intercept + home + atts[home_team] + defs[away_team])
    away_theta = tt.exp(intercept + atts[away_team] + defs[home_team])

    # likelihood of observed data
    home_points = pm3.Poisson('home_points',
                              mu=home_theta,
                              observed=observed_home_goals)
    away_points = pm3.Poisson('away_points',
                              mu=away_theta,
                              observed=observed_away_goals)

# * We specified the model and the likelihood function
# * Now we need to fit our model, using the Maximum A Posteriori algorithm to decide where to start our No-U-Turn Sampler

with model:

    start = pm3.find_MAP()
    step = pm3.NUTS(scaling=start)
    trace = pm3.sample(2000, step, start=start, progressbar=True)

    pm3.traceplot(trace)
Example #27
variant_c = [np.random.randint(30, 36) for _ in range(120)]  # dummy data
uplift = 1.1 # representing 10%
with pm.Model() as model:
    alpha_a = 1.0/np.mean(variant_a)
    alpha_b = 1.0/np.mean(variant_b)
    alpha_c = 1.0/np.mean(variant_c)
    lambda_a = pm.Exponential("lambda_a", alpha_a)
    lambda_b = pm.Exponential("lambda_b", alpha_b)
    lambda_c = pm.Exponential("lambda_c", alpha_c)
    delta_ba = pm.Deterministic("delta_ba", lambda_b - lambda_a)
    delta_ca = pm.Deterministic("delta_ca", lambda_c - lambda_a) #marginal
    delta_ba_uplift = pm.Deterministic("delta_ba_uplift", lambda_b - uplift*lambda_a) #for 10%
    delta_ca_uplift = pm.Deterministic("delta_ca_uplift", lambda_c - uplift*lambda_a)

with model:
    observation_a = pm.Poisson("obs_a", lambda_a, observed=variant_a)
    observation_b = pm.Poisson("obs_b", lambda_b, observed=variant_b)
    observation_c = pm.Poisson("obs_c", lambda_c, observed=variant_c)

with model:
    step = pm.Metropolis()
    trace = pm.sample(10000, tune=5000, step=step)

#Variant A
lambda_a_samples = trace['lambda_a']
lambda_a_samples = lambda_a_samples[1000:] # burned trace
#Variant B
lambda_b_samples = trace['lambda_b']
lambda_b_samples = lambda_b_samples[1000:] # burned trace
#Variant C
lambda_c_samples = trace['lambda_c']
Example #28
    #    l = pm.HalfCauchy('l', beta=3.)
    # informative prior
    l = pm.Gamma('l',
                 alpha=5,
                 beta=1,
                 transform=pm.distributions.transforms.LogExpM1())

    eta = pm.HalfCauchy('eta', beta=3.)
    cov_func = eta**2 * pm.gp.cov.Matern32(D, ls=l * np.ones(D))

    #Gaussian Process
    gp = pm.gp.Latent(cov_func=cov_func)
    f = gp.prior('f', X=Xbatch, shape=batchsize**2)

    obs = pm.Poisson('obs',
                     mu=tt.exp(f),
                     observed=Ybatch,
                     total_size=y_data.shape)

    approx = pm.fit(
        20000,
        method='fullrank_advi',
        callbacks=[pm.callbacks.CheckParametersConvergence(tolerance=1e-4)])
    trace = approx.sample(1000)
pm.traceplot(trace, varnames=['l', 'eta'])
#%%
with model:
    group_1 = pm.Group([l, eta],
                       vfam='fr')  # latent1 has full rank approximation
    group_other = pm.Group(None,
                           vfam='mf')  # other variables have mean field Q
    approx = pm.Approximation([group_1, group_other])
Example #29
sd = np.log(y.std())
t_ = np.linspace(0., 1., t)
nbreak = 3

with pm.Model() as m:
    lambdas = pm.Normal('lambdas', mu, sd=sd, shape=nbreak)
    trafo = Composed(pm.distributions.transforms.LogOdds(), Ordered())
    b = pm.Beta('b', 1., 1., shape=nbreak - 1, transform=trafo, testval=[0.4, 0.6])
    # index_t = tt.switch(tt.gt(t_, b[0]) * tt.lt(t_, b[1]), 1, 0) + tt.switch(tt.gt(t_, b[1]), 2, 0)

    index_t = tt.switch(tt.gt(t_, b[0]) * tt.lt(t_, b[1]), 1, 0)
    for idx in range(1, nbreak - 1):
        index_t += tt.switch(tt.gt(t_, b[idx]), idx + 1, 0)

    theta_ = pm.Deterministic('theta', tt.exp(lambdas[index_t]))
    obs = pm.Poisson('obs', theta_, observed=y)

    # sample
    step_method = pm.NUTS(target_accept=0.90, max_treedepth=15)
    cpt_trace = pm.sample(1000, chains=None, step=step_method, tune=1000)
    cpt_smry = pm.summary(cpt_trace)
    pm.traceplot(cpt_trace)

# ##################################################################################
# generate data
# https://gist.github.com/junpenglao/f7098c8e0d6eadc61b3e1bc8525dd90d
t = 1000
n_cpt = 1
tbreak = np.sort(np.random.randint(100, 900, n_cpt + 1))
theta = np.random.exponential(25, size=n_cpt + 1)
theta_t = np.zeros(t)
Example #30
_ = ax.set_xlim(0, x_lim)
#_ = ax.set_ylim(0, 0.2)
_ = ax.set_xlabel('EVs Connected')
_ = ax.set_ylabel('Probability mass')
_ = ax.set_title('Frequentist Estimated Poisson distribution for EVs Connected')
_ = plt.legend(['$\lambda$ = %s' % mu])

#%% Bayesian Approach (using MCMC sampler)

print('Running on PyMC3 v{}'.format(pm.__version__))

if __name__ == '__main__':

    with pm.Model() as model:
        mu = pm.Uniform('mu', lower=0, upper=5)
        likelihood = pm.Poisson('likelihood', mu=mu, observed=y_obs)
        
        start = pm.find_MAP()
        step = pm.Metropolis()
        trace = pm.sample(10000, step, start=start, progressbar=True)
    
#% Optimal Mu
    
#pm.traceplot(trace, var_names=['mu'], lines={'mu': freq_results['x']})
pm.traceplot(trace)

print('\n--- Optimal Model Parameters ---')

#%% Discarding early samples (burnin)

fig = plt.figure(figsize=(10,4))