Example #1
def binom_model(df):
    # TODO: verify that this model behaves as intended
    with pm.Model() as disaster_model:
        switchpoint = pm.DiscreteUniform('switchpoint',
                                         lower=df['t'].min(),
                                         upper=df['t'].max())

        # Priors for pre- and post-switch probability of "yes"; is there a better prior?
        early_rate = pm.Beta('early_rate', 1, 1)
        late_rate = pm.Beta('late_rate', 1, 1)

        # Allocate the appropriate probability to periods before and after the current switchpoint
        p = pm.math.switch(switchpoint >= df['t'].values, early_rate,
                           late_rate)

        p = pm.Deterministic('p', p)

        successes = pm.Binomial('successes',
                                n=df['n'].values,
                                p=p,
                                observed=df['category'].values)

        trace = pm.sample(10000)

        pm.traceplot(trace)

        plt.show()
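binom_model expects a DataFrame with columns t (period index), n (trial counts) and category (success counts); these roles are inferred from the model above, not documented in the source. A minimal synthetic usage sketch:

import numpy as np
import pandas as pd

# Hypothetical input: 40 periods of 20 trials each, with the success
# probability jumping from 0.2 to 0.6 after period 25.
rng = np.random.default_rng(0)
df = pd.DataFrame({'t': np.arange(40), 'n': 20})
p_true = np.where(df['t'] < 25, 0.2, 0.6)
df['category'] = rng.binomial(df['n'].values, p_true)

binom_model(df)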
Example #2
def uniform_model(df):
    """
    The switchpoint is modeled using a Discrete Uniform distribution.
    The observed data is modeled using the Normal distribution (likelihood).
    The priors are each assumed to be exponentially distributed.
    """
    alpha = 1.0 / df['score'].mean()
    beta = 1.0 / df['score'].std()

    t = df['t_encoded'].values

    with pm.Model() as model:
        switchpoint = pm.DiscreteUniform("switchpoint",
                                         lower=df['t_encoded'].min(),
                                         upper=df['t_encoded'].max())
        mu_1 = pm.Exponential("mu_1", alpha)
        mu_2 = pm.Exponential("mu_2", alpha)
        sd_1 = pm.Exponential("sd_1", beta)
        sd_2 = pm.Exponential("sd_2", beta)
        mu = pm.math.switch(switchpoint >= t, mu_1, mu_2)
        sd = pm.math.switch(switchpoint >= t, sd_1, sd_2)
        X = pm.Normal('x', mu=mu, sd=sd, observed=df['score'].values)
        trace = pm.sample(20000)

    pm.traceplot(trace[1000:],
                 varnames=['switchpoint', 'mu_1', 'mu_2', 'sd_1', 'sd_2'])
    plt.show()
Example #3
def build_disaster_model(masked=False):
    # fmt: off
    disasters_data = np.array([4, 5, 4, 0, 1, 4, 3, 4, 0, 6, 3, 3, 4, 0, 2, 6,
                               3, 3, 5, 4, 5, 3, 1, 4, 4, 1, 5, 5, 3, 4, 2, 5,
                               2, 2, 3, 4, 2, 1, 3, 2, 2, 1, 1, 1, 1, 3, 0, 0,
                               1, 0, 1, 1, 0, 0, 3, 1, 0, 3, 2, 2, 0, 1, 1, 1,
                               0, 1, 0, 1, 0, 0, 0, 2, 1, 0, 0, 0, 1, 1, 0, 2,
                               3, 3, 1, 1, 2, 1, 1, 1, 1, 2, 4, 2, 0, 0, 1, 4,
                               0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1])
    # fmt: on
    if masked:
        disasters_data[[23, 68]] = -1
        disasters_data = np.ma.masked_values(disasters_data, value=-1)
    years = len(disasters_data)

    with pm.Model() as model:
        # Prior for distribution of switchpoint location
        switchpoint = pm.DiscreteUniform("switchpoint", lower=0, upper=years)
        # Priors for pre- and post-switch mean number of disasters
        early_mean = pm.Exponential("early_mean", lam=1.0)
        late_mean = pm.Exponential("late_mean", lam=1.0)
        # Allocate appropriate Poisson rates to years before and after current
        # switchpoint location
        idx = np.arange(years)
        rate = tt.switch(switchpoint >= idx, early_mean, late_mean)
        # Data likelihood
        pm.Poisson("disasters", rate, observed=disasters_data)
    return model
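A usage sketch for the model above; with masked=True, PyMC3 treats the two masked years as missing data and samples them alongside the other variables (assuming a standard PyMC3 installation):

model = build_disaster_model(masked=True)
with model:
    # Step methods are assigned automatically; discrete variables get Metropolis.
    trace = pm.sample(2000, tune=1000)
print(pm.summary(trace))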
Example #4
def coal_mining_desaster(modelname='pymc3_coal_mining_disaster_model'):
    """Build a coal-mining disaster switchpoint model and return both the
    unfitted model and a copy fitted to the data."""
    # data
    disasters = np.array([
        4, 5, 4, 0, 1, 4, 3, 4, 0, 6, 3, 3, 4, 0, 2, 6, 3, 3, 5, 4, 5, 3, 1, 4,
        4, 1, 5, 5, 3, 4, 2, 5, 2, 2, 3, 4, 2, 1, 3, 3, 2, 1, 1, 1, 1, 3, 0, 0,
        1, 0, 1, 1, 0, 0, 3, 1, 0, 3, 2, 2, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 2,
        1, 0, 0, 0, 1, 1, 0, 2, 3, 3, 1, 2, 2, 1, 1, 1, 1, 2, 4, 2, 0, 0, 1, 4,
        0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1
    ])
    years = np.arange(1851, 1962)
    data = pd.DataFrame({'years': years, 'disasters': disasters})
    years = theano.shared(years)

    with pm.Model() as disaster_model:
        switchpoint = pm.DiscreteUniform('switchpoint',
                                         lower=years.min(),
                                         upper=years.max(),
                                         testval=1900)

        # Priors for pre- and post-switch disaster rates
        early_rate = pm.Exponential('early_rate', 1.0)
        late_rate = pm.Exponential('late_rate', 1.0)

        # Allocate the appropriate Poisson rate to years before and after the current switchpoint
        rate = pm.math.switch(switchpoint >= years, early_rate, late_rate)

        disasters = pm.Poisson('disasters', rate, observed=data['disasters'])
        # years = pm.Normal('years', mu=data['years'], sd=0.1, observed=data['years'])

    model = ProbabilisticPymc3Model(modelname,
                                    disaster_model,
                                    shared_vars={'years': years})
    model_fitted = model.copy(name=modelname + '_fitted').fit(data)
    return model, model_fitted
Example #5
def main(imagePath, hashmethod = hashmethod_example, num_rect = 1):
    image = Image.open(imagePath)
    hashobj = hashmethod(image)

    @as_op(itypes=[tt.lvector, tt.lvector, tt.lvector], otypes=[tt.lvector])
    def evaluate(xpositions, ypositions, colors):
        im = Image.new('RGB', (image.width, image.height))

        draw = ImageDraw.Draw(im)
        for i in range(num_rect):
            x1, x2 = xpositions[i*2:i*2+2]
            y1, y2 = ypositions[i*2:i*2+2]
            r, g, b = colors[i*3:i*3+3]
            draw.rectangle([(x1, y1), (x2, y2)], fill=(r, g, b))
        del draw

        return hashmethod(im)

    with pm.Model() as model:

        # Priors
        xpositions = pm.DiscreteUniform('xpositions', lower=0, upper=image.width-1, shape=2*num_rect)
        ypositions = pm.DiscreteUniform('ypositions', lower=0, upper=image.height-1, shape=2*num_rect)
        colors = pm.DiscreteUniform('colors', lower=0, upper=255, shape=3*num_rect)

        hashval = evaluate(xpositions, ypositions, colors)

        # Data likelihood
        hashobj_obs = pm.Poisson('objective', hashval, observed=hashobj)

        step = pm.Metropolis([xpositions, ypositions])
        step2 = pm.Metropolis([colors])

        # # Initial values for stochastic nodes
        start = {
            'xpositions': [random.randrange(image.width) for _ in range(2*num_rect)],
            'ypositions': [random.randrange(image.height) for _ in range(2*num_rect)],
            'colors': [random.randrange(256) for _ in range(3*num_rect)]
        }

        tr = pm.sample(200, tune=100, start=start, step=[step, step2], cores=2)
        pm.traceplot(tr)

    import matplotlib.pyplot as plt
    plt.show()
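hashmethod_example is not shown in the source. Whatever it is, it must map a PIL image to a vector of non-negative int64 values, since evaluate() declares tt.lvector as its output type and the result feeds a Poisson likelihood. A hypothetical stand-in (it would need to be defined before main()):

import numpy as np

def hashmethod_example(im):
    # Hypothetical: a coarse 16-bin grayscale histogram as the "hash",
    # returned as int64 to match the declared tt.lvector output type.
    arr = np.asarray(im.convert('L'))
    hist, _ = np.histogram(arr, bins=16, range=(0, 256))
    return hist.astype('int64')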
Example #6
def laser_late_trials(data, num_emissions):

    # Make the pymc3 model
    with pm.Model() as model:
        # Dirichlet prior on the emission/spiking probabilities - 4 states
        p = pm.Dirichlet('p', np.ones(num_emissions), shape=(4, num_emissions))

        # Discrete Uniform switch times
        # Switch from detection to identity firing
        t1 = pm.DiscreteUniform('t1', lower=20, upper=60)
        # Switch from identity to palatability firing
        t2 = pm.DiscreteUniform('t2', lower=t1 + 20, upper=120)
        # Switch from palatability firing to end
        t3 = pm.DiscreteUniform('t3', lower=t2 + 30, upper=150)

        # Add potentials to keep the switch times from coming too close to each other
        #t_pot1 = pm.Potential('t_pot1', tt.switch(t2 - t1 >= 20, 0, -np.inf))
        #t_pot2 = pm.Potential('t_pot2', tt.switch(t3 - t2 >= 20, 0, -np.inf))
        #t_pot3 = pm.Potential('t_pot3', tt.switch(t3 - t1 >= 40, 0, -np.inf))

        # Get the actual state numbers based on the switch times
        states1 = tt.switch(t1 >= np.arange(150), 0, 1)
        states2 = tt.switch(t2 >= np.arange(150), states1, 2)
        states = tt.switch(t3 >= np.arange(150), states2, 3)

        # Categorical observations
        obs = pm.Categorical('obs',
                             p=p[states],
                             observed=np.append(data[:140], data[190:]))

    # Inference button :D
    with model:
        tr = pm.sample(300000,
                       init=None,
                       step=pm.Metropolis(),
                       njobs=2,
                       start={
                           't1': 25,
                           't2': 75,
                           't3': 125
                       },
                       progressbar=False)

    # Return the inference!
    return model, tr[250000:]
Example #7
    def find_changepoint(self):
        niter_vec = [5000, 2000, 2000, 3000]
        niter = niter_vec[self.case]
        data = self.data
        # initialize defaultdict for changepoint priors
        tau = defaultdict(list)
        # initialize defaultdict for uniform priors
        u = defaultdict(list)
        # time array
        t = np.arange(0, self.N)

        with pm.Model() as model:  # context management

            # define uniform priors for the mean values / standard deviation,
            # depending on the type of problem
            for i in range(self.ncpt + 1):
                if self.type not in ("1-cpt-var", "normal-var"):
                    varname = "mu" + str(i + 1)
                    u[i] = pm.Uniform(varname, 650, 1200)
                else:
                    varname = "sigma" + str(i + 1)
                    u[i] = pm.Uniform(varname, 5., 60.)

            # define the switch function
            for i in range(self.ncpt):
                varname = "tau" + str(i + 1)
                if i == 0:
                    tmin = t.min()
                    switch_function = u[0]
                else:
                    tmin = tau[i - 1]
                tau[i] = pm.DiscreteUniform(varname, tmin, t.max())
                switch_function = T.switch(tau[i] >= t, switch_function,
                                           u[i + 1])

            # we are finally in a position to define the mu and sigma random variables
            if self.type not in ("1-cpt-var", "normal-var"):
                mu = switch_function
                sigma = pm.Uniform("sigma", 1, 60)
            else:
                mu = pm.Uniform("mu", 600, 1500)
                sigma = switch_function

            # define the log-likelihood function
            logp = - T.log(sigma * T.sqrt(2.0 * np.pi)) \
                  - T.sqr(data - mu) / (2.0 * sigma * sigma)

            def logp_func(data):
                return logp.sum()

            # evaluate the log-likelihood given the observed data
            L_obs = pm.DensityDist('L_obs', logp_func, observed=data)

            self.trace = pm.sample(niter, random_seed=123, progressbar=True)
Example #8
def bayes_multiple_detector(t, s, n):
    scala = 1000
    with pm.Model() as abrupt_model:
        sigma = pm.Normal('sigma', mu=0.02 * scala, sigma=0.015 * scala)
        # sigma = pm.Uniform('sigma', 5, 15)
        mu = pm.Uniform("mu1", -1.5 * scala, -1.4 * scala)
        tau = pm.DiscreteUniform("tau1", t.min(), t.max())

        for i in np.arange(2, n + 2):
            _mu = pm.Uniform("mu" + str(i), -1.6 * scala, -1.4 * scala)
            mu = T.switch(tau >= t, mu, _mu)
            if i < n + 1:
                tau = pm.DiscreteUniform("tau" + str(i), tau, t.max())
        s_obs = pm.Normal("s_obs", mu=mu, sigma=sigma, observed=s)

    with abrupt_model:
        pm.find_MAP()
        trace = pm.sample(20000, tune=5000)
        az.plot_trace(trace)
        az.to_netcdf(trace, getpath('tracepath') + 'bd9_4')
        plt.show()
        pm.summary(trace)
    return trace
Example #9
def run_model(steps=10000):
    model = pymc.Model()
    with model:
        α = 1 / count_data.mean()
        λ1 = pymc.Exponential("λ1", α)
        λ2 = pymc.Exponential("λ2", α)
        τ = pymc.DiscreteUniform("τ", lower=0.0, upper=len(count_data))
        process_mean = mean(τ, λ1, λ2)
        observation = pymc.Poisson("observation", process_mean, observed=count_data)
        start = {"λ1": 10.0, "λ2": 30.0}
        step1 = pymc.Slice([λ1, λ2])
        step2 = pymc.Metropolis([τ])
        trace = pymc.sample(steps, tune=500, start=start, step=[step1, step2], cores=2)
    return pymc.trace_to_dataframe(trace)
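mean and count_data come from the surrounding module and are not shown. A sketch of a switch-based mean consistent with the other examples here, assuming pymc is an alias for PyMC3 (this is an assumption, not the original helper):

import numpy as np

def mean(τ, λ1, λ2):
    # Piecewise process mean: λ1 up to the switchpoint τ, λ2 afterwards.
    idx = np.arange(len(count_data))
    return pymc.math.switch(τ > idx, λ1, λ2)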
Example #10
    def pymc3_dist(self, name, hypers):
        lower = self.lower
        upper = self.upper
        if len(hypers) == 1:
            # A single hyperprior replaces either the lower or the upper bound.
            hyper_dist = hypers[0][0]
            hyper_name = hypers[0][1]
            idx = hypers[0][2]
            if idx == 0:
                lower = hyper_dist.pymc3_dist(hyper_name, [])
            else:
                upper = hyper_dist.pymc3_dist(hyper_name, [])
        elif len(hypers) == 2:
            # Two hyperpriors replace both bounds.
            hyper_dist_1 = hypers[0][0]
            hyper_name_1 = hypers[0][1]
            hyper_dist_2 = hypers[1][0]
            hyper_name_2 = hypers[1][1]
            lower = hyper_dist_1.pymc3_dist(hyper_name_1, [])
            upper = hyper_dist_2.pymc3_dist(hyper_name_2, [])

        if self.num_elements == -1:
            return pm.DiscreteUniform(name, lower=lower, upper=upper)
        return pm.DiscreteUniform(name, lower=lower, upper=upper,
                                  shape=self.num_elements)
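The hypers mechanism amounts to putting priors on the bounds themselves. In plain PyMC3 the idx == 0 case reduces to something like this sketch (names and bounds are illustrative; PyMC3 accepts random variables as DiscreteUniform bounds, as Example #8 above also relies on):

with pm.Model():
    # Hyperprior on the lower bound; the upper bound stays fixed.
    lower = pm.DiscreteUniform('x_lower', lower=0, upper=5)
    x = pm.DiscreteUniform('x', lower=lower, upper=10)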
Example #11
def bayes_single_detector(t, s):
    with pm.Model() as abrupt_model:
        steppoint = pm.DiscreteUniform("steppoint",
                                       lower=t[1],
                                       upper=t[-1],
                                       testval=50)
        early_mu = pm.Uniform("early_mu", -50, 50)
        late_mu = pm.Uniform("late_mu", -50, 50)
        mu = pm.math.switch(steppoint >= t, early_mu, late_mu)
        sigma = pm.Normal('sigma', mu=30, sigma=20)
        s_obs = pm.Normal("s_obs", mu=mu, sigma=sigma, observed=s)

    with abrupt_model:
        trace = pm.sample(1000)
        az.plot_trace(trace)
        plt.show()
    return trace
Example #12
def infer(some_count_data):
    """
    Run Bayesian inference on count data. Produces trace plots of lambda_1,
    lambda_2 and tau, and returns a list of expected values so that observed
    vs. expected counts can be plotted.
    source: https://nbviewer.jupyter.org/github/CamDavidsonPilon/Probabilistic-Programming-and-Bayesian-Methods-for-Hackers/blob/master/Chapter1_Introduction/Ch1_Introduction_PyMC3.ipynb
    """
    n_count_data = len(some_count_data)
    with pm.Model() as model:
        alpha = 1.0 / some_count_data.mean()
        lambda_1 = pm.Exponential("lambda_1", alpha)
        lambda_2 = pm.Exponential("lambda_2", alpha)
        tau = pm.DiscreteUniform("tau", lower=0, upper=n_count_data - 1)
        idx = np.arange(n_count_data)
        lambda_ = pm.math.switch(tau > idx, lambda_1, lambda_2)
        observation = pm.Poisson("obs", lambda_, observed=some_count_data)
        step = pm.Metropolis()
        trace = pm.sample(10000, tune=5000, step=step)

    lambda_1_samples = trace['lambda_1']
    lambda_2_samples = trace['lambda_2']
    tau_samples = trace['tau']
    print(pm.gelman_rubin(trace))
    pm.traceplot(trace)
    N = tau_samples.shape[0]
    expected_violence = np.zeros(n_count_data)
    for day in range(0, n_count_data):
        # ix is a bool index of all tau samples corresponding to
        # the switchpoint occurring prior to value of 'day'
        ix = day < tau_samples
        # Each posterior sample corresponds to a value for tau.
        # for each day, that value of tau indicates whether we're "before"
        # (in the lambda1 "regime") or
        #  "after" (in the lambda2 "regime") the switchpoint.
        # by taking the posterior sample of lambda1/2 accordingly, we can average
        # over all samples to get an expected value for lambda on that day.
        # As explained, the "count" random variable is Poisson distributed,
        # and therefore lambda (the poisson parameter) is the expected value of
        # "count".
        expected_violence[day] = (lambda_1_samples[ix].sum() +
                                  lambda_2_samples[~ix].sum()) / N
    return expected_violence
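The per-day loop above can be vectorized with NumPy; a sketch of an equivalent computation using the same arrays as inside infer():

# Rows are posterior samples, columns are days; True where the day falls
# in the lambda_1 regime for that sample.
before = np.arange(n_count_data)[None, :] < tau_samples[:, None]
expected_violence = np.where(before,
                             lambda_1_samples[:, None],
                             lambda_2_samples[:, None]).mean(axis=0)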
Example #13
def baysian_latency(count_data):
    import pymc3 as pm
    import theano.tensor as tt
    n_count_data = len(count_data)
    with pm.Model() as model:
        alpha = 1.0 / count_data.mean()  # count_data holds our text-message counts
        lambda_1 = pm.Exponential("lambda_1", alpha)
        lambda_2 = pm.Exponential("lambda_2", alpha)

        tau = pm.DiscreteUniform("tau", lower=0, upper=n_count_data - 1)

    with model:
        idx = np.arange(n_count_data)  # Index
        lambda_ = pm.math.switch(tau >= idx, lambda_1, lambda_2)
        observation = pm.Poisson("obs", lambda_, observed=count_data)
        step = pm.Metropolis()
        trace = pm.sample(10000, tune=5000, step=step)
    return trace
Example #14
def make_switchpoint_model(counts: ndarray, prior_lambda: float):
    """
    A model that assumes counts are generated by two different Poisson processes:
    * counts up to switchpoint (not inclusive) ~ Poisson(early_rate)
    * counts from switchpoint on (inclusive) ~ Poisson(late_rate)

    Parameters
    ----------

    counts :
        1-dimensional array of counts
    prior_lambda :
        rate parameter for exponential prior; 1 / prior_lambda is the mean of the exponential


    Returns
    -------

    pm.Model :
        the model instance

    Based on https://docs.pymc.io/notebooks/getting_started.html#Case-study-2:-Coal-mining-disasters
    """
    model = pm.Model()
    with model:
        idxs = np.arange(len(counts))
        lower_idx = idxs[1]
        upper_idx = idxs[-1]
        mid = (upper_idx - lower_idx) // 2

        switchpoint = pm.DiscreteUniform("switchpoint",
                                         lower=lower_idx,
                                         upper=upper_idx,
                                         testval=mid)

        early_rate = pm.Exponential("early_rate", prior_lambda)
        late_rate = pm.Exponential("late_rate", prior_lambda)

        rate = pm.math.switch(switchpoint > idxs, early_rate, late_rate)

        pm.Poisson("counted", rate, observed=counts)
    return model
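A usage sketch for make_switchpoint_model, assuming synthetic counts with a rate drop:

import numpy as np

rng = np.random.default_rng(1)
counts = np.concatenate([rng.poisson(2.0, 60), rng.poisson(0.8, 50)])
model = make_switchpoint_model(counts, prior_lambda=1.0)
with model:
    trace = pm.sample(2000, tune=1000)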
Example #15
    def single_model(self, idx):

        minimum = 0.
        maximum = 8.
        sample_space = np.arange(minimum, maximum + 1, 1)
        sample_space = 1. / 10**(sample_space / 4.)

        with pm.Model() as smodel:
            # uniform prior on h (an integer between 0 and 8)
            hab_ten = pm.DiscreteUniform('h', lower=0, upper=8)

            # convert to a tensor
            alpha = tt.as_tensor_variable([10**(hab_ten / 4.)])
            probs_a, probs_r = self.inferrer(alpha)

            # categorical likelihoods over the observed actions and rewards
            pm.Categorical('actions', probs_a, observed=self.actions[idx])
            pm.Categorical('rewards', probs_r, observed=self.rewards[idx])

        return smodel, sample_space
Example #16
def gev0_shift_1(dataset):

    locm = dataset.mean()
    locs = dataset.std() / (np.sqrt(len(dataset)))
    scalem = dataset.std()
    scales = dataset.std() / (np.sqrt(2 * (len(dataset) - 1)))
    with pm.Model() as model:
        # Priors for unknown model parameters
        c1 = pm.Beta(
            'c1', alpha=6, beta=9
        )  # the Beta draw is shifted by 0.5 in gev_logp, since the Beta domain is (0, 1)
        loc1 = pm.Normal('loc1', mu=locm, sd=locs)
        scale1 = pm.Normal('scale1', mu=scalem, sd=scales)

        c2 = pm.Beta('c2', alpha=6, beta=9)
        loc2 = pm.Normal('loc2', mu=locm, sd=locs)
        scale2 = pm.Normal('scale2', mu=scalem, sd=scales)

        def gev_logp(value):
            scaled = (value - loc_) / scale_
            logp = -(tt.log(scale_) +
                     (((c_ - 0.5) + 1) / (c_ - 0.5) * tt.log1p(
                         (c_ - 0.5) * scaled) +
                      (1 + (c_ - 0.5) * scaled)**(-1 / (c_ - 0.5))))
            bound1 = loc_ - scale_ / (c_ - 0.5)
            bounds = tt.switch((c_ - 0.5) > 0, value > bound1, value < bound1)
            return bound(logp, bounds, c_ != 0)

        tau = pm.DiscreteUniform("tau", lower=0, upper=len(dataset) - 1)
        idx = np.arange(len(dataset))
        c_ = pm.math.switch(tau > idx, c1, c2)
        loc_ = pm.math.switch(tau > idx, loc1, loc2)
        scale_ = pm.math.switch(tau > idx, scale1, scale2)
        gev = pm.DensityDist('gev', gev_logp, observed=dataset)
        trace = pm.sample(1000, chains=1, progressbar=True)

    # geweke_plot = pm.geweke(trace, 0.05, 0.5, 20)
    # gelman_and_rubin = pm.diagnostics.gelman_rubin(trace)
    posterior = pm.trace_to_dataframe(trace)
    summary = pm.summary(trace)
    return summary, posterior
Example #17
    def switch_test(current, sample_num=1000):
        with pm.Model() as switch_point:
            current_data = current[0]
            time = current[1]
            switchpoint = pm.DiscreteUniform('switchpoint', lower=time.min(), upper=time.max())

            # Priors for the pre- and post-switch rates
            early_rate = pm.Normal('early_rate', mu=0, sd=1000)
            late_rate = pm.Normal('late_rate', mu=0, sd=1000)

            # Allocate the appropriate rate to times before and after the current switchpoint
            rate = pm.math.switch(switchpoint >= time, early_rate, late_rate)

            switch_points = pm.Normal('current', mu=rate, sd=70, observed=current_data)
            
            trace = pm.sample(min(sample_num, 5000))
                
        return [trace['switchpoint'].mean(), trace['switchpoint'].std(),
                trace['early_rate'].mean(), trace['early_rate'].std(),
                trace['late_rate'].mean(), trace['late_rate'].std()]
Example #18
def poisson_model():
    with pm.Model() as disaster_model:

        switchpoint = pm.DiscreteUniform('switchpoint',
                                         lower=year.min(),
                                         upper=year.max(),
                                         testval=1900)

        # Priors for pre- and post-switch disaster rates
        early_rate = pm.Exponential('early_rate', 1)
        late_rate = pm.Exponential('late_rate', 1)

        # Allocate the appropriate Poisson rate to years before and after the current switchpoint
        rate = pm.math.switch(switchpoint >= year, early_rate, late_rate)

        disasters = pm.Poisson('disasters', rate, observed=disaster_data)

        trace = pm.sample(10000)

        pm.traceplot(trace)

    plt.show()
Example #19
    def infer_lambda(self):
        """
        Ci ~ Poisson(lambda)

        Is there a day tau where lambda suddenly jumps to a higher value?
        We are looking for a switchpoint such that
            (1) lambda = lambda_1 if t <= tau, and lambda_2 if t > tau
            (2) lambda_2 > lambda_1

        lambda_1 ~ Exponential(alpha)
        lambda_2 ~ Exponential(alpha)

        tau ~ DiscreteUniform(0, n_count_data), i.e. P(tau = k) = 1 / n_count_data
        """
        print("Infer with PyMC3...")
        with pm.Model() as model:
            ## assign lambdas and tau to stochastic variables
            alpha = 1.0 / self.count_data.mean()
            lambda_1 = pm.Exponential("lambda_1", alpha)
            lambda_2 = pm.Exponential("lambda_2", alpha)
            tau = pm.DiscreteUniform("tau", lower=0, upper=self.n_count_data)

            ## create a combined function for lambda (it is still a random variable)
            idx = np.arange(self.n_count_data)  # Index
            lambda_ = pm.math.switch(tau >= idx, lambda_1, lambda_2)

            ## combine the data with our proposed data generation scheme
            observation = pm.Poisson("obs", lambda_, observed=self.count_data)

            ## inference
            step = pm.Metropolis()
            self.trace = pm.sample(10000, tune=5000, step=step)

            ## get the variables we want to plot from our trace
            self.lambda_1_samples = self.trace['lambda_1']
            self.lambda_2_samples = self.trace['lambda_2']
            self.tau_samples = self.trace['tau']
Example #20
def bayesian_tipping_point(obs_data):
    """

    :param obs_data: 1-d numpy array containing the daily precipitation data
    :return: summary of sampled values and trace itself
    """
    n_dd = obs_data.shape[0]
    idx = np.arange(n_dd)
    with pm.Model() as model:
        alpha_1 = pm.Uniform("alpha_1", lower=0, upper=10)
        alpha_2 = pm.Uniform("alpha_2", lower=0, upper=10)
        beta_1 = pm.Uniform("beta_1", lower=0, upper=10)
        beta_2 = pm.Uniform("beta_2", lower=0, upper=10)
        pi_1 = pm.Uniform("pi_1", lower=0, upper=0.9)
        pi_2 = pm.Uniform("pi_2", lower=0, upper=0.9)
        tau = pm.DiscreteUniform("tau", lower=365 * (5/4.), upper=n_dd - 365 * (5/4.))
        alpha_ = pm.math.switch(tau >= idx, alpha_1, alpha_2)
        beta_ = pm.math.switch(tau >= idx, beta_1, beta_2)
        pi_ = pm.math.switch(tau >= idx, pi_1, pi_2)
        observation = ZeroInflatedGamma("obs", alpha=alpha_, beta=beta_, pi=pi_, observed=obs_data)
        step = pm.NUTS()
        trace = pm.sample(5000,  tune=20000, step=step, nuts_kwargs=dict(target_accept=.9))
        summary = pm.stats.summary(trace)
    return summary, trace
Example #21
def main():
    with pm.Model() as model:
        xl1 = pm.DiscreteUniform('xl1', lower=0, upper=500)
        yl1 = pm.DiscreteUniform('yl1', lower=150, upper=500)
        θl1 = pm.DiscreteUniform('θl1', lower=-100, upper=100)

        xl2 = pm.DiscreteUniform('xl2', lower=0, upper=500)
        yl2 = pm.DiscreteUniform('yl2', lower=150, upper=500)
        θl2 = pm.DiscreteUniform('θl2', lower=-100, upper=100)
        
        obs = pm.Normal('obs',
                        mu=simulation(xl1, yl1, θl1, xl2, yl2, θl2),
                        sigma=.001, observed=484)

        trace = pm.sample(10, tune=2000, cores=2)

        
        def print_and_visualize(t):
            print(t)
            visualize_simulation(t['xl1'], t['yl1'], t['θl1'],
                                 t['xl2'], t['yl2'], t['θl2'])
        
        for t in trace:
            print_and_visualize(t)
Example #22
    cpt_smry = pm.summary(cpt_trace)
    pm.traceplot(cpt_trace)
    spp = pm.sample_posterior_predictive(cpt_trace, samples=1000, progressbar=False, var_names=['w', 'theta', 'obs'])

cpf = data[['ds', 'y']].set_index('ds').resample('7D').sum().reset_index()
n = len(cpf)
_, _, _, _, cpf['t'] = set_times(cpf)  # add 't'
g = np.gradient(cpf['y'].values)  # trend
cpf['w_trend'] = np.abs(g) / np.sum(np.abs(g))  # changepoint density at each point
cpf['trend'] = g

alpha = 1.0 / cpf['w_trend'].mean()
beta = 1.0 / cpf['w_trend'].std()
t = np.arange(0, n)
with pm.Model() as my_model:
    switchpoint = pm.DiscreteUniform("switchpoint", lower=0, upper=cpf['t'].max(), shape=10)
    for i in range(n):
        mu_name = 'mu_' + str(i)
        setattr(my_model, mu_name, pm.Exponential(mu_name, alpha))
        sd_name = 'sd_' + str(i)
        setattr(my_model, sd_name, pm.Exponential(sd_name, beta))

    t = 0
    for i in range(n):
        mu = pm.math.switch(switchpoint >= t, mu_1, mu_2)
        sd = pm.math.switch(switchpoint >= t, sd_1, sd_2)
    obs = pm.Normal('x', mu=mu, sd=sd, observed=data)

with my_model:
    step1 = pm.NUTS([mu_1, mu_2, sd_1, sd_2])
    step2 = pm.Metropolis([switchpoint])
Example #23
import pymc3 as pm
import arviz as az
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

#Gather and transform data
data_path = '/home/gerardo/Desktop/Projects/PGA-Analysis/data/driving-data/driving-data.csv'
raw_data = pd.read_csv(data_path)
data = raw_data['average_driving_distance'].to_numpy()

#Declare model
with pm.Model() as model:
    #Switchpoint
    tau = pm.DiscreteUniform("tau", lower=0, upper=len(data) - 1)
    #Prior when t <= tau
    mu_1 = pm.Normal("mu_1", mu=280, sd=20)
    sd_1 = pm.HalfNormal("sd_1", sigma=40)
    #Prior when t > tau
    mu_2 = pm.Normal("mu_2", mu=280, sd=20)
    sd_2 = pm.HalfNormal("sd_2", sigma=40)
    #Observations
    idx = np.arange(len(data))
    mu_t = pm.math.switch(tau > idx, mu_1, mu_2)
    sd_t = pm.math.switch(tau > idx, sd_1, sd_2)
    observations = pm.Normal("observations", mu=mu_t, sd=sd_t, observed=data)

#Perform inference
with model:
    step = pm.NUTS()
    trace = pm.sample(50000, tune=5000, step=step)
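With the trace in hand, the switch point can be summarized directly; a short sketch:

tau_samples = trace['tau']
print("posterior mode of tau:", np.bincount(tau_samples).argmax())
print("posterior mean of tau:", tau_samples.mean())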
Example #24
"""
Comparing models using Hierarchical modelling.
"""
from __future__ import division
import numpy as np
import pymc3 as pm
import matplotlib.pyplot as plt
from plot_post import plot_post

## specify the Data
y = np.repeat([0, 1], [3, 6])  # 3 tails 6 heads

with pm.Model() as model:
    # Hyperhyperprior:
    model_index = pm.DiscreteUniform('model_index', lower=0, upper=1)
    # Hyperprior:
    kappa_theta = 12
    mu_theta = pm.math.switch(pm.math.eq(model_index, 1), 0.25, 0.75)
    # Prior distribution:
    a_theta = mu_theta * kappa_theta
    b_theta = (1 - mu_theta) * kappa_theta
    theta = pm.Beta('theta', a_theta,
                    b_theta)  # theta distributed as beta density
    #likelihood
    y = pm.Bernoulli('y', theta, observed=y)
    start = pm.find_MAP()
    step = pm.Metropolis()
    trace = pm.sample(10000, step, start=start, progressbar=False)

## Check the results.
burnin = 2000  # posterior samples to discard
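The listing is cut off here; presumably the check tallies how often each model index is visited after burn-in. A sketch consistent with the burn-in above:

model_idx_samples = trace['model_index'][burnin:]
p_model_1 = (model_idx_samples == 1).mean()
print('P(model 1 | data) =', p_model_1)
print('P(model 0 | data) =', 1 - p_model_1)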
Example #25
from numpy import arange, array, empty

import pymc3 as pm
import theano.tensor as tt
from theano.compile.ops import as_op

# Time series of recorded coal mining disasters in the UK, 1851-1962
disasters_data = array([
    4, 5, 4, 0, 1, 4, 3, 4, 0, 6, 3, 3, 4, 0, 2, 6, 3, 3, 5, 4, 5, 3, 1, 4, 4,
    1, 5, 5, 3, 4, 2, 5, 2, 2, 3, 4, 2, 1, 3, 2, 2, 1, 1, 1, 1, 3, 0, 0, 1, 0,
    1, 1, 0, 0, 3, 1, 0, 3, 2, 2, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 2, 1, 0, 0,
    0, 1, 1, 0, 2, 3, 3, 1, 1, 2, 1, 1, 1, 1, 2, 4, 2, 0, 0, 1, 4, 0, 0, 0, 1,
    0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1
])
years = len(disasters_data)


@as_op(itypes=[tt.lscalar, tt.dscalar, tt.dscalar], otypes=[tt.dvector])
def rate_(switchpoint, early_mean, late_mean):
    out = empty(years)
    out[:switchpoint] = early_mean
    out[switchpoint:] = late_mean
    return out


with pm.Model() as model:

    # Prior for distribution of switchpoint location
    switchpoint = pm.DiscreteUniform('switchpoint', lower=0, upper=years)
    # Priors for pre- and post-switch mean number of disasters
    early_mean = pm.Exponential('early_mean', lam=1.)
    late_mean = pm.Exponential('late_mean', lam=1.)

    # Allocate appropriate Poisson rates to years before and after current
    # switchpoint location
    idx = arange(years)
    rate = rate_(switchpoint, early_mean, late_mean)

    # Data likelihood
    disasters = pm.Poisson('disasters', rate, observed=disasters_data)

    # Use slice sampler for means
    step1 = pm.Slice([early_mean, late_mean])
    # Use Metropolis for the switchpoint, since it accommodates discrete variables
    step2 = pm.Metropolis([switchpoint])
Example #26
import numpy as np
from matplotlib import pyplot as plt
import scipy.stats as stats
import pymc3 as pm
import theano.tensor as tt

count_data = np.loadtxt("data/txtdata.csv")
n_count_data = len(count_data)

with pm.Model() as model:
    alpha = 1.0 / count_data.mean()  # count_data holds our text-message counts
    lambda_1 = pm.Exponential("lambda_1", alpha)
    lambda_2 = pm.Exponential("lambda_2", alpha)

    tau = pm.DiscreteUniform("tau", lower=0, upper=n_count_data - 1)

with model:
    idx = np.arange(n_count_data)  # Index
    lambda_ = pm.math.switch(tau > idx, lambda_1, lambda_2)

with model:
    observation = pm.Poisson("obs", lambda_, observed=count_data)

with model:
    step = pm.Metropolis()
    trace = pm.sample(10000, tune=5000, step=step)

lambda_1_samples = trace['lambda_1']
lambda_2_samples = trace['lambda_2']
tau_samples = trace['tau']
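A follow-up sketch summarizing the switchpoint posterior from these samples:

# Posterior probability of each candidate switch day, highest first.
tau_counts = np.bincount(tau_samples, minlength=n_count_data)
for day in np.argsort(tau_counts)[::-1][:3]:
    print(day, tau_counts[day] / len(tau_samples))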
Example #27
What is that point, and how skilled is the test taker?
"""

ground_truth = np.array([True,True,True,True,True,True,True,True,True,True])
obs = np.array([True,False,False,False,False,False,False,False,False,False])
correct =  np.equal(ground_truth,obs)

true_inds = np.where(obs)[0]
last_true_idx = true_inds[-1]
test_len = len(ground_truth)

with pm.Model() as model:
    skill = pm.Beta('skill',1.0,1.0)
    n_correct_obs = np.sum(correct)
    
    endpoint = pm.DiscreteUniform('endpoint',last_true_idx,test_len-1)
    
    for i in range(0,last_true_idx+1):
        pm.Bernoulli('correct_%d' % i,skill,observed=correct[i])
        
    for i in range(last_true_idx+1,test_len):
        after_endpoint = pm.math.gt(i,endpoint)
        prob_correct_if_done = float(ground_truth[i]==False)
        prob_correct = pm.math.where(after_endpoint,prob_correct_if_done,skill)
        pm.Bernoulli('correct%d' % i,prob_correct,observed=correct[i])
    
    trace = pm.sample()
    pm.traceplot(trace)
    print(pm.summary(trace))
Example #28
from numpy import arange, array

import pymc3 as pm
import theano.tensor as tt

# Time series of recorded coal mining disasters in the UK from 1851 to 1962
disasters_data = array([
    4, 5, 4, 0, 1, 4, 3, 4, 0, 6, 3, 3, 4, 0, 2, 6, 3, 3, 5, 4, 5, 3, 1, 4, 4,
    1, 5, 5, 3, 4, 2, 5, 2, 2, 3, 4, 2, 1, 3, 2, 2, 1, 1, 1, 1, 3, 0, 0, 1, 0,
    1, 1, 0, 0, 3, 1, 0, 3, 2, 2, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 2, 1, 0, 0,
    0, 1, 1, 0, 2, 3, 3, 1, 1, 2, 1, 1, 1, 1, 2, 4, 2, 0, 0, 1, 4, 0, 0, 0, 1,
    0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1
])
year = arange(1851, 1962)

with pm.Model() as model:

    switchpoint = pm.DiscreteUniform('switchpoint',
                                     lower=year.min(),
                                     upper=year.max())
    early_mean = pm.Exponential('early_mean', lam=1.)
    late_mean = pm.Exponential('late_mean', lam=1.)

    # Allocate appropriate Poisson rates to years before and after current
    # switchpoint location
    rate = tt.switch(switchpoint >= year, early_mean, late_mean)

    disasters = pm.Poisson('disasters', rate, observed=disasters_data)

    # Initial values for stochastic nodes
    start = {'early_mean': 2., 'late_mean': 3.}

    tr = pm.sample(1000, tune=500, start=start)
    pm.traceplot(tr)
Example #29
import os

import numpy as np
import matplotlib.pyplot as plt
import pymc3 as pm
from scipy.stats import poisson
import seaborn as sns

# Config
os.chdir("/home/jovyan/work")
%config InlineBackend.figure_format = 'retina'
%matplotlib inline
plt.rcParams["figure.figsize"] = (12, 3)

# Preparation
N = 100
true_lams = [20, 50]
true_tau = 30
data = np.hstack([
    poisson(true_lams[0]).rvs(true_tau),
    poisson(true_lams[1]).rvs(N - true_tau),
])

# Modeling
with pm.Model() as model:
    lam_1 = pm.Exponential("lam_1", data.mean())
    lam_2 = pm.Exponential("lam_2", data.mean())
    tau = pm.DiscreteUniform("tau", lower=0, upper=N-1)
    idx = np.arange(N)
    lam = pm.math.switch(tau > idx, lam_1, lam_2)
    female = pm.Poisson("target", lam, observed=data)
    step = pm.Metropolis()
    trace = pm.sample(20000, tune=5000, step=step, chains=10)
    pm.traceplot(trace[1000:], grid=True)
plt.savefig("./results/3-15-a-inference.png")
Example #30
from numpy import arange, array, empty

import pymc3 as pm
import theano.tensor as tt
from theano.compile.ops import as_op

# Time series of recorded coal mining disasters in the UK, 1851-1962
disasters_data = array([
    4, 5, 4, 0, 1, 4, 3, 4, 0, 6, 3, 3, 4, 0, 2, 6, 3, 3, 5, 4, 5, 3, 1, 4, 4,
    1, 5, 5, 3, 4, 2, 5, 2, 2, 3, 4, 2, 1, 3, 2, 2, 1, 1, 1, 1, 3, 0, 0, 1, 0,
    1, 1, 0, 0, 3, 1, 0, 3, 2, 2, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 2, 1, 0, 0,
    0, 1, 1, 0, 2, 3, 3, 1, 1, 2, 1, 1, 1, 1, 2, 4, 2, 0, 0, 1, 4, 0, 0, 0, 1,
    0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1
])
years = len(disasters_data)


@as_op(itypes=[tt.lscalar, tt.dscalar, tt.dscalar], otypes=[tt.dvector])
def rate_(switchpoint, early_mean, late_mean):
    out = empty(years)
    out[:switchpoint] = early_mean
    out[switchpoint:] = late_mean
    return out


with pm.Model() as model:

    # Prior for distribution of switchpoint location
    switchpoint = pm.DiscreteUniform("switchpoint", lower=0, upper=years)
    # Priors for pre- and post-switch mean number of disasters
    early_mean = pm.Exponential("early_mean", lam=1.0)
    late_mean = pm.Exponential("late_mean", lam=1.0)

    # Allocate appropriate Poisson rates to years before and after current
    # switchpoint location
    idx = arange(years)
    rate = rate_(switchpoint, early_mean, late_mean)

    # Data likelihood
    disasters = pm.Poisson("disasters", rate, observed=disasters_data)

    # Use slice sampler for means
    step1 = pm.Slice([early_mean, late_mean])
    # Use Metropolis for the switchpoint, since it accommodates discrete variables
    step2 = pm.Metropolis([switchpoint])