Exemple #1
0
    def test_check_discrete_minibatch(self):
        """Check that ``advi_minibatch`` rejects a model with a discrete RV.

        The switchpoint model is built with its observations supplied through
        a symbolic vector (``disaster_data_t``) so that it can be driven by
        minibatches; the ``DiscreteUniform`` switchpoint is the discrete
        variable that is expected to trigger a ``ValueError``.
        """
        disaster_data_t = tt.vector()
        # Give the symbolic placeholder a test value with the same length as
        # the real observations.
        disaster_data_t.tag.test_value = np.zeros(len(self.disaster_data))

        def create_minibatches():
            # Endless generator of 1-tuples (one array for the single entry in
            # ``minibatch_tensors``).  This must ``yield``: with ``return`` the
            # loop exits on its first pass and the call hands back a single
            # tuple instead of a generator.
            while True:
                yield (self.disaster_data, )

        with Model():
            switchpoint = DiscreteUniform('switchpoint',
                                          lower=self.year.min(),
                                          upper=self.year.max(),
                                          testval=1900)

            # Priors for the disaster rates before and after the switchpoint
            early_rate = Exponential('early_rate', 1)
            late_rate = Exponential('late_rate', 1)

            # Per-year Poisson rate: early_rate for years up to and including
            # the switchpoint, late_rate for the years after it
            rate = tt.switch(switchpoint >= self.year, early_rate, late_rate)
            disasters = Poisson('disasters', rate, observed=disaster_data_t)

            # Called inside the model context, so no explicit model is passed.
            with self.assertRaises(ValueError):
                advi_minibatch(n=10,
                               minibatch_RVs=[disasters],
                               minibatch_tensors=[disaster_data_t],
                               minibatches=create_minibatches())
Exemple #2
0
def test_check_discrete_minibatch():
    """Check that ``advi_minibatch`` rejects a model with a discrete RV.

    The switchpoint model is built with its observations supplied through a
    symbolic vector (``disaster_data_t``) so that it can be driven by
    minibatches; the ``DiscreteUniform`` switchpoint is the discrete variable
    that is expected to trigger a ``ValueError``.
    """
    disaster_data_t = tt.vector()
    # Give the symbolic placeholder a test value with the same length as the
    # real observations.
    disaster_data_t.tag.test_value = np.zeros(len(disaster_data))

    with Model() as disaster_model:

        switchpoint = DiscreteUniform('switchpoint',
                                      lower=year.min(),
                                      upper=year.max(),
                                      testval=1900)

        # Priors for the disaster rates before and after the switchpoint
        early_rate = Exponential('early_rate', 1)
        late_rate = Exponential('late_rate', 1)

        # Per-year Poisson rate: early_rate for years up to and including the
        # switchpoint, late_rate for the years after it
        rate = switch(switchpoint >= year, early_rate, late_rate)

        disasters = Poisson('disasters', rate, observed=disaster_data_t)

    def create_minibatch():
        # Endless generator of 1-tuples (one array for the single entry in
        # ``minibatch_tensors``).  This must ``yield``: with ``return`` the
        # loop exits on its first pass and the call hands back a single tuple
        # instead of a generator.
        while True:
            yield (disaster_data, )

    # This should raise ValueError
    assert_raises(ValueError,
                  advi_minibatch,
                  model=disaster_model,
                  n=10,
                  minibatch_RVs=[disasters],
                  minibatch_tensors=[disaster_data_t],
                  minibatches=create_minibatch(),
                  verbose=False)
Exemple #3
0
    def test_check_discrete(self):
        """Check that ``advi`` raises ``ValueError`` for a model with a discrete RV."""
        first_year = self.year.min()
        last_year = self.year.max()

        with Model():
            # Discrete change point, bounded by the first and last year.
            change_year = DiscreteUniform(
                'switchpoint', lower=first_year, upper=last_year, testval=1900)

            # Exponential(1) priors on the rate before and after the change.
            rate_before = Exponential('early_rate', 1)
            rate_after = Exponential('late_rate', 1)

            # Per-year Poisson rate: the early rate for years up to and
            # including the switchpoint, the late rate afterwards.
            yearly_rate = tt.switch(
                change_year >= self.year, rate_before, rate_after)
            Poisson('disasters', yearly_rate, observed=self.disaster_data)

            # advi is called inside the model context with no explicit model.
            with self.assertRaises(ValueError):
                advi(n=10)
Exemple #4
0
def test_check_discrete():
    """Check that ``advi`` raises ``ValueError`` for a model with a discrete RV."""
    with Model() as disaster_model:
        # Discrete change point, bounded by the first and last year.
        change_year = DiscreteUniform(
            'switchpoint', lower=year.min(), upper=year.max(), testval=1900)

        # Exponential(1) priors on the rate before and after the change.
        rate_before = Exponential('early_rate', 1)
        rate_after = Exponential('late_rate', 1)

        # Per-year Poisson rate: the early rate for years up to and including
        # the switchpoint, the late rate afterwards.
        yearly_rate = switch(change_year >= year, rate_before, rate_after)
        Poisson('disasters', yearly_rate, observed=disaster_data)

    # The model is passed explicitly because the context has been left.
    advi_kwargs = dict(model=disaster_model, n=10)
    assert_raises(ValueError, advi, **advi_kwargs)
Exemple #5
0
],
                                    value=-999)
# One entry per calendar year from 1851 through 1961 (np.arange excludes the
# stop value).
year = np.arange(1851, 1962)

# Scatter plot of the yearly counts; disaster_data is defined above this
# fragment (its definition is cut off here).
plt.plot(year, disaster_data, 'o', markersize=8)
plt.ylabel("Disaster count")
plt.xlabel("Year")

plt.show()

from pymc3 import DiscreteUniform, Poisson, switch, Model, Exponential, NUTS, Metropolis, sample, traceplot

# Switchpoint model: the yearly disaster counts are Poisson distributed, with
# one rate up to a switchpoint year and a different rate after it.
with Model() as disaster_model:

    # Discrete year at which the rate changes, bounded by the first and last
    # year in the data; 1900 is the initial test value.
    switchpoint = DiscreteUniform('switchpoint',
                                  lower=year.min(),
                                  upper=year.max(),
                                  testval=1900)

    # Priors for the disaster rates before and after the switchpoint
    early_rate = Exponential('early_rate', 1)
    late_rate = Exponential('late_rate', 1)

    # Per-year Poisson rate: early_rate for years up to and including the
    # switchpoint, late_rate for the years after it
    rate = switch(switchpoint >= year, early_rate, late_rate)

    disasters = Poisson('disasters', rate, observed=disaster_data)

    # NUTS handles the two continuous rate parameters.
    step1 = NUTS([early_rate, late_rate])

    # Use Metropolis for switchpoint, and missing values since it accommodates discrete variables
    # NOTE(review): presumably disaster_data is a masked array (the cut-off
    # definition above ends with value=-999), which is what would give
    # `disasters` a `missing_values` entry -- confirm.
    step2 = Metropolis([switchpoint, disasters.missing_values[0]])
Exemple #6
0
def createSignalModelWithLookup(data, wfMax):
  """
    Build a PyMC3 model of a waveform from a precomputed siggen lookup table.

    Uses a lookup table to avoid having to call siggen.  Lookup locations are along a one-dimensional line from PC to the detector corner.  See generate_siggen_lookup.py

    data:  observed waveform samples, one value per time step
    wfMax: maximum of the input signal.  Used as the prior mean for the scaling of the simulated pulse

    Returns the Model.  Its free variables are 'switchpoint' (sample index at
    which the simulated pulse starts), 'noise_sigma' / 'siggen_sigma' (noise
    scale up to and including / after the switchpoint), 'wf_scale' (pulse
    amplitude) and 'dtEstimate' (row of the lookup table).

    NOTE(review): relies on a module-level `dt_array` lookup table that is not
    defined in this function -- presumably a 2-D array with at least 100 rows
    and at least len(data) columns; confirm against where it is loaded.
  """
  n_samples = len(data)
  # Largest lookup-table row index; shared by the prior bound and the clamp.
  max_dt_index = 99

  with Model() as signal_model:

    switchpoint = DiscreteUniform('switchpoint', lower=0, upper=n_samples)
    noise_sigma = HalfNormal('noise_sigma', sd=1.)
    siggen_sigma = HalfNormal('siggen_sigma', sd=10.)

    # Builtin int rather than the np.int alias, which NumPy removed in 1.24.
    timestamp = np.arange(0, n_samples, dtype=int)

    # Noise scale per sample: noise_sigma up to and including the
    # switchpoint, siggen_sigma afterwards.
    uncertainty_model = switch(switchpoint >= timestamp, noise_sigma, siggen_sigma)

    wf_scale = Normal('wf_scale', sd=10., mu=wfMax)

    dtEstimate = DiscreteUniform('dtEstimate', lower=0, upper=max_dt_index)

    @as_op(itypes=[T.lscalar, T.lscalar, T.dscalar], otypes=[T.dvector])
    def siggen_model_dt(switchpoint, dtEstimate, wf_scale):
      # Clamp the proposed row index *before* the lookup.  The previous
      # T.clip(...) call built a symbolic expression and threw it away, so an
      # out-of-range proposal still reached the indexing below.
      row = int(np.clip(dtEstimate, 0, max_dt_index))

      # Scale into a fresh array; an in-place `*=` on the indexed row could
      # write the scaling back into the shared lookup table.
      siggen_out = dt_array[row, :] * wf_scale

      # Zero baseline up to the switchpoint, then the simulated pulse,
      # truncated to the remaining samples.
      out = np.zeros(n_samples)
      out[switchpoint:] = siggen_out[0:(n_samples - switchpoint)]
      return out

    # Alternative lookup by (r, z) position.  It is not wired into the model
    # at present: using it needs 'radiusEstimate' / 'zEstimate' priors.
    # NOTE(review): `np_data` and `findSiggenWaveform` are not defined in this
    # function -- presumably module-level names; confirm before enabling.
    @as_op(itypes=[T.lscalar, T.lscalar, T.lscalar], otypes=[T.dvector])
    def siggen_model(switchpoint, r, z):
      siggen_out = findSiggenWaveform(0,r,z,np.amax(np_data))
      out = np.zeros(len(data))
      out[switchpoint:] = siggen_out[0:(len(data) - switchpoint)]

      return out

    baseline_model_dt = siggen_model_dt(switchpoint, dtEstimate, wf_scale)

    # Likelihood: observed samples are Normal around the shifted, scaled pulse.
    baseline_observed = Normal("baseline_observed", mu=baseline_model_dt, sd=uncertainty_model, observed= data )

  return signal_model

#def createSignalModelDynamic(data, wfMax):
#  """
#    Calls siggen in real time
#    
#  """
#
#  with Model() as signal_model:
#    
#    switchpoint = DiscreteUniform('switchpoint', lower=0, upper=len(data))
#    noise_sigma = HalfNormal('noise_sigma', sd=1.)
#    siggen_sigma = HalfNormal('siggen_sigma', sd=10.)
#    
#    timestamp = np.arange(0, len(data), dtype=np.int)
#
#    uncertainty_model = switch(switchpoint >= timestamp, noise_sigma, siggen_sigma)
#    
#    detRad = np.floor(35.41)
#    detZ = np.floor(41.5)
#    
#    dtEstimate = DiscreteUniform('dtEstimate', lower=0, upper=99  )
#
#    
#  #          radiusEstimate = DiscreteUniform('radiusEstimate', lower=0, upper=35  )
#  #          zEstimate =      DiscreteUniform('zEstimate', lower=0, upper=41)
#
#    
#    
#    @as_op(itypes=[T.lscalar, T.lscalar], otypes=[T.dvector])
#    def siggen_model_dt(switchpoint, dtEstimate):
#      siggen_out = dt_array[dtEstimate, :]
#      siggen_out *= wfMax
#
#      T.clip(dtEstimate, 0, 99) #THIS IS A DISASTER. NEED to find a better way to handle this
#
#      out = np.zeros(len(data))
#      out[switchpoint:] = siggen_out[0:(len(data) - switchpoint)]
#      
#  #            print "length of out is %d" % len(out)
#      return out
#    
#    @as_op(itypes=[T.lscalar, T.lscalar, T.lscalar], otypes=[T.dvector])
#    def siggen_model(switchpoint, r, z):
#      siggen_out = findSiggenWaveform(0,r,z,np.amax(np_data))
#      out = np.zeros(len(data))
#      out[switchpoint:] = siggen_out[0:(len(data) - switchpoint)]
#      
#      return out
#    
#    
#  #          print "length of data is %d" % len(data)
#
#  #          @as_op(itypes=[T.lscalar, T.dscalar, T.dscalar], otypes=[T.dvector])
#  #          
#  #          def crazy_modulo3(switchpoint, exp_scale, exp_rate):
#  #            out = np.zeros(len(data))
#  #            out[switchpoint:] = exp_scale * (np.exp( exp_rate * (timestamp[switchpoint:] - switchpoint))-1.)
#  #            return out
#
#    
#    #baseline_model = Deterministic('baseline_model', exp_scale * (exp( (timestamp-switchpoint)*rate)-1.) )
#    
#  #          baseline_model = siggen_model(switchpoint, radiusEstimate, zEstimate)
#    baseline_model_dt = siggen_model_dt(switchpoint, dtEstimate)
#    
#    
#    baseline_observed = Normal("baseline_observed", mu=baseline_model_dt, sd=uncertainty_model, observed= data )
#
#  return signal_model
Exemple #7
0
def mcmc_changepoint(dates,
                     ratings,
                     mcmc_iter=1000,
                     discrete=0,
                     plot_result=1):
    """Fit a single-switchpoint model to a sequence of Yelp review ratings.

    Reviews are modelled as coming from two distributions with a switch point
    somewhere between them: reviews up to and including the switch point are
    drawn from the first distribution, later reviews from the second. Normal
    distributions are used if the reviews have been normalized to the user's
    average rating (discrete=0); for raw 1-5 star ratings set discrete=1 and
    the same estimation is done with Poisson distributions. The posterior is
    sampled with ``sample(mcmc_iter)``; no step method is passed, so PyMC3
    chooses the samplers.

    Args:
        dates: Array of dates when the reviews were posted.
        ratings: Array of the ratings given by each review.
        mcmc_iter: How many iterations of the MCMC to run.
        discrete: 0 to model the ratings with Normal distributions
            (user-averaged ratings), 1 to use Poisson distributions
            (1-5 stars).
        plot_result: If 1, show the trace plot and a plot of the ratings
            against the posterior expected rating.

    Returns:
        A 5-tuple ``(differential, modal_switch, expected_stars, sigma_est,
        switch_posterior)``:
        differential is the mode of 'before_intensity' minus the mode of
        'after_intensity'; modal_switch is the mode of the switchpoint
        samples; expected_stars is the posterior expected intensity for each
        review; sigma_est is the mode of 'sigma', or None when discrete=1
        because the Poisson model has no 'sigma' variable; switch_posterior
        is the raw array of switchpoint samples. The modes are whatever
        ``scipy.stats.mode`` returns.

    Raises:
        ValueError: If ``discrete`` is neither 0 nor 1.
    """
    if discrete == 0:
        use_poisson = False
    elif discrete == 1:
        use_poisson = True
    else:
        # Previously this fell through and failed later on an unbound
        # `switch_model`.
        raise ValueError(
            "discrete must be 0 (Normal) or 1 (Poisson), got %r" % (discrete,))

    number_of_ratings = np.arange(0, len(ratings))

    with Model() as switch_model:
        switchpoint = DiscreteUniform('switchpoint',
                                      lower=0,
                                      upper=len(dates))

        if use_poisson:
            before_intensity = Exponential('before_intensity', 1)
            after_intensity = Exponential('after_intensity', 1)
        else:
            before_intensity = Normal('before_intensity', mu=0, sd=1)
            after_intensity = Normal('after_intensity', mu=0, sd=1)

        # Reviews with index <= switchpoint use the "before" intensity.
        intensity = switch(switchpoint >= number_of_ratings,
                           before_intensity, after_intensity)

        if use_poisson:
            Poisson('rating', intensity, observed=ratings)
        else:
            sigma = HalfNormal('sigma', sd=1)
            Normal('rating', mu=intensity, sd=sigma, observed=ratings)

    with switch_model:
        trace = sample(mcmc_iter)

    if plot_result == 1:
        traceplot(trace)
        plt.show()

    switch_posterior = trace['switchpoint']
    N_MCs = switch_posterior.shape[0]

    before_intensity_posterior = trace['before_intensity']
    after_intensity_posterior = trace['after_intensity']

    # Posterior expected intensity per review: average, over the samples, of
    # whichever intensity applies to that review in each sample.
    expected_stars = np.zeros(len(ratings))
    for a_rating in number_of_ratings:
        # `<=` matches the model's `switchpoint >= index` convention; the
        # earlier strict `<` put the review at the switchpoint on the
        # "after" side.
        is_before = a_rating <= switch_posterior
        expected_stars[a_rating] = (
            before_intensity_posterior[is_before].sum() +
            after_intensity_posterior[~is_before].sum()) / N_MCs

    if plot_result == 1:
        plt.plot(dates, ratings, 'o')
        plt.plot(dates, expected_stars, 'b-')
        plt.show()

    # Modes of the posterior samples (the counts are not used).
    before_mode, _ = scipy.stats.mode(before_intensity_posterior)
    after_mode, _ = scipy.stats.mode(after_intensity_posterior)
    modal_switch, _ = scipy.stats.mode(switch_posterior)
    if use_poisson:
        # The Poisson model defines no 'sigma'; looking it up in the trace
        # would raise KeyError.
        sigma_est = None
    else:
        sigma_est, _ = scipy.stats.mode(trace['sigma'])

    differential = before_mode - after_mode
    return differential, modal_switch, expected_stars, sigma_est, switch_posterior