def _testZeroDensity(self, alpha):
    """Zero isn't in the support of the gamma distribution.

    But quantized floating point math has its limits.
    TODO(bjp): Implement log-gamma sampler for small-shape distributions.

    Args:
      alpha: float shape value to test
    """
    try:
      from scipy import stats  # pylint: disable=g-import-not-at-top
    except ImportError as e:
      tf_logging.warn("Cannot test zero density proportions: %s" % e)
      return
    allowable_zeros = {
        dtypes.float16: stats.gamma(alpha).cdf(np.finfo(np.float16).tiny),
        dtypes.float32: stats.gamma(alpha).cdf(np.finfo(np.float32).tiny),
        dtypes.float64: stats.gamma(alpha).cdf(np.finfo(np.float64).tiny)
    }
    failures = []
    for use_gpu in [False, True]:
      for dt in dtypes.float16, dtypes.float32, dtypes.float64:
        sampler = self._Sampler(
            10000, alpha, 1.0, dt, use_gpu=use_gpu, seed=12345)
        x = sampler()
        allowable = allowable_zeros[dt] * x.size
        allowable = allowable * 2 if allowable < 10 else allowable * 1.05
        if np.sum(x <= 0) > allowable:
          failures += [(use_gpu, dt)]
      self.assertEqual([], failures)
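
For reference, the allowable-zeros threshold used above comes straight from SciPy's gamma CDF evaluated at the smallest positive normal float; a minimal standalone sketch (alpha and sample size are illustrative):

import numpy as np
from scipy import stats

alpha, n_samples = 0.2, 10000
# Probability mass the true gamma places at or below the smallest positive
# normal float32 value; samples there quantize to zero.
p_zero = stats.gamma(alpha).cdf(np.finfo(np.float32).tiny)
allowable = p_zero * n_samples
# Loosen the bound the same way the test does for tiny expected counts.
allowable = allowable * 2 if allowable < 10 else allowable * 1.05
print(allowable)
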
Example #2
    def estimate_distribution(self,index_stats,queries,qrel=None):
        self._estimate_para(index_stats,queries,qrel)


        for qid in self._run.ranking:
            self._rel_distribution[qid] = gamma(self._k1[qid],1/self._theta1[qid]) 
            self._non_rel_distribution[qid] = gamma(self._k0[qid],1/self._theta0[qid])  
Example #3
def test_parameter():

    # Test "Null" parameter
    p = Parameter()
    assert p.shape == (0,)
    assert p.rvs() == []
    assert p.has_value is False
    assert p.is_random is False

    # Test values
    v = 1.
    p = Parameter(v, Positive())
    assert p.value == v
    assert p.bounds.lower > 0
    assert p.bounds.upper is None
    assert p.rvs() == v
    assert p.has_value is True
    assert p.is_random is False

    # Test distributions
    p = Parameter(gamma(1), Positive())
    assert np.shape(p.rvs()) == ()
    assert p.has_value is True
    assert p.is_random is True

    p = Parameter(gamma(1), Positive(), shape=(2,))
    assert np.shape(p.rvs()) == (2,)
    assert Positive().check(p.rvs())

    p = Parameter(gamma(1), Bound(1, 2), shape=(10, 5))
    assert np.shape(p.rvs()) == (10, 5)
    assert Bound(1, 2).check(p.rvs())
Example #4
    def prob_alias(self, plot=False):
        """Returns tuple (threshold, probability)"""

        from scipy.stats import gamma
        # scipy-ref.pdf Section 5.13 on page 390
        
        if plot:
            import matplotlib.pyplot as plt
            plt.ion()
            plt.clf()

        nd = self.get_all_noise_dists()
        a, loc, scale = gamma.fit(nd)
        ndrv = gamma(a, loc, scale)
        if plot:
            plt.hist(nd, normed=True)  # 'normed' is called 'density' in newer matplotlib
            x = range(max(nd))
            plt.plot(x, ndrv.pdf(x))

        icd = self.get_all_inter_chip_dists()
        a, loc, scale = gamma.fit(icd)
        icdrv = gamma(a, loc, scale)
        if plot:
            plt.hist(icd, normed=True)
            x = range(max(icd))
            plt.plot(x, icdrv.pdf(x))

        # Here it goes!
        threshold = ndrv.ppf(0.997)
        if plot:
            plt.axvline(threshold)
        prob = icdrv.cdf(threshold)
        print('Noise 99.7%% threshold: %f, probability of aliasing: %1.3e' % (threshold, prob))
        return threshold, prob
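
The fit-then-threshold logic above can be exercised on its own; a minimal sketch with synthetic stand-ins for get_all_noise_dists() and get_all_inter_chip_dists():

import numpy as np
from scipy.stats import gamma

rng = np.random.default_rng(0)
nd = rng.gamma(shape=3.0, scale=2.0, size=1000)   # stand-in noise distances
icd = rng.gamma(shape=8.0, scale=4.0, size=1000)  # stand-in inter-chip distances

ndrv = gamma(*gamma.fit(nd))     # gamma.fit returns (a, loc, scale)
icdrv = gamma(*gamma.fit(icd))

threshold = ndrv.ppf(0.997)      # 99.7% of noise distances fall below this
prob = icdrv.cdf(threshold)      # chance an inter-chip distance falls below it (aliasing)
print('threshold: %f, probability of aliasing: %1.3e' % (threshold, prob))
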
Example #5
def sample_hyperparameters(state):
    # http://bit.ly/1baZ3zf
    T = state['T']
    num_samples = 10  # R
    aalpha = 5
    balpha = 0.1
    abeta = 0.1
    bbeta = 0.1
    bgamma = 0.1  # ?
    agamma = 5  # ?

    # for (int r = 0; r < R; r++) {
    for r in range(num_samples):
        # gamma: root level (Escobar+West95) with n = T
        eta = beta(state['gamma'] + 1, T).rvs()
        bloge = bgamma - np.log(eta)
        K = state['num_topics']
        pie = 1. / (1. + (T * bloge / (agamma + K - 1)))
        u = bernoulli(pie).rvs()
        state['gamma'] = gamma(agamma + K - 1 + u, 1. / bloge).rvs()

        # alpha: document level (Teh+06)
        qs = 0.
        qw = 0.

        for m, doc in enumerate(state['docs']):
            qs += bernoulli(len(doc) * 1. / (len(doc) + state['alpha'])).rvs()
            qw += np.log(beta(state['alpha'] + 1, len(doc)).rvs())
        state['alpha'] = gamma(aalpha + T - qs, 1. / (balpha - qw)).rvs()

    state = update_beta(state, abeta, bbeta)
    return state
Example #6
def __getGammafilter(a, lamda, negativeDays, hardness):
    """

    :param a:               shape factor of a gamma distribution
    :param lamda:           scale factor of a gamma distribution (sometimes scale=1/lamda as in scipy)
    :param negativeDays:    How many days back should the change have an effect? E.g. 1.5 goes to noon two days back.
    :param hardness:        Should the filter smooth or spread? E.g. if hardness = 0.5 and applied to only
                            one rain event the cloud cover will be 0.5 that day.

    :return:                A gamma filter
    """

    from scipy.stats import gamma
    from scipy.integrate import quad

    # find the top of the gamma distribution. This will be noon (12:00) on the day with rain
    gdst = gamma(a, scale=1/lamda)
    increase = True                 # initial value
    delta = 0.01
    x = 0.01                        # initial value

    while increase:
        h = gdst.pdf(x+delta) - gdst.pdf(x)
        if h < 0:
            increase = False
        else:
            x = x + delta

    # So. x is where the top of the function is.
    # Update the gamma distribution with this shift so it has its maximum at x = 0
    gdst = gamma(a, loc=-x, scale=1/lamda)

    # First I make the weights for the days prior to the event (x < 0)
    delta = x/(negativeDays+0.5)
    distr = quad(lambda x: gdst.pdf(x), 0, -delta/2)
    intFrom = -delta/2
    gammaFilter = [-distr[0]]

    while -distr[0] > 0.05:
        distr = quad(lambda x: gdst.pdf(x), intFrom, intFrom-delta)
        intFrom = intFrom - delta
        gammaFilter.append(-distr[0])

    gammaFilter.reverse()

    # Then the weights for the positive days ( x > 0 )
    distr = quad(lambda x: gdst.pdf(x), 0, delta/2)
    intFrom = delta/2
    gammaFilter[-1] = gammaFilter[-1] + distr[0]

    while distr[0] > 0.05:
        distr = quad(lambda x: gdst.pdf(x), intFrom, intFrom+delta)
        intFrom = intFrom + delta
        gammaFilter.append(distr[0])

    # And then I divide the list by the weight for day = 0, thus normalizing it to 1
    gammaFilterNorm = [x/max(gammaFilter)*hardness for x in gammaFilter]

    return gammaFilterNorm
Example #7
 def getPolarNoise(self, radius=500.0, base_eps=2.0, LIMIT_NINETY_FIVE = False, NINETY_FIVE_DISTANCE=0.95):
     r_gen = gamma(2., scale=radius/base_eps)
     theta_gen = uniform(scale=2*math.pi)
     r, theta = r_gen.rvs(), theta_gen.rvs()
     if LIMIT_NINETY_FIVE and r > NINETY_FIVE_DISTANCE:
         r_gen = gamma(2., scale=radius/base_eps, size=1000)
         r = r_gen.rvs()
     return (np.cos(theta) * r, np.sin(theta) * r)
Example #8
def double_gamma_hrf(delay, tr, fptr=1.0, integrator=trapz):

    r"""The double gamma hemodynamic reponse function (HRF).
    The user specifies only the delay of the peak and undershoot.
    The delay shifts the peak and undershoot by a variable number of
    seconds. The other parameters are hardcoded. The HRF delay is
    modeled for each voxel independently. The form of the HRF and the
    hardcoded values are based on previous work [1]_.

    Parameters
    ----------
    delay : float
        The delay of the HRF peak and undershoot.

    tr : float
        The length of the repetition time in seconds.

    fptr : float
        The number of stimulus frames per repetition time.  For a
        60 Hz projector and with a 1 s repetition time, the fptr
        would be equal to 60.  It is possible that you will bin all
        the frames in a single TR, in which case fptr equals 1.

    integrator : callable
        The integration function for normalizing the units of the HRF
        so that the area under the curve is the same for differently
        delayed HRFs.  Set integrator to None to turn off normalization.

    Returns
    -------
    hrf : ndarray
        The hemodynamic response function to convolve with the stimulus
        timeseries.

    References
    ----------
    .. [1] Glover, GH (1999) Deconvolution of impulse response in event related
    BOLD fMRI. NeuroImage 9, 416-429.

    """
    from scipy.special import gamma
    
    # add delay to the peak and undershoot params (alpha 1 and 2)
    alpha_1 = 5/tr+delay/tr
    beta_1 = 1.0
    c = 0.1
    alpha_2 = 15/tr+delay/tr
    beta_2 = 1.0
    
    t = np.arange(0,32,tr)
    
    hrf = ( ( ( t ** (alpha_1) * beta_1 ** alpha_1 * np.exp( -beta_1 * t )) /gamma( alpha_1 )) - c *
            ( ( t ** (alpha_2) * beta_2 ** alpha_2 * np.exp( -beta_2 * t )) /gamma( alpha_2 )) )
            
    if integrator: # pragma: no cover
        hrf /= integrator(hrf)
        
    return hrf
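
A short usage sketch for the function above (illustrative values; assumes the module-level numpy import and passes scipy's trapezoid explicitly as the integrator so the HRF is area-normalized):

import numpy as np
from scipy.integrate import trapezoid

hrf = double_gamma_hrf(delay=0.0, tr=1.0, integrator=trapezoid)
t = np.arange(0, 32, 1.0)
print(t[np.argmax(hrf)])   # peak of the positive lobe, ~5 s for zero delay
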
 def choose(self):
     if self.user_class == 'HF':
         self.name = "Log-norm"
         peak_hours_for_iat_hf = [1, 2, 3, 4, 5, 6]
         if self.hour in peak_hours_for_iat_hf:
             lognorm_shape, lognorm_scale, lognorm_location = 4.09174469261446, 1.12850165892419, 4.6875
         else:
             lognorm_shape, lognorm_scale, lognorm_location = 3.93740014906562, 0.982210300411203, 3
         return lognorm(lognorm_shape, loc=lognorm_location, scale=lognorm_scale)
     elif self.user_class == 'HO':
         self.name = "Gamma"
         peak_hours_for_iat_ho = [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]
         if self.hour in peak_hours_for_iat_ho:
             gamma_shape, gamma_rate, gamma_location = 1.25170029089175, 0.00178381168026473, 0.5
         else:
             gamma_shape, gamma_rate, gamma_location = 1.20448161464647, 0.00177591076721503, 0.5
         return gamma(gamma_shape, loc=gamma_location, scale=1. / gamma_rate)
     elif self.user_class == 'MF':
         self.name = "Gamma"
         peak_hours_for_iat_mf = [1, 2, 3, 4, 5, 6, 7, 22, 23]
         if self.hour in peak_hours_for_iat_mf:
             gamma_shape, gamma_rate, gamma_location = 2.20816848575484, 0.00343216949000565, 1
         else:
             gamma_shape, gamma_rate, gamma_location = 2.03011412986896, 0.00342699308280547, 1
         return gamma(gamma_shape, loc=gamma_location, scale=1. / gamma_rate)
     elif self.user_class == 'MO':
         self.name = "Gamma"
         peak_hours_for_iat_mo = [1, 2, 3, 4, 5, 6]
         if self.hour in peak_hours_for_iat_mo:
             gamma_shape, gamma_rate, gamma_location = 1.29908195595742, 0.00163527376977441, 0.5
         else:
             gamma_shape, gamma_rate, gamma_location = 1.19210494792398, 0.00170354443324898, 0.5
         return gamma(gamma_shape, loc=gamma_location, scale=1. / gamma_rate)
     elif self.user_class == 'LF':
         peak_hours_for_iat_lf = [1, 2, 3, 4, 5, 6, 7]
         if self.hour in peak_hours_for_iat_lf:
             self.name = "Gamma"
             gamma_shape, gamma_rate, gamma_location = 1.79297773527656, 0.00191590321039876, 2
             return gamma(gamma_shape, loc=gamma_location, scale=1. / gamma_rate)
         else:
             self.name = "Weibull"
             weibull_c_shape, weibull_scale, weibull_location = 1.1988117443903, 827.961760834184, 1
             return weibull_min(weibull_c_shape, loc=weibull_location, scale=weibull_scale)
     elif self.user_class == 'LO':
         peak_hours_for_iat_lo = [2, 3, 4, 10, 11, 12, 13, 14, 15, 17, 18, 19, 20]
         if self.hour in peak_hours_for_iat_lo:
             self.name = "Weibull"
             weibull_c_shape, weibull_scale, weibull_location = 0.850890858519732, 548.241539446292, 1
             return weibull_min(weibull_c_shape, loc=weibull_location, scale=weibull_scale)
         else:
             self.name = "Gamma"
             gamma_shape, gamma_rate, gamma_location = 0.707816241615835, 0.00135537879658998, 1
             return gamma(gamma_shape, loc=gamma_location, scale=1. / gamma_rate)
     else:
         raise Exception('The user class %s does not exist' % self.user_class)
Example #10
 def __init__(self, temporal_deriv=False, tr=2, oversampling=16,
              kernel_secs=32, pos_shape=6, pos_scale=1,
              neg_shape=16, neg_scale=1, ratio=1./6):
     """Create the HRF object with FSL parameters as default."""
     self._rv_pos = gamma(pos_shape, scale=pos_scale)
     self._rv_neg = gamma(neg_shape, scale=neg_scale)
     self._tr = tr
     self._oversampling = oversampling
     dt = tr / oversampling
      self._timepoints = np.linspace(0, kernel_secs, int(kernel_secs / dt))
     self._temporal_deriv = temporal_deriv
     self._ratio = ratio
Example #11
def update_wrapper(infoname, priorname, outname, outavgname):
    G = update(infoname, priorname)
    for e in G.edges():
        print(G[e[0]][e[1]]['params'])
        print(e, stats.gamma(G[e[0]][e[1]]['params'][0], scale=G[e[0]][e[1]]['params'][1]).stats(moments='m'))
    nx.write_edgelist(G,outname)
    A = G.copy()
    for e in A.edges():
        p = stats.gamma(G[e[0]][e[1]]['params'][0], scale=G[e[0]][e[1]]['params'][1]).stats(moments='m')
#       if p == nan: A[e[0]][e[1]]['weight'] = 0
        A[e[0]][e[1]]['weight'] = p
    nx.write_weighted_edgelist(A,outavgname,delimiter=',')
Example #12
 def __init__(self, temporal_deriv=False, tr=2, oversampling=16,
              kernel_secs=32, pos_shape=6, pos_scale=1,
              neg_shape=16, neg_scale=1, ratio=1 / 6):
     """Create the HRF object with FSL parameters as default."""
     self._rv_pos = gamma(pos_shape, scale=pos_scale)
     self._rv_neg = gamma(neg_shape, scale=neg_scale)
     self._tr = tr
     self._oversampling = oversampling
     dt = tr / oversampling
      self._timepoints = np.arange(0, kernel_secs, dt, float)
      self._sampled_timepoints = np.arange(0, kernel_secs, tr,
                                           float) + (tr * .5)
     self._kernel_secs = kernel_secs
     self._temporal_deriv = temporal_deriv
     self._ratio = ratio
Example #13
def test_sklearn_cv():

    model = LightFM(loss='warp', random_state=42)

    # Set distributions for hyperparameters
    randint = stats.randint(low=1, high=65)
    randint.random_state = 42
    gamma = stats.gamma(a=1.2, loc=0, scale=0.13)
    gamma.random_state = 42
    distr = {'no_components': randint, 'learning_rate': gamma}

    # Custom score function
    def scorer(est, x, y=None):
        return precision_at_k(est, x).mean()

    # Custom CV which sets train_index = test_index
    class CV(KFold):
        def __iter__(self):
            ind = np.arange(self.n)
            for test_index in self._iter_test_masks():
                train_index = np.logical_not(test_index)
                train_index = ind[train_index]
                yield train_index, train_index

    cv = CV(n=train.shape[0], random_state=42)
    search = RandomizedSearchCV(estimator=model, param_distributions=distr,
                                n_iter=10, scoring=scorer, random_state=42,
                                cv=cv)
    search.fit(train)
    assert search.best_params_['no_components'] == 52
Example #14
def pux_integration(x, sat, icorr, s, s2, start, end, pointNr):
   # see example usage in posterior_covariance.py
   # s[i] = linalg.inv(invC + z_a[i]/s2*ata)
   # icorr[i] = linalg.inv(s2*ident + z_a[i]**2*acat)
   # sat[i] = s[i]*A.T
   h = size(x, 0)
   w = size(x, 1)
   x = x.reshape(h*w)
   zarr = zeros(size(x))
   c = zeros(pointNr)
   m = zeros((pointNr, size(sat,1)))
   zsum = 0
   nsum = 0
   z_a = linspace(start, end, pointNr)
   g_dist = stats.gamma(2., loc = 0., scale = 2.)
   for i in range(0,pointNr):
      z = z_a[i]
      c[i] = g_dist.pdf(z)*multi_norm(x, zarr, icorr[i], inverted=True)
      zsum += z*c[i]
      nsum += c[i]
#      s[i] = linalg.inv(invC + z/s2*ata)
      m[i] = z/s2*dot(sat[i],x)
   print (sum(c))
   c = c/sum(c); # normalization
   
   return [c, m, zsum/nsum]
def run_kstests(json_path, run_date, member):
    try:
        full_path = json_path + "/{0}/{1}/mesh_*.json".format(run_date, member)
        json_files = sorted(glob(full_path))
        ks_results = {"id":[], "ks":[]}
        for json_file in json_files:
            js = open(json_file)
            mesh_track = json.load(js)
            js.close()
            id = mesh_track["properties"]["id"]
            for m, mesh_obj in enumerate(mesh_track["features"]):
                step_id = id + "_{0:03d}".format(m)
                ts = np.array(mesh_obj["properties"]["timesteps"])
                mask = np.array(mesh_obj["properties"]["masks"])
                vals = ts[mask == 1]
                gdist = gamma.fit(vals, floc=vals.min()-0.1)
                sig = kstest(vals, gamma(*gdist).cdf)
                ks_results["id"].append(step_id)
                ks_results["ks"].append(sig)
                if sig[1] < 0.01:
                    print(step_id,)
                    print(sig[1],gdist)
                    print(np.sort(vals))
                    plt.figure(figsize=(8,8))
                    plt.pcolormesh(ts, alpha=0.5, cmap="YlOrRd", vmin=0, vmax=100)
                    pc = plt.pcolormesh(np.ma.array(ts, mask=mask==0), cmap="YlOrRd", vmin=0, vmax=100)
                    plt.title(step_id)
                    plt.colorbar(pc)
                    plt.savefig(step_id + ".png", bbox_inches="tight", dpi=150)
                    plt.close()
        ks_frame = pd.DataFrame(ks_results["ks"], index=ks_results["id"],columns=["D", "p-val"])
        print(ks_frame.shape[0])
    except Exception as e:
        raise e
    return ks_frame
def create_hist(flat_fits, p, low, high, draw=True, bins=20, fit_gamma=True, fit_normal=True):
    latex,getter = params[p]
    vals = np.array([getter(f) for f in flat_fits])
    vals = vals[(vals>low) & (vals<high)]
    pct_captured = int(100*len(vals)/len(flat_fits))
    if draw:
        plt.figure()
        plt.hist(vals,bins,normed=True,color='b')
        xmin,xmax = plt.xlim()
        plt.xlabel('x',fontsize=cfg.fontsize)
        plt.ylabel('p(x)',fontsize=cfg.fontsize)
        ttl1 = 'Distribution of parameter {} (Central mass: {}% of values)'.format(latex,pct_captured)
        ttl2 = '(created with low={}, high={})'.format(low,high)
        ttl = '\n'.join([ttl1,ttl2])
        if fit_gamma:
            alpha,loc,scale=stats.gamma.fit(vals)
            beta = 1/scale
            rv = stats.gamma(alpha,loc,scale)
            x = np.linspace(loc,xmax,100)
            prob = rv.pdf(x)
            plt.plot(x,prob,'g',linewidth=3)
            ttl_fit = r'Gamma fit: $\alpha$={:.3f}, $\beta$={:.3f}, $loc$={:.3f}'.format(alpha,beta,loc)
            ttl = '\n'.join([ttl, ttl_fit])
        if fit_normal:
            loc,sigma=stats.norm.fit(vals)
            rv = stats.norm(loc,sigma)
            x = np.linspace(xmin,xmax,100)
            prob = rv.pdf(x)
            plt.plot(x,prob,'k',linewidth=3)
            ttl_fit = r'Normal fit: $loc$={:.3f}, $\sigma$={:.3f}'.format(loc,sigma)
            ttl = '\n'.join([ttl, ttl_fit])
        plt.title(ttl)
    return vals
    def _setcompleteness(self, periodgridspacing, radiusgridspacing, comp):
        self.cdpp_cols = [k for k in self.stlr.keys() if k.startswith("rrmscdpp")]
        self.cdpp_vals = np.array([k[-4:].replace("p", ".") for k in self.cdpp_cols], dtype=float)

        # Pre-compute and freeze the gamma function from Equation (5) in
        # Burke et al.
        self.pgam = gamma(4.65, loc=0., scale=0.98)
        self.mesthres_cols = [k for k in self.stlr.keys() if k.startswith("mesthres")]
        self.mesthres_vals = np.array([k[-4:].replace("p", ".") for k in self.mesthres_cols],
                                 dtype=float)

        period = np.linspace(self.planetperiod[0], self.planetperiod[1], periodgridspacing)
        rp = np.linspace(self.planetradius[0], self.planetradius[1], radiusgridspacing)
        self.period_grid, self.rp_grid = np.meshgrid(period, rp, indexing="ij")

        self.koi_periods = np.array(self.kois.koi_period)
        self.koi_rps = np.array(self.kois.koi_prad)
        self.vol = np.diff(self.period_grid, axis=0)[:, :-1] * np.diff(self.rp_grid, axis=1)[:-1, :]

        if comp is None:
            comp = np.zeros_like(self.period_grid)

            for _, star in self.stlr.iterrows():
                comp += self.get_completeness(star, self.period_grid, self.rp_grid, 0.0, with_geom=True)

            self.comp = comp

        else:
            self.comp = comp
  def _testMoments(self, dt):
    try:
      from scipy import stats  # pylint: disable=g-import-not-at-top
    except ImportError as e:
      tf_logging.warn("Cannot test moments: %s" % e)
      return

    # The moments test is a z-value test.  This is the largest z-value
    # we want to tolerate. Since the z-test approximates a unit normal
    # distribution, it should almost definitely never exceed 6.
    z_limit = 6.0

    for stride in 0, 1, 4, 17:
      alphas = [0.2, 1.0, 3.0]
      if dt == dtypes.float64:
        alphas = [0.01] + alphas
      for alpha in alphas:
        for scale in 9, 17:
          # Gamma moments only defined for values less than the scale param.
          max_moment = min(6, scale // 2)
          sampler = self._Sampler(
              20000, alpha, 1 / scale, dt, use_gpu=False, seed=12345)
          z_scores = util.test_moment_matching(
              sampler(),
              max_moment,
              stats.gamma(alpha, scale=scale),
              stride=stride,
          )
          self.assertAllLess(z_scores, z_limit)
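
The reference distribution handed to util.test_moment_matching above is a frozen SciPy gamma; its non-central moments can be inspected directly. A minimal sketch (alpha and scale are illustrative):

from scipy import stats

alpha, scale = 3.0, 9.0
dist = stats.gamma(alpha, scale=scale)
for k in range(1, 5):
    print(k, dist.moment(k))   # k-th non-central moment of the reference gamma
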
    def test_generic(self):
        import OpenPNM.Geometry.models.pore_diameter as mods

        func = spst.gamma(a=2, loc=0.001, scale=0.0001)
        self.geo.models.add(propname="throat.diameter", model=mods.generic, func=func, seeds="throat.seed")
        assert sp.amin(self.geo["throat.diameter"]) > 0.001
        del self.geo["throat.diameter"]
Example #20
    def setNewEvidence(self, y):

        a = np.sum(y)
        b = 1
        try :
            b = len(y)
        except:
            b = 1

        a_new = self.a + a
        b_new = self.b + b

        # get new PDF
        self.rescale()

        y_new = np.zeros(shape=(len(self.y),), dtype=float)
        ##### use normal approximation for large a and b, unfortunately we reach large a and b very quickly
        #if (a_new > 1000):
        #    y_new = self.normalApprox(a_new, b_new)
        #else:
        self.rv = gamma(a_new, scale=1.0/b_new)
        y_new = self.rv.pdf(self.x)

        ## just in case something messes up
        #if (any(np.isnan(y_new))):
        #    y_new = self.normalApprox(a_new, b_new)

        # measure dKL and dJS before update
        self.measureDKL(y_new)
        self.measureDJS(y_new)

        # update
        self.a = a_new
        self.b = b_new
        self.y = y_new
Example #21
    def __init__(self, alpha, beta):
        self.alpha = alpha
        self.beta = beta

        # set dist before calling super's __init__
        self.dist = st.gamma(alpha, scale=beta)
        super(Gamma, self).__init__()
Example #22
    def _draw_gamma_rates(self):
        '''
            Function to draw and assign rates from a discretized gamma distribution, if specified. By default, 4 categories are drawn.
        '''       
        if self.rate_probs is not None:
            print("\nThe provided value for the `rate_probs` argument will be ignored since gamma-distributed heterogeneity has been specified with the alpha parameter.")        
        if type(self.k_gamma) is not int:
            raise TypeError("\nProvided argument `num_categories` must be an integer.")

        #### Note that this code is adapted from gamma.c in PAML ####
        rv = gamma(self.alpha, scale = 1./self.alpha)
        freqK = np.zeros(self.k_gamma)  ### probs
        rK = np.zeros(self.k_gamma)     ### rates

        for i in range(self.k_gamma-1):
            raw=rv.ppf( (i+1.)/self.k_gamma )
            freqK[i] = gammainc(self.alpha + 1, raw*self.alpha)

        rK[0] = freqK[0] * self.k_gamma
        rK[self.k_gamma-1] = (1-freqK[self.k_gamma-2]) * self.k_gamma
        for i in range(1,self.k_gamma-1):
            rK[i] = self.k_gamma * (freqK[i] -freqK[i-1])    
        #############################################################
        
        self.rate_probs = np.repeat(1./self.k_gamma, self.k_gamma)
        self.rate_factors = deepcopy(rK)

        if self.pinv > ZERO:          
            self.rate_probs = list(self.rate_probs - self.pinv/self.k_gamma) + [self.pinv]
            self.rate_factors = list(self.rate_factors) + [0.0]
            
            self.rate_probs = np.array( self.rate_probs )
            self.rate_factors = np.array( self.rate_factors )
def generate_slm_from_txt(training_rows, slm_dir, do_plot=True):
    slm_fxt = os.path.join(slm_dir, "slm.fxt")
    slength_counts = Counter()
    slen=1
    maxl=0
    #print training_rows
    for r in training_rows:
        r = r.strip()
        
        segs = r.split(BREAK) # chop the line up into segments
        for s in segs:
            slen = len(s.split())

            if slen > maxl:
                print "new max length = ", slen
                maxl = slen
                print "from seg: ", s
#                print "from row: ", r

            if slen:
                slength_counts[slen]+=1

    #_ = raw_input("hit key")
                           
    els = list( slength_counts.elements() ) #Counter.elements() returns iterator that iterates across n instances of each element e where slength_counts[e]=n .. we make this into a list for plotting
    print(els)
    x_vals = range(0, max(els)+1)
    
    (shape, loc, scale) = gamma.fit(els, floc=0)
    gam_gen = gamma(shape, loc, scale) #use these model params to build a new gamma distrib/n generator
    write_slm(slm_fxt, x_vals, gam_gen)
    if do_plot:
        plot_graph(x_vals, gam_gen, els)
    compile_slm(slm_dir) #this last step compiles the slm to binary .fst format
Example #24
def test_slicesample():
    from scipy.stats import gamma
    import matplotlib.pyplot as plt

    n_iter = 1000

    # Gamma distribution (bounded on left)
    print("Gamma test")
    g = gamma(2.0, loc=0., scale=2.0)

    smpls = np.zeros(n_iter)
    smpls[0] = g.rvs(1)
    for n in np.arange(1,n_iter):
        sn, _ = slicesample(smpls[n-1], g.logpdf, lb=1e-5)
        smpls[n] = sn

    print("Expected gamma mean: ", g.mean())
    print("Inferred gamma mean: ", smpls.mean())
    print("Expected gamma std:  ", g.std())
    print("Inferred gamma std:  ", smpls.std())

    fig, ax = plt.subplots(1, 1)
    x = np.linspace(1e-5, g.mean() + 4*g.std(), 1000)
    ax.plot(x, g.pdf(x), 'k-', lw=2, label='true pdf')
    ax.hist(smpls, 25, normed=True, alpha=0.2)
    ax.legend(loc='best', frameon=False)
    plt.show()
Example #25
def gamma_dist(bin_values, K, M):
    """Gamma distribution function

    Parameters
    ----------
    bin_values : array
        bin values for detecting photons
        eg : max photon counts is 8
        bin_values = np.arange(8+2)
    K : int
        mean count of photons
    M : int
        number of coherent modes

    Returns
    -------
    gamma_dist : array
        Gamma distribution

    Notes
    -----
    These implementations are based on the references under the ``Notes``
    section of the ``nbinom_dist()`` docstring

    .. math::
        P(K) = \\frac{\Gamma(K + M)} {\Gamma(K + 1)\Gamma(M)}
        (\\frac {M} {M + <K>})^M (\\frac {<K>}{M + <K>})^K
    """

    gamma_dist = (stats.gamma(M, 0., K/M)).pdf(bin_values)
    return gamma_dist
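
An illustrative call of the helper above, following the docstring's example of a maximum photon count of 8 (the K and M values are arbitrary; assumes the module-level numpy and scipy.stats imports):

import numpy as np

bin_values = np.arange(8 + 2)
pk = gamma_dist(bin_values, K=4, M=2)
print(pk)
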
def gen_gauss_diag_lpost(num_datasets, dims, ev_params = [(80, 10), (40,10)], cov_var_const = 4, with_grad = False):
    def gen_lp_unnorm_ev(lev, distr_norm, with_grad = False):
       # print(distr_norm.mu, distr_norm.K)
        rval = lambda x:distr_norm.logpdf(x) + lev
        rval.log_evidence = lev
        if with_grad:
            rval.lpdf_and_grad = lambda x, pdf, grad: distr_norm.log_pdf_and_grad(x, pdf, grad)
        return rval
        
        
    rval = []
    for ep in ev_params:
        lev_distr = stats.gamma(ep[0], scale=ep[1])
        for i in range(int(num_datasets//len(ev_params))):
            while True:
                try:
                    m = stats.multivariate_normal.rvs([0] * dims, np.eye(dims)*1000)
                    K = np.eye(dims)
                    val = gen_lp_unnorm_ev(-lev_distr.rvs(), mvnorm(m, K), with_grad = with_grad)
                    val.mean = m
                    val.cov = K
                    rval.append(val)
                    break
                except np.linalg.LinAlgError:
                    import sys
                    #The Matrix from the niw was not invertible. Try again.
                    print("np.linalg.LinAlgError - trying again", file=sys.stderr)
                    pass
            
    return rval
Example #27
        def gamma_distribution(sigma):
            """ Returns a normalized gamma distribution kernel for convolutions """
            from scipy.stats import gamma

            k = sigma
            t = np.sqrt(sigma)
            dist = gamma(k, 0, t)

            x = np.arange(0.0, 10.0 * sigma, 1.0)
            x[0] = 1e-20

            g = dist.pdf(x)
            shift = np.argmax(dist.pdf(x))

            g = np.concatenate((np.zeros(int(10.0 * sigma) - 1), g))

            g = g[shift:-1]
            g = np.append(g, np.zeros(shift))

            # import matplotlib.pyplot as plt
            # plt.plot(g)
            # plt.show()

            # print x[np.argmax(dist.pdf(x))]
            # print dist.mean(), dist.median(), np.sqrt(dist.var())

            return g / g.sum()
    def Consumed_SKU(self):  # Function that determines which SKU's are consumed.

        check = True
        while self.Time < self.Sim_Length:
            while self.Time < self.Cycle_Length * (self.Order_Amount + 1):
                arrival = gamma(self.Num_SKU, scale=self.Demand).rvs()
                self.Erlang_Nums.append(arrival)
                Location = randint(0, self.Num_SKU - 1)

                if self.Time + arrival <= self.Cycle_Length * (
                    self.Order_Amount + 1
                ):  # Analyzes whether order is needed based on time.
                    self.Time += arrival
                    self.Check_Lead_Time()

                else:
                    break

                if self.Check_SKU(Location):
                    check = self.Empty_SKU(Location)
                else:
                    print "***** Unable to consume SKU *****\n"

                while not check:  # If unable to consume an SKU, will try another and output error.
                    Location = randint(0, self.Num_SKU - 1)
                    check = self.Empty_SKU(Location)

            self.Replenishment()
            self.Time += arrival
            if int(self.Time) == self.Warmup_Time:  # When Warm-Up period is over, clear relevant information.
                self.Clear()

        self.Total_Cost()
def test2(graph, r=1.2, iterations=1000, bounds=(0, 20), steps=1000):
    N = len(graph.vertices())
    fitness = [1, r]
    prior = stats.gamma(2, scale=0.5).pdf
    partition = Partition(bounds[0], bounds[1], steps)
    prior_points = partition.map(prior)
    table = likelihood_table(N, partition.points)
    #means = []
    #modes = []
    l = []
    for i in range(iterations):
        occupation = [0]*N
        occupation[::2] = [1]*(len(occupation[::2]))
        #occupation[:N2//2] = [1]*(len(occupation[:N2//2]))
        pop = PopulationOnGraph(graph, occupation, fitness)
        tuples = list(pop)
        #conjugate_parameters = tuples_to_conjugate_parameters(N, tuples[:-1])
        #posterior = construct_posterior(N, conjugate_parameters, prior_points, partition, table)
        #mean = posterior.mean()[0]
        #mode = posterior.mode()[0]
        #means.append(mean) 
        #modes.append(mode)
        #print mean, mode
        l.append(len(tuples))
    return numpy.mean(l), numpy.std(l)
def gen_mm_lpost(num_datasets,num_modes, dims, ev_params = [(80, 10), (40,10)], cov_var_const = 1.5, ):
    def gen_lp_unnorm_ev(lev, mixt):
        rval = lambda x:mixt.logpdf(x) + lev
        rval.log_evidence = lev
        return (rval, lev)
        
    rval = []
    for ep in ev_params:
        lev_distr = stats.gamma(ep[0], scale=ep[1])
        for i in range(int(num_datasets//len(ev_params))):
            mode_p = np.random.dirichlet([100] * num_modes)
            mode_d = []
            m = stats.multivariate_normal.rvs([0] * dims, np.eye(dims)*10)
            while True:
                try:
                    K = invwishart_rv(np.eye(dims) * cov_var_const , dims)
                    print(K)
                    mode_mean_dist = stats.multivariate_normal(m, K)
                    break
                except:
                    pass
            
            
            while len(mode_d) != num_modes:
                try:                    
                    mode_d.append(mvnorm(mode_mean_dist.rvs(),
                                         invwishart_rv(K, dims)))
                except:
                    #The Matrix from the niw was not invertible. Try again.
                    pass
            mixt = GMM(num_modes, dims)
            mixt.comp_lprior = np.log(mode_p)
            mixt.comp_dist = mode_d
            rval.append(gen_lp_unnorm_ev(-lev_distr.rvs(), mixt))
    return rval
Example #31
import sys
import time
import random
import datetime
from scipy.stats import gamma
sys.path.append("..")
from db_builder import db_block_time_stamp
BLOCK_TIME = db_block_time_stamp.init()


SIZE_DB = [0, 32929106, 33036408, 32872161, 32784690]
MIX_SIZE = 0
SIMU_TIMES = 250
ALPHA = 13.19
BETA = 0.86
D = gamma(ALPHA, scale=1 / BETA)


def main():
    try:
        th = int(sys.argv[1])
        bin_num = int(sys.argv[2])
        bin_size = int(sys.argv[3])
    except:
        th = int(input("DB no.(1-4): "))
        bin_num = int(input("bin num: "))
        bin_size = int(input("bin size: "))
    info = simulate(th, bin_num, bin_size, SIZE_DB[th],  SIMU_TIMES)
    write_log(
        th, "../../result/lab4-th{}-binnum{}-binsize{}.txt".format(
        	th, bin_num, bin_size), info)
Example #32
    ('scl', StandardScaler()),
    (
        'lin',
        Ridge(
            solver='sparse_cg',
            tol=0.001,  # optimizer termination criteria
            # alpha = 1.0,  # L2 regulization alpha=C^{-1}
            fit_intercept=True,
            normalize=False,  # done in the pipeline
            copy_X=True,
            max_iter=1000,  # for CG solver
        ))
])

hyper = {
    'lin__alpha': ss.gamma(a=1.5, loc=1e-5, scale=.7),  # alpha ~ [0.001, 10]
}

meta = {
    'id':
    "simi8",
    'name':
    'LinReg Ridge',
    'descriptions':
    ("Ridge Regression (L2 penalty), Conjugate Gradient solver, "
     "standard-normal transformed features."),
    'solver':
    'Conjugate Gradient',
    'active':
    True,
    'keywords': [
Example #33
def estimate_tweedie_loglike_series(x, mu, phi, p):
    """Estimate the loglikihood of a given set of x, mu, phi, and p

    Parameters
    ----------
    x : array
        The observed values. Must be non-negative.
    mu : array
        The fitted values. Must be positive.
    phi : array
        The scale parameter. Must be positive.
    p : array
        The Tweedie variance power. Must equal 0 or must be greater than or
        equal to 1.

    Returns
    -------
    estimate_tweedie_loglike_series : float
    """
    x = np.array(x, ndmin=1)
    mu = np.array(mu, ndmin=1)
    phi = np.array(phi, ndmin=1)
    p = np.array(p, ndmin=1)

    ll = np.ones_like(x) * -np.inf

    # Gaussian (Normal)
    gaussian_mask = p == 0.
    if np.sum(gaussian_mask) > 0:
        ll[gaussian_mask] = norm(loc=mu[gaussian_mask],
                                 scale=np.sqrt(phi[gaussian_mask])).logpdf(
                                     x[gaussian_mask])

    # Poisson
    poisson_mask = p == 1.
    if np.sum(poisson_mask) > 0:
        poisson_pdf = poisson(mu=mu[poisson_mask] / phi[poisson_mask]).pmf(
            x[poisson_mask] / phi[poisson_mask]) / phi[poisson_mask]
        ll[poisson_mask] = np.log(poisson_pdf)

    # 1 < p < 2
    ll_1to_2_mask = (1 < p) & (p < 2)
    if np.sum(ll_1to_2_mask) > 0:
        # Calculating the log-likelihood at x == 0 is pretty straightforward
        zeros = x == 0
        mask = zeros & ll_1to_2_mask
        ll[mask] = -(mu[mask]**(2 - p[mask]) / (phi[mask] * (2 - p[mask])))
        mask = ~zeros & ll_1to_2_mask
        ll[mask] = ll_1to2(x[mask], mu[mask], phi[mask], p[mask])

    # Gamma
    gamma_mask = p == 2
    if np.sum(gamma_mask) > 0:
        ll[gamma_mask] = gamma(a=1 / phi[gamma_mask],
                               scale=phi[gamma_mask] * mu[gamma_mask]).logpdf(x[gamma_mask])

    # (2 < p < 3) or (p > 3)
    ll_2plus_mask = ((2 < p) & (p < 3)) | (p > 3)
    if np.sum(ll_2plus_mask) > 0:
        zeros = x == 0
        mask = zeros & ll_2plus_mask
        ll[mask] = -np.inf
        mask = ~zeros & ll_2plus_mask
        ll[mask] = ll_2orMore(x[mask], mu[mask], phi[mask], p[mask])

    # Inverse Gaussian (Normal)
    invgauss_mask = p == 3
    if np.sum(invgauss_mask) > 0:
        cond1 = invgauss_mask
        cond2 = x > 0
        mask = cond1 & cond2
        ll[mask] = invgauss(mu=mu[mask] * phi[mask],
                            scale=1. / phi[mask]).logpdf(x[mask])
    return ll
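
A quick sanity check (illustrative arrays, assuming the module-level scipy.stats imports) for the p == 2 branch above, which reduces to a gamma log-likelihood with shape 1/phi and scale phi*mu:

import numpy as np
from scipy.stats import gamma

x = np.array([1.0, 2.0, 3.0])
mu = np.full(3, 1.5)
phi = np.full(3, 0.5)
p = np.full(3, 2.0)

ll = estimate_tweedie_loglike_series(x, mu, phi, p)
# Same quantity computed directly from the frozen gamma distribution.
print(np.allclose(ll, gamma(a=1 / phi, scale=phi * mu).logpdf(x)))
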
Example #34
def run_platformqc(data_path, output_path, *, suffix=None, b_width=1000):
    if not suffix:
        suffix = ""
    else:
        suffix = "_" + suffix
    log_path = os.path.join(output_path, "log",
                            "log_sequel_platformqc" + suffix + ".txt")
    fig_path = os.path.join(output_path, "fig",
                            "fig_sequel_platformqc_length" + suffix + ".png")
    fig_path_bar = os.path.join(
        output_path, "fig", "fig_sequel_platformqc_adapter" + suffix + ".png")
    json_path = os.path.join(output_path, "QC_vals_sequel" + suffix + ".json")
    # json
    tobe_json = {}

    # output_path will be made too.
    if not os.path.isdir(os.path.join(output_path, "log")):
        os.makedirs(os.path.join(output_path, "log"), exist_ok=True)

    if not os.path.isdir(os.path.join(output_path, "fig")):
        os.makedirs(os.path.join(output_path, "fig"), exist_ok=True)

    ### logging conf ###
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.INFO)
    fh = logging.FileHandler(log_path, 'w')
    sh = logging.StreamHandler()

    formatter = logging.Formatter(
        '%(module)s:%(asctime)s:%(lineno)d:%(levelname)s:%(message)s')
    fh.setFormatter(formatter)
    sh.setFormatter(formatter)

    logger.addHandler(sh)
    logger.addHandler(fh)
    #####################

    logger.info("Started sequel platform QC for %s" % data_path)

    # sequel
    xml_file = get_sts_xml_path(data_path, logger)

    if not xml_file:
        logger.warning("sts.xml is missing. Productivity won't be shown")
        [p0, p1, p2] = [None] * 3
    else:
        [p0, p1, p2] = parse_sts_xml(
            xml_file,
            ns="http://pacificbiosciences.com/PacBioBaseDataModel.xsd")
        logger.info("Parsed sts.xml")

    [subr_bam_p, scrap_bam_p] = get_bam_path(data_path, logger)
    if subr_bam_p and scrap_bam_p:
        scrap_bam = pysam.AlignmentFile(scrap_bam_p, 'rb', check_sq=False)
        subr_bam = pysam.AlignmentFile(subr_bam_p, 'rb', check_sq=False)
    else:
        logger.ERROR("Platform QC failed due to missing bam files")
        return 1

    bam_reads = {}
    snr = [[], [], [], []]
    hr_fraction = []
    tot_lengths = []
    hr_lengths = []
    ad_num_stat = {}
    control_throughput = 0

    if get_readtype(scrap_bam.header) == 'SCRAP':
        logger.info("Started to load scraps.bam...")
        control_throughput = set_scrap(bam_reads, scrap_bam, snr)
    else:
        logger.ERROR("the given scrap file has incorrect header.")

    logger.info("Scrap reads were loaded.")

    if get_readtype(subr_bam.header) == 'SUBREAD':
        logger.info("Started to load subreads.bam...")
        set_subreads(bam_reads, subr_bam, snr)
    else:
        logger.ERROR("the given subread file has incorrect header.")

    logger.info("Subreads were loaded.")

    for k, v in bam_reads.items():
        #print(k)
        l = construct_polread(v)

        #print(l)
        if l[4]:
            hr_fraction.append(l[2] / l[3])
            tot_lengths.append(l[3])
            hr_lengths.append(l[2])
            if l[5] in ad_num_stat:
                ad_num_stat[l[5]] += 1
            else:
                ad_num_stat[l[5]] = 1

    max_adnum = max(ad_num_stat.keys())
    min_adnum = min(ad_num_stat.keys())

    left = []
    height = []
    for i in range(min_adnum, max_adnum + 1):
        left.append(i)
        if i in ad_num_stat:
            height.append(ad_num_stat[i])
        else:
            height.append(0)

    plt.bar(left, height)
    plt.savefig(fig_path_bar, bbox_inches="tight")
    plt.close()
    logger.info("Plotted bar plot for adpter occurence")

    (a, b) = lq_gamma.estimate_gamma_dist_scipy(hr_lengths)
    logger.info("Fitting by Gamma dist finished.")

    _max = np.array(hr_lengths).max()
    _mean = np.array(hr_lengths).mean()
    _n50 = get_N50(hr_lengths)
    _n90 = get_NXX(hr_lengths, 90)
    throughput = np.sum(hr_lengths)
    longest = np.max(hr_lengths)
    fracs = np.mean(hr_fraction)

    tobe_json["Productivity"] = {"P0": p0, "P1": p1, "P2": p2}
    tobe_json["Throughput"] = int(throughput)
    tobe_json["Throughput(Control)"] = int(control_throughput)
    tobe_json["Longest_read"] = int(_max)
    tobe_json["Num_of_reads"] = len(hr_lengths)
    tobe_json["polread_gamma_params"] = [float(a), float(b)]
    tobe_json["Mean_polread_length"] = float(_mean)
    tobe_json["N50_polread_length"] = float(_n50)
    tobe_json["Mean_HQ_fraction"] = float(np.mean(fracs))
    tobe_json["Adapter_observation"] = ad_num_stat

    with open(json_path, "w") as f:
        logger.info("Quality measurements were written into a JSON file: %s" %
                    json_path)
        json.dump(tobe_json, f, indent=4)

    x = np.linspace(0, gamma.ppf(0.99, a, 0, b))
    est_dist = gamma(a, 0, b)
    plt.plot(x, est_dist.pdf(x), c=rgb(214, 39, 40))
    plt.grid(True)
    plt.hist(hr_lengths,
             histtype='step',
             bins=np.arange(min(hr_lengths), _max + b_width, b_width),
             color=rgb(214, 39, 40),
             alpha=0.7,
             normed=True)
    plt.xlabel('Read length')
    plt.ylabel('Probability density')

    if _mean >= 10000:  # pol read mean is expected >= 10k and <= 15k, but omit the <= 15k condition.
        plt.axvline(x=_mean,
                    linestyle='dashed',
                    linewidth=2,
                    color=rgb(44, 160, 44),
                    alpha=0.8)
    else:
        plt.axvline(x=_mean,
                    linestyle='dashed',
                    linewidth=2,
                    color=rgb(188, 189, 34),
                    alpha=0.8)

    if _n50 >= 20000:
        plt.axvline(x=_n50, linewidth=2, color=rgb(44, 160, 44), alpha=0.8)
    else:
        plt.axvline(x=_n50, linewidth=2, color=rgb(188, 189, 34), alpha=0.8)

    plt.hist(tot_lengths,
             histtype='step',
             bins=np.arange(min(tot_lengths),
                            max(tot_lengths) + b_width, b_width),
             color=rgb(31, 119, 180),
             alpha=0.7,
             normed=True)

    ymin, ymax = plt.gca().get_ylim()
    xmin, xmax = plt.gca().get_xlim()
    plt.text(xmax * 0.6, ymax * 0.72, r'$\alpha=%.3f,\ \beta=%.3f$' % (a, b))
    plt.text(xmax * 0.6, ymax * 0.77, r'Gamma dist params:')

    plt.text(xmax * 0.6, ymax * 0.85, r'sample mean: %.3f' % (_mean, ))
    plt.text(xmax * 0.6, ymax * 0.9, r'N50: %.3f' % (_n50, ))
    plt.text(xmax * 0.6, ymax * 0.95, r'N90: %.3f' % (_n90, ))

    plt.text(_mean, ymax * 0.85, r'Mean')
    plt.text(_n50, ymax * 0.9, r'N50')

    plt.savefig(fig_path, bbox_inches="tight")
    plt.close()
    #plt.show()

    logger.info("Figs were generated.")
    logger.info("Finished all processes.")
Example #35
                                     shape=(img_size, img_size))
        y[i, rr, cc] = 1
    return y


#%%

# names (this is just for reference for the moment!)
columns = ["x", "y", "radius", "dx", "dy"]

# prior sampling function for each variable
# (assumes x and y are coordinates in the range 0-img_size)
prior_fn = independent_sample([
    norm(loc=img_size / 2, scale=img_size / 2).rvs,
    norm(loc=img_size / 2, scale=img_size / 2).rvs,
    gamma(a=1, loc=0, scale=10).rvs,
    norm(loc=0, scale=0.5).rvs,
    norm(loc=0, scale=0.5).rvs,
])


# very simple linear dynamics: x += dx
def velocity(x):
    dt = 1.0
    print(x)
    xp = (x @ np.array([
        [1, 0, 0, dt, 0],
        [0, 1, 0, 0, dt],
        [0, 0, 1, 0, 0],
        [0, 0, 0, 1, 0],
        [0, 0, 0, 0, 1],
Example #36
 def get_regularizer(regularizer):
     if isinstance(regularizer, float):
         reg = gamma(a=regularizer, scale=1)  # Initial weight prior
         regularizer = Parameter(reg, Positive())
     return regularizer
Example #37
 def get_var(var):
     if isinstance(var, float):
         var = gamma(a=var, scale=1)  # Initial target noise
         var = Parameter(var, Positive())
     return var
            "p": list(distribution.p),
        }
    elif isinstance(distribution, stats.distributions.rv_frozen):
        name = distribution_name_mapping[distribution.dist.name]
        encoded_parameters = distribution_parameter_encoders[name](
            *distribution_parameters(distribution)
        )
    else:
        raise ValueError(f"Do not have a codec for {distribution}")
    return dict(type="distribution", distribution=name, **encoded_parameters)


# Functions to decode serialised distributions.
distribution_decoders = {
    "categorical": [lambda data: Categorical(data["bins"], data["weights"])],
    "gamma": [lambda data: stats.gamma(data["k"], scale=data["theta"]),
              lambda data: stats.gamma(data["k"], scale=data["θ"]),
              lambda data: stats.gamma(data["shape"], scale=data["scale"]),
              lambda data: stats.gamma(data["alpha"], scale=1 / data["beta"]),
              lambda data: stats.gamma(data["α"], scale=1 / data["β"]),
              lambda data: stats.gamma(data["shape"], scale=1 / data["rate"])],
    "normal": [lambda data: stats.norm(data["mu"], data["sigma"]),
               lambda data: stats.norm(data["μ"], data["σ"]),
               lambda data: stats.norm(data["μ"], sqrt(data["σ²"])),
               lambda data: stats.norm(data["mu"], 1 / sqrt(data["tau"])),
               lambda data: stats.norm(data["μ"], 1 / sqrt(data["τ"]))],
    "uniform": [lambda data: stats.uniform(data["a"], data["b"] - data["a"])],
    "poisson": [lambda data: stats.poisson(data["lambda"]),
                lambda data: stats.poisson(data["λ"])],
    "exponential": [lambda data: stats.expon(scale=1 / data["lambda"]),
                    lambda data: stats.expon(scale=1 / data["λ"]),
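
One plausible way such a decoder table could be consumed (not shown in the snippet above, so this is only a sketch): try each parameterisation until the serialised keys match. The data dict below is a made-up example using the first gamma entry's key names:

from scipy import stats

data = {"k": 2.0, "theta": 3.0}
for decode in distribution_decoders["gamma"]:
    try:
        dist = decode(data)
        break
    except KeyError:
        continue
print(dist.mean())   # shape * scale = 6.0 for this parameterisation
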
Example #39
def test(expected, found, message, tolerance=0.001):
    diff = abs(expected - found)
    if diff >= tolerance:
        exit('%s, but found %s != %s by %s.' %
             (message, expected, found, diff))


px1 = poisson(3).pmf(1)
test(0.149, px1, '1. Let X ∼ Pois(3). Find P(X = 1). Answer is (0.149)')

pxlt1 = poisson(3).cdf(1)
test(0.199, pxlt1, '2. Let X ∼ Pois(3). Find P(X ≤ 1). (0.199)')

pxgt1 = 1 - poisson(3).cdf(1)
test(0.801, pxgt1, '3. Let X ∼ Pois(3). Find P(X > 1). (0.801)')
grv = gamma(2, scale=1 / (1 / 3))

py_low = grv.cdf(0.5)
py_high = grv.cdf(1.5)
py_range = py_high - py_low
test(0.078, py_range,
     '4. Let Y ∼ Gamma(2, 1/3). Find P(0.5 < Y < 1.5). (0.078)')

pltz = norm(0, 1).ppf(0.975)
test(1.96, pltz,
     '5. Let Z ∼ N(0, 1). Find z such that P(Z < z) = 0.975. (1.96)')

rvn = norm(loc=0, scale=1)
zrange = rvn.cdf(1.96) - rvn.cdf(-1.96)
test(0.95, zrange, '6. Let Z ∼ N(0, 1). Find P(−1.96 < Z < 1.96). (0.95)')
Example #40
def test_write_distribution(standard_api):
    with standard_api as api:
        api.write_distribution("output-parameter", "example-distribution",
                               stats.gamma(1, scale=2))

pp_plot(logeados, stats.genpareto(c = parametros_pareto[0], 
                                loc = parametros_pareto[1],
                                scale=parametros_pareto[2]), 
        line = True,ax=ax2)
ax2.set_title('Pareto generalizada', fontsize=11)

pp_plot(logeados, stats.dweibull(c = parametros_weibull[0], 
                                loc = parametros_weibull[1],
                                scale=parametros_weibull[2]), 
        line = True,ax=ax3)
ax3.set_title('Weibull doble', fontsize=11)

pp_plot(logeados, stats.gamma(a = parametros_gamma[0], 
                                loc = parametros_gamma[1],
                                scale=parametros_gamma[2]), 
        line = True,ax=ax4)
ax4.set_title('Gamma', fontsize=11)

fig.tight_layout(pad=0.7)

fig.text(0.5, 0, 'Probabilidades teóricas', ha='center', va='center')
fig.text(0., 0.5, 'Probabilidades observadas', ha='center', va='center', rotation='vertical')

fig.suptitle('Gráfico de probabilidades observadas vs teóricas')
fig.subplots_adjust(top=0.86)

plt.show()

#%%
xlabel(r'Rate (s$^{-1}$)')
ylabel('PDF (s)')

def test_norm1():
    """
    Test that the posterior is normalized.
    """
    assert_approx_equal(np.trapz(pri1.post_pdf, dx=pri1.dr), 1., 2)  # match 1 to 2 digits


#-------------------------------------------------------------------------------
# 2nd case:  exp'l prior with scale (prior mean) 10., (n,T) = (16, 2)

# Prior:
scale = 10.
gamma1 = stats.gamma(1, scale=scale)  # a=1 is exp'l dist'n

pri2 = PoissonRateInference(T, n, gamma1.pdf, r_u)
pri2.plot(ls='g--')


#-------------------------------------------------------------------------------
# 3rd case:  flat prior with (n,T) = (80, 10)

n, T = 80, 10.  # data

pri3 = PoissonRateInference(T, n, flat_pdf, r_u)
pri3.plot(alpha=.5)


#-------------------------------------------------------------------------------
plt.rc('figure', titlesize=BIGGER_SIZE)  # fontsize of the figure title

# Figures path
figpath = 'figures/'
if not os.path.exists(figpath):
    os.makedirs(figpath)

inc_pars = 0.65, 1.57
symp_pars = 0.79, 1.23
no_symp_pars = 0.79, 1.23
crit_pars = 12.5, 0.8

I_inc = stats.lognorm(s=inc_pars[0], scale=np.exp(inc_pars[1]))
I_symp = stats.lognorm(s=symp_pars[0], scale=np.exp(symp_pars[1]))
I_no_symp = stats.lognorm(s=no_symp_pars[0], scale=np.exp(no_symp_pars[1]))
I_crit = stats.gamma(*crit_pars)

distribs = [I_inc, I_symp, I_no_symp, I_crit]
names = [
    r'Incubation time$\sim Lognormal({0}, {1}^2)$'.format(*reversed(inc_pars)),
    r'(A)symptomatic time$\sim Lognormal({0}, {1}^2)$'.format(
        *reversed(symp_pars)),
    r'Asymptomatic time$\sim Lognormal({0}, {1}^2)$'.format(
        *reversed(no_symp_pars)),
    r'Critical time$\sim Gamma({0}, {1})$'.format(*crit_pars),
]
savenames = [
    'incubation.pdf', 'symptomatic.pdf', 'asymptomatic.pdf', 'critical.pdf'
]

for i, dist in enumerate(distribs):
Example #44
wordCounts = list(word2count.values())
np.array(wordCounts).shape  # .shape is a function in numpy and pandas
len(wordCounts)  # len() is a system function


h1 = ggplot(pd.DataFrame(wordCounts, columns = ['wC']), aes(x = 'wC')) +\
    geom_histogram()
h2 = ggplot(pd.DataFrame(np.log(wordCounts), columns = ['wC']), aes(x = 'wC')) +\
    geom_histogram(binwidth = .5)
print(h2)

# a: shape para, loc: location para, scale: scale para
gammaA, gammaLoc, gammaScale = ss.gamma.fit(np.log(wordCounts))

myHist = plt.hist(np.log(wordCounts), 20, density=True)
rv = ss.gamma(gammaA, loc=gammaLoc, scale=gammaScale)
x = np.linspace(0.1, 12, 35)
plt.plot(x, rv.pdf(x), lw=2)
plt.show()

# In[7]:

# define the threshold above,  remove Qs and As with word count less than thQ and thA
thQs = 5
QsWords2Integer = {}
wordInt = 0
for word, count in word2count.items():
    if count >= thQs:
        QsWords2Integer[word] = wordInt
        wordInt += 1
Example #45
 lambda con1, con0: osp.beta(con1, con0),
 dist.BinomialProbs:
 lambda probs, total_count: osp.binom(n=total_count, p=probs),
 dist.BinomialLogits:
 lambda logits, total_count: osp.binom(n=total_count,
                                       p=_to_probs_bernoulli(logits)),
 dist.Cauchy:
 lambda loc, scale: osp.cauchy(loc=loc, scale=scale),
 dist.Chi2:
 lambda df: osp.chi2(df),
 dist.Dirichlet:
 lambda conc: osp.dirichlet(conc),
 dist.Exponential:
 lambda rate: osp.expon(scale=np.reciprocal(rate)),
 dist.Gamma:
 lambda conc, rate: osp.gamma(conc, scale=1. / rate),
 dist.HalfCauchy:
 lambda scale: osp.halfcauchy(scale=scale),
 dist.HalfNormal:
 lambda scale: osp.halfnorm(scale=scale),
 dist.LogNormal:
 lambda loc, scale: osp.lognorm(s=scale, scale=np.exp(loc)),
 dist.MultinomialProbs:
 lambda probs, total_count: osp.multinomial(n=total_count, p=probs),
 dist.MultinomialLogits:
 lambda logits, total_count: osp.multinomial(n=total_count,
                                             p=_to_probs_multinom(logits)),
 dist.Normal:
 lambda loc, scale: osp.norm(loc=loc, scale=scale),
 dist.Pareto:
 lambda alpha, scale: osp.pareto(alpha, scale=scale),
 def _kstest(self, alpha, beta, samples):
   # Uses the Kolmogorov-Smirnov test for goodness of fit.
   ks, _ = stats.kstest(samples, stats.gamma(alpha, scale=1 / beta).cdf)
   # Return True when the test passes.
   return ks < 0.02
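
A standalone version of the same goodness-of-fit check, with samples drawn from SciPy itself rather than from the sampler under test (parameters illustrative):

from scipy import stats

alpha, beta = 2.0, 3.0
samples = stats.gamma(alpha, scale=1 / beta).rvs(size=10000, random_state=0)
ks, _ = stats.kstest(samples, stats.gamma(alpha, scale=1 / beta).cdf)
print(ks < 0.02)
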
Example #47
# for each contact, which circuit it belongs to as an int ID
circuit_idx = df.circuit_idx
area = df.synaptic_area

edge_idx_to_circuit_idx = dict(zip(edge_idx, circuit_idx))

# for each edge, which circuit it belongs to as an int ID
edge_circuit_idx = np.array(
    [c for e, c in sorted(edge_idx_to_circuit_idx.items())],
    dtype=int
)

# estimate gamma of pooled

shape, loc, scale = stats.gamma.fit(area, floc=0)
estimate = stats.gamma(shape, loc, scale)

# ax: Axes
# fig, ax = plt.subplots(1, 1)
# ax.hist(area, density=True)
# x = np.linspace(1, area.max(), 100)
# ax.plot(x, estimate.pdf(x))
# plt.show(block=False)

# partial pooling

# shape2  rate2  rate1  rate0
#    \     /      /      /
#     \   /      /      /
#    shape1_x   /      /      <- distribution of graph edges
#        \     /      /
def check_distribution(kin, temp, ndof, kb=8.314e-3,
                       verbosity=2, screen=False, filename=None,
                       ene_unit=None, temp_unit=None):
    r"""
    Checks if a kinetic energy trajectory is Maxwell-Boltzmann distributed.

    .. warning: This is a low-level function. In addition to being less
       user-friendly, there is a higher probability of erroneous and / or
       badly documented behavior due to unexpected inputs. Consider using
       the high-level version based on the SimulationData object. See
       physical_validation.kinetic_energy.check_mb_ensemble for more
       information and full documentation.

    Parameters
    ----------
    kin : array-like
        Kinetic energy snapshots of the system.
    temp : float
        Target temperature of the system. Used to construct the
        Maxwell-Boltzmann distribution.
    ndof : float
        Number of degrees of freedom in the system. Used to construct the
        Maxwell-Boltzmann distribution.
    kb : float
        Boltzmann constant :math:`k_B`. Default: 8.314e-3 (kJ/mol).
    verbosity : int
        0: Silent.
        1: Print minimal information.
        2: Print result details.
        3: Print additional information.
        Default: 2.
    screen : bool
        Plot distributions on screen. Default: False.
    filename : string
        Plot distributions to `filename`.pdf. Default: None.
    ene_unit : string
        Energy unit - used for output only.
    temp_unit : string
        Temperature unit - used for output only.

    Returns
    -------
    result : float
        The p value of the test.

    See Also
    --------
    physical_validation.kinetic_energy.distribution : High-level version
    """

    # Discard burn-in period and time-correlated frames
    kin = trajectory.prepare(kin, verbosity=verbosity, name='Kinetic energy')
    kt = kb * temp

    if ndof <= 0:
        warnings.warn('Zero degrees of freedom!')
        p = float('nan')
    else:
        d, p = stats.kstest(kin, 'gamma', (ndof/2, 0, kt))

    # ====================== #
    # Plot to screen or file #
    # ====================== #
    do_plot = screen or filename is not None
    if do_plot:
        ana_dist = stats.gamma(ndof/2, scale=kt)
        ana_kin = np.linspace(ana_dist.ppf(0.0001),
                              ana_dist.ppf(0.9999), 200)
        ana_hist = ana_dist.pdf(ana_kin)

        tunit = ''
        if temp_unit is not None:
            tunit = temp_unit

        data = [{'y': kin,
                 'hist': int(len(kin)/150),
                 'args': dict(label='Trajectory', density=True, alpha=0.5)}]
        if ndof > 0:
            data.append(
                {'x': ana_kin,
                 'y': ana_hist,
                 'args': dict(label='Analytical T=' + str(temp) + tunit, lw=5)})

        unit = ''
        if ene_unit is not None:
            unit = ' [' + ene_unit + ']'

        plot.plot(data,
                  legend='lower left',
                  title='Kinetic energy distribution',
                  xlabel='Kinetic energy' + unit,
                  ylabel='Probability [%]',
                  sci_x=True,
                  percent=True,
                  filename=filename,
                  screen=screen)

    if verbosity > 0:
        if verbosity > 1:
            message = ('Kinetic energy distribution check (strict)\n'
                       'Kolmogorov-Smirnov test result: p = {:g}\n'
                       'Null hypothesis: Kinetic energy is Maxwell-Boltzmann distributed'.format(p))
        else:
            message = 'p = {:g}'.format(p)
        print(message)

    return p
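
# Minimal sketch of the core check above on synthetic data (not part of the
# function): kinetic energies drawn from the analytical Maxwell-Boltzmann
# distribution, i.e. gamma(ndof/2, scale=kb*temp), should give a KS p value
# that is typically not small. kb, temp and ndof below are illustrative.
import numpy as np
from scipy import stats

kb = 8.314e-3        # kJ/(mol K)
temp = 300.0         # K
ndof = 3000          # degrees of freedom
kt = kb * temp

mb_dist = stats.gamma(ndof / 2, scale=kt)
kin = mb_dist.rvs(size=5000, random_state=np.random.default_rng(1))

d, p = stats.kstest(kin, 'gamma', (ndof / 2, 0, kt))
print(f'KS statistic: {d:.4f}, p = {p:.3f}')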
# generate Dirichlet samples & plot them
for alpha in alphas:
    samples = st.dirichlet(alpha).rvs(N)

    ax = plt.axes(projection='3d')
    plt.title(r'$\alpha$ = {}'.format(alpha))
    ax.scatter(samples[:, 0], samples[:, 1], samples[:, 2])
    ax.view_init(azim=40)
    ax.set_xlabel(r'$p_1$')
    ax.set_ylabel(r'$p_2$')
    ax.set_zlabel(r'$p_3$')
    plt.show()

# use standardized Gamma distribution to generate Dirichlet
a = 3  # choose the parameter set of alpha
gamma1 = st.gamma(alphas[a][0]).rvs(size=(N, 1))
gamma2 = st.gamma(alphas[a][1]).rvs(size=(N, 1))
gamma3 = st.gamma(alphas[a][2]).rvs(size=(N, 1))
Diri = np.concatenate((gamma1, gamma2, gamma3), axis=1)

for i in range(N):
    # each component as normalized Gamma realization
    norm = sum(Diri[i, :])
    Diri[i, :] /= norm

ax = plt.axes(projection='3d')
plt.title(r'$\alpha$ = {}, gen from indep Gamma'.format(alphas[a]))
ax.scatter(Diri[:, 0], Diri[:, 1], Diri[:, 2])
ax.view_init(azim=40)
ax.set_xlabel(r'$p_1$')
ax.set_ylabel(r'$p_2$')
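
# Aside (not in the original snippet): the per-row normalization loop above can
# be vectorized, and the normalized gammas should have the Dirichlet mean
# alpha_i / sum(alpha). A small self-contained check with an illustrative alpha:
import numpy as np
from scipy import stats as st

alpha = np.array([2.0, 3.0, 5.0])   # illustrative concentration parameters
N = 10000
g = st.gamma(alpha).rvs(size=(N, 3), random_state=np.random.default_rng(0))
diri = g / g.sum(axis=1, keepdims=True)
print(diri.mean(axis=0))            # close to alpha / alpha.sum() = [0.2, 0.3, 0.5]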
Exemple #50
0

alphac = n / 2 + xi0


def betac(mu):
    return xi0 + 0.5 * np.sum((arrY - mu)**2)


# start simulation
muk, tauk = x0
flag = 0
while len(datalst) < size:
    rvmuc = stats.norm(muc(tauk), sigmac(tauk))
    muk = rvmuc.rvs()
    rvtauc = stats.gamma(alphac, scale=1 / betac(muk))
    tauk = rvtauc.rvs()
    flag += 1
    if flag == initnum:
        print(f'flag: {flag}, we get the first data')
        datalst.append([muk, tauk])
        rvynew = stats.norm(muk, np.sqrt(1 / tauk))
        ynewlst.append(rvynew.rvs())
    if flag > initnum and flag % step == 0:
        print(f'flag: {flag}, we get the  {(flag-initnum)//step+1}th data')
        datalst.append([muk, tauk])
        rvynew = stats.norm(muk, np.sqrt(1 / tauk))
        ynewlst.append(rvynew.rvs())

with open(f'./ass1/savedoc/p3{name1}.pkl', 'wb') as f:
    pickle.dump(datalst, f)
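
# The Gibbs step above relies on names (n, xi0, arrY, muc, sigmac, x0, datalst,
# initnum, step, ...) defined elsewhere in that file. Below is a minimal
# self-contained sketch of the same scheme under assumed semi-conjugate priors
# mu ~ N(mu0, 1/kappa0) and tau ~ Gamma(a0, rate=b0); every name and value here
# is illustrative, not the original's.
import numpy as np
from scipy import stats

rng = np.random.default_rng(0)
y = rng.normal(loc=2.0, scale=1.5, size=200)   # synthetic data
n, ybar = len(y), y.mean()
mu0, kappa0, a0, b0 = 0.0, 1e-3, 1e-3, 1e-3    # vague prior hyperparameters

mu, tau = ybar, 1.0                            # initial state
draws = []
for _ in range(5000):
    # mu | tau, y  ~  Normal with precision kappa0 + n*tau
    prec = kappa0 + n * tau
    mean = (kappa0 * mu0 + tau * n * ybar) / prec
    mu = stats.norm(mean, np.sqrt(1 / prec)).rvs(random_state=rng)
    # tau | mu, y  ~  Gamma with rate b0 + 0.5*sum((y - mu)^2), i.e. scale = 1/rate
    rate = b0 + 0.5 * np.sum((y - mu) ** 2)
    tau = stats.gamma(a0 + n / 2, scale=1 / rate).rvs(random_state=rng)
    draws.append((mu, tau))

draws = np.array(draws[1000:])                 # discard burn-in
print(draws[:, 0].mean(), 1 / np.sqrt(draws[:, 1].mean()))  # roughly 2.0 and 1.5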
def check_mean_std(kin, temp, ndof, kb, verbosity=2,
                   bs_repetitions=200,
                   screen=False, filename=None,
                   ene_unit=None, temp_unit=None):
    r"""
    Calculates the mean and standard deviation of a trajectory (+ bootstrap
    error estimates), and compares them to the theoretically expected values.

    .. warning: This is a low-level function. Additionally to being less
       user-friendly, there is a higher probability of erroneous and / or
       badly documented behavior due to unexpected inputs. Consider using
       the high-level version based on the SimulationData object. See
       physical_validation.kinetic_energy.check_mb_ensemble for more
       information and full documentation.

    Parameters
    ----------
    kin : array-like
        Kinetic energy snapshots of the system.
    temp : float
        Target temperature of the system. Used to construct the
        Maxwell-Boltzmann distribution.
    ndof : float
        Number of degrees of freedom in the system. Used to construct the
        Maxwell-Boltzmann distribution.
    kb : float
        Boltzmann constant :math:`k_B`.
    verbosity : int
        0: Silent.
        1: Print minimal information.
        2: Print result details.
        3: Print additional information.
        Default: 2.
    bs_repetitions : int
        Number of bootstrap samples used for error estimate. Default: 200.
    screen : bool
        Plot distributions on screen. Default: False.
    filename : string
        Plot distributions to `filename`.pdf. Default: None.
    ene_unit : string
        Energy unit - used for output only.
    temp_unit : string
        Temperature unit - used for output only.

    Returns
    -------
    result : Tuple[float]
        Distance of the estimated T(mu) and T(sigma) from the expected
        temperature, measured in standard deviations of the estimates.

    See Also
    --------
    physical_validation.kinetic_energy.distribution : High-level version
    """

    # Discard burn-in period and time-correlated frames
    kin = trajectory.prepare(kin, verbosity=verbosity, name='Kinetic energy')

    if ndof <= 0:
        warnings.warn('Zero degrees of freedom!')

    # ========================== #
    # Compute mu and sig of data #
    # ========================== #
    kt = temp * kb
    loc = 0
    ana_shape = ndof / 2
    ana_scale = kt
    ana_dist = stats.gamma(ana_shape, loc=loc, scale=ana_scale)

    if ndof > 0:
        temp_mu = 2 * np.mean(kin) / (ndof * kb)
        temp_sig = np.sqrt(2 / ndof) * np.std(kin) / kb
    else:
        temp_mu = 0
        temp_sig = 0

    # ======================== #
    # Bootstrap error estimate #
    # ======================== #
    mu = []
    sig = []
    for k in trajectory.bootstrap(kin, bs_repetitions):
        mu.append(np.mean(k))
        sig.append(np.std(k))
    std_mu = np.std(mu)
    std_sig = np.std(sig)
    if ndof > 0:
        std_temp_mu = 2 * std_mu / (ndof * kb)
        std_temp_sig = np.sqrt(2 / ndof) * std_sig / kb
    else:
        std_temp_mu = 0
        std_temp_sig = 0

    # ====================== #
    # Plot to screen or file #
    # ====================== #
    do_plot = screen or filename is not None
    if do_plot:
        ana_kin = np.linspace(ana_dist.ppf(0.0001),
                              ana_dist.ppf(0.9999), 200)
        ana_hist = ana_dist.pdf(ana_kin)

        tunit = ''
        if temp_unit is not None:
            tunit = temp_unit

        data = [{'y': kin,
                 'hist': int(len(kin)/150),
                 'args': dict(label='Trajectory', density=True, alpha=0.5)}]
        if ndof > 0:
            data.append(
                {'x': ana_kin,
                 'y': ana_hist,
                 'args': dict(label='Analytical T=' + str(temp) + tunit, lw=5)})

        unit = ''
        if ene_unit is not None:
            unit = ' [' + ene_unit + ']'

        plot.plot(data,
                  legend='best',
                  title='Kinetic energy distribution',
                  xlabel='Kinetic energy' + unit,
                  ylabel='Probability [%]',
                  sci_x=True,
                  percent=True,
                  filename=filename,
                  screen=screen)

    # ================ #
    # Output to screen #
    # ================ #
    if verbosity > 0:
        eunit = ''
        if ene_unit is not None:
            eunit = ' ' + ene_unit
        tunit = ''
        if temp_unit is not None:
            tunit = ' ' + temp_unit
        if verbosity > 1:
            message = ('Kinetic energy distribution check (non-strict)\n'
                       'Analytical distribution (T={2:.2f}{0:s}):\n'
                       ' * mu: {3:.2f}{1:s}\n'
                       ' * sigma: {4:.2f}{1:s}\n'
                       'Trajectory:\n'
                       ' * mu: {5:.2f} +- {7:.2f}{1:s}\n'
                       '   T(mu) = {9:.2f} +- {11:.2f}{0:s}\n'
                       ' * sigma: {6:.2f} +- {8:.2f}{1:s}\n'
                       '   T(sigma) = {10:.2f} +- {12:.2f}{0:s}'.format(
                           tunit, eunit,
                           temp, ana_dist.mean(), ana_dist.std(),
                           np.mean(kin), np.std(kin), std_mu, std_sig,
                           temp_mu, temp_sig, std_temp_mu, std_temp_sig))
        else:
            message = ('T(mu) = {1:.2f} +- {3:.2f}{0:s}\n'
                       'T(sigma) = {2:.2f} +- {4:.2f}{0:s}'.format(
                           tunit,
                           temp_mu, temp_sig, std_temp_mu, std_temp_sig))
        print(message)

    # ============= #
    # Return values #
    # ============= #
    nan = float('nan')
    if ndof > 0:
        r1 = np.abs(temp - temp_mu) / std_temp_mu
        r2 = np.abs(temp - temp_sig) / std_temp_sig
    else:
        r1 = nan
        r2 = nan
    return r1, r2
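
# Minimal sketch of the non-strict check above on synthetic data (not part of
# the function): estimate T from the mean and standard deviation of kinetic
# energies drawn from gamma(ndof/2, scale=kb*temp), and bootstrap the
# uncertainty with a plain resampling loop instead of trajectory.bootstrap.
import numpy as np
from scipy import stats

kb, temp, ndof = 8.314e-3, 300.0, 3000         # illustrative values
rng = np.random.default_rng(2)
kin = stats.gamma(ndof / 2, scale=kb * temp).rvs(size=2000, random_state=rng)

temp_mu = 2 * np.mean(kin) / (ndof * kb)
temp_sig = np.sqrt(2 / ndof) * np.std(kin) / kb

boot = np.array([rng.choice(kin, size=len(kin), replace=True) for _ in range(200)])
std_temp_mu = 2 * np.std(boot.mean(axis=1)) / (ndof * kb)
std_temp_sig = np.sqrt(2 / ndof) * np.std(boot.std(axis=1)) / kb

# distances from the target temperature, in standard deviations of the estimates
print(abs(temp - temp_mu) / std_temp_mu, abs(temp - temp_sig) / std_temp_sig)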
Exemple #52
0
    def plot_priors(self):
        '''Plot the prior distributions of the model parameters.'''
        E0_mean, E0_std, alpha_emax, beta_emax, alpha_H, beta_H, log10_ec50_mean, log10_ec50_std, alpha_obs, beta_obs = self.get_priors(
        )

        f, axes = plt.subplots(2, 3, figsize=(12, 7))

        # E0
        xx = np.linspace(0, 2, 50)
        rv = norm(E0_mean, E0_std)
        yy = rv.pdf(xx)
        axes.flat[0].set_title('E0 parameter')
        axes.flat[0].set_xlabel('E0')
        axes.flat[0].set_ylabel('probability')
        axes.flat[0].plot(xx, yy, 'r-')

        # EMAX
        xx = np.linspace(0, 2, 50)
        rv = gamma(alpha_emax, scale=1 / beta_emax, loc=0)
        yy = rv.pdf(xx)
        axes.flat[1].set_title('Emax parameter')
        axes.flat[1].set_xlabel('Emax')
        axes.flat[1].set_ylabel('probability')
        axes.flat[1].plot(xx, yy, 'r-')

        # H
        xx = np.linspace(0, 5, 100)
        rv = gamma(alpha_H, scale=1 / beta_H, loc=0)
        yy = rv.pdf(xx)
        axes.flat[2].set_title('Hill Coefficient (H) parameter')
        axes.flat[2].set_xlabel('H')
        axes.flat[2].set_ylabel('probability')
        axes.flat[2].plot(xx, yy, 'r-')

        # EC50
        xx = np.logspace(-7, 1, 100)
        rv = norm(log10_ec50_mean, log10_ec50_std)
        yy = rv.pdf(np.log10(xx))
        axes.flat[3].set_title('EC50 parameter')
        axes.flat[3].set_xlabel('EC50 [uM]')
        axes.flat[3].set_ylabel('probability')
        axes.flat[3].plot(xx, yy, 'r-')

        # Log10 EC50
        axes.flat[4].set_title('Log10 EC50 parameter [~ Normal]')
        axes.flat[4].set_xlabel('Log10( EC50 [uM] )')
        axes.flat[4].set_ylabel('probability')
        axes.flat[4].plot(np.log10(xx), yy, 'r-')

        # OBS
        xx = np.linspace(0, 5, 100)
        rv = gamma(alpha_obs, scale=1 / beta_obs, loc=0)
        yy = rv.pdf(xx)
        axes.flat[5].set_title('Observation Std parameter')
        axes.flat[5].set_xlabel('Obs. Std')
        axes.flat[5].set_ylabel('probability')
        axes.flat[5].plot(xx, yy, 'r-')

        plt.tight_layout()
        plt.show()
Exemple #53
0
    def __init__(self,
                 X,
                 Y,
                 R,
                 target_sparsity=0.01,
                 gamma0_v=1.0,
                 lambda_params=(1e-6, 1e-6),
                 nu_params=(1e-6, 1e-6),
                 xi=0.999999,
                 xi_prior_shape=(1, 1),
                 check_finite=True,
                 min_eigenval=0,
                 jitter=1e-6):
        """The Probit model used for modeling Sparse Regression using a Gaussian field. :cite:`Engelhardt2014`.

        .. math::

            y|X,\\beta,\\beta_0, \\nu \\propto \\mathcal{N}(\\beta_0 1_n + X \\beta, \\nu^{-1} I_n)

        Parameters
        ----------
        X : ndarray
           The predictor matrix of real numbers, n x p in size, where n is the no. of samples (genotypes) and p is the
           no. of features (SNPs).
        Y : ndarray
           The response vector of real numbers, n x 1 in size, with each value representing the phenotype value for the
           sample.
        R : ndarray
           The covariance matrix for the SNPs, p x p in size. The matrix may not be positive-definite, but is converted
           to one internally.
        target_sparsity : float
            The proportion of included predictors. For example, a value of 0.01 indicates that around 1% of total SNPs
            are expected to be included in our model. This value affects the probit threshold gamma_0 of the model.
        gamma0_v : float
            Variance of the probit threshold gamma_0
        lambda_params : tuple
            Shape parameter and Inverse-scale parameter of the gamma prior placed on the model parameter lambda, where
            lambda is the inverse squared global scale parameter for the regression weights.
        nu_params : tuple
            Shape parameter and Inverse-scale parameter of the gamma prior placed on the model parameter nu, where nu
            is the residual precision.
        xi : float
            The shrinkage constant in the interval [0,1] to regularize the covariance matrix towards the identity
            matrix. This ensures that the covariance matrix is positive definite.
            A larger xi value biases our estimate towards the supplied R matrix, a lower value biases it towards the
            identity matrix.
            If None, then xi is sampled from a beta distribution with shape parameters specified by the tuple
            xi_prior_shape.
        xi_prior_shape : tuple
            Shape parameters of the beta prior placed on the model parameter xi, specified as a 2-tuple of real values.
            This argument is ignored and xi is not sampled, if it is specified explicitly using the xi parameter.
        check_finite : bool
            Whether to check that the input matrices contain only finite numbers. Disabling may give a performance gain,
            but may result in problems (crashes, non-termination) if the inputs do contain infinities or NaNs.
            This parameter is passed on to several linear algebra functions in scipy internally.
        min_eigenval : float
            Minimum Eigenvalue we can accept in the covariance matrix. Any eigenvalues encountered below this threshold
            are set to zero, and the resulting covariance matrix normalized to give ones on the diagonal.
        jitter : float
            A small value to add to the diagonals of the covariance matrix to avoid conditioning issues.
        """

        self.X = X
        self.Y = Y
        self.R = Mvn(cov=R, min_eigenval=min_eigenval, jitter=jitter)

        self.N, self.P = self.X.shape

        self.nu_a, self.nu_b = nu_params

        self.check_finite = check_finite

        if xi is None:
            self.sample_xi = True
            self._xi_distribution = beta(*xi_prior_shape)
            self.xi = self._xi_distribution.mean()
        else:
            self.sample_xi = False
            self.xi = xi

        # Initialize scalar model distributions and the parameter values to their prior means.
        self._gamma0_distribution = norm(loc=norm.ppf(1.0 - target_sparsity),
                                         scale=gamma0_v)
        self.gamma0 = self._gamma0_distribution.mean()
        self._lambda_distribution = gamma(lambda_params[0],
                                          scale=1. / lambda_params[1])
        self.lamb = self._lambda_distribution.mean()
        self._nu_distribution = gamma(self.nu_a, scale=1. / self.nu_b)
        self.nu = self._nu_distribution.mean()

        # Cache for holding probit prior distributions (multivariate normal distributions with 0 mean and known
        # covariance, possibly adjusted by a shrinkage factor xi expressing our confidence in the covariance).
        # A single iteration of MCMC calls on many computations on this distribution, so caching improves performance
        # significantly. A small cache size works just as well as a large one,
        # because the most recently used distribution tends to be used repeatedly in a single MCMC step.
        self._probit_cache = Cache(maxsize=4)

        # A cache used to hold the marginal PPI (Posterior Probability of Inclusion) distributions
        # p(y | X, gamma, gamma_0, nu, lambda) ~ Normal(..)
        # A small cache size works just as well as a large one, because the most recently used distribution tends to
        # be used repeatedly in a single MCMC step.
        self._ppi_cache = Cache(maxsize=8)

        # Initialize the sparsity function by generating a random variate from the model's probit distribution
        self.gamma = self.probit_distribution(self.xi).rvs()
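
# Aside (illustrative, not part of the class): a gamma prior given by shape a
# and inverse scale (rate) b is frozen as gamma(a, scale=1/b), whose mean is
# a/b -- the value lambda and nu are initialized to above.
from scipy.stats import gamma

a, b = 1e-6, 1e-6                      # the default lambda_params / nu_params
print(gamma(a, scale=1 / b).mean())    # a / b = 1.0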
Exemple #54
0
"""

import random
import numpy as np
from scipy.stats import t, beta, lognorm, expon, gamma, poisson
import matplotlib.pyplot as plt

n = 100

# == Arbitrary collection of distributions == #
distributions = {
    "student's t with 10 degrees of freedom": t(10),
    "beta(2, 2)": beta(2, 2),
    "lognormal LN(0, 1/2)": lognorm(0.5),
    "gamma(5, 1/2)": gamma(5, scale=2),
    "poisson(4)": poisson(4),
    "exponential with lambda = 1": expon(1)
}

# == Create a figure and some axes == #
num_plots = 3
fig, axes = plt.subplots(num_plots, 1, figsize=(10, 10))

# == Set some plotting parameters to improve layout == #
bbox = (0., 1.02, 1., .102)
legend_args = {'ncol': 2, 'bbox_to_anchor': bbox, 'loc': 3, 'mode': 'expand'}
plt.subplots_adjust(hspace=0.5)

for ax in axes:
    # == Choose a randomly selected distribution == #
Exemple #55
0
 def rv(self):
     return stats.gamma(self.a, loc=self.mu, scale=1/self.b)
Exemple #56
0
 #np.random.seed(1)
 #for i in np.random.random(size=100):
 #    self.insert_value(30*(i-0.5))
 #self.show()
 #func = np.vectorize(self._UpperHull.value)
 #%%
 #np.random.seed(2)
 #samples = self.sample(10000)
 #print samples
 #plt.hist(samples)
 #from scipy.stats import kstest
 #kstest(samples,"norm")
 #%% Gamma k, theta
 k = 9
 theta = 0.5
 distro = stat.gamma(a=k, scale=0.5)
 h = np.vectorize(lambda x: (k - 1) * np.log(x) - x / theta)
 hprime = np.vectorize(lambda x: (k - 1) / x - 1 / theta)
 initial_knots = [1, 4.5, 7]
 xlb = -np.inf
 xub = np.inf
 self = ArsSampler(initial_knots, h, hprime, xlb, xub)
 np.random.seed(1)
 samples = self.sample(10000)
 xs = np.linspace(0, 10, 200)
 ax2 = fig1.add_subplot(2, 2, 2)
 ax2.plot(xs, gaussian_kde(samples)(xs))
 ax2.plot(xs, distro.pdf(xs))
 kstest(samples, distro.cdf)
 #%% Truncated Normal, cut above -1
 a = -1
    def get_posteriors(self, timeseries_type, plot=False):
        """
        Generate posteriors for R_t.

        Parameters
        ----------
        timeseries_type: TimeseriesType
            New X per day (cases, deaths etc).
        plot: bool
            If True, plot a cool looking est of posteriors.

        Returns
        -------
        dates: array-like
            Input data dates over the subset of indices available after windowing.
        posteriors: pd.DataFrame
            Posterior estimates for each timestamp with non-zero data.
        start_idx: int
            Index of first Rt value calculated from input data series
            #TODO figure out why this value sometimes truncates the series

        """
        dates, timeseries = self.get_timeseries(timeseries_type=timeseries_type)

        if len(timeseries) == 0:
            self.log.info("empty timeseries, skipping", timeseries_type=str(timeseries_type.value))
            return None, None, None
        else:
            self.log.info(
                "Analyzing posteriors for timeseries", timeseries_type=str(timeseries_type.value)
            )

        # (1) Calculate Lambda (the Poisson likelihood given the data) based on
        # the observed increase from t-1 cases to t cases.
        lam = timeseries[:-1].values * np.exp((self.r_list[:, None] - 1) / self.serial_period)

        # (2) Calculate each day's likelihood over R_t
        # Originally smoothed counts were rounded (as needed for sps.poisson.pmf below) which
        # doesn't work well for low counts and introduces artifacts at rounding transitions. Now
        # calculate for both ceiling and floor values and interpolate between to get smooth
        # behaviour
        ts_floor = timeseries.apply(np.floor).astype(int)
        ts_ceil = timeseries.apply(np.ceil).astype(int)
        ts_frac = timeseries - ts_floor

        likelihoods_floor = pd.DataFrame(
            data=sps.poisson.pmf(ts_floor[1:].values, lam),
            index=self.r_list,
            columns=timeseries.index[1:],
        )
        likelihoods_ceil = pd.DataFrame(
            data=sps.poisson.pmf(ts_ceil[1:].values, lam),
            index=self.r_list,
            columns=timeseries.index[1:],
        )
        # Interpolate between value for ceiling and floor of smoothed counts
        likelihoods = ts_frac * likelihoods_ceil + (1 - ts_frac) * likelihoods_floor

        # (3) Create the (now scaled up for low counts) Gaussian Matrix
        (current_sigma, process_matrix) = self.make_process_matrix(timeseries.median())

        # (3a) Normalize all rows to sum to 1
        process_matrix /= process_matrix.sum(axis=0)

        # (4) Calculate the initial prior. Gamma mean of "a" with mode of "a-1".
        prior0 = sps.gamma(a=2.5).pdf(self.r_list)
        prior0 /= prior0.sum()

        reinit_prior = sps.gamma(a=2).pdf(self.r_list)
        reinit_prior /= reinit_prior.sum()

        # Create a DataFrame that will hold our posteriors for each day
        # Insert our prior as the first posterior.
        posteriors = pd.DataFrame(
            index=self.r_list, columns=timeseries.index, data={timeseries.index[0]: prior0}
        )

        # We said we'd keep track of the sum of the log of the probability
        # of the data for maximum likelihood calculation.
        log_likelihood = 0.0

        # Initialize timeseries scale (used for auto sigma)
        scale = timeseries.head(1).item()

        # Setup monitoring for Reff lagging signal in daily likelihood
        monitor = utils.LagMonitor(debug=False)  # Set debug=True for detailed printout of daily lag

        # (5) Iteratively apply Bayes' rule
        loop_idx = 0
        for previous_day, current_day in zip(timeseries.index[:-1], timeseries.index[1:]):

            # Keep track of exponential moving average of scale of counts of timeseries
            scale = 0.9 * scale + 0.1 * timeseries[current_day]

            # Calculate process matrix for each day
            (current_sigma, process_matrix) = self.make_process_matrix(scale)

            # (5a) Calculate the new prior
            current_prior = process_matrix @ posteriors[previous_day]

            # (5b) Calculate the numerator of Bayes' Rule: P(k|R_t)P(R_t)
            numerator = likelihoods[current_day] * current_prior

            # (5c) Calculate the denominator of Bayes' Rule P(k)
            denominator = np.sum(numerator)

            # Execute full Bayes' Rule
            if denominator == 0:
                # Restart the baysian learning for the remaining series.
                # This is necessary since otherwise NaN values
                # will be inferred for all future days, after seeing
                # a single (smoothed) zero value.
                #
                # We understand that restarting the posteriors with the
                # re-initial prior may incur a start-up artifact as the posterior
                # restabilizes, but we believe it's the current best
                # solution for municipalities that have smoothed cases and
                # deaths that dip down to zero, but then start to increase
                # again.

                posteriors[current_day] = reinit_prior
            else:
                posteriors[current_day] = numerator / denominator

            # Monitors if posterior is lagging excessively behind signal in likelihood
            # TODO future can return cumulative lag and use to scale sigma up only when needed
            monitor.evaluate_lag_using_argmaxes(
                current_day=loop_idx,
                current_sigma=current_sigma,
                prev_post_am=posteriors[previous_day].argmax(),
                prior_am=current_prior.argmax(),
                like_am=likelihoods[current_day].argmax(),
                post_am=numerator.argmax(),
            )

            # Add to the running sum of log likelihoods
            log_likelihood += np.log(denominator)
            loop_idx += 1

        self.log_likelihood = log_likelihood

        if plot:
            plotting.plot_posteriors(x=posteriors)  # Returns Figure.
            # The interpreter will handle this as it sees fit. Normal builds never call plot flag.

        start_idx = -len(posteriors.columns)

        return dates[start_idx:], posteriors, start_idx
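
# Minimal sketch (with assumed names and synthetic counts, not the class above)
# of the core update: a discrete grid of R_t values, a gamma(a=2.5) initial
# prior, and a Poisson likelihood of today's count given yesterday's count and
# R_t. The real method adds smoothing, the Gaussian process matrix, and the
# re-initialization logic.
import numpy as np
from scipy import stats as sps

r_list = np.linspace(0, 10, 501)
serial_period = 5.0
counts = np.array([10, 12, 15, 20, 24, 30, 35])   # synthetic daily counts

prior = sps.gamma(a=2.5).pdf(r_list)
prior /= prior.sum()

posterior = prior
for k_prev, k_curr in zip(counts[:-1], counts[1:]):
    lam = k_prev * np.exp((r_list - 1) / serial_period)
    likelihood = sps.poisson.pmf(k_curr, lam)
    numerator = likelihood * posterior
    posterior = numerator / numerator.sum()

print(r_list[np.argmax(posterior)])   # rough point estimate of R_t on the last day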
 def icdf(x, alpha, beta):
     g = stats.gamma(alpha, 0, 1.0 / beta)
     return g.ppf(x)
 def eval_( d, v, *, ratio, beta ):
     return gamma( ratio(d)*beta(d) , 0, 1/beta(d) ).logpdf( v )
import numpy as np
from scipy.stats import gamma
import matplotlib.pyplot as plt
'''def f(x):
    return Gamma(x)'''

alpha_values = [1, 2, 3, 3, 3]
# alpha is the shape parameter
beta_values = [0.5, 0.5, 0.5, 1, 2]
# beta here is passed to scipy as the scale parameter theta (for a rate, use scale=1/beta)
color = ['b', 'r', 'g', 'y', 'm']
x = np.linspace(1E-6, 10, 1000)

fig, ax = plt.subplots(figsize=(12, 8))

for k, t, c in zip(alpha_values, beta_values, color):
    dist = gamma(k, 0, t)
    plt.plot(x, dist.pdf(x), c=c, label=r'$\alpha=%.1f,\ \theta=%.1f$' % (k, t))

plt.title('Gamma Distribution')
plt.xlim(0, 10)
plt.ylim(0, 2)

plt.xlabel('$x$')
plt.ylabel(r'$p(x|\alpha, \beta)$')

plt.legend(loc=0)
plt.show()
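
# Aside: scipy's gamma is parameterized by shape and scale. To use the
# shape/rate parameterization p(x | alpha, beta) instead, pass scale=1/beta.
# For example, with alpha=3:
from scipy.stats import gamma

rate_param = gamma(3, scale=1 / 2)     # shape alpha=3, rate beta=2
scale_param = gamma(3, scale=2)        # shape alpha=3, scale theta=2
print(rate_param.mean(), scale_param.mean())   # alpha*scale: 1.5 vs 6.0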