def _testZeroDensity(self, alpha): """Zero isn't in the support of the gamma distribution. But quantized floating point math has its limits. TODO(bjp): Implement log-gamma sampler for small-shape distributions. Args: alpha: float shape value to test """ try: from scipy import stats # pylint: disable=g-import-not-at-top except ImportError as e: tf_logging.warn("Cannot test zero density proportions: %s" % e) return allowable_zeros = { dtypes.float16: stats.gamma(alpha).cdf(np.finfo(np.float16).tiny), dtypes.float32: stats.gamma(alpha).cdf(np.finfo(np.float32).tiny), dtypes.float64: stats.gamma(alpha).cdf(np.finfo(np.float64).tiny) } failures = [] for use_gpu in [False, True]: for dt in dtypes.float16, dtypes.float32, dtypes.float64: sampler = self._Sampler( 10000, alpha, 1.0, dt, use_gpu=use_gpu, seed=12345) x = sampler() allowable = allowable_zeros[dt] * x.size allowable = allowable * 2 if allowable < 10 else allowable * 1.05 if np.sum(x <= 0) > allowable: failures += [(use_gpu, dt)] self.assertEqual([], failures)
def estimate_distribution(self, index_stats, queries, qrel=None):
    self._estimate_para(index_stats, queries, qrel)
    for qid in self._run.ranking:
        self._rel_distribution[qid] = gamma(self._k1[qid], 1 / self._theta1[qid])
        self._non_rel_distribution[qid] = gamma(self._k0[qid], 1 / self._theta0[qid])
def test_parameter(): # Test "Null" parameter p = Parameter() assert p.shape == (0,) assert p.rvs() == [] assert p.has_value is False assert p.is_random is False # Test values v = 1. p = Parameter(v, Positive()) assert p.value == v assert p.bounds.lower > 0 assert p.bounds.upper is None assert p.rvs() == v assert p.has_value is True assert p.is_random is False # Test distributions p = Parameter(gamma(1), Positive()) assert np.shape(p.rvs()) == () assert p.has_value is True assert p.is_random is True p = Parameter(gamma(1), Positive(), shape=(2,)) assert np.shape(p.rvs()) == (2,) assert Positive().check(p.rvs()) p = Parameter(gamma(1), Bound(1, 2), shape=(10, 5)) assert np.shape(p.rvs()) == (10, 5) assert Bound(1, 2).check(p.rvs())
def prob_alias(self, plot=False):
    """Returns tuple (threshold, probability)"""
    from scipy.stats import gamma  # scipy-ref.pdf Section 5.13 on page 390
    if plot:
        import matplotlib.pyplot as plt
        plt.ion()
        plt.clf()
    nd = self.get_all_noise_dists()
    a, loc, scale = gamma.fit(nd)
    ndrv = gamma(a, loc, scale)
    if plot:
        plt.hist(nd, density=True)  # 'normed' was removed in newer matplotlib
        x = range(max(nd))
        plt.plot(x, ndrv.pdf(x))
    icd = self.get_all_inter_chip_dists()
    a, loc, scale = gamma.fit(icd)
    icdrv = gamma(a, loc, scale)
    if plot:
        plt.hist(icd, density=True)
        x = range(max(icd))
        plt.plot(x, icdrv.pdf(x))
    # Here it goes!
    threshold = ndrv.ppf(0.997)
    if plot:
        plt.axvline(threshold)
    prob = icdrv.cdf(threshold)
    print('Noise 99.7%% threshold: %f, probability of aliasing: %1.3e' % (threshold, prob))
    return threshold, prob
def sample_hyperparameters(state): # http://bit.ly/1baZ3zf T = state['T'] num_samples = 10 # R aalpha = 5 balpha = 0.1 abeta = 0.1 bbeta = 0.1 bgamma = 0.1 # ? agamma = 5 # ? # for (int r = 0; r < R; r++) { for r in range(num_samples): # gamma: root level (Escobar+West95) with n = T eta = beta(state['gamma'] + 1, T).rvs() bloge = bgamma - np.log(eta) K = state['num_topics'] pie = 1. / (1. + (T * bloge / (agamma + K - 1))) u = bernoulli(pie).rvs() state['gamma'] = gamma(agamma + K - 1 + u, 1. / bloge).rvs() # alpha: document level (Teh+06) qs = 0. qw = 0. for m, doc in enumerate(state['docs']): qs += bernoulli(len(doc) * 1. / (len(doc) + state['alpha'])).rvs() qw += np.log(beta(state['alpha'] + 1, len(doc)).rvs()) state['alpha'] = gamma(aalpha + T - qs, 1. / (balpha - qw)).rvs() state = update_beta(state, abeta, bbeta) return state
def __getGammafilter(a, lamda, negativeDays, hardness):
    """
    :param a:             shape factor of a gamma distribution
    :param lamda:         scale factor of a gamma distribution (sometimes scale=1/lamda as in scipy)
    :param negativeDays:  How many days back should the change have an effect? Eg 1.5 goes to noon two days back.
    :param hardness:      Should the filter smooth or spread? Eg: if hardness = 0.5 and applied to only one
                          rain event the cloud cover will be 0.5 that day.
    :return:              A gamma filter
    """
    from scipy.stats import gamma
    from scipy.integrate import quad

    # find the top of the gamma distribution. This will be noon (12:00) on the day with rain
    gdst = gamma(a, scale=1/lamda)
    increase = True     # initial value
    delta = 0.01
    x = 0.01            # initial value
    while increase == True:
        h = gdst.pdf(x+delta) - gdst.pdf(x)
        if h < 0:
            increase = False
        else:
            x = x + delta
    # So. x is where the top of the function is.

    # Update the gamma distribution with this shift so it has its maximum on x = 0
    gdst = gamma(a, loc=-x, scale=1/lamda)

    # First I make the weights for the days prior to the event (x < 0)
    delta = x/(negativeDays+0.5)
    distr = quad(lambda x: gdst.pdf(x), 0, -delta/2)
    intFrom = -delta/2
    gammaFilter = [-distr[0]]

    while -distr[0] > 0.05:
        distr = quad(lambda x: gdst.pdf(x), intFrom, intFrom-delta)
        intFrom = intFrom - delta
        gammaFilter.append(-distr[0])
    gammaFilter.reverse()

    # Then the weights for the positive days (x > 0)
    distr = quad(lambda x: gdst.pdf(x), 0, delta/2)
    intFrom = delta/2
    gammaFilter[-1] = gammaFilter[-1] + distr[0]

    while distr[0] > 0.05:
        distr = quad(lambda x: gdst.pdf(x), intFrom, intFrom+delta)
        intFrom = intFrom + delta
        gammaFilter.append(distr[0])

    # And then I divide the list by the weight for day = 0, thus weighing it to 1
    gammaFilterNorm = [x/max(gammaFilter)*hardness for x in gammaFilter]

    return gammaFilterNorm
def getPolarNoise(self, radius=500.0, base_eps=2.0,
                  LIMIT_NINETY_FIVE=False, NINETY_FIVE_DISTANCE=0.95):
    r_gen = gamma(2., scale=radius / base_eps)
    theta_gen = uniform(scale=2 * math.pi)
    r, theta = r_gen.rvs(), theta_gen.rvs()
    if LIMIT_NINETY_FIVE and r > NINETY_FIVE_DISTANCE:
        # redraw the radius; note that a frozen scipy distribution does not
        # accept a `size` argument at construction time
        r = r_gen.rvs()
    return (np.cos(theta) * r, np.sin(theta) * r)
def double_gamma_hrf(delay, tr, fptr=1.0, integrator=trapz): r"""The double gamma hemodynamic reponse function (HRF). The user specifies only the delay of the peak and undershoot. The delay shifts the peak and undershoot by a variable number of seconds. The other parameters are hardcoded. The HRF delay is modeled for each voxel independently. The form of the HRF and the hardcoded values are based on previous work [1]_. Parameters ---------- delay : float The delay of the HRF peak and undershoot. tr : float The length of the repetition time in seconds. fptr : float The number of stimulus frames per reptition time. For a 60 Hz projector and with a 1 s repetition time, the fptr would be equal to 60. It is possible that you will bin all the frames in a single TR, in which case fptr equals 1. integrator : callable The integration function for normalizing the units of the HRF so that the area under the curve is the same for differently delayed HRFs. Set integrator to None to turn off normalization. Returns ------- hrf : ndarray The hemodynamic response function to convolve with the stimulus timeseries. Reference ---------- .. [1] Glover, GH (1999) Deconvolution of impulse response in event related BOLD fMRI. NeuroImage 9, 416-429. """ from scipy.special import gamma # add delay to the peak and undershoot params (alpha 1 and 2) alpha_1 = 5/tr+delay/tr beta_1 = 1.0 c = 0.1 alpha_2 = 15/tr+delay/tr beta_2 = 1.0 t = np.arange(0,32,tr) hrf = ( ( ( t ** (alpha_1) * beta_1 ** alpha_1 * np.exp( -beta_1 * t )) /gamma( alpha_1 )) - c * ( ( t ** (alpha_2) * beta_2 ** alpha_2 * np.exp( -beta_2 * t )) /gamma( alpha_2 )) ) if integrator: # pragma: no cover hrf /= integrator(hrf) return hrf
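
# Illustrative sketch only (not part of the original module): a minimal
# double-gamma HRF built from two scipy.stats.gamma pdfs and convolved with a
# boxcar stimulus. The shape values and the 1/6 undershoot ratio below are
# conventional example defaults chosen for illustration, not taken from the
# function above.
import numpy as np
from scipy.stats import gamma as gamma_dist

tr = 1.0                                  # repetition time in seconds
t = np.arange(0, 32, tr)                  # HRF support
hrf = gamma_dist(6).pdf(t) - 1. / 6 * gamma_dist(16).pdf(t)
hrf /= np.trapz(hrf, t)                   # normalize the area under the curve

stimulus = np.zeros(100)
stimulus[10:15] = 1                       # 5 s boxcar starting at t = 10 s
bold = np.convolve(stimulus, hrf)[:len(stimulus)]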
def choose(self): if self.user_class == 'HF': self.name = "Log-norm" peak_hours_for_iat_hf = [1, 2, 3, 4, 5, 6] if self.hour in peak_hours_for_iat_hf: lognorm_shape, lognorm_scale, lognorm_location = 4.09174469261446, 1.12850165892419, 4.6875 else: lognorm_shape, lognorm_scale, lognorm_location = 3.93740014906562, 0.982210300411203, 3 return lognorm(lognorm_shape, loc=lognorm_location, scale=lognorm_scale) elif self.user_class == 'HO': self.name = "Gamma" peak_hours_for_iat_ho = [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] if self.hour in peak_hours_for_iat_ho: gamma_shape, gamma_rate, gamma_location = 1.25170029089175, 0.00178381168026473, 0.5 else: gamma_shape, gamma_rate, gamma_location = 1.20448161464647, 0.00177591076721503, 0.5 return gamma(gamma_shape, loc=gamma_location, scale=1. / gamma_rate) elif self.user_class == 'MF': self.name = "Gamma" peak_hours_for_iat_mf = [1, 2, 3, 4, 5, 6, 7, 22, 23] if self.hour in peak_hours_for_iat_mf: gamma_shape, gamma_rate, gamma_location = 2.20816848575484, 0.00343216949000565, 1 else: gamma_shape, gamma_rate, gamma_location = 2.03011412986896, 0.00342699308280547, 1 return gamma(gamma_shape, loc=gamma_location, scale=1. / gamma_rate) elif self.user_class == 'MO': self.name = "Gamma" peak_hours_for_iat_mo = [1, 2, 3, 4, 5, 6] if self.hour in peak_hours_for_iat_mo: gamma_shape, gamma_rate, gamma_location = 1.29908195595742, 0.00163527376977441, 0.5 else: gamma_shape, gamma_rate, gamma_location = 1.19210494792398, 0.00170354443324898, 0.5 return gamma(gamma_shape, loc=gamma_location, scale=1. / gamma_rate) elif self.user_class == 'LF': peak_hours_for_iat_lf = [1, 2, 3, 4, 5, 6, 7] if self.hour in peak_hours_for_iat_lf: self.name = "Gamma" gamma_shape, gamma_rate, gamma_location = 1.79297773527656, 0.00191590321039876, 2 return gamma(gamma_shape, loc=gamma_location, scale=1. / gamma_rate) else: self.name = "Weibull" weibull_c_shape, weibull_scale, weibull_location = 1.1988117443903, 827.961760834184, 1 return weibull_min(weibull_c_shape, loc=weibull_location, scale=weibull_scale) elif self.user_class == 'LO': peak_hours_for_iat_lo = [2, 3, 4, 10, 11, 12, 13, 14, 15, 17, 18, 19, 20] if self.hour in peak_hours_for_iat_lo: self.name = "Weibull" weibull_c_shape, weibull_scale, weibull_location = 0.850890858519732, 548.241539446292, 1 return weibull_min(weibull_c_shape, loc=weibull_location, scale=weibull_scale) else: self.name = "Gamma" gamma_shape, gamma_rate, gamma_location = 0.707816241615835, 0.00135537879658998, 1 return gamma(gamma_shape, loc=gamma_location, scale=1. / gamma_rate) else: raise Exception('The user class %s does not exist' % self.user_class)
def __init__(self, temporal_deriv=False, tr=2, oversampling=16,
             kernel_secs=32, pos_shape=6, pos_scale=1,
             neg_shape=16, neg_scale=1, ratio=1. / 6):
    """Create the HRF object with FSL parameters as default."""
    self._rv_pos = gamma(pos_shape, scale=pos_scale)
    self._rv_neg = gamma(neg_shape, scale=neg_scale)

    self._tr = tr
    self._oversampling = oversampling
    dt = tr / oversampling
    # np.linspace requires an integer number of samples
    self._timepoints = np.linspace(0, kernel_secs, int(kernel_secs / dt))

    self._temporal_deriv = temporal_deriv
    self._ratio = ratio
def update_wrapper(infoname, priorname, outname, outavgname):
    G = update(infoname, priorname)
    for e in G.edges():
        print(G[e[0]][e[1]]['params'])
        print(e, stats.gamma(G[e[0]][e[1]]['params'][0],
                             scale=G[e[0]][e[1]]['params'][1]).stats(moments='m'))
    nx.write_edgelist(G, outname)
    A = G.copy()
    for e in A.edges():
        p = stats.gamma(G[e[0]][e[1]]['params'][0],
                        scale=G[e[0]][e[1]]['params'][1]).stats(moments='m')
        # if p == nan: A[e[0]][e[1]]['weight'] = 0
        A[e[0]][e[1]]['weight'] = p
    nx.write_weighted_edgelist(A, outavgname, delimiter=',')
def __init__(self, temporal_deriv=False, tr=2, oversampling=16,
             kernel_secs=32, pos_shape=6, pos_scale=1,
             neg_shape=16, neg_scale=1, ratio=1 / 6):
    """Create the HRF object with FSL parameters as default."""
    self._rv_pos = gamma(pos_shape, scale=pos_scale)
    self._rv_neg = gamma(neg_shape, scale=neg_scale)

    self._tr = tr
    self._oversampling = oversampling
    dt = tr / oversampling
    # np.float was removed from NumPy; use the builtin float dtype
    self._timepoints = np.arange(0, kernel_secs, dt, float)
    self._sampled_timepoints = np.arange(0, kernel_secs, tr, float) + (tr * .5)
    self._kernel_secs = kernel_secs

    self._temporal_deriv = temporal_deriv
    self._ratio = ratio
def test_sklearn_cv(): model = LightFM(loss='warp', random_state=42) # Set distributions for hyperparameters randint = stats.randint(low=1, high=65) randint.random_state = 42 gamma = stats.gamma(a=1.2, loc=0, scale=0.13) gamma.random_state = 42 distr = {'no_components': randint, 'learning_rate': gamma} # Custom score function def scorer(est, x, y=None): return precision_at_k(est, x).mean() # Custom CV which sets train_index = test_index class CV(KFold): def __iter__(self): ind = np.arange(self.n) for test_index in self._iter_test_masks(): train_index = np.logical_not(test_index) train_index = ind[train_index] yield train_index, train_index cv = CV(n=train.shape[0], random_state=42) search = RandomizedSearchCV(estimator=model, param_distributions=distr, n_iter=10, scoring=scorer, random_state=42, cv=cv) search.fit(train) assert search.best_params_['no_components'] == 52
def pux_integration(x, sat, icorr, s, s2, start, end, pointNr): # see example usage in posterior_covariance.py # s[i] = linalg.inv(invC + z_a[i]/s2*ata) # icorr[i] = linalg.inv(s2*ident + z_a[i]**2*acat) # sat[i] = s[i]*A.T h = size(x, 0) w = size(x, 1) x = x.reshape(h*w) zarr = zeros(size(x)) c = zeros(pointNr) m = zeros((pointNr, size(sat,1))) zsum = 0 nsum = 0 z_a = linspace(start, end, pointNr) g_dist = stats.gamma(2., loc = 0., scale = 2.) for i in range(0,pointNr): z = z_a[i] c[i] = g_dist.pdf(z)*multi_norm(x, zarr, icorr[i], inverted=True) zsum += z*c[i] nsum += c[i] # s[i] = linalg.inv(invC + z/s2*ata) m[i] = z/s2*dot(sat[i],x) print (sum(c)) c = c/sum(c); # normalization return [c, m, zsum/nsum]
def run_kstests(json_path, run_date, member): try: full_path = json_path + "/{0}/{1}/mesh_*.json".format(run_date, member) json_files = sorted(glob(full_path)) ks_results = {"id":[], "ks":[]} for json_file in json_files: js = open(json_file) mesh_track = json.load(js) js.close() id = mesh_track["properties"]["id"] for m, mesh_obj in enumerate(mesh_track["features"]): step_id = id + "_{0:03d}".format(m) ts = np.array(mesh_obj["properties"]["timesteps"]) mask = np.array(mesh_obj["properties"]["masks"]) vals = ts[mask == 1] gdist = gamma.fit(vals, floc=vals.min()-0.1) sig = kstest(vals, gamma(*gdist).cdf) ks_results["id"].append(step_id) ks_results["ks"].append(sig) if sig[1] < 0.01: print(step_id,) print(sig[1],gdist) print(np.sort(vals)) plt.figure(figsize=(8,8)) plt.pcolormesh(ts, alpha=0.5, cmap="YlOrRd", vmin=0, vmax=100) pc = plt.pcolormesh(np.ma.array(ts, mask=mask==0), cmap="YlOrRd", vmin=0, vmax=100) plt.title(step_id) plt.colorbar(pc) plt.savefig(step_id + ".png", bbox_inches="tight", dpi=150) plt.close() ks_frame = pd.DataFrame(ks_results["ks"], index=ks_results["id"],columns=["D", "p-val"]) print(ks_frame.shape[0]) except Exception as e: raise e return ks_frame
def create_hist(flat_fits, p, low, high, draw=True, bins=20, fit_gamma=True, fit_normal=True):
    latex, getter = params[p]
    vals = np.array([getter(f) for f in flat_fits])
    vals = vals[(vals > low) & (vals < high)]
    pct_captured = int(100 * len(vals) / len(flat_fits))
    if draw:
        plt.figure()
        plt.hist(vals, bins, density=True, color='b')  # 'normed' was removed in matplotlib 3.x
        xmin, xmax = plt.xlim()
        plt.xlabel('x', fontsize=cfg.fontsize)
        plt.ylabel('p(x)', fontsize=cfg.fontsize)
        ttl1 = 'Distribution of parameter {} (Central mass: {}% of values)'.format(latex, pct_captured)
        ttl2 = '(created with low={}, high={})'.format(low, high)
        ttl = '\n'.join([ttl1, ttl2])
        if fit_gamma:
            alpha, loc, scale = stats.gamma.fit(vals)
            beta = 1 / scale
            rv = stats.gamma(alpha, loc, scale)
            x = np.linspace(loc, xmax, 100)
            prob = rv.pdf(x)
            plt.plot(x, prob, 'g', linewidth=3)
            ttl_fit = r'Gamma fit: $\alpha$={:.3f}, $\beta$={:.3f}, $loc$={:.3f}'.format(alpha, beta, loc)
            ttl = '\n'.join([ttl, ttl_fit])
        if fit_normal:
            loc, sigma = stats.norm.fit(vals)
            rv = stats.norm(loc, sigma)
            x = np.linspace(xmin, xmax, 100)
            prob = rv.pdf(x)
            plt.plot(x, prob, 'k', linewidth=3)
            ttl_fit = r'Normal fit: $loc$={:.3f}, $\sigma$={:.3f}'.format(loc, sigma)
            ttl = '\n'.join([ttl, ttl_fit])
        plt.title(ttl)
    return vals
def _setcompleteness(self, periodgridspacing, radiusgridspacing, comp): self.cdpp_cols = [k for k in self.stlr.keys() if k.startswith("rrmscdpp")] self.cdpp_vals = np.array([k[-4:].replace("p", ".") for k in self.cdpp_cols], dtype=float) # Pre-compute and freeze the gamma function from Equation (5) in # Burke et al. self.pgam = gamma(4.65, loc=0., scale=0.98) self.mesthres_cols = [k for k in self.stlr.keys() if k.startswith("mesthres")] self.mesthres_vals = np.array([k[-4:].replace("p", ".") for k in self.mesthres_cols], dtype=float) period = np.linspace(self.planetperiod[0], self.planetperiod[1], periodgridspacing) rp = np.linspace(self.planetradius[0], self.planetradius[1], radiusgridspacing) self.period_grid, self.rp_grid = np.meshgrid(period, rp, indexing="ij") self.koi_periods = np.array(self.kois.koi_period) self.koi_rps = np.array(self.kois.koi_prad) self.vol = np.diff(self.period_grid, axis=0)[:, :-1] * np.diff(self.rp_grid, axis=1)[:-1, :] if comp is None: comp = np.zeros_like(self.period_grid) for _, star in self.stlr.iterrows(): comp += self.get_completeness(star, self.period_grid, self.rp_grid, 0.0, with_geom=True) self.comp = comp else: self.comp = comp
def _testMoments(self, dt): try: from scipy import stats # pylint: disable=g-import-not-at-top except ImportError as e: tf_logging.warn("Cannot test moments: %s" % e) return # The moments test is a z-value test. This is the largest z-value # we want to tolerate. Since the z-test approximates a unit normal # distribution, it should almost definitely never exceed 6. z_limit = 6.0 for stride in 0, 1, 4, 17: alphas = [0.2, 1.0, 3.0] if dt == dtypes.float64: alphas = [0.01] + alphas for alpha in alphas: for scale in 9, 17: # Gamma moments only defined for values less than the scale param. max_moment = min(6, scale // 2) sampler = self._Sampler( 20000, alpha, 1 / scale, dt, use_gpu=False, seed=12345) z_scores = util.test_moment_matching( sampler(), max_moment, stats.gamma(alpha, scale=scale), stride=stride, ) self.assertAllLess(z_scores, z_limit)
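
# Illustrative sketch only: the z-test above compares sample moments of gamma
# draws against their analytic values. A minimal version of that comparison,
# assuming only numpy and scipy (no TensorFlow) and arbitrary example
# parameters, might look like this.
import numpy as np
from scipy import stats

rng = np.random.default_rng(12345)
alpha, scale = 2.0, 3.0
samples = rng.gamma(shape=alpha, scale=scale, size=200_000)

for k in range(1, 5):
    empirical = np.mean(samples ** k)                      # k-th sample moment
    analytic = stats.gamma(alpha, scale=scale).moment(k)   # k-th non-central moment
    print(k, empirical, analytic)                          # the two should agree closely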
def test_generic(self):
    import OpenPNM.Geometry.models.pore_diameter as mods
    func = spst.gamma(a=2, loc=0.001, scale=0.0001)
    self.geo.models.add(propname="throat.diameter",
                        model=mods.generic,
                        func=func,
                        seeds="throat.seed")
    assert sp.amin(self.geo["throat.diameter"]) > 0.001
    del self.geo["throat.diameter"]
def setNewEvidence(self, y): a = np.sum(y) b = 1 try : b = len(y) except: b = 1 a_new = self.a + a b_new = self.b + b # get new PDF self.rescale() y_new = np.zeros(shape=(len(self.y),),dtype=np.float) ##### use normal approximation for large a and b, unfortunately we reach large a and b very quickly #if (a_new > 1000): # y_new = self.normalApprox(a_new, b_new) #else: self.rv = gamma(a_new, scale=1.0/b_new) y_new = self.rv.pdf(self.x) ## just incase something messes up #if (any(np.isnan(y_new))): # y_new = self.normalApprox(a_new, b_new) # measure dKL and dJS before update self.measureDKL(y_new) self.measureDJS(y_new) # update self.a = a_new self.b = b_new self.y = y_new
def __init__(self, alpha, beta):
    self.alpha = alpha
    self.beta = beta
    # set dist before calling super's __init__
    self.dist = st.gamma(alpha, scale=beta)
    super(Gamma, self).__init__()
def _draw_gamma_rates(self): ''' Function to draw and assign rates from a discretized gamma distribution, if specified. By default, 4 categories are drawn. ''' if self.rate_probs is not None: print("\nThe provided value for the `rate_probs` argument will be ignored since gamma-distributed heterogeneity has been specified with the alpha parameter.") if type(self.k_gamma) is not int: raise TypeError("\nProvided argument `num_categories` must be an integer.") #### Note that this code is adapted from gamma.c in PAML #### rv = gamma(self.alpha, scale = 1./self.alpha) freqK = np.zeros(self.k_gamma) ### probs rK = np.zeros(self.k_gamma) ### rates for i in range(self.k_gamma-1): raw=rv.ppf( (i+1.)/self.k_gamma ) freqK[i] = gammainc(self.alpha + 1, raw*self.alpha) rK[0] = freqK[0] * self.k_gamma rK[self.k_gamma-1] = (1-freqK[self.k_gamma-2]) * self.k_gamma for i in range(1,self.k_gamma-1): rK[i] = self.k_gamma * (freqK[i] -freqK[i-1]) ############################################################# self.rate_probs = np.repeat(1./self.k_gamma, self.k_gamma) self.rate_factors = deepcopy(rK) if self.pinv > ZERO: self.rate_probs = list(self.rate_probs - self.pinv/self.k_gamma) + [self.pinv] self.rate_factors = list(self.rate_factors) + [0.0] self.rate_probs = np.array( self.rate_probs ) self.rate_factors = np.array( self.rate_factors )
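
# Illustrative sketch only: discretizing a mean-one gamma distribution into k
# equal-probability rate categories, mirroring the PAML-style calculation used
# above. `alpha` and `k` are example values, not taken from the class above.
import numpy as np
from scipy.stats import gamma
from scipy.special import gammainc

alpha, k = 0.5, 4
rv = gamma(alpha, scale=1. / alpha)          # mean-one gamma

# category boundaries at the 1/k, 2/k, ... quantiles
cuts = rv.ppf(np.arange(1, k) / k)

# partial expectations via the regularized incomplete gamma function
cdf_at_cuts = gammainc(alpha + 1, cuts * alpha)
rates = np.empty(k)
rates[0] = cdf_at_cuts[0] * k
rates[-1] = (1 - cdf_at_cuts[-1]) * k
rates[1:-1] = k * np.diff(cdf_at_cuts)

print(rates, rates.mean())                   # the mean of the category rates is 1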
def generate_slm_from_txt(training_rows, slm_dir, do_plot=True):
    slm_fxt = os.path.join(slm_dir, "slm.fxt")
    slength_counts = Counter()
    slen = 1
    maxl = 0
    # print(training_rows)
    for r in training_rows:
        r = r.strip()
        segs = r.split(BREAK)  # chop the line up into segments
        for s in segs:
            slen = len(s.split())
            if slen > maxl:
                print("new max length = ", slen)
                maxl = slen
                print("from seg: ", s)
                # print("from row: ", r)
                # _ = raw_input("hit key")
            if slen:
                slength_counts[slen] += 1
    # Counter.elements() returns an iterator that yields each element e
    # slength_counts[e] times; we make this into a list for fitting and plotting
    els = list(slength_counts.elements())
    print(els)
    x_vals = range(0, max(els) + 1)
    (shape, loc, scale) = gamma.fit(els, floc=0)
    # use these model params to build a new gamma distribution generator
    gam_gen = gamma(shape, loc, scale)
    write_slm(slm_fxt, x_vals, gam_gen)
    if do_plot:
        plot_graph(x_vals, gam_gen, els)
    compile_slm(slm_dir)  # this last step compiles the slm to binary .fst format
def test_slicesample():
    from scipy.stats import gamma
    import matplotlib.pyplot as plt

    n_iter = 1000

    # Gamma distribution (bounded on left)
    print("Gamma test")
    g = gamma(2.0, loc=0., scale=2.0)
    smpls = np.zeros(n_iter)
    smpls[0] = g.rvs()
    for n in np.arange(1, n_iter):
        sn, _ = slicesample(smpls[n - 1], g.logpdf, lb=1e-5)
        smpls[n] = sn

    print("Expected gamma mean: ", g.mean())
    print("Inferred gamma mean: ", smpls.mean())
    print("Expected gamma std: ", g.std())
    print("Inferred gamma std: ", smpls.std())

    fig, ax = plt.subplots(1, 1)
    x = np.linspace(1e-5, g.mean() + 4 * g.std(), 1000)
    ax.plot(x, g.pdf(x), 'k-', lw=2, label='true pdf')
    ax.hist(smpls, 25, density=True, alpha=0.2)  # 'normed' was removed in matplotlib 3.x
    ax.legend(loc='best', frameon=False)
    plt.show()
def gamma_dist(bin_values, K, M): """Gamma distribution function Parameters ---------- bin_values : array bin values for detecting photons eg : max photon counts is 8 bin_values = np.arange(8+2) K : int mean count of photons M : int number of coherent modes Returns ------- gamma_dist : array Gamma distribution Notes ----- These implementations are based on the references under the ``Notes`` section of the ``nbinom_dist()`` docstring .. math:: P(K) = \\frac{\Gamma(K + M)} {\Gamma(K + 1)\Gamma(M)} (\\frac {M} {M + <K>})^M (\\frac {<K>}{M + <K>})^K """ gamma_dist = (stats.gamma(M, 0., K/M)).pdf(bin_values) return gamma_dist
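
# Illustrative usage sketch, assuming numpy and `from scipy import stats`:
# evaluate the same gamma expression as gamma_dist() above for a mean count of
# K = 5 photons and M = 2 coherent modes over bins 0..9 (example values only).
import numpy as np
from scipy import stats

bin_values = np.arange(8 + 2)
K, M = 5, 2
pdf = stats.gamma(M, 0., K / M).pdf(bin_values)
print(pdf)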
def gen_gauss_diag_lpost(num_datasets, dims, ev_params = [(80, 10), (40,10)], cov_var_const = 4, with_grad = False): def gen_lp_unnorm_ev(lev, distr_norm, with_grad = False): # print(distr_norm.mu, distr_norm.K) rval = lambda x:distr_norm.logpdf(x) + lev rval.log_evidence = lev if with_grad: rval.lpdf_and_grad = lambda x, pdf, grad: distr_norm.log_pdf_and_grad(x, pdf, grad) return rval rval = [] for ep in ev_params: lev_distr = stats.gamma(ep[0], scale=ep[1]) for i in range(int(num_datasets//len(ev_params))): while True: try: m = stats.multivariate_normal.rvs([0] * dims, np.eye(dims)*1000) K = np.eye(dims) val = gen_lp_unnorm_ev(-lev_distr.rvs(), mvnorm(m, K), with_grad = with_grad) val.mean = m val.cov = K rval.append(val) break except np.linalg.LinAlgError: import sys #The Matrix from the niw was not invertible. Try again. print("np.linalg.LinAlgError - trying again", file=sys.stderr) pass return rval
def gamma_distribution(sigma): """ Returns a normalized gamma distribution kernel for convolutions """ from scipy.stats import gamma k = sigma t = np.sqrt(sigma) dist = gamma(k, 0, t) x = np.arange(0.0, 10.0 * sigma, 1.0) x[0] = 1e-20 g = dist.pdf(x) shift = np.argmax(dist.pdf(x)) g = np.concatenate((np.zeros(int(10.0 * sigma) - 1), g)) g = g[shift:-1] g = np.append(g, np.zeros(shift)) # import matplotlib.pyplot as plt # plt.plot(g) # plt.show() # print x[np.argmax(dist.pdf(x))] # print dist.mean(), dist.median(), np.sqrt(dist.var()) return g / g.sum()
def Consumed_SKU(self):
    # Function that determines which SKUs are consumed.
    check = True
    while self.Time < self.Sim_Length:
        while self.Time < self.Cycle_Length * (self.Order_Amount + 1):
            arrival = gamma(self.Num_SKU, scale=self.Demand).rvs()
            self.Erlang_Nums.append(arrival)
            Location = randint(0, self.Num_SKU - 1)
            # Analyzes whether an order is needed based on time.
            if self.Time + arrival <= self.Cycle_Length * (self.Order_Amount + 1):
                self.Time += arrival
                self.Check_Lead_Time()
            else:
                break
            if self.Check_SKU(Location):
                check = self.Empty_SKU(Location)
            else:
                print("***** Unable to consume SKU *****\n")
                # If unable to consume an SKU, will try another and output an error.
                while check == False:
                    Location = randint(0, self.Num_SKU - 1)
                    check = self.Empty_SKU(Location)
        self.Replenishment()
        self.Time += arrival
        # When the warm-up period is over, clear relevant information.
        if int(self.Time) == self.Warmup_Time:
            self.Clear()
    self.Total_Cost()
def test2(graph, r=1.2, iterations=1000, bounds=(0, 20), steps=1000): N = len(graph.vertices()) fitness = [1, r] prior = stats.gamma(2, scale=0.5).pdf partition = Partition(bounds[0], bounds[1], steps) prior_points = partition.map(prior) table = likelihood_table(N, partition.points) #means = [] #modes = [] l = [] for i in range(iterations): occupation = [0]*N occupation[::2] = [1]*(len(occupation[::2])) #occupation[:N2//2] = [1]*(len(occupation[:N2//2])) pop = PopulationOnGraph(graph, occupation, fitness) tuples = list(pop) #conjugate_parameters = tuples_to_conjugate_parameters(N, tuples[:-1]) #posterior = construct_posterior(N, conjugate_parameters, prior_points, partition, table) #mean = posterior.mean()[0] #mode = posterior.mode()[0] #means.append(mean) #modes.append(mode) #print mean, mode l.append(len(tuples)) return numpy.mean(l), numpy.std(l)
def gen_mm_lpost(num_datasets,num_modes, dims, ev_params = [(80, 10), (40,10)], cov_var_const = 1.5, ): def gen_lp_unnorm_ev(lev, mixt): rval = lambda x:mixt.logpdf(x) + lev rval.log_evidence = lev return (rval, lev) rval = [] for ep in ev_params: lev_distr = stats.gamma(ep[0], scale=ep[1]) for i in range(int(num_datasets//len(ev_params))): mode_p = np.random.dirichlet([100] * num_modes) mode_d = [] m = stats.multivariate_normal.rvs([0] * dims, np.eye(dims)*10) while True: try: K = invwishart_rv(np.eye(dims) * cov_var_const , dims) print(K) mode_mean_dist = stats.multivariate_normal(m, K) break except: pass while len(mode_d) != num_modes: try: mode_d.append(mvnorm(mode_mean_dist.rvs(), invwishart_rv(K, dims))) except: #The Matrix from the niw was not invertible. Try again. pass mixt = GMM(num_modes, dims) mixt.comp_lprior = np.log(mode_p) mixt.comp_dist = mode_d rval.append(gen_lp_unnorm_ev(-lev_distr.rvs(), mixt)) return rval
import sys import time import random import datetime from scipy.stats import gamma sys.path.append("..") from db_builder import db_block_time_stamp BLOCK_TIME = db_block_time_stamp.init() SIZE_DB = [0, 32929106, 33036408, 32872161, 32784690] MIX_SIZE = 0 SIMU_TIMES = 250 ALPHA = 13.19 BETA = 0.86 D = gamma(ALPHA, scale=1 / BETA) def main(): try: th = int(sys.argv[1]) bin_num = int(sys.argv[2]) bin_size = int(sys.argv[3]) except: th = int(input("DB no.(1-4): ")) bin_num = int(input("bin num: ")) bin_size = int(input("bin size: ")) info = simulate(th, bin_num, bin_size, SIZE_DB[th], SIMU_TIMES) write_log( th, "../../result/lab4-th{}-binnum{}-binsize{}.txt".format( th, bin_num, bin_size), info)
('scl', StandardScaler()), ( 'lin', Ridge( solver='sparse_cg', tol=0.001, # optimizer termination criteria # alpha = 1.0, # L2 regulization alpha=C^{-1} fit_intercept=True, normalize=False, # done in the pipeline copy_X=True, max_iter=1000, # for CG solver )) ]) hyper = { 'lin__alpha': ss.gamma(a=1.5, loc=1e-5, scale=.7), # alpha ~ [0.001, 10] } meta = { 'id': "simi8", 'name': 'LinReg Ridge', 'descriptions': ("Ridge Regression (L2 penalty), Conjugate Gradient solver, " "standard-normal transformed features."), 'solver': 'Conjugate Gradient', 'active': True, 'keywords': [
def estimate_tweedie_loglike_series(x, mu, phi, p):
    """Estimate the log likelihood of a given set of x, mu, phi, and p

    Parameters
    ----------
    x : array
        The observed values. Must be non-negative.
    mu : array
        The fitted values. Must be positive.
    phi : array
        The scale parameter. Must be positive.
    p : array
        The Tweedie variance power. Must equal 0 or must be greater than
        or equal to 1.

    Returns
    -------
    ll : array
        The estimated log likelihood of each observation.
    """
    x = np.array(x, ndmin=1)
    mu = np.array(mu, ndmin=1)
    phi = np.array(phi, ndmin=1)
    p = np.array(p, ndmin=1)

    ll = np.ones_like(x) * -np.inf

    # Gaussian (Normal)
    gaussian_mask = p == 0.
    if np.sum(gaussian_mask) > 0:
        ll[gaussian_mask] = norm(loc=mu[gaussian_mask],
                                 scale=np.sqrt(phi[gaussian_mask])).logpdf(x[gaussian_mask])

    # Poisson
    poisson_mask = p == 1.
    if np.sum(poisson_mask) > 0:
        poisson_pdf = poisson(mu=mu[poisson_mask] / phi[poisson_mask]).pmf(
            x[poisson_mask] / phi[poisson_mask]) / phi[poisson_mask]
        ll[poisson_mask] = np.log(poisson_pdf)

    # 1 < p < 2
    ll_1to_2_mask = (1 < p) & (p < 2)
    if np.sum(ll_1to_2_mask) > 0:
        # Calculating the log likelihood at x == 0 is pretty straightforward
        zeros = x == 0
        mask = zeros & ll_1to_2_mask
        ll[mask] = -(mu[mask]**(2 - p[mask]) / (phi[mask] * (2 - p[mask])))
        mask = ~zeros & ll_1to_2_mask
        ll[mask] = ll_1to2(x[mask], mu[mask], phi[mask], p[mask])

    # Gamma
    gamma_mask = p == 2
    if np.sum(gamma_mask) > 0:
        ll[gamma_mask] = gamma(a=1 / phi[gamma_mask],
                               scale=phi[gamma_mask] * mu[gamma_mask]).logpdf(x[gamma_mask])

    # (2 < p < 3) or (p > 3)
    ll_2plus_mask = ((2 < p) & (p < 3)) | (p > 3)
    if np.sum(ll_2plus_mask) > 0:
        zeros = x == 0
        mask = zeros & ll_2plus_mask
        ll[mask] = -np.inf
        mask = ~zeros & ll_2plus_mask
        ll[mask] = ll_2orMore(x[mask], mu[mask], phi[mask], p[mask])

    # Inverse Gaussian (Normal)
    invgauss_mask = p == 3
    if np.sum(invgauss_mask) > 0:
        cond1 = invgauss_mask
        cond2 = x > 0
        mask = cond1 & cond2
        ll[mask] = invgauss(mu=mu[mask] * phi[mask],
                            scale=1. / phi[mask]).logpdf(x[mask])

    return ll
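
# Illustrative sketch only: at variance power p == 2 the Tweedie log likelihood
# reduces to a gamma density with shape 1/phi and scale phi*mu, which is the
# p == 2 branch of the function above. The arrays below are arbitrary example
# data, not from the original module.
import numpy as np
from scipy.stats import gamma

x = np.array([0.5, 1.2, 3.4])
mu = np.array([1.0, 1.5, 2.0])
phi = np.array([0.3, 0.3, 0.3])

ll = gamma(a=1 / phi, scale=phi * mu).logpdf(x)
print(ll)    # per-observation log likelihood at p == 2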
def run_platformqc(data_path, output_path, *, suffix=None, b_width=1000): if not suffix: suffix = "" else: suffix = "_" + suffix log_path = os.path.join(output_path, "log", "log_sequel_platformqc" + suffix + ".txt") fig_path = os.path.join(output_path, "fig", "fig_sequel_platformqc_length" + suffix + ".png") fig_path_bar = os.path.join( output_path, "fig", "fig_sequel_platformqc_adapter" + suffix + ".png") json_path = os.path.join(output_path, "QC_vals_sequel" + suffix + ".json") # json tobe_json = {} # output_path will be made too. if not os.path.isdir(os.path.join(output_path, "log")): os.makedirs(os.path.join(output_path, "log"), exist_ok=True) if not os.path.isdir(os.path.join(output_path, "fig")): os.makedirs(os.path.join(output_path, "fig"), exist_ok=True) ### logging conf ### logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) fh = logging.FileHandler(log_path, 'w') sh = logging.StreamHandler() formatter = logging.Formatter( '%(module)s:%(asctime)s:%(lineno)d:%(levelname)s:%(message)s') fh.setFormatter(formatter) sh.setFormatter(formatter) logger.addHandler(sh) logger.addHandler(fh) ##################### logger.info("Started sequel platform QC for %s" % data_path) # sequel xml_file = get_sts_xml_path(data_path, logger) if not xml_file: logger.warning("sts.xml is missing. Productivity won't be shown") [p0, p1, p2] = [None] * 3 else: [p0, p1, p2] = parse_sts_xml( xml_file, ns="http://pacificbiosciences.com/PacBioBaseDataModel.xsd") logger.info("Parsed sts.xml") [subr_bam_p, scrap_bam_p] = get_bam_path(data_path, logger) if subr_bam_p and scrap_bam_p: scrap_bam = pysam.AlignmentFile(scrap_bam_p, 'rb', check_sq=False) subr_bam = pysam.AlignmentFile(subr_bam_p, 'rb', check_sq=False) else: logger.ERROR("Platform QC failed due to missing bam files") return 1 bam_reads = {} snr = [[], [], [], []] hr_fraction = [] tot_lengths = [] hr_lengths = [] ad_num_stat = {} control_throughput = 0 if get_readtype(scrap_bam.header) == 'SCRAP': logger.info("Started to load scraps.bam...") control_throughput = set_scrap(bam_reads, scrap_bam, snr) else: logger.ERROR("the given scrap file has incorrect header.") logger.info("Scrap reads were loaded.") if get_readtype(subr_bam.header) == 'SUBREAD': logger.info("Started to load subreads.bam...") set_subreads(bam_reads, subr_bam, snr) else: logger.ERROR("the given subread file has incorrect header.") logger.info("Subreads were loaded.") for k, v in bam_reads.items(): #print(k) l = construct_polread(v) #print(l) if l[4]: hr_fraction.append(l[2] / l[3]) tot_lengths.append(l[3]) hr_lengths.append(l[2]) if l[5] in ad_num_stat: ad_num_stat[l[5]] += 1 else: ad_num_stat[l[5]] = 1 max_adnum = max(ad_num_stat.keys()) min_adnum = min(ad_num_stat.keys()) left = [] height = [] for i in range(min_adnum, max_adnum + 1): left.append(i) if i in ad_num_stat: height.append(ad_num_stat[i]) else: height.append(0) plt.bar(left, height) plt.savefig(fig_path_bar, bbox_inches="tight") plt.close() logger.info("Plotted bar plot for adpter occurence") (a, b) = lq_gamma.estimate_gamma_dist_scipy(hr_lengths) logger.info("Fitting by Gamma dist finished.") _max = np.array(hr_lengths).max() _mean = np.array(hr_lengths).mean() _n50 = get_N50(hr_lengths) _n90 = get_NXX(hr_lengths, 90) throughput = np.sum(hr_lengths) longest = np.max(hr_lengths) fracs = np.mean(hr_fraction) tobe_json["Productivity"] = {"P0": p0, "P1": p1, "P2": p2} tobe_json["Throughput"] = int(throughput) tobe_json["Throughput(Control)"] = int(control_throughput) tobe_json["Longest_read"] = int(_max) 
tobe_json["Num_of_reads"] = len(hr_lengths) tobe_json["polread_gamma_params"] = [float(a), float(b)] tobe_json["Mean_polread_length"] = float(_mean) tobe_json["N50_polread_length"] = float(_n50) tobe_json["Mean_HQ_fraction"] = float(np.mean(fracs)) tobe_json["Adapter_observation"] = ad_num_stat with open(json_path, "w") as f: logger.info("Quality measurements were written into a JSON file: %s" % json_path) json.dump(tobe_json, f, indent=4) x = np.linspace(0, gamma.ppf(0.99, a, 0, b)) est_dist = gamma(a, 0, b) plt.plot(x, est_dist.pdf(x), c=rgb(214, 39, 40)) plt.grid(True) plt.hist(hr_lengths, histtype='step', bins=np.arange(min(hr_lengths), _max + b_width, b_width), color=rgb(214, 39, 40), alpha=0.7, normed=True) plt.xlabel('Read length') plt.ylabel('Probability density') if _mean >= 10000: # pol read mean is expected >= 10k and <= 15k, but omit the <= 15k condition. plt.axvline(x=_mean, linestyle='dashed', linewidth=2, color=rgb(44, 160, 44), alpha=0.8) else: plt.axvline(x=_mean, linestyle='dashed', linewidth=2, color=rgb(188, 189, 34), alpha=0.8) if _n50 >= 20000: plt.axvline(x=_n50, linewidth=2, color=rgb(44, 160, 44), alpha=0.8) else: plt.axvline(x=_n50, linewidth=2, color=rgb(188, 189, 34), alpha=0.8) plt.hist(tot_lengths, histtype='step', bins=np.arange(min(tot_lengths), max(tot_lengths) + b_width, b_width), color=rgb(31, 119, 180), alpha=0.7, normed=True) ymin, ymax = plt.gca().get_ylim() xmin, xmax = plt.gca().get_xlim() plt.text(xmax * 0.6, ymax * 0.72, r'$\alpha=%.3f,\ \beta=%.3f$' % (a, b)) plt.text(xmax * 0.6, ymax * 0.77, r'Gamma dist params:') plt.text(xmax * 0.6, ymax * 0.85, r'sample mean: %.3f' % (_mean, )) plt.text(xmax * 0.6, ymax * 0.9, r'N50: %.3f' % (_n50, )) plt.text(xmax * 0.6, ymax * 0.95, r'N90: %.3f' % (_n90, )) plt.text(_mean, ymax * 0.85, r'Mean') plt.text(_n50, ymax * 0.9, r'N50') plt.savefig(fig_path, bbox_inches="tight") plt.close() #plt.show() logger.info("Figs were generated.") logger.info("Finished all processes.")
shape=(img_size, img_size)) y[i, rr, cc] = 1 return y #%% # names (this is just for reference for the moment!) columns = ["x", "y", "radius", "dx", "dy"] # prior sampling function for each variable # (assumes x and y are coordinates in the range 0-img_size) prior_fn = independent_sample([ norm(loc=img_size / 2, scale=img_size / 2).rvs, norm(loc=img_size / 2, scale=img_size / 2).rvs, gamma(a=1, loc=0, scale=10).rvs, norm(loc=0, scale=0.5).rvs, norm(loc=0, scale=0.5).rvs, ]) # very simple linear dynamics: x += dx def velocity(x): dt = 1.0 print(x) xp = (x @ np.array([ [1, 0, 0, dt, 0], [0, 1, 0, 0, dt], [0, 0, 1, 0, 0], [0, 0, 0, 1, 0], [0, 0, 0, 0, 1],
def get_regularizer(regularizer):
    if isinstance(regularizer, float):
        reg = gamma(a=regularizer, scale=1)  # Initial weight prior
        regularizer = Parameter(reg, Positive())
    return regularizer
def get_var(var):
    if isinstance(var, float):
        var = gamma(a=var, scale=1)  # Initial target noise
        var = Parameter(var, Positive())
    return var
"p": list(distribution.p), } elif isinstance(distribution, stats.distributions.rv_frozen): name = distribution_name_mapping[distribution.dist.name] encoded_parameters = distribution_parameter_encoders[name]( *distribution_parameters(distribution) ) else: raise ValueError(f"Do not have a codec for {distribution}") return dict(type="distribution", distribution=name, **encoded_parameters) # Functions to decode serialised distributions. distribution_decoders = { "categorical": [lambda data: Categorical(data["bins"], data["weights"])], "gamma": [lambda data: stats.gamma(data["k"], scale=data["theta"]), lambda data: stats.gamma(data["k"], scale=data["θ"]), lambda data: stats.gamma(data["shape"], scale=data["scale"]), lambda data: stats.gamma(data["alpha"], scale=1 / data["beta"]), lambda data: stats.gamma(data["α"], scale=1 / data["β"]), lambda data: stats.gamma(data["shape"], scale=1 / data["rate"])], "normal": [lambda data: stats.norm(data["mu"], data["sigma"]), lambda data: stats.norm(data["μ"], data["σ"]), lambda data: stats.norm(data["μ"], sqrt(data["σ²"])), lambda data: stats.norm(data["mu"], 1 / sqrt(data["tau"])), lambda data: stats.norm(data["μ"], 1 / sqrt(data["τ"]))], "uniform": [lambda data: stats.uniform(data["a"], data["b"] - data["a"])], "poisson": [lambda data: stats.poisson(data["lambda"]), lambda data: stats.poisson(data["λ"])], "exponential": [lambda data: stats.expon(scale=1 / data["lambda"]), lambda data: stats.expon(scale=1 / data["λ"]),
def test(expected, found, message, tolerance=0.001): diff = abs(expected - found) if diff >= tolerance: exit('%s, but found %s != %s by %s.' % (message, expected, found, diff)) px1 = poisson(3).pmf(1) test(0.149, px1, '1. Let X ∼ Pois(3). Find P(X = 1). Answer is (0.149)') pxlt1 = poisson(3).cdf(1) test(0.199, pxlt1, '2. Let X ∼ Pois(3). Find P(X ≤ 1). (0.199)') pxgt1 = 1 - poisson(3).cdf(1) test(0.801, pxgt1, '3. Let X ∼ Pois(3). Find P(X > 1). (0.801)') grv = gamma(2, scale=1 / (1 / 3)) py_low = grv.cdf(0.5) py_high = grv.cdf(1.5) py_range = py_high - py_low test(0.078, py_range, '4. Let Y ∼ Gamma(2, 1/3). Find P(0.5 < Y < 1.5). (0.078)') pltz = norm(0, 1).ppf(0.975) test(1.96, pltz, '5. Let Z ∼ N(0, 1). Find z such that P(Z < z) = 0.975. (1.96)') rvn = norm(loc=0, scale=1) zrange = rvn.cdf(1.96) - rvn.cdf(-1.96) test(0.95, zrange, '6. Let Z ∼ N(0, 1). Find P(−1.96 < Z < 1.96). (0.95)')
def test_write_distribution(standard_api): with standard_api as api: api.write_distribution("output-parameter", "example-distribution", stats.gamma(1, scale=2))
pp_plot(logeados, stats.genpareto(c=parametros_pareto[0], loc=parametros_pareto[1],
                                  scale=parametros_pareto[2]), line=True, ax=ax2)
ax2.set_title('Generalized Pareto', fontsize=11)
pp_plot(logeados, stats.dweibull(c=parametros_weibull[0], loc=parametros_weibull[1],
                                 scale=parametros_weibull[2]), line=True, ax=ax3)
ax3.set_title('Double Weibull', fontsize=11)
pp_plot(logeados, stats.gamma(a=parametros_gamma[0], loc=parametros_gamma[1],
                              scale=parametros_gamma[2]), line=True, ax=ax4)
ax4.set_title('Gamma', fontsize=11)
fig.tight_layout(pad=0.7)
fig.text(0.5, 0, 'Theoretical probabilities', ha='center', va='center')
fig.text(0., 0.5, 'Observed probabilities', ha='center', va='center', rotation='vertical')
fig.suptitle('Observed vs theoretical probability plot')
fig.subplots_adjust(top=0.86)
plt.show()

#%%
xlabel(r'Rate (s$^{-1}$)') ylabel('PDF (s)') def test_norm1(): """ Test that the posterior is normalized. """ assert_approx_equal(np.trapz(pri1.post_pdf, dx=pri1.dr), 1., 2) # match 1 to 2 digits #------------------------------------------------------------------------------- # 2nd case: exp'l prior with scale (prior mean) 10., (n,T) = (16, 2) # Prior: scale = 10. gamma1 = stats.gamma(1, scale=scale) # a=1 is exp'l dist'n pri2 = PoissonRateInference(T, n, gamma1.pdf, r_u) pri2.plot(ls='g--') #------------------------------------------------------------------------------- # 3rd case: flat prior with (n,T) = (80, 10) n, T = 80, 10. # data pri3 = PoissonRateInference(T, n, flat_pdf, r_u) pri3.plot(alpha=.5) #-------------------------------------------------------------------------------
plt.rc('figure', titlesize=BIGGER_SIZE) # fontsize of the figure title # Figures path figpath = 'figures/' if not os.path.exists(figpath): os.makedirs(figpath) inc_pars = 0.65, 1.57 symp_pars = 0.79, 1.23 no_symp_pars = 0.79, 1.23 crit_pars = 12.5, 0.8 I_inc = stats.lognorm(s=inc_pars[0], scale=np.exp(inc_pars[1])) I_symp = stats.lognorm(s=symp_pars[0], scale=np.exp(symp_pars[1])) I_no_symp = stats.lognorm(s=no_symp_pars[0], scale=np.exp(no_symp_pars[1])) I_crit = stats.gamma(*crit_pars) distribs = [I_inc, I_symp, I_no_symp, I_crit] names = [ r'Incubation time$\sim Lognormal({0}, {1}^2)$'.format(*reversed(inc_pars)), r'(A)symptomatic time$\sim Lognormal({0}, {1}^2)$'.format( *reversed(symp_pars)), r'Asymptomatic time$\sim Lognormal({0}, {1}^2)$'.format( *reversed(no_symp_pars)), r'Critical time$\sim Gamma({0}, {1})$'.format(*crit_pars), ] savenames = [ 'incubation.pdf', 'symptomatic.pdf', 'asymptomatic.pdf', 'critical.pdf' ] for i, dist in enumerate(distribs):
wordCounts = list(word2count.values()) np.array(wordCounts).shape # .shape is a function in numpy and pandas len(wordCounts) # len() is a system function h1 = ggplot(pd.DataFrame(wordCounts, columns = ['wC']), aes(x = 'wC')) +\ geom_histogram() h2 = ggplot(pd.DataFrame(np.log(wordCounts), columns = ['wC']), aes(x = 'wC')) +\ geom_histogram(binwidth = .5) print(h2) # a: shape para, loc: location para, scale: scale para gammaA, gammaLoc, gammaScale = ss.gamma.fit(np.log(wordCounts)) myHist = plt.hist(np.log(wordCounts), 20, density=True) rv = ss.gamma(gammaA, loc=gammaLoc, scale=gammaScale) x = np.linspace(0.1, 12, 35) plt.plot(x, rv.pdf(x), lw=2) plt.show() # In[7]: # define the threshold above, remove Qs and As with word count less than thQ and thA thQs = 5 QsWords2Integer = {} wordInt = 0 for word, count in word2count.items(): if count >= thQs: QsWords2Integer[word] = wordInt wordInt += 1
lambda con1, con0: osp.beta(con1, con0), dist.BinomialProbs: lambda probs, total_count: osp.binom(n=total_count, p=probs), dist.BinomialLogits: lambda logits, total_count: osp.binom(n=total_count, p=_to_probs_bernoulli(logits)), dist.Cauchy: lambda loc, scale: osp.cauchy(loc=loc, scale=scale), dist.Chi2: lambda df: osp.chi2(df), dist.Dirichlet: lambda conc: osp.dirichlet(conc), dist.Exponential: lambda rate: osp.expon(scale=np.reciprocal(rate)), dist.Gamma: lambda conc, rate: osp.gamma(conc, scale=1. / rate), dist.HalfCauchy: lambda scale: osp.halfcauchy(scale=scale), dist.HalfNormal: lambda scale: osp.halfnorm(scale=scale), dist.LogNormal: lambda loc, scale: osp.lognorm(s=scale, scale=np.exp(loc)), dist.MultinomialProbs: lambda probs, total_count: osp.multinomial(n=total_count, p=probs), dist.MultinomialLogits: lambda logits, total_count: osp.multinomial(n=total_count, p=_to_probs_multinom(logits)), dist.Normal: lambda loc, scale: osp.norm(loc=loc, scale=scale), dist.Pareto: lambda alpha, scale: osp.pareto(alpha, scale=scale),
def _kstest(self, alpha, beta, samples):
    # Uses the Kolmogorov-Smirnov test for goodness of fit.
    ks, _ = stats.kstest(samples, stats.gamma(alpha, scale=1 / beta).cdf)
    # Return True when the test passes.
    return ks < 0.02
# for each contact, which circuit it belongs to as an int ID circuit_idx = df.circuit_idx area = df.synaptic_area edge_idx_to_circuit_idx = dict(zip(edge_idx, circuit_idx)) # for each edge, which circuit it belongs to as an int ID edge_circuit_idx = np.array( [c for e, c in sorted(edge_idx_to_circuit_idx.items())], dtype=int ) # estimate gamma of pooled shape, loc, scale = stats.gamma.fit(area, floc=0) estimate = stats.gamma(shape, loc, scale) # ax: Axes # fig, ax = plt.subplots(1, 1) # ax.hist(area, density=True) # x = np.linspace(1, area.max(), 100) # ax.plot(x, estimate.pdf(x)) # plt.show(block=False) # partial pooling # shape2 rate2 rate1 rate0 # \ / / / # \ / / / # shape1_x / / <- distribution of graph edges # \ / /
def check_distribution(kin, temp, ndof, kb=8.314e-3, verbosity=2, screen=False, filename=None, ene_unit=None, temp_unit=None): r""" Checks if a kinetic energy trajectory is Maxwell-Boltzmann distributed. .. warning: This is a low-level function. Additionally to being less user-friendly, there is a higher probability of erroneous and / or badly documented behavior due to unexpected inputs. Consider using the high-level version based on the SimulationData object. See physical_validation.kinetic_energy.check_mb_ensemble for more information and full documentation. Parameters ---------- kin : array-like Kinetic energy snapshots of the system. temp : float Target temperature of the system. Used to construct the Maxwell-Boltzmann distribution. ndof : float Number of degrees of freedom in the system. Used to construct the Maxwell-Boltzmann distribution. kb : float Boltzmann constant :math:`k_B`. Default: 8.314e-3 (kJ/mol). verbosity : int 0: Silent. 1: Print minimal information. 2: Print result details. 3: Print additional information. Default: 2. screen : bool Plot distributions on screen. Default: False. filename : string Plot distributions to `filename`.pdf. Default: None. ene_unit : string Energy unit - used for output only. temp_unit : string Temperature unit - used for output only. Returns ------- result : float The p value of the test. See Also -------- physical_validation.kinetic_energy.distribution : High-level version """ # Discard burn-in period and time-correlated frames kin = trajectory.prepare(kin, verbosity=verbosity, name='Kinetic energy') kt = kb * temp if ndof <= 0: warnings.warn('Zero degrees of freedom!') p = np.float('NaN') else: d, p = stats.kstest(kin, 'gamma', (ndof/2, 0, kt)) # ====================== # # Plot to screen or file # # ====================== # do_plot = screen or filename is not None if do_plot: ana_dist = stats.gamma(ndof/2, scale=kt) ana_kin = np.linspace(ana_dist.ppf(0.0001), ana_dist.ppf(0.9999), 200) ana_hist = ana_dist.pdf(ana_kin) tunit = '' if temp_unit is not None: tunit = temp_unit data = [{'y': kin, 'hist': int(len(kin)/150), 'args': dict(label='Trajectory', density=True, alpha=0.5)}] if ndof > 0: data.append( {'x': ana_kin, 'y': ana_hist, 'args': dict(label='Analytical T=' + str(temp) + tunit, lw=5)}) unit = '' if ene_unit is not None: unit = ' [' + ene_unit + ']' plot.plot(data, legend='lower left', title='Kinetic energy distribution', xlabel='Kinetic energy' + unit, ylabel='Probability [%]', sci_x=True, percent=True, filename=filename, screen=screen) if verbosity > 0: if verbosity > 1: message = ('Kinetic energy distribution check (strict)\n' 'Kolmogorov-Smirnov test result: p = {:g}\n' 'Null hypothesis: Kinetic energy is Maxwell-Boltzmann distributed'.format(p)) else: message = 'p = {:g}'.format(p) print(message) return p
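
# Illustrative sketch only (all values are made up): the strict check above
# boils down to a Kolmogorov-Smirnov test of the kinetic-energy samples against
# a gamma distribution with shape ndof/2 and scale kB*T.
import numpy as np
from scipy import stats

kb = 8.314e-3          # kJ/(mol K)
temp = 300.0           # K
ndof = 3000

rng = np.random.default_rng(0)
kin = rng.gamma(shape=ndof / 2, scale=kb * temp, size=5000)   # synthetic "trajectory"

d, p = stats.kstest(kin, 'gamma', (ndof / 2, 0, kb * temp))
print(d, p)            # large p: consistent with a Maxwell-Boltzmann ensemble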
# generate Dirichlet samples & plot them for alpha in alphas: samples = st.dirichlet(alpha).rvs(N) ax = plt.gca(projection='3d') plt.title(r'$\alpha$ = {}'.format(alpha)) ax.scatter(samples[:, 0], samples[:, 1], samples[:, 2]) ax.view_init(azim=40) ax.set_xlabel(r'$p_1$') ax.set_ylabel(r'$p_2$') ax.set_zlabel(r'$p_3$') plt.show() # use standardized Gamma distribution to generate Dirichlet a = 3 # choose the parameter set of alpha gamma1 = st.gamma(alphas[a][0]).rvs(size=(N, 1)) gamma2 = st.gamma(alphas[a][1]).rvs(size=(N, 1)) gamma3 = st.gamma(alphas[a][2]).rvs(size=(N, 1)) Diri = np.concatenate((gamma1, gamma2, gamma3), axis=1) for i in range(N): # each component as normalized Gamma realization norm = sum(Diri[i, :]) Diri[i, :] /= norm ax = plt.gca(projection='3d') plt.title(r'$\alpha$ = {}, gen from indep Gamma'.format(alphas[a])) ax.scatter(Diri[:, 0], Diri[:, 1], Diri[:, 2]) ax.view_init(azim=40) ax.set_xlabel(r'$p_1$') ax.set_ylabel(r'$p_2$')
alphac = n / 2 + xi0 def betac(mu): return xi0 + 0.5 * np.sum((arrY - mu)**2) # start simulation muk, tauk = x0 flag = 0 while len(datalst) < size: rvmuc = stats.norm(muc(tauk), sigmac(tauk)) muk = rvmuc.rvs() rvtauc = stats.gamma(alphac, scale=1 / betac(muk)) tauk = rvtauc.rvs() flag += 1 if flag == initnum: print(f'flag: {flag}, we get the first data') datalst.append([muk, tauk]) rvynew = stats.norm(muk, np.sqrt(1 / tauk)) ynewlst.append(rvynew.rvs()) if flag > initnum and flag % step == 0: print(f'flag: {flag}, we get the {(flag-initnum)//step+1}th data') datalst.append([muk, tauk]) rvynew = stats.norm(muk, np.sqrt(1 / tauk)) ynewlst.append(rvynew.rvs()) with open(f'./ass1/savedoc/p3{name1}.pkl', 'wb') as f: pickle.dump(datalst, f)
def check_mean_std(kin, temp, ndof, kb, verbosity=2, bs_repetitions=200, screen=False, filename=None, ene_unit=None, temp_unit=None): r""" Calculates the mean and standard deviation of a trajectory (+ bootstrap error estimates), and compares them to the theoretically expected values. .. warning: This is a low-level function. Additionally to being less user-friendly, there is a higher probability of erroneous and / or badly documented behavior due to unexpected inputs. Consider using the high-level version based on the SimulationData object. See physical_validation.kinetic_energy.check_mb_ensemble for more information and full documentation. Parameters ---------- kin : array-like Kinetic energy snapshots of the system. temp : float Target temperature of the system. Used to construct the Maxwell-Boltzmann distribution. ndof : float Number of degrees of freedom in the system. Used to construct the Maxwell-Boltzmann distribution. kb : float Boltzmann constant :math:`k_B`. verbosity : int 0: Silent. 1: Print minimal information. 2: Print result details. 3: Print additional information. Default: 2. bs_repetitions : int Number of bootstrap samples used for error estimate. Default: 200. screen : bool Plot distributions on screen. Default: False. filename : string Plot distributions to `filename`.pdf. Default: None. ene_unit : string Energy unit - used for output only. temp_unit : string Temperature unit - used for output only. Returns ------- result : Tuple[float] Distance of the estimated T(mu) and T(sigma) from the expected temperature, measured in standard deviations of the estimates. See Also -------- physical_validation.kinetic_energy.distribution : High-level version """ # Discard burn-in period and time-correlated frames kin = trajectory.prepare(kin, verbosity=verbosity, name='Kinetic energy') if ndof <= 0: warnings.warn('Zero degrees of freedom!') # ========================== # # Compute mu and sig of data # # ========================== # kt = temp * kb loc = 0 ana_shape = ndof / 2 ana_scale = kt ana_dist = stats.gamma(ana_shape, loc=loc, scale=ana_scale) if ndof > 0: temp_mu = 2 * np.mean(kin) / (ndof * kb) temp_sig = np.sqrt(2 / ndof) * np.std(kin) / kb else: temp_mu = 0 temp_sig = 0 # ======================== # # Bootstrap error estimate # # ======================== # mu = [] sig = [] for k in trajectory.bootstrap(kin, bs_repetitions): mu.append(np.mean(k)) sig.append(np.std(k)) std_mu = np.std(mu) std_sig = np.std(sig) if ndof > 0: std_temp_mu = 2 * std_mu / (ndof * kb) std_temp_sig = np.sqrt(2 / ndof) * std_sig / kb else: std_temp_mu = 0 std_temp_sig = 0 # ====================== # # Plot to screen or file # # ====================== # do_plot = screen or filename is not None if do_plot: ana_kin = np.linspace(ana_dist.ppf(0.0001), ana_dist.ppf(0.9999), 200) ana_hist = ana_dist.pdf(ana_kin) tunit = '' if temp_unit is not None: tunit = temp_unit data = [{'y': kin, 'hist': int(len(kin)/150), 'args': dict(label='Trajectory', density=True, alpha=0.5)}] if ndof > 0: data.append( {'x': ana_kin, 'y': ana_hist, 'args': dict(label='Analytical T=' + str(temp) + tunit, lw=5)}) unit = '' if ene_unit is not None: unit = ' [' + ene_unit + ']' plot.plot(data, legend='best', title='Kinetic energy distribution', xlabel='Kinetic energy' + unit, ylabel='Probability [%]', sci_x=True, percent=True, filename=filename, screen=screen) # ================ # # Output to screen # # ================ # if verbosity > 0: eunit = '' if ene_unit is not None: eunit = ' ' + ene_unit tunit = '' if temp_unit is not None: 
            tunit = ' ' + temp_unit
        if verbosity > 1:
            message = ('Kinetic energy distribution check (non-strict)\n'
                       'Analytical distribution (T={2:.2f}{0:s}):\n'
                       ' * mu: {3:.2f}{1:s}\n'
                       ' * sigma: {4:.2f}{1:s}\n'
                       'Trajectory:\n'
                       ' * mu: {5:.2f} +- {7:.2f}{1:s}\n'
                       ' T(mu) = {9:.2f} +- {11:.2f}{0:s}\n'
                       ' * sigma: {6:.2f} +- {8:.2f}{1:s}\n'
                       ' T(sigma) = {10:.2f} +- {12:.2f}{0:s}'.format(
                           tunit, eunit, temp,
                           ana_dist.mean(), ana_dist.std(),
                           np.mean(kin), np.std(kin), std_mu, std_sig,
                           temp_mu, temp_sig, std_temp_mu, std_temp_sig))
        else:
            message = ('T(mu) = {1:.2f} +- {3:.2f}{0:s}\n'
                       'T(sigma) = {2:.2f} +- {4:.2f}{0:s}'.format(
                           tunit, temp_mu, temp_sig, std_temp_mu, std_temp_sig))
        print(message)

    # ============= #
    # Return values #
    # ============= #
    nan = float('NaN')  # np.float was removed from NumPy; use the builtin
    if ndof > 0:
        r1 = np.abs(temp - temp_mu) / std_temp_mu
        r2 = np.abs(temp - temp_sig) / std_temp_sig
    else:
        r1 = nan
        r2 = nan

    return r1, r2
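
# Illustrative sketch only: the estimators used above follow from the moments
# of the Maxwell-Boltzmann (gamma) distribution, mean = ndof*kB*T/2 and
# std = sqrt(ndof/2)*kB*T, so T(mu) = 2*mean/(ndof*kB) and
# T(sigma) = sqrt(2/ndof)*std/kB. Quick numerical check with made-up values:
import numpy as np
from scipy import stats

kb, temp, ndof = 8.314e-3, 300.0, 3000
ana = stats.gamma(ndof / 2, scale=kb * temp)

temp_mu = 2 * ana.mean() / (ndof * kb)
temp_sig = np.sqrt(2 / ndof) * ana.std() / kb
print(temp_mu, temp_sig)      # both recover the target temperature, 300 K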
def plot_priors(self): ''' ''' E0_mean, E0_std, alpha_emax, beta_emax, alpha_H, beta_H, log10_ec50_mean, log10_ec50_std, alpha_obs, beta_obs = self.get_priors( ) f, axes = plt.subplots(2, 3, figsize=(12, 7)) # E0 xx = np.linspace(0, 2, 50) rv = norm(E0_mean, E0_std) yy = rv.pdf(xx) axes.flat[0].set_title('E0 parameter') axes.flat[0].set_xlabel('E0') axes.flat[0].set_ylabel('probability') axes.flat[0].plot(xx, yy, 'r-') # EMAX xx = np.linspace(0, 2, 50) rv = gamma(alpha_emax, scale=1 / beta_emax, loc=0) yy = rv.pdf(xx) axes.flat[1].set_title('Emax parameter') axes.flat[1].set_xlabel('Emax') axes.flat[1].set_ylabel('probability') axes.flat[1].plot(xx, yy, 'r-') # H xx = np.linspace(0, 5, 100) rv = gamma(alpha_H, scale=1 / beta_H, loc=0) yy = rv.pdf(xx) axes.flat[2].set_title('Hill Coefficient (H) parameter') axes.flat[2].set_xlabel('H') axes.flat[2].set_ylabel('probability') axes.flat[2].plot(xx, yy, 'r-') # EC50 xx = np.logspace(-7, 1, 100) rv = norm(log10_ec50_mean, log10_ec50_std) yy = rv.pdf(np.log10(xx)) axes.flat[3].set_title('EC50 parameter') axes.flat[3].set_xlabel('EC50 [uM]') axes.flat[3].set_ylabel('probability') axes.flat[3].plot(xx, yy, 'r-') # Log10 EC50 axes.flat[4].set_title('Log10 EC50 parameter [~ Normal]') axes.flat[4].set_xlabel('Log10( EC50 [uM] )') axes.flat[4].set_ylabel('probability') axes.flat[4].plot(np.log10(xx), yy, 'r-') # OBS xx = np.linspace(0, 5, 100) rv = gamma(alpha_obs, scale=1 / beta_obs, loc=0) yy = rv.pdf(xx) axes.flat[5].set_title('Observation Std parameter') axes.flat[5].set_xlabel('Obs. Std') axes.flat[5].set_ylabel('probability') axes.flat[5].plot(xx, yy, 'r-') plt.tight_layout() plt.show()
def __init__(self, X, Y, R, target_sparsity=0.01, gamma0_v=1.0, lambda_params=(1e-6, 1e-6), nu_params=(1e-6, 1e-6), xi=0.999999, xi_prior_shape=(1, 1), check_finite=True, min_eigenval=0, jitter=1e-6): """The Probit model used for modeling Sparse Regression using a Gaussian field. :cite:`Engelhardt2014`. .. math:: y|X,\\beta,\\beta_0, \\nu \propto \mathcal{N}(\\beta_0 1_n + X \\beta, \\nu^{-1} I_n) Parameters ---------- X : ndarray The predictor matrix of real numbers, n x p in size, where n is the no. of samples (genotypes) and p is the no. of features (SNPs). Y : ndarray The response vector of real numbers, n x 1 in size, with each value representing the phenotype value for the sample. R : ndarray The covariance matrix for the SNPs, p x p in size. The matrix may not be positive-definite, but is converted to one internally. target_sparsity : float The proportion of included predictors. For example, a value of 0.01 indicates that around 1% of total SNPs are expected be included in our model. This value affects the probit threshold gamma_0 of the model. gamma0_v : float Variance of the probit threshold gamma_0 lambda_params : tuple Shape parameter and Inverse-scale parameter of the gamma prior placed on the model parameter lambda, where lambda is the inverse squared global scale parameter for the regression weights. nu_params : tuple Shape parameter and Inverse-scale parameter of the gamma prior placed on the model parameter nu, where nu is the residual precision. xi : float The shrinkage constant in the interval [0,1] to regularize the covariance matrix towards the identity matrix. This ensures that the covariance matrix is positive definite. A larger xi value biases our estimate towards the supplied R matrix, a lower value biases it towards the identity matrix. If None, then xi is sampled from a beta distribution with shape parameters specified by the tuple xi_prior_shape. xi_prior_shape : tuple Shape parameters of the beta prior placed on the model parameter xi, specified as a 2-tuple of real values. This argument is ignored and xi is not sampled, if it is specified explicitly using the xi parameter. check_finite : bool Whether to check that the input matrices contain only finite numbers. Disabling may give a performance gain, but may result in problems (crashes, non-termination) if the inputs do contain infinities or NaNs. This parameter is passed on to several linear algebra functions in scipy internally. min_eigenval : float Minimum Eigenvalue we can accept in the covariance matrix. Any eigenvalues encountered below this threshold are set to zero, and the resulting covariance matrix normalized to give ones on the diagonal. jitter : float A small value to add to the diagonals of the covariance matrix to avoid conditioning issues. """ self.X = X self.Y = Y self.R = Mvn(cov=R, min_eigenval=min_eigenval, jitter=jitter) self.N, self.P = self.X.shape self.nu_a, self.nu_b = nu_params self.check_finite = check_finite if xi is None: self.sample_xi = True self._xi_distribution = beta(*xi_prior_shape) self.xi = self._xi_distribution.mean() else: self.sample_xi = False self.xi = xi # Initialize scalar model distributions and the parameter values to their prior means. self._gamma0_distribution = norm(loc=norm.ppf(1.0 - target_sparsity), scale=gamma0_v) self.gamma0 = self._gamma0_distribution.mean() self._lambda_distribution = gamma(lambda_params[0], scale=1. / lambda_params[1]) self.lamb = self._lambda_distribution.mean() self._nu_distribution = gamma(self.nu_a, scale=1. 
/ self.nu_b) self.nu = self._nu_distribution.mean() # Cache for holding probit prior distributions (multivariate normal distributions with 0 mean and known # covariance, possibly adjusted by a shrinkage factor xi expressing our confidence in the covariance). # A single iteration of MCMC performs many computations on this distribution, so caching improves performance # significantly. A small cache size works just as well as a large one, # because the most recently used distribution tends to be used repeatedly in a single MCMC step. self._probit_cache = Cache(maxsize=4) # A cache used to hold the marginal PPI (Posterior Probability of Inclusion) distributions # p(y | X, gamma, gamma_0, nu, lambda) ~ Normal(..) # A small cache size works just as well as a large one, because the most recently used distribution tends to # be used repeatedly in a single MCMC step. self._ppi_cache = Cache(maxsize=8) # Initialize the sparsity function by generating a random variate from the model's probit distribution. self.gamma = self.probit_distribution(self.xi).rvs()
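# The docstring above describes xi as a shrinkage constant that pulls the SNP
# covariance R towards the identity matrix. A minimal sketch of that idea, assuming
# the usual convex-combination form (the class's actual probit_distribution()
# implementation is not shown here, so this is an illustration, not its code):
import numpy as np

def shrink_covariance(R, xi):
    """Blend the supplied covariance R with the identity: xi=1 keeps R, xi=0 gives I."""
    return xi * R + (1.0 - xi) * np.eye(R.shape[0])

R_toy = np.array([[1.0, 1.0], [1.0, 1.0]])     # singular toy covariance
R_reg = shrink_covariance(R_toy, xi=0.99)
assert np.all(np.linalg.eigvalsh(R_reg) > 0)   # shrinkage makes it strictly positive definite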
""" import random import numpy as np from scipy.stats import t, beta, lognorm, expon, gamma, poisson import matplotlib.pyplot as plt n = 100 # == Arbitrary collection of distributions == # distributions = { "student's t with 10 degrees of freedom": t(10), "beta(2, 2)": beta(2, 2), "lognormal LN(0, 1/2)": lognorm(0.5), "gamma(5, 1/2)": gamma(5, scale=2), "poisson(4)": poisson(4), "exponential with lambda = 1": expon(1) } # == Create a figure and some axes == # num_plots = 3 fig, axes = plt.subplots(num_plots, 1, figsize=(10, 10)) # == Set some plotting parameters to improve layout == # bbox = (0., 1.02, 1., .102) legend_args = {'ncol': 2, 'bbox_to_anchor': bbox, 'loc': 3, 'mode': 'expand'} plt.subplots_adjust(hspace=0.5) for ax in axes: # == Choose a randomly selected distribution == #
def rv(self): return stats.gamma(self.a, loc=self.mu, scale=1/self.b)
#np.random.seed(1) #for i in np.random.random(size=100): # self.insert_value(30*(i-0.5)) #self.show() #func = np.vectorize(self._UpperHull.value) #%% #np.random.seed(2) #samples = self.sample(10000) #print samples #plt.hist(samples) #from scipy.stats import kstest #kstest(samples,"norm") #%% Gamma k, theta k = 9 theta = 0.5 distro = stat.gamma(a=k, scale=0.5) h = np.vectorize(lambda x: (k - 1) * np.log(x) - x / theta) hprime = np.vectorize(lambda x: (k - 1) / x - 1 / theta) initial_knots = [1, 4.5, 7] xlb = -np.inf xub = np.inf self = ArsSampler(initial_knots, h, hprime, xlb, xub) np.random.seed(1) samples = self.sample(10000) xs = np.linspace(0, 10, 200) ax2 = fig1.add_subplot(2, 2, 2) ax2.plot(xs, gaussian_kde(samples)(xs)) ax2.plot(xs, distro.pdf(xs)) kstest(samples, distro.cdf) #%% Truncated Normal, cut above -1 a = -1
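# Cross-check (added): the hand-written h above is the Gamma(k, theta) log-density up
# to an additive constant, so the ARS sampler targets the same distribution that
# `distro` represents. k, theta and h are re-stated here so the check is self-contained:
import numpy as np
from scipy.stats import gamma
from scipy.special import gammaln

k, theta = 9, 0.5
h = np.vectorize(lambda x: (k - 1) * np.log(x) - x / theta)
xs_chk = np.linspace(0.5, 15, 50)
const = gammaln(k) + k * np.log(theta)  # log normalizing constant of Gamma(k, theta)
assert np.allclose(h(xs_chk) - gamma(a=k, scale=theta).logpdf(xs_chk), const)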
def get_posteriors(self, timeseries_type, plot=False): """ Generate posteriors for R_t. Parameters ---------- timeseries_type: TimeseriesType New X per day (cases, deaths etc). plot: bool If True, plot the estimated posteriors. Returns ------- dates: array-like Input data over a subset of indices available after windowing. posteriors: pd.DataFrame Posterior estimates for each timestamp with non-zero data. start_idx: int Index of first Rt value calculated from input data series #TODO figure out why this value sometimes truncates the series """ dates, timeseries = self.get_timeseries(timeseries_type=timeseries_type) if len(timeseries) == 0: self.log.info("empty timeseries, skipping", timeseries_type=str(timeseries_type.value)) return None, None, None else: self.log.info( "Analyzing posteriors for timeseries", timeseries_type=str(timeseries_type.value) ) # (1) Calculate Lambda (the Poisson rate given the data) based on # the observed increase from t-1 cases to t cases. lam = timeseries[:-1].values * np.exp((self.r_list[:, None] - 1) / self.serial_period) # (2) Calculate each day's likelihood over R_t # Originally smoothed counts were rounded (as needed for sps.poisson.pmf below), which # doesn't work well for low counts and introduces artifacts at rounding transitions. Now # calculate for both ceiling and floor values and interpolate between them to get smooth # behaviour ts_floor = timeseries.apply(np.floor).astype(int) ts_ceil = timeseries.apply(np.ceil).astype(int) ts_frac = timeseries - ts_floor likelihoods_floor = pd.DataFrame( data=sps.poisson.pmf(ts_floor[1:].values, lam), index=self.r_list, columns=timeseries.index[1:], ) likelihoods_ceil = pd.DataFrame( data=sps.poisson.pmf(ts_ceil[1:].values, lam), index=self.r_list, columns=timeseries.index[1:], ) # Interpolate between the values for the ceiling and floor of the smoothed counts likelihoods = ts_frac * likelihoods_ceil + (1 - ts_frac) * likelihoods_floor # (3) Create the (now scaled up for low counts) Gaussian Matrix (current_sigma, process_matrix) = self.make_process_matrix(timeseries.median()) # (3a) Normalize each column to sum to 1 process_matrix /= process_matrix.sum(axis=0) # (4) Calculate the initial prior. Gamma with mean "a" and mode "a-1". prior0 = sps.gamma(a=2.5).pdf(self.r_list) prior0 /= prior0.sum() reinit_prior = sps.gamma(a=2).pdf(self.r_list) reinit_prior /= reinit_prior.sum() # Create a DataFrame that will hold our posteriors for each day # Insert our prior as the first posterior. posteriors = pd.DataFrame( index=self.r_list, columns=timeseries.index, data={timeseries.index[0]: prior0} ) # Keep track of the sum of the log probability # of the data for maximum likelihood calculation. 
log_likelihood = 0.0 # Initialize timeseries scale (used for auto sigma) scale = timeseries.head(1).item() # Set up monitoring for Reff lagging the signal in the daily likelihood monitor = utils.LagMonitor(debug=False) # Set debug=True for a detailed printout of the daily lag # (5) Iteratively apply Bayes' rule loop_idx = 0 for previous_day, current_day in zip(timeseries.index[:-1], timeseries.index[1:]): # Keep track of an exponential moving average of the scale of the timeseries counts scale = 0.9 * scale + 0.1 * timeseries[current_day] # Calculate the process matrix for each day (current_sigma, process_matrix) = self.make_process_matrix(scale) # (5a) Calculate the new prior current_prior = process_matrix @ posteriors[previous_day] # (5b) Calculate the numerator of Bayes' Rule: P(k|R_t)P(R_t) numerator = likelihoods[current_day] * current_prior # (5c) Calculate the denominator of Bayes' Rule P(k) denominator = np.sum(numerator) # Execute full Bayes' Rule if denominator == 0: # Restart the Bayesian learning for the remaining series. # This is necessary since otherwise NaN values # will be inferred for all future days, after seeing # a single (smoothed) zero value. # # We understand that restarting the posteriors with the # re-initial prior may incur a start-up artifact as the posterior # restabilizes, but we believe it's the current best # solution for municipalities that have smoothed cases and # deaths that dip down to zero, but then start to increase # again. posteriors[current_day] = reinit_prior else: posteriors[current_day] = numerator / denominator # Monitor whether the posterior is lagging excessively behind the signal in the likelihood # TODO: in future, return the cumulative lag and use it to scale sigma up only when needed monitor.evaluate_lag_using_argmaxes( current_day=loop_idx, current_sigma=current_sigma, prev_post_am=posteriors[previous_day].argmax(), prior_am=current_prior.argmax(), like_am=likelihoods[current_day].argmax(), post_am=numerator.argmax(), ) # Add to the running sum of log likelihoods log_likelihood += np.log(denominator) loop_idx += 1 self.log_likelihood = log_likelihood if plot: plotting.plot_posteriors(x=posteriors) # Returns a Figure; normal builds never set the plot flag. start_idx = -len(posteriors.columns) return dates[start_idx:], posteriors, start_idx
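# A standalone sketch of the single-day Bayes update performed inside the loop above,
# with toy stand-ins for the real inputs (r_grid, sigma, the counts and the serial
# period below are illustrative, not values from the class):
import numpy as np
import scipy.stats as sps

r_grid = np.linspace(0.1, 4.0, 40)                   # grid of candidate R_t values
prior = sps.gamma(a=2.5).pdf(r_grid)
prior /= prior.sum()                                 # initial prior, as in step (4)

sigma = 0.3
process_matrix = sps.norm(loc=r_grid[:, None], scale=sigma).pdf(r_grid[None, :])
process_matrix /= process_matrix.sum(axis=0)         # each column sums to 1, as in step (3a)

k_prev, k_curr, serial_period = 20, 24, 5.8
lam = k_prev * np.exp((r_grid - 1) / serial_period)  # expected counts given R_t, step (1)
likelihood = sps.poisson.pmf(k_curr, lam)            # step (2)

current_prior = process_matrix @ prior               # step (5a)
numerator = likelihood * current_prior               # step (5b)
posterior = numerator / numerator.sum()              # step (5c) and normalization
assert np.isclose(posterior.sum(), 1.0)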
def icdf(x, alpha, beta): g = stats.gamma(alpha, 0, 1.0 / beta) return g.ppf(x)
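# A small usage check for icdf above: with this rate parameterization,
# stats.gamma(alpha, 0, 1/beta) has mean alpha/beta, and ppf inverts cdf.
# The alpha, beta and q values below are illustrative:
import numpy as np
from scipy import stats

alpha, beta, q = 2.0, 3.0, 0.5
g = stats.gamma(alpha, 0, 1.0 / beta)
assert np.isclose(g.mean(), alpha / beta)
assert np.isclose(g.cdf(icdf(q, alpha, beta)), q)  # ppf is the inverse of cdf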
def eval_(d, v, *, ratio, beta): return gamma(ratio(d) * beta(d), 0, 1 / beta(d)).logpdf(v)
import numpy as np from scipy.stats import gamma import matplotlib.pyplot as plt alpha_values = [1, 2, 3, 3, 3] # alpha is the shape parameter beta_values = [0.5, 0.5, 0.5, 1, 2] # beta is used here as the scale parameter theta (scipy's scale argument) color = ['b', 'r', 'g', 'y', 'm'] x = np.linspace(1E-6, 10, 1000) fig, ax = plt.subplots(figsize=(12, 8)) for k, t, c in zip(alpha_values, beta_values, color): dist = gamma(k, 0, t) plt.plot(x, dist.pdf(x), c=c, label=r'$\alpha=%.1f,\ \theta=%.1f$' % (k, t)) plt.title('Gamma Distribution') plt.xlim(0, 10) plt.ylim(0, 2) plt.xlabel('$x$') plt.ylabel(r'$p(x|\alpha, \theta)$') plt.legend(loc=0) plt.show()