def plot_fitting_poisson(self, head):
    """Plot the normalised data histogram with the fitted Poisson model on top.

    The fitted mean is read from ``self.params["poisson"][0]`` and the samples
    from ``self.data``. The figure is saved to ``head + "_poisson.png"`` and
    then closed.

    :param head: path prefix of the image file to write.
    """
    mu = self.params["poisson"][0]
    print([mu])
    # Equal weights of 1/N make the bar heights sum to one.
    weights = np.ones_like(self.data) / len(self.data)
    _, bins, _ = plt.hist(self.data, 20, weights=weights,
                          facecolor='green', alpha=0.75)
    # Model mass between consecutive *distinct* rounded bin edges.  The x
    # positions are collected together with the masses: several edges can
    # round to the same integer, and building the x list separately (as the
    # previous version did) then yields more x values than y values, which
    # makes plt.plot raise.
    xs, masses, prev = [], [], -1
    for edge in bins:
        count = np.round(edge)
        if edge >= 0 and count > prev:
            xs.append(count)
            masses.append(poisson.cdf(count, mu) - poisson.cdf(prev, mu))
            prev = count
    plt.plot(xs, masses, label='poisson pmf', color='black')
    plt.scatter(xs, masses, label='poisson pmf', color='black')
    plt.savefig(head + "_poisson.png")
    plt.close()
def auto_threshold(self):
    """
    Derive the flank and burst count thresholds from the photon counting
    histogram, treating the background as Poisson noise.

    The Poisson mean is the ``time_axis`` value at the histogram maximum.
    Each threshold is the smallest count whose upper-tail probability does
    not exceed the allowed ratio.  The results are written to
    ``self.pch_graph`` and to the two Tk string variables, then the graph is
    redrawn.

    TODO: this logic should live in the core part of the software.

    :return: None
    """
    def first_count_within(tail_ratio, rate):
        # Linear scan upwards until the tail above `count` is small enough.
        count = 0
        while 1 - poisson.cdf(count, rate) > tail_ratio:
            count += 1
        return count

    flank_ratio = float(self.false_negative_sv.get()) / 100.0
    rate = self.PCH.time_axis[np.argmax(self.PCH.data)]
    flank = first_count_within(flank_ratio, rate)
    self.pch_graph.threshold = flank
    self.threshold_flank_sv.set(str(flank))

    # NB this value is hardcoded ! The burst ratio is 1000x stricter.
    burst_ratio = flank_ratio / 1000.0
    rate = self.PCH.time_axis[np.argmax(self.PCH.data)]
    burst = first_count_within(burst_ratio, rate)
    self.pch_graph.threshold_burst = burst
    self.threshold_burst_sv.set(str(burst))

    self.pch_graph.plot(self.PCH)
def probTable(homeAtt, homeDef, awayAtt, awayDef, homeFactor):
    """Return the flattened 8x8 table of joint score probabilities.

    Both sides score independently with Poisson rates
    ``exp(homeAtt - awayDef + homeFactor)`` and ``exp(awayAtt - homeDef)``.
    Each marginal has bins for 0..6 goals plus a final "7 or more" bin.
    Entry ``8 * i + j`` of the result is P(home bin i) * P(away bin j).
    """
    def binned_goals(rate):
        # Bin masses come from successive cdf differences; the last bin
        # collects whatever mass is left above six goals.
        masses, below = [], 0
        for goals in range(7):
            upto = poisson.cdf(goals, rate)
            masses.append(upto - below)
            below = upto
        masses.append(1 - below)
        return masses

    home = binned_goals(exp(homeAtt - awayDef + homeFactor))
    away = binned_goals(exp(awayAtt - homeDef))
    return [p_home * p_away for p_home in home for p_away in away]
def plot_fitting_zip(self, head):
    """Plot the normalised data histogram with the fitted zero-inflated
    Poisson (ZIP) model on top.

    ``self.params["zip"]`` supplies ``(mu, psi, zero)``.  Only ``mu`` and
    ``psi`` enter the curve; all three are printed.  The figure is saved to
    ``head + "_zip.png"`` and then closed.

    :param head: path prefix of the image file to write.
    """
    mu, psi, zero = self.params["zip"][:3]
    print([mu, psi, zero])
    # Equal weights of 1/N make the bar heights sum to one.
    weights = np.ones_like(self.data) / len(self.data)
    _, bins, _ = plt.hist(self.data, 20, weights=weights,
                          facecolor='green', alpha=0.75)
    # x positions and masses are collected together so both lists always
    # have the same length.  The previous version built the x list from
    # every non-negative edge (and truncated with int() instead of
    # rounding), which breaks plt.plot whenever two edges round to the
    # same count.
    xs, masses, prev = [], [], -1
    for edge in bins:
        count = np.round(edge)
        if edge >= 0 and count > prev:
            if prev < 0:
                # First bin also receives the extra (1 - psi) zero mass.
                masses.append(psi * poisson.cdf(count, mu) + (1.0 - psi))
            else:
                masses.append(psi * poisson.cdf(count, mu)
                              - psi * poisson.cdf(np.round(prev), mu))
            xs.append(count)
            prev = count
    plt.plot(xs, masses, label='zip', color='black')
    plt.scatter(xs, masses, label='zip', color='black')
    plt.savefig(head + "_zip.png")
    plt.close()
def ln_post(self,pars):
    '''
    The log-posterior to be used in the MCMC_fitter. This version cannot
    account for CRs.

    ``pars[0]`` multiplies the group times to give the expected counts and
    ``pars[1]`` is the location of the Gaussian for the first group.
    Returns ``-inf`` when any parameter is negative.

    For every later group the likelihood is a sum, over a +-2 sigma window
    of integer counts, of a truncated Poisson pmf times the Gaussian
    read-noise pdf.
    '''
    if np.any(pars < 0):
        return -np.inf

    nsig = 2
    total = 0.
    for i in range(self.x_hat.size):
        if i == 0:
            # logpdf avoids the -inf that log(pdf) gives once pdf underflows.
            lli = norm.logpdf(self.RM.noisy_counts[i], loc=pars[1],
                              scale=self.RM.RON_adu)
        else:
            mc = pars[0] * self.RM.RTS.group_times[i]
            half_width = nsig * np.sqrt(mc)
            mincnt = np.max([0, np.round(mc - half_width).astype(np.int_)])
            maxcnt = np.max([mincnt + 1, (mc + half_width).astype(np.int_)])
            xi = np.arange(mincnt, maxcnt)
            # xi covers mincnt .. maxcnt-1 inclusive, so the truncated pmf
            # is normalised by P(mincnt <= X <= maxcnt-1).  The previous
            # cdf(maxcnt) - cdf(mincnt) described (mincnt, maxcnt] and did
            # not match the support (it is zero when mc == 0).
            window_mass = (poisson.cdf(maxcnt - 1, mu=mc)
                           - poisson.cdf(mincnt - 1, mu=mc))
            poisson_pmf = poisson.pmf(xi, mu=mc) / window_mass
            gaussian_pdf = norm.pdf(
                xi,
                loc=self.RM.noisy_counts[i] - self.RM.noisy_counts[0],
                scale=self.RM.RON_adu)
            lli = np.log(np.sum(gaussian_pdf * poisson_pmf))
        total = total + lli
    return total
def meet_ifr(self):
    """Return one reorder point per item in ``self.mdolt``.

    For item ``k`` the target is ``(1 - ifr[k]) * oq[k]``.  The reorder
    point is the first stock level ``s`` in the searched interval for which
    ``mdolt[k] * P(X >= s) - s * P(X > s)``, with X ~ Poisson(mdolt[k]),
    does not exceed that target.

    The search doubles ``s`` until the condition holds, then scans linearly
    inside the bracket that was found.
    """
    def shortfall(level, demand):
        below = poisson.cdf(level - 1, demand)
        upto = poisson.cdf(level, demand)
        return (demand * (1 - below)) - (level * (1 - upto))

    targets = [(1 - tifr) * toq for tifr, toq in zip(self.ifr, self.oq)]
    reorder_points = []
    for index, es in zip(range(len(self.mdolt)), targets):
        demand = self.mdolt[index]
        s = 0  # s will be the reorder point
        lb = 0  # exponent of the lower bracket end
        # Doubling phase; the range bound is only a "theoretical infinity".
        for ub in range(int(pow(sys.maxsize, 0.5))):
            s = pow(2, ub)
            if (es - shortfall(s, demand)) >= 0:
                break
        # Up to 2^10 the scan simply starts from zero; beyond that it
        # starts at the previous power of two.
        if ub > 10:
            lb = ub - 1
        for s in range(pow(2, lb) - 1, pow(2, ub) + 1, 1):
            if (es - shortfall(s, demand)) >= 0:
                break
        reorder_points.append(s)
    return reorder_points
def poisson_test_random(x, lmd) :
    """Randomized upper-tail Poisson p-value.

    For each entry a value is drawn uniformly between ``P(X > x)`` and
    ``P(X >= x)`` with X ~ Poisson(lmd), capped at 1.  Entries where
    ``x == 0`` receive the raw uniform draw instead.

    ``x`` must be an array: ``x.shape[0]`` sets the number of draws.
    """
    strict_tail = 1 - poisson.cdf(x, lmd)
    inclusive_tail = strict_tail + poisson.pmf(x, lmd)
    draws = np.random.rand(x.shape[0])
    blended = np.minimum(strict_tail + (inclusive_tail - strict_tail) * draws, 1)
    return blended * (x != 0) + draws * (x == 0)
def poisson_min_max_limits(conf, nevents):
    """
    Calculate the minimum and maximum mean Poisson value mu
    consistent with seeing nevents at a given confidence level.

    conf: float
      95%, 90%, 68.3% or similar.
    nevents: int
      The number of events observed.

    Returns
    -------
    mulo, muhi : floats
      Mean number of events such that >= observed number of events
      nevents occurs in fewer than conf% of cases (mulo), and mean
      number of events such that <= nevents occurs in fewer than conf%
      of cases (muhi).  Both are NaN when either input is NaN.
    """
    from scipy.stats import poisson

    conf = float(conf)
    # NaN has to be detected before the integer conversion: int(nan)
    # raises ValueError, so the NaN branch for nevents used to be
    # unreachable.
    if np.isnan(conf) or np.isnan(float(nevents)):
        return np.nan, np.nan
    nevents = int(nevents)

    target = 1 - conf / 100.
    if nevents == 0:
        mulo = 0
    else:
        mulo = _bisect(lambda mu: 1 - poisson.cdf(nevents - 1, mu), target)
    muhi = _bisect(lambda mu: poisson.cdf(nevents, mu), target)
    return mulo, muhi
def poisson_min_max_limits(conf, nevents):
    """
    Calculate the minimum and maximum mean Poisson value mu
    consistent with seeing nevents at a given confidence level.

    conf: float
      95%, 90%, 68.3% or similar.
    nevents: int
      The number of events observed.

    Returns
    -------
    mulo, muhi : floats
      Mean number of events such that >= observed number of events
      nevents occurs in fewer than conf% of cases (mulo), and mean
      number of events such that <= nevents occurs in fewer than conf%
      of cases (muhi).  Both are NaN when either input is NaN.
    """
    from scipy.stats import poisson

    conf = float(conf)
    # Check for NaN first: int(nan) raises ValueError, which made the NaN
    # branch for nevents unreachable in the previous ordering.
    if np.isnan(conf) or np.isnan(float(nevents)):
        return np.nan, np.nan
    nevents = int(nevents)

    target = 1 - conf/100.
    if nevents == 0:
        mulo = 0
    else:
        mulo = _bisect(lambda mu: 1 - poisson.cdf(nevents-1, mu), target)
    muhi = _bisect(lambda mu: poisson.cdf(nevents, mu), target)
    return mulo, muhi
def plot_poisson():
    """Plot the Poisson pmf for a fixed mean and print three tail
    probabilities.

    The printed values are P(X <= 6), P(7 <= X <= 9) and P(X >= 10).
    The figure is shown interactively.
    """
    fig, ax = plt.subplots(1, 1)
    # This is prediction for Wawrinka in 2014
    mu = 7.869325
    x = np.arange(poisson.ppf(0.01, mu), poisson.ppf(0.999, mu))
    ax.plot(x, poisson.pmf(x, mu), 'wo', ms=8, label='poisson pmf')
    # One colour per bar.  The list is hard-coded for the 16 x values this
    # mu produces, so it must be revisited if mu changes.
    ax.vlines(x, 0, poisson.pmf(x, mu),
              colors=['b', 'b', 'b', 'b', 'b', 'r', 'r', 'r',
                      'g', 'g', 'g', 'g', 'g', 'g', 'g', 'g'],
              lw=5, alpha=0.5)
    rv = poisson(mu)
    ax.vlines(x, 0, rv.pmf(x), colors='k', linestyles='-', lw=1,
              label='frozen pmf')
    plt.title("Stanislas Wawrinka")
    plt.xlabel('# QF+ Finishes in 2014')
    plt.ylabel('Probability')
    prob0 = poisson.cdf(6, mu)
    prob123 = poisson.cdf(9, mu) - poisson.cdf(6, mu)
    # The survival function gives the whole upper tail directly; the old
    # code approximated it with cdf(10000) - cdf(9).
    probAbove3 = poisson.sf(9, mu)
    # Single-argument print() calls work on both Python 2 and Python 3.
    print(prob0)
    print(prob123)
    print(probAbove3)
    plt.show()
def ln_post_CR(self,pars):
    '''
    The log-posterior to be used in the MCMC_fitter. This version accounts
    for CRs.

    It works on differences between consecutive groups.  The expected
    counts are ``pars[0]`` times the group-time difference, and the noise
    of a difference is ``sqrt(2) * RON_adu``.  Returns ``-inf`` when any
    parameter is non-positive.
    '''
    if np.any(pars <= 0):
        return -np.inf

    # Fixed integer window over which the Poisson pmf is evaluated.
    mincnt = 0
    maxcnt = 100
    xi = np.arange(mincnt, maxcnt)

    total = 0.
    for i in range(1, self.x_hat.size, 1):
        mc = pars[0] * (self.RM.RTS.group_times[i] - self.RM.RTS.group_times[i-1])
        # xi covers mincnt .. maxcnt-1 inclusive, so normalise by exactly
        # that mass.  The previous cdf(maxcnt) - cdf(mincnt) left out P(0)
        # and included P(maxcnt); that scales the likelihood by a factor
        # depending on pars[0] and distorts the posterior at low rates.
        window_mass = (poisson.cdf(maxcnt - 1, mu=mc)
                       - poisson.cdf(mincnt - 1, mu=mc))
        poisson_pmf = poisson.pmf(xi, mu=mc) / window_mass
        gaussian_pdf = norm.pdf(
            xi,
            loc=self.RM.noisy_counts[i] - self.RM.noisy_counts[i-1],
            scale=np.sqrt(2) * self.RM.RON_adu)
        lli = np.log(np.sum((gaussian_pdf * poisson_pmf)))
        total = total + lli
    return total
def get_mapping(self) -> StateActionMapping[State, Action]:
    """Build the state -> action -> distribution mapping of this finite MDP.

    States are ``State(1) .. State(self.W)``.  Actions are every
    ``Action(i, j)`` with ``i + j <= self.H``.  Both lists are also stored
    on the instance as ``list_states`` and ``list_actions``.

    For a state/action pair the reward is ``wage * (H - l - s)``, the
    Poisson mean is ``alpha * l`` and the offer probability is
    ``beta * s / H``.
    """
    # All available actions.
    actions: List[Action] = [
        Action(i, j)
        for i in range(self.H + 1)
        for j in range(self.H - i + 1)
    ]
    self.list_actions: List[Action] = actions

    # All possible states.
    states: List[State] = [State(wage) for wage in range(1, self.W + 1)]
    self.list_states: List[State] = states

    mapping: StateActionMapping[State, Action] = {}
    for state in states:
        per_action: ActionMapping[Action, StateReward[State]] = {}
        for action in actions:
            s: int = action.s
            l: int = action.l
            reward: float = state.wage * (self.H - l - s)
            pois_mean: float = self.alpha * l
            proba_offer: float = self.beta * s / self.H

            if state.wage == self.W:
                # Top state: it is absorbing, and only the reward depends
                # on the chosen action.
                per_action[action] = Constant((state, reward))
                continue

            stay_prob = poisson.pmf(0, pois_mean) * (1 - proba_offer)
            if state.wage == self.W - 1:
                # One below the top: either stay or move to W.
                per_action[action] = Categorical({
                    (state, reward): stay_prob,
                    (State(self.W), reward):
                        proba_offer + (1 - proba_offer)
                        * (1 - poisson.pmf(0, pois_mean)),
                })
                continue

            # Any other state: the next state lies between the current
            # wage and W.
            transitions = {(state, reward): stay_prob}
            transitions[(State(state.wage + 1), reward)] = (
                proba_offer * poisson.cdf(1, pois_mean)
                + (1 - proba_offer) * poisson.pmf(1, pois_mean))
            for jump in range(2, self.W - state.wage):
                transitions[(State(state.wage + jump), reward)] = \
                    poisson.pmf(jump, pois_mean)
            # Everything at or above W collapses into W.
            transitions[(State(self.W), reward)] = 1 - poisson.cdf(
                self.W - state.wage - 1, pois_mean)
            per_action[action] = Categorical(transitions)
        mapping[state] = per_action
    return mapping
def poisson_approximate(s1, s2, s_both, sLen):
    """Return ``(expected, probability)`` for an observed overlap.

    ``expected`` is ``s1 * s2 / sLen`` rounded to three decimals.  The
    probability comes from a Poisson with that (unrounded) mean:

    * the pmf at 0 when ``s_both`` is zero,
    * the lower tail ``P(X <= s_both)`` when ``s_both`` is below the mean,
    * ``1 - P(X <= s_both)`` otherwise.
    """
    # The larger set size is divided first, the smaller one multiplies.
    if s1 >= s2:
        larger, smaller = s1, s2
    else:
        larger, smaller = s2, s1
    expected = smaller * (larger / float(sLen))
    shown = round(expected, 3)

    if s_both == 0.0:
        return shown, PSN.pmf(0, expected)
    if s_both < expected:
        return shown, PSN.cdf(s_both, expected)
    return shown, 1 - PSN.cdf(s_both, expected)
def getOdds55(offForm1, defForm1, offForm2, defForm2):
    """Return ``[P(X <= 4), P(X <= 5), P(X <= 6)]`` for the expected score.

    X is Poisson with mean equal to half the sum of the four form values.
    """
    exp_score = offForm1 / 2 + offForm2 / 2 + defForm1 / 2 + defForm2 / 2
    return [poisson.cdf(line, exp_score) for line in (4, 5, 6)]
def _lids_pmf(x_lids, mu_lids):
    r'''Probability mass function of the LIDS.

    ``mu_lids``, ``x_lids`` and ``x_lids + 1`` are mapped through
    ``_lids_inverse_func``.  The result is the Poisson cdf at the mapped
    ``x_lids`` minus the cdf at the mapped ``x_lids + 1``, using the mapped
    ``mu_lids`` as the mean.
    '''
    # Expected number of counts
    expected_counts = _lids_inverse_func(mu_lids)
    counts_here = _lids_inverse_func(x_lids)
    counts_next = _lids_inverse_func(x_lids + 1)
    cdf_here = poisson.cdf(k=counts_here, mu=expected_counts)
    cdf_next = poisson.cdf(k=counts_next, mu=expected_counts)
    return cdf_here - cdf_next
def predict_n_rel(des_prob, n_docs, mu):
    """Return the smallest count whose Poisson cdf reaches ``des_prob``.

    The scan starts at 0 and never goes past ``n_docs``.
    """
    count = 0
    while count < n_docs and poisson.cdf(count, mu) < des_prob:
        count += 1
    return count
def altCLs(Nbkg, Nobs, CL):
    """Return the smallest integer signal yield excluded at level ``CL``.

    The signal is raised in unit steps until
    ``P(X <= Nobs | Nbkg + Nsig) / P(X <= Nobs | Nbkg)`` drops below
    ``1 - CL``.
    """
    # The background-only term does not depend on the signal hypothesis.
    clb = poisson.cdf(Nobs, Nbkg)
    Nsig = 0
    while not (poisson.cdf(Nobs, Nbkg + Nsig) / clb < (1 - CL)):
        Nsig += 1
    return Nsig
def get_probs(lamda, max):
    """Return the ``max`` x ``max`` joint probability matrix of two
    independent Poisson counts.

    :param lamda: sequence holding the two Poisson means.
    :param max: number of bins per axis.  Bins ``0 .. max-2`` hold exact
        counts and the last bin holds ``P(X >= max - 1)``.
    :returns: matrix ``M`` with ``M[i, j] = P(first in bin i) *
        P(second in bin j)``; its entries sum to one.
    """
    counts = np.arange(max)
    marginals = []
    for rate in (lamda[0], lamda[1]):
        masses = poisson.pmf(counts, rate)
        # The last bin absorbs the whole upper tail.  The earlier code
        # wrote both tails into the first vector (the second kept a plain
        # pmf value) and used 1 - cdf(max - 1), which discards
        # P(X = max - 1) so the marginal no longer summed to one.
        masses[-1] = poisson.sf(max - 2, rate)
        marginals.append(masses)
    first, second = marginals
    return first[:, np.newaxis] * second
def get_infection_type_frequencies(bacteria_count, viral_counts_dict):
    """Return the expected frequency of each infection pattern.

    Each virus type infects a cell a Poisson number of times with mean
    ``viral_counts_dict[type] / bacteria_count``.  The result maps:

    * ``'none'`` -- no virus of any type in the cell,
    * each type name -- only that type present,
    * the other type names joined with ``'_'`` -- every type except one,
    * ``'all_virus_types'`` -- every type present.

    "Present" means between 1 and ``MAX_INFECTIONS_PER_CELL`` infections.
    """
    def absent(virus):
        return poisson.cdf(0, viral_counts_dict[virus] / bacteria_count)

    def present(virus):
        return (poisson.cdf(MAX_INFECTIONS_PER_CELL,
                            viral_counts_dict[virus] / bacteria_count)
                - poisson.cdf(0, viral_counts_dict[virus] / bacteria_count))

    infection_type_dict = {}

    # no infections
    f = 1
    for virus in viral_counts_dict:
        f *= (absent(virus))
    infection_type_dict['none'] = f

    # alone: only `virus` in the cell
    for virus in viral_counts_dict:
        f = present(virus)
        for other in viral_counts_dict:
            if other != virus:
                f *= absent(other)
        infection_type_dict[virus] = f

    # all but one: only `virus` missing from the cell
    for virus in viral_counts_dict:
        f = absent(virus)
        for other in viral_counts_dict:
            if other != virus:
                f *= present(other)
        label = '_'.join([k for k in viral_counts_dict if k != virus])
        infection_type_dict[label] = f

    # all virus types
    f = 1
    for virus in viral_counts_dict:
        f *= present(virus)
    infection_type_dict['all_virus_types'] = f

    return infection_type_dict
def _error( value ) : '''Construct Bayesian errors using Poisson distribution''' # likelihood = P(value|lambda) using underlying Poisson assumption # error: lambdas with equal likelihood for which area in between is 68% lambda_up, lambda_down, step_size = 1.1*value, 0.9*value, float(value)/10 for i in range(5) : lambda_up -= step_size; lambda_down += step_size; step_size /= 10 while (poisson.cdf(value,lambda_down) - poisson.cdf(value,lambda_up)) < 0.6826894921370859 : lambda_up += step_size while poisson.pmf(value,lambda_down) > poisson.pmf(value,lambda_up) : lambda_down -= step_size/10 return (value-lambda_down,lambda_up-value)
def plot_pval(counts, Lam, seed=123, title="", outfile="", save=True):
    """Histogram randomized Poisson p-values of a 2-D count matrix.

    Each p-value is a uniform mix of ``P(X <= count - 1)`` and
    ``P(X <= count)`` under Poisson(``Lam``).

    :param counts: 2-D array of observed counts.
    :param Lam: Poisson mean(s), broadcastable against ``counts``.
    :param seed: seed for NumPy's global random generator.
    :param title: figure title.
    :param outfile: path of the image written when ``save`` is true.
    :param save: when true, save the figure to ``outfile`` and close it.
    """
    # Honour the caller's seed; it used to be ignored in favour of a
    # hard-coded 123 (which is still the default).
    np.random.seed(seed)
    (n, p) = counts.shape
    C = np.random.uniform(size=(n, p))
    pval = C * poisson.cdf(counts - 1, Lam) + (1 - C) * poisson.cdf(
        counts, Lam)
    plt.hist(pval.flatten(), bins=np.linspace(0, 1, 100))
    plt.title(title)
    if save:
        plt.savefig(outfile)
        # NOTE(review): the figure is closed only after saving so that
        # save=False leaves it available to the caller -- confirm.
        plt.close()
def evaluate_policy(new_policy, env, sample):
    """Advance ``env`` with ``new_policy`` and return ``[val_I, val_L]``.

    ``val_I`` starts from the sampled mean of X_I and ``val_L`` from
    ``new_policy``.  Both then add 0.97 times the stored values
    ``sample.val_I`` / ``sample.val_L`` of the reachable next states,
    weighted by a Poisson(``env.beta``) probability for ``i`` and a
    Binomial(``uppR``, ``env.gamma``) probability for ``j``.

    NOTE(review): ``range`` excludes its upper bound, so the ``i == uppI``
    and ``j == uppR`` branches below can never fire -- confirm whether the
    loops were meant to be inclusive.
    """
    env.time_step(new_policy)
    X_I, X_S = sample.X_I, sample.X_S
    currentV_I = sample.val_I
    currentV_L = sample.val_L
    meanX_S, meanX_I, meanX_R = env.sample_stochastic()
    errX_S, errX_I, errX_R = env.get_error()

    def clipped_band(mean, err):
        # mean +- err, rounded and clipped to [0, env.M].
        return max(round(mean - err, 0), 0), min(round(mean + err, 0), env.M)

    lowXS, uppXS = clipped_band(meanX_S, errX_S)
    lowXR, uppXR = clipped_band(meanX_R, errX_R)
    lowI = max(X_S - uppXS, 0)
    uppI = min(X_S, X_S - lowXS)
    lowR = max(lowXR - (env.M - X_I - X_S), 0)
    uppR = min(X_I, uppXR - (env.M - X_I - X_S))

    val_I = meanX_I
    val_L = new_policy
    for i in range(lowI, uppI):
        # The ends of the band absorb the tails of the distribution.
        if i == uppI:
            probI = 1 - poisson.cdf(i - 1, env.beta)
        elif i == lowI:
            probI = poisson.cdf(i, env.beta)
        else:
            probI = poisson.pmf(i, env.beta)
        for j in range(lowR, uppR):
            if j == uppR:
                probR = 1 - binom.cdf(j - 1, uppR, env.gamma)
            elif j == lowR:
                probR = binom.cdf(j, uppR, env.gamma)
            else:
                probR = binom.pmf(j, uppR, env.gamma)
            weight = 0.97 * probI * probR
            cell = (X_I + i - j - 1, X_S - i - 1)
            val_I += weight * currentV_I[cell]
            val_L += weight * currentV_L[cell]
    return [val_I, val_L]
def BuildSingleSurprise(psth, rate0, binsize, surplength,t_before):
    """Return a 2 x ``surplength`` array of Poisson surprise values.

    For each of the two rows of ``psth`` and each offset ``j``, the counts
    in a window of ``binsize`` samples are compared with a Poisson of mean
    ``rate0 * binsize``.  The surprise is ``-log10`` of the smaller of the
    lower-tail probability and its complement.
    """
    # This epsilon keeps log10 from returning inf when a tail is exactly 0.
    eps = np.finfo(np.float128).eps / 1000000000
    # Mean count of the Poisson "blank" for this bin size.
    mu = rate0 * binsize
    counts = np.zeros([2, surplength])
    Surprise = np.zeros([2, surplength])
    for j in np.arange(surplength):
        # Offset so that every surprise trace starts at the 0-1 ms bin.
        start = j + t_before - binsize + 1
        for d in (0, 1):
            counts[d][j] = sum(psth[d][start:start + binsize])
            lower_tail = poisson.cdf(counts[d][j], mu)
            Surprise[d][j] = -np.log10(min(lower_tail - eps,
                                           1 - lower_tail + eps))
    return Surprise
def pdcdf(x, lbd):
    '''
    Returns the value of the cumulative distribution function of a Poisson
    distribution, read from the pre-calculated table when possible.

    :param x: point to calculate the cdf (integer)
    :param lbd: demand distribution expected value (lambda)
    :returns: P(X <= x)
    :rtype: float
    :raises Warning: if the pair (x, lbd) does not exist in the table, and
        the return value is calculated on-the-fly using
        :mod:`scipy:scipy.stats`

    :examples:

    >>> round(pdcdf(1, 25), 4) == round(poisson.cdf(1, 25), 4)
    True
    >>> round(pdcdf(1e6, 25), 4) == round(poisson.cdf(1e6, 25), 4)
    True
    '''
    try:
        cached = pdtable[x, lbd]
    except IndexError:
        message = "Input too big for the pre-calculated table: %d %d" % (x, lbd)
        warnings.warn(message)
        return poisson.cdf(x, lbd)
    return cached
def error (event, numlines, rate, P):
    """Return True when ``event.count`` is greater than 3.

    ``numlines``, ``rate`` and ``P`` are accepted but not used.

    NOTE(review): the previous body contained a Poisson tail test after an
    unconditional ``return False``.  It compared
    ``1 - cdf(event.count - 1, rate * numlines * (event.stop - event.start))``
    with ``P`` and could never run.  The dead code has been removed; the
    fixed threshold was the only effective behaviour.  Confirm whether the
    Poisson test should be reinstated.
    """
    if event.count > 3:
        return True
    return False
def ks_1sample(mme_sample_data, last_week):
    """Return the maximum ECDF gaps for geometric, binomial and Poisson fits.

    The distribution parameters are method-of-moments estimates from
    ``mme_sample_data``.  The first seven entries of ``last_week`` are
    treated as the ordered sample: entry ``i`` is compared with the ECDF
    steps ``i/7`` and ``(i+1)/7``.

    :returns: ``(geometric, binomial, poisson)`` maximum absolute gaps.
    """
    sample_variance = np.var(mme_sample_data, ddof=1)
    sample_mean = np.mean(mme_sample_data)
    binom_p_mme = 1 - (sample_variance / sample_mean)
    binom_n_mme = (sample_mean * sample_mean) / (sample_mean - sample_variance)
    poisson_lambda_mme = sample_mean
    geom_p_mme = 1 / sample_mean

    def largest_gap(fitted_cdf):
        gaps = []
        for i in range(7):
            model = fitted_cdf(last_week[i])
            gaps.append(abs(i / 7 - model))
            gaps.append(abs((i + 1) / 7 - model))
        return np.max(gaps)

    geom_gap = largest_gap(lambda v: geom.cdf(v, geom_p_mme))
    binom_gap = largest_gap(lambda v: binom.cdf(v, binom_n_mme, binom_p_mme))
    poisson_gap = largest_gap(lambda v: poisson.cdf(v, poisson_lambda_mme))
    return geom_gap, binom_gap, poisson_gap
def make(self, key):
    """Compute and insert the drift metric for ``key``.

    The metric is the fraction of smoothed, down-sampled per-trial spike
    rates whose Poisson cdf (with the mean rate as the Poisson mean) falls
    above 0.95 or below 0.05.
    """
    # -- get trial-spikes - use only trials in ProbeInsertionQuality.GoodTrial
    good_trials = (
        Unit.TrialSpikes *
        (experiment.TrialEvent & 'trial_event_type = "trialend"')
        & ProbeInsertionQuality.GoodTrial & key)
    trial_spikes, trial_durations = good_trials.fetch(
        'spike_times', 'trial_event_time', order_by='trial')

    # -- compute trial spike-rates (spikes/sec)
    spike_counts = [len(s) for s in trial_spikes]
    trial_spike_rates = spike_counts / trial_durations.astype(float)
    mean_spike_rate = np.mean(trial_spike_rates)

    # -- moving-average over 6 samples, then keep every 6th value
    window_size = 6
    ds_factor = 6
    kernel = np.ones(window_size) / window_size
    smoothed = np.convolve(trial_spike_rates, kernel, 'same')[::ds_factor]

    # -- compute drift_qc from poisson distribution
    poisson_cdf = poisson.cdf(smoothed, mean_spike_rate)
    outliers = np.logical_or(poisson_cdf > 0.95, poisson_cdf < 0.05)
    instability = outliers.sum() / len(poisson_cdf)

    # -- insert
    self.insert1(key)
    self.DriftMetric.insert1({**key, 'drift_metric': instability})
def KS1Sample(data_, true_distro_):
    """Run a one-sample KS test and print the decision.

    The parameters of ``true_distro_`` are estimated by method of moments
    from the first column of ``data_``.  The resulting cdf is compared
    with the empirical cdf of the second column.  The hypothesis is
    rejected when the maximum gap exceeds 0.05.

    :param data_: DataFrame whose first two columns are the two samples.
    :param true_distro_: ``'poisson'``, ``'geometric'`` or ``'binomial'``;
        any other value makes the function return ``None`` immediately.
    """
    state1_ = data_.columns[0]
    state2_ = data_.columns[1]
    if true_distro_ == 'poisson':
        MME = MMEPoisson(data_[state1_])
    elif true_distro_ == 'geometric':
        MME = MMEGeometric(data_[state1_])
    elif true_distro_ == 'binomial':
        MME = MMEBinomial(data_[state1_])
    else:
        return None
    print("The MME values : ", MME)

    uniq_val_state2, freq_val_state2 = np.unique(data_[state2_],
                                                 return_counts=True)
    cdf_state2 = np.cumsum(freq_val_state2)
    cdf_state2 = cdf_state2 / cdf_state2[-1]

    max_diff = -1
    for i, value_ in enumerate(uniq_val_state2):
        # ECDF just before and just after the jump at value_.  The last
        # cumulative value is exactly 1, so no special case is needed at
        # the end.
        left_ecdf = 0 if i == 0 else cdf_state2[i - 1]
        right_ecdf = cdf_state2[i]

        if true_distro_ == 'poisson':
            true_cdf = poisson.cdf(value_, MME[0])
        elif true_distro_ == 'geometric':
            true_cdf = geom.cdf(value_, MME[0])
        else:
            true_cdf = binom.cdf(value_, MME[0], MME[1])

        diff_ = max(abs(left_ecdf - true_cdf), abs(right_ecdf - true_cdf))
        if diff_ > max_diff:
            max_diff = diff_

    # Both messages take the same four values.  The accept branch used to
    # pass only max_diff, which raised "not enough arguments for format
    # string".
    details = (max_diff, state2_, state1_, true_distro_)
    if max_diff > 0.05:
        print(
            "Since Max distance(%f) > 0.05, we reject Null Hypothesis (%s has same distribution as %s having true distribution of %s)\n\n"
            % details)
    else:
        print(
            "Since Max distance(%f) <= 0.05, we accept Null Hypothesis (%s has same distribution as %s having true distribution of %s)\n\n"
            % details)
def calcPvalUponPoissondist(Count, Mean):
    """Return a two-sided Poisson p-value for ``Count`` given ``Mean``.

    The p-value is twice the tail on the side of the observation, capped
    at 1:

    * ``P(X >= Count)`` when ``Count >= Mean``,
    * ``P(X <= Count)`` otherwise.
    """
    from scipy.stats import poisson

    if Count >= Mean:
        # sf(k) is P(X > k), so shift by one to include the observed
        # count; the earlier sf(Count) left it out.
        tail = poisson.sf(Count - 1, Mean)
    else:
        tail = poisson.cdf(Count, Mean)
    # Doubling can exceed 1 near the mean.  Argument order keeps NaN as NaN.
    return min(tail * 2, 1.0)
def call_peaks(genome, unit_length=200, small_length=1000,
               medium_length=5000, large_length=10000):
    """Call enriched windows on every contig of ``genome``.

    Each window's read count is tested against the largest of four Poisson
    rates: the contig-wide background and three local rates taken from the
    small, medium and large bins.  The p-values are converted to q-values
    with the R ``qvalue`` package; windows with q < 0.01 are merged into
    peaks.

    :param genome: mapping of contig name to a per-position read sequence.
    :returns: list of ``[contig, start, end]`` entries.
    """
    peaks_out = []
    qvalue = None  # R package handle, loaded once on first use
    for contig in genome:
        total_reads = sum(genome[contig])
        contig_length = len(genome[contig])
        if total_reads == 0:
            continue
        window_counts = window_read_counts(genome[contig], unit_length)
        window_sum = window_counts.sum(axis=1)
        small_bin_counts = calculate_bins(window_counts, small_length,
                                          unit_length)
        medium_bin_counts = calculate_bins(window_counts, medium_length,
                                           unit_length)
        large_bin_counts = calculate_bins(window_counts, large_length,
                                          unit_length)
        local_bin_sums = np.hstack(
            (small_bin_counts.sum(axis=1), medium_bin_counts.sum(axis=1),
             large_bin_counts.sum(axis=1)))
        # Reads per window expected from each local neighbourhood.
        local_lambdas = (local_bin_sums / np.array(
            [small_length, medium_length, large_length])) * unit_length
        lambda_bg = np.ones(
            window_sum.shape) * (total_reads / contig_length) * unit_length
        all_lambdas = np.hstack((lambda_bg, local_lambdas))
        max_lambdas = np.amax(all_lambdas, axis=1)
        # The survival function is P(X > k), the same quantity as
        # 1 - cdf, but it keeps precision for very small tails; 1 - cdf
        # collapses to exactly 0 for strong peaks.
        p_vals = poisson.sf(window_sum.astype(int),
                            mu=max_lambdas.astype(float))
        p_vals = np.transpose(p_vals.astype(np.longdouble))[0]
        if qvalue is None:
            qvalue = importr('qvalue')
        q_vals = np.array(qvalue.qvalue(FloatVector(p_vals))[2])
        # Pair each q-value with its 1-based window position.
        q_vals = np.hstack(
            (np.transpose(np.matrix(list(range(1, len(q_vals) + 1)))),
             np.transpose(np.matrix(q_vals))))
        qv_df = pd.DataFrame(q_vals)
        qv_df.columns = ['Position', 'qvalue']
        peak_indices = np.array(
            qv_df.query('qvalue < 0.01')['Position'].tolist()).astype(int)
        peaks = indices_to_peaks(peak_indices)
        peaks = correct_peaks(peaks, unit_length)
        for peak in peaks:
            peaks_out.append([contig, peak[0], peak[1]])
    return peaks_out
def check_significance(self, num_pairs, critValue, cut_off=0.10):
    """
    Test whether the number of cointegrated pairs found is larger than
    random deviation alone would produce.

    Example: if you find 12 pairs after testing 100 at a 0.1 critical
    value, is that significant?  This adjusts for multiple comparison
    bias.  The expected number of chance pairs is ``critValue`` times the
    number of pairwise comparisons in ``self.portfolio``, rounded.  The
    p-value is the Poisson probability of seeing at least ``num_pairs``.

    :param num_pairs: The number of calculated pairs
    :param critValue: The critical value you used
    :param cut_off: The cut off to determine whether or not the results
        are significant
    :return: 1 when the p-value is below ``cut_off``, otherwise 0
    """
    num_stocks = len(self.portfolio)
    num_comparison = (num_stocks) * (num_stocks - 1) / 2
    x_val = round(critValue * num_comparison)
    fish = 1 - poisson.cdf(k=num_pairs - 1, mu=x_val)

    significant = fish < cut_off
    if significant:
        template = "These results seem significant: \n Expected: {} | Pairs: {} | p: {}"
    else:
        template = "These results may be insignificant:\n Expected: {} | Pairs: {} | p: {}"
    print(template.format(x_val, num_pairs, fish))
    return 1 if significant else 0
def ppois(q,mu):
    """
    Return the Poisson cumulative probability P(X <= q) for mean ``mu``.
    """
    from scipy.stats import poisson
    return poisson.cdf(k=q, mu=mu)
def poisson_safety(segs, injTable, livetime):
    """
    Return a tuple containing the number of vetoed injections, the number
    expected, and the Poisson safety probability based on the number of
    injections vetoed relative to random chance according to Poisson
    statistics.

    Arguments:

      segs : glue.segments.segmentlist
        list of segments to be tested
      injTable : glue.ligolw.table.Table
        table of injections
      livetime : [ float ]
        livetime of search
    """
    deadtime = abs(segs)

    get_time = def_get_time(injTable.tableName)
    injvetoed = len([inj for inj in injTable if get_time(inj) in segs])

    # Injections expected to fall in the vetoed time by chance.
    injexp = len(injTable) * float(deadtime) / float(livetime)

    # P(X >= injvetoed).  sf is used instead of 1 - cdf because the
    # subtraction rounds to exactly 0 once the tail drops below ~1e-16.
    prob = poisson.sf(injvetoed - 1, injexp)

    return injvetoed, injexp, prob
def calc_KS_1_sample_test(x_points, parameters_list, data, distribution_name, column_type , column_name):
    """Run a one-sample KS test of the DC data against a fitted distribution.

    For every point in ``x_points`` the fitted cdf is compared with the
    empirical cdf just left and just right of that point.  The largest
    absolute gap is the KS statistic; the hypothesis is rejected when it
    exceeds 0.05.  The outcome is printed.

    :param x_points: points at which the cdfs are compared.
    :param parameters_list: fitted parameters (two for binomial, one
        otherwise).
    :param data: dataset handed to ``get_left_cdf`` / ``get_right_cdf``.
    :param distribution_name: ``'binomial'``, ``'poisson'`` or
        ``'geometric'``.
    :param column_type: label used in the printed message.
    :param column_name: column handed to the ECDF helpers.
    :returns: list of the x value(s) where the maximum gap occurs.
    :raises ValueError: for an unsupported ``distribution_name``.
    """
    if distribution_name == 'binomial':
        def fitted_cdf(x):
            return binom.cdf(x, parameters_list[0], parameters_list[1])
    elif distribution_name == 'poisson':
        def fitted_cdf(x):
            return poisson.cdf(x, parameters_list[0])
    elif distribution_name == 'geometric':
        def fitted_cdf(x):
            return geom.cdf(x, parameters_list[0])
    else:
        # Previously this surfaced later as an unbound-variable error.
        raise ValueError("Unsupported distribution: {0}".format(distribution_name))

    columns = ['x', 'F_cap_x', 'F_cap_DC_left', 'F_cap_DC_right',
               'left_diff_abs', 'right_diff_abs']
    row_list = []
    for x in x_points:
        F_cap_x = fitted_cdf(x)
        # ECDF to the left / right of point x in the sorted DC dataset.
        F_cap_DC_left = get_left_cdf(data, column_name, x, 'DC_eCDF')
        F_cap_DC_right = get_right_cdf(data, column_name, x, 'DC_eCDF')
        left_diff_abs = round(abs(F_cap_x - F_cap_DC_left), 4)
        right_diff_abs = round(abs(F_cap_x - F_cap_DC_right), 4)
        row = [x, F_cap_x, F_cap_DC_left, F_cap_DC_right,
               left_diff_abs, right_diff_abs]
        row_list.append(dict(zip(columns, row)))

    # KS test table, one row per x point.
    ks_table = pd.DataFrame(row_list, columns=columns)

    # Rows holding the largest right and left gaps.  Series.idxmax takes
    # no column axis; passing axis=1 raises on current pandas.
    max_diff_x = []
    d_right = ks_table.iloc[ks_table['right_diff_abs'].idxmax()][['x', 'right_diff_abs']]
    d_left = ks_table.iloc[ks_table['left_diff_abs'].idxmax()][['x', 'left_diff_abs']]

    if d_right['right_diff_abs'] == d_left['left_diff_abs']:
        print("KS Statistic is {0} at x = {1} and {2}".format(d_right['right_diff_abs'], d_left['x'], d_right['x']))
        max_diff_x.append(d_right['x'])
        max_diff_x.append(d_left['x'])
    elif d_right['right_diff_abs'] > d_left['left_diff_abs']:
        print("KS Statistic is {0} at x = {1}".format(d_right['right_diff_abs'], d_right['x']))
        max_diff_x.append(d_right['x'])
    else:
        print("KS Statistic is {0} at x = {1}".format(d_left['left_diff_abs'], d_left['x']))
        max_diff_x.append(d_left['x'])

    # Reject/accept the null hypothesis with the fixed threshold 0.05.
    d = max(d_right['right_diff_abs'], d_left['left_diff_abs'])
    critical_value = 0.05
    hypothesis_type = 'confirmed positive cases' if column_type == 'confirmed' else column_type
    if d > critical_value:
        print("Rejected Null Hypothesis: We reject the hypothesis that the distribution of daily {0} in DC is {3}, as KS Statistic d = {1} exceeds threshold {2}".format(hypothesis_type, d, critical_value, distribution_name))
        print()
    else:
        print("Failed to reject Null Hypothesis: We accept the hypothesis that the distribution of daily {0} is same in both CT and DC, as KS Statistic d = {1} does not exceed threshold {2}".format(hypothesis_type, d, critical_value))
        print()

    return max_diff_x
def calc_f3(qbar,L):
    """Return ``(e^-qbar + sum_{k=1}^{L-1} (2k+1) * P(X <= k)) / L^2``
    with X ~ Poisson(qbar).
    """
    total = np.e ** (-qbar)
    for k in range(1, L):
        weight = ((2 * k) - 1) + 2
        total += weight * poisson.cdf(k, qbar)
    return total / (L ** 2)
def clean_bkg(img, bkg):
    """
    Subtract statistically the background from a Poisson image.

    Every positive pixel is zeroed with probability ``1 - P(X <= t)``,
    where ``t`` is the filtered image value at that pixel and X is Poisson
    with the filtered background value as its mean.  ``img`` is modified
    in place and also returned.

    :param img: Input image
    :type img: class:`numpy.ndarray`
    :param bkg: Background map
    :type bkg: class:`numpy.ndarray`
    :return: Background subtracted Poisson image
    :rtype: class:`numpy.ndarray`
    """
    positive = np.where(img > 0.0)
    all_rows, all_cols = np.indices(img.shape)
    rows = all_rows[positive]
    cols = all_cols[positive]
    n_positive = len(img[positive])

    # 10 x 10 neighbourhood used by both filters.
    box_size = 10
    box = np.ones((box_size, box_size))
    timg = generic_filter(img, heaviside, footprint=box, mode='constant')
    tbkg = generic_filter(np.ones(img.shape) * bkg, heaviside, footprint=box,
                          mode='constant', cval=0, origin=0)

    prob = 1. - poisson.cdf(timg[positive], tbkg[positive])
    draws = np.random.rand(n_positive)
    remove = np.where(draws < prob)
    img[rows[remove], cols[remove]] = 0
    return img
def poisson_safety(segs, injTable, livetime):
    """
    Return a tuple containing the number of vetoed injections, the number
    expected, and the Poisson safety probability based on the number of
    injections vetoed relative to random chance according to Poisson
    statistics.

    Arguments:

      segs : glue.segments.segmentlist
        list of segments to be tested
      injTable : glue.ligolw.table.Table
        table of injections
      livetime : [ float ]
        livetime of search
    """
    deadtime = abs(segs)

    get_time = def_get_time(injTable.tableName)
    injvetoed = len([ inj for inj in injTable if get_time(inj) in segs ])

    # Number of injections expected inside the vetoed time by chance.
    injexp = len(injTable)*float(deadtime) / float(livetime)

    # P(X >= injvetoed) from the survival function.  1 - cdf loses all
    # precision for very small tails and returns exactly 0.
    prob = poisson.sf(injvetoed-1, injexp)

    return injvetoed, injexp, prob
def myKStest(loadFile='optSxJKtext32.txt', nSims=10000):
    """Run a KS test of integer p-value counts against a Poisson cdf.

    ``loadFile`` must hold three columns; the first row is dropped.  The
    p-values in the first column are scaled by ``nSims`` and rounded to
    integers.  Only rows whose second-column value is positive are tested.
    The reference cdf is Poisson with the rounded mean of those integers.

    :returns: the result object of :func:`scipy.stats.kstest`.
    """
    from scipy.stats import poisson
    from scipy.stats import kstest

    # load data (the third column is not needed here)
    PvalMinima, XvalMinima, _ = np.loadtxt(loadFile, unpack=True)
    Pvals = PvalMinima[1:]  # ditch SMICA
    Xvals = XvalMinima[1:]  # ditch SMICA

    # indices of the positive-x group
    gt = np.where(Xvals > 0)

    # recover integer values that created p-values
    pInts = np.rint(Pvals * nSims)
    PmeanGt = np.mean(pInts[gt])

    cdf = lambda k: poisson.cdf(k, np.rint(PmeanGt))
    # A single formatted string prints the same on Python 2 and Python 3.
    print('closest integer to mean:  {}'.format(np.rint(PmeanGt)))
    return kstest(pInts[gt], cdf)
def Pvalue(AS, NumBases):
    """Return the Poisson probability of seeing more mismatches than observed.

    The observed mismatch count is ``ceil(AS / -6)``.  The expected count
    is 1% of ``NumBases``.

    :returns: ``P(X > observed)``, or the string ``"-"`` when ``NumBases``
        is zero.
    """
    if NumBases == 0.0:
        return "-"
    ExpectedMMrate = 0.01
    ExpectedMM = ExpectedMMrate * float(NumBases)
    ActualMM = int(ceil(float(AS) / (-6.0)))
    # sf(k) is P(X > k).  Unlike 1 - cdf(k) it does not round to 0 once
    # the tail falls below machine precision.
    return poisson.sf(ActualMM, ExpectedMM)
def estimate_revenue(self, st_time, pk_zone, dp_zone, th, car_type="UberX", zero_threshold=0.1):
    """Return the expected revenue for a pickup/drop-off zone pair.

    The record for the time slot and zone pair is read from ``self.G``.
    Its first entry divided by ``self.data_date_range`` is the Poisson
    mean.  The revenue is the fare times the probability of more than
    ``th`` trips, or 0 when that probability is NaN or below
    ``zero_threshold``.
    """
    record = self.G[self.time_to_index(st_time)][pk_zone][dp_zone]
    mean_trips = float(record[0]) / self.data_date_range
    exceed_prob = 1.0 - poisson.cdf(th, mean_trips)
    if math.isnan(exceed_prob) or exceed_prob < zero_threshold:
        return 0
    fare = self.compute_fare(record[1], record[2], car_type)
    return exceed_prob * fare
def local_coverage_score(covlist,window_size=11):
    '''
    Find local coverage dips.

    Each position is scored as ``-10 * log10(P(X <= coverage))`` with X
    Poisson around the mean coverage of the surrounding window.

    Returns ``(scores, localmeans)``, two lists with one entry per
    position in covlist.  Both are empty for empty input.
    '''
    # NumPy is used directly: the scipy.mean / scipy.log10 aliases this
    # function relied on are no longer provided by SciPy.
    import numpy as np

    n_positions = len(covlist)
    if n_positions == 0:
        # Nothing to score; the pvals[0] assignment below would fail.
        return [], []

    # NOTE(review): the window never starts before index 1, so position 0
    # is left out of every local mean -- confirm this is intended.
    localmeans = [
        np.mean(covlist[max(1, i - window_size):
                        min(i + window_size + 1, n_positions)])
        for i in range(n_positions)
    ]
    pvals = [poisson.cdf(cov, localmeans[i]) for i, cov in enumerate(covlist)]
    pvals[0] = 1
    # A p-value of exactly 0 would give an infinite score; it maps to 0.
    scores = [(-10 * np.log10(pv)) if pv != 0 else 0 for pv in pvals]
    return scores, localmeans
def poissonTailTest(counts, eventRate, oneSided=True):
    """Return Poisson tail probabilities for ``counts`` given ``eventRate``.

    Each count is reflected to the lower side of the rate
    (``eventRate - |count - eventRate|``) and the Poisson cdf is evaluated
    there.  The result is doubled when ``oneSided`` is false.
    """
    observed = array(counts)
    distance = abs(observed - eventRate)
    tail = poisson.cdf(eventRate - distance, eventRate)
    if oneSided:
        return tail
    tail *= 2
    return tail
def testprobabilitiespoisson(self):
    """Check that the model reproduces the Poisson probabilities.

    The expected 4 x 3 table holds P(0), P(1) and P(>1) for the mean
    computed from the first coefficient set.  It must equal the output of
    ``self.model.calc_probabilities`` element for element.
    """
    exp_value = self.data.calculate_equation(self.coefficients[0])
    columns = (
        poisson.pmf(0, exp_value),
        poisson.pmf(1, exp_value),
        1 - poisson.cdf(1, exp_value),
    )
    prob = zeros((4,3))
    for col, values in enumerate(columns):
        prob[:,col] = values

    prob_model = self.model.calc_probabilities(self.data)
    prob_diff = all(prob == prob_model)
    self.assertEqual(True, prob_diff)
def main():
    """Print the smallest per-document term count that is unlikely to occur
    anywhere in the collection.

    The term occurs a Poisson number of times per document with mean
    ``freq / num_docs``.  The printed value is ``i + 1`` for the first
    ``i`` at which the chance that some document exceeds ``i`` occurrences
    drops below 1%.
    """
    freq = 1020
    num_docs = 31254
    lambda_ = freq / num_docs
    i = 0
    # poisson.cdf(i, lambda_) is the probability that the term occurs <= i
    # times in one document.  Raising it to num_docs gives "no document
    # exceeds i".
    while not (1 - poisson.cdf(i, lambda_) ** num_docs < 0.01):
        i += 1
    print(i + 1)
def getPval(alignedReads):
    """Return the Poisson probability of seeing more mismatches than
    observed across all reads aligned to a junction.

    The observed count is ``ceil(sum(readStat) / -6)``.  The expected
    count is ``globalDecoyMMrate`` times the total number of bases,
    including mates.

    :returns: ``P(X > observed)``, or the string ``"-"`` when there are no
        reads.
    """
    if len(alignedReads) == 0:
        return "-"

    useMMrate = globalDecoyMMrate
    # Total mismatches over all reads, rounded up to the integer the
    # Poisson distribution needs.
    num_mm = int(ceil(sum([x.readStat for x in alignedReads]) / -6.0))
    num_bases = (sum([x.juncRead.readLen for x in alignedReads])
                 + sum([x.useMate.readLen for x in alignedReads
                        if x.useMate is not None]))
    # sf(k) is P(X > k) and stays accurate where 1 - cdf(k) rounds to 0.
    return poisson.sf(num_mm, useMMrate * num_bases)
def _error( value ) : '''Construct frequentist errors using Poisson distribution''' # up error: smallest lambda for which P(n<=nobs|lambda) < (1-0.68268...)/2 = 0.15865... # down error: largest lambda for which P(n>=nobs|lambda) < (1-0.68268...)/2 = 0.15865... lambda_up, lambda_down, step_size = 1.1*value, 0.9*value, float(value)/10 if value == 0 : return (0,1.8410216450100005) # save time with precomputed value if value < 1 : lambda_up, lambda_down, step_size = 1.8, 0.0, 0.1 for i in range(5) : lambda_up -= step_size; lambda_down += step_size; step_size /= 10 while poisson.cdf( value, lambda_up ) > 0.15865525393145705 : lambda_up += step_size while poisson.sf( value-1, lambda_down ) > 0.15865525393145705 : lambda_down -= step_size return (value-lambda_down,lambda_up-value)
def add_gc_bias(meancoverages, targetcoverage):
    """Map one random Poisson quantile onto every mean coverage.

    A single value is drawn from Poisson(``targetcoverage``) and converted to
    its cumulative probability. For each entry of ``meancoverages`` the count
    at that same cumulative probability under Poisson(entry) is returned.

    :param meancoverages: iterable of mean coverages; a mean of 0 maps to 0.
    :param targetcoverage: mean of the Poisson distribution that is sampled.
    :return: list of integer coverages, one per input mean.
    """
    draw = poisson.rvs(targetcoverage)
    # cdf(x, mu): cumulative probability of the draw under the target mean.
    cumprob = poisson.cdf(draw, targetcoverage)
    # ppf(q, mu): inverse of the cdf, i.e. the count at quantile q.
    return [
        0 if cov == 0 else int(poisson.ppf(cumprob, cov))
        for cov in meancoverages
    ]
def _pack_marginals(rows):
    """Pack per-variable vectors into one array, tolerating unequal lengths."""
    if len({len(row) for row in rows}) <= 1:
        # All vectors share one length (or there are none): regular array.
        return np.array(rows)
    # Ragged input: np.array() raises on recent NumPy, so fill an object
    # array element by element instead.
    packed = np.empty(len(rows), dtype=object)
    for idx, row in enumerate(rows):
        packed[idx] = row
    return packed


def poisson_marginals(means, accuracy=1e-10):
    """
    Find the probability mass functions (pmfs), cumulative mass functions
    (cmfs) and approximate supports of a set of Poisson random variables.

    For every mean, the candidate states are ``0 .. max(ceil(5 * mean), 20)``.
    The support keeps the states whose pmf is at least ``accuracy`` and whose
    cmf is at most ``1 - accuracy``.

    Code from the paper: 'Generating spike-trains with specified
    correlations', Macke et al., submitted to Neural Computation

    Adapted from `<http://www.kyb.mpg.de/bethgegroup/code/efficientsampling>`_

    Parameters
    ----------
    means : sequence of float
        The means of the Poisson random variables.
    accuracy : float, optional
        Desired degree of accuracy (default 1e-10).

    Returns
    -------
    pmfs : ndarray
        The k-th element is the pmf of the k-th variable on its support.
    cmfs : ndarray
        The k-th element is the cmf of the k-th variable on its support.
    supps : ndarray
        The k-th element is the vector of integer states in the support of
        the k-th variable, so ``P(X_k == supps[k][i]) == pmfs[k][i]``.

    Each result is a regular 2-D array when all supports have the same
    length, and a 1-D object array of vectors otherwise.
    """
    from scipy.stats import poisson
    import math

    cmfs = []
    pmfs = []
    supps = []

    for mean in means:
        states = range(0, int(max(math.ceil(5 * mean), 20) + 1))
        cmf = poisson.cdf(states, mean)
        pmf = poisson.pmf(states, mean)
        support = np.where((cmf <= 1 - accuracy) & (pmf >= accuracy))[0]
        supps.append(support)
        cmfs.append(cmf[support])
        pmfs.append(pmf[support])

    return _pack_marginals(pmfs), _pack_marginals(cmfs), _pack_marginals(supps)
def estimate_revenue2(self, st_time_index, pk_zone, dp_zone, th,
                      car_type="UberX", zero_threshold=0.1):
    """Return the expected revenue of a trip, weighted by its demand odds.

    ``self.G[st_time_index][pk_zone][dp_zone]`` supplies the record for the
    trip: element 0 is divided by ``self.data_date_range`` to get the Poisson
    mean, and elements 1 and 2 are passed to ``self.compute_fare``.

    :param th: threshold count; the probability used is P(X > th).
    :param car_type: forwarded to ``self.compute_fare``.
    :param zero_threshold: probabilities below this (or NaN) yield 0.
    :return: ``probability * fare``, or 0 when the probability is negligible.
    """
    info = self.G[st_time_index][pk_zone][dp_zone]
    mu = float(info[0]) / self.data_date_range

    # Tail probabilities are memoised on (threshold, mean).
    key_tuple = (th, mu)
    if key_tuple not in self.cached_cdf:
        self.cached_cdf[key_tuple] = 1.0 - poisson.cdf(*key_tuple)
    prob = self.cached_cdf[key_tuple]

    if math.isnan(prob) or prob < zero_threshold:
        return 0

    revenue = self.compute_fare(info[1], info[2], car_type)
    if revenue > 300:
        # Single-argument print: same output on Python 2 and Python 3.
        print("%s %s %s" % (st_time_index, pk_zone, dp_zone))
    return prob * revenue
def prior_calculations(lbda, maxlen, eta, maxlhs):
    """Precompute the Poisson prior terms.

    :param lbda: Poisson mean used for the alpha table.
    :param maxlen: alpha table covers 0..maxlen inclusive.
    :param eta: Poisson mean used for the beta table.
    :param maxlhs: beta table covers 1..maxlhs inclusive.
    :return: ``(beta_Z, logalpha_pmf, logbeta_pmf)`` where ``beta_Z`` is the
        normalization constant ``cdf(maxlhs, eta) - pmf(0, eta)`` and the two
        dicts map each integer to its un-normalized log pmf.
    """
    def _log_alpha(count):
        # Falls back to -inf if the warning surfaces as an exception.
        try:
            return poisson.logpmf(count, lbda)
        except RuntimeWarning:
            return -inf

    # Normalization constant for beta: the mass on 1..maxlhs.
    beta_Z = poisson.cdf(maxlhs, eta) - poisson.pmf(0, eta)

    # Un-normalized log pmfs.
    logalpha_pmf = {count: _log_alpha(count) for count in range(maxlen + 1)}
    logbeta_pmf = {
        count: poisson.logpmf(count, eta) for count in range(1, maxlhs + 1)
    }
    return beta_Z, logalpha_pmf, logbeta_pmf
def getExpected(mu):
    """
    Given a mean coverage mu, determine the AUC, X-intercept, and elbow point
    of a Poisson-distributed, perfectly behaved input sample with the same
    coverage.

    Returns the tuple ``(AUC, XInt, elbow)``.
    """
    # Coverage values up to the upper end of the 99.999% interval.
    upper = poisson.interval(0.99999, mu=mu)[1]
    depths = np.arange(round(upper + 1))

    pmf = poisson.pmf(depths, mu=mu)
    cdf = poisson.cdf(depths, mu=mu)

    # Cumulative share of signal, normalised so the last value is 1.
    cum_signal = np.cumsum(pmf * depths)
    cum_signal /= max(cum_signal)

    first_nonzero = np.nonzero(cum_signal)[0][0]
    XInt = cdf[first_nonzero]
    AUC = sum(pmf * cum_signal)
    elbow = cdf[np.argmax(cdf - cum_signal)]
    return (AUC, XInt, elbow)
def pvalue(lo, hi):
    """Compute p value in window [lo, hi).

    ``data`` and ``mc`` (from the enclosing scope) are summed over the window
    to give the observed count ``d`` and the prediction ``m``.

    Returns 1 when the prediction is zero (the data must then be zero too)
    or when the data lies below the prediction; otherwise returns the Poisson
    probability of observing at least ``d`` given mean ``m``.
    """
    d, m = data[lo:hi].sum(), mc[lo:hi].sum()

    if m == 0:
        # MC prediction is zero. Not sure what then..
        assert d == 0, "Data = {0} where the prediction is zero..".format(d)
        return 1

    if d < m:
        return 1  # "Dips" get ignored.

    # P(X >= d | mean m). The survival function keeps tiny tails non-zero,
    # where 1 - cdf would round to 0 and break the -log(p) below.
    p = poisson.sf(d - 1, m)

    if verbose and edges:
        # Single-argument print: same output on Python 2 and Python 3.
        print("{0:2} {1:2} [{2:8.3f}, {3:8.3f}] {4:7.0f} {5:7.3f} {6:.5f} {7:.2f}".format(
            lo, hi, edges[lo], edges[hi], d, m, p, -log(p)))
    return p
def find_bounds(mean, alpha):
    """
    Find both the lower and upper bounds for a given mean value and
    significance level ``alpha`` in a Poisson distribution.

    Counts are scanned upwards from 0. ``lower`` is the first count whose CDF
    exceeds ``alpha / 2`` and ``upper`` the first whose CDF exceeds
    ``1 - alpha / 2``.

    Returns the tuple ``(lower, upper)``.
    """
    low_cut = alpha / 2.0
    high_cut = 1 - alpha / 2.0

    lower = None
    upper = None
    count = 0
    while lower is None or upper is None:
        cumulative = poisson.cdf(count, mean)
        if upper is None and cumulative > high_cut:
            upper = count
        if lower is None and cumulative > low_cut:
            lower = count
        count += 1
    return lower, upper
def test_poisson():
    """Check ``lk.Poisson`` against scipy's Poisson distribution."""
    # Test we can match a Poisson distribution from scipy
    mu = 2
    dist = lk.Poisson()
    x = np.random.randint(low=0, high=5, size=(10,))

    # The comparisons must be asserted; a bare np.allclose() call only
    # computes a boolean and can never fail the test.
    p1 = poisson.logpmf(x, mu)
    p2 = dist.loglike(x, mu)
    assert np.allclose(p1, p2)

    p1 = poisson.cdf(x, mu)
    p2 = dist.cdf(x, mu)
    assert np.allclose(p1, p2)
def cdf(self, y, f):
    r"""
    Cumulative density function of the likelihood.

    The latent function is mapped to a Poisson rate with ``exp`` when
    ``self.tranfcn`` is ``'exp'`` and with ``softplus`` otherwise.

    Parameters
    ----------
    y: ndarray
        query quantiles, i.e.\  :math:`P(Y \leq y)`.
    f: ndarray
        latent function from the GLM prior (:math:`\mathbf{f} =
        \boldsymbol\Phi \mathbf{w}`)

    Returns
    -------
    cdf: ndarray
        Cumulative density function evaluated at y.
    """
    if self.tranfcn == 'exp':
        rate = np.exp(f)
    else:
        rate = softplus(f)
    return poisson.cdf(y, mu=rate)
def poisson_threshold(dataset):
    """
    Return a score threshold intended to produce ``Conf.alerts_per_day`` or
    more alerts in a day with probability ``Conf.alert_confidence`` or less.

    :param dataset: iterable of per-day score arrays (each sorted)
    :type dataset: iterable of numpy arrays
    :returns: float -- calculated threshold (0.0 when there are no days)
    """
    # Step 1: search for the Poisson mean ``mu`` by repeatedly halving the
    # adjustment, for at most 100 rounds.
    mu = Conf.alerts_per_day
    amount = mu
    error = 1
    for _ in range(100):
        if not (error > Conf.alert_confidence):
            break
        amount = float(amount) / 2
        prob = 1 - poisson.cdf(Conf.alerts_per_day, mu)
        error = math.fabs(Conf.alert_confidence - prob)
        if prob > Conf.alert_confidence:
            mu = mu - amount  # still too likely: lower the mean
        else:
            mu = mu + amount  # overshot: raise it again

    # Step 2: take the score of the mu'th alert of every day and average.
    # A day with too few alerts contributes zero to the sum but still counts
    # as a day, i.e. for such days all alerts should be seen.
    # NOTE(review): confirm that short days are meant to count in the
    # denominator, as the comment above describes.
    numDays = 0
    scoreSum = 0
    for day_scores in dataset:
        if len(day_scores) > int(mu):
            scoreSum += day_scores[int(mu)]
        numDays += 1

    if numDays == 0:
        return 0.0
    return float(scoreSum) / numDays
def getFourthSNP(ipDir, prefix):
    """Write ``<prefix>_4`` from ``<prefix>_3``, prepending a Poisson p-value.

    Every tab-separated line of the input is copied to the output with one
    extra leading column: the probability of seeing at least the observed
    number of variant reads (column 8) under a Poisson model whose mean is
    1% of the depth given by the ``DP=`` entry of column 2.

    :param ipDir: directory holding both files.
    :param prefix: file name prefix; the extension is ``FileExts.SNP``.
    """
    src_path = os.path.join(ipDir, prefix + '_3' + '.' + FileExts.SNP)
    dst_path = os.path.join(ipDir, prefix + '_4' + '.' + FileExts.SNP)

    with open(src_path, 'r') as src, open(dst_path, 'w') as dst:
        for record in src:
            cols = record.strip().split('\t')

            # Depth comes from the first "DP=" entry of the info column.
            # Without one, dpVal stays '' and the multiplication below fails.
            dpVal = next(
                (float(entry[3:]) for entry in cols[1].split(';')
                 if entry.startswith('DP=')),
                '',
            )
            varReads = float(cols[7])

            # P(X >= varReads) for X ~ Poisson(1% of depth).
            poissonCDF = 1 - poisson.cdf(varReads - 1, dpVal * 0.01)

            cols.insert(0, str(poissonCDF))
            dst.write('\t'.join(cols) + '\n')
def poisson_pvalue(scores, window):
    """Compute upper-tail Poisson p-values for a list of scores.

    The scores are histogrammed into bins of width ``window`` starting at 0.
    The Poisson mean is the average of the scores that fall in the first
    most-populated bin (left edge inclusive, right edge exclusive).

    :param scores: integer scores.
    :param window: integer bin width.
    :return: ``(pvalues, mean)`` where ``pvalues[i]`` is P(X > scores[i]).
    """
    import numpy
    from scipy.stats import poisson

    ## histogram
    hist, bin_edges = numpy.histogram(
        scores, range(0, max(scores) + window, window))

    # Locate the first bin holding the maximum count.
    start, end = 0, 0
    peak = max(hist) if len(hist) else None
    for left, right, count in zip(bin_edges[:-1], bin_edges[1:], hist):
        if count == peak:
            start, end = left, right
            break

    # Mean of the scores inside the modal bin.
    in_peak = [s for s in scores if start <= s < end]
    mean = sum(in_peak) / float(len(in_peak))

    pvalues = [1.0 - poisson.cdf(s, mean) for s in scores]
    return pvalues, mean
def P_breakpoints_in_interval(I, q, n):
    """
    Cumulative Poisson probability of breakpoints in an interval.

    param:
        I - length of the interval
        q - breakpoint ratio
        n - number of breakpoints

    Calculates:
        the expected number of breakpoints within the interval, ``q * I``

    returns:
        the Poisson CDF at n for that expectation, i.e. the probability of
        at most n breakpoints in I
    """
    expected_breakpoints = q * I
    return poisson.cdf(n, expected_breakpoints)
def safety(segments, injections, threshold=SAFETY_THRESHOLD):
    """The safety of these segments with respect to vetoing GW signals

    The number of injections that coincide with the veto segments is compared
    with the number expected by chance (injection count scaled by the ratio
    of deadtime to livetime). The Poisson probability of seeing at least that
    many coincidences is then tested against ``threshold``.

    Parameters
    ----------
    segments : `~gwpy.segments.DataQualityFlag`, `~glue.segments.segmentlist`
        the set of segments to test
    injections : `~glue.segments.segmentlist`
        the set of injections against which to compare
    threshold : `float`, optional, default: `SAFETY_THRESHOLD`
        the Poisson probability value used as the decision boundary

    Returns
    -------
    safe : `bool`
        `True` when the computed probability is below ``threshold``,
        `False` otherwise
    """
    # NOTE(review): confirm that "probability below threshold" is the
    # intended meaning of "safe".
    if not isinstance(injections, DataQualityFlag):
        injections = DataQualityFlag(active=injections)

    # segment info
    deadtime = float(abs(segments.active))
    livetime = float(abs(segments.known))

    # injection coincidence: observed versus expected by chance
    numveto = sum(
        1 for inj in injections.active if inj.intersects(segments))
    numexp = len(injections) * deadtime / livetime

    # statistical significance: P(X >= numveto) for X ~ Poisson(numexp)
    prob = 1 - poisson.cdf(numveto - 1, numexp)
    return prob < threshold