def posterior(x, n, p1, p2):
    """
    Calculates the posterior probability that the probability of developing
    severe side effects falls within a specific range given the data
    """
    if type(n) is not int or n < 1:
        raise ValueError('n must be a positive integer')
    if type(x) is not int or x < 0:
        raise ValueError('x must be an integer that is '
                         'greater than or equal to 0')
    if x > n:
        raise ValueError('x cannot be greater than n')
    if type(p1) is not float or p1 < 0 or p1 > 1:
        raise ValueError('p1 must be a float in the range [0, 1]')
    if type(p2) is not float or p2 < 0 or p2 > 1:
        raise ValueError('p2 must be a float in the range [0, 1]')
    if p2 <= p1:
        raise ValueError('p2 must be greater than p1')
    cdf_beta1 = beta.cdf(p1, x + 1, n - x + 1)
    cdf_beta2 = beta.cdf(p2, x + 1, n - x + 1)
    my_posterior = cdf_beta2 - cdf_beta1
    return my_posterior
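# Usage sketch for posterior() above (the counts and range below are illustrative,
# not from the source): with a uniform prior the posterior on p is Beta(x+1, n-x+1).
from scipy.stats import beta

print(posterior(26, 130, 0.17, 0.23))  # P(0.17 < p < 0.23 | 26 events in 130 trials)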
def warp_input(self, X, alpha=None, beta=None):
    bounds = np.array(self.bounds)
    if alpha is None:
        alpha = self._alpha
    if beta is None:
        beta = self._beta
    if X is None:
        return None
    X = np.array(X)
    X_warped = np.empty(X.shape)
    for n in range(self.ndim):
        # hack to deal with the numpy shapes problem (1-D vs 2-D input); this should be fixed
        try:
            X_warped[:, n:n + 1] = (X[:, n:n + 1] - bounds[n, 0]) / (bounds[n, 1] - bounds[n, 0])
            # use beta CDF warping
            X_warped[:, n:n + 1] = beta_dist.cdf(X_warped[:, n:n + 1], alpha[n], beta[n])
            X_warped = (bounds[n, 1] - bounds[n, 0]) * X_warped + bounds[n, 0]
        except IndexError:
            # 1-D input: warp the whole vector with this dimension's parameters
            X_warped = (X - bounds[n, 0]) / (bounds[n, 1] - bounds[n, 0])
            # use beta CDF warping
            X_warped[:] = beta_dist.cdf(X_warped[:], alpha[n], beta[n])
            X_warped = (bounds[n, 1] - bounds[n, 0]) * X_warped + bounds[n, 0]
    return X_warped
def create_gauss_scalingfactors(cdfs):
    """Create the scaling factor distributions needed to sample uncertainty
    in the Gaussian non-CO2 radiative forcings"""
    # Scale based on combined Gaussian components
    rf_2011_mid = np.array([1.82, 2.83, 0.35, 0.07, -0.15, 0.04, -0.9])
    rf_2011_up = [2.18, 3.4, 0.559, 0.121, -0.047, 0.09, -0.1]
    rf_2011_low = [1.46, 2.26, 0.141, 0.019, -0.253, 0.019, -1.9]
    # Estimate sd using 5-95 intervals
    erf_sigs = (np.array(rf_2011_up) - np.array(rf_2011_low)) / (2 * 1.654)
    sig_wmghg = np.copy(erf_sigs[1])
    # Find the non-CO2 GHG forcing uncertainty
    sig_owmghg = np.sqrt(erf_sigs[1]**2 - erf_sigs[0]**2)
    erf_sigs[1] = sig_owmghg
    sig_tot = np.sqrt(np.sum(erf_sigs[1:-2]**2))
    rf_2011_mid_a = np.copy(rf_2011_mid)
    rf_2011_mid_a[1] = rf_2011_mid[1] - rf_2011_mid[0]
    # Calculate the scaling factors.
    # Derive the scaling factors to span the 5-95% AR5 Gaussian forcing uncertainty,
    # assuming +/- 20% uncertainty in WMGHG forcings.
    # Map the TCR cdf to the forcing scaling cdf using a beta function cdf
    beta_cdf_sf = root(lambda var: 0.05 - beta.cdf(0.5 - 1.0 / 3.0, var, var), x0=2.0).x[0]
    cdfs_gauss = 1.0 - beta.cdf(cdfs, beta_cdf_sf, beta_cdf_sf)
    sf_gauss = (np.sum(rf_2011_mid_a[1:-2]) +
                np.sqrt(2.0) * erfinv(2 * cdfs_gauss - 1) * sig_tot) / np.sum(rf_2011_mid_a[1:-2])
    return sf_gauss
def Beta(length, popularity, be=10):
    # Probability mass of a Beta(popularity, be) distribution in each of
    # `length` equal-width bins on [0, 1]
    x = [i / length for i in range(length + 1)]
    cdfs = [
        beta.cdf(x[i + 1], popularity, be) - beta.cdf(x[i], popularity, be)
        for i in range(length)
    ]
    return cdfs
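# Usage sketch for Beta() above (hypothetical arguments); assumes scipy.stats.beta
# is imported as `beta`, as the function requires. The bin probabilities of a
# distribution supported on [0, 1] should sum to about 1.
from scipy.stats import beta

cdfs = Beta(100, 2.0, be=10)
print(sum(cdfs))  # close to 1.0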
def Jeffrey(self, df, x, y, z):
    # df is a dataframe object that contains columns x, y and z
    # x: aggregation variable (rating grade for PD test)
    # y: modelled variable
    # z: observed variable (expected to be binary)
    # alpha = D + 1/2
    # beta = Nc - D + 1/2
    aggregation = df.groupby(x).agg({
        x: 'count',
        y: ['sum', 'count', 'mean'],
        z: ['sum', 'count', 'mean']
    })
    aggregation['Observed'] = aggregation[(z, 'mean')]
    aggregation['alpha'] = aggregation[(z, 'sum')] + 1 / 2
    aggregation['beta'] = aggregation[(z, 'count')] - aggregation['alpha'] + 1
    aggregation['H0'] = aggregation[(y, 'mean')]
    aggregation['p_val'] = beta.cdf(aggregation['H0'], aggregation['alpha'],
                                    aggregation['beta'])
    aggregation.loc['Portfolio'] = aggregation.sum()
    aggregation['Observed'].loc['Portfolio'] = df.agg({z: 'mean'}).values
    aggregation['alpha'].loc['Portfolio'] = df.agg({z: 'sum'}).values + 1 / 2
    aggregation['beta'].loc['Portfolio'] = (df.agg({z: 'count'}).values -
                                            aggregation['alpha'].loc['Portfolio'] + 1)
    aggregation['H0'].loc['Portfolio'] = df.agg({y: 'mean'}).values
    aggregation['p_val'].loc['Portfolio'] = beta.cdf(
        aggregation['H0'].loc['Portfolio'],
        aggregation['alpha'].loc['Portfolio'],
        aggregation['beta'].loc['Portfolio'])
    return aggregation
def allocated_rollouts(self):
    fraction_of_budget_used = (self.currently_used_budget + 1) / self.max_budget
    yvals = beta.cdf([fraction_of_budget_used], self.alpha_param, self.beta_param)
    fractional_improvment_we_should_be_at = yvals[0]
    # Based on the total overall improvement, how much effort should we have spent?
    number_of_rollouts_we_shouldve_used = fractional_improvment_we_should_be_at * self.max_rollout
    if not self.waste_unused_rollouts:
        # Subtract the already-spent rollouts from the rollouts we should have used
        number_of_free_rollouts = number_of_rollouts_we_shouldve_used - self.currently_used_rollout
    else:
        # Wasting unused rollouts: number_of_free_rollouts = {cdf(budget+1) - cdf(budget)} * max_rollouts
        # for a given cdf, i.e. fixed alpha and beta
        fractional_improvment_we_are_at = \
            beta.cdf([self.currently_used_budget / self.max_budget],
                     self.alpha_param, self.beta_param)[0]
        number_of_rollouts_we_have_used = fractional_improvment_we_are_at * self.max_rollout
        number_of_free_rollouts = number_of_rollouts_we_shouldve_used - number_of_rollouts_we_have_used
    logging.getLogger(self.logger_name).debug(f"Rollouts {number_of_free_rollouts}")
    if number_of_free_rollouts < self.K + 2:
        return self.K + 2
    return int(number_of_free_rollouts)
def PyNy(y_hat, PD_LEN):
    PD_y_hat = y_hat[:PD_LEN]
    PD_y_hat_mean = Mean_1d(PD_y_hat)
    PD_y_hat_var = Variance_1d(PD_y_hat, PD_y_hat_mean)
    PD_a, PD_b = AandB(PD_y_hat_mean, PD_y_hat_var)
    nonPD_y_hat = y_hat[PD_LEN:]
    nonPD_y_hat_mean = Mean_1d(nonPD_y_hat)
    nonPD_y_hat_var = Variance_1d(nonPD_y_hat, nonPD_y_hat_mean)
    N_a, N_b = AandB(nonPD_y_hat_mean, nonPD_y_hat_var)
    cdf_x = np.linspace(0, 1, 1001)
    P_y = beta.cdf(cdf_x, PD_a, PD_b)
    N_y = beta.cdf(cdf_x, N_a, N_b)
    # Comment the plotting lines out if you do not want the plots
    for i in range(len(N_y)):
        N_y[i] = 1 - N_y[i]
    # Plot the fitted CDF curves
    plt.plot(cdf_x, P_y, 'red')   # main emotion
    plt.plot(cdf_x, N_y, 'blue')  # non-emotion
    plt.ylim(0, 2)
    plt.show()
    # Histogram of the y_hat values
    if PD_a > 0:
        plt.hist([y_hat[:PD_LEN], y_hat[PD_LEN:]])
        plt.show()
    return P_y, N_y
def integrand(x):
    # a1, b1, a2, b2 are taken from the enclosing scope
    pdf1 = beta.pdf(x, a1, b1)
    pdf2 = beta.pdf(x, a2, b2)
    cdf1 = beta.cdf(x, a1, b1)
    cdf2 = beta.cdf(x, a2, b2)
    rho = pdf1 * cdf2 + pdf2 * cdf1
    return -rho * np.log(rho)
def f_dp(x, alpha_aa, alpha_c, a_aa0, b_aa0, a_c0, b_c0, a_aa1, b_aa1, a_c1, b_c1):
    f_c = (lambda x_c: alpha_c * beta.cdf(x_c, a_c1, b_c1, 0, 1) +
           (1 - alpha_c) * beta.cdf(x_c, a_c0, b_c0, 0, 1))
    inv_f_c = inversefunc(f_c, domain=[0, 1], open_domain=[False, False])
    return float(
        inv_f_c(alpha_aa * beta.cdf(x, a_aa1, b_aa1) +
                (1 - alpha_aa) * beta.cdf(x, a_aa0, b_aa0)))
def diff_between(k1, n1, k2, n2, p=0.96, a=0.05, b=1, Nsamp=10000):
    """
    Bayesian maximum a posteriori estimate of the difference in prevalence
    when the same test is applied to two groups

    k1 : number of participants significant in group 1 out of
    n1 : total number of participants in group 1
    k2 : number of participants significant in group 2 out of
    n2 : total number of participants in group 2
    p  : coverage for highest-posterior density interval (in [0 1])
    a  : alpha value of within-participant test (default=0.05)
    b  : sensitivity/beta of within-participant test (default=1)
    Nsamp : number of samples from the posterior

    Outputs:
    map       : maximum a posteriori estimate of the difference in prevalence:
                gamma_1 - gamma_2
    post_x    : x-axis for kernel density fit of posterior distribution of the above
    post      : posterior distribution from kernel density fit
    hpdi      : highest-posterior density interval with coverage p
    probGT    : estimated posterior probability that the prevalence is higher in group 1
    logoddsGT : estimated log odds in favour of the hypothesis that the prevalence
                is higher in group 1
    samples   : posterior samples
    """
    # gamma priors = Beta(r,s)
    r1 = 1; s1 = 1
    r2 = 1; s2 = 1
    # Parameters for Beta posteriors
    m11 = k1 + r1
    m12 = n1 - k1 + s1
    m21 = k2 + r2
    m22 = n2 - k2 + s2
    # Generate truncated beta samples
    th1 = beta.ppf(np.random.uniform(beta.cdf(a, m11, m12), beta.cdf(b, m11, m12), Nsamp), m11, m12)
    th2 = beta.ppf(np.random.uniform(beta.cdf(a, m21, m22), beta.cdf(b, m21, m22), Nsamp), m21, m22)
    # vector of estimates of prevalence differences
    samples = (th1 - th2) / (b - a)
    # kernel density estimate of posterior
    post_x = np.linspace(-1, 1, 200)
    kde = sp.stats.gaussian_kde(samples)
    post = kde(post_x)
    map = post_x[np.argmax(post)]
    # Estimate the posterior probability, and log odds, that the prevalence is higher for group 1.
    # Laplace's rule of succession is used to avoid estimates of 0 or 1
    probGT = (np.sum(samples > 0) + 1) / (Nsamp + 2)
    logoddsGT = np.log(probGT / (1 - probGT))
    hpdi = _hpdi(samples, p)
    res = {"map": map, "post_x": post_x, "post": post, "hpdi": hpdi,
           "probGT": probGT, "logoddsGT": logoddsGT, "samples": samples}
    return res
def _Cough_Chen(self):
    """
    Distribution fitted from Chen.
    :return:
    """
    # Small droplets, < 10 micron
    nparticles_small = 230
    k = 3.75
    D_small = numpy.arange(0, 20, 0.1)
    Fcdf_small = gamma.cdf(D_small, 3.75)
    Dsmall_avg = (D_small[:-1] + D_small[1:]) / 2.
    F_small = numpy.diff(Fcdf_small)
    Volume_small = (4.0 / 3.0) * numpy.pi * (Dsmall_avg * 1e-6)**3 * F_small * nparticles_small
    vol_small_ml = (Volume_small.sum() * m**3).asUnit(ml)

    # Medium droplets, 10 micron < x < 225 micron
    # (up to 100 micron evaporates in air)
    nparticles_medium = 210
    params = dict(a=0.2, b=1, loc=53, scale=200)
    D_medium = numpy.arange(10, 100, 1)
    Fcdf_medium = beta.cdf(D_medium, **params)
    Dmedium_avg = (D_medium[:-1] + D_medium[1:]) / 2.
    F_medium = numpy.diff(Fcdf_medium)
    Volume_small = (4.0 / 3.0) * numpy.pi * (Dmedium_avg * 1e-6)**3 * F_medium * nparticles_medium
    vol_medium_ml = (Volume_small.sum() * m**3).asUnit(ml)
    evaporatingDropletsVolume = vol_small_ml + vol_medium_ml

    # Non-evaporating medium droplets
    D_medium = numpy.arange(100, 225, 1)
    Fcdf_medium = beta.cdf(D_medium, **params)
    Dmedium_avg = (D_medium[:-1] + D_medium[1:]) / 2.
    F_medium = numpy.diff(Fcdf_medium)
    Volume_small = (4.0 / 3.0) * numpy.pi * (Dmedium_avg * 1e-6)**3 * F_medium * nparticles_medium
    vol_medium_ml = (Volume_small.sum() * m**3).asUnit(ml)

    # Large droplets
    nparticles_large = 20
    D_large = numpy.arange(225, 800, 1)
    Fcdf_large = uniform.cdf(D_large, loc=225, scale=800 - 225)
    Dlarge_avg = (D_large[:-1] + D_large[1:]) / 2.
    F_large = numpy.diff(Fcdf_large)
    Volume_small = (4.0 / 3.0) * numpy.pi * (Dlarge_avg * 1e-6)**3 * F_large * nparticles_large
    vol_large_ml = (Volume_small.sum() * m**3).asUnit(ml)
    nonEvaporatingDropletsVolume = vol_large_ml + vol_medium_ml
    return evaporatingDropletsVolume, nonEvaporatingDropletsVolume
def rhomax_integrand(x, a1, a2, b1, b2):
    pdf1 = beta.pdf(x, a1, b1)
    pdf2 = beta.pdf(x, a2, b2)
    cdf1 = beta.cdf(x, a1, b1)
    cdf2 = beta.cdf(x, a2, b2)
    rho = pdf1 * cdf2 + pdf2 * cdf1
    integrand = -rho * np.log(rho)
    return integrand
def Rhomax(x, a1, a2, b1, b2):
    # Takes numbers and returns a prob. number
    pdf1 = beta.pdf(x, a1, b1)
    pdf2 = beta.pdf(x, a2, b2)
    cdf1 = beta.cdf(x, a1, b1)
    cdf2 = beta.cdf(x, a2, b2)
    rho = pdf1 * cdf2 + pdf2 * cdf1
    return rho
def bibeta_roc(Y, p):
    def logL(ab):
        a0, b0, a1, b1 = ab
        LL = beta.logpdf(p[Y == 0], a0, b0).sum() + beta.logpdf(p[Y == 1], a1, b1).sum()
        return -LL
    result = minimize(logL, [1, 3, 3, 1], bounds=[(1e-7, None)] * 4)
    a0, b0, a1, b1 = result.x
    threshold = np.linspace(0, 1, 1000)
    fpr = 1 - beta.cdf(threshold, a0, b0)
    tpr = 1 - beta.cdf(threshold, a1, b1)
    return threshold, fpr, tpr
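# Usage sketch for bibeta_roc() above on synthetic scores (all values are illustrative);
# assumes numpy as np, scipy.stats.beta and scipy.optimize.minimize are imported as the
# function requires.
import numpy as np

rng = np.random.default_rng(0)
Y = np.concatenate([np.zeros(200), np.ones(200)])
p = np.concatenate([rng.beta(2, 5, 200), rng.beta(5, 2, 200)])
threshold, fpr, tpr = bibeta_roc(Y, p)
print(fpr[500], tpr[500])  # operating point at a threshold of about 0.5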
def demand_sampler(random_state, size):
    demand = numpy.zeros([size, T, len(I) * len(J)])
    for i_idx, i in enumerate(I):
        for j_idx, j in enumerate(J):
            gamma_k, gamma_theta, beta_a, beta_b = airFare.loc[i, j][:4]
            G = random_state.gamma(gamma_k, gamma_theta, size=size) * (1 - Rate[i])
            for t in range(1, T):
                B = (beta.cdf(1 - day[t] / day[0], beta_a, beta_b) -
                     beta.cdf(1 - day[t - 1] / day[0], beta_a, beta_b))
                demand[:, t, 6 * i_idx + j_idx] = random_state.poisson(G * B)
    return demand
def rhomax_integrand(x, a1, a2, b1, b2):
    # Takes numbers and returns a prob. number
    pdf1 = beta.pdf(x, a1, b1)
    pdf2 = beta.pdf(x, a2, b2)
    cdf1 = beta.cdf(x, a1, b1)
    cdf2 = beta.cdf(x, a2, b2)
    rho = pdf1 * cdf2 + pdf2 * cdf1
    integrand = -rho * np.log(rho)
    return integrand
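# Usage sketch for rhomax_integrand() above (hypothetical Beta parameters): rho is the
# density of max(X1, X2) for independent X1 ~ Beta(a1, b1), X2 ~ Beta(a2, b2), so
# integrating -rho*log(rho) over [0, 1] gives its differential entropy.
from scipy.integrate import quad
from scipy.stats import beta
import numpy as np

entropy, err = quad(rhomax_integrand, 0, 1, args=(2, 5, 3, 4))
print(entropy)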
def target_func_AnotB(z_AnotB):
    if (z_AnotB < 0 or z_AnotB > 1 or
            (pA - z_AnotB * (1 - pB)) / pB < 0 or
            (pA - z_AnotB * (1 - pB)) / pB > 1):
        return sys.float_info.max
    cdf_AnotB = beta.cdf(z_AnotB, a2, b2)
    # substitute z_AB
    cdf_AB = beta.cdf((pA - z_AnotB * (1 - pB)) / pB, a1, b1)
    if cdf_AB == 0 or cdf_AnotB == 0:
        return sys.float_info.max
    return max(cdf_AnotB / cdf_AB, cdf_AB / cdf_AnotB)
def target_func_AB(z_AB):
    if (z_AB < 0 or z_AB > 1 or
            (pA - z_AB * pB) / (1 - pB) < 0 or
            (pA - z_AB * pB) / (1 - pB) > 1):
        return sys.float_info.max
    cdf_AB = beta.cdf(z_AB, a1, b1)
    # substitute z_AnotB
    cdf_AnotB = beta.cdf((pA - z_AB * pB) / (1 - pB), a2, b2)
    if cdf_AB == 0 or cdf_AnotB == 0:
        return sys.float_info.max
    return max(cdf_AnotB / cdf_AB, cdf_AB / cdf_AnotB)
def eva_policy(theta_aa, theta_c, a_aa0, b_aa0, a_aa1, b_aa1, a_c0, b_c0, a_c1, b_c1):
    tpr = []
    fpr = []
    tpr.append(1 - beta.cdf(theta_aa, a_aa1, b_aa1))
    tpr.append(1 - beta.cdf(theta_c, a_c1, b_c1))
    fpr.append(1 - beta.cdf(theta_aa, a_aa0, b_aa0))
    fpr.append(1 - beta.cdf(theta_c, a_c0, b_c0))
    return tpr, fpr
def compute_covariance(self, x, y, diag=False):
    x, y = format_data(x), format_data(y)
    x, y = np.copy(x), np.copy(y)
    assert (x.shape[1] == self.N and y.shape[1] == self.N)
    for i in range(x.shape[1]):
        a, b = (self.parameters[-2 * self.N + 2 * i].value,
                self.parameters[-2 * self.N + 2 * i + 1].value)
        x[:, i] = beta.cdf(x[:, i], a, b)
        y[:, i] = beta.cdf(y[:, i], a, b)
    val = self.kernel.compute_covariance(x, y, diag=diag)
    return val
def hopkins(X: np.array, alpha=0.05):
    '''
    Hopkins statistic: a measure of clustering tendency.
      - Close to 1 is evidence of substructure (clusters)
      - Close to .5 indicates randomly distributed data (no structure)
      - Close to 0 indicates regularity
    Input:
        X: An (n,) or (n,m) list or numpy array
    Output:
        H: float, Hopkins statistic
        D: string, Decision on if we have regularity, no structure, or clusters
        p: float, P-value from the cdf of the Beta(m,m) distribution
    '''
    X = np.array(X)
    if len(X.shape) == 1:
        X = X.reshape(-1, 1)
    d = X.shape[1]
    n = len(X)
    m = int(0.1 * n)
    nbrs = NearestNeighbors(n_neighbors=1).fit(X)
    rand_X = sample(range(0, n, 1), m)
    ujd = []
    wjd = []
    for j in range(0, m):
        # Get distance to a random point
        random_uniform = np.random.uniform(np.min(X, axis=0), np.max(X, axis=0), d).reshape(1, -1)
        u_dist, _ = nbrs.kneighbors(random_uniform, 2, return_distance=True)
        ujd.append(u_dist[0][1]**d)
        # Get distance to another sample
        random_sample = X[rand_X[j]].reshape(1, -1)
        w_dist, _ = nbrs.kneighbors(random_sample, 2, return_distance=True)
        wjd.append(w_dist[0][1]**d)
    denom = np.sum(ujd) + np.sum(wjd)
    if denom == 0:
        raise RuntimeWarning('The Hopkins denominator was 0, cannot proceed')
    else:
        H = np.sum(ujd) / denom
    if H > 0.5:
        p = 1 - beta.cdf(H, m, m)
        if p < alpha:
            D = 'There is evidence of clusters'
        else:
            D = 'There is no evidence of structure'
    else:
        p = beta.cdf(H, m, m)
        if p < alpha:
            D = 'There is evidence of regularity'
        else:
            D = 'There is no evidence of structure'
    return H, D, p
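# Usage sketch for hopkins() above on clearly clustered synthetic data (illustrative);
# assumes numpy as np, sklearn's NearestNeighbors, random.sample and scipy.stats.beta
# are imported as the function requires.
import numpy as np

rng = np.random.RandomState(0)
blobs = np.vstack([rng.normal(0, 0.05, (100, 2)), rng.normal(1, 0.05, (100, 2))])
H, D, p = hopkins(blobs)
print(H, D, p)  # H near 1 for two tight blobs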
def generate_conditional_ps(self):
    # p(TiN|Somatic) and p(TiN|Germline)
    n_af_w = np.zeros([self.number_of_sites, len(self.af)])
    t_af_w = np.zeros([self.number_of_sites, len(self.af)])
    t_het_direction = np.ones([self.number_of_sites, len(self.af)])
    t_het_direction[:, 0:int(np.round(np.true_divide(len(self.af), 2)))] = -1
    for i, f in enumerate(self.af):
        n_af_w[:, i] = self.rv_normal_af.cdf(f) - self.rv_normal_af.cdf(f - 0.005)
        t_af_w[:, i] = self.rv_tumor_af.cdf(f) - self.rv_tumor_af.cdf(f - 0.005)
        # allele count given somatic
        t_af = np.multiply(f, self.n_depth)
        n_ac_given_tin = np.multiply(t_af[:, np.newaxis], self.CN_ratio)
        # allele count given heterozygous
        f_t_af = 0.5 - np.abs((.5 - f))
        psi_t_af = 0.5 - f_t_af
        psi_t_af = np.multiply(psi_t_af, t_het_direction[:, i])
        exp_f = 0.5 + np.multiply(psi_t_af[:, np.newaxis], self.CN_ratio)
        n_het_ac_given_tin = np.multiply(exp_f, self.n_depth[:, np.newaxis])
        for TiN_idx, TiN in enumerate(self.TiN_range):
            self.p_TiN_given_S[:, TiN_idx] += np.multiply(
                beta.cdf(self.normal_f[:] + .01,
                         n_ac_given_tin[:, TiN_idx] + 1,
                         self.n_depth[:] - n_ac_given_tin[:, TiN_idx] + 1) -
                beta.cdf(self.normal_f[:],
                         n_ac_given_tin[:, TiN_idx] + 1,
                         self.n_depth[:] - n_ac_given_tin[:, TiN_idx] + 1),
                t_af_w[:, i])
            self.p_TiN_given_het[:, TiN_idx] += np.multiply(
                beta.cdf(self.normal_f[:] + .01,
                         n_het_ac_given_tin[:, TiN_idx] + 1,
                         self.n_depth[:] - n_het_ac_given_tin[:, TiN_idx] + 1) -
                beta.cdf(self.normal_f[:],
                         n_het_ac_given_tin[:, TiN_idx] + 1,
                         self.n_depth[:] - n_het_ac_given_tin[:, TiN_idx] + 1),
                t_af_w[:, i])
    self.p_artifact = self.rv_tumor_af.cdf(self.normal_f + .01) - self.rv_tumor_af.cdf(self.normal_f)
    self.p_TiN_given_G = (np.multiply(1 - self.p_artifact[:, np.newaxis], self.p_TiN_given_het) +
                          np.multiply(self.p_artifact[:, np.newaxis], 1 - self.p_TiN_given_het))
def calculate_aep_curve(elt, grid_size=2**14, max_loss_factor=5):
    """
    This function calculates the AEP curve of a given ELT
    ----------
    elt : pandas dataframe containing the ELT
    grid_size: grid size used for ep calculations
    max_loss_factor: factor used to estimate max loss

    Returns
    -------
    EPCurve : exceedance probability curve
    """
    elt_lambda = ELT(elt).get_lambda()
    max_loss = _max_loss(elt, max_loss_factor)
    dx = max_loss / grid_size
    xx = np.arange(1, grid_size + 1) * dx
    severity_distribution, severity_density_function = calculate_severity_distribution(
        elt, grid_size, max_loss_factor)
    elt['dx'] = dx / elt['ExpValue']
    elt['xx'] = ''
    elt['FX'] = ''
    elt['FX2'] = ''
    fx2 = [0] * grid_size
    for index, row in elt.iterrows():
        row['xx'] = np.true_divide(xx, row['ExpValue'])
        row['FX'] = beta.cdf(row['xx'], row['alpha'], row['beta'])
        row['FX2'] = beta.cdf(row['xx'][0] - row['dx'] / 2, row['alpha'], row['beta'])
        cdf2 = beta.cdf(row['xx'][:-1] + row['dx'] / 2, row['alpha'], row['beta'])
        cdf1 = beta.cdf(row['xx'][:-1] - row['dx'] / 2, row['alpha'], row['beta'])
        row['FX2'] = np.insert(cdf2 - cdf1, 0, row['FX2'])
        fx2 = fx2 + (row['Rate'] / elt_lambda) * row['FX2']
        elt.at[index] = row
    fx_hat = fft(fx2)
    fs_hat = numpy.exp(-elt_lambda * (1 - fx_hat))
    fs = numpy.real(ifft(fs_hat, norm="forward") / grid_size)
    fs_cum = numpy.cumsum(fs)
    severity_density_function['AEP'] = 1 - fs_cum
    aep = severity_density_function.rename(columns={'AEP': 'Probability', 'threshold': 'Loss'})
    return ep_curve.EPCurve(aep, ep_type=ep_curve.EPType.AEP)
def hpdi(p, k, n, a=0.05, b=1):
    """
    Bayesian highest posterior density interval of population prevalence gamma
    under a uniform prior

    p : HPDI to return (e.g. 0.95 for 95%)
    k : number of participants significant out of
    n : total number of participants
    a : alpha value of within-participant test (default=0.05)
    b : sensitivity/beta of within-participant test (default=1)
    """
    # uniform Beta(1, 1) prior
    r = 1
    s = 1
    b1 = k + r
    b2 = n - k + s
    # truncated beta pdf/cdf/icdf
    tbpdf = lambda x: beta.pdf(x, b1, b2) / (beta.cdf(b, b1, b2) - beta.cdf(a, b1, b2))
    tbcdf = lambda x: (beta.cdf(x, b1, b2) - beta.cdf(a, b1, b2)) / (beta.cdf(b, b1, b2) - beta.cdf(a, b1, b2))
    tbicdf = lambda x: beta.ppf((1 - x) * beta.cdf(a, b1, b2) + x * beta.cdf(b, b1, b2), b1, b2)
    if k == 0:
        x = np.array([a, tbicdf(p)])
    elif k == n:
        x = np.array([tbicdf(1 - p), b])
    else:
        f = lambda x: np.array([tbcdf(x[1]) - tbcdf(x[0]) - p,
                                tbpdf(x[1]) - tbpdf(x[0])])
        x, info, ier, mesg = fsolve(f, np.array([tbicdf((1 - p) / 2), tbicdf((1 + p) / 2)]),
                                    full_output=True)
    # limit to valid theta values
    if (x[0] < a) or (x[1] < x[0]):
        x = np.array([a, tbicdf(p)])
    if x[1] > b:
        x = np.array([tbicdf(1 - p), b])
    hpdi = (x - a) / (b - a)
    return hpdi
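# Usage sketch for hpdi() above (hypothetical counts); assumes numpy as np,
# scipy.stats.beta and scipy.optimize.fsolve are imported as the function requires.
# A 95% HPDI on prevalence when 8 of 20 participants show a significant effect:
print(hpdi(0.95, 8, 20))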
def Beta_Entropy(w0, l0, w1, l1):
    global MC_samples
    seq.reset()
    x = seq.get(MC_samples)
    x = np.reshape(x, MC_samples)
    pdf0 = beta.pdf(x, w0 + 1, l0 + 1)
    pdf1 = beta.pdf(x, w1 + 1, l1 + 1)
    cdf0 = beta.cdf(x, w0 + 1, l0 + 1)
    cdf1 = beta.cdf(x, w1 + 1, l1 + 1)
    rho = pdf0 * cdf1 + pdf1 * cdf0 + 1E-4
    integral = np.mean(-rho * np.log(rho))
    return integral
def func_2b():
    sample_x = np.random.beta(8, 5, 1000)
    sample_y = np.random.beta(4, 7, 1000)
    observed_result = np.zeros((5, 5))
    estimated_result = np.zeros((5, 5))
    difference_result = np.zeros((5, 5))
    estimated_x = [0] * 5
    estimated_y = [0] * 5
    chi_squared_result = 0
    # take (sample_x[i], sample_y[i]) as a sample of (X, Y)
    # divide the 2-way table into 5*5 areas
    for i in range(0, 5):      # row
        for j in range(0, 5):  # column
            row_down = 0.2 * i
            row_up = 0.2 * i + 0.2
            column_down = 0.2 * j
            column_up = 0.2 * j + 0.2
            for sample_index in range(0, 1000):
                if row_down <= sample_y[sample_index] < row_up:
                    if column_down <= sample_x[sample_index] < column_up:
                        observed_result[i][j] += 1
    for i in range(0, 5):
        estimated_x[i] = beta.cdf(0.2 * i + 0.2, 8, 5) - beta.cdf(0.2 * i, 8, 5)
        estimated_y[i] = beta.cdf(0.2 * i + 0.2, 4, 7) - beta.cdf(0.2 * i, 4, 7)
    for i in range(0, 5):
        for j in range(0, 5):
            estimated_result[i][j] = estimated_y[i] * estimated_x[j] * 1000
    difference_result = estimated_result - observed_result
    for i in range(0, 5):
        for j in range(0, 5):
            chi_squared_result += difference_result[i][j]**2 / estimated_result[i][j]
    print("The observed result of RV X&Y is:\n", observed_result)
    print("The result of the chi-squared test =\n", chi_squared_result)
    # The chi-squared threshold can be read from tables; pick 95.0%.
    # The degrees of freedom are 4*4=16, threshold=26.3
    if chi_squared_result <= 26.3:
        print("X & Y can be considered as independent")
    else:
        print("X & Y cannot be considered as independent")
def probf_baharev(df1, df2, noncen, fcrit):
    x = 1 - special.btdtri(df1, df2, fcrit)
    eps = 1.0e-7
    itr_cnt = 0
    f = None
    while itr_cnt <= 10:
        mu = noncen / 2.0
        ql = poisson.ppf(eps, mu)
        qu = poisson.ppf(1 - eps, mu)
        k = qu
        c = beta.cdf(x, df1 + k, df2)
        d = x * (1.0 - x) / (df1 + k - 1.0) * beta.pdf(x, df1 + k - 1, df2, 0)
        p = poisson.pmf(k, mu)
        f = p * c
        p = k / mu * p
        k = qu - 1
        while k >= ql:
            c = c + d
            d = (df1 + k) / (x * (df1 + k + df2 - 1)) * d
            f = f + p * c
            p = k / mu * p
            k = k - 1
        itr_cnt = itr_cnt + 1
    if itr_cnt == 11:
        print("newton iteration failed")
    return f
def checkBlockedUser(a, b, th=0.95):
    # return beta.cdf(0.5, a, b) > th
    s = beta.cdf(0.5, a, b)
    blocked = False
    if s > th:
        blocked = True
    return blocked
def t_inv_beta(p0):
    """
    A function to compute the inverse template for the beta family

    Parameters
    -----------------
    p0: a numpy.ndarray of shape (B,m), where m is the number of null hypotheses
        and B is typically the number of permutations/bootstraps, that contains
        the values on which to apply (column wise) the inverse beta reference family

    Returns
    -----------------
    a numpy.ndarray of shape (B,m) containing the transformed p-values

    Examples
    -----------------
    data = np.random.uniform(0,1,(10,10))
    pr.t_inv_beta(data)
    """
    # Obtain the number of null hypotheses
    m = p0.shape[1]
    # Initialize the matrix of transformed p-values
    transformed_pvalues = np.zeros(p0.shape)
    # Transform each column via the beta cdf:
    # (t_k^B)^{-1}(lambda) = F(lambda), where F (beta.cdf) is the cdf of the
    # beta(k, m+1-k) distribution.
    for k in np.arange(m):
        transformed_pvalues[:, k] = beta.cdf(p0[:, k], k + 1, m + 1 - (k + 1))
    return transformed_pvalues
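# Usage sketch for t_inv_beta() above, mirroring the docstring example: uniform
# p-values transformed column-wise through the Beta(k, m+1-k) cdf.
import numpy as np
from scipy.stats import beta

data = np.random.uniform(0, 1, (10, 10))
print(t_inv_beta(data).shape)  # (10, 10)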
def mid_p_interval(total, passed, conf=0.682689492137, is_upper=True):
    alpha = 1. - conf
    alpha_min = alpha / 2
    vmin = alpha_min if is_upper else (1. - alpha_min)
    tol = 1e-9  # tolerance
    pmin = 0
    pmax = 1
    p = 0
    # treat special case for 0 < passed < 1:
    # do a linear interpolation of the upper limit values
    if passed > 0 and passed < 1:
        p0 = mid_p_interval(total, 0.0, conf, is_upper)
        p1 = mid_p_interval(total, 1.0, conf, is_upper)
        p = (p1 - p0) * passed + p0
        return p
    while abs(pmax - pmin) > tol:
        p = (pmin + pmax) / 2
        # make it work for non-integer counts using the binomial-beta relationship
        v = 0.5 * beta.pdf(p, passed + 1., total - passed + 1) / (total + 1)
        # compute the binomial cdf at passed - 1
        if passed >= 1:
            v += 1 - beta.cdf(p, passed, total - passed + 1)
        if v > vmin:
            pmin = p
        else:
            pmax = p
    return p
def _fap_cvm(freq, psd, psd_best_period):
    """
    Computes false alarm probability for the cvm-distance-minimised beta distribution
    """
    from scipy import optimize
    from scipy.stats import beta
    clip = 0.00001
    temp = np.mean(psd) * (-np.mean(psd) + np.mean(psd) ** 2 + np.var(psd))
    theta_1 = -temp / np.var(psd)
    if theta_1 < 0:
        theta_1 = clip
    theta_2 = (theta_1 - theta_1 * np.mean(psd)) / np.mean(psd)
    if theta_2 < 0:
        theta_2 = clip
    cvm_minimize = optimize.minimize(
        _cvm, [theta_1, theta_2], args=(psd,),
        bounds=((clip, None), (clip, None)),
    )
    fap = 1 - beta.cdf(psd_best_period, cvm_minimize.x[0], cvm_minimize.x[1]) ** len(freq)
    return fap
def mapy2beta(self, y):
    """
    mapping y to beta parameters
    ----------
    self: class object
        hypothesis criteria
    y: array_like
        Input array

    returns
    -------
    pair of beta CDF: Float
        Returns a float.
    """
    a = beta.cdf(y, self._betaA, self._betaB)
    i = 0
    if y < self._ymin:
        return 0
    elif y > self._ymax:
        return 1
    else:
        for i, z in enumerate(self._Y):
            if y < z:
                i = self._Y.index(z)
                break
        return a
def pInitial(self):
    """
    Returns a distribution over initial states (z)
    The distribution is discretized into the number of bins specified by self.bins
    """
    cdf = Beta.cdf(self.quantiles, self.i[0], self.i[1])
    return np.diff(cdf)
def plot_beta(a, b, fill=False):
    xs = np.linspace(0, 1, num=150)
    y = [beta.pdf(x, a, b) for x in xs]
    fills = [x <= 0.5 for x in xs]
    sio = io.BytesIO()
    fig = plt.figure()
    ax1 = fig.add_subplot(111)
    if fill:
        ax1.fill_between(xs, y, 0, where=fills, color='0.8')
    ax1.plot(xs, y)
    fig.savefig(sio, format='png')
    enc = base64.b64encode(sio.getvalue())
    plt.close("all")
    hex = enc.decode('utf-8')
    prob = beta.cdf(0.5, a, b)
    return hex, prob
def getbcdf_pval_swc(self, swc):
    """
    get p-value of beta CDF with candidate sigma weight parameters
    ----------
    returns
    -------
    pval:
    d:
    Y:
    ba:
    bb:
    bcdf: beta.cdf
    """
    Y = [self.kernelizeisw(x, swc) for x in self._data]
    Y.sort()
    Y = featureScaling(Y)
    y_m = np.mean(Y)
    y_v = np.var(Y, ddof=1)
    if math.isnan(y_v) or y_v == 0:
        return 0
    ba = y_m ** 2 * ((1 - y_m) / y_v - 1 / y_m)
    bb = ba * (1 - y_m) / y_m
    bcdf = beta.cdf(Y, ba, bb)
    # Y = featureScaling(Y)
    d, pval = scistats.kstest(Y, lambda cdf: bcdf)
    params = p3c(pval, d, Y, [x for x in swc], ba, bb, 0, bcdf)
    return params
def g(x):
    prob_xx = beta.cdf(x, alpha_param, beta_param) + z
    if prob_xx > 1:  # Numerical precision paranoia
        prob_xx = 1
    xx = beta.ppf(prob_xx, alpha_param, beta_param)
    prob_diff = beta.pdf(x, alpha_param, beta_param) - beta.pdf(xx, alpha_param, beta_param)
    # print(' x=%f, prob_xx=%f, xx=%f, prob_diff=%f' % (x, prob_xx, xx, prob_diff))
    return prob_diff
def wat(x, s_n, ar):
    f = 1
    for i in range(len(ar)):
        if i == s_n:
            # f(Sa|Dt): Dt is information available before reward
            f = f * beta.pdf(x, ar[i][0] + 1, ar[i][1] - ar[i][0] + 1)
        else:
            # F(S < Sa|Dt): Dt is information available before reward
            f = f * beta.cdf(x, ar[i][0] + 1, ar[i][1] - ar[i][0] + 1)
    return f
def pTransition(self, z, x):
    """
    Returns a distribution over transitions to z given the current state (z)
    and observation (x)
    The distribution is discretized into the number of bins specified by self.bins
    """
    alpha = self.w[0] * z + sum([x[i] * w for i, w in enumerate(self.w[1:])])
    cdf = Beta.cdf(self.quantiles, alpha[0], alpha[1])
    return np.diff(cdf)
def kstest(self, x):
    s = Sample(x).set(self.centroid['mean'], self.centroid['std'])
    fss = featurescaling(s, x_min=self._stats_['min'], x_max=self._stats_['max'])
    p = scibeta.cdf(fss, self.alpha, self.beta)
    if Sample.x == Sample.m or Sample.x == Sample.c:
        p = 1. - p
    return 0 if math.isnan(p) else p
def betadist(betaparams, B, pcF):
    # define the beta distribution parameters
    a = float(betaparams['a'].value)
    b = float(betaparams['b'].value)
    loc = float(betaparams['loc'].value)
    scale = float(betaparams['scale'].value)
    # create the fitted data
    model_pcF = beta.cdf(B, a, b, loc=loc, scale=scale)
    # return the residual
    return (model_pcF - pcF)
def _fap_pre(time, freq, psd_best_period):
    """
    Computes false alarm probability for the pre-defined beta distribution
    """
    from scipy.stats import beta
    a = (3 - 1) / 2
    b = (len(time) - 3) / 2
    fap = 1 - beta.cdf(psd_best_period, a, b) ** len(freq)
    return fap
def beta_rain(a, b, Pt, P):
    try:
        assert a > 0
        assert b > 0
        P['c_rain'] = beta.cdf(np.linspace(0, 1, len(P)), a, b) * Pt
        P['i_rain'] = P['c_rain'].diff(periods=1)
        P.fillna(value=0, inplace=True)
        return P
    except Exception:
        print("parameter error/s")
def credible_interval(c, n, CI=0.95, alpha=1, beta=1):
    lower_tail = (1.0 - CI) / 2.0
    upper_tail = 1.0 - lower_tail
    x = np.linspace(0, 1, 1000)
    a = c + alpha
    b = n - c + beta
    mean = a / (a + b)
    cdf = beta_distribution.cdf(x, a, b)
    lower_bound = x[cdf > lower_tail][0]
    upper_bound = x[cdf < upper_tail][-1]
    return mean, lower_bound, upper_bound
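# Usage sketch for credible_interval() above (hypothetical counts); assumes numpy as np
# and scipy.stats.beta imported as beta_distribution, as the function requires.
# Posterior mean and 95% bounds for 7 successes in 20 trials under a Beta(1, 1) prior:
mean, lower, upper = credible_interval(7, 20)
print(mean, lower, upper)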
def beta_slope_profile(numprofiles, max_a, min_a, max_b, min_b, elev, base):
    # generate random parameters for the beta distribution
    a_list = np.random.rand(numprofiles) * (max_a - min_a) + min_a
    b_list = np.random.rand(numprofiles) * (max_b - min_b) + min_b
    b_list = a_list / b_list
    x = np.linspace(0, base, numpts)
    y_list = []
    for j in range(len(a_list)):
        # 3. Generating topo profiles
        y = beta.cdf(x, a_list[j], b_list[j], loc=0, scale=base)
        y_list.append(y[::-1])
    return x, y_list
def __check__(self):
    self.all_distances = [d / max(self.all_distances) for d in self.all_distances]
    mean = numpy.mean(self.all_distances)
    var = numpy.var(self.all_distances, ddof=1)
    ii = len(self.all_distances)
    while var >= (mean * (1 - mean)):
        ii -= 1
        mean = numpy.mean(self.all_distances[:ii])
        var = numpy.var(self.all_distances[:ii], ddof=1)
    alpha1 = mean * (mean * (1 - mean) / var - 1)
    beta1 = alpha1 * (1 - mean) / mean
    for d in sorted(self.all_distances):
        print(d, beta.cdf(d, alpha1, beta1))
def betadist_leastsquare_fitting(B, pcF, ai, bi, loc, scale):
    # define the beta distribution fit parameters
    betaparams = Parameters()
    betaparams.add('a', value=ai, vary=True, min=0.001, max=None)
    betaparams.add('b', value=bi, vary=True, min=0.001, max=None)
    betaparams.add('loc', value=loc, vary=True, min=0, max=loc)
    betaparams.add('scale', value=scale, vary=True, min=scale, max=90)
    # minimize the residuals
    result = minimize(betadist, betaparams, args=(B, pcF))
    Bfit = np.linspace(betaparams['loc'].value,
                       betaparams['scale'] + betaparams['loc'].value, 90 * 10 + 1)
    pcFfit = beta.cdf(Bfit, betaparams['a'].value, betaparams['b'].value,
                      loc=betaparams['loc'].value, scale=betaparams['scale'].value)
    return (Bfit, pcFfit, np.sqrt(np.mean((result.residual)**2)),
            betaparams['a'].value, betaparams['b'].value,
            betaparams['loc'].value, betaparams['scale'].value)
def getProb(post):
    m = post.shape[0]
    prob = np.zeros((m, 1))
    for i in range(m):
        b1, b2, b3 = post[i][:]
        # p = dblquad(integrand, 0.0, 0.5, lambda p2: p2, lambda p2: 1 - p2, args=(b1, b2, b3))[0]
        p = 1 - beta.cdf(0.5, b1, b2)
        # print(p)
        '''
        if b3 == 0:
            p *= gamma(b1+b2)/(gamma(b1)*gamma(b2))
        else:
            p *= gamma(b1+b2+b3)/(gamma(b1)*gamma(b2)*gamma(b3))
        p /= 5000000
        '''
        prob[i] = p
    return prob
def _cvm(param, data):
    """
    Cramer-von-Mises distance for the beta distribution
    """
    from scipy.stats import beta
    a, b = param
    ordered_data = np.sort(data, axis=None)
    sumbeta = 0
    for n in range(len(data)):
        cdf = beta.cdf(ordered_data[n], a, b)
        sumbeta += (cdf - (n - 0.5) / len(data)) ** 2.0
    cvm_dist = (1. / len(data)) * sumbeta + 1. / (12 * (len(data) ** 2.))
    mask = np.isfinite(cvm_dist)
    cvm = cvm_dist[mask]
    return cvm
def _fap_nll(time, freq, psd, psd_best_period):
    """
    Computes false alarm probability for the negative logarithmic
    likelihood-minimised beta distribution
    """
    from scipy import optimize
    from scipy.stats import beta
    a = (3 - 1) / 2
    b = (len(time) - 3) / 2
    clip = 0.00001
    nll_minimize = optimize.minimize(
        _nll, [a, b], args=(psd,),
        bounds=((clip, None), (clip, None)),
    )
    fap = 1 - beta.cdf(psd_best_period, nll_minimize.x[0], nll_minimize.x[1]) ** len(freq)
    return fap
def __check__(self):
    """
    purely for debugging and dev - trying to understand the distribution
    of points in a cluster
    :return:
    """
    self.all_distances = [d / max(self.all_distances) for d in self.all_distances]
    mean = numpy.mean(self.all_distances)
    var = numpy.var(self.all_distances, ddof=1)
    ii = len(self.all_distances)
    while var >= (mean * (1 - mean)):
        ii -= 1
        mean = numpy.mean(self.all_distances[:ii])
        var = numpy.var(self.all_distances[:ii], ddof=1)
    alpha1 = mean * (mean * (1 - mean) / var - 1)
    beta1 = alpha1 * (1 - mean) / mean
    for d in sorted(self.all_distances):
        print(d, beta.cdf(d, alpha1, beta1))
def inverse_beta_ml_band(z, i, N):
    alpha_param = i + 1
    beta_param = N - i + 1
    small = .5
    if i < small:
        x = 0
        xx = beta.ppf(z, alpha_param, beta_param)
    elif i > N - small:
        x = beta.ppf(1.0 - z, alpha_param, beta_param)
        xx = 1
        if np.isnan(x):
            print("NaN failure in beta edge case")
            IPython.embed()
    else:
        x_min = 0
        x_max = beta.ppf(1.0 - z, alpha_param, beta_param)

        def g(x):
            prob_xx = beta.cdf(x, alpha_param, beta_param) + z
            if prob_xx > 1:  # Numerical precision paranoia
                prob_xx = 1
            xx = beta.ppf(prob_xx, alpha_param, beta_param)
            prob_diff = beta.pdf(x, alpha_param, beta_param) - beta.pdf(xx, alpha_param, beta_param)
            # print(' x=%f, prob_xx=%f, xx=%f, prob_diff=%f' % (x, prob_xx, xx, prob_diff))
            return prob_diff

        try:
            # print('*** (x_min,x_max)=%f,%f' % (x_min, x_max))
            x = brentq(g, x_min, x_max)
            # print(' (x)=%f\n' % (x))
        except Exception:
            print("Failure at 'brentq'.")
            IPython.embed()
        prob_xx = beta.cdf(x, alpha_param, beta_param) + z
        if prob_xx > 1:  # Numerical precision paranoia
            prob_xx = 1
        xx = beta.ppf(prob_xx, alpha_param, beta_param)
    larger_x = np.max((x, xx))
    smaller_x = np.min((x, xx))
    return (smaller_x, larger_x)
def guess_beta(num_weights, results, CI=.9, alpha=.3, color='green'):
    num_coins = fc.total_num_coins(results)
    x = np.linspace(0, 1, num_weights)
    y = np.zeros(num_weights)
    lower = np.zeros(num_weights)
    upper = np.zeros(num_weights)
    for i, xx in enumerate(x):
        for num_flips in results:
            for heads in results[num_flips]:
                sum_weight = float(results[num_flips][heads]) / float(num_coins)
                y[i] += sum_weight * beta.cdf(xx, heads + 1, num_flips - heads + 1)
    if y[-1] > 1.0:
        y /= y[-1]
    for i, xx in enumerate(x):
        (lower[i], upper[i]) = nest.inverse_beta_ml_band(CI, y[i] * float(num_coins), num_coins)
    plt.plot(x, y, color=color, label='Estimate with %.1f%% credible interval' % (100.0 * CI))
    ax = plt.gca()
    ax.fill_between(x, lower, upper, alpha=alpha, color=color)
from scipy.stats import beta

# beta.cdf(x, a, b) is 1.0 for any x >= 1, since the Beta distribution lives on [0, 1]
print(beta.cdf(2, 3, 4))
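# For comparison, an argument inside the support gives a non-trivial value:
print(beta.cdf(0.5, 3, 4))  # P(X <= 0.5) for X ~ Beta(3, 4) = 42/64 = 0.65625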
def gridSetting(data, options, Seed):
    # Initialization
    d = np.size(options['borders'], 0)
    X1D = []

    # Equal steps in cumulative distribution
    if options['gridSetType'] == 'cumDist':
        Like1D = np.zeros([options['GridSetEval'], 1])
        for idx in range(d):
            if options['borders'][idx, 0] < options['borders'][idx, 1]:
                X1D.append(np.zeros([1, options['stepN'][idx]]))
                local_N_eval = options['GridSetEval']
                while any(np.diff(X1D[idx]) == 0):
                    Xtest1D = np.linspace(options['borders'][idx, 0],
                                          options['borders'][idx, 1], local_N_eval)
                    alpha = Seed[0]
                    beta = Seed[1]
                    l = Seed[2]
                    gamma = Seed[3]
                    varscale = Seed[4]
                    if idx == 1:
                        alpha = Xtest1D
                    elif idx == 2:
                        beta = Xtest1D
                    elif idx == 3:
                        l = Xtest1D
                    elif idx == 4:
                        gamma = Xtest1D
                    elif idx == 5:
                        varscale = Xtest1D

                    Like1D = likelihood(data, options, [alpha, beta, l, gamma, varscale])
                    Like1D = Like1D + np.mean(Like1D) * options['UniformWeight']
                    Like1D = np.cumsum(Like1D)
                    Like1D = Like1D / max(Like1D)
                    wanted = np.linspace(0, 1, options['stepN'][idx])
                    for igrid in range(options['stepN'][idx]):
                        X1D[idx].append(copy.deepcopy(Xtest1D[Like1D >= wanted, 0, 'first']))  # TODO check
                    local_N_eval = 10 * local_N_eval
            else:
                X1D.append(copy.deepcopy(options['borders'][idx, 0]))

    # Equal steps in cumulative second derivative
    elif options['gridSetType'] in ['2', '2ndDerivative']:
        Like1D = np.zeros([options['GridSetEval'], 1])
        for idx in range(d):
            if options['borders'][idx, 0] < options['borders'][idx, 1]:
                X1D.append(np.zeros([1, options['stepN'][idx]]))
                local_N_eval = options['GridSetEval']
                while any(np.diff(X1D[idx]) == 0):
                    Xtest1D = np.linspace(options['borders'][idx, 0],
                                          options['borders'][idx, 1], local_N_eval)
                    alpha = Seed[0]
                    beta = Seed[1]
                    l = Seed[2]
                    gamma = Seed[3]
                    varscale = Seed[4]
                    if idx == 1:
                        alpha = Xtest1D
                    elif idx == 2:
                        beta = Xtest1D
                    elif idx == 3:
                        l = Xtest1D
                    elif idx == 4:
                        gamma = Xtest1D
                    elif idx == 5:
                        varscale = Xtest1D

                    # calc likelihood on the line
                    Like1D = likelihood(data, options, [alpha, beta, l, gamma, varscale])
                    Like1D = np.abs(np.convolve(np.squeeze(Like1D), np.array([1, -2, 1]), mode='same'))
                    Like1D = Like1D + np.mean(Like1D) * options['UniformWeight']
                    Like1D = np.cumsum(Like1D)
                    Like1D = Like1D / max(Like1D)
                    wanted = np.linspace(0, 1, options['stepN'][idx])
                    for igrid in range(options['stepN'][idx]):
                        X1D[idx].append(copy.deepcopy(Xtest1D[Like1D >= wanted, 0, 'first']))  # TODO check
                    local_N_eval = 10 * local_N_eval
                    if local_N_eval > 10**7:
                        X1D[idx] = np.unique(np.array(X1D))  # TODO check
                        break
            else:
                X1D.append(options['borders'][idx, 0])

    # Different choices for the varscale.
    # We use STD now directly as parametrisation.
    elif options['gridSetType'] in ['priorlike', 'STD', 'exp', '4power']:
        for i in range(4):
            if options['borders'][i, 0] < options['borders'][i, 1]:
                X1D.append(np.linspace(options['borders'][i, 0], options['borders'][i, 1],
                                       options['stepN'][i]))
            else:
                X1D.append(copy.deepcopy(options['borders'][i, 0]))
        if options['gridSetType'] == 'priorlike':
            maximum = b.cdf(options['borders'][4, 1], 1, options['betaPrior'])
            minimum = b.cdf(options['borders'][4, 0], 1, options['betaPrior'])
            X1D.append(b.ppf(np.linspace(minimum, maximum, options['stepN'][4]), 1,
                             options['betaPrior']))
        elif options['gridSetType'] == 'STD':
            maximum = np.sqrt(options['borders'][4, 1])
            minimum = np.sqrt(options['borders'][4, 0])
            X1D.append((np.linspace(minimum, maximum, options['stepN'][4]))**2)
        elif options['gridSetType'] == 'exp':
            p = np.linspace(1, 1, options['stepN'][4])
            X1D.append(np.log(p) / np.log(.1) * (options['borders'][4, 1] - options['borders'][4, 0]) +
                       options['borders'][4, 0])
        elif options['gridSetType'] == '4power':
            maximum = np.sqrt(options['borders'][4, 1])
            minimum = np.sqrt(options['borders'][4, 0])
            X1D.append((np.linspace(minimum, maximum, options['stepN'][4]))**4)

    return X1D
# Imports, the evaluation grid and the head of the parameter list are assumed for this
# excerpt (the grid size below is illustrative).
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import beta

x = np.linspace(0, 1, 100)  # assumed evaluation grid on [0, 1]
params = [
    (0.5, 0.5), (1, 1), (4, 3), (2, 5), (6, 6)
]

for p in params:
    y = beta.pdf(x, p[0], p[1])
    plt.plot(x, y, label="$\\alpha=%s$, $\\beta=%s$" % p)
plt.xlabel("$\\theta$, Fairness")
plt.ylabel("PDF")
plt.legend(title="Parameters")
plt.show()

for p in params:
    y = beta.cdf(x, p[0], p[1])
    plt.plot(x, y, label="$\\alpha=%s$, $\\beta=%s$" % p)
plt.xlabel("$\\theta$, Fairness")
plt.ylabel("CDF")
plt.legend(title="Parameters")
plt.show()

for p in params:
    y = beta.ppf(x, p[0], p[1])
    plt.plot(x, y, label="$\\alpha=%s$, $\\beta=%s$" % p)
plt.xlabel("$\\theta$, Fairness")
plt.ylabel("PPF")
plt.legend(title="Parameters")
plt.show()
def betascores(r):
    x = np.asarray(r, float)
    n = x.size
    x.sort()
    # i-th order statistic of n uniforms ~ Beta(i, n - i + 1)
    p = beta.cdf(x=x, a=np.arange(1, n + 1), b=np.arange(n, 0, -1))
    return p
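# Usage sketch for betascores() above (illustrative p-values): each entry is the
# Beta(i, n-i+1) cdf evaluated at the i-th smallest value, as used in rank-based
# p-value aggregation.
import numpy as np
from scipy.stats import beta

print(betascores([0.4, 0.01, 0.75]))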
def betacdf(j, alpha_a, beta_a, skala):
    return beta.cdf(j, alpha_a, beta_a, loc=0, scale=skala)
print(name, len(times[name]))
print(correct, incorrect)
print(np.mean(correct_times), np.mean(incorrect_times))
# print(sum([1 for c in correct_times if c >= min(incorrect_times)]) / float(len(correct_times)))
# print()
max_time = max(max(correct_times), max(incorrect_times))
min_time = min(min(correct_times), min(incorrect_times))
data = correct_times
data = [(t - min_time) / float(max_time - min_time) for t in data]
# print(data)
a, b, lower, scale = beta.fit(data)
# print(a, b, lower, scale)
# print()
# print(beta.cdf(0.8, a, b))

# ---------------- Fit using moments ----------------
mean = np.mean(data)
var = np.var(data, ddof=1)
alpha1 = mean**2 * (1 - mean) / var - mean
beta1 = alpha1 * (1 - mean) / mean
print(beta.cdf((incorrect_times[-1] - min_time) / (max_time - min_time), alpha1, beta1))
print()
# break
# print(correct_times)
# print(incorrect_times)
# print(times.keys())
# print(user_collection.find_one({"name": "kellinora"}))
def betacdf(j, alpha_a, beta_a, skala):
    return beta.cdf(j, alpha_a, beta_a, loc=0, scale=skala)


def betapdf(j, alpha_a, beta_a, skala):
    return beta.pdf(j, alpha_a, beta_a, loc=0, scale=skala)


while j < akhir:  # simulation time in seconds
    # for each video:
    if j >= multiple_of[0] * 7200:
        TASKS = ([(betacdf, (j, catalog[k]['alpha'], catalog[k]['beta'], skala))
                  for k in range(numbervideo)] +
                 [(betapdf, (j, catalog[k]['alpha'], catalog[k]['beta'], skala))
                  for k in range(numbervideo)])
        for k in range(numbervideo):
            alpha_a = catalog[k]['alpha']
            beta_a = catalog[k]['beta']
            # compute the pdf and cdf
            cdf_a = beta.cdf(j, alpha_a, beta_a, loc=0, scale=skala)
            pdf_a = beta.pdf(j, alpha_a, beta_a, loc=0, scale=skala)
            catalog[k]['cdf'][j] = cdf_a
            catalog[k]['pdf'][j] = pdf_a
        multiple_of.pop(0)
        print(j)
    j += 1

with open('catalog.pickle', 'wb') as handle:
    pickle.dump(catalog, handle)
handle.close()