def clopper_pearson_binomial(passed, total, sigma=1, CL=None):
    """
    Estimate the exact binomial confidence interval with the Clopper-Pearson method.

    - parameters -
    passed: int or array
        counts of passed elements
    total: int or array
        total elements
    sigma: float [default: 1]
        estimate the CL automatically from the normal distribution at <sigma> sigmas
    CL: None or float [default: None]
        confidence level for the Clopper-Pearson interval. If None, it will be
        estimated automatically from <sigma>

    - returns -
    eff: float or array
        efficiency (<passed>/<total>). If <passed> and <total> are arrays, <eff> is an array
    uncertainties: 1d or 2d array
        array of the uncertainties: <uncertainties>[0] is the lower boundary,
        <uncertainties>[1] the upper one. If <passed> and <total> are arrays,
        <uncertainties>[0] and <uncertainties>[1] are arrays
    """
    eff, notpassed = np.asarray(passed, dtype=float) / total, total - passed
    if CL is None:
        ybeta_low, ybeta_up = 1 - norm.cdf(sigma, 0, 1), norm.cdf(sigma, 0, 1)
    else:
        ybeta_low, ybeta_up = (1 - CL) / 2, (1 + CL) / 2
    el = eff - betaincinv(passed, notpassed + 1, ybeta_low)
    eu = betaincinv(passed + 1, notpassed, ybeta_up) - eff
    if isiterable(el):
        el[np.isnan(el)] = eu[np.isnan(eu)] = 0.
    else:
        if np.isnan(el):
            el = 0.
        if np.isnan(eu):
            eu = 0.
    return eff, np.atleast_2d([el, eu])
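# A minimal, self-contained sketch of the same Clopper-Pearson computation,
# assuming only NumPy and SciPy (the function above additionally relies on
# `norm`, presumably scipy.stats.norm, and `isiterable` from its own module).
import numpy as np
from scipy.special import betaincinv
from scipy.stats import norm

def clopper_pearson_sketch(passed, total, sigma=1.0):
    passed = np.asarray(passed, dtype=float)
    total = np.asarray(total, dtype=float)
    eff = passed / total
    notpassed = total - passed
    y_low, y_up = 1 - norm.cdf(sigma), norm.cdf(sigma)
    lower = betaincinv(passed, notpassed + 1, y_low)   # lower Clopper-Pearson bound
    upper = betaincinv(passed + 1, notpassed, y_up)    # upper Clopper-Pearson bound
    # NaNs appear at passed == 0 or passed == total; zero them as the original does
    return eff, np.nan_to_num(eff - lower), np.nan_to_num(upper - eff)

# e.g. clopper_pearson_sketch(8, 10) -> efficiency 0.8 with asymmetric ~1-sigma errors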
def logistic_fidelity(self):
    # group data and assign state labels
    gnd_features = np.hstack([np.real(self.ground_data.T),
                              np.imag(self.ground_data.T)])
    ex_features = np.hstack([np.real(self.excited_data.T),
                             np.imag(self.excited_data.T)])
    # liblinear wants arrays in C order
    features = np.ascontiguousarray(np.vstack([gnd_features, ex_features]))
    state = np.ascontiguousarray(np.hstack([np.zeros(self.ground_data.shape[1]),
                                            np.ones(self.excited_data.shape[1])]))
    # Set up logistic regression with cross-validation using liblinear.
    # Cs sets the inverse of the regularization strength, which will be optimized
    # through cross-validation. Uses the default Stratified K-Folds
    # CV generator, with 3 folds.
    # This is set up to be as consistent with the MATLAB implementation
    # as I can make it. --GJR
    Cs = np.logspace(-1, 2, 5)
    logreg = LogisticRegressionCV(Cs, cv=3, solver='liblinear')
    logreg.fit(features, state)  # fit the model
    predictions = logreg.predict(features)  # in-place classification
    score = logreg.score(features, state)  # mean accuracy of classification
    N = len(predictions)
    S = np.sum(predictions == state)  # how many we got right
    # now calculate confidence intervals
    c = 0.95
    flo = betaincinv(S + 1, N - S + 1, (1 - c) / 2.)
    fhi = betaincinv(S + 1, N - S + 1, (1 + c) / 2.)
    logger.info("In-place logistic regression fidelity: "
                "{:.2f}% ({:.2f}, {:.2f})".format(100 * score, 100 * flo, 100 * fhi))
def _get_confidence_int(self, Y):
    beta = 5.0
    Y = np.array(Y)
    Z = (1 - Y)
    W = Y * beta
    alpha = W / Z
    L = sc.betaincinv(alpha, beta, .075)
    U = sc.betaincinv(alpha, beta, .925)
    index = Y > .9
    L[index] = .95 * Y[index]
    U[index] = 1.05 * Y[index]
    L[Y < 0.00009] = 0.00009
    index = U < Y
    U[index] = 1.05 * Y[index]
    index = Y < L
    L[index] = 0.95 * Y[index]
    return L, U
def credible_interval(outcomes, c=0.95):
    """
    Calculate the credible interval for a fidelity estimate.
    """
    from scipy.special import betaincinv
    N = outcomes.size
    S = np.count_nonzero(outcomes)
    xlo = betaincinv(S + 1, N - S + 1, (1 - c) / 2.)
    xup = betaincinv(S + 1, N - S + 1, (1 + c) / 2.)
    return xlo, xup
def betacred(k, n, j, p):
    """
    Returns the upper and lower bounds of the credible interval
    based on a beta distribution.
    """
    r = (1.0 - p / 100) / 2
    a = 1
    if j:
        a = 0.5
    l = betaincinv(k + a, n - k + a, r)
    u = betaincinv(k + a, n - k + a, 1.0 - r)
    return (l, u)
def _rank_confidence_band(nranks):
    alpha = 0.01
    n = nranks
    k0 = np.arange(1, n + 1)
    k1 = np.flipud(k0).copy()
    top = betaincinv(k0, k1, 1 - alpha)
    mean = k0 / (n + 1)
    bottom = betaincinv(k0, k1, alpha)
    return (bottom, mean, top)
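# Why this works (a standalone sketch, not part of the original module): under
# the null, the i-th smallest of n uniform p-values follows Beta(i, n - i + 1),
# so betaincinv(i, n - i + 1, q) is that order statistic's q-quantile and the
# (bottom, top) arrays can serve as a pointwise (1 - 2*alpha) band, e.g. for a
# QQ-plot of ranked p-values.
import numpy as np
from scipy.special import betaincinv
from scipy.stats import beta

n, i, alpha = 100, 10, 0.01
lo = betaincinv(i, n - i + 1, alpha)
hi = betaincinv(i, n - i + 1, 1 - alpha)
assert np.isclose(lo, beta.ppf(alpha, i, n - i + 1))
assert np.isclose(hi, beta.ppf(1 - alpha, i, n - i + 1))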
def binomial_conf_interval(x, n, conf=0.95):
    if n == 0:
        left = random.random() * (1 - conf)
        return left, left + conf
    b = special.beta(x + 1, n - x + 1)

    def f(left_a):
        left = max(1e-8, special.betaincinv(x + 1, n - x + 1, left_a))
        right = min(1 - 1e-8, special.betaincinv(x + 1, n - x + 1, left_a + conf))
        top = (right**(x + 1) * (1 - right)**(n - x + 1) * left * (1 - left)
               - left**(x + 1) * (1 - left)**(n - x + 1) * right * (1 - right))
        bottom = (x - n * right) * left * (1 - left) - (x - n * left) * right * (1 - right)
        return top / bottom / b

    left_a = find_root(f, (1 - conf) / 2, bounds=(0, 1 - conf))
    return (special.betaincinv(x + 1, n - x + 1, left_a),
            special.betaincinv(x + 1, n - x + 1, left_a + conf))
def binomial_conf_interval(x, n, conf=0.95):
    assert 0 <= x <= n and 0 <= conf < 1
    if n == 0:
        left = random.random() * (1 - conf)
        return left, left + conf
    bl = float(special.betaln(x + 1, n - x + 1))

    def f(left_a):
        left = max(1e-8, float(special.betaincinv(x + 1, n - x + 1, left_a)))
        right = min(1 - 1e-8, float(special.betaincinv(x + 1, n - x + 1, left_a + conf)))
        top = (math.exp(math.log(right) * (x + 1) + math.log(1 - right) * (n - x + 1)
                        + math.log(left) + math.log(1 - left) - bl)
               - math.exp(math.log(left) * (x + 1) + math.log(1 - left) * (n - x + 1)
                          + math.log(right) + math.log(1 - right) - bl))
        bottom = (x - n * right) * left * (1 - left) - (x - n * left) * right * (1 - right)
        return top / bottom

    left_a = find_root(f, (1 - conf) / 2, bounds=(0, 1 - conf))
    return (float(special.betaincinv(x + 1, n - x + 1, left_a)),
            float(special.betaincinv(x + 1, n - x + 1, left_a + conf)))
def mPERT_sample(mu, a=0.0, b=1.0, gamma=4.0, var=None):
    """Provide a vectorized Modified PERT distribution.

    Parameters
    ----------
    mu : float or ndarray
        Mean value for the PERT distribution.
    a : float or ndarray
        Lower bound for the distribution.
    b : float or ndarray
        Upper bound for the distribution.
    gamma : float or ndarray
        Shape parameter.
    var : float, ndarray or None
        Variance of the distribution. If var is not None, gamma will be
        calculated to meet the desired variance.

    Returns
    -------
    out : float or ndarray
        Samples drawn from the specified mPERT distribution. Shape is the
        broadcasted shape of the input parameters.
    """
    mu, a, b = np.atleast_1d(mu, a, b)
    if var is not None:
        gamma = (mu - a) * (b - mu) / var - 3.0
    alp1 = 1.0 + gamma * ((mu - a) / (b - a))
    alp2 = 1.0 + gamma * ((b - mu) / (b - a))
    u = np.random.random_sample(mu.shape)
    alp3 = sc.betaincinv(alp1, alp2, u)
    return (b - a) * alp3 + a
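# A quick sanity check of the inverse-CDF sampling used above (a standalone
# sketch assuming only NumPy and SciPy; `sc` in the function above is taken to
# be scipy.special): mapping u ~ Uniform(0, 1) through betaincinv(alp1, alp2, u)
# draws a Beta(alp1, alp2) variate, so the rescaled samples should average
# close to the analytic Beta mean.
import numpy as np
from scipy import special as sc

mu, a, b, gamma = 0.3, 0.0, 1.0, 4.0
alp1 = 1.0 + gamma * (mu - a) / (b - a)
alp2 = 1.0 + gamma * (b - mu) / (b - a)
u = np.random.random_sample(100_000)
samples = (b - a) * sc.betaincinv(alp1, alp2, u) + a
print(samples.mean(), (b - a) * alp1 / (alp1 + alp2) + a)  # empirical vs. analytic mean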
def generate_thresholds_SMV(self):
    # this function generates thresholds for the SMV scenario
    nsamples = self.nsamples
    nfeatures = self.nfeatures
    kmax = self.kmax
    alpha_list = self.alpha_list
    threshold_dict = {}  # place to save the set of thresholds used for residual ratio thresholding
    for alpha in alpha_list:
        # we compare the residual ratios with a sequence of thresholds defined using the given value of alpha.
        # however, if that thresholding scheme fails, which happens only at low signal to noise ratio,
        # we gradually increase the value of alpha until we get a successful thresholding.
        # alphas_to_use is this gradually increasing set of thresholds.
        alphas_to_use = 10 ** (np.linspace(np.log10(alpha), np.log10(nfeatures * kmax), 100))
        threshold_alpha = {}
        threshold_alpha['when_rrt_fails'] = []
        for alpha_t in alphas_to_use:
            thres = np.zeros(kmax)
            for k in np.arange(kmax):
                j = k + 1
                a = (nsamples - j) / 2
                b = 1 / 2
                npossibilities = nfeatures - j + 1
                val = alpha_t / (npossibilities * kmax)
                thres[k] = np.sqrt(special.betaincinv(a, b, val))
            if alpha_t == alpha:
                threshold_alpha['direct'] = thres
            else:
                threshold_alpha['when_rrt_fails'].append(thres)
        threshold_alpha['alphas_to_use'] = alphas_to_use
        threshold_dict[alpha] = threshold_alpha
    self.threshold_dict = threshold_dict
    return None
def inverse_binom_cdf_prob(k, N, F):
    """Calculate the trial probability that gives the CDF.

    This gets the trial probability that gives an overall cumulative
    probability for Pr(X <= k; N, p) = F

    Parameters
    ----------
    k : int
        Maximum number of successes.
    N : int
        Total number of trials.
    F : float
        The cumulative probability for (k, N).

    Returns
    -------
    p : float
        The trial probability.
    """
    # This uses the result that we can write the cumulative probability of a
    # binomial in terms of an incomplete beta function
    import scipy.special as sp
    return sp.betaincinv(k + 1, N - k, 1 - F)
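# A quick check of the binomial/incomplete-beta identity used above (this
# verification snippet is not part of the original module): since
# Pr(X <= k; N, p) = 1 - I_p(k + 1, N - k), inverting at a target F gives
# p = betaincinv(k + 1, N - k, 1 - F).
import numpy as np
from scipy.special import betaincinv
from scipy.stats import binom

k, N, F = 3, 20, 0.9
p = betaincinv(k + 1, N - k, 1 - F)
assert np.isclose(binom.cdf(k, N, p), F)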
def qf(self, p, alpha, beta):
    r"""
    Quantile function for the Beta distribution.

    Parameters
    ----------
    p : numpy array or scalar
        The percentiles at which the quantile will be calculated
    alpha : numpy array or scalar
        One shape parameter for the Beta distribution
    beta : numpy array or scalar
        Another shape parameter for the Beta distribution

    Returns
    -------
    q : scalar or numpy array
        The quantiles for the Beta distribution at each value p.

    Examples
    --------
    >>> import numpy as np
    >>> from surpyval import Beta
    >>> p = np.array([.1, .2, .3, .4, .5])
    >>> Beta.qf(p, 3, 4)
    array([0.20090888, 0.26864915, 0.32332388, 0.37307973, 0.42140719])
    """
    return betaincinv(alpha, beta, p)
def generate_thresholds_robust_regression(self):
    # this function generates thresholds for robust regression
    nsamples = self.nsamples
    nfeatures = self.nfeatures
    kmax = self.kmax
    alpha_list = self.alpha_list
    if nfeatures > nsamples:
        raise Exception('Must satisfy nfeatures < nsamples. This technique is for low dimensional '
                        'dense regression with sparse outliers. High dimensional regression with '
                        'sparse outliers can be posed as a compressive sensing problem')
    threshold_dict = {}  # place to save the set of thresholds used for residual ratio thresholding
    for alpha in alpha_list:
        # we compare the residual ratios with a sequence of thresholds defined using the given value of alpha.
        # however, if that thresholding scheme fails, which happens only at low signal to noise ratio,
        # we gradually increase the value of alpha until we get a successful thresholding.
        # alphas_to_use is this gradually increasing set of thresholds.
        alphas_to_use = 10 ** (np.linspace(np.log10(alpha), np.log10(nsamples * kmax), 100))
        threshold_alpha = {}
        threshold_alpha['when_rrt_fails'] = []
        for alpha_t in alphas_to_use:
            thres = np.zeros(kmax)
            for k in np.arange(kmax):
                # definition of RRT thresholds.
                j = k + 1 + nfeatures
                a = (nsamples - j) / 2
                b = 1 / 2
                npossibilities = nsamples - j + 1
                val = alpha_t / (npossibilities * kmax)
                thres[k] = np.sqrt(special.betaincinv(a, b, val))
            if alpha_t == alpha:
                # save the threshold related to the given alpha separately.
                threshold_alpha['direct'] = thres
            else:
                # save the thresholds to be used when RRT fails
                threshold_alpha['when_rrt_fails'].append(thres)
        threshold_alpha['alphas_to_use'] = alphas_to_use
        threshold_dict[alpha] = threshold_alpha
    self.threshold_dict = threshold_dict
    return None
def Percentile(self, ps):
    """Returns the given percentiles from this distribution.

    ps: scalar, array, or list of [0-100]
    """
    ps = np.asarray(ps) / 100
    xs = special.betaincinv(self.alpha, self.beta, ps)
    return xs
def mPERT_sample(mu, a=0.0, b=1.0, gamma=4.0, var=None):
    mu, a, b = np.atleast_1d(mu, a, b)
    if var is not None:
        gamma = (mu - a) * (b - mu) / var - 3.0
    alp1 = 1.0 + gamma * ((mu - a) / (b - a))
    alp2 = 1.0 + gamma * ((b - mu) / (b - a))
    u = np.random.random_sample(mu.shape)
    alp3 = sc.betaincinv(alp1, alp2, u)
    return (b - a) * alp3 + a
def get_ortho_haar_theta(units: int, num_layers: int,
                         hadamard: bool) -> Union[Tuple[np.ndarray, np.ndarray],
                                                  Tuple[tf.Variable, tf.Variable],
                                                  tf.Variable]:
    alpha_checkerboard = get_alpha_checkerboard_general(units, num_layers)
    theta_0_root = alpha_checkerboard.T[::2, ::2] - 1
    theta_1_root = alpha_checkerboard.T[1::2, 1::2] - 1
    theta_0_init = 2 * np.arcsin(
        betaincinv(0.5 * theta_0_root, 0.5, np.random.rand(*theta_0_root.shape)))
    theta_1_init = 2 * np.arcsin(
        betaincinv(0.5 * theta_1_root, 0.5, np.random.rand(*theta_1_root.shape)))
    if not hadamard:
        theta_0_init = np.pi - theta_0_init
        theta_1_init = np.pi - theta_1_init
    return (theta_0_init.astype(dtype=NP_FLOAT),
            theta_1_init.astype(dtype=NP_FLOAT))
def _rank_confidence_band(nranks, significance_level, ok):
    from numpy import arange, flipud, ascontiguousarray
    from scipy.special import betaincinv

    alpha = significance_level

    k0 = arange(1, nranks + 1)
    k1 = flipud(k0).copy()

    k0 = ascontiguousarray(k0[ok])
    k1 = ascontiguousarray(k1[ok])

    my_ok = k1 / k0 / (k1[0] / k0[0]) > 1e-4
    k0 = ascontiguousarray(k0[my_ok])
    k1 = ascontiguousarray(k1[my_ok])

    top = betaincinv(k0, k1, 1 - alpha)
    bottom = betaincinv(k0, k1, alpha)

    return (my_ok, bottom, top)
def main():
    VRP_cost = 0
    VRP_Route = []
    for k in range(nber_of_vehicles):
        n = len(Repartition[1][k])
        res = str(Repartition[1][k])[1:-1]
        Intres = [int(u) for u in res if u.isdigit()]
        Intres.insert(0, depot)
        ResCity = [cityList[int(u)] for u in res if u.isdigit()]
        ResCity.insert(0, cityList[depot])
        for k in range(3):
            bestRoute = geneticAlgorithmPlot(population=ResCity, popSize=100,
                                             eliteSize=20, mutationRate=0.01,
                                             generations=500)
            bestRouteList = []
            sX = []
            sY = []
            IndexRoute = []
            for j in range(len(bestRoute)):
                bestRouteList.append((bestRoute[j].x, bestRoute[j].y))
                sX.append(bestRoute[j].x)
                sY.append(bestRoute[j].y)
                IndexRoute.append(key_list[val_list.index(bestRouteList[j])])
            sX.append(bestRoute[0].x)
            sY.append(bestRoute[0].y)
            # plotPath(sX, sY)
            # print(IndexRoute)
            crowd.append(IndexRoute)
        # print(agg_matrix(crowd))
        agg = agg_matrix(crowd)
        Inv_Agg = np.zeros((n, n))
        for k in range(n):
            for j in range(n):
                Inv_Agg[k, j] = 1 - sc.betaincinv(2.8, 3.2, agg[k, j] / n)
        r = range(n)
        dist = {(i, j): Inv_Agg[i, j] for i in r for j in r}
        aggRoute = tsp.tsp(r, dist)[1]
        sortedaggRoute = [Intres[i] for i in aggRoute]
        # import pdb; pdb.set_trace()
        cost = 0
        for u in range(n - 1):
            cost += City.distance(ResCity[u], ResCity[u + 1])
        cost += City.distance(ResCity[n - 1], ResCity[0])
        # import pdb; pdb.set_trace()
        VRP_cost += cost
        VRP_Route.append(sortedaggRoute)
    print(VRP_Route, VRP_cost)
'''
def predictRecallMedian(prior, tnow, percentile=0.5):
    """Median (or percentile) of the immediate recall probability.

    Same arguments as `ebisu.predictRecall`, see that docstring for details.

    An extra keyword argument, `percentile`, is a float between 0 and 1, and
    specifies the percentile rather than 50% (median).
    """
    # [1] `Integrate[p**((a-t)/t) * (1-p**(1/t))**(b-1) / t / Beta[a,b], p]`
    # and see "Alternate form assuming a, b, p, and t are positive".
    from scipy.special import betaincinv
    alpha, beta, t = prior
    dt = tnow / t
    return betaincinv(alpha, beta, percentile)**dt
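# Why the quantile simply maps through the power (a sanity-check sketch, not
# part of ebisu): if p ~ Beta(alpha, beta) is the recall probability at the
# prior's half-life t, the model's recall at tnow is p**(tnow/t); because
# x**dt is monotone increasing on [0, 1], the q-quantile of p**dt is just
# (q-quantile of p)**dt.
import numpy as np
from scipy.special import betaincinv

alpha, beta, t, tnow, q = 3.0, 3.0, 1.0, 2.5, 0.5
dt = tnow / t
analytic = betaincinv(alpha, beta, q) ** dt
empirical = np.quantile(np.random.beta(alpha, beta, 200_000) ** dt, q)
print(analytic, empirical)  # should agree closely up to Monte Carlo error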
def binom_conf_interval(k, n, conf=0.68269):
    """Binomial proportion confidence interval given k successes, n trials,
    adopting a Bayesian approach with the Jeffreys prior."""
    if conf < 0.0 or conf > 1.0:
        raise ValueError("conf must be between 0. and 1.")
    alpha = 1.0 - conf

    k = np.asarray(k).astype(int)
    n = np.asarray(n).astype(int)

    if (n <= 0).any():
        log.warning("%(funcName)s: n must be positive")
        return 0, 0
    if (k < 0).any() or (k > n).any():
        log.warning("%(funcName)s: k must be in {0, 1, .., n}")
        return 0, 0

    lowerbound = betaincinv(k + 0.5, n - k + 0.5, 0.5 * alpha)
    upperbound = betaincinv(k + 0.5, n - k + 0.5, 1.0 - 0.5 * alpha)

    # Set lower or upper bound to k/n when k/n = 0 or 1
    # We have to treat the special case of k/n being scalars,
    # which is an ugly kludge
    if lowerbound.ndim == 0:
        if k == 0:
            lowerbound = 0.0
        elif k == n:
            upperbound = 1.0
    else:
        lowerbound[k == 0] = 0
        upperbound[k == n] = 1

    conf_interval = np.array([lowerbound, upperbound])
    return conf_interval
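# A minimal usage sketch of the equal-tailed Jeffreys interval computed above
# (standalone; does not need the surrounding module): the posterior for the
# proportion is Beta(k + 1/2, n - k + 1/2), and betaincinv is its quantile
# function, so the bounds are the alpha/2 and 1 - alpha/2 posterior quantiles.
import numpy as np
from scipy.special import betaincinv

k, n, conf = 4, 5, 0.68269
alpha = 1.0 - conf
lo = betaincinv(k + 0.5, n - k + 0.5, 0.5 * alpha)
hi = betaincinv(k + 0.5, n - k + 0.5, 1.0 - 0.5 * alpha)
print(lo, hi)  # an equal-tailed ~68% credible interval around k/n = 0.8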
def vangel_approx(self, n=None, i=None, j=None, p=None, g=None):
    if n is None:
        n = self.n
    if i is None:
        i = 1
    if j is None:
        j = self.j + 1
    if p is None:
        p = self.p
    if g is None:
        g = self.g
    betatmp = betainc(j, n - j + 1, p)
    a = g - betatmp
    b = 1.0 - betatmp
    q = betaincinv(i, j - i, a / b)
    return np.log((p * (n + 1)) / j) / np.log(q)
def ppf(self, y):
    """Percent point function (inverse cumulative distribution).

    Requires SciPy.

    Parameters
    ----------
    y : ndarray
        Cumulative probabilities in [0, 1].

    Returns
    -------
    ndarray
        Evaluation points `x` in [0, 1] such that `P(X <= x) = y`.
    """
    from scipy.special import betaincinv
    sq_x = betaincinv(self.m / 2.0, self.n / 2.0, y)
    return np.sqrt(sq_x)
def ppf(self, y):
    """Percent point function (inverse cumulative distribution).

    .. note:: Requires SciPy.

    Parameters
    ----------
    y : array_like
        Cumulative probabilities in [0, 1].

    Returns
    -------
    ppf : array_like
        Evaluation points ``x`` in [0, 1] such that ``P(X <= x) = y``.
    """
    from scipy.special import betaincinv
    sq_x = betaincinv(self.m / 2.0, self.n / 2.0, y)
    return np.sqrt(sq_x)
def main():
    for k in range(2):
        bestRoute = geneticAlgorithmPlot(population=cityList, popSize=100,
                                         eliteSize=20, mutationRate=0.01,
                                         generations=500)
        bestRouteList = []
        sX = []
        sY = []
        IndexRoute = []
        for j in range(len(bestRoute)):
            bestRouteList.append((bestRoute[j].x, bestRoute[j].y))
            sX.append(bestRoute[j].x)
            sY.append(bestRoute[j].y)
            # import pdb; pdb.set_trace()
            IndexRoute.append(key_list[val_list.index(bestRouteList[j])])
        sX.append(bestRoute[0].x)
        sY.append(bestRoute[0].y)
        # plotPath(sX, sY)
        # print(IndexRoute)
        crowd.append(IndexRoute)
    # print(agg_matrix(crowd))
    # import pdb; pdb.set_trace()
    agg = agg_matrix(crowd)
    Inv_Agg = np.zeros((n, n))
    for k in range(n):
        for j in range(n):
            Inv_Agg[k, j] = 1 - sc.betaincinv(2.8, 3.2, agg[k, j] / n)
    r = range(n)
    dist = {(i, j): Inv_Agg[i, j] for i in r for j in r}
    aggRoute = tsp.tsp(r, dist)[1]
    cost = 0
    for u in range(n - 1):
        cost += R_D[aggRoute[u], aggRoute[u + 1]]
    cost += R_D[n - 1, 0]
    print(aggRoute, cost)
'''
def f(left_a):
    left = max(1e-8, special.betaincinv(x + 1, n - x + 1, left_a))
    right = min(1 - 1e-8, special.betaincinv(x + 1, n - x + 1, left_a + conf))
    top = (right**(x + 1) * (1 - right)**(n - x + 1) * left * (1 - left)
           - left**(x + 1) * (1 - left)**(n - x + 1) * right * (1 - right))
    bottom = (x - n * right) * left * (1 - left) - (x - n * left) * right * (1 - right)
    return top / bottom / b
def ppf(self, y):
    from scipy.special import betaincinv
    y_reflect = np.where(y < 0.5, y, 1 - y)
    z_sq = betaincinv(self.m / 2.0, 0.5, 2 * y_reflect)
    x = np.arcsin(np.sqrt(z_sq)) / np.pi
    return np.where(y < 0.5, x, 1 - x)
def get_invBeta(self) -> None:
    self.invBeta = sc.betaincinv(0.5 * self.nu, 0.5, 1 - self.confidence_level)
def binom_conf_interval(k, n, conf=0.68269, interval='wilson'):
    r"""Binomial proportion confidence interval given k successes, n trials.

    Parameters
    ----------
    k : int or numpy.ndarray
        Number of successes (0 <= ``k`` <= ``n``).
    n : int or numpy.ndarray
        Number of trials (``n`` > 0). If both ``k`` and ``n`` are arrays,
        they must have the same shape.
    conf : float in [0, 1], optional
        Desired probability content of interval. Default is 0.68269,
        corresponding to 1 sigma in a 1-dimensional Gaussian distribution.
    interval : {'wilson', 'jeffreys', 'flat', 'wald'}, optional
        Formula used for confidence interval. See notes for details. The
        ``'wilson'`` and ``'jeffreys'`` intervals generally give similar
        results, while 'flat' is somewhat different, especially for small
        values of ``n``. ``'wilson'`` should be somewhat faster than
        ``'flat'`` or ``'jeffreys'``. The 'wald' interval is generally not
        recommended. It is provided for comparison purposes. Default is
        ``'wilson'``.

    Returns
    -------
    conf_interval : numpy.ndarray
        ``conf_interval[0]`` and ``conf_interval[1]`` correspond to the lower
        and upper limits, respectively, for each element in ``k``, ``n``.

    Notes
    -----
    In situations where a probability of success is not known, it can be
    estimated from a number of trials (N) and number of observed successes
    (k). For example, this is done in Monte Carlo experiments designed to
    estimate a detection efficiency. It is simple to take the sample
    proportion of successes (k/N) as a reasonable best estimate of the true
    probability :math:`\epsilon`. However, deriving an accurate confidence
    interval on :math:`\epsilon` is non-trivial. There are several formulas
    for this interval (see [1]_). Four intervals are implemented here:

    **1. The Wilson Interval.** This interval, attributed to Wilson [2]_,
    is given by

    .. math::

        CI_{\rm Wilson} = \frac{k + \kappa^2/2}{N + \kappa^2}
        \pm \frac{\kappa n^{1/2}}{n + \kappa^2}
        \left(\hat{\epsilon}(1 - \hat{\epsilon}) + \kappa^2/(4n)\right)^{1/2}

    where :math:`\hat{\epsilon} = k / N` and :math:`\kappa` is the number of
    standard deviations corresponding to the desired confidence interval for
    a *normal* distribution (for example, 1.0 for a confidence interval of
    68.269%). For a confidence interval of 100(1 - :math:`\alpha`)%,

    .. math::

        \kappa = \Phi^{-1}(1-\alpha/2) = \sqrt{2}{\rm erf}^{-1}(1-\alpha).

    **2. The Jeffreys Interval.** This interval is derived by applying
    Bayes' theorem to the binomial distribution with the noninformative
    Jeffreys prior [3]_, [4]_. The noninformative Jeffreys prior is the Beta
    distribution, Beta(1/2, 1/2), which has the density function

    .. math::

        f(\epsilon) = \pi^{-1} \epsilon^{-1/2}(1-\epsilon)^{-1/2}.

    The justification for this prior is that it is invariant under
    reparameterizations of the binomial proportion. The posterior density
    function is also a Beta distribution: Beta(k + 1/2, N - k + 1/2). The
    interval is then chosen so that it is *equal-tailed*: Each tail (outside
    the interval) contains :math:`\alpha`/2 of the posterior probability,
    and the interval itself contains 1 - :math:`\alpha`. This interval must
    be calculated numerically. Additionally, when k = 0 the lower limit is
    set to 0 and when k = N the upper limit is set to 1, so that in these
    cases, there is only one tail containing :math:`\alpha`/2 and the
    interval itself contains 1 - :math:`\alpha`/2 rather than the nominal
    1 - :math:`\alpha`.

    **3. A Flat prior.** This is similar to the Jeffreys interval, but uses
    a flat (uniform) prior on the binomial proportion over the range 0 to 1
    rather than the reparametrization-invariant Jeffreys prior. The
    posterior density function is a Beta distribution: Beta(k + 1, N - k + 1).
    The same comments about the nature of the interval (equal-tailed, etc.)
    also apply to this option.

    **4. The Wald Interval.** This interval is given by

    .. math::

       CI_{\rm Wald} = \hat{\epsilon} \pm
       \kappa \sqrt{\frac{\hat{\epsilon}(1-\hat{\epsilon})}{N}}

    The Wald interval gives acceptable results in some limiting cases.
    Particularly, when N is very large, and the true proportion
    :math:`\epsilon` is not "too close" to 0 or 1. However, as the latter is
    not verifiable when trying to estimate :math:`\epsilon`, this is not
    very helpful. Its use is not recommended, but it is provided here for
    comparison purposes due to its prevalence in everyday practical
    statistics.

    References
    ----------
    .. [1] Brown, Lawrence D.; Cai, T. Tony; DasGupta, Anirban (2001).
       "Interval Estimation for a Binomial Proportion". Statistical
       Science 16 (2): 101-133. doi:10.1214/ss/1009213286

    .. [2] Wilson, E. B. (1927). "Probable inference, the law of succession,
       and statistical inference". Journal of the American Statistical
       Association 22: 209-212.

    .. [3] Jeffreys, Harold (1946). "An Invariant Form for the Prior
       Probability in Estimation Problems". Proc. R. Soc. Lond. A 186
       (1007): 453-461. doi:10.1098/rspa.1946.0056

    .. [4] Jeffreys, Harold (1998). Theory of Probability. Oxford University
       Press, 3rd edition. ISBN 978-0198503682

    Examples
    --------
    Integer inputs return an array with shape (2,):

    >>> binom_conf_interval(4, 5, interval='wilson')
    array([ 0.57921724,  0.92078259])

    Arrays of arbitrary dimension are supported. The Wilson and Jeffreys
    intervals give similar results, even for small k, N:

    >>> binom_conf_interval([0, 1, 2, 5], 5, interval='wilson')
    array([[ 0.        ,  0.07921741,  0.21597328,  0.83333304],
           [ 0.16666696,  0.42078276,  0.61736012,  1.        ]])

    >>> binom_conf_interval([0, 1, 2, 5], 5, interval='jeffreys')
    array([[ 0.        ,  0.0842525 ,  0.21789949,  0.82788246],
           [ 0.17211754,  0.42218001,  0.61753691,  1.        ]])

    >>> binom_conf_interval([0, 1, 2, 5], 5, interval='flat')
    array([[ 0.        ,  0.12139799,  0.24309021,  0.73577037],
           [ 0.26422963,  0.45401727,  0.61535699,  1.        ]])

    In contrast, the Wald interval gives poor results for small k, N.
    For k = 0 or k = N, the interval always has zero length.

    >>> binom_conf_interval([0, 1, 2, 5], 5, interval='wald')
    array([[ 0.        ,  0.02111437,  0.18091075,  1.        ],
           [ 0.        ,  0.37888563,  0.61908925,  1.        ]])

    For confidence intervals approaching 1, the Wald interval for
    0 < k < N can give intervals that extend outside [0, 1]:

    >>> binom_conf_interval([0, 1, 2, 5], 5, interval='wald', conf=0.99)
    array([[ 0.        , -0.26077835, -0.16433593,  1.        ],
           [ 0.        ,  0.66077835,  0.96433593,  1.        ]])
    """
    if conf < 0. or conf > 1.:
        raise ValueError('conf must be between 0. and 1.')
    alpha = 1. - conf

    k = np.asarray(k).astype(int)
    n = np.asarray(n).astype(int)

    if (n <= 0).any():
        raise ValueError('n must be positive')
    if (k < 0).any() or (k > n).any():
        raise ValueError('k must be in {0, 1, .., n}')

    if interval == 'wilson' or interval == 'wald':
        from scipy.special import erfinv
        kappa = np.sqrt(2.) * min(erfinv(conf), 1.e10)  # Avoid overflows.
        k = k.astype(float)
        n = n.astype(float)
        p = k / n

        if interval == 'wilson':
            midpoint = (k + kappa ** 2 / 2.) / (n + kappa ** 2)
            halflength = (kappa * np.sqrt(n)) / (n + kappa ** 2) * \
                np.sqrt(p * (1 - p) + kappa ** 2 / (4 * n))
            conf_interval = np.array([midpoint - halflength,
                                      midpoint + halflength])

            # Correct intervals out of range due to floating point errors.
            conf_interval[conf_interval < 0.] = 0.
            conf_interval[conf_interval > 1.] = 1.
        else:
            midpoint = p
            halflength = kappa * np.sqrt(p * (1. - p) / n)
            conf_interval = np.array([midpoint - halflength,
                                      midpoint + halflength])

    elif interval == 'jeffreys' or interval == 'flat':
        from scipy.special import betaincinv

        if interval == 'jeffreys':
            lowerbound = betaincinv(k + 0.5, n - k + 0.5, 0.5 * alpha)
            upperbound = betaincinv(k + 0.5, n - k + 0.5, 1. - 0.5 * alpha)
        else:
            lowerbound = betaincinv(k + 1, n - k + 1, 0.5 * alpha)
            upperbound = betaincinv(k + 1, n - k + 1, 1. - 0.5 * alpha)

        # Set lower or upper bound to k/n when k/n = 0 or 1
        # We have to treat the special case of k/n being scalars,
        # which is an ugly kludge
        if lowerbound.ndim == 0:
            if k == 0:
                lowerbound = 0.
            elif k == n:
                upperbound = 1.
        else:
            lowerbound[k == 0] = 0
            upperbound[k == n] = 1

        conf_interval = np.array([lowerbound, upperbound])
    else:
        raise ValueError('Unrecognized interval: {0:s}'.format(interval))

    return conf_interval
def ppf(self, y):
    """Evaluates the inverse CDF at the cumulative probabilities ``y``."""
    y_reflect = np.where(y < 0.5, y, 1 - y)
    z_sq = betaincinv(self.m / 2.0, 0.5, 2 * y_reflect)
    x = np.arcsin(np.sqrt(z_sq)) / np.pi
    return np.where(y < 0.5, x, 1 - x)
def get_quantile(self, q, X=None):
    a, b = self._get_alphabeta(X)
    return special.betaincinv(a, b, q)
def bernoulli_trial_probability(m, n):
    c = 0.95
    x1 = betaincinv(m + 1, n - m + 1, (1 - c) / 2)
    x2 = betaincinv(m + 1, n - m + 1, (1 + c) / 2)
    return x1, x2
def ppf(self, y):
    y_reflect = np.where(y < 0.5, y, 1 - y)
    z_sq = betaincinv(self.m / 2.0, 0.5, 2 * y_reflect)
    x = np.arcsin(np.sqrt(z_sq)) / np.pi
    return np.where(y < 0.5, x, 1 - x)
def median(aa, bb):
    if aa <= 0 or bb <= 0:
        raise ValueError("aa and bb must be bigger than 0")
    return sp.betaincinv(aa, bb, 1 / 2)
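# A quick check (not part of the original module) that the inverse regularized
# incomplete beta function evaluated at 1/2 is indeed the Beta(aa, bb) median.
import numpy as np
from scipy import special as sp
from scipy import stats

aa, bb = 2.5, 7.0
assert np.isclose(sp.betaincinv(aa, bb, 1 / 2), stats.beta.median(aa, bb))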
                np.sqrt(p * (1 - p) + kappa ** 2 / (4 * n))
            conf_interval = np.array([midpoint - halflength,
                                      midpoint + halflength])
            conf_interval[conf_interval < 0.] = 0.
            conf_interval[conf_interval > 1.] = 1.
            return conf_interval
        else:
            midpoint = p
            halflength = kappa * np.sqrt(p * (1. - p) / n)
            return np.array([midpoint - halflength, midpoint + halflength])
    elif interval == 'jeffreys':
        from scipy.special import betaincinv
        lowerbound = betaincinv(k + 0.5, n - k + 0.5, alpha / 2.)
        upperbound = betaincinv(k + 0.5, n - k + 0.5, 1. - alpha / 2.)
        lowerbound[k == 0] = 0.
        upperbound[k == n] = 1.
        return np.array([lowerbound, upperbound])
    else:
        raise ValueError('Unrecognized interval: {0:s}'.format(interval))


def binned_binom_proportion(x, success, bins=10, range=None, conf=0.68269,
                            interval='wilson'):
    """Binomial proportion and confidence interval in bins of a continuous
    variable `x`.

    Given a set of datapoint pairs where the `x` values are continuously
    distributed and the `success` values are binomial
subs1 = np.array([0, 1, 4, 5, 6, 9, 11, 12, 13, 14, 15, 16, 18, 19, 20, 21])
nans1 = np.isnan(responses[subs1, :]).sum(axis=0)
p1 = np.nan_to_num(responses[subs1, :]).sum(axis=0)

# edhmm group
subs2 = np.array([2, 3, 7, 8, 10, 17])
nans2 = np.isnan(responses[subs2, :]).sum(axis=0)
p2 = np.nan_to_num(responses[subs2, :]).sum(axis=0)

trials = np.arange(126, 161, 1)

# compute 90% Jeffreys interval
n1 = len(subs1)
u1 = betaincinv(p1 + 1 / 2, n1 - nans1 - p1 + 1 / 2, .95)
l1 = betaincinv(p1 + 1 / 2, n1 - nans1 - p1 + 1 / 2, .05)

n2 = len(subs2)
u2 = betaincinv(p2 + 1 / 2, n2 - nans2 - p2 + 1 / 2, .95)
l2 = betaincinv(p2 + 1 / 2, n2 - nans2 - p2 + 1 / 2, .05)

# compute the mean response
m1 = p1 / (n1 - nans1 + 1)
m2 = p2 / (n2 - nans2 + 1)

# make the figure
fig, ax = plt.subplots(1, 2, figsize=(12, 5), sharex=True)
ax[0].plot(trials, m1, label='DU-RW group', color='r')
ax[0].fill_between(trials, u1, l1, color='r', alpha=.2)
ax[0].plot(trials, m2, color='b', label='ED-HMM group')
def f(left_a):
    left = max(1e-8, float(special.betaincinv(x + 1, n - x + 1, left_a)))
    right = min(1 - 1e-8, float(special.betaincinv(x + 1, n - x + 1, left_a + conf)))
    top = (math.exp(math.log(right) * (x + 1) + math.log(1 - right) * (n - x + 1)
                    + math.log(left) + math.log(1 - left) - bl)
           - math.exp(math.log(left) * (x + 1) + math.log(1 - left) * (n - x + 1)
                      + math.log(right) + math.log(1 - right) - bl))
    bottom = (x - n * right) * left * (1 - left) - (x - n * left) * right * (1 - right)
    return top / bottom
valArray1, valArray2 = np.meshgrid(valVect, valVect)
valArray = np.array([valArray1.flatten(), valArray2.flatten()])
print(valArray.shape)
tempss = valArray[0, 1]
print(tempss)

# initial drawing scheme
numChoices = np.size(valArray, 1)
probArray = np.ones((numChoices, 1)) * (1 / numChoices)

# correct vs incorrect counts
correctAndIncorrectCounts = np.ones((numChoices, 2))

for k in np.arange(50):
    drawInd = np.random.choice(numChoices, 1, p=probArray.flatten())

    # update count array depending on response
    correctFlag = 1
    if correctFlag:
        correctAndIncorrectCounts[drawInd, 0] += 1
    else:
        correctAndIncorrectCounts[drawInd, 1] += 1

    # update probabilities based on counts
    probArray = 1 - betaincinv(correctAndIncorrectCounts[:, 0],
                               correctAndIncorrectCounts[:, 1],
                               1 / (1 + 2))
    probArray = probArray / sum(probArray)

print(probArray)