def _covtest_sampler(cone, eta, sigma, ndraw=1000, mu=None):
    """
    Due to the special structure of the covtest cone constraint, sampling
    is easy with importance weights.
    """
    n = eta.shape[0]
    eta_n = eta / np.linalg.norm(eta)

    results = []
    weights = []

    if mu is None:
        mu = np.zeros(n)

    for _ in range(ndraw):
        Y0 = np.random.standard_normal(n) * sigma + mu
        mu_eta = (mu * eta_n).sum()
        Y0 -= (Y0 * eta_n).sum() * eta_n

        L, _, U = cone.bounds(eta_n, Y0)[:3]
        cdfL = ndtr(-(L - mu_eta) / sigma)
        cdfU = ndtr(-(U - mu_eta) / sigma)
        unif = np.random.sample() * (cdfU - cdfL) + cdfL
        if unif < 0.5:
            tnorm = ndtri(unif) * sigma
        else:
            tnorm = -ndtri(1 - unif) * sigma
        tnorm = -tnorm

        results.append(np.sum(eta * (Y0 + (tnorm + mu_eta) * eta_n)))
        weights.append(np.fabs(cdfL - cdfU))

    family = discrete_family(results, weights)
    return family
def skellam_cdf_root(target1, target2, n=0, tol=1e-8):
    """
    Finds the parameters (a, b) of a Skellam distribution in order to have

        target1 = P(X-Y < n)
        target2 = P(X-Y > n)

    The first guess is estimated through an approximation to the normal
    distribution X-Y ~ N(a-b, a+b), then (with the names used in the code
    below):

        (n - 1/2 - (a-b)) / sqrt(a+b) = phi^-1(target1)     =: phi_d
        (n + 1/2 - (a-b)) / sqrt(a+b) = phi^-1(1 - target2) =: phi_u

        n - 1/2 - (a-b) = sqrt(a+b) * phi_d
        n + 1/2 - (a-b) = sqrt(a+b) * phi_u

        sum_ab  := 1 / (phi_u - phi_d)^2
        diff_ab := .5 * (phi_u + phi_d) / (phi_u - phi_d) - n
    """
    # if not (isinstance(target1, float) and isinstance(target2, float) and
    #         target1 > 0 and target2 > 0 and target1 + target2 < 1):
    if target1 + target2 >= 1 or min(target1, target2) <= 0:
        return [np.NaN, np.NaN]

    phi_d = ndtri(target1)
    phi_u = ndtri(1 - target2)

    sum_ab = 1 / (phi_u - phi_d) ** 2
    diff_ab = 0.5 * (phi_u + phi_d) / (phi_u - phi_d) - n

    floor = 1e-6
    a = max((sum_ab + diff_ab) * 0.5, floor)
    b = max((sum_ab - diff_ab) * 0.5, floor)

    target = np.array([target1, target2])
    e = 1
    count = 0
    max_steps = 50
    while e > abs(tol):
        skellam = skellam_cdf_pair(a, b, n, tol)
        s, ds = skellam[:, 0], skellam[:, [1, 2]]
        e = np.linalg.norm(target - s)
        step = np.linalg.solve(ds, target - s)

        scale_factor = 1
        new_ab = np.maximum((a, b) + scale_factor * step, floor)
        new_s = s
        new_e = np.linalg.norm(new_s - target)
        for i in range(10):
            if new_e < e:
                a, b = new_ab
                break
            else:
                scale_factor *= .5
                new_ab = np.maximum((a, b) + scale_factor * step, floor)
                new_s = skellam_cdf_pair(new_ab[0], new_ab[1], n, tol)[:, 0]
                new_e = np.linalg.norm(new_s - target)

        count += 1
        if count > max_steps:
            return [np.NaN, np.NaN]

    return a, b
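# Illustrative, standalone check of the quantities skellam_cdf_root above is
# asked to match, using scipy.stats.skellam for a known parameter pair (a, b):
# target1 = P(X-Y < n) and target2 = P(X-Y > n). (The root finder itself also
# needs the skellam_cdf_pair helper, which is not part of this snippet.)
from scipy.stats import skellam

a, b, n = 3.0, 2.0, 0
target1 = skellam.cdf(n - 1, a, b)  # P(X-Y < n) = P(X-Y <= n-1)
target2 = skellam.sf(n, a, b)       # P(X-Y > n)
print(target1, target2)  # skellam_cdf_root would recover (a, b) from these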
def test_bns_scores(self):
    scores, _ = bns(self.X, self.y)
    print("scores:", scores)
    self.assertTrue(len(scores) == 4)
    self.assertTrue(np.all(scores >= 0.0))
    self.assertTrue(np.all(scores <= 2 * ndtri(0.9995)))
    true_bns_scores = np.array([ndtri(0.9995) - ndtri(0.0005),  # ~ F^-1(1) - F^-1(0)
                                ndtri(0.0005) - ndtri(0.0005),  # F^-1(0) - F^-1(0)
                                ndtri(0.5) - ndtri(0.5),        # F^-1(0.5) - F^-1(0.5)
                                ndtri(0.5) - ndtri(0.0005)])    # F^-1(0.5) - F^-1(0)
    assert_array_equal(scores, true_bns_scores)
def prior_trans(self, t):
    Teff, Dist, Rad, NH, arfsc = t

    arfscran = 1.0 - 0.0
    arfscmin = 0.0

    # build normalized prior array
    outarr = np.array([
        self.Teff + self.eTeff * ndtri(Teff),
        self.Dist + self.eDist * ndtri(Dist),
        self.Rad + self.eRad * ndtri(Rad),
        self.NH + self.eNH * ndtri(NH),
        arfscran * arfsc + arfscmin,
    ])
    return outarr
def prior_transform(theta):
    """
    A function defining the transform between the parameterisation in the unit
    hypercube and the true parameters.

    Args:
        theta (list): a list/array containing the parameters.

    Returns:
        list: a new list/array with the transformed parameters.
    """
    # unpack the parameters (in their unit hypercube form)
    mprime = theta[0]
    cprime = theta[1]

    cmin = -10.   # lower bound on uniform prior on c
    cmax = 10.    # upper bound on uniform prior on c

    mmu = 0.      # mean of Gaussian prior on m
    msigma = 10.  # standard deviation of Gaussian prior on m

    m = mmu + msigma * ndtri(mprime)   # convert back to m
    c = cprime * (cmax - cmin) + cmin  # convert back to c

    return np.array([m, c])
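# Illustrative usage sketch of prior_transform above (assumes it lives in a
# module with numpy and scipy.special.ndtri imported, as the function body
# requires): the centre of the unit hypercube should map to the prior
# mean/midpoint, i.e. m = 0 and c = 0.
import numpy as np
from scipy.special import ndtri

print(prior_transform([0.5, 0.5]))      # -> [0. 0.]
print(prior_transform([0.8413, 0.75]))  # -> roughly [10. 5.] (about +1 sigma in m)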
def forward_cpu(self, x):
    if not available_cpu:
        raise ImportError('SciPy is not available. Forward computation'
                          ' of ndtri in CPU can not be done.' +
                          str(_import_error))
    self.retain_outputs((0,))
    return utils.force_array(special.ndtri(x[0]), dtype=x[0].dtype),
def prior_transform_polychord(cube):
    """
    A function defining the transform between the parameterisation in the unit
    hypercube and the true parameters.

    Args:
        cube (array, list): a list containing the parameters as drawn from a
            unit hypercube.

    Returns:
        list: the transformed parameters.
    """
    # mprime, cprime = cube
    # unpack the parameters (in their unit hypercube form)
    mprime = cube[0]
    cprime = cube[1]

    cmin = -10.   # lower bound on uniform prior on c
    cmax = 10.    # upper bound on uniform prior on c

    mmu = 0.      # mean of Gaussian prior on m
    msigma = 10.  # standard deviation of Gaussian prior on m

    m = mmu + msigma * ndtri(mprime)      # convert back to m
    c = UniformPrior(cmin, cmax)(cprime)  # convert back to c using UniformPrior class

    theta = [m, c]

    return theta
def ppf(self, x):
    """
    Computes the percent point function of the distribution at the point(s) x.
    It is defined as the inverse of the CDF: y = ppf(x) is the argument y for
    which cdf(y) is equal to x. In other words, y is the place on the
    distribution where the CDF evaluates to x.

    Parameters
    ----------
    x: array, dtype=float, shape=(m x n), bounds=(0,1)
        The value(s) at which the user would like the ppf evaluated.
        If an array is passed in, the ppf is evaluated at every point
        in the array and an array of the same size is returned.

    Returns
    -------
    ppf: array, dtype=float, shape=(m x n)
        The ppf at each point in x.
    """
    if (x <= 0).any() or (x >= 1).any():
        raise ValueError('all values in x must be between 0 and 1, '
                         'exclusive')
    ppf = ndtri(x)

    return ppf
def prior_trans(self, t):
    Teff, Dist, Rad, NH, arfsc1, arfsc2 = t

    # define range and min values
    Teffrange = 200000.0 - 60000.0
    Teffmin = 60000.0
    # Distrange = 1.0 - 0.0
    # Distmin = 0.0
    Distmu = 405.0 / 1000.0
    Distsig = 28.0 / 1000.0
    Radrange = 0.08 - 0.0
    Radmin = 0.0
    NHrange = 0.1 - 0.0
    NHmin = 0.0
    arfscran = 1.0 - 0.0
    arfscmin = 0.0

    # build normalized prior array
    outarr = np.array([
        Teffrange * Teff + Teffmin,
        # Distrange * Dist + Distmin,
        Distmu + Distsig * ndtri(Dist),
        Radrange * Rad + Radmin,
        NHrange * NH + NHmin,
        arfscran * arfsc1 + arfscmin,
        arfscran * arfsc2 + arfscmin,
    ])
    return outarr
def prob_to_gauss_sigma(prob):
    """
    prob_to_gauss_sigma(prob):
        Returns the Gaussian sigma for which the area under the
        Gaussian probability density function (integrated from minus
        infinity to 'sigma') is equal to 'prob'.
    """
    return ndtri(prob)
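# Quick sanity check of prob_to_gauss_sigma above (illustrative only): the
# standard-normal CDF at +1 sigma is about 0.8413, so the inverse should
# recover 1.0, and 0.5 should map back to 0.0.
from scipy.special import ndtri, ndtr

print(prob_to_gauss_sigma(0.5))        # -> 0.0
print(prob_to_gauss_sigma(ndtr(1.0)))  # -> 1.0 (up to floating point)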
def _ppf(self, q, s, frac):
    """ percent point function (inverse of cdf) """
    s, frac = s[0], frac[0]  # reset broadcasting
    q_scale = (q - (1 - frac)) / frac
    res = np.zeros_like(q)
    idx = (q_scale > 0)
    res[idx] = np.exp(s * special.ndtri(q_scale[idx]))
    return res
def get_sobol_eps(n_iterations, n_simulations):
    sobol_array = np.zeros([n_simulations, n_iterations])
    sobol_engine = torch.quasirandom.SobolEngine(dimension=n_iterations)
    sobol_rand = sobol_engine.draw(n_simulations)
    for i in range(n_simulations):
        for j in range(n_iterations):
            sobol_array[i][j] = ndtri(sobol_rand[i][j].item())
    return sobol_array
def p_to_z(p, tail='two'):
    """Convert p-values to z-values.
    """
    eps = np.spacing(1)
    p = np.array(p)
    p[p < eps] = eps

    if tail == 'two':
        z = ndtri(1 - (p / 2))
        z = np.array(z)
    elif tail == 'one':
        z = ndtri(1 - p)
        z = np.array(z)
        z[z < 0] = 0
    else:
        raise ValueError('Argument "tail" must be one of ["one", "two"]')

    if z.shape == ():
        z = z[()]
    return z
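# Illustrative use of p_to_z above (assumes numpy and scipy.special.ndtri are
# importable at module level, as the function requires): the familiar
# two-sided 5% and 0.1% thresholds map to z of about 1.96 and 3.29.
import numpy as np
from scipy.special import ndtri

print(p_to_z(np.array([0.05, 0.001]), tail='two'))  # -> ~[1.96, 3.29]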
def zplot(pvalue_dict, name='', format='png', path='./', fontmap=None,
          verbose=1):
    """Plots absolute values of z-scores for model validation output from
    diagnostics.validate()."""

    if verbose:
        print_('\nGenerating model validation plot')

    if fontmap is None:
        fontmap = {1: 10, 2: 8, 3: 6, 4: 5, 5: 4}

    x, y, labels = [], [], []

    for i, var in enumerate(pvalue_dict):

        # Get p-values
        pvals = pvalue_dict[var]

        # Take absolute values of inverse-standard normals
        zvals = abs(special.ndtri(pvals))

        x = append(x, zvals)
        y = append(y, ones(size(zvals)) * (i + 1))

        vname = var
        vname += " (%i)" % size(zvals)
        labels = append(labels, vname)

    # Spawn new figure
    figure()
    subplot(111)
    subplots_adjust(left=0.25, bottom=0.1)

    # Plot scores
    pyplot(x, y, 'o')

    # Set range on axes
    ylim(0, size(pvalue_dict) + 2)
    xlim(xmin=0)

    # Tick labels for y-axis
    yticks(arange(len(labels) + 2), append(append("", labels), ""))

    # X label
    xlabel("Absolute z transformation of p-values")

    if not os.path.exists(path):
        os.mkdir(path)
    if not path.endswith('/'):
        path += '/'

    if name:
        name += '-'

    savefig("%s%svalidation.%s" % (path, name, format))
def testNdtri(self):
    """Verifies that ndtri computation is correct."""
    p = np.linspace(0., 1., 50).astype(np.float64)
    # Quantile performs piecewise rational approximation so adding some
    # special input values to make sure we hit all the pieces.
    p = np.hstack(
        (p, np.exp(-32), 1. - np.exp(-32), np.exp(-2), 1. - np.exp(-2)))

    expected_x = sp_special.ndtri(p)
    x = special_math.ndtri(p)
    self.assertAllClose(expected_x, self.evaluate(x), atol=0.)
def transform(self, x):
    """
    Transform from unit normalisation to this prior.

    Parameters
    ----------
    x : float
        The position in the normalised hyperparameter space
    """
    return self.mean + self.srd * ndtri(x)
def testNdtri(self):
    """Verifies that ndtri computation is correct."""
    with self.test_session():
        p = np.linspace(0., 1.0, 50).astype(np.float64)
        # Quantile performs piecewise rational approximation so adding some
        # special input values to make sure we hit all the pieces.
        p = np.hstack((p, np.exp(-32), 1. - np.exp(-32),
                       np.exp(-2), 1. - np.exp(-2)))

        expected_x = special.ndtri(p)
        x = special_math.ndtri(p)
        self.assertAllClose(expected_x, x.eval(), atol=0.)
def quasi_gaussian_samples(self):
    """ Generate standard Gaussian samples using a low-discrepancy Halton
    sequence with the inverse transform """
    g = ghalton.GeneralizedHalton(100)
    unif = np.array(g.get(int(self.n_steps * self.n_paths / 100) + 1)).flatten()
    unif = unif[:self.n_steps * self.n_paths]
    z_mat = ndtri(unif).reshape((self.n_steps, self.n_paths))
    return z_mat
def testProbits(self):
    probits = [-42., 42.]
    dist = tfd.ProbitBernoulli(probits=probits, validate_args=True)
    self.assertAllClose(probits, self.evaluate(dist.probits))
    self.assertAllClose(
        sp_special.ndtr(probits), self.evaluate(dist.probs_parameter()))

    p = [0.01, 0.99, 0.42]
    dist = tfd.ProbitBernoulli(probs=p, validate_args=True)
    self.assertAllClose(
        sp_special.ndtri(p), self.evaluate(dist.probits_parameter()))
def update(self, rank_list):
    num_coders = len(rank_list)
    rating = np.full((num_coders,), 1200)
    volatility = np.full((num_coders,), 515)
    times_played = np.zeros((num_coders,))
    rank = np.arange(num_coders) + 1

    for index, rank_item in enumerate(rank_list):
        name = rank_item[0]
        if name in self.rating:
            rating[index] = self.rating[name]
            volatility[index] = self.volatility[name]
            times_played[index] = self.times_played[name]
        else:
            self.rating_history[name] = []

    volatility_squared = np.square(volatility)
    competition_factor = math.sqrt(np.average(volatility_squared) +
                                   np.var(rating, ddof=1))
    win_probability = np.fromfunction(
        lambda i, j: 0.5 * (erf((rating[i] - rating[j]) /
                                np.sqrt(2 * (volatility_squared[i] +
                                             volatility_squared[j]))) + 1),
        (num_coders, num_coders), dtype=int)
    expected_rank = .5 + np.sum(win_probability, 0)
    expected_performance = -ndtri((expected_rank - .5) / num_coders)
    actual_performance = -ndtri((rank - .5) / num_coders)
    performed_as_rating = rating + competition_factor * (
        actual_performance - expected_performance)
    weight = 1 / (1 - (.42 / (times_played + 1) + .18)) - 1
    cap = 150 + 1500 / (times_played + 2)
    new_rating = np.clip((rating + weight * performed_as_rating) / (1 + weight),
                         rating - cap, rating + cap)
    new_volatility = np.sqrt(np.square(new_rating - rating) / weight +
                             np.square(volatility) / (weight + 1))
    new_times_played = times_played + 1

    for index, rank_item in enumerate(rank_list):
        name = rank_item[0]
        self.rating[name] = new_rating[index]
        self.volatility[name] = new_volatility[index]
        self.times_played[name] = new_times_played[index]
        self.rating_history[name].append((new_rating[index], rank_item[1]))
def _binom_wilson_conf_int(k, n, confidence_level, alternative, correction):
    # This function assumes that the arguments have already been validated.
    # In particular, `alternative` must be one of 'two-sided', 'less' or
    # 'greater'.
    p = k / n
    if alternative == 'two-sided':
        z = ndtri(0.5 + 0.5 * confidence_level)
    else:
        z = ndtri(confidence_level)

    # For reference, the formulas implemented here are from
    # Newcombe (1998) (ref. [3] in the proportion_ci docstring).
    denom = 2 * (n + z**2)
    center = (2 * n * p + z**2) / denom
    q = 1 - p
    if correction:
        if alternative == 'less' or k == 0:
            lo = 0.0
        else:
            dlo = (1 + z * sqrt(z**2 - 2 - 1 / n + 4 * p * (n * q + 1))) / denom
            lo = center - dlo
        if alternative == 'greater' or k == n:
            hi = 1.0
        else:
            dhi = (1 + z * sqrt(z**2 + 2 - 1 / n + 4 * p * (n * q - 1))) / denom
            hi = center + dhi
    else:
        delta = z / denom * sqrt(4 * n * p * q + z**2)
        if alternative == 'less' or k == 0:
            lo = 0.0
        else:
            lo = center - delta
        if alternative == 'greater' or k == n:
            hi = 1.0
        else:
            hi = center + delta
    return lo, hi
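# Usage sketch for the private helper _binom_wilson_conf_int above, shown
# purely for illustration (assumes math.sqrt and scipy.special.ndtri are
# available at module level, as the helper expects): a 95% two-sided Wilson
# interval for 7 successes in 20 trials, without continuity correction, comes
# out near (0.18, 0.57).
from math import sqrt
from scipy.special import ndtri

lo, hi = _binom_wilson_conf_int(7, 20, 0.95, 'two-sided', correction=False)
print(lo, hi)  # -> roughly 0.181, 0.567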
def create_cell_counts_table(zscore_diff):
    z_threshold = -ndtri(.05 / 2)
    cell_type_counts = {}
    cell_type_counts['AA'] = sum(
        np.all([zscore_diff[0, :] >= z_threshold,
                zscore_diff[1, :] >= z_threshold], 0))
    cell_type_counts['0A'] = sum(
        np.all([abs(zscore_diff[0, :]) <= z_threshold,
                zscore_diff[1, :] >= z_threshold], 0))
    cell_type_counts['XA'] = sum(
        np.all([zscore_diff[0, :] <= -z_threshold,
                zscore_diff[1, :] >= z_threshold], 0))
    cell_type_counts['A0'] = sum(
        np.all([zscore_diff[0, :] >= z_threshold,
                abs(zscore_diff[1, :]) <= z_threshold], 0))
    cell_type_counts['00'] = sum(
        np.all([abs(zscore_diff[0, :]) <= z_threshold,
                abs(zscore_diff[1, :]) <= z_threshold], 0))
    cell_type_counts['X0'] = sum(
        np.all([zscore_diff[0, :] <= -z_threshold,
                abs(zscore_diff[1, :]) <= z_threshold], 0))
    cell_type_counts['AX'] = sum(
        np.all([zscore_diff[0, :] >= z_threshold,
                zscore_diff[1, :] <= -z_threshold], 0))
    cell_type_counts['0X'] = sum(
        np.all([abs(zscore_diff[0, :]) <= z_threshold,
                zscore_diff[1, :] <= -z_threshold], 0))
    cell_type_counts['XX'] = sum(
        np.all([zscore_diff[0, :] <= -z_threshold,
                zscore_diff[1, :] <= -z_threshold], 0))

    counts = [cell_type_counts['AA'], cell_type_counts['0A'],
              cell_type_counts['XA'], cell_type_counts['A0'],
              cell_type_counts['00'], cell_type_counts['X0'],
              cell_type_counts['AX'], cell_type_counts['0X'],
              cell_type_counts['XX']]
    return (counts, cell_type_counts)
def KStest2(l1, func):
    """
    Evaluate the Kolmogorov-Smirnov test between the data set l1 and the CDF
    func, where func = CDF(x), and 0. <= CDF(x) <= 1. for all x.
    Function returns the maximal distance between the CDFs and the
    corresponding Q_KS value.

    Parameters:
    -----------
    l1 = (n x m)-dim array
    func = function of normed CDF, must be able to handle (n x m)-dim arrays

    Returns:
    --------
    (3 x n)-dim tuple with:
    D = maximum distance of KS test (n-dim array)
    result = significance of KS test (n-dim array)
    result in sigma = significance in sigma (one sided confidence level,
        n-dim array)

    Notes
    -----
    if n = 1, floats are returned in tuple

    See Numerical Recipes 3rd Edition, p.737
    """
    if not isinstance(l1, np.ndarray):
        l1 = np.array(l1)
    if len(l1.shape) == 1:
        l1 = np.array([l1])
        ret_float = True
    else:
        ret_float = False

    x = np.sort(l1)  # sort l1 along last axis
    # compute cdf(x), same for all n rows, is (n x m) array
    cdf_data = (np.mgrid[0:x.shape[0], 0:x.shape[1]][1] + np.ones(x.shape)) / x.shape[1]
    cdf_func = func(x)  # (n x m)-dim array
    # compute maximum distance over axis 1. D is n-dim array
    D = np.max(np.abs(cdf_data - cdf_func), axis=1)

    Neff = np.sqrt(x.shape[1]) + 0.12 + 0.11 / np.sqrt(x.shape[1])
    QKS = KSdist(Neff * D)
    sig = -1. * ndtri(QKS)

    if ret_float:
        return D[0], QKS[0], sig[0]
    else:
        return D, QKS, sig
def _ppf(self, qloc, idx, cache):
    dim = self._rotation.index(idx)
    conditions = [
        self._get_cache(dim_, cache, get=1)
        for dim_ in self._rotation[:dim]
    ]
    assert not any([
        isinstance(condition, chaospy.Distribution)
        for condition in conditions
    ])
    qloc = numpy.vstack(conditions + [qloc])
    zloc = special.ndtri(qloc)
    out = special.ndtr(self._inv_transform[idx, :len(qloc)].dot(zloc))
    return out
def setStrikeFromDelta(self, delta, price, eval_date):
    if type(eval_date) is not date:
        try:
            eval_date = date.fromisoformat(eval_date)
        except:
            raise TypeError("Expiration must be a date.")
    time_delta = self.expiration - eval_date + timedelta(days=1)
    time = time_delta.days / 365.0
    self.strike = np.exp(-(ndtri(delta) * self.iv * np.sqrt(time) -
                           self.iv**2 * 0.5 * time)) * price
    return self.strike
def test_stdtrit_vs_R_large_df():
    df = [1e10, 1e12, 1e120, np.inf]
    p = 0.1
    res = stdtrit(df, p)
    # R Code:
    #   options(digits=20)
    #   qt(0.1, c(1e10, 1e12, 1e120, Inf))
    res_R = [-1.2815515656292593150,
             -1.2815515655454472466,
             -1.2815515655446008125,
             -1.2815515655446008125]
    assert_allclose(res, res_R, rtol=1e-15)
    # last value should also agree with ndtri
    assert_equal(res[3], ndtri(0.1))
def copnorm_1d(x):
    """Copula normalization for a single vector.

    Parameters
    ----------
    x : array_like
        Array of data of shape (n_epochs,)

    Returns
    -------
    cx : array_like
        Standard normal samples with the same empirical CDF value as the
        input.
    """
    assert isinstance(x, np.ndarray) and (x.ndim == 1)
    return ndtri(ctransform(x))
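# Illustrative, standalone sketch of the copula-normalization idea used by
# copnorm_1d above. The ctransform helper is not part of this snippet; here a
# rank/(n+1) empirical-CDF transform is assumed as a stand-in, which is the
# usual convention for this kind of Gaussianisation.
import numpy as np
from scipy.special import ndtri
from scipy.stats import rankdata

x = np.random.exponential(size=1000)
cx = ndtri(rankdata(x) / (x.size + 1))  # rank-based Gaussian copula transform
print(cx.mean(), cx.std())              # -> roughly 0 and 1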
def get_gmm(data, n_components, bayesian=False, use_cdf=False, epsilon=1e-7):
    if use_cdf:
        data = np.clip(data, epsilon, 1 - epsilon)
        data = ndtri(data)
    if bayesian:
        gmm = BayesianGaussianMixture(
            n_components=n_components, max_iter=1000, verbose=2)
    else:
        gmm = GaussianMixture(
            n_components=n_components, max_iter=1000, verbose=2)
    gmm.fit(data)
    print('weight')
    print(gmm.weights_)
    print('BIC', bic(gmm, data))
    return gmm
def _delta_mu_from_auc(self):
    """Compute the delta_mu from AUC for Gaussian sampling.

    delta_mu is the difference of means between the Gaussian distributions
    representing positive and negative class sample scores. Here we implement
    the strategy by Marzban [1] to compute the difference of means between two
    Gaussians with unit variances from a given AUC. This means that the
    samples generated are those of a base classifier whose performance is the
    given AUC.

    Returns:
        mean difference between positive and negative sample scores used for
        Gaussian sampling ((M,) ndarray)
    """
    return np.sqrt(2.) * ndtri(self.auc)
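# Numerical sanity check of the AUC -> delta_mu relation used above
# (illustrative, standalone): for unit-variance Gaussian scores whose means
# differ by delta_mu = sqrt(2) * ndtri(auc), the empirical AUC (Mann-Whitney U
# scaled by n_pos * n_neg) should come back close to the requested value.
import numpy as np
from scipy.special import ndtri
from scipy.stats import mannwhitneyu

auc = 0.75
delta_mu = np.sqrt(2.) * ndtri(auc)

rng = np.random.default_rng(0)
neg = rng.normal(0.0, 1.0, 20000)
pos = rng.normal(delta_mu, 1.0, 20000)
U = mannwhitneyu(pos, neg, alternative='greater').statistic
print(U / (pos.size * neg.size))  # -> ~0.75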
def get_tail(self, f, mu, s2, dmu=None, ds2=None):
    # in the case of a probit f should represent a vector of probabilities,
    # i.e. each entry should be in (0, 1). ndtri is the inverse standard
    # normal CDF (inverse of the probit) so this transforms f into a vector
    # of real-values which we then evaluate ndtr on (the standard normal
    # CDF or probit) after normalizing by mu/s2.
    a = ss.ndtri(f)                 # inverse CDF of target
    z = (mu - a) / np.sqrt(1 + s2)  # standardize target
    p = ss.ndtr(z)                  # CDF
    if dmu is None:
        return p
    raise NotImplementedError
def _ppf(self, uloc, idx, mu, sigma, cache):
    dim = self._rotation.index(idx)
    conditions = [
        self._get_cache(dim_, cache, get=1)
        for dim_ in self._rotation[:dim]
    ]
    assert not any([
        isinstance(condition, chaospy.Distribution)
        for condition in conditions
    ])
    uloc = numpy.vstack(conditions + [uloc])
    zloc = special.ndtri(uloc)
    loc = self._inv_transform[idx, :len(uloc)].dot(zloc)
    xloc = loc + mu[idx]
    out = numpy.e**xloc
    return out
def priorTransform(self, theta):
    """Prior transform for Gaussian and flat priors."""
    priors = []
    n = self.nsigma

    if self.priortype == 'g':
        for c, bound in enumerate(self.bounds):
            mu = self.means[c]
            sigma = (bound[1] - bound[0]) / n
            priors.append(mu + sigma * (ndtri(theta[c])))
    else:
        for c, bound in enumerate(self.bounds):
            # When theta 0 -> append bound[0], if theta 1 -> append bound[1]
            priors.append(theta[c] * (bound[1] - bound[0]) + bound[0])

    # At this moment, np.array(priors) has shape (dims,)
    return np.array(priors)
def delta_norm(return_pad, positions, max_record, lmbd=.94, signi=95, rm_sign=1):
    # Employ RiskMetrics weights
    dn_weights = list(
        map(lambda x: (1 - lmbd) * lmbd**(max_record - x - 1),
            [x for x in range(max_record)]))
    if not rm_sign:
        dn_weights = 1 / max_record * np.ones(max_record)

    # Calculate weighted/unweighted covariance, assuming zero mean
    weighted_cov = np.dot(return_pad.T * dn_weights, return_pad)
    delta = np.sqrt(np.dot(np.dot(positions, weighted_cov), positions))
    dn_var = delta * ndtri(1 - signi / 100)
    return dn_var
def chauvenet(values, criterion=0.5):
    """
    Uses Chauvenet's `criterion` (default 0.5) for one round of rejection.
    Returns a mask that is `True` for data that is not rejected.
    """
    n = values.size
    if n < 7:
        return numpy.ones(values.shape, dtype=bool)

    av = numpy.mean(values)
    stdev = numpy.std(values)
    distance = abs(special.ndtri(criterion / n) * stdev)
    lo = av - distance
    hi = av + distance
    mask = (lo <= values) & (values <= hi)
    return mask
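# Illustrative use of chauvenet above (assumes numpy and scipy.special are
# imported at module level, as the function body expects): an obvious outlier
# in otherwise tight data gets masked out.
import numpy
from scipy import special

data = numpy.array([9.9, 10.1, 10.0, 9.8, 10.2, 10.1, 9.9, 25.0])
mask = chauvenet(data)
print(mask)        # -> True everywhere except the last entry
print(data[mask])  # -> the 25.0 outlier is dropped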
def c(self, *x):
    """
    This function computes the pdf of the copula. It accepts a variable
    number of arguments to support any dimension of copula.

    Args:
        x (List[float]): the points where you want to compute

    Returns:
        float: The density of the copula
    """
    vect = sps.ndtri(x)
    intermediate = np.dot(np.dot(vect.T, (self.cor_matrix_inv - self.I)), vect)
    res = np.exp(-intermediate / 2) / np.sqrt(self.det_cor)
    if np.isnan(res):
        res = 0
    return res
def lognormvariate(self, xi, mu=0.0, sigma=1.0):
    """
    Random variate from the lognormal distribution.

    :param float xi: Shape parameter
    :param float mu: Location parameter
    :param float sigma: Scale parameter (|sigma| > 0)
    :returns: A random variate from the lognormal distribution
    """
    if xi <= 0.0:
        raise ValueError("Invalid input parameter: `xi` must be positive")
    if sigma <= 0.0:
        raise ValueError(
            "Invalid input parameter: `sigma` must be positive")
    u1 = self.random()
    return mu + sigma * math.exp(xi * ndtri(u1))
def VaR(stocksInPortfolio, stocksExposure, confidenceAlpha, Print=False):
    alpha = ndtri(confidenceAlpha)

    # Stock weights in portfolio
    weight = (np.array(stocksExposure) / sum(stocksExposure)) * 100

    # Variance-covariance matrix and exposure matrix
    vcvm = varCovarMatrix(stocksInPortfolio)
    vmat = np.mat(stocksExposure)

    # Variance of portfolio in euro/usd
    varianceRR = vmat * vcvm * vmat.T

    # Value at Risk (portfolio)
    var = alpha * np.sqrt(varianceRR)

    if Print:
        print("\nPortfolio total value: ", sum(stocksExposure))
        for s, v, w in zip(stocksInPortfolio, stocksExposure, weight):
            print(s.upper(), v, "usd/euro", round(w, 2), "% of portfolio")
        print("VaR: @ " + str(confidenceAlpha * 100) + "% confidence:", var, "euro/usd")
        print("VaR: " + str(var[0][0] / sum(stocksExposure) * 100) + "% of portfolio value.")

    return var
def rfunc(p, nDim, nParams):
    for i in range(nDim):
        if priorLst[i][0] == "log":
            bMin = np.log(np.abs(priorLst[i][1]))
            bMax = np.log(np.abs(priorLst[i][2]))
            p[i] *= bMax - bMin
            p[i] += bMin
            p[i] = np.exp(p[i])
        elif priorLst[i][0] == "normal":
            bMin, bMax = priorLst[i][1:]
            sigma = (bMax - bMin) / 2.0
            mu = bMin + sigma
            p[i] = mu + sigma * ndtri(p[i])
        elif priorLst[i][0] == "fixed":
            p[i] = priorLst[i][1]
        else:  # uniform (linear)
            bMin, bMax = priorLst[i][1:]
            p[i] = bMin + p[i] * (bMax - bMin)
    return p
def poisson_cdf_root(target, n=2, tol=1e-8):
    """
    Finds the parameter 'a' of a Poisson distribution X in order to have

        P(X <= n) = target    or    P(X < n + 1/2) = target

    The first guess is estimated through an approximation to the normal
    distribution:

        (n + 1/2 - a) / sqrt(a) = phi^-1(target) =: phi
        a + sqrt(a) * phi - n - 1/2 = 0

    Then:

        a = ((sqrt(phi ** 2 + 4 * n + 2) - phi) / 2) ** 2
    """
    # if not (isinstance(target, float) and target < 1 or target > 0):
    if target >= 1 or target <= 0:
        return np.NaN

    phi = ndtri(target)
    min_a = 1e-3
    a = max(((phi * phi + 4 * n + 2) ** .5 - phi) ** 2 / 4, min_a)

    e = 1
    count = 0
    max_steps = 50
    while abs(e) > abs(tol):
        f, df = poisson_cdf(a, n)
        e = target - f
        step = e / df

        scale_factor = 1
        new_a = a + scale_factor * step
        new_f = f
        new_e = abs(target - new_f)
        for i in range(10):
            if new_a > 0 and new_e < abs(e):
                a = new_a
                break
            else:
                new_a = a + scale_factor * step
                new_f, _ = poisson_cdf(new_a, n)
                new_e = abs(target - new_f)
                scale_factor *= .5

        count += 1
        if count > max_steps:
            return np.NaN

    return a
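# Quick, standalone check of the normal-approximation initial guess described
# in the docstring above (illustrative only; the full root finder also needs
# the poisson_cdf helper, which is not part of this snippet).
import numpy as np
from scipy.special import ndtri
from scipy.stats import poisson

target, n = 0.3, 2
phi = ndtri(target)
a0 = ((np.sqrt(phi ** 2 + 4 * n + 2) - phi) / 2) ** 2
# P(X <= n) at the initial guess comes out around 0.32-0.33, already near the
# 0.3 target; the Newton iterations in poisson_cdf_root would refine it.
print(a0, poisson.cdf(n, a0))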
def calculate_historical_var(form):
    """
    This is a very basic approach to calculate an "uncorrelated value at risk
    (time) ex post", based on: "III. Ermittlung des Value at Risk mit
    Simulationsverfahren, Historische Simulation, VWA".
    At most 2 years of historical data is fetched (today - 2y).

    :param form: relevant calculation information including symbols,
        confidence, initial_capital etc.
    :return: on success, the form with the calculated future value,
        otherwise None
    """
    try:
        form['initial_capital'] = 100000
        df = quandl.get(form['symbol'],
                        start_date=str(datetime.now() - relativedelta(years=2)),
                        end_date=str(datetime.now()),
                        collapse="weekly",
                        transform="rdiff",
                        returns="pandas")

        # slice data frame to relevant close quotes
        df_portfolio = df.ix[::, 10::12]

        # Transform percent matrix for multiplication
        df_multiplier = pd.DataFrame(form['percentage']).transpose()
        df_multiplier *= form['initial_capital']

        # multiply daily return in % with the total initial capital * percentage
        df_product = pd.DataFrame(df_multiplier.values * df_portfolio.values,
                                  columns=df_multiplier.columns,
                                  index=df_portfolio.index)  # TODO: error?

        # sum up daily return, calculate standard deviation
        df_product['sum_portfolio'] = df_product.sum(axis=1)
        std = df_product["sum_portfolio"].std()

        # value at risk scaled to horizon = value at risk * square root of time
        value_at_risk = ndtri(1 - form['confidence']) * std * sqrt(form['time'])
        form['future_value'] = max(form['initial_capital'] + value_at_risk, 0)
        return form
    except:
        return None
def Prior(self, cube):
    """
    The prior transform going from the unit hypercube to the true parameters.
    This function has to be called "Prior".

    Args:
        cube (:class:`numpy.ndarray`): an array of values drawn from the unit
            hypercube

    Returns:
        :class:`numpy.ndarray`: an array of the transformed parameters
    """
    # extract values
    mprime = cube[0]
    cprime = cube[1]

    m = self.mmu + self.msigma * ndtri(mprime)        # convert back to m
    c = cprime * (self.cmax - self.cmin) + self.cmin  # convert back to c

    return np.array([m, c])
def traditional(amountOfDays=None):
    dua = newVal()
    if amountOfDays is None:
        amountOfDays = len(df_price)
    Data = df_price[-amountOfDays:]
    Data2 = dua[-amountOfDays:]

    stdev = np.std(Data2)
    average = np.average(Data2)
    variance = np.var(Data2)
    drift = average - (variance / 2)
    latest = Data[len(Data) - 1]

    answer = latest * math.exp(drift + stdev * ndtri(random.random()))

    print(Data2)
    print("")
    print("answer: " + str(answer))
def ns_prior_transform(utheta):
    # global config.BASEMENT
    theta = np.zeros_like(utheta) * np.nan
    for i in range(len(theta)):
        if config.BASEMENT.bounds[i][0] == 'uniform':
            theta[i] = (utheta[i] * (config.BASEMENT.bounds[i][2] -
                                     config.BASEMENT.bounds[i][1])
                        + config.BASEMENT.bounds[i][1])
        elif config.BASEMENT.bounds[i][0] == 'normal':
            theta[i] = (config.BASEMENT.bounds[i][1] +
                        config.BASEMENT.bounds[i][2] * ndtri(utheta[i]))
        elif config.BASEMENT.bounds[i][0] == 'trunc_normal':
            theta[i] = my_truncnorm_isf(utheta[i],
                                        config.BASEMENT.bounds[i][1],
                                        config.BASEMENT.bounds[i][2],
                                        config.BASEMENT.bounds[i][3],
                                        config.BASEMENT.bounds[i][4])
        else:
            raise ValueError('Bounds have to be "uniform", "normal" and '
                             '"trunc_normal". Input from "params.csv" was "' +
                             config.BASEMENT.bounds[i][0] + '".')
    return theta
def walk(n=1000, annual_drift=.1, annual_volatility=.4, init_price=100,
         verbose=False):
    drift = float(annual_drift) / 252
    vol = float(annual_volatility) / np.sqrt(252)
    drift_mean = drift - 0.5 * math.pow(vol, 2)
    prices = np.zeros(n)

    if verbose:
        print("drift:", drift)
        print("volatility:", vol)
        print("drift mean:", drift_mean)

    previous_price = init_price
    for i in range(n):
        r = random()
        z = ndtri(r)
        log_return = drift + vol * z
        price = previous_price * math.exp(log_return)
        if verbose:
            print("#%i r=%.2f z=%.2f ret=%.2f%% price=%.2f$"
                  % (i, r, z, log_return * 100, price))
        previous_price = price
        prices[i] = price
    return prices
def neyman_classifier(class_0_mean, class_1_mean, variance, alpha,
                      feature_list, n):
    threshold = []
    detection_rate = []

    # in Neyman-Pearson, given alpha we need to find the inverse first
    inverse_alpha_complement = ndtri(1 - alpha)

    rows = np.size(class_1_mean, 0)  # these are the features

    # the most important step here is to get the variance
    # step a -- get the sqrt of the variance matrix
    sub_variance = variance[:, feature_list]
    sub_variance = sub_variance[feature_list, :]

    inv_variance = np.linalg.inv(n * variance)
    sub_inv_variance = inv_variance[:, feature_list]
    sub_inv_variance = sub_inv_variance[feature_list, :]

    sub_mu_0 = class_0_mean[feature_list]
    sub_mu_1 = class_1_mean[feature_list]

    sub_mu_0 = n * sub_mu_0
    sub_mu_1 = n * sub_mu_1
    sub_variance = n * sub_variance
    # sub_inv_variance = n * sub_inv_variance

    w = np.matmul((sub_mu_1 - sub_mu_0).T, sub_inv_variance)

    mean_for_alpha = int(float(np.matmul(w, sub_mu_0) * 100)) / 100
    mean_for_beta = int(float(np.matmul(w, sub_mu_1)) * 100) / 100

    transformed_variance = np.matmul(w, sub_variance)
    transformed_variance = int(
        float(np.matmul(transformed_variance, w.T)) * 100) / 100
    std_deviation = math.sqrt(transformed_variance)

    threshold = int(
        (mean_for_alpha + inverse_alpha_complement * std_deviation) * 100) / 100
    beta = 1 - int(
        norm.cdf((threshold - mean_for_beta) / std_deviation) * 100) / 100

    return beta
def rtrunc_norm(mean, sd, lower, upper, size=None):
    """
    Sample from a truncated normal distribution

    Parameters
    ----------
    mean : float or array_like
    sd : float or array_like
    lower : float or array-like
    upper : float or array-like

    Note
    ----
    Arrays passed must all be of the same length. Computes samples
    using \Phi, the normal CDF, and \Phi^{-1} using a standard
    algorithm:

        draw u ~ uniform(\Phi((l - m) / sd), \Phi((u - m) / sd))
        return m + sd * \Phi^{-1}(u)

    Returns
    -------
    samples : ndarray or float
    """
    ulower = special.ndtr((lower - mean) / sd)
    uupper = special.ndtr((upper - mean) / sd)

    if size is None:
        if isinstance(ulower, np.ndarray):
            draws = np.random.rand(len(ulower))
        else:
            draws = np.random.rand()
    else:
        raise ValueError('if array of bounds passed, size must be None')

    u = (uupper - ulower) * draws + ulower
    return mean + sd * special.ndtri(u)
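# Illustrative use of rtrunc_norm above (assumes numpy and scipy.special are
# imported at module level, as the function body requires): draws should
# respect the requested bounds.
import numpy as np
from scipy import special

np.random.seed(0)
lower = np.zeros(5)
upper = np.full(5, 1.5)
samples = rtrunc_norm(mean=np.zeros(5), sd=np.ones(5), lower=lower, upper=upper)
print(samples)                                          # five values in (0, 1.5)
print(np.all((samples >= lower) & (samples <= upper)))  # -> True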
def prior_transform(theta):
    """
    A function defining the transform between the parameterisation in the unit
    hypercube and the true parameters.

    Args:
        theta (tuple): a tuple containing the parameters.

    Returns:
        tuple: a new tuple or array with the transformed parameters.
    """
    # unpack the parameters (in their unit hypercube form)
    mprime, cprime = theta

    cmin = -10.   # lower bound on uniform prior on c
    cmax = 10.    # upper bound on uniform prior on c

    mmu = 0.      # mean of Gaussian prior on m
    msigma = 10.  # standard deviation of Gaussian prior on m

    m = mmu + msigma * ndtri(mprime)   # convert back to m
    c = cprime * (cmax - cmin) + cmin  # convert back to c

    return (m, c)
def _ppf(self, q, c):
    return -special.ndtri(pow(1 - q, 1. / c))
def _ppf(self, x):
    return special.ndtri(x)
def tol2size(tol=1e-8, *args):
    if len(args) == 0:
        args = [1]  # standard value
    return int(-ndtri(tol) * sum(map(lambda x: x ** .5, args)) + sum(args)) + 1
def _ppf(self, q, a):
    return 1.0 / (a - special.ndtri(q * special.ndtr(a)))
def _ppf(self, x, a):
    return np.e**(a * special.ndtri(x))
plt.figure("Count 25 Events 40000 times") Numcounts25, binedges25, patches = plt.hist(Counters25, bins = 50, range = (0,50), color = "green", alpha = 0.5) #plot histogram with 50 bins. Store Number of counts/bin and bin edges centers25 = 0.5*(binedges25[1:] + binedges25[:-1]) #Computing bin centers as means of the bin edge values y25 = 40000 * Normal(centers25, 25, np.sqrt(25)) #Compute the y values(as per the gaussian function) xbar25 = np.zeros(2) ybar25 = np.zeros(2) xbar25[0] = 25 - np.sqrt(25) #Compute the one sigma values as xbar25[1] = 25 + np.sqrt(25) #mean +-error(on the mean value) ybar25 = 40000*Normal(xbar25, 25, np.sqrt(25)) #Computing y values as per the gaussian function for the X values plt.plot(xbar25, ybar25, color= "red", alpha = 1.0, lw =5) #plot the line joining the 2 one sigma points plt.plot(centers25, y25, alpha = 1.0, color = "red", lw =5) #plot the gaussian function passing through the center of each bin errors25 = np.sqrt(y25) #Compute the expected error on Y-values plt.errorbar(centers25, y25, yerr = errors25, linestyle='None', linewidth = 3.0, markeredgewidth = 3.0, marker ='o', color = 'black', markersize= 5.0 ) #Plot the errors on Y values prob1percent25 = 25 + np.sqrt(25) * ndtri(0.01) #compute the 1% point - x value prob99percent25 = 25 + np.sqrt(25) * ndtri(0.99) #compute the 99% point - x value y1percent25 = 40000*Normal(prob1percent25, 25, np.sqrt(25)) #compute the 1% point - y value y99percent25 = 40000*Normal(prob99percent25, 25, np.sqrt(25)) #compute the 99% point - y value #Perform labelling operations for the plots plt.annotate('One percent', xycoords="data", textcoords='offset points', arrowprops=dict(facecolor='black', arrowstyle="->"), xytext =(-75,50), xy = (prob1percent25, y1percent25)) plt.annotate('99 percent', xycoords="data", textcoords='offset points', arrowprops=dict(facecolor='black', arrowstyle="->"), xytext =(30,50), xy = (prob99percent25, y99percent25)) plt.annotate('One Sigma', xycoords="data", textcoords='offset points', xy = (20,ybar25[0]), xytext = (-70,30), arrowprops=dict(facecolor='black', arrowstyle="->")) plt.annotate('One Sigma', xycoords="data", textcoords='offset points', xy = (30,ybar25[1]), xytext = (30,30), arrowprops=dict(facecolor='black', arrowstyle="->")) plt.title("25 Events Counted 40000 times", backgroundcolor = "white") '''A similar experiment as above is performed with 250 events being performed 40000 times. Refer to the documentation of the above section.''' Events250 = np.random.rand(10000000) Counters250 = np.zeros(40000) for value in Events250: Place = int(40000 * value)
def discern(x1, x2, y, const, logger, findminFlag, alpha=.05, tol=.5,
            priorsamples=0, priorsampleflag=0, n0=2):
    """Discern determines which of f(x_1) or f(x_2) has a smaller mean, given
    only noisy evaluations, with significance level 1 - alpha, indifference
    level tol, and constraint level const.

    Arguments:
        x1, x2 - the two probe arguments for fcn.
        n0 - initial sample count.
        alpha - significance level (1-alpha).
        tol - indifference level on confidence interval size.
        const - constraint, below which we terminate.
        logger - logging object

    Returns:
        (0, samples) - tolerance level reached.
        (1, samples) - x1 is smaller.
        (2, samples) - x2 is smaller.
        3 - hard constraint achieved by x1.
        4 - hard constraint achieved by x2.
    """
    samplecount = 0
    c = ndtri(1 - alpha / 2)
    x1samples = list()
    x2samples = list()
    if priorsampleflag == 1:
        x1samples = priorsamples
    if priorsampleflag == 2:
        x2samples = priorsamples

    # initial sampling and estimator construction and check
    for n in range(n0):
        if priorsampleflag != 1:
            x1samples.append(sample(logger, x1, y))
        if priorsampleflag != 2:
            x2samples.append(sample(logger, x2, y))
    n1 = len(x1samples)
    n2 = len(x2samples)
    x1var = var(x1samples, ddof=1)
    x2var = var(x2samples, ddof=1)

    # main loop body: construct mean and variance, then apply checks
    while max(sqrt(x1var / n1), sqrt(x2var / n2)) >= tol / c:
        x1mean = mean(x1samples)
        x2mean = mean(x2samples)
        x1var = var(x1samples, ddof=1)
        x2var = var(x2samples, ddof=1)

        if abs(x1mean - x2mean) >= c * (sqrt(x1var / n1) + sqrt(x2var / n2)):
            if x1mean <= x2mean:
                # logger.logEvent(1, "Chose x1")  LOGGING EVENT
                return (1, x1samples)
            else:
                # logger.logEvent(2, "Chose x2")  LOGGING EVENT
                return (2, x2samples)
        if ((x1mean + c * sqrt(x1var / n1)) < const) and not findminFlag:
            # logger.logEvent(3, "x1 satisfies constraint")  LOGGING EVENT
            return (3, 0)
        if ((x2mean + c * sqrt(x2var / n2)) < const) and not findminFlag:
            # logger.logEvent(4, "x2 satisfies constraint")  LOGGING EVENT
            return (4, 0)

        # if none of these checks pass, refinement
        if (sqrt(x1var * n2 * (n2 + 1)) * (sqrt(n1 + 1) - sqrt(n1)) >=
                sqrt(x2var * n1 * (n1 + 1)) * (sqrt(n2 + 1) - sqrt(n2))):
            n1 += 1
            x1samples.append(sample(logger, x1, y))
        else:
            n2 += 1
            x2samples.append(sample(logger, x2, y))

    # logger.logEvent(0, "Can't tell; chose the lesser of x1, x2")  LOGGING EVENT
    if mean(x1samples) > mean(x2samples):
        return (2, x2samples)
    else:
        return (1, x1samples)
def _ppf(self, q, C, Ci, loc):
    return (numpy.dot(C, special.ndtri(q)).T + loc.T).T
def test(t, x, eps=None, alpha=None, Ha=None):
    """
    Runs the Mann-Kendall test for trend in time series data.

    Parameters
    ----------
    t : 1D numpy.ndarray
        array of the time points of measurements
    x : 1D numpy.ndarray
        array containing the measurements corresponding to entries of 't'
    eps : scalar, float, greater than zero
        least count error of measurements which helps determine ties in the data
    alpha : scalar, float, greater than zero
        significance level of the statistical test (Type I error)
    Ha : string, options include 'up', 'down', 'upordown'
        type of test: one-sided ('up' or 'down') or two-sided ('upordown')

    Returns
    -------
    MK : string
        result of the statistical test indicating whether or not to accept the
        alternative hypothesis 'Ha'
    m : scalar, float
        slope of the linear fit to the data
    c : scalar, float
        intercept of the linear fit to the data
    p : scalar, float, greater than zero
        p-value of the obtained Z-score statistic for the Mann-Kendall test
    indicator : int
        1 if 'Ha' is accepted, 0 otherwise

    Raises
    ------
    AssertionError : error
        least count error of measurements 'eps' is not given
    AssertionError : error
        significance level of test 'alpha' is not given
    AssertionError : error
        alternative hypothesis 'Ha' is not given
    """
    # assert a least count for the measurements x
    assert eps, "Please provide least count error for measurements 'x'"
    assert alpha, "Please provide significance level 'alpha' for the test"
    assert Ha, "Please provide the alternative hypothesis 'Ha'"

    # estimate sign of all possible (n(n-1)) / 2 differences
    n = len(t)
    sgn = np.zeros((n, n), dtype="int")
    for i in range(n):
        tmp = x - x[i]
        tmp[np.where(np.fabs(tmp) <= eps)] = 0.
        sgn[i] = np.sign(tmp)

    # estimate mean of the sign of all possible differences
    S = sgn[np.triu_indices(n, k=1)].sum()

    # estimate variance of the sign of all possible differences
    # 1. Determine no. of tie groups 'p' and no. of ties in each group 'q'
    np.fill_diagonal(sgn, eps * 1E6)
    i, j = np.where(sgn == 0.)
    ties = np.unique(x[i])
    p = len(ties)
    q = np.zeros(len(ties), dtype="int")
    for k in range(p):
        idx = np.where(np.fabs(x - ties[k]) < eps)[0]
        q[k] = len(idx)
    # 2. Determine the two terms in the variance calculation
    term1 = n * (n - 1) * (2 * n + 5)
    term2 = (q * (q - 1) * (2 * q + 5)).sum()
    # 3. estimate variance
    varS = float(term1 - term2) / 18.

    # Compute the Z-score based on above estimated mean and variance
    if S > eps:
        Zmk = (S - 1) / np.sqrt(varS)
    elif np.fabs(S) <= eps:
        Zmk = 0.
    elif S < -eps:
        Zmk = (S + 1) / np.sqrt(varS)

    # compute test based on given 'alpha' and alternative hypothesis
    # note: for all the following cases, the null hypothesis Ho is:
    # Ho := there is no monotonic trend
    #
    # Ha := There is an upward monotonic trend
    if Ha == "up":
        Z_ = ndtri(1. - alpha)
        if Zmk >= Z_:
            MK = "accept Ha := upward trend"
            indicator = 1
        else:
            MK = "reject Ha := upward trend"
            indicator = 0
    # Ha := There is a downward monotonic trend
    elif Ha == "down":
        Z_ = ndtri(1. - alpha)
        if Zmk <= -Z_:
            MK = "accept Ha := downward trend"
            indicator = 1
        else:
            MK = "reject Ha := downward trend"
            indicator = 0
    # Ha := There is an upward OR downward monotonic trend
    elif Ha == "upordown":
        Z_ = ndtri(1. - alpha / 2.)
        if np.fabs(Zmk) >= Z_:
            MK = "accept Ha := upward OR downward trend"
            indicator = 1
        else:
            MK = "reject Ha := upward OR downward trend"
            indicator = 0

    # ----------
    # AS A BONUS
    # ----------
    # estimate the slope and intercept of the line
    m = np.corrcoef(t, x)[0, 1] * (np.std(x) / np.std(t))
    c = np.mean(x) - m * np.mean(t)

    # ----------
    # AS A BONUS
    # ----------
    # estimate the p-value for the obtained Z-score Zmk
    if S > eps:
        if Ha == "up":
            p = 1. - ndtr(Zmk)
        elif Ha == "down":
            p = ndtr(Zmk)
        elif Ha == "upordown":
            p = 0.5 * (1. - ndtr(Zmk))
    elif np.fabs(S) <= eps:
        p = 0.5
    elif S < -eps:
        if Ha == "up":
            p = 1. - ndtr(Zmk)
        elif Ha == "down":
            p = ndtr(Zmk)
        elif Ha == "upordown":
            p = 0.5 * (ndtr(Zmk))

    return MK, m, c, p, indicator
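# Illustrative call of the Mann-Kendall test() above on a noisy upward trend
# (assumes numpy and scipy.special's ndtri/ndtr are imported at module level,
# as the function body requires).
import numpy as np
from scipy.special import ndtri, ndtr

rng = np.random.RandomState(42)
t = np.arange(100, dtype=float)
x = 0.05 * t + rng.normal(scale=0.5, size=t.size)

MK, m, c, p, indicator = test(t, x, eps=1e-6, alpha=0.05, Ha="upordown")
print(MK)       # -> "accept Ha := upward OR downward trend" for this data
print(m, c, p)  # slope near 0.05, small p-value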
def _ppf(self, q, c):
    tmp = c * special.ndtri(q)
    return 0.25 * (tmp + np.sqrt(tmp**2 + 4))**2