def _covtest_sampler(cone, eta, sigma, ndraw=1000, mu=None):
    """
    Due to the special structure of the covtest cone constraint,
    sampling is easy with importance weights.
    """
    n = eta.shape[0]
    eta_n = eta / np.linalg.norm(eta)

    results = []
    weights = []

    if mu is None:
        mu = np.zeros(n)

    for _ in range(ndraw):
        Y0 = np.random.standard_normal(n) * sigma + mu
        mu_eta = (mu * eta_n).sum()
        Y0 -= (Y0 * eta_n).sum() * eta_n
        L, _, U = cone.bounds(eta_n, Y0)[:3]
        cdfL = ndtr(-(L - mu_eta) / sigma)
        cdfU = ndtr(-(U - mu_eta) / sigma)
        unif = np.random.sample() * (cdfU - cdfL) + cdfL
        if unif < 0.5:
            tnorm = ndtri(unif) * sigma
        else:
            tnorm = -ndtri(1 - unif) * sigma
        tnorm = -tnorm
        results.append(np.sum(eta * (Y0 + (tnorm + mu_eta) * eta_n)))
        weights.append(np.fabs(cdfL - cdfU))

    family = discrete_family(results, weights)
    return family
def integrate_box_1d(self, low, high):
    """
    Computes the integral of a 1D pdf between two bounds.

    Parameters
    ----------
    low : scalar
        Lower bound of integration.
    high : scalar
        Upper bound of integration.

    Returns
    -------
    value : scalar
        The result of the integral.

    Raises
    ------
    ValueError
        If the KDE is over more than one dimension.
    """
    if self.d != 1:
        raise ValueError("integrate_box_1d() only handles 1D pdfs")

    stdev = ravel(sqrt(self.covariance))[0]

    normalized_low = ravel((low - self.dataset) / stdev)
    normalized_high = ravel((high - self.dataset) / stdev)

    value = np.sum(self.weights * (
        special.ndtr(normalized_high) - special.ndtr(normalized_low)))
    return value
def tauchen(mu, sigma_e, rho, lambda_z):
    # number of grid points
    N_z = 2 * lambda_z + 1

    # value of grid points
    Z = np.asarray([
        mu + lam * sigma_e / (1 - rho**2)**0.5
        for lam in range(-lambda_z, lambda_z + 1)
    ])

    # mid points
    M = np.asarray([(Z[i] + Z[i + 1]) / 2 for i in range(N_z - 1)])

    # transition matrix
    Pi = np.empty((N_z, N_z))

    # fill in probabilities
    for i in range(N_z):
        for j in range(N_z):
            if j == 0:
                Pi[i, j] = special.ndtr(
                    (M[j] - (1 - rho) * mu - rho * Z[i]) / sigma_e)
            elif j < N_z - 1:
                Pi[i, j] = special.ndtr(
                    (M[j] - (1 - rho) * mu - rho * Z[i]) / sigma_e) - special.ndtr(
                        (M[j - 1] - (1 - rho) * mu - rho * Z[i]) / sigma_e)
            else:
                Pi[i, j] = 1 - special.ndtr(
                    (M[j - 1] - (1 - rho) * mu - rho * Z[i]) / sigma_e)

    return Z, Pi
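# Usage sketch for the Tauchen discretization above (parameter values are
# illustrative; `tauchen` is assumed to be in scope with numpy/scipy imported
# as in the snippet):
import numpy as np
from scipy import special

# Discretize z' = (1 - rho)*mu + rho*z + e, e ~ N(0, sigma_e^2), on a grid of
# 2*lambda_z + 1 points spaced one unconditional standard deviation apart.
Z, Pi = tauchen(mu=0.0, sigma_e=0.1, rho=0.9, lambda_z=2)
print(Z)               # 5 grid points centered on mu
print(Pi.sum(axis=1))  # each row of the transition matrix sums to 1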
def cvConv(x, s0, K, xstar, r, s, dt):
    '''
    x = log S/K, can be a vector
    '''
    d1 = (x - xstar + (r + 0.5 * s * s) * dt) / (s * numpy.sqrt(dt))
    d2 = d1 - s * numpy.sqrt(dt)
    return -s0 * numpy.exp(x) * ndtr(-d1) + K * numpy.exp(-r * dt) * ndtr(-d2)
def callforCarr(self, s0, K, T):
    s0, K, T = map(float, (s0, K, T))
    r, m, s = self.r, self.m, self.s
    return s0 * numpy.exp(m + 0.5 * s * s - r * T) * ndtr(
        (m + s * s - numpy.log(K / s0)) / s) - K * numpy.exp(
            -r * T) * ndtr((m - numpy.log(K / s0)) / s)
def estimateSigma(self, gamma_sigma, param, K, F, T):
    a = self._a
    # param = np.exp(param)  # transform to restrict a to be positive
    effective_K = 1 - np.exp(-a * T) * (1 - K / F)
    f0 = F + gamma_sigma[0]
    k0 = effective_K * F + gamma_sigma[0]
    d1 = (np.log(f0 / k0) +
          (r + 0.5 * gamma_sigma[1]**2) * T) / (gamma_sigma[1] * np.sqrt(T))
    d2 = d1 - gamma_sigma[1] * np.sqrt(T)
    # call_price = np.exp(a*T)/F * (f0*norm.cdf(d1) - k0*norm.cdf(d2))
    f1 = -2 * a * (effective_K - 1) * ndtr(d2) * np.exp(a * T)
    f2 = 4 * a**2 * (effective_K - 1)**2 * ndtr(d2)**2 * np.exp(2 * a * T)
    f3 = f0**2 * norm._pdf(d1)**2 * np.exp(a * T) * effective_K**2
    f4 = f0 * norm._pdf(d1) / (F * np.sqrt(T))
    estimate_sigma = (f1 + np.sqrt(f2 + (f3 * (param[0] * effective_K + param[1])**2) /
                                   (k0**2 * np.sqrt(T)))) / f4
    return estimate_sigma
def conf(ratings):
    ratings = reject_outliers(ratings)
    if (ratings == []):
        return 0.0
    if (np.std(ratings) == 0):
        if (len(ratings)) > 9:
            return 1.0
        else:
            return math.log(len(ratings)) / 2.1
    intwid = 0.01
    perc = 1.0
    upper = 1.0
    lower = 0.0
    n = len(ratings)
    mean = sum(ratings) / n
    stdev = np.std(ratings)
    while (perc > intwid):
        z = norm.ppf(1 - (intwid / 2))
        lower = mean - (z * stdev / (math.sqrt(n)))
        upper = mean + (z * stdev / (math.sqrt(n)))
        perc = 0.0
        for v in ratings:
            perc += (special.ndtr((upper - v) / n) - special.ndtr((lower - v) / n))
        if (intwid == 1.0):
            return 0.0
        intwid += 0.01
    return 1 - intwid
def test_optimizer_PI(self):
    for n_samples in range(1, 100):
        mean = np.random.rand(n_samples, 1)
        std = np.random.rand(n_samples, 1)
        tradeoff = np.random.rand()
        max_val = np.random.rand()

        # 1. fitted estimator
        mock_estimator = mock.MockEstimator(predict_return=(mean, std))
        optimizer = modAL.models.BayesianOptimizer(estimator=mock_estimator)
        optimizer._set_max([0], [max_val])
        true_PI = ndtr((mean - max_val - tradeoff) / std)

        np.testing.assert_almost_equal(
            true_PI,
            modAL.acquisition.optimizer_PI(optimizer,
                                           np.random.rand(n_samples, 2),
                                           tradeoff))

        # 2. unfitted estimator
        mock_estimator = mock.MockEstimator(fitted=False)
        optimizer = modAL.models.BayesianOptimizer(estimator=mock_estimator)
        optimizer._set_max([0], [max_val])
        true_PI = ndtr(
            (np.zeros(shape=(len(mean), 1)) - max_val - tradeoff) /
            np.ones(shape=(len(mean), 1)))

        np.testing.assert_almost_equal(
            true_PI,
            modAL.acquisition.optimizer_PI(optimizer,
                                           np.random.rand(n_samples, 2),
                                           tradeoff))
def confusion_matrix(v, vc, full=False):
    """
    Compute the confusion matrix of detected spikes vs. non-spike local extrema.

    * v: intracellular trace
    * vc: separatrix
    * full=False: if True, return a dict with keys:
      TP, FP, FN, TN, mu1, s1, mu2, s2
    """
    sign_changes = find_peaks(v)
    peaks = v[sign_changes]  # peak values
    peaks = sort(peaks)      # sort values
    v1 = v2 = vc
    # first cluster
    mu1 = mean(peaks[peaks <= v1])
    s1 = std(peaks[peaks <= v1])
    # second cluster
    mu2 = mean(peaks[peaks >= v2])
    s2 = std(peaks[peaks >= v2])
    # compute the confusion matrix
    TP = 1 - ndtr((v2 - mu2) / s2)
    FP = 1 - ndtr((v2 - mu1) / s1)
    FN = ndtr((v1 - mu2) / s2)
    TN = ndtr((v1 - mu1) / s1)
    if full:
        return dict(TP=TP, FP=FP, FN=FN, TN=TN, mu1=mu1, s1=s1, mu2=mu2, s2=s2)
    return TP, FP, FN, TN
def call(self, s0, K, T, n=10):
    s0, K, T = map(float, (s0, K, T))
    r = self.r
    alpha = self.alpha
    beta = self.beta
    lambdaa = self.lambdaa
    sigma = self.sigma
    price = 0
    fac_k = 1  # k!
    mu = r - lambdaa * (numpy.exp(alpha + 0.5 * beta * beta) - 1)
    for k in range(n):
        price += numpy.exp(-lambdaa * T) * (lambdaa * T)**k / fac_k * \
            (s0 * numpy.exp(mu * T + k * (alpha + 0.5 * beta * beta)) * ndtr(
                (numpy.log(s0 / K) + (mu + 0.5 * sigma * sigma) * T + k * (alpha + beta * beta))
                / numpy.sqrt(sigma * sigma * T + beta * beta * k))
             - K * ndtr(
                (numpy.log(s0 / K) + (mu - 0.5 * sigma * sigma) * T + k * alpha)
                / numpy.sqrt(sigma * sigma * T + beta * beta * k)))
        fac_k *= (k + 1)
    return numpy.exp(-r * T) * price
def aprox_por_normal(n, m, r):
    """
    Rank test for large n and m.
    Approximates the p-value with a standard normal.

    n : size of the smaller sample
    m : size of the larger sample
    r : observed rank sum
    """
    # expected value of Ri
    # esperanza_ri = (n + m + 1) / 2

    # expected value of R
    esperanza_R = n * ((n + m + 1) / 2.0)

    # variance of R
    varianza_R = (n * m) * ((n + m + 1) / 12.0)

    # standardize to N(0, 1)
    Z = (r - esperanza_R) / sqrt(varianza_R)

    if r <= esperanza_R:
        p_value = 2 * ndtr(Z)
    else:
        p_value = 2 * (1 - ndtr(Z))

    return p_value
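# Cross-check of the normal approximation against scipy.stats.ranksums, which
# uses the same statistic without a continuity correction (a sketch with
# illustrative data; `aprox_por_normal` and its imports are assumed in scope):
import numpy as np
from scipy import stats
from scipy.stats import rankdata

rng = np.random.default_rng(0)
x = rng.normal(size=10)   # smaller sample (n)
y = rng.normal(size=15)   # larger sample (m)

ranks = rankdata(np.concatenate([x, y]))
r = ranks[:len(x)].sum()  # rank sum of the smaller sample

print(aprox_por_normal(len(x), len(y), r), stats.ranksums(x, y).pvalue)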
def integrate_box_1d(self, low, high):
    """Computes the integral of a 1D pdf between two bounds.

    Parameters
    ----------
    low : scalar
        lower bound of integration
    high : scalar
        upper bound of integration

    Returns
    -------
    value : scalar
        the result of the integral
    """
    if self.d != 1:
        raise ValueError("integrate_box_1d() only handles 1D pdfs")

    stdev = ravel(sqrt(self.covariance))[0]

    normalized_low = ravel((low - self.dataset) / stdev)
    normalized_high = ravel((high - self.dataset) / stdev)

    value = stats.mean(special.ndtr(normalized_high) -
                       special.ndtr(normalized_low))
    return value
def log_prior_transform_nested(self, x, x_name):
    self.log.debug(
        f'NestedSamplerStatModel::\tSUPERVERBOSE\tdoing some transformations for nestle/multinest '
        f'to read the priors')
    if self.config['prior'][x_name]['prior_type'] == 'flat':
        a, b = self.config['prior'][x_name]['param']
        # Prior transform of a flat prior is a simple line.
        return x * (b - a) + a
    if self.config['prior'][x_name]['prior_type'] == 'gauss':
        # Get the range from the config file
        a, b = self.config['prior'][x_name]['range']
        m, s = self.config['prior'][x_name]['param']

        # Here the prior transform is being constructed and shifted. This may not seem
        # trivial and one is advised to request a notebook where this is explained
        # from the developer(s).
        aprime = spsp.ndtr((a - m) / s)
        bprime = spsp.ndtr((b - m) / s)
        xprime = x * (bprime - aprime) + aprime
        res = m + s * spsp.ndtri(xprime)
        return res

    err_message = (
        f"unknown prior type '{self.config['prior'][x_name]['prior_type']}',"
        f" choose either gauss or flat")
    raise TypeError(err_message)
def _bca_interval(data, statistic, axis, alpha, theta_hat_b, batch):
    """Bias-corrected and accelerated interval."""
    # closely follows [2] "BCa Bootstrap CIs"
    sample = data[0]  # only works with 1 sample statistics right now

    # calculate z0_hat
    theta_hat = statistic(sample, axis=axis)[..., None]
    percentile = _percentile_of_score(theta_hat_b, theta_hat, axis=-1)
    z0_hat = ndtri(percentile)

    # calculate a_hat
    theta_hat_i = []  # would be better to fill pre-allocated array
    for jackknife_sample in _jackknife_resample(sample, batch):
        theta_hat_i.append(statistic(jackknife_sample, axis=-1))
    theta_hat_i = np.concatenate(theta_hat_i, axis=-1)
    theta_hat_dot = theta_hat_i.mean(axis=-1, keepdims=True)
    num = ((theta_hat_dot - theta_hat_i)**3).sum(axis=-1)
    den = 6 * ((theta_hat_dot - theta_hat_i)**2).sum(axis=-1)**(3 / 2)
    a_hat = num / den

    # calculate alpha_1, alpha_2
    z_alpha = ndtri(alpha)
    z_1alpha = -z_alpha
    num1 = z0_hat + z_alpha
    alpha_1 = ndtr(z0_hat + num1 / (1 - a_hat * num1))
    num2 = z0_hat + z_1alpha
    alpha_2 = ndtr(z0_hat + num2 / (1 - a_hat * num2))
    return alpha_1, alpha_2
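# For context: a bias-corrected and accelerated (BCa) interval can be requested
# through the public scipy.stats.bootstrap API (SciPy >= 1.7). A hedged sketch
# with illustrative data; this does not claim the helper above is that exact
# implementation:
import numpy as np
from scipy import stats

rng = np.random.default_rng(0)
sample = rng.normal(loc=2.0, scale=1.0, size=100)

# `data` is a sequence of samples; method='BCa' selects the BCa interval.
res = stats.bootstrap((sample,), np.mean, confidence_level=0.95, method='BCa')
print(res.confidence_interval)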
def vanilla(r, K, dt, sigma, S):
    """
    This is a simple Vanilla Put Option calculation based on the analytic
    solution for a single underlying asset. The solution used is from
    The Mathematics of Financial Derivatives, Wilmott, et al.
    Uses exp from scipy and ndtr from the scipy.special module.

    r    : risk free rate (float)
    K    : strike price (float)
    dt   : time to expiry (float)
    sigma: volatility of S (float)
    S    : range of underlying values (array[float])

    Usage: put_value = vanilla(r, K, dt, sigma, S)
    """
    d1 = zeros(len(S))
    d2 = zeros(len(S))
    n1 = zeros(len(S))
    n2 = zeros(len(S))
    pt = zeros(len(S))
    b = sigma * sqrt(dt)
    dsct = exp(-1.0 * r * dt)
    for i in range(len(S)):
        d1[i] = (log(S[i] / K) + (r + (0.5 * sigma**2)) * dt) / b
        d2[i] = (log(S[i] / K) + (r - (0.5 * sigma**2)) * dt) / b
        n1[i] = special.ndtr(-1.0 * d1[i])
        n2[i] = special.ndtr(-1.0 * d2[i])
        pt[i] = K * dsct * n2[i] - S[i] * n1[i]
    return pt
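# The element-wise loop above can be replaced by array operations; a minimal
# vectorized sketch of the same Black-Scholes put (the name `vanilla_vectorized`
# is illustrative):
import numpy as np
from scipy import special

def vanilla_vectorized(r, K, dt, sigma, S):
    # European put evaluated for a whole array of spot prices at once.
    S = np.asarray(S, dtype=float)
    d1 = (np.log(S / K) + (r + 0.5 * sigma**2) * dt) / (sigma * np.sqrt(dt))
    d2 = d1 - sigma * np.sqrt(dt)
    return K * np.exp(-r * dt) * special.ndtr(-d2) - S * special.ndtr(-d1)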
def BSFormula(self, option: vo.VanillaOption) -> float:
    d1 = (np.log(self.forward / option.strike) +
          0.5 * self.vol * self.vol * option.tau) / (self.vol * np.sqrt(option.tau))
    d2 = d1 - self.vol * np.sqrt(option.tau)
    return np.exp(-self.rate * option.tau) * (self.forward * ndtr(d1) -
                                              option.strike * ndtr(d2))
def integrate_box_1d(self, low, high):
    """
    Computes the integral of a 1D pdf between two bounds.

    Parameters
    ----------
    low : scalar
        Lower bound of integration.
    high : scalar
        Upper bound of integration.

    Returns
    -------
    value : scalar
        The result of the integral.

    Raises
    ------
    ValueError
        If the KDE is over more than one dimension.
    """
    if self.d != 1:
        raise ValueError("integrate_box_1d() only handles 1D pdfs")

    stdev = ravel(sqrt(self.covariance))[0]

    normalized_low = ravel((low - self.dataset) / stdev)
    normalized_high = ravel((high - self.dataset) / stdev)

    value = np.mean(special.ndtr(normalized_high) -
                    special.ndtr(normalized_low))
    return value
def pexpunifgauss(t, tau, T, sigma):
    """
    Convolution of an exponential with scale tau, a uniform in (0, T),
    and a normal with scale sigma.
    """
    return 1 / T * (special.ndtr(t / sigma) - special.ndtr((t - T) / sigma) -
                    np.exp(logq(t, tau, sigma)) + np.exp(logq(t - T, tau, sigma)))
def range_normal(n, m, r):
    num = r - n * (n + m + 1) / 2
    den = sqrt(n * m * (n + m + 1) / 12)
    R = num / den
    if r <= n * ((n + m + 1) / 2):
        return 2 * ndtr(R)
    else:
        return 2 * (1 - ndtr(R))
def BSputLogSc(x, s0, K, r, s, tau):
    '''
    x = log S/K, can be a vector
    '''
    d1 = (x + numpy.log(s0 / K) + (r + 0.5 * s * s) * tau) / (s * numpy.sqrt(tau))
    d2 = d1 - s * numpy.sqrt(tau)
    return -s0 * numpy.exp(x) * ndtr(-d1) + K * numpy.exp(-r * tau) * ndtr(-d2)
def price_difference(self, param, K, F, T, market_price):
    d1 = (np.log((F + param[0]) / (K + param[0])) +
          (r + 0.5 * param[1]**2) * T) / (param[1] * np.sqrt(T))
    d2 = d1 - param[1] * np.sqrt(T)
    bls_price = (F + param[0]) * ndtr(d1) - np.exp(-r * T) * K * ndtr(d2)
    difference = bls_price - market_price
    return abs(difference)
def call(self, s0, K, T):
    s0, K, T = map(float, (s0, K, T))
    r, s = self.r, self.sigma
    d1 = (numpy.log(float(s0) / float(K)) +
          (r + 0.5 * s * s) * T) / (s * numpy.sqrt(T))
    d2 = d1 - s * numpy.sqrt(T)
    return max(s0 - K, 0) if numpy.isclose(
        T, 0) else s0 * ndtr(d1) - K * numpy.exp(-r * T) * ndtr(d2)
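# A standalone sketch of the same Black-Scholes call outside a class, with a
# put-call parity sanity check (parameter values are illustrative):
import numpy as np
from scipy.special import ndtr

def bs_call(s0, K, T, r, sigma):
    # European call; the degenerate case T -> 0 returns the intrinsic value.
    if np.isclose(T, 0):
        return max(s0 - K, 0.0)
    d1 = (np.log(s0 / K) + (r + 0.5 * sigma**2) * T) / (sigma * np.sqrt(T))
    d2 = d1 - sigma * np.sqrt(T)
    return s0 * ndtr(d1) - K * np.exp(-r * T) * ndtr(d2)

def bs_put(s0, K, T, r, sigma):
    # Put via put-call parity: P = C - S0 + K*exp(-rT)
    return bs_call(s0, K, T, r, sigma) - s0 + K * np.exp(-r * T)

c = bs_call(100.0, 95.0, 1.0, 0.05, 0.2)
p = bs_put(100.0, 95.0, 1.0, 0.05, 0.2)
print(c - p - (100.0 - 95.0 * np.exp(-0.05)))  # parity residual, ~0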
def _cdf(self, x, mu):
    trm1 = 1. / mu - x
    trm2 = 1. / mu + x
    isqx = numpy.tile(numpy.inf, x.shape)
    indices = x > 0
    isqx[indices] = 1. / numpy.sqrt(x[indices])
    out = 1. - special.ndtr(isqx * trm1)
    out -= numpy.exp(2.0 / mu) * special.ndtr(-isqx * trm2)
    return out
def conditional_distance_cdf(r, distmu, distsigma, distnorm):
    """Cumulative conditional distribution of distance (ansatz).

    Parameters
    ----------
    r : `numpy.ndarray`
        Distance (Mpc)
    distmu : `numpy.ndarray`
        Distance location parameter (Mpc)
    distsigma : `numpy.ndarray`
        Distance scale parameter (Mpc)
    distnorm : `numpy.ndarray`
        Distance normalization factor (Mpc^-2)

    Returns
    -------
    cdf : `numpy.ndarray`
        Conditional cumulative probability according to ansatz.

    Test against numerical integral of pdf:

    >>> import scipy.integrate
    >>> distmu = 10.0
    >>> distsigma = 5.0
    >>> distnorm = 1.0
    >>> r = 8.0
    >>> expected, _ = scipy.integrate.quad(
    ...     conditional_distance_pdf, 0, r,
    ...     (distmu, distsigma, distnorm))
    >>> result = conditional_distance_cdf(
    ...     r, distmu, distsigma, distnorm)
    >>> np.testing.assert_almost_equal(expected, result)
    """
    mu = distmu
    sigma = distsigma
    mu2 = np.square(mu)
    sigma2 = np.square(sigma)
    arg1 = -mu / sigma
    arg2 = (r - mu) / sigma
    result = (
        (mu2 + sigma2) * (ndtr(arg2) - ndtr(arg1)) +
        sigma / np.sqrt(2 * np.pi) *
        (mu * np.exp(-0.5 * np.square(arg1)) -
         (r + mu) * np.exp(-0.5 * np.square(arg2)))
    ) * distnorm
    good = (
        np.isfinite(distmu) & np.isfinite(distsigma) & np.isfinite(distnorm))
    result = np.where(good, result, 0.0)
    isscalar = (
        np.isscalar(r) & np.isscalar(distmu) &
        np.isscalar(distsigma) & np.isscalar(distnorm))
    if isscalar:
        result = np.asscalar(result)
    return result
def _cdf(self, x, mu):
    trm1 = 1. / mu - x
    trm2 = 1. / mu + x
    isqx = numpy.full_like(x, numpy.inf)
    indices = x > 0
    isqx[indices] = 1. / numpy.sqrt(x[indices])
    out = 1. - special.ndtr(isqx * trm1)
    out -= numpy.exp(2.0 / mu) * special.ndtr(-isqx * trm2)
    out = numpy.where(x == numpy.inf, 1, out)
    out = numpy.where(x == -numpy.inf, 0, out)
    return out
def _truncnorm_sf(truncation_level, values):
    """
    Survival function for truncated normal distribution.

    Assumes zero mean, standard deviation equal to one and symmetric
    truncation.

    :param truncation_level:
        Positive float number representing the truncation on both sides
        around the mean, in units of sigma, or None, for non-truncation
    :param values:
        Numpy array of values as input to a survival function for the given
        distribution.
    :returns:
        Numpy array of survival function results in a range between 0 and 1.

    >>> from scipy.stats import truncnorm
    >>> truncnorm(-3, 3).sf(0.12345) == _truncnorm_sf(3, 0.12345)
    True
    >>> from scipy.stats import norm
    >>> norm.sf(0.12345) == _truncnorm_sf(None, 0.12345)
    True
    """
    if truncation_level == 0:
        return values

    if truncation_level is None:
        return ndtr(-values)

    # notation from http://en.wikipedia.org/wiki/Truncated_normal_distribution.
    # given that mu = 0 and sigma = 1, we have alpha = a and beta = b.

    # "CDF" in comments refers to cumulative distribution function
    # of non-truncated distribution with that mu and sigma values.

    # assume symmetric truncation, that is ``a = - truncation_level``
    # and ``b = + truncation_level``.

    # calculate CDF of b
    phi_b = ndtr(truncation_level)

    # calculate Z as ``Z = CDF(b) - CDF(a)``, here we assume that
    # ``CDF(a) == CDF(- truncation_level) == 1 - CDF(b)``
    z = phi_b * 2 - 1

    # calculate the result of survival function of ``values``,
    # and restrict it to the interval where probability is defined --
    # 0..1. here we use some transformations of the original formula
    # that is ``SF(x) = 1 - (CDF(x) - CDF(a)) / Z`` in order to minimize
    # number of arithmetic operations and function calls:
    # ``SF(x) = (Z - CDF(x) + CDF(a)) / Z``,
    # ``SF(x) = (CDF(b) - CDF(a) - CDF(x) + CDF(a)) / Z``,
    # ``SF(x) = (CDF(b) - CDF(x)) / Z``.
    return ((phi_b - ndtr(values)) / z).clip(0.0, 1.0)
def dm_normd(a, b):
    '''
    distance matrix for 2 normally distributed sequences
    '''
    m, n = len(a), len(b)
    result = np.zeros((m, n), dtype=np.single)
    if m < n:
        for i in range(m):
            result[i, :] = np.abs(ndtr(a[i]) - ndtr(b))
    else:
        for j in range(n):
            result[:, j] = np.abs(ndtr(a) - ndtr(b[j]))
    return result
def Prob(self, i, j, prix, k, l):
    if (k != i + 1 or j < l):
        return 0
    if j == 0:
        return 0
    demande = self.Demand(i, prix)
    proba = special.ndtr(
        ((j - l) + 1 - demande) / self.var_demande) - special.ndtr(
            (j - l - demande) / self.var_demande)
    return proba
def pdf(x, a, b, mu=0, sigma=1):
    """
    Probability density function of the truncated normal distribution
    on the interval [a, b].

    Args:
        x: a value between a and b at which to evaluate the pdf.
        a: the left boundary of the truncated normal distribution.
        b: the right boundary of the truncated normal distribution.
        mu: the mean value.
        sigma: the standard deviation.

    Returns:
        The probability density at point x.

    Raises:
        ValueError: raised when the values of x, a, and b are invalid.
    """
    if a >= b:
        raise ValueError("The interval's left boundary is larger than "
                         "the right boundary")
    if x < a or x > b:
        raise ValueError("The query position is outside of the interval")
    if a == np.inf:
        raise ValueError("The interval's left boundary can not be np.inf")
    if b == -np.inf:
        raise ValueError("The interval's right boundary can not be -np.inf")

    x = (x - mu) / sigma
    a = (a - mu) / sigma
    b = (b - mu) / sigma

    if a * b <= 0:
        area = special.ndtr(b) - special.ndtr(a)
        p = 1 / np.sqrt(2 * np.pi) * np.exp(-0.5 * x**2) / area
        return p
    else:
        x = -np.abs(x)
        if a > 0:
            low = -np.abs(b)
            up = -np.abs(a)
        else:
            low = -np.abs(a)
            up = -np.abs(b)
        low_log_area = special.log_ndtr(low)
        up_log_area = special.log_ndtr(up)
        low_log_area += 0.5 * up**2
        up_log_area += 0.5 * up**2
        area = np.exp(up_log_area) - np.exp(low_log_area)
        p = 1 / np.sqrt(2 * np.pi) * np.exp(-0.5 * (x**2 - up**2)) / area
        return p
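# Quick cross-check of the truncated-normal pdf above against
# scipy.stats.truncnorm for the standard case mu=0, sigma=1 (illustrative
# values; `pdf` refers to the function defined above):
import numpy as np
from scipy import stats

a, b, x = -1.0, 2.0, 0.5
# For mu=0, sigma=1 the truncnorm shape parameters are simply a and b.
print(pdf(x, a, b), stats.truncnorm.pdf(x, a, b))  # both ~ phi(x)/(Phi(b)-Phi(a))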
def p_value_big(sample_1, sample_2):
    """
    Computes the p-value using a normal approximation.
    """
    n, m, sample_1, sample_2 = correct_order(sample_1, sample_2)
    R = Range(sample_1, sample_2)
    R_mean, R_std_dev = n * (n + m + 1) / 2, sqrt(n * m * (n + m + 1) / 12)
    r_star = (R - R_mean) / R_std_dev
    if R <= R_mean:
        return (2 * ndtr(r_star))
    else:
        return (2 * (1 - ndtr(r_star)))
def cumulative_distribution(self, X):
    """Computes the integral of a 1-D pdf between two bounds.

    Args:
        X (numpy.array): Shaped (1, n), containing the datapoints.

    Returns:
        numpy.array: estimated cumulative distribution.
    """
    stdev = np.sqrt(self.model.covariance[0, 0])
    lower = ndtr((self.lower - self.model.dataset) / stdev)[0]
    uppers = np.vstack(
        [ndtr((x - self.model.dataset) / stdev)[0] for x in X])
    return (uppers - lower).dot(self.model.weights)
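# For comparison, scipy.stats.gaussian_kde exposes the same weighted-CDF
# computation through integrate_box_1d (a sketch with illustrative data):
import numpy as np
from scipy import stats

rng = np.random.default_rng(0)
kde = stats.gaussian_kde(rng.normal(size=500))

# P(X <= 0.5) estimated from the KDE, integrating up from -inf.
print(kde.integrate_box_1d(-np.inf, 0.5))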
def AddaCooper(x, N):
    beta, rc, theta, rho, sigma = x
    sigma_z = sigma / (1 - rho**2)**0.5
    Eps = Epsilon(x, N)
    ZZ = Z(x, Eps)
    PPi = np.zeros((N, N))
    for i in range(N):
        for j in range(N):
            temp = integrate.quad(
                lambda t: np.exp(-t**2 / (2 * sigma_z**2)) * (special.ndtr(
                    (Eps[j + 1] - rho * t) / sigma) - special.ndtr(
                        (Eps[j] - rho * t) / sigma)),
                Eps[i], Eps[i + 1])[0]
            PPi[i, j] = temp * N / (2 * np.pi * sigma_z**2)**0.5
    return ZZ, PPi
def implied_vol(mkt_price, F, K, T_maturity, *args):
    Max_iteration = 500
    PRECISION = 1.0e-5
    sigma = 0.4
    for i in range(0, Max_iteration):
        d1 = (np.log(F / K) +
              (0.5 * sigma ** 2) * T_maturity) / (sigma * np.sqrt(T_maturity))
        d2 = d1 - sigma * np.sqrt(T_maturity)
        bls_price = F * ndtr(d1) - K * ndtr(d2)
        vega = F * norm._pdf(d1) * np.sqrt(T_maturity)
        diff = mkt_price - bls_price
        if (abs(diff) < PRECISION):
            return sigma
        sigma = sigma + diff / vega  # f(x) / f'(x)
    return sigma
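# Round-trip sanity check for the Newton solver above: price at a known
# volatility with the same undiscounted formula, then recover it (illustrative
# values; `implied_vol` and its module-level imports are assumed in scope):
import numpy as np
from scipy.special import ndtr

F, K, T, true_sigma = 100.0, 105.0, 0.5, 0.25
d1 = (np.log(F / K) + 0.5 * true_sigma**2 * T) / (true_sigma * np.sqrt(T))
d2 = d1 - true_sigma * np.sqrt(T)
mkt_price = F * ndtr(d1) - K * ndtr(d2)

print(implied_vol(mkt_price, F, K, T))  # should converge back to ~0.25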
def _cdf(self, y):
    # y = (x - loc) / scale
    if isinstance(y, np.ndarray):
        ret = special.ndtr(y) + self.offset
        ret[y < -self.k] = 0
        ret[y > self.k] = 1
        return ret
    else:
        if y <= -self.k:
            return 0
        elif y >= self.k:
            return 1
        else:
            return special.ndtr(y) + self.offset
def gk(order, dist, rule=24):
    assert isinstance(rule, int)

    if len(dist) > 1:
        if isinstance(order, int):
            xw = [gk(order, d, rule) for d in dist]
        else:
            xw = [gk(order[i], dist[i], rule) for i in range(len(dist))]
        x = [_[0][0] for _ in xw]
        x = chaospy.utils.combine(x).T
        w = [_[1] for _ in xw]
        w = np.prod(chaospy.utils.combine(w), -1)
        return x, w

    foo = eval("gk" + str(rule))
    x, w = foo(order)
    x = dist.inv(ndtr(x))
    x = x.reshape(1, x.size)

    return x, w
def getPValInfoNormal(observation, samplingDist):
    sDist = np.array(samplingDist)
    theStd, theMean = sDist.std(), sDist.mean()
    zScore = (float(observation) - theMean) / theStd
    thePVal = ndtr(-zScore)
    return (theStd, theMean, zScore, thePVal)
def approved(x) -> bool:
    """
    Measure whether the value is above p = threshold.
    """
    zscore = (point_estimate - x) / standard_dev
    if ndtr(zscore) >= threshold:
        return True
    return False
def _truncnorm_sf(truncation_level, values):
    """
    Survival function for truncated normal distribution.

    Assumes zero mean, standard deviation equal to one and symmetric
    truncation.

    :param truncation_level:
        Positive float number representing the truncation on both sides
        around the mean, in units of sigma.
    :param values:
        Numpy array of values as input to a survival function for the given
        distribution.
    :returns:
        Numpy array of survival function results in a range between 0 and 1.

    >>> from scipy.stats import truncnorm
    >>> truncnorm(-3, 3).sf(0.12345) == _truncnorm_sf(3, 0.12345)
    True
    """
    # notation from http://en.wikipedia.org/wiki/Truncated_normal_distribution.
    # given that mu = 0 and sigma = 1, we have alpha = a and beta = b.

    # "CDF" in comments refers to cumulative distribution function
    # of non-truncated distribution with that mu and sigma values.

    # assume symmetric truncation, that is ``a = - truncation_level``
    # and ``b = + truncation_level``.

    # calculate CDF of b
    phi_b = ndtr(truncation_level)

    # calculate Z as ``Z = CDF(b) - CDF(a)``, here we assume that
    # ``CDF(a) == CDF(- truncation_level) == 1 - CDF(b)``
    z = phi_b * 2 - 1

    # calculate the result of survival function of ``values``,
    # and restrict it to the interval where probability is defined --
    # 0..1. here we use some transformations of the original formula
    # that is ``SF(x) = 1 - (CDF(x) - CDF(a)) / Z`` in order to minimize
    # number of arithmetic operations and function calls:
    # ``SF(x) = (Z - CDF(x) + CDF(a)) / Z``,
    # ``SF(x) = (CDF(b) - CDF(a) - CDF(x) + CDF(a)) / Z``,
    # ``SF(x) = (CDF(b) - CDF(x)) / Z``.
    return ((phi_b - ndtr(values)) / z).clip(0.0, 1.0)
def flip_coin():
    """Return "Heads" or "Tails" depending on a calculation."""
    # Returns a 2x2 randomly sampled array of values in the range [-5, 5]
    rand_array = 10 * np.random.random((2, 2)) - 5
    # Computes the average of this
    avg = rand_array.mean()
    # Returns the Gaussian CDF of this average
    ndtr = special.ndtr(avg)
    return "Heads" if ndtr > .5 else "Tails"
def _test_grad_accuracy(self, dtype, grid_spec, error_spec):
    raw_grid = _make_grid(dtype, grid_spec)
    grid = ops.convert_to_tensor(raw_grid)
    with self.test_session():
        fn = sm.log_ndtr if self._use_log else sm.ndtr

        # If there are N points in the grid,
        # grad_eval.shape = (N, N), with grad_eval[i, j] the partial derivative of
        # the ith output point w.r.t. the jth grid point.  We only expect the
        # diagonal to be nonzero.
        # TODO(b/31131137): Replace tf.test.compute_gradient with our own custom
        # gradient evaluation to ensure we correctly handle small function delta.
        grad_eval, _ = gradient_checker.compute_gradient(grid, grid_spec.shape,
                                                         fn(grid),
                                                         grid_spec.shape)
        grad_eval = np.diag(grad_eval)

        # Check for NaN separately in order to get informative failures.
        self.assert_all_false(np.isnan(grad_eval))
        self.assert_all_true(grad_eval > 0.)
        # isfinite checks for NaN and Inf.
        self.assert_all_true(np.isfinite(grad_eval))

        # Do the same checks but explicitly compute the gradient.
        # (We did this because we're not sure if we trust
        # tf.test.compute_gradient.)
        grad_eval = gradients_impl.gradients(fn(grid), grid)[0].eval()
        self.assert_all_false(np.isnan(grad_eval))
        if self._use_log:
            g = np.reshape(grad_eval, [-1])
            half = np.ceil(len(g) / 2)
            self.assert_all_true(g[:int(half)] > 0.)
            self.assert_all_true(g[int(half):] >= 0.)
        else:
            # The ndtr gradient will only be non-zero in the range [-14, 14] for
            # float32 and [-38, 38] for float64.
            self.assert_all_true(grad_eval >= 0.)
        # isfinite checks for NaN and Inf.
        self.assert_all_true(np.isfinite(grad_eval))

        # Versus scipy.
        expected = stats.norm.pdf(raw_grid)
        if self._use_log:
            expected /= special.ndtr(raw_grid)
            expected[np.isnan(expected)] = 0.
        # Scipy prematurely goes to zero at some places that we don't.  So don't
        # include these in the comparison.
        self.assertAllClose(
            expected.astype(np.float64)[expected < 0],
            grad_eval.astype(np.float64)[expected < 0],
            rtol=error_spec.rtol,
            atol=error_spec.atol)
def apa_analysis(apa, w=5, cw=3):
    avg = apa.mean(axis=0)
    lowerpart = avg[-cw:, :cw]
    upperpart = avg[:cw, -cw:]
    maxi = upperpart.mean() * 5
    ## APA score
    score = avg[w, w] / lowerpart.mean()
    ## z-score
    z = (avg[w, w] - lowerpart.mean()) / lowerpart.std()
    p = 1 - ndtr(z)
    return avg, score, z, p, maxi
def rtrunc_norm(mean, sd, lower, upper, size=None):
    """
    Sample from a truncated normal distribution

    Parameters
    ----------
    mean : float or array_like
    sd : float or array_like
    lower : float or array-like
    upper : float or array-like

    Note
    ----
    Arrays passed must all be of the same length. Computes samples
    using Phi, the normal CDF, and Phi^{-1} via a standard algorithm:

        draw u ~ Uniform(Phi((lower - mean) / sd), Phi((upper - mean) / sd))
        return mean + sd * Phi^{-1}(u)

    Returns
    -------
    samples : ndarray or float
    """
    ulower = special.ndtr((lower - mean) / sd)
    uupper = special.ndtr((upper - mean) / sd)

    if size is None:
        if isinstance(ulower, np.ndarray):
            draws = np.random.rand(len(ulower))
        else:
            draws = np.random.rand()
    else:
        raise ValueError('if array of bounds passed, size must be None')

    u = (uupper - ulower) * draws + ulower
    return mean + sd * special.ndtri(u)
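# Usage sketch checking the sampler against scipy.stats.truncnorm (illustrative
# parameters; the comparison is statistical, not exact):
import numpy as np
from scipy import stats

mean, sd, lower, upper = 0.0, 1.0, -1.0, 2.0
n = 100_000

# Draw many samples by passing arrays of identical bounds.
draws = rtrunc_norm(np.full(n, mean), np.full(n, sd),
                    np.full(n, lower), np.full(n, upper))

a, b = (lower - mean) / sd, (upper - mean) / sd
print(draws.mean(), stats.truncnorm.mean(a, b, loc=mean, scale=sd))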
def gauss_sigma_to_prob(sigma):
    """
    gauss_sigma_to_prob(sigma):
        Returns the area under the Gaussian probability density
        function, integrated from 'sigma' to infinity.
    """
    if sigma < 5.0:
        return 1.0 - ndtr(sigma)
    else:
        # From A&S page 932, eqn 26.2.12 for Q(x)
        x = sigma
        Z = 1.0 / Num.sqrt(2.0 * Num.pi) * Num.exp(-0.5 * x * x)
        series = Num.sum(Num.asarray([1.0, -1.0 / (x * x), 3.0 / (x**4.0),
                                      -15.0 / (x**6.0), 105.0 / (x**8.0)]))
        return Z / x * series
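# For reference, the cancellation that motivates the asymptotic branch can also
# be avoided by evaluating the upper tail directly as ndtr(-sigma), or
# equivalently 0.5*erfc(sigma/sqrt(2)), which stays accurate far beyond 5 sigma.
# A sketch (calling gauss_sigma_to_prob assumes the snippet's `Num` alias for
# numpy is available):
import numpy as np
from scipy.special import ndtr, erfc

for s in (1.0, 5.0, 8.0):
    tail_direct = ndtr(-s)                    # upper-tail area, no cancellation
    tail_erfc = 0.5 * erfc(s / np.sqrt(2.0))  # equivalent form
    print(s, tail_direct, tail_erfc, gauss_sigma_to_prob(s))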
def estimate_delay(sig1, sig2):
    """Estimates delay between two correlated signals.

    Returns: delay, pval
        delay : number of samples to delay sig2 to achieve max correlation
        pval : Bonferroni corrected p-value of the quality of this correlation,
            relative to the distribution of all other possible delays
            (for whatever that is worth)

    TODO: add flag to use scipy.signal.fftconvolve instead
    """
    # Correlate at all lags
    C = np.correlate(sig1, sig2, 'full')

    # Now see how good the correlation is
    best = C.max()
    argbest = np.argmax(C)

    # number of zeros to prepend to smaller sig
    z = (best - np.median(C)) / C.std()
    pval = (1 - ndtr(z))
    pval = 1 - ((1 - pval) ** len(C))

    # Subtract off the length of sig1
    res = argbest - len(sig2) + 1

    if EVENTSYNC_DEBUG_FIGURE:
        import matplotlib.pyplot as plt
        f = plt.figure()
        ax = f.add_subplot(121)
        ax.plot(sig1)
        ax.plot(range(res, res + len(sig2)), sig2)
        ax.set_xlim((res, res + len(sig2)))
        ax = f.add_subplot(122)
        ax.hist(C, bins=100)
        plt.show()

    if EVENTSYNC_DEBUG:
        print("corr %0.3f at %d (%d) z=%0.2f pval %0.3f" %
              (best, argbest, res, z, pval))
        sys.stdout.flush()

    return res, pval
def _norm_sf(values):
    """
    Survival function for normal distribution.

    Assumes zero mean and standard deviation equal to one.

    ``values`` parameter and the return value are the same
    as in :func:`_truncnorm_sf`.

    >>> from scipy.stats import norm
    >>> norm.sf(0.12345) == _norm_sf(0.12345)
    True
    """
    # survival function by definition is ``SF(x) = 1 - CDF(x)``,
    # which is equivalent to ``SF(x) = CDF(- x)``, since (given
    # that the normal distribution is symmetric with respect to 0)
    # the integral between ``[x, +infinity]`` (that is the survival
    # function) is equal to the integral between ``[-infinity, -x]``
    # (that is the CDF at ``- x``).
    return ndtr(- values)
def _test_grid_no_log(self, dtype, grid_spec, error_spec):
    with self.test_session():
        grid = _make_grid(dtype, grid_spec)
        actual = sm.ndtr(grid).eval()

        # Basic tests.
        self.assertTrue(np.isfinite(actual).all())
        # On the grid, 0 < cdf(x) < 1.  The grid cannot contain everything due
        # to numerical limitations of cdf.
        self.assertTrue((actual > 0).all())
        self.assertTrue((actual < 1).all())
        _check_strictly_increasing(actual)

        # Versus scipy.
        expected = special.ndtr(grid)

        # Scipy prematurely goes to zero at some places that we don't.  So don't
        # include these in the comparison.
        self.assertAllClose(expected.astype(np.float64)[expected < 0],
                            actual.astype(np.float64)[expected < 0],
                            rtol=error_spec.rtol,
                            atol=error_spec.atol)
def _cdf(self, x, c):
    return special.ndtr(x - c) + special.ndtr(x + c) - 1.0
def simulate_raw(params, steps, verbose=False):
    """
    Simulate data in HG' coordinates

    HG' = HG, except center at wave epicenter
    """
    cadence = params["cadence"]
    direction = 180. + params["direction"].to('degree').value

    width_coeff = prep_coeff(params["width"])
    wave_thickness_coeff = prep_coeff(params["wave_thickness"])
    wave_normalization_coeff = prep_coeff(params["wave_normalization"])
    speed_coeff = prep_speed_coeff(params["speed"], params["acceleration"])

    lat_min = params["lat_min"].to('degree').value
    lat_max = params["lat_max"].to('degree').value
    lat_bin = params["lat_bin"].to('degree').value
    lon_min = params["lon_min"].to('degree').value
    lon_max = params["lon_max"].to('degree').value
    lon_bin = params["lon_bin"].to('degree').value

    # This roundabout approach recalculates lat_bin and lon_bin to produce
    # equally sized bins to exactly span the min/max ranges
    lat_num = int(round((lat_max - lat_min) / lat_bin))
    lat_edges, lat_bin = np.linspace(lat_min, lat_max, lat_num + 1, retstep=True)

    lon_num = int(round((lon_max - lon_min) / lon_bin))
    lon_edges, lon_bin = np.linspace(lon_min, lon_max, lon_num + 1, retstep=True)

    # Propagates from 90. down to lat_min, irrespective of lat_max
    p = np.poly1d([speed_coeff[2] / 3., speed_coeff[1] / 2., speed_coeff[0],
                   -(90. - lat_min)])
    # p = np.poly1d([0.0, speed_coeff[1], speed_coeff[2]/2.,
    #                -(90.-lat_min)])

    # Will fail if wave does not propagate all the way to lat_min
    # duration = p.r[np.logical_and(p.r.real > 0, p.r.imag == 0)][0]

    # steps = int(duration/cadence)+1
    # if steps > params["max_steps"]:
    #     steps = params["max_steps"]

    # Maybe use np.poly1d() instead to do the polynomial calculation?
    time = params["start_time_offset"] + np.arange(steps) * cadence
    time_powers = np.vstack((time**0, time**1, time**2))

    width = np.dot(width_coeff, time_powers).ravel()
    wave_thickness = np.dot(wave_thickness_coeff, time_powers).ravel()
    wave_normalization = np.dot(wave_normalization_coeff, time_powers).ravel()

    # Position
    # Propagates from 90., irrespective of lat_max
    wave_peak = 90. - (p(time) + (90. - lat_min))

    out_of_bounds = np.logical_or(wave_peak < lat_min, wave_peak > lat_max)
    if out_of_bounds.any():
        steps = np.where(out_of_bounds)[0][0]

    # Storage for the wave maps
    wave_maps = []

    # Header of the wave maps
    dict_header = {
        "CDELT1": lon_bin,
        "NAXIS1": lon_num,
        "CRVAL1": lon_min,
        "CRPIX1": crpix12_value_for_HG,
        "CUNIT1": "deg",
        "CTYPE1": "HG",
        "CDELT2": lat_bin,
        "NAXIS2": lat_num,
        "CRVAL2": lat_min,
        "CRPIX2": crpix12_value_for_HG,
        "CUNIT2": "deg",
        "CTYPE2": "HG",
        "HGLT_OBS": 0.0,  # (sun.heliographic_solar_center(BASE_DATE))[1],
        # the value of HGLT_OBS from Earth at the given date
        "CRLN_OBS": 0.0,  # (sun.heliographic_solar_center(BASE_DATE))[0],
        # the value of CRLN_OBS from Earth at the given date
        "DSUN_OBS": sun.sunearth_distance(BASE_DATE.strftime(BASE_DATE_FORMAT)).to('m').value,
        "DATE_OBS": BASE_DATE.strftime(BASE_DATE_FORMAT),
        "EXPTIME": 1.0
    }

    if verbose:
        print(" * Simulating " + str(steps) + " raw maps.")

    for istep in range(0, steps):
        # Current datetime
        current_datetime = BASE_DATE + datetime.timedelta(seconds=time[istep])

        # Update the header to set the correct observation time and earth-sun
        # distance
        dict_header['DATE_OBS'] = current_datetime.strftime(BASE_DATE_FORMAT)

        # Update the Earth-Sun distance
        dict_header['DSUN_OBS'] = sun.sunearth_distance(dict_header['DATE_OBS']).to('m').value

        # Update the heliographic latitude
        dict_header['HGLT_OBS'] = 0.0  # (sun.heliographic_solar_center(dict_header['DATE_OBS']))[1].to('degree').value

        # Update the heliographic longitude
        dict_header['CRLN_OBS'] = 0.0  # (sun.heliographic_solar_center(dict_header['DATE_OBS']))[0].to('degree').value

        # Gaussian profile in longitudinal direction
        # Does not take into account spherical geometry (i.e., change in area
        # element)
        if wave_thickness[istep] <= 0:
            print(" * ERROR: wave thickness is non-physical!")
        z = (lat_edges - wave_peak[istep]) / wave_thickness[istep]
        wave_1d = wave_normalization[istep] * (ndtr(np.roll(z, -1)) - ndtr(z))[0:lat_num]
        wave_1d /= lat_bin

        wave_lon_min = direction - width[istep] / 2
        wave_lon_max = direction + width[istep] / 2

        if width[istep] < 360.:
            # Do these need to be np.remainder() instead?
            wave_lon_min_mod = ((wave_lon_min + 180.) % 360.) - 180.
            wave_lon_max_mod = ((wave_lon_max + 180.) % 360.) - 180.

            index1 = np.arange(lon_num + 1)[
                np.roll(lon_edges, -1) > min(wave_lon_min_mod, wave_lon_max_mod)][0]
            index2 = np.roll(np.arange(lon_num + 1)[
                lon_edges < max(wave_lon_min_mod, wave_lon_max_mod)], 1)[0]

            wave_lon = np.zeros(lon_num)
            wave_lon[index1 + 1:index2] = 1.
            # Possible weirdness if index1 == index2
            wave_lon[index1] += (lon_edges[index1 + 1] -
                                 min(wave_lon_min_mod, wave_lon_max_mod)) / lon_bin
            wave_lon[index2] += (max(wave_lon_min_mod, wave_lon_max_mod) -
                                 lon_edges[index2]) / lon_bin

            if wave_lon_min_mod > wave_lon_max_mod:
                wave_lon = 1. - wave_lon
        else:
            wave_lon = np.ones(lon_num)

        # Could be accomplished with np.dot() without casting as matrices?
        wave = np.mat(wave_1d).T * np.mat(wave_lon)

        # Create the new map
        new_map = Map(wave, MapMeta(dict_header))
        new_map.plot_settings = {'cmap': cm.gray,
                                 'norm': ImageNormalize(stretch=LinearStretch()),
                                 'interpolation': 'nearest',
                                 'origin': 'lower'}

        # Update the list of maps
        wave_maps += [new_map]

    return Map(wave_maps, cube=True)
def binomialLimits(nsuccess, ntotal, cl=None, sigma=False):
    """
    NAME:
        binomialLimits
    AUTHOR:
        Tim Haines, [email protected]
    PURPOSE:
        This function computes the single-sided upper and lower confidence
        limits for the binomial distribution.
    CATEGORY:
        Statistics and probability
    CALLING SEQUENCE:
        (u,l) = binomialLimits(nsuccess, ntotal, [, cl [, sigma]])
    INPUTS:
        nsuccess:   A strictly nonnegative integer that specifies the number
                    of successes in ntotal Bernoulli trials.
                    Can be a list or a numpy array.
        ntotal:     An integer strictly greater than nsuccess that specifies
                    the number of Bernoulli trials.
                    Can be a list or a numpy array.
    OPTIONAL INPUTS:
        cl:         The confidence level in the interval [0, 1].
                    The default is 0.8413 (i.e., 1 sigma).
    OPTIONS:
        sigma:      If this is true, then cl is assumed to be a multiple of
                    sigma, and the actual confidence level is computed from
                    the standard normal distribution with parameter cl.
    RETURNS:
        Two lists: the first containing the upper limits, and the second
        containing the lower limits. If the inputs are numpy arrays, then
        numpy arrays are returned instead of lists. If the inputs are
        scalars, then scalars are returned.
    REFERENCES:
        N. Gehrels. Confidence limits for small numbers of events in
        astrophysical data. The Astrophysical Journal, 303:336-346,
        April 1986.
    EXAMPLE:
        If I have a mass bin with 100 galaxies (20 reds and 80 blues) and I am
        computing the fraction of reds to blues, then for this bin
        NSUCCESS = 20 and NTOTAL = 100. To compute the confidence limits at
        the 2.5 sigma level, use

            (u,l) = binomialLimits(20, 100, 2.5, sigma=True)

            u = 0.31756
            l = 0.11056

        Since these are the confidence limits, the fraction would be
        reported as 0.2 (+0.11756, -0.08944).
    """
    if cl is None:
        cl = 1.0
        sigma = True

    if sigma:
        cl = ndtr(cl)

    if not type(nsuccess) == type(ntotal):
        exit('nsuccess and ntotal must have the same type')

    # Since there isn't any syntactical advantage to using
    # numpy, just convert them to lists and carry on.
    isNumpy = False
    if isinstance(nsuccess, numpy.ndarray):
        nsuccess = nsuccess.tolist()
        ntotal = ntotal.tolist()
        isNumpy = True

    # Box single values into a list
    isScalar = False
    if not isinstance(nsuccess, list):
        nsuccess = [nsuccess]
        ntotal = [ntotal]
        isScalar = True

    # Must have the same length
    if not len(nsuccess) == len(ntotal):
        exit('nsuccess and ntotal must have same length')

    upper = []
    lower = []

    for (s, t) in zip(nsuccess, ntotal):
        nfail = t - s

        # See Gehrels (1986) for details
        if nfail == 0:
            upper.append(1.0)
        else:
            upper.append(bdtri(s, t, 1 - cl))

        # See Gehrels (1986) for details
        if s == 0:
            lower.append(0.0)
        else:
            lower.append(1 - bdtri(nfail, t, 1 - cl))

    if isNumpy:
        upper = numpy.array(upper)
        lower = numpy.array(lower)

    # Scalar-in/scalar-out
    if isScalar:
        return (upper[0], lower[0])

    return (upper, lower)
def poissonLimits(k, cl=None, sigma=False):
    """
    NAME:
        poissonLimits
    AUTHOR:
        Tim Haines, [email protected]
    PURPOSE:
        This function computes the single-sided upper and lower confidence
        limits for the Poisson distribution.
    CATEGORY:
        Statistics and probability
    CALLING SEQUENCE:
        (u,l) = poissonLimits(k, [cl [, sigma]])
    INPUTS:
        k:      A strictly nonnegative integer that specifies the number
                of observed events. Can be a list or numpy array.
    OPTIONAL INPUTS:
        cl:     The confidence level in the interval [0, 1).
                The default is 0.8413 (i.e., 1 sigma).
    OPTIONS:
        sigma:  If this is true, then cl is assumed to be a multiple of
                sigma, and the actual confidence level is computed from
                the standard normal distribution with parameter cl.
    RETURNS:
        Two lists: the first containing the upper limits, and the second
        containing the lower limits. If the input is a numpy array, then
        numpy arrays are returned instead of lists. If the input is a
        scalar, then scalars are returned.
    REFERENCES:
        N. Gehrels. Confidence limits for small numbers of events in
        astrophysical data. The Astrophysical Journal, 303:336-346,
        April 1986.
    EXAMPLE:
        Compute the confidence limits of seeing 20 events in 8 seconds at
        the 2.5 sigma level.

            (u,l) = poissonLimits(20, 2.5, sigma=True)

            u = 34.1875
            l = 10.5711

        However, recall that the Poisson parameter is defined as the average
        rate, so it is necessary to divide these values by the time (or space)
        interval over which they were observed. Since these are the confidence
        limits, the rate would be reported as
        2.5 (+4.273, -1.321) observations per second.
    """
    if cl is None:
        cl = 1.0
        sigma = True

    if sigma:
        cl = ndtr(cl)

    # Since there isn't any syntactical advantage to using
    # numpy, just convert it to a list and carry on.
    isNumpy = False
    if isinstance(k, numpy.ndarray):
        k = k.tolist()
        isNumpy = True

    # Box single values into a list
    isScalar = False
    if not isinstance(k, list):
        k = [k]
        isScalar = True

    upper = []
    lower = []

    for x in k:
        upper.append(pdtri(x, 1 - cl))

        # See Gehrels (1986) for details
        if x == 0:
            lower.append(0.0)
        else:
            lower.append(pdtri(x - 1, cl))

    if isNumpy:
        upper = numpy.array(upper)
        lower = numpy.array(lower)

    # Scalar-in/scalar-out
    if isScalar:
        return (upper[0], lower[0])

    return (upper, lower)
def _cdf(self, x, C, Ci, loc):
    return special.ndtr(numpy.dot(Ci, (x.T - loc.T).T))
def _pdf(self, x, a):
    # alpha-distribution density: phi(a - 1/x) / (x**2 * Phi(a)),
    # i.e. the exponent must be negative.
    return (1.0 / (x**2) / special.ndtr(a) *
            np.exp(-.5 * (a - 1.0 / x)**2) / np.sqrt(2 * np.pi))
def _cdf(self, x, c):
    return special.ndtr(1.0 / c * (np.sqrt(x) - 1.0 / np.sqrt(x)))
def _cdf(self, x, a):
    return special.ndtr(np.log(x + (1 - x) * (x <= 0)) / a) * (x > 0)
def _cdf(self, x):
    return special.ndtr(x)
def test(t, x, eps=None, alpha=None, Ha=None):
    """
    Runs the Mann-Kendall test for trend in time series data.

    Parameters
    ----------
    t : 1D numpy.ndarray
        array of the time points of measurements
    x : 1D numpy.ndarray
        array containing the measurements corresponding to entries of 't'
    eps : scalar, float, greater than zero
        least count error of measurements which helps determine ties in the data
    alpha : scalar, float, greater than zero
        significance level of the statistical test (Type I error)
    Ha : string, options include 'up', 'down', 'upordown'
        type of test: one-sided ('up' or 'down') or two-sided ('upordown')

    Returns
    -------
    MK : string
        result of the statistical test indicating whether or not to accept
        the alternative hypothesis 'Ha'
    m : scalar, float
        slope of the linear fit to the data
    c : scalar, float
        intercept of the linear fit to the data
    p : scalar, float, greater than zero
        p-value of the obtained Z-score statistic for the Mann-Kendall test

    Raises
    ------
    AssertionError : error
        least count error of measurements 'eps' is not given
    AssertionError : error
        significance level of test 'alpha' is not given
    AssertionError : error
        alternative hypothesis 'Ha' is not given
    """
    # assert a least count for the measurements x
    assert eps, "Please provide least count error for measurements 'x'"
    assert alpha, "Please provide significance level 'alpha' for the test"
    assert Ha, "Please provide the alternative hypothesis 'Ha'"

    # estimate sign of all possible (n(n-1)) / 2 differences
    n = len(t)
    sgn = np.zeros((n, n), dtype="int")
    for i in range(n):
        tmp = x - x[i]
        tmp[np.where(np.fabs(tmp) <= eps)] = 0.
        sgn[i] = np.sign(tmp)

    # estimate mean of the sign of all possible differences
    S = sgn[np.triu_indices(n, k=1)].sum()

    # estimate variance of the sign of all possible differences
    # 1. Determine no. of tie groups 'p' and no. of ties in each group 'q'
    np.fill_diagonal(sgn, eps * 1E6)
    i, j = np.where(sgn == 0.)
    ties = np.unique(x[i])
    p = len(ties)
    q = np.zeros(len(ties), dtype="int")
    for k in range(p):
        idx = np.where(np.fabs(x - ties[k]) < eps)[0]
        q[k] = len(idx)
    # 2. Determine the two terms in the variance calculation
    term1 = n * (n - 1) * (2 * n + 5)
    term2 = (q * (q - 1) * (2 * q + 5)).sum()
    # 3. estimate variance
    varS = float(term1 - term2) / 18.

    # Compute the Z-score based on above estimated mean and variance
    if S > eps:
        Zmk = (S - 1) / np.sqrt(varS)
    elif np.fabs(S) <= eps:
        Zmk = 0.
    elif S < -eps:
        Zmk = (S + 1) / np.sqrt(varS)

    # compute test based on given 'alpha' and alternative hypothesis
    # note: for all the following cases, the null hypothesis Ho is:
    #     Ho := there is no monotonic trend
    #
    # Ha := There is an upward monotonic trend
    if Ha == "up":
        Z_ = ndtri(1. - alpha)
        if Zmk >= Z_:
            MK = "accept Ha := upward trend"
            indicator = 1
        else:
            MK = "reject Ha := upward trend"
            indicator = 0
    # Ha := There is a downward monotonic trend
    elif Ha == "down":
        Z_ = ndtri(1. - alpha)
        if Zmk <= -Z_:
            MK = "accept Ha := downward trend"
            indicator = 1
        else:
            MK = "reject Ha := downward trend"
            indicator = 0
    # Ha := There is an upward OR downward monotonic trend
    elif Ha == "upordown":
        Z_ = ndtri(1. - alpha / 2.)
        if np.fabs(Zmk) >= Z_:
            MK = "accept Ha := upward OR downward trend"
            indicator = 1
        else:
            MK = "reject Ha := upward OR downward trend"
            indicator = 0

    # ----------
    # AS A BONUS
    # ----------
    # estimate the slope and intercept of the line
    m = np.corrcoef(t, x)[0, 1] * (np.std(x) / np.std(t))
    c = np.mean(x) - m * np.mean(t)

    # ----------
    # AS A BONUS
    # ----------
    # estimate the p-value for the obtained Z-score Zmk
    if S > eps:
        if Ha == "up":
            p = 1. - ndtr(Zmk)
        elif Ha == "down":
            p = ndtr(Zmk)
        elif Ha == "upordown":
            p = 0.5 * (1. - ndtr(Zmk))
    elif np.fabs(S) <= eps:
        p = 0.5
    elif S < -eps:
        if Ha == "up":
            p = 1. - ndtr(Zmk)
        elif Ha == "down":
            p = ndtr(Zmk)
        elif Ha == "upordown":
            p = 0.5 * (ndtr(Zmk))

    return MK, m, c, p, indicator
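# Usage sketch of the Mann-Kendall test above on a synthetic series with an
# upward trend (illustrative values; `test` and its imports are assumed in scope):
import numpy as np

t = np.arange(50, dtype=float)
rng = np.random.default_rng(1)
x = 0.05 * t + rng.normal(scale=0.2, size=t.size)  # noisy upward trend

MK, m, c, p, indicator = test(t, x, eps=1e-6, alpha=0.05, Ha="upordown")
print(MK)       # e.g. "accept Ha := upward OR downward trend"
print(m, c, p)  # fitted slope/intercept and p-value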
def _ppf(self, q, a):
    return 1.0 / (a - special.ndtri(q * special.ndtr(a)))
def _pdf(self, x, alpha):
    # 2 * normpdf(x) * normcdf(alpha * x)
    return (2.0 / np.sqrt(2 * np.pi) * np.exp(-x ** 2 / 2.0) *
            special.ndtr(alpha * x))
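# Quick check of this skew-normal density against scipy.stats.skewnorm
# (a sketch with illustrative values):
import numpy as np
from scipy import special, stats

x, alpha = 0.7, 3.0
manual = 2.0 / np.sqrt(2 * np.pi) * np.exp(-x**2 / 2.0) * special.ndtr(alpha * x)
print(manual, stats.skewnorm.pdf(x, alpha))  # both equal 2*phi(x)*Phi(alpha*x)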