def thompson_tau_test(data, alpha=0.05):
    """Not found in either scipy.stats or statsmodels. Uses the Thompson tau
    criterion to iteratively identify outliers until no more exist.

    Parameters
    ----------
    data: list or numpy array, 1-D
        The dataset we are evaluating for outliers
    alpha: float, default is 0.05
        Our level of significance for detecting outliers

    Returns
    -------
    outlier_table: list
        A list containing all datapoints found to be outliers by the
        Thompson tau criterion
    """
    data = _check_table(data, only_count=False)
    if alpha < 0 or alpha > 1:
        raise ValueError("Cannot have alpha level greater than 1 or less than 0")
    outlier_exist, outlier_table = True, []
    data_copy = np.copy(data)
    while outlier_exist:
        n, mu, s = len(data_copy), np.mean(data_copy), np.std(data_copy, ddof=1)
        ab_resid = np.abs(data_copy - mu) / s
        t_crit = t.isf(alpha / 2, n - 2)
        rejection = t_crit * (n - 1) / (sqrt(n) * sqrt(n - 2 + pow(t_crit, 2)))
        is_outlier = ab_resid > rejection
        if np.sum(is_outlier) != 0:
            outlier_table.append(data_copy[np.argsort(ab_resid)][-1:][0])
            data_copy = data_copy[np.argsort(ab_resid)][:-1]
        else:
            outlier_exist = False
    return outlier_table
def regression_figure(x, y):
    plt.figure()
    plt.scatter(x, y)
    sxy = sum((x - x.mean()) * (y - y.mean()))
    sxx = sum((x - x.mean()) ** 2)
    syy = sum((y - y.mean()) ** 2)
    ssr = (sxx * syy - sxy ** 2) / sxx
    b_estimator = sxy / sxx
    a_estimator = y.mean() - b_estimator * x.mean()
    num = len(x)
    alpha = 0.1
    t_distr_value = t.isf(alpha / 2, num - 2)
    print('t value for confidence interval computation:', t_distr_value)
    interval_left_b = b_estimator - math.sqrt(ssr / ((num - 2) * sxx)) * t_distr_value
    interval_right_b = b_estimator + math.sqrt(ssr / ((num - 2) * sxx)) * t_distr_value
    interval_left_a = a_estimator - math.sqrt(sum(x ** 2) * ssr / (num * (num - 2) * sxx)) * t_distr_value
    interval_right_a = a_estimator + math.sqrt(sum(x ** 2) * ssr / (num * (num - 2) * sxx)) * t_distr_value
    plt.plot(x, a_estimator + b_estimator * x, linewidth=7)
    plt.plot(x, interval_left_a + interval_left_b * x, linewidth=1)
    plt.plot(x, interval_right_a + interval_right_b * x, linewidth=1)
    plt.savefig('assign5_2_plot.pdf')
    print('------------------------------------------------')
    # test the hypothesis β = 0 at the 1% level of significance
    test_stat = math.sqrt((num - 2) * sxx / ssr) * math.fabs(b_estimator)
    print('value of test statistic:', test_stat)
    alpha_level = 0.01
    t_distr_value_judge = t.isf(alpha_level / 2, num - 2)
    print('t value for hypothesis test computation:', t_distr_value_judge)
    if test_stat > t_distr_value_judge:
        print('hypothesis test result: reject')
    else:
        print('hypothesis test result: accept')
def t_val_from_t_percentile(t_percentile, df, one_tailed=0):
    """Find the t score given a t (upper-tail) percentile, the degrees of
    freedom, and whether the test is one-tailed or two-tailed.

    t_percentile: tail probability, expressed as a proportion
    """
    if one_tailed:
        return round(t.isf(t_percentile, df), 3)
    return round(t.isf(t_percentile / 2., df), 3)
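A quick usage sketch for the helper above (added for illustration, not from the original source); the expected values are standard t-table entries.

# Illustrative only: tail probability 0.05 with 10 degrees of freedom.
print(t_val_from_t_percentile(0.05, 10, one_tailed=1))  # ~1.812
print(t_val_from_t_percentile(0.05, 10, one_tailed=0))  # ~2.228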
def ROC_CI(N, Vec_theta, alpha=0.05):
    """
    One-dimensional confidence-interval calculations.

    Parameters
    ----------
    N : int
        Number of trials
    Vec_theta : numpy array
        Estimated proportions at which to compute intervals
    alpha : float
        Significance level

    Returns
    -------
    theta_L : numpy array
        Lower confidence limits
    theta_U : numpy array
        Upper confidence limits
    """
    theta_L = np.zeros(Vec_theta.size)
    theta_U = np.zeros(Vec_theta.size)
    for i, theta in enumerate(Vec_theta):
        if theta != 0:
            alpha_2 = alpha / 2
        else:
            alpha_2 = alpha
        if N > 100 and theta > 0.1:
            # normal-approximation interval with a t critical value
            d = N - 1
            sigma = sqrt(theta * (1 - theta))
            if theta == 0:
                theta_L[i] = 0
            else:
                theta_L[i] = theta - t.isf(alpha_2, df=d) * sigma / sqrt(N)
            theta_U[i] = theta + t.isf(alpha_2, df=d) * sigma / sqrt(N)
        elif N > 100 and theta < 0.1:
            # chi-squared (Poisson-approximation) interval
            if theta == 0:
                theta_L[i] = 0
            else:
                d_L = 2 * N * theta
                theta_L[i] = chi2.isf(1 - alpha_2, df=d_L) / (2 * N)
            d_U = 2 * (N * theta + 1)
            theta_U[i] = chi2.isf(alpha_2, df=d_U) / (2 * N)
        else:
            # exact (Clopper-Pearson style) interval via the F distribution
            d1L = N - N * theta + 1
            d2L = N * theta
            if theta == 0:
                theta_L[i] = 0
            else:
                theta_L[i] = d2L / (d2L + d1L * f.isf(alpha_2, 2 * d1L, 2 * d2L))
            d1U = N * theta + 1
            d2U = N - N * theta
            theta_U[i] = d1U * f.isf(alpha_2, 2 * d1U, 2 * d2U) / (
                d2U + d1U * f.isf(alpha_2, 2 * d1U, 2 * d2U))
    # ensure the limits are non-decreasing
    for i in range(Vec_theta.size - 1):
        if theta_L[i + 1] < theta_L[i]:
            theta_L[i + 1] = theta_L[i]
        if theta_U[i + 1] < theta_U[i]:
            theta_U[i + 1] = theta_U[i]
    return theta_L, theta_U
def grubbs(X, test='max', alpha=0.05):
    """
    Performs Grubbs' test for outliers recursively until the null hypothesis
    is true.

    Parameters
    ----------
    X : ndarray
        A numpy array to be tested for outliers.
    test : str
        Describes the types of outliers to look for. Can be 'min' (look for
        small outliers), 'max' (look for large outliers), or 'two-tailed'
        (look for both).
    alpha : float
        The significance level.

    Returns
    -------
    X : ndarray
        The original array with outliers removed.
    outliers : ndarray
        An array of outliers.
    del_index_list : list
        Indices of the removed points, each relative to the successively
        shortened array.
    """
    Z = zscore(X, ddof=1)  # Z-score
    N = len(X)  # number of samples

    # calculate the extreme index and the critical t value based on the test
    if test == 'two-tailed':
        extreme_ix = lambda Z: np.abs(Z).argmax()
        t_crit = lambda N: t.isf(alpha / (2. * N), N - 2)
    elif test == 'max':
        extreme_ix = lambda Z: Z.argmax()
        t_crit = lambda N: t.isf(alpha / N, N - 2)
    elif test == 'min':
        extreme_ix = lambda Z: Z.argmin()
        t_crit = lambda N: t.isf(alpha / N, N - 2)
    else:
        raise ValueError("Test must be 'min', 'max', or 'two-tailed'")

    # compute the threshold
    thresh = lambda N: (N - 1.) / np.sqrt(N) * \
        np.sqrt(t_crit(N)**2 / (N - 2 + t_crit(N)**2))

    # create array to store outliers
    outliers = np.array([])

    # loop through the array and remove any outliers
    del_index_list = []
    while abs(Z[extreme_ix(Z)]) > thresh(N):
        # record the outlier, then remove it from the array
        outliers = np.r_[outliers, X[extreme_ix(Z)]]
        del_index_list.append(extreme_ix(Z))
        X = np.delete(X, extreme_ix(Z))
        # recompute the Z scores
        Z = zscore(X, ddof=1)
        N = len(X)

    return X, outliers, del_index_list
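A usage sketch for the function above (illustrative; the data values are made up, with 100.0 as an obvious large outlier). Assumes numpy as np and scipy.stats' zscore/t are in scope, as the snippet requires.

data = np.array([9.8, 10.1, 10.3, 9.9, 10.0, 100.0])  # made-up values
cleaned, outliers, removed_ix = grubbs(data, test='max', alpha=0.05)
print(outliers)  # expected: array([100.])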
def cochrancox(n1, m1, var1, n2, m2, var2, alpha=0.05):
    # Cochran-Cox test for two means with unequal variances: the t statistic
    # is compared against a critical value weighted by the two standard errors.
    v1, v2 = n1 - 1, n2 - 1
    _t = (m1 - m2) / np.sqrt(var1 / n1 + var2 / n2)
    _t_av1 = t.isf(alpha / 2, v1)
    _t_av2 = t.isf(alpha / 2, v2)
    var1_SE, var2_SE = var1 / n1, var2 / n2
    _t_a = (var1_SE * _t_av1 + var2_SE * _t_av2) / (var1_SE + var2_SE)
    return _t, (v1, v2), _t_a
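A hedged usage sketch (the summary statistics below are invented): the returned statistic is compared against the weighted critical value rather than a single-df t quantile. Assumes numpy as np and scipy.stats.t are imported as the snippet requires.

# Invented summary statistics for two samples with unequal variances.
t_stat, dfs, t_crit = cochrancox(n1=10, m1=5.2, var1=1.3, n2=12, m2=4.1, var2=4.8)
reject = abs(t_stat) > t_crit  # two-sided decision at the given alpha
print(t_stat, t_crit, reject)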
def linReg(self, alpha=0.05, debug=False):
    '''
    Does linear regression on the model data vs. recorded data.
    Gives a 100(1-alpha)% confidence interval for the slope.
    '''
    if debug or self._debug:
        print("linReg...")
    # set stuff up to make the code cleaner
    obs = self.observed
    mod = self.model
    obs_mean = np.mean(obs)
    mod_mean = np.mean(mod)
    n = mod.size
    df = n - 2
    # calculate square sums
    SSxx = np.sum(mod**2) - np.sum(mod)**2 / n
    SSyy = np.sum(obs**2) - np.sum(obs)**2 / n
    SSxy = np.sum(mod * obs) - np.sum(mod) * np.sum(obs) / n
    SSE = SSyy - SSxy**2 / SSxx
    MSE = SSE / df
    # estimate parameters
    slope = SSxy / SSxx
    intercept = obs_mean - slope * mod_mean
    sd_slope = np.sqrt(MSE / SSxx)
    r_squared = 1 - SSE / SSyy
    # calculate 100(1 - alpha)% CI for slope
    width = t.isf(0.5 * alpha, df) * sd_slope
    lower_bound = slope - width
    upper_bound = slope + width
    slope_CI = (lower_bound, upper_bound)
    # calculate 100(1 - alpha)% CI for intercept
    lower_intercept = obs_mean - lower_bound * mod_mean
    upper_intercept = obs_mean - upper_bound * mod_mean
    intercept_CI = (lower_intercept, upper_intercept)
    # estimate 100(1 - alpha)% CI for predictands
    predictands = slope * mod + intercept
    sd_resid = np.std(obs - predictands)
    y_CI_width = t.isf(0.5 * alpha, df) * sd_resid * \
        np.sqrt(1 - 1 / n)
    # return data in a dictionary
    data = {}
    data['slope'] = slope
    data['intercept'] = intercept
    data['r_2'] = r_squared
    data['slope_CI'] = slope_CI
    data['intercept_CI'] = intercept_CI
    data['pred_CI_width'] = y_CI_width
    data['conf_level'] = 100 * (1 - alpha)
    if debug or self._debug:
        print("...linReg done.")
    return data
def bootstrap_estimate(b, n, k):
    # draw b bootstrap replicates and build a 95% t-based confidence
    # interval for the mean of the replicated statistic
    s = Sample(k, n, lambda x: x)
    x = [sample_g_function(s) for i in range(b)]
    mu = numpy.mean(x)
    sd = numpy.std(x)
    se = sd / numpy.sqrt(b)
    lci = mu - se * t.isf(0.025, b - 1)
    uci = mu + se * t.isf(0.025, b - 1)
    return (n, mu, sd, lci, uci, numpy.amax(x))
def get_ci(self, alpha=0.95):
    '''
    Returns the confidence interval of the estimated parameters.
    You should call 'fit' before calling this method.

    @keyword alpha: Confidence level (default: 95%)
    '''
    # note: t.isf(1 - alpha, dof) is the one-sided critical value; a
    # two-sided 100*alpha% interval would use t.isf((1 - alpha) / 2, dof)
    plo = self.p - t.isf(1. - alpha, self.dof) * np.sqrt(np.diag(self.pcov))
    pup = self.p + t.isf(1. - alpha, self.dof) * np.sqrt(np.diag(self.pcov))
    self.ci = np.zeros((len(self.p), 2))
    self.ci[:, 0] = plo
    self.ci[:, 1] = pup
    return self.ci
def grubbs_test(data, alternative='two-sided', alpha=0.05):
    """Not found in either scipy.stats or statsmodels.
    Used to determine if there exists one outlier in the dataset based on its
    dispersion from the mean. Note that this assumes that the data is
    normally distributed.

    Parameters
    ----------
    data: list or numpy array, 1-D
        The sample dataset we are evaluating for outliers
    alternative: str, {'two-sided', 'greater', 'less'}
        Whether we are evaluating only minimum values, only maximum values,
        or both
    alpha: float, default is 0.05
        Our alpha level for determining significant difference

    Returns
    -------
    If there is an outlier, returns the outlier. Else, returns None.
    """
    if not isinstance(alternative, str):
        raise TypeError("Alternative Hypothesis is not of string type")
    if alternative.casefold() not in ['two-sided', 'greater', 'less']:
        raise ValueError("Cannot determine method for alternative hypothesis")
    if not isinstance(alpha, float):
        raise TypeError("Cannot discern alpha level for Grubbs' test")
    if alpha > 1 or alpha < 0:
        raise ValueError("Alpha level must be within 0 and 1")
    data = _check_table(data, only_count=False)
    y_bar, s, n = np.mean(data), np.std(data, ddof=1), len(data)
    # the one-sided test uses t at alpha/n; the two-sided test uses
    # alpha/(2n) (cf. the standard Grubbs' test formulation)
    if alternative.casefold() == 'less':
        return_val = np.min(data)
        val = y_bar - return_val
        t_value = t.isf(alpha / n, n - 2)
    elif alternative.casefold() == 'greater':
        return_val = np.max(data)
        val = return_val - y_bar
        t_value = t.isf(alpha / n, n - 2)
    else:
        val = np.max([y_bar - np.min(data), np.max(data) - y_bar])
        if val == y_bar - np.min(data):
            return_val = np.min(data)
        else:
            return_val = np.max(data)
        t_value = t.isf(alpha / (2 * n), n - 2)
    g = val / s
    rejection_stat = ((n - 1) / sqrt(n)) * sqrt(pow(t_value, 2) / (n - 2 + pow(t_value, 2)))
    if g > rejection_stat:
        return return_val
    else:
        return None
def kramers_v(x, y, bias_correction=True):
    """Calculates Cramer's V statistic for categorical-categorical association.

    Taken from https://github.com/shakedzy/dython/blob/master/dython/nominal.py
    Inspired by Shaked Zychlinski. This is a symmetric coefficient:
    V(x,y) = V(y,x)

    Original function taken from: https://stackoverflow.com/a/46498792/5863503
    Wikipedia: https://en.wikipedia.org/wiki/Cram%C3%A9r%27s_V

    Parameters
    ----------
    x : list / NumPy ndarray / Pandas Series
        A sequence of categorical measurements
    y : list / NumPy ndarray / Pandas Series
        A sequence of categorical measurements
    bias_correction : Boolean, default = True
        Use bias correction from Bergsma and Wicher,
        Journal of the Korean Statistical Society 42 (2013): 323-328.

    Returns
    -------
    (V, p) : tuple
        V is a float in the range [0,1]; p is an accompanying p-value
        (see the note below on how it is computed)
    """
    confusion_matrix = crosstab(x, y)
    c2 = chi2_contingency(confusion_matrix)[0]
    n = confusion_matrix.sum().sum()
    phi2 = c2 / n
    r, k = confusion_matrix.shape
    if bias_correction:
        phi2corr = max(0, phi2 - ((k - 1) * (r - 1)) / (n - 1))
        rcorr = r - ((r - 1) ** 2) / (n - 1)
        kcorr = k - ((k - 1) ** 2) / (n - 1)
        if min((kcorr - 1), (rcorr - 1)) == 0:
            warnings.warn(
                "Unable to calculate Cramer's V using bias correction. "
                "Consider using bias_correction=False",
                RuntimeWarning)
            return np.nan
        V = np.sqrt(phi2corr / min((kcorr - 1), (rcorr - 1)))
    else:
        V = np.sqrt(phi2 / min(k - 1, r - 1))
    # note: this "p-value" is derived from a fixed critical value,
    # not from V itself
    tval = t.isf(0.975, n - 3)
    return V, t.sf(abs(tval), n - 2)
def getError(var_matrix, dof):
    # two-sided 95% critical value; t.isf(0.975, dof) is negative, so the
    # np.abs below restores the intended magnitude t.isf(0.025, dof)
    a = 1 - 0.05 / 2
    factorSE = t.isf(a, dof)
    variance = np.diagonal(var_matrix)
    SE = np.sqrt(variance)
    error = np.abs(SE * factorSE)
    return error
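Usage sketch (illustrative): pcov below is a made-up 2x2 covariance matrix of the kind returned by, e.g., scipy.optimize.curve_fit, with dof = number of points minus number of parameters.

pcov = np.array([[0.04, 0.001], [0.001, 0.09]])  # made-up covariance matrix
print(getError(pcov, dof=8))  # ~95% half-widths for each parameter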
def grubbs(timeseries, debug, debug_path):
    """
    A timeseries is anomalous if the Z score is greater than the Grubbs'
    score.
    """
    try:
        # scipy.array and scipy.std were removed from SciPy; use numpy
        series = np.array([x[1] for x in timeseries])
        stdDev = np.std(series)
        if stdDev == 0:
            return False
        mean = np.mean(series)
        tail_average = tail_avg(timeseries, debug, debug_path)
        z_score = (tail_average - mean) / stdDev
        len_series = len(series)
        threshold = scipy_stats_t.isf(.05 / (2 * len_series), len_series - 2)
        threshold_squared = threshold * threshold
        grubbs_score = ((len_series - 1) / np.sqrt(len_series)) * \
            np.sqrt(threshold_squared / (len_series - 2 + threshold_squared))
        return z_score > grubbs_score
    except Exception:
        if debug:
            trace = traceback.format_exc()
            errorline = 'error in grubbs - %s\n' % str(trace)
            with open(debug_path + '/nab.earthgecko_skyline.algorithm.errors.txt', 'a') as errorfile:
                errorfile.write(errorline)
        return None
def solve(self):
    filename_old = "http://py.mooctest.net:8081/dataset/population/population_old.csv"
    filename_total = "http://py.mooctest.net:8081/dataset/population/population_total.csv"
    # Python 3: urlopen returns bytes, so decode before handing to csv
    reader_old = csv.reader(urllib.request.urlopen(filename_old).read().decode().splitlines())
    reader_total = csv.reader(urllib.request.urlopen(filename_total).read().decode().splitlines())
    count_line_old = 3
    old_num = []
    for row in reader_old:
        if count_line_old > 0:
            count_line_old -= 1
            continue
        old_num.append(int(row[1]))
    count_line_total = 5
    total_num = []
    for row in reader_total:
        if count_line_total > 0:
            count_line_total -= 1
            continue
        total_num.append(int(row[4]))
    old_rate = []
    for i in range(len(old_num)):
        old_rate.append(100.0 * old_num[i] / total_num[i])
    a = pd.Series(old_rate)
    x = a.mean()
    std = a.std()
    var = a.var()  # var = s^2
    z = t.isf(0.05, 31)
    mean_lower = x - std / math.sqrt(31) * z
    mean_upper = x + std / math.sqrt(31) * z
    std_lower = 31 * var / chi2.isf(0.05, 31)
    std_upper = 31 * var / chi2.isf(0.95, 31)
    result = [[mean_lower, mean_upper], [std_lower, std_upper]]
    print(result)
    return result
def parameters_significance_test(self):
    nk1 = (self.n - (self.k + 1))
    s2 = np.dot(self.residuals.T, self.residuals) / nk1
    da = self.gram_schmidt * s2
    self.log(da)
    t_alpha_nk1 = t.isf(df=nk1, q=self.alpha)
    result = True
    self.ltw('\\subsubsection{Significance of the explanatory variables}\n')
    mx = 0
    for i in range(self.k + 1):
        self.log(f"Testing the significance of variable {i}")
        t_stat = self.params[i] / da[i, i]
        self.log(t_stat, t_alpha_nk1)
        self.ltw(f'\\[t_{{\\alpha_{{{i + 1}}}}} = {t_stat}\\]\n')
        self.ltw(f'\\[t_{{{self.alpha}, {nk1}}} = {t_alpha_nk1}\\]\n')
        if abs(t_stat) > mx and i:
            mx = abs(t_stat)
            self.least_important_param_name = self.var_names[i - 1]
        if abs(t_stat) < t_alpha_nk1:
            result = False
            self.ltw(f'Variable ~$X_{{{i + 1}}}$ is statistically insignificant.\n')
            self.log(f"Variable {i} is insignificant")
        else:
            self.ltw(f'Variable ~$X_{{{i + 1}}}$ is statistically significant.\n')
    return result
def calcStudent(x1, x2):
    n1 = len(x1)
    n2 = len(x2)
    M1 = sum(x1) / n1
    M2 = sum(x2) / n2
    S1 = sum((x - M1)**2 for x in x1) / n1
    S2 = sum((x - M2)**2 for x in x2) / n2
    # Satterthwaite-style weighted critical values
    for alpha in [0.2, 0.1, 0.05, 0.01, 0.001, 0.0001]:
        alpha /= 2
        v1 = S1 / n1
        v2 = S2 / n2
        ta = (v1 * t.isf(alpha, n1 - 1) + v2 * t.isf(alpha, n2 - 1)) / (v1 + v2)
        print(ta)
    return (M1 - M2) / (S1 / (n1 - 1) + S2 / (n2 - 1))**0.5
def solve(self):
    file_2010 = "http://py.mooctest.net:8081/dataset/temperature/temperature_2010.csv"
    file_2014 = "http://py.mooctest.net:8081/dataset/temperature/temperature_2014.csv"
    # Python 3: urlopen returns bytes, so decode before handing to csv
    reader_2010 = csv.reader(urllib.request.urlopen(file_2010).read().decode().splitlines())
    reader_2014 = csv.reader(urllib.request.urlopen(file_2014).read().decode().splitlines())
    temperature_2010 = []
    i = 0
    for row in reader_2010:
        i += 1
        if i <= 6 or i >= 38:
            continue
        temperature_2010.append(float(row[8]))
    temperature_2014 = []
    i = 0
    for row in reader_2014:
        i += 1
        if i <= 5 or i >= 37:
            continue
        temperature_2014.append(float(row[8]))
    diff = []
    for i in range(len(temperature_2010)):
        diff.append(temperature_2014[i] - temperature_2010[i])
    d = np.mean(diff)
    sd = np.std(diff)
    result = d / (sd / np.sqrt(31))
    t_alpha = t.isf(0.05, 30)
    if result >= t_alpha:
        return "YES"
    else:
        return "NO"
def confidence_interval_b1(self, *, alpha):
    tmp_t_value = t.isf(alpha / 2, len(self.__x_data) - 2)
    print(tmp_t_value)
    return [
        self.__b1 - tmp_t_value * math.sqrt(self.__estimator_variance_b1),
        self.__b1 + tmp_t_value * math.sqrt(self.__estimator_variance_b1)
    ]
def grubbs(samples_rtt):
    # one-sided Grubbs' statistic for the largest sample
    N = len(samples_rtt)
    G = (max(samples_rtt) - mean(samples_rtt)) / std(samples_rtt)
    a = 0.01
    crit_val = t.isf(a / N, N - 2)
    # critical region: ((N-1)/sqrt(N)) * sqrt(t^2 / (N-2 + t^2))
    crit_reg = (N - 1) / sqrt(N) * sqrt(crit_val ** 2 / (N - 2 + crit_val ** 2))
    return G, a, crit_reg
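Usage sketch (illustrative; assumes mean, std, and sqrt are in scope, e.g. from numpy and math as the snippet implies). The sample is declared an outlier when G exceeds the critical region.

samples = [20.1, 19.8, 20.3, 20.0, 35.7]  # made-up RTTs; 35.7 looks anomalous
G, a, crit = grubbs(samples)
print(G > crit)  # True if the maximum RTT is an outlier at alpha = 0.01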
def pth2Cth(pth, N, dz):
    """Convert a threshold on the p-value of a partial correlation to the
    equivalent threshold on the partial correlation itself."""
    # dz is the dimension of the conditioning variable
    df = max(N - dz - 2, 0)  # degrees of freedom
    y = -t.isf(1.0 - pth / 2.0, df, loc=0, scale=1) / math.sqrt(df)
    Cth = abs(y / math.sqrt(1.0 + y**2))
    return Cth
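Usage sketch (illustrative): a p-value threshold of 0.05 with N = 100 samples and a 1-dimensional conditioning set maps to the corresponding absolute partial-correlation threshold.

print(pth2Cth(0.05, N=100, dz=1))  # roughly 0.2 for these settings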
def solve(self):
    n = 51
    std = 4.9
    mean = 1.1
    t_value = t.isf(0.025, n - 1)
    stat_value = mean / (std / math.sqrt(n))
    return [round(n - 1, 2), round(stat_value, 2), not stat_value <= -t_value]
def confidence_interval_of_mu_x(self, x0, alpha):
    y0 = self.a + self.b * x0
    t_value = t.isf(alpha / 2, self.n - 2)
    sigma = math.sqrt(self.sigma_sqr_of_epsilon)
    others = math.sqrt((1 / self.n) + (x0 - self.mean_x) ** 2 / self.S_xx)
    self.upper_bound = y0 + t_value * sigma * others
    self.lower_bound = y0 - t_value * sigma * others
def extreme_studentized_deviate_test(data, num_outliers=1, alpha=0.05):
    """Not found in either scipy.stats or statsmodels.
    Used when we think there are at most k outliers, whereas other tests
    such as Grubbs or Tietjen-Moore rely on there being exactly k outliers.
    Note that this assumes the data is normally distributed.

    Parameters
    ----------
    data: list or numpy array, 1-D
        The data we are analyzing for outliers
    num_outliers: int, default is 1
        The maximum number of outliers we are checking for
    alpha: float, default is 0.05
        The level of significance for determining outliers

    Returns
    -------
    max_outliers: int
        The maximum number of outliers that our test found to exist
    outliers: list
        The outliers corresponding to max_outliers
    """
    data = _check_table(data, only_count=False)
    if not isinstance(num_outliers, int):
        raise TypeError("Number of outliers must be an integer")
    if num_outliers < 0:
        raise ValueError("Cannot test for negative amount of outliers")
    if alpha >= 1 or alpha <= 0:
        raise ValueError("Alpha level must be within 0 and 1")
    r = np.zeros(num_outliers)
    outliers = []
    data_copy = np.copy(data)
    n = len(data)
    if num_outliers > n:
        raise ValueError("Cannot have number of outliers greater than number of observations")
    for i in range(1, num_outliers + 1):
        y_bar = np.mean(data_copy)
        s = np.std(data_copy, ddof=1)
        abs_resids = np.abs(data_copy - y_bar)
        r_i = np.max(abs_resids) / s
        p = 1 - (alpha / (2 * (n - i + 1)))
        lambda_i = ((n - i) * t.isf(p, n - i - 1)) / \
            sqrt((n - i - 1 + pow(t.isf(p, n - i - 1), 2)) * (n - i + 1))
        r[i - 1] = r_i > abs(lambda_i)
        outliers.append(data_copy[np.argsort(abs_resids)][-1:][0])
        data_copy = data_copy[np.argsort(abs_resids)][:-1]
    max_outliers = np.max(np.where(r == 1)[0]) + 1
    return max_outliers, outliers[:max_outliers]
def solve(self):
    n = 20
    std = 2.2
    mean = 4.6
    standard = 5
    stat_value = (mean - standard) / (std / math.sqrt(n))
    t_value = t.isf(0.025, n - 1)
    return [round(n - 1, 2), round(stat_value, 2), not stat_value <= -t_value]
def solve(self):
    n = 51
    std = 4.9
    mean = 1.1
    t_value = t.isf(0.025, n - 1)
    stat_value = mean / (std / np.sqrt(n))
    print([round(n - 1, 2), round(stat_value, 2), not stat_value <= -t_value])
    return [round(n - 1, 2), round(stat_value, 2), not stat_value <= -t_value]
def get_ci(self, alpha=0.05):
    '''
    Returns the confidence interval of the estimated parameters.
    You should call 'fit' before calling this method.

    @keyword alpha: Percentile (default: 0.05, which means 95% confidence)
    '''
    self._alpha = alpha
    plo = self._p - t.isf(alpha / 2., self._dof) * self._delta
    pup = self._p + t.isf(alpha / 2., self._dof) * self._delta
    if self._modelPhysParamsDict[self.model]:
        self._phys_plo = plo
        self._phys_pup = pup
    self._ci = np.zeros((len(self._p), 2))
    self._ci[:, 0] = plo
    self._ci[:, 1] = pup
    return self._ci
def genThreshold(self, strThresholdMode, tpFrameShape=None):
    lsStrThresholdModeColumn = strThresholdMode.split('-')
    sigmaMatch = re.match(r'as(\d*\.?\d*)sigma', lsStrThresholdModeColumn[2])
    fpeMatch = re.match(r'(\d*\.?\d*)fpe', lsStrThresholdModeColumn[2])
    if tpFrameShape is None:
        if self.arrStdBGFrame is not None:
            tpFrameShape = self.arrStdBGFrame.shape
        elif self.arrKurtosisBGFrame is not None:
            tpFrameShape = self.arrKurtosisBGFrame.shape
    if sigmaMatch is not None:
        prob = norm.sf(float(sigmaMatch.group(1)))
    elif fpeMatch is not None:
        prob = 1 / (np.prod(tpFrameShape) * float(fpeMatch.group(1)))
    if lsStrThresholdModeColumn[0] == 'frame':
        if lsStrThresholdModeColumn[1] == 'norm':
            return norm.isf(prob) * self.dicPHStats['std']
        elif lsStrThresholdModeColumn[1] == 't':
            # self. was missing on dicPHStats here in the original
            validKurtosis = max(self.dicPHStats['kurtosis'], 0)
            if validKurtosis == 0:
                return norm.isf(prob) * self.dicPHStats['std']
            else:
                nu = 6 / validKurtosis + 4
                scale = self.dicPHStats['std'] * sqrt((nu - 2) / nu)
                return t.isf(prob, df=nu, scale=scale)
    if lsStrThresholdModeColumn[0] == 'pixel':
        if lsStrThresholdModeColumn[1] == 'norm':
            return norm.isf(prob) * self.arrStdBGFrame
        elif lsStrThresholdModeColumn[1] == 't':
            arrIsTargetKurtosisFrame = ~np.isnan(self.arrKurtosisBGFrame)
            arrIsTargetKurtosisFrame[arrIsTargetKurtosisFrame] *= (
                self.arrKurtosisBGFrame[arrIsTargetKurtosisFrame] <= 0)
            arrRet = np.ones(tpFrameShape) * np.nan
            arrRet[arrIsTargetKurtosisFrame] = (
                norm.isf(prob) * self.arrStdBGFrame[arrIsTargetKurtosisFrame])
            arrIsTargetKurtosisFrame = ~np.isnan(self.arrKurtosisBGFrame)
            arrIsTargetKurtosisFrame[arrIsTargetKurtosisFrame] *= (
                self.arrKurtosisBGFrame[arrIsTargetKurtosisFrame] > 0)
            arrValidNu = (
                6 / self.arrKurtosisBGFrame[arrIsTargetKurtosisFrame] + 4)
            arrRet[arrIsTargetKurtosisFrame] = (
                t.isf(prob, df=arrValidNu) *
                self.arrStdBGFrame[arrIsTargetKurtosisFrame] *
                np.sqrt((arrValidNu - 2) / arrValidNu))
            return arrRet
def solve(self):
    n = 25
    mean = 7.73
    std = 0.77
    u0 = 8
    stat_value = (mean - u0) / (std / math.sqrt(n))
    t_value = t.isf(0.05, n - 1)
    return [round(n - 1, 2), round(stat_value, 2), not math.fabs(stat_value) >= t_value]
def confidence_prediction_bands(self, x_fit, results, confidence_interval, pcov):
    """
    Computes confidence prediction bands.

    Parameters
    ----------
    x_fit : array_like
        Coordinates at which to evaluate the bands
    results : dict-like
        Fitted parameter values, keyed by parameter name
    confidence_interval : float
        Probability level for the bands
    pcov : numpy.array
        Covariance matrix of the fitted parameters

    Returns
    -------
    cp_band_0, cp_band_1 : array_like
    """
    param_names = []
    param_values = []
    param_deltas = []
    for pname in self.minimizer.params.keys():
        if self.minimizer.params[pname].vary:
            param_names.append(pname)
            param_values.append(results[pname])
            param_deltas.append(1e-5 * results[pname])

    x_m_0s = numpy.empty_like(x_fit)
    f_m_0s = numpy.empty_like(x_fit)
    for i, xx in enumerate(x_fit):
        x_m_0s[i] = x_fit[i]
        f_m_0s[i] = self.equation(results, xx)

    # finite-difference derivatives of f with respect to each parameter
    diag_delta = numpy.diag(param_deltas)
    dxdbeta = numpy.empty([len(param_values), len(x_fit)])
    for i, value in enumerate(param_values):
        adj_param_values = param_values + diag_delta[i]
        for j, pname in enumerate(param_names):
            results[pname] = adj_param_values[j]
        for j, x_m_0 in enumerate(x_m_0s):
            dxdbeta[i][j] = (self.equation(results, x_m_0) - f_m_0s[j]) / diag_delta[i][i]

    # delta-method variance of f at each point
    variance = numpy.empty(len(x_fit))
    for i, gprime in enumerate(dxdbeta.T):
        variance[i] = gprime.T.dot(pcov).dot(gprime)

    critical_value = t.isf(0.5 * (confidence_interval + 1.0), len(param_names))
    confidence_half_widths = critical_value * numpy.sqrt(variance)
    cp_band_0 = f_m_0s - confidence_half_widths
    cp_band_1 = f_m_0s + confidence_half_widths
    return cp_band_0, cp_band_1
def test_b_is_zero(self, alpha):
    if self.sigma_sqr_of_epsilon is None:
        self.var_of_epsilon()
    self.H0_test_value = abs(self.b * math.sqrt(self.S_xx) / math.sqrt(self.sigma_sqr_of_epsilon))
    self.H0_t_value = t.isf(alpha / 2, self.n - 2)
    self.H0_valid = self.H0_test_value < self.H0_t_value
def prediction_interval_of_Y(self, x0, alpha):
    y0 = self.a + self.b * x0
    t_value = t.isf(alpha / 2, self.n - 2)
    if self.sigma_sqr_of_epsilon is None:
        self.var_of_epsilon()
    sigma = math.sqrt(self.sigma_sqr_of_epsilon)
    others = math.sqrt(1 + (1 / self.n) + (x0 - self.mean_x) ** 2 / self.S_xx)
    self.upper_bound = y0 + t_value * sigma * others
    self.lower_bound = y0 - t_value * sigma * others
def sample_size(self, alpha, beta, nlimit=10000):
    """
    :param alpha: type I error risk
    :param beta: type II error risk
    :param nlimit: upper bound on the search for n
    :return: an estimate of the required sample size
    """
    n = 3
    d = np.abs(self.m1 - self.m2)
    tbeta = t.isf(beta, n)
    talpha = t.isf(alpha, self.n1)
    while d * np.sqrt(n) / self.pooled_s < (tbeta + talpha):
        n += 1
        tbeta = t.isf(beta, n)
        if n > nlimit:
            break
    return n
def compute_theta_sym(alpha_sym, number_of_walks_ran, length_of_walk):
    """
    Computes the threshold difference in the truncated hitting times of two
    nodes for the null hypothesis of the two nodes being path-symmetric to be
    violated at a significance level given by alpha_sym.

    :return: theta_sym, used as a parameter for clustering based on truncated
        hitting time
    """
    return ((length_of_walk - 1) / (2 * number_of_walks_ran) ** 0.5) * \
        t.isf(alpha_sym, df=number_of_walks_ran - 1)
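Usage sketch (illustrative, with made-up parameters): for 1000 walks of length 20 at a significance level of 0.05, theta_sym is the largest hitting-time difference still consistent with path symmetry.

theta_sym = compute_theta_sym(alpha_sym=0.05, number_of_walks_ran=1000, length_of_walk=20)
print(theta_sym)  # about 0.7 for these settings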
def solve(self):
    # recover the sample mean and standard deviation from a 95% confidence
    # interval of (18.985, 21.015) with n = 36
    lower = 18.985
    upper = 21.015
    mean = (lower + upper) / 2
    delta = mean - lower
    n = 36
    t_value = t.isf(0.025, n - 1)
    std = delta * math.sqrt(n) / t_value
    return [round(mean, 2), round(std, 2)]
def prefiction_interval_yh(self, *, xh, alpha):
    tmp_t_value = t.isf(alpha / 2, len(self.__x_data) - 2)
    tmp_value = math.sqrt(self.__MSE * (1 + 1 / len(self.__x_data) + pow(
        (xh - self.__mean_x), 2) / self.__sxx))
    return [
        self.y_hat(x=xh) - tmp_t_value * tmp_value,
        self.y_hat(x=xh) + tmp_t_value * tmp_value
    ]
def solve(self):
    n1 = 22
    n2 = 22
    std2 = 45.1
    mean2 = 52.1
    std1 = 26.4
    mean1 = 27.1
    s_w_square = ((n1 - 1) * std1 * std1 + (n2 - 1) * std2 * std2) / (n1 + n2 - 2)
    stat_value = (mean1 - mean2) / (math.sqrt(s_w_square) * math.sqrt(1.0 / n1 + 1.0 / n2))
    t_value = t.isf(0.05, n1 + n2 - 2)
    return [min(n1 - 1, n2 - 1), -round(stat_value, 2), not stat_value <= -t_value]
def confidence_interval_yh(self, *, xh, alpha):
    tmp_t_value = t.isf(alpha / 2, len(self.__x_data) - 2)
    print('2a - t value : ', tmp_t_value)
    print("2a - Y hat : ", self.y_hat(x=xh))
    print('2a - S^2 : ', self.estimator_variance_yh(xh=xh))
    return [
        self.y_hat(x=xh) - tmp_t_value * math.sqrt(self.estimator_variance_yh(xh=xh)),
        self.y_hat(x=xh) + tmp_t_value * math.sqrt(self.estimator_variance_yh(xh=xh))
    ]
def bwt_ave(x):
    # iterate the biweight estimate until the location converges
    x_median = np.median(x)
    x_mad = np.median(np.abs(x - np.median(x)))
    bwt_ave = 0.0
    while np.around(np.abs(bwt_ave - x_median), 8) > 0:
        bwt_ave, bwt_std = iter_bwt(x, x_median, x_mad)
        x_median = bwt_ave
    # 68% limits from the t and chi-squared distributions
    chi2_68_left = chi2.ppf(0.32 / 2.0, len(x) - 1)
    chi2_68_right = chi2.isf(0.32 / 2.0, len(x) - 1)
    t_68 = t.isf(0.32 / 2.0, int(0.7 * (len(x) - 1)))  # int(), not Python 2 long()
    bwt_ave_low = bwt_ave + t_68 * bwt_std / np.sqrt(len(x))
    bwt_ave_up = bwt_ave - t_68 * bwt_std / np.sqrt(len(x))
    bwt_std_low = (np.sqrt((len(x) - 1) / chi2_68_left) - 1.0) * bwt_std
    bwt_std_up = (np.sqrt((len(x) - 1) / chi2_68_right) - 1.0) * bwt_std
    return (bwt_ave, bwt_ave_low, bwt_ave_up), (bwt_std, bwt_std_low, bwt_std_up)
def qt(q, df=1, loc=0, scale=1, ncp=None, lowertail=True, log=False):
    """
    The quantile function for the t distribution. You provide a quantile
    (e.g. q=0.75) or array of quantiles, and it returns the value along the
    t distribution that corresponds to the qth quantile.

    So using a value of q=0.30 means that 30% of the values are below the
    returned value. It essentially gives us the cutoff point for the lowest
    30% of values. If you want the cutoff point for the top 30% of values,
    then use lowertail=False.

    ARGS:
    ---------------
    :param q (float, array of floats):
        The quantile(s)
    :param df (float):
        degrees of freedom
    :param loc: array_like, optional
        location parameter (default=0)
    :param scale: float, optional
        scale (default=1)
    :param ncp (float):
        non-centrality parameter delta. Currently not implemented.
    :param lowertail (boolean):
        Lower tail?
    :param log (boolean):
        use log? Currently not implemented.

    RETURN:
    ---------------
    :return: an array of the value(s) corresponding to the quantiles q
    """
    # ==========================================================================
    if log:
        raise NotImplementedError("Log option is not implemented yet.")
    elif lowertail:
        return t.ppf(q=q, df=df, loc=loc, scale=scale)
    else:
        return t.isf(q=q, df=df, loc=loc, scale=scale)
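A short sketch of how qt mirrors R's qt() (illustrative values only):

print(qt(0.975, df=10))                   # ~2.228, like R's qt(0.975, 10)
print(qt(0.975, df=10, lowertail=False))  # ~-2.228, the upper-tail counterpart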
def background_subtract_line(profile, profile_sd, background_mask):
    """
    Performs a linear background subtraction on a 1D peak profile.

    Parameters
    ----------
    profile : np.ndarray
        1D profile
    profile_sd : np.ndarray
        standard deviations for profile
    background_mask : array_like
        array of bool that specifies which Y pixels to use for background
        subtraction.
    """
    # which values to use as a background region
    mask = np.array(background_mask).astype("bool")
    x_vals = np.where(mask)[0]

    try:
        y_vals = profile[x_vals]
    except IndexError:
        print(x_vals)

    y_sdvals = profile_sd[x_vals]
    x_vals = x_vals.astype("float")

    # some SD values may be 0, which will screw up curve fitting
    y_sdvals = np.where(y_sdvals == 0, 1, y_sdvals)

    # equation for a straight line
    def f(x, a, b):
        return a + b * x

    # estimate the linear fit
    y_bar = np.mean(y_vals)
    x_bar = np.mean(x_vals)
    bhat = np.sum((x_vals - x_bar) * (y_vals - y_bar))
    bhat /= np.sum((x_vals - x_bar) ** 2)
    ahat = y_bar - bhat * x_bar

    # get the weighted fit values; we know the absolute sigma values
    popt, pcov = curve_fit(f, x_vals, y_vals, sigma=y_sdvals,
                           p0=np.array([ahat, bhat]), absolute_sigma=True)

    def CI(xx, pcovmat):
        return (pcovmat[0, 0] + pcovmat[1, 0] * xx +
                pcovmat[0, 1] * xx + pcovmat[1, 1] * (xx ** 2))

    bkgd = f(np.arange(np.size(profile, 0)), popt[0], popt[1])

    # now work out confidence intervals
    # TODO, should this be confidence interval or prediction interval?
    # if you try to do a fit which has a singular matrix
    if np.isfinite(pcov).all():
        bkgd_sd = np.asarray([CI(x, pcov) for x in np.arange(len(profile))],
                             dtype="float64")
    else:
        bkgd_sd = np.zeros_like(bkgd)

    bkgd_sd = np.sqrt(bkgd_sd)

    # get the t value for a two-sided student t test at the 68.3% confidence
    # level
    bkgd_sd *= t.isf(0.1585, np.size(x_vals, 0) - 2)

    return EP.EPsub(profile, profile_sd, bkgd, bkgd_sd)
from scipy.stats import chi2, t, f
import numpy as np

# Q1
q1_1 = chi2.isf(q=0.95, df=4)
assert np.allclose(q1_1, 0.710723)
q1_2 = chi2.isf(q=0.05, df=4)
assert np.allclose(q1_2, 9.48773)
q1_3 = chi2.isf(q=0.95, df=9)
assert np.allclose(q1_3, 3.32511)
q1_4 = chi2.isf(q=0.05, df=9)
assert np.allclose(q1_4, 16.9190)

# Q2
q2_1 = t.isf(q=0.05, df=7)
assert np.allclose(q2_1, 1.895, rtol=1.e-3)
q2_2 = t.isf(q=0.025, df=7)
assert np.allclose(q2_2, 2.365, rtol=1.e-3)
q2_3 = t.isf(q=0.05, df=12)
assert np.allclose(q2_3, 1.782, rtol=1.e-3)
q2_4 = t.isf(q=0.025, df=12)
assert np.allclose(q2_4, 2.179, rtol=1.e-3)

# Q3
q3_1 = f.isf(q=0.05, dfn=5, dfd=7)
assert np.allclose(q3_1, 3.9715)
q3_2 = f.isf(q=0.95, dfn=5, dfd=7)  # inverse of F(7,5; 0.05)
assert np.allclose(q3_2, 0.2050903422957813)
def critical_t(percentile, df, one_tailed):
    if one_tailed:
        return round(t.isf((100 - percentile) / 100., df), 3)
    return round(t.isf((100 - percentile) / 200., df), 3)
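Usage sketch (illustrative): here the percentile is given on a 0-100 scale, unlike the proportion-based helper earlier in this collection.

print(critical_t(95, 10, one_tailed=True))   # ~1.812
print(critical_t(95, 10, one_tailed=False))  # ~2.228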
def _sim_EOS(self, a, da, b, db, c, dc, d, dd, *args):
    '''
    Simulates the EOS models to obtain average values and standard deviations
    of physical parameters. Only 4- and 5-parameter EOS models are currently
    supported.

    @param a: Average value of fitting parameter a
    @param da: Standard deviation of fitting parameter a
    @param b: Average value of fitting parameter b
    @param db: Standard deviation of fitting parameter b
    @param c: Average value of fitting parameter c
    @param dc: Standard deviation of fitting parameter c
    @param d: Average value of fitting parameter d
    @param dd: Standard deviation of fitting parameter d
    @param args: Additional parameters and their standard deviations
                 (higher-order models)
    '''
    import importlib
    import random
    random.seed()

    # Sample fitting parameters
    N = 10000  # Number of samples
    t_scores = t.isf(self._alpha / 2., self._dof) * self._delta
    a_vals = np.asarray([random.gauss(a, da / t_scores[0]) for i in range(N)])
    b_vals = np.asarray([random.gauss(b, db / t_scores[1]) for i in range(N)])
    c_vals = np.asarray([random.gauss(c, dc / t_scores[2]) for i in range(N)])
    d_vals = np.asarray([random.gauss(d, dd / t_scores[3]) for i in range(N)])
    if len(args) > 0:
        # Check if len(args) == 2 (5-parameter models)
        if len(args) == 2:
            e_vals = np.asarray([random.gauss(args[0], args[1] / t_scores[4])
                                 for i in range(N)])
        else:
            print("ERROR: Currently cannot simulate EOS models with more than 5 parameters")
            print("Returning zero values from simulation")
            return [0., 0., 0., 0., [0. for i in range(len(args))]]

    # Every supported model follows the same pattern, differing only in the
    # auxiliary module used and in whether a fifth parameter is present, so
    # dispatch on the model name instead of repeating each branch verbatim.
    model_names = {EOSmodel.BM4: 'BM4', EOSmodel.mBM4: 'mBM4',
                   EOSmodel.LOG4: 'LOG4', EOSmodel.MO4: 'MO4',
                   EOSmodel.BM5: 'BM5', EOSmodel.mBM5: 'mBM5',
                   EOSmodel.LOG5: 'LOG5'}
    name = model_names[self.model]
    aux = importlib.import_module(name + '_aux')
    five_param = name.endswith('5')

    # Obtain distribution of V0 from exact expressions
    if five_param:
        V0_vals_ind, self._V0_vals = getattr(aux, name + '_V0')(
            b_vals, c_vals, d_vals, e_vals, self.V)
    else:
        V0_vals_ind, self._V0_vals = getattr(aux, name + '_V0')(
            b_vals, c_vals, d_vals, self.V)

    # Some values of V0 are invalid, so use only the valid values
    a_vals = a_vals[V0_vals_ind]
    b_vals = b_vals[V0_vals_ind]
    c_vals = c_vals[V0_vals_ind]
    d_vals = d_vals[V0_vals_ind]
    if five_param:
        e_vals = e_vals[V0_vals_ind]
        params = (b_vals, c_vals, d_vals, e_vals)
    else:
        params = (b_vals, c_vals, d_vals)

    # Obtain distributions of E0, B0, and B0p (and B0pp for 5-parameter models)
    self._E0_vals = getattr(aux, name + '_E0')(a_vals, *params, self._V0_vals)
    self._B0_vals = getattr(aux, name + '_B0')(*params, self._V0_vals)
    self._B0p_vals = getattr(aux, name + '_B0p')(*params, self._V0_vals)
    if five_param:
        self._B0pp_vals = getattr(aux, name + '_B0pp')(*params, self._V0_vals)
        return (np.mean(self._V0_vals), np.mean(self._E0_vals),
                np.mean(self._B0_vals), np.mean(self._B0p_vals),
                np.mean(self._B0pp_vals))
    return (np.mean(self._V0_vals), np.mean(self._E0_vals),
            np.mean(self._B0_vals), np.mean(self._B0p_vals))
def confidence_prediction_bands(model, x_array, confidence_interval, f, flag=None):
    """
    This function calculates the confidence and prediction bands of the
    function f(x) from a best-fit model with uncertainties in its parameters
    as calculated (for example) by the function nonlinear_least_squares_fit().

    The values are calculated via the delta method, which estimates the
    variance of f evaluated at x as

        var(f(x)) = df(x)/dB var(B) df(x)/dB

    where df(x)/dB is the vector of partial derivatives of f(x) with respect
    to B.

    Parameters
    ----------
    model : class instance
        As modified (for example) by the function
        nonlinear_least_squares_fit(). Should contain the following
        functions: get_params, set_params, function, normal
        and attributes: delta_params, pcov, dof, noise_variance
    x_array : 2D numpy array
        coordinates at which to evaluate the bounds
    confidence_interval : float
        Probability level of finding the true model (confidence bound) or any
        new data point (probability bound). For example, the 95% confidence
        bounds should be calculated using a confidence interval of 0.95.
    f : function
        This is the function defining the variable y=f(x) for which the
        confidence and prediction bounds are desired
    flag : variable type
        This (optional) flag is passed to model.function to control how the
        modified position of x is calculated. This value is then used by f(x)

    Output
    ------
    bounds : 2D numpy array
        An element bounds[i][j] gives the lower and upper confidence
        (i=0, i=1) and prediction (i=2, i=3) bounds for the jth data point.
    """
    # Check array dimensions
    n_dimensions = len(model.data[0])
    if len(x_array[0]) != n_dimensions:
        raise Exception('Dimensions of each point must be the same as the '
                        'total number of dimensions')

    param_values = model.get_params()
    x_m_0s = np.empty_like(x_array)
    f_m_0s = np.empty_like(x_array[:, 0])
    for i, x in enumerate(x_array):
        x_m_0s[i] = model.function(x, flag)
        f_m_0s[i] = f(x)

    # finite-difference derivatives of f with respect to each parameter
    diag_delta = np.diag(model.delta_params)
    dxdbeta = np.empty([len(param_values), len(x_array)])
    for i, value in enumerate(param_values):
        model.set_params(param_values + diag_delta[i])
        for j, x_m_0 in enumerate(x_m_0s):
            x_m_1 = model.function(x_m_0, flag)
            dxdbeta[i][j] = (f(x_m_1) - f_m_0s[j]) / diag_delta[i][i]
    model.set_params(param_values)  # reset params

    # delta-method variance at each point
    variance = np.empty(len(x_array))
    for i, Gprime in enumerate(dxdbeta.T):
        variance[i] = Gprime.T.dot(model.pcov).dot(Gprime)

    critical_value = t.isf(0.5 * (confidence_interval + 1.), model.dof)
    confidence_half_widths = critical_value * np.sqrt(variance)
    prediction_half_widths = critical_value * np.sqrt(variance + model.noise_variance)

    confidence_bound_0 = f_m_0s - confidence_half_widths
    confidence_bound_1 = f_m_0s + confidence_half_widths
    prediction_bound_0 = f_m_0s - prediction_half_widths
    prediction_bound_1 = f_m_0s + prediction_half_widths

    return np.array([confidence_bound_0, confidence_bound_1,
                     prediction_bound_0, prediction_bound_1])
x4 = bootstrap_estimate(b, 40, 197)
x5 = bootstrap_estimate(b, 80, 197)
x6 = bootstrap_estimate(b, 160, 197)
x7 = bootstrap_estimate(b, 320, 197)
x8 = bootstrap_estimate(b, 640, 197)
x9 = bootstrap_estimate(b, 1280, 197)
x10 = bootstrap_estimate(b, 2560, 197)
x11 = bootstrap_estimate(b, 5120, 197)
x12 = bootstrap_estimate(b, 10240, 197)
x13 = bootstrap_estimate(b, 20480, 197)
profile_n = numpy.array([x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13])

from pylab import *

# Raw result
errorbar(numpy.log10(profile_n[:, 0]), numpy.sqrt(profile_n[:, 1]),
         yerr=numpy.sqrt(profile_n[:, 2] * t.isf(0.25, b - 1) / numpy.sqrt(b)))
show()

# Rescaled to total error
errorbar(numpy.log10(profile_n[:, 0]), numpy.sqrt(profile_n[:, 1] * 197),
         yerr=numpy.sqrt(profile_n[:, 2] * 197.0 * 197.0 * t.isf(0.25, b - 1) / numpy.sqrt(b)))
show()

plot(numpy.log10(profile_n[:, 0]), numpy.sqrt(profile_n[:, 1] * 197))
show()

# Doing this craziness so I can abort if it takes too long
b = 30
x1 = bootstrap_estimate(b, 5, 6)
def estima_amostra(amostra_piloto, alpha, margem_erro):
    # estimate the sample size needed for a given margin of error
    # (margem_erro) from a pilot sample (amostra_piloto)
    df = len(amostra_piloto) - 1
    std = np.std(amostra_piloto, dtype=np.float64)
    tval = t.isf(alpha / 2, df)
    return math.ceil((tval * std / margem_erro) ** 2)
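Usage sketch (illustrative; the pilot sample is made up): estimates how many observations are needed for a margin of error of 0.5 at the 5% significance level.

pilot = [4.2, 5.1, 4.8, 5.5, 4.9, 5.0, 4.6, 5.2]  # made-up pilot sample
print(estima_amostra(pilot, alpha=0.05, margem_erro=0.5))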
    y = alpha
    return (y)

while True:  # this while statement is the main program
    print('\nThis program can calculate the t score, the t critical value and p value for a tail.')
    print('This program will also conduct a basic one sample hypothesis test about the population mean, population standard deviation unknown.')
    alpha = alphaf()
    tails = tailchoice()
    revalpha = tailrev(tails, alpha)
    sampsize = ssize()
    df = sampsize - 1
    xval = xvalf()
    mean = meanf()
    stdev = stdevf()
    critval = t.isf(revalpha, df)
    print('\nThe critical value corresponding to an alpha level of ', alpha,
          ' in a single tail is ', critval, '.\n', sep='')
    if tails == '3':
        print('There is another tail at ', critval * -1, '\n', sep='')
    tval = (xval - mean) / (stdev / sampsize**0.5)
    if tails == '1':
        pval = t.sf(tval, df)
    elif tails == '2':
        pval = 1.0000 - t.sf(tval, df)
    else:
        pval = t.sf(tval, df)
        pval2 = 1.0000 - t.sf(tval, df)