def main(args):
    # Next, we build a very simple model.
    bests = []
    worsts = []
    averages = []
    for i in range(args.n):
        print("Training and evaluating iteration: ", i)
        # SARSA does not require a memory.
        agent = get_agent(args.agent, args.model, args.lr)
        agent.fit(env, nb_steps=args.train_steps, visualize=False, verbose=2)
        # After training is done, we save the final weights.
        # sarsa.save_weights('sarsa_{}_weights.h5f'.format(ENV_NAME), overwrite=True)
        # Finally, evaluate our algorithm for 5 episodes.
        callback = EvaluationCallback(verbose=True)
        agent.test(env, nb_episodes=args.test_episodes, callbacks=[callback],
                   verbose=0, visualize=False)
        best, worst, average = callback.get_result()
        bests.append(best)
        worsts.append(worst)
        averages.append(average)
    bests_mvs = bayes_mvs(bests)
    worsts_mvs = bayes_mvs(worsts)
    average_mvs = bayes_mvs(averages)
    print("Best Performance: {:.2f} +- {:.2f}".format(
        bests_mvs[0][0], bests_mvs[0][0] - bests_mvs[0][1][0]))
    print("Worst Performance: {:.2f} +- {:.2f}".format(
        worsts_mvs[0][0], worsts_mvs[0][0] - worsts_mvs[0][1][0]))
    print("Average Performance: {:.2f} +- {:.2f}".format(
        average_mvs[0][0], average_mvs[0][0] - average_mvs[0][1][0]))
def smith_cost(df, debug=False): ire = df[df["effect"] == "Item repetition effect"]["contrast"] cre1 = df[df["effect"] == "Context repetition, 1st target"]["contrast"] cre2 = df[df["effect"] == "Context repetition, 2nd target"]["contrast"] ire_mvs = bayes_mvs(ire) cre1_mvs = bayes_mvs(cre1) cre2_mvs = bayes_mvs(cre2) if debug: print(f"ire_mvs: {ire_mvs}") print(f"cre1_mvs: {cre1_mvs}") print(f"cre2_mvs: {cre2_mvs}") ratio_top = cre1_mvs[0][0] ratio_bottom = ire_mvs[0][0] if np.isnan(ratio_top): ratio_top = 0 if np.isnan(ratio_bottom): ratio_bottom = 0.00001 ratio = ratio_top / ratio_bottom ratio_cost = abs(ratio - 0.744) # cost = 0 if ratio is an exact match # add 1 if ire and cre1 CIs don't overlap # ire_cre1_overlap_cost = 1 - ( # (cre1_mvs[0][1][0] < ire_mvs[0][1][0]) and # (cre1_mvs[0][1][1] < ire_mvs[0][1][1]) and # (cre1_mvs[0][1][1] > ire_mvs[0][1][0]) # ) # add 1 if zero is in cre1 CI cre1_ci_lo = cre1_mvs[0][1][0] cre1_ci_hi = cre1_mvs[0][1][1] if np.isnan(cre1_ci_lo) or np.isnan(cre1_ci_hi): cre1_zero_cost = 1 else: cre1_zero_cost = int(cre1_ci_lo < 0 < cre1_ci_hi) # add 1 if zero is not in cre2 CI cre2_ci_lo = cre2_mvs[0][1][0] cre2_ci_hi = cre2_mvs[0][1][1] if np.isnan(cre2_ci_lo) or np.isnan(cre2_ci_hi): if np.isclose(cre2.mean(), 0): cre2_zero_cost = 0 else: cre2_zero_cost = 1 else: cre2_zero_cost = 1 - int(cre2_ci_lo <= 0 <= cre2_ci_hi) cost = ratio_cost + cre1_zero_cost + cre2_zero_cost if debug: print(f"Ratio cost: {ratio_cost}") print(f"CRE1 zero cost: {cre1_zero_cost}") print(f"CRE2 zero cost: {cre2_zero_cost}") print(f"Total cost: {cost}") return cost
def IntervalEstimation(the_list, ci=0.95):
    """
    Construct a confidence interval (default 95%) for the ...
    """
    #mean = np.mean(the_list)
    #stdDeviation = np.std(the_list, ddof=1)
    lower, upper = stats.bayes_mvs(the_list, ci)[0][1]
    __printBlock(('Construct a ' + str(ci),
                  '$(' + str(lower.round(3)) + ', ' + str(upper.round(3)) + ')'))
def plot_distribution_summaries(output_arrays, ax): m = 1 low_lim, high_lim = [], [] for output_array in output_arrays: mean, var, std = bayes_mvs(output_array) low_lim.append(mean[0] - 1.96 * std[0]) high_lim.append(mean[0] + 1.96 * std[0]) markers, caps, bars = ax.errorbar([m - 0.5], [mean[0]], yerr=[std[0] * 1.96], fmt='o', c='#000000', capsize=3, elinewidth=1, markeredgewidth=1) [bar.set_alpha(0.5) for bar in bars] [cap.set_alpha(0.5) for cap in caps] ax.scatter([m for _ in range(len(output_array))], output_array, marker='+', s=1, c=PLOT_COLORS[m - 1], alpha=0.25) m += 1 low_lim = np.max([np.min(low_lim), 0]) high_lim = np.max(high_lim) span_lim = high_lim - low_lim low_lim = low_lim - 2 * span_lim / 10 high_lim = high_lim + 3 * span_lim / 10 ax.set_ylim(low_lim, high_lim) ax.set_xticks([m + 0.75 for m in range(len(output_arrays))]) ax.set_xticklabels(['obs.', 'gan', 'knn', 'mice']) [lab.set_fontsize(6.5) for lab in ax.get_xticklabels()] [lab.set_fontsize(6.5) for lab in ax.get_yticklabels()] m = 1 for output_array in output_arrays: mean, var, std = bayes_mvs(output_array) y1, y2 = ax.get_ylim() ax.annotate('%.2f ± %.2f' % (mean[0], std[0]), xy=(m - 0.5, mean[0] + 1.96 * std[0] + (y2 - y1) / 10), fontsize=6) m += 1
def run(self): """ Resets env. Loop env.step and agent.step() until number of steps have been made. If an env is done before the number of steps have been reached, the env is reset. @return the total reward divided by the total number of episodes """ steps = 0 episodeCount = 0 totalReward = 0 episodeRewards = [] while steps < self._maxSteps: self._env.seed(self._getSeed()) obs = self._env.reset() episode = Episode(self._agent, self._env, obs, self._render, self._renderDelay) episode.addListener(self) episodeSteps, episodeReward = episode.run() logging.info("New episode") steps += episodeSteps totalReward += episodeReward episodeRewards.append(episodeReward) logging.info("Episode reward: " + str(episodeReward)) episodeCount += 1 try: return stats.describe(episodeRewards), stats.bayes_mvs( episodeRewards) except ValueError as err: print(err)
def estimator_boostrap_bayes(err, alpha=0.05):
    from scipy.stats import bayes_mvs
    mean, var, std = bayes_mvs(err, alpha=alpha)
    return mean, var, std
def bayes_mvs_wrapper(sequence, alpha):
    # Guard for constant sequences, where the Bayesian interval estimates degenerate;
    # return results mimicking the (statistic, (lower, upper)) shape of bayes_mvs.
    if max(sequence) == min(sequence):
        return (np.mean(sequence), (np.mean(sequence), np.mean(sequence))), \
               (0, (0, 0)), (0, (0, 0))
    else:
        return stats.bayes_mvs(sequence, alpha)
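# A minimal, self-contained sketch (not taken from any snippet above) of the structure
# that bayes_mvs_wrapper passes through: scipy.stats.bayes_mvs returns three
# (statistic, (lower, upper)) results -- one each for the mean, variance and standard
# deviation -- which is why so many snippets in this collection index it as [0][1][0], etc.
import numpy as np
from scipy import stats

sample = np.array([6.0, 9.0, 12.0, 7.0, 8.0, 8.0, 13.0])
mean, var, std = stats.bayes_mvs(sample, alpha=0.95)
print(mean.statistic, mean.minmax)   # point estimate and credible interval for the mean
print(var.statistic, var.minmax)     # same for the variance
print(std.statistic, std.minmax)     # same for the standard deviation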
def getStatistics(values):
    # renamed the parameter from `list` to `values` to avoid shadowing the builtin
    df = pd.DataFrame(values)
    statsm = {}
    mean, var, std = stats.bayes_mvs(df, alpha=0.95)
    if math.isnan(mean[0]):
        statsm['mean'] = 0
    else:
        statsm['mean'] = int(mean[0])
    if math.isnan(var[0]):
        statsm['var'] = 0
    else:
        statsm['var'] = int(var[0])
    if math.isnan(std[0]):
        statsm['std'] = 0
    else:
        statsm['std'] = int(std[0])
    out = grubbs.max_test_outliers(values, alpha=0.05)
    if out:
        statsm['outlier'] = numpy.amax(out)
    else:
        statsm['outlier'] = 0
    return statsm
def test_result_attributes(self):
    x = np.arange(15)
    attributes = ('statistic', 'minmax')
    res = stats.bayes_mvs(x)
    for i in res:
        check_named_results(i, attributes)
def calculateMeans(df):
    calcVectors = {}
    calcVectors["_ids"] = df["_id"].tolist()
    calcVectors["totals"] = df["total"].tolist()
    calcVectors["region"] = df["region"][0]
    calcVectors["hour"] = df["hour"][0]
    calcVectors["weekday"] = df["weekday"][0]
    mean, var, std = stats.bayes_mvs(df["total"].tolist())
    if math.isnan(mean[0]):
        calcVectors["mean"] = 0
    else:
        calcVectors["mean"] = int(mean[0])
    if math.isnan(var[0]):
        calcVectors["var"] = 0
    else:
        calcVectors["var"] = int(var[0])
    if math.isnan(std[0]):
        calcVectors["std"] = 0
    else:
        calcVectors["std"] = int(std[0])
    calcVectors["outliers"] = outliersTest(df["total"].tolist())
    out = []
    for v in calcVectors["outliers"]:
        # boolean indexing rather than DataFrame.query, which expects a string expression
        q = df[df["total"] == v]["_id"].tolist()
        out.append(q)
    calcVectors["vector_ids"] = out
    return calcVectors
def train_all(self, architecture, data, seed=None, save_params=False, augment_fn=augment): """ Runs all training splits for a given architecture and caches trained networks in a list. """ net_list = [] # initialize list if seed: np.random.seed(seed) # set random seed if provided starttime = time.time() # set start time num_splits = len(data[2]) for split in range(num_splits): net = self.run_split(architecture, data, split, augment_fn) net_list.append(net) mvs = bayes_mvs([n.test_err for n in net_list ], alpha=.95) # get mean test performance after all splits complete time_elapsed = time.time() - starttime # check total elapsed time print("\n\nOVERALL RESULTS") print("\tAverage NLL:\t\t{:.3f}".format(mvs[0][0])) print("\tCred. Interval:\t\t[{:.3f}, {:.3f}]".format(mvs[0][1][0], mvs[0][1][1])) print("\tTotal time:\t\t{:.2f}".format(time_elapsed)) return net_list
def fooof_channel_rejection(eeg, psds, freqs, f_low, f_high, participant, session_name): from scipy.stats import bayes_mvs n_bads = 0 fooof_group = FOOOFGroup(max_n_peaks=6, min_peak_amplitude=0.1, peak_width_limits=[1, 12], background_mode='knee') fooof_group.fit(freqs, psds, freq_range=[f_low, f_high / 2], n_jobs=-1) fooof_group_fig = fooof_group.plot(save_fig=True, file_name='FOOOF_group_' + participant + '_' + session_name, file_path=data_dir + '/results/') bg_slope = fooof_group.get_all_data('background_params', col='slope') mean_cntr, var_cntr, std_cntr = bayes_mvs(bg_slope, alpha=0.9) lower_slope = mean_cntr[1][0] - std_cntr[1][1] upper_slope = mean_cntr[1][1] + std_cntr[1][1] print('upper and lower slope range (mean, std)', lower_slope, upper_slope, np.mean(bg_slope), np.std(bg_slope)) for channel_idx, slope in enumerate(bg_slope): if slope < lower_slope or slope > upper_slope: eeg.info['bads'].append(eeg.ch_names[channel_idx]) n_bads += 1 eeg.interpolate_bads(reset_bads=True) return eeg, n_bads
def __init__(self, metric_name, measurements):
    self._metric_name = metric_name
    self._measurements = measurements
    print("measurements:", self._measurements)
    array = np.array(self._measurements)
    self._stats = stats.describe(array)
    self._bayes_mvs = stats.bayes_mvs(array)
def get_data(column, np_values, alpha): mvs = bayes_mvs(np_values, alpha) #report these metrics output = [ present("Column", column), present("Length", len(np_values)), present("Unique", len(np.unique(np_values))), present("Min", np_values.min()), present("Max", np_values.max()), present("Mid-Range", (np_values.max() - np_values.min()) / 2), present("Range", np_values.max() - np_values.min()), present("Mean", np_values.mean()), present("Mean-%s-CI" % alpha, tupleToString(mvs[0][1])), present("Variance", mvs[1][0]), present("Var-%s-CI" % alpha, tupleToString(mvs[1][1])), present("StdDev", mvs[2][0]), present("Std-%s-CI" % alpha, tupleToString(mvs[2][1])), present("Mode", stats.mode(np_values)[0][0]), present("Q1", stats.scoreatpercentile(np_values, 25)), present("Q2", stats.scoreatpercentile(np_values, 50)), present("Q3", stats.scoreatpercentile(np_values, 75)), present("Trimean", trimean(np_values)), present("Minhinge", midhinge(np_values)), present("Skewness", stats.skew(np_values)), present("Kurtosis", stats.kurtosis(np_values)), present("StdErr", sem(np_values)), present("Normal-P-value", normaltest(np_values)[1]) ] return output
def evaluate(x, fp): results_df = pd.DataFrame(columns=[ 'Fingerprint', 'Average Precision', 'low_ap', 'high_ap', 'Training Set Size', 'Estimator' ]) count = 0 for size in x: f = h5py.File( '../../processed_data/' + fp + '_' + str(8192) + '_' + str(size) + '_' + estimator_name + '.hdf5', 'r') nranks = list() aps = list() for _ in range(5): proba = f[f'repeat{_}']['prediction'][:].copy() test_idx = f[f'repeat{_}']['test_idx'][:].copy()[~np.isinf(proba)] cutoff = np.percentile(true_scores[test_idx], 0.3) aps.append( average_precision_score(true_scores[test_idx] < cutoff, proba[~np.isinf(proba)])) mean = expit(np.mean(logit(aps))) cr = bayes_mvs(logit(aps))[0][1] f.close() results_df.loc[count] = [ fp, mean, expit(cr[0]), expit(cr[1]), size, estimator_name ] count += 1 return results_df
def rmse_ci(attack, num_colluders):
    print("{} colluders under attack {}".format(num_colluders, attack))
    rms_errors = [
        attack_rmse(attack, num_colluders)
        for i in range(num_iterations_per_attack_size)
    ]
    mean_ci, variance_ci, stddev_ci = bayes_mvs(rms_errors)
    return mean_ci
def bayes_scale(s):
    """
    Remove mean and divide by standard deviation, using bayes_mvs statistics.
    """
    if sum(~np.isnan(s)) > 1:
        bm, bv, bs = bayes_mvs(s[~np.isnan(s)])
        return (s - bm.statistic) / bs.statistic
    else:
        return np.full(s.shape, np.nan)
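# Hypothetical usage of bayes_scale above (a sketch, assuming its numpy/scipy imports):
# NaN entries are excluded from the mean and standard-deviation estimates and simply
# propagate unchanged through the returned array.
import numpy as np

s = np.array([1.0, 2.0, np.nan, 4.0, 5.0])
print(bayes_scale(s))   # centred and scaled copy of s, with the NaN left in place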
def rm_outlier3(points, threshold=0.9):
    try:
        # bayes_mvs returns only three (statistic, (lower, upper)) results, so for
        # len(points) > 3 the indexing below raises and the except branch returns
        # the points unchanged.
        confidence = bayes_mvs(rm_outlier2(points), threshold)
        return [
            points[i] for i in range(0, len(points))
            if confidence[i][0] != float('inf')
        ]
    except Exception:
        return points
def __call__(self, alpha, df):
    with warnings.catch_warnings():
        warnings.simplefilter('error', RuntimeWarning)
        try:
            (mean, *_) = bayes_mvs(df, alpha=alpha)
        except RuntimeWarning:
            raise ValueError()
    (_, (lower, upper)) = mean
    return Band(lower, upper)
def part_b(fname=fname): fname += '_b' nsamps = int(1e5) thetaA = np.zeros(nsamps) thetaB = np.zeros(nsamps) yA = np.zeros(nsamps) yB = np.zeros(nsamps) for i in range(nsamps): # do Monte Carlo thetaA[i] = gamma.rvs(56, scale=1.0/59) yA[i] = poisson.rvs(thetaA[i]) thetaB[i] = gamma.rvs(307, scale=1.0/219) yB[i] = poisson.rvs(thetaB[i]) thetaBA = thetaB - thetaA yBA = yB - yA plt.figure() plt.hist(thetaBA, bins=16, normed=True) plt.xlabel(r'$\theta_B-\theta_A$',fontsize=labelFontSize) plt.ylabel(r'$p(\theta_B-\theta_A)$',fontsize=labelFontSize) plt.title(r"4.8b $\theta_B-\theta_A$",fontsize=titleFontSize) plt.xticks(fontsize=tickFontSize) plt.yticks(fontsize=tickFontSize) plt.savefig(fname+'thetaBA.'+imgFmt, format=imgFmt) plt.figure() plt.hist(yBA, bins=16, normed=True) plt.xlabel(r'$\tilde{Y}_B-\tilde{Y}_A$',fontsize=labelFontSize) plt.ylabel(r'$p(\tilde{Y}_B-\tilde{Y}_A)$',fontsize=labelFontSize) plt.title(r"4.8b $\tilde{Y}_B-\tilde{Y}_A$",fontsize=titleFontSize) plt.xticks(fontsize=tickFontSize) plt.yticks(fontsize=tickFontSize) plt.savefig(fname+'yBA.'+imgFmt, format=imgFmt) thetaCI = bayes_mvs(thetaB - thetaA, alpha=0.95) yCI = bayes_mvs(yB - yA, alpha=0.95) print("thetaCI = {0}".format(thetaCI[0][1])) print("yCI = {0}".format(yCI[0][1])) thetaCI2 = (np.percentile(thetaBA, 2.5), np.percentile(thetaBA, 97.5)) yCI2 = (np.percentile(yBA, 2.5), np.percentile(yBA, 97.5)) print("thetaCI2 = {0}".format(thetaCI2)) print("yCI2 = {0}".format(yCI2))
def posterior_mean(h_loaded, param_list):
    mean_dict = {}
    for param in param_list:
        data = h_loaded.get_distribution(t=3)[0][param]
        res_mean, res_var, res_std = stats.bayes_mvs(data, alpha=0.90)
        mean_dict.update({param: res_mean[0]})
    return mean_dict
def posterior_mean_and_credibility_intervals(h_loaded, param="C_lambda", rounding=4):
    data = h_loaded.get_distribution(t=3)[0][param]
    res_mean, res_var, res_std = stats.bayes_mvs(data, alpha=0.90)
    mean_cred_dict = {'mean': res_mean[0].round(rounding),
                      'lower': res_mean[1][0].round(rounding),
                      'upper': res_mean[1][1].round(rounding),
                      'std/n': round(res_std[0] / SMCABC_POPULATION_SIZE, rounding)}
    return mean_cred_dict
def test_confidence_interval_for_variance(self):
    n = 100
    sample = []
    for i in range(0, n):
        sample.append(random())
    mean = self.stat.calculate_mean(sample)
    var = self.stat.calculate_variance(sample, mean)
    center, lower, upper, precision = self.stat.confidence_interval_for_variance(
        var, n, 0.95)
    res = bayes_mvs(sample, 0.95)
    self.assertAlmostEqual(lower, res[1][1][0])
    self.assertAlmostEqual(upper, res[1][1][1])
def test_iterfilter(num_honest, num_skewing, num_avg, num_times, repetitions, collusion_value, randseed=None): if randseed: random.seed(randseed) num_pre_avg = num_honest + num_skewing recip_rms = [] expo_rms = [] for r in range(repetitions): honest_data = [[ random.gauss(TRUTH, VARIANCE) for j in range(num_times) ] for i in range(num_honest)] skewed_data = [[ random.gauss(collusion_value, VARIANCE) for j in range(num_times) ] for i in range(num_skewing)] uncolluded_data = honest_data + skewed_data sneaky_data = [[ sum([uncolluded_data[j][i] for j in range(num_pre_avg)]) / num_pre_avg for i in range(num_times) ]] final_data = uncolluded_data + sneaky_data recip_rms += [ rms_error(iterfilter(final_data, reciprocal), [TRUTH] * num_times) ] expo_rms += [ rms_error(iterfilter(final_data, exponential), [TRUTH] * num_times) ] recip_rms_bayes = stats.bayes_mvs(recip_rms) expo_rms_bayes = stats.bayes_mvs(expo_rms) return (recip_rms_bayes[0], expo_rms_bayes[0])
def test_iterfilter(num_honest, num_skewing, num_avg, num_times, repetitions, randseed=None): if randseed: random.seed(randseed) num_pre_avg = num_honest + num_skewing recip_rms = [] expo_rms = [] for r in range(repetitions): honest_data = [[random.gauss(TRUTH, VARIANCE) for j in range(num_times)] for i in range(num_honest)] skewed_data = [[random.gauss(COLLUSION_VALUE, VARIANCE) for j in range(num_times)] for i in range(num_skewing)] uncolluded_data = honest_data + skewed_data sneaky_data = [[sum([uncolluded_data[j][i] for j in range(num_pre_avg)])/num_pre_avg for i in range(num_times)]] final_data = uncolluded_data + sneaky_data recip_rms += [rms_error(iterfilter(final_data, reciprocal), [TRUTH] * num_times)] expo_rms += [rms_error(iterfilter(final_data, exponential), [TRUTH] * num_times)] recip_rms_bayes = stats.bayes_mvs(recip_rms) expo_rms_bayes = stats.bayes_mvs(expo_rms) return (recip_rms_bayes[0], expo_rms_bayes[0])
def fit_glm_bootstrap(self, glm_model, n=20): """Fit GLM, using bootstrap resamplig to estimate parameter distributions.""" data_dict = collections.defaultdict(list) for _ in range(n): beta_hat_poly, nll, aic = self.bootstrap_resample().fit_glm( glm_model) data_dict['beta_hat_poly'].append(beta_hat_poly) data_dict['nll'].append(nll) data_dict['AIC'].append(aic) stat_dict = {'AIC': {}, 'nll': {}} for key in ['AIC', 'nll']: fit = sps.bayes_mvs(np.array(data_dict[key])) stat_dict[key]['mean'] = { 'statistic': fit[0].statistic, 'minmax': list(fit[0].minmax) } stat_dict[key]['std'] = { 'statistic': fit[2].statistic, 'minmax': list(fit[2].minmax) } for bi, beta_data in enumerate(zip(*data_dict['beta_hat_poly'])): key = f'b{bi}' if np.abs(beta_data).sum() == 0: continue fit = sps.bayes_mvs(beta_data) stat_dict[key] = {} stat_dict[key]['mean'] = { 'statistic': fit[0].statistic, 'minmax': list(fit[0].minmax) } stat_dict[key]['std'] = { 'statistic': fit[2].statistic, 'minmax': list(fit[2].minmax) } return stat_dict
def _conf_interval(x, conf_level=0.95):
    # Histogram
    hist, bins = np.histogram(x)
    width = np.diff(bins)
    center = (bins[:-1] + bins[1:]) / 2
    fig, ax = plt.subplots(figsize=(8, 4))
    ax.bar(center, hist, align='center', width=width)
    plt.savefig('..\\resources\\hist.png')
    plt.show()
    # Mean, variance and standard deviation
    mean_, variance_, std_ = stats.bayes_mvs(x, conf_level)
    return mean_[0], mean_[1], variance_[0], variance_[1]
def _calculate_mean_and_confidence_intervals(vector):
    bundled_stats = stats.bayes_mvs(vector)
    mean = np.mean(vector)
    lower_bound = bundled_stats[0][1][0]
    upper_bound = bundled_stats[0][1][1]
    if np.isnan(lower_bound):
        lower_bound = mean
        upper_bound = mean
    return {
        'mean': mean,
        'lower_bound': lower_bound,
        'upper_bound': upper_bound
    }
def submit_time_histogram(arr):
    """
    Use Matplotlib to plot a normalized histogram of submit times
    """
    from math import ceil, log
    try:
        import matplotlib.mlab as mlab
        from prettyplotlib import plt
    except ImportError:
        print('You must have Matplotlib and Prettyplotlib installed to plot a histogram.')
    # Use Sturges' formula for number of bins: k = ceiling(log2 n + 1)
    k = ceil(log(len(arr), 2) + 1)
    n, bins, patches = plt.hist(arr, k, normed=1, facecolor='green', alpha=0.75)
    # throw a PDF plot on top of it
    #y = mlab.normpdf(bins, np.mean(arr), np.std(arr))
    #l = plt.plot(bins, y, 'r--', linewidth=1)
    # Get a Bayesian confidence interval for mean, variance, standard deviation
    dmean, dvar, dsd = bayes_mvs(arr)
    # drop a line in at the mean for fun
    plt.axvline(dmean[0], color='blue', alpha=0.5)
    plt.axvspan(dmean[1][0], dmean[1][1], color='blue', alpha=0.5)
    plt.axvline(np.median(arr), color='y', alpha=0.5)
    # Calculate a Kernel Density Estimate
    density = gaussian_kde(arr)
    xs = np.arange(0., np.max(arr), 0.1)
    density.covariance_factor = lambda: .25
    density._compute_covariance()
    plt.plot(xs, density(xs), color='m')
    #FIXME: come up with better legend names
    #plt.legend(('Normal Curve', 'Mean', 'Median', 'KDE'))
    plt.legend(('Mean', 'Median', 'KDE'))
    plt.xlabel('Submit Times (in Seconds)')
    plt.ylabel('Probability')
    plt.title('Histogram of Worker submit times')
    plt.grid(True)
    plt.show()
def random_sample(fdist, sample_size, runs=1000): """ Given an nltk.FreqDist, compute frequency distributions from random of the data. Returns the means for type count and number of hapaxes, with confidence intervals: ((mean (min, max)), (mean (min, max))) """ stats_pool = [] for x in range(0, runs): sample = nltk.FreqDist( random.sample(list(fdist.elements()), sample_size)) stats_pool.append(( sample.B(), #number of types, realized productivity len(sample.hapaxes()), # number of hapax legomena )) stats_pool = np.array(stats_pool) mean_types = st.bayes_mvs(stats_pool[:, 0]) mean_hapaxes = st.bayes_mvs(stats_pool[:, 1]) return mean_types, mean_hapaxes
def basicStats(x): return pd.Series([ x.count(), x.isna().sum(), x.min(), x.max(), round(x.quantile(.25), 6), round(x.quantile(.75), 6), round(x.mean(), 6), round(x.median(), 6), round(x.sum(), 6), round(x.sem(), 6), round(stat.bayes_mvs(x, alpha=0.95)[0].minmax[0], 6), round(stat.bayes_mvs(x, alpha=0.95)[0].minmax[1], 6), round(x.var(), 6), round(x.std(), 6), round(x.skew(), 6), round(x.kurt(), 6) ], index=[ '總計', 'NAs', '最小值', '最大值', '25%分位數', '75%分位數', '均值', '中位數', '總和', '平均值標準誤差', "LCL Mean", "UCL Mean", '方差', '標準差', '偏度', '超額峰度' ])
def generate_plots(self, mcmc, map_fit=None, pdf_filename=None): """ Generate interactive or PDF plots from MCMC trace. Parameters ---------- mcmc : pymc.MCMC MCMC samples to plot map_fit : pymc.MAP, optional, default=None Plot the maximum a posteriori (MAP) estimate if provided. pdf_filename : str, optional, default=None If specified, generate a PDF containing plots. """ alpha = 0.95 # confidence interval width print('') print('Generating plots...') from scipy.stats import bayes_mvs with PdfPages(pdf_filename) as pdf: for group in self.parameter_names: print(group) for name in self.parameter_names[group]: try: if map_fit: mle = getattr(map_fit, name).value else: mle = getattr(mcmc, name).trace().mean() trace = getattr(mcmc, name).trace() mean_cntr, var_cntr, std_cntr = bayes_mvs(trace, alpha=alpha) (center, (lower, upper)) = mean_cntr if trace.std() == 0.0: lower = upper = trace[0] plt.figure(figsize=(12, 8)) plt.hold(True) niterations = len(trace) plt.plot([0, niterations], [mle, mle], 'r-') plt.plot(trace, 'k.') plt.xlabel('iteration') plt.ylabel(name) plt.title(name) pdf.savefig() plt.close() except AttributeError as e: pass
def test_basic(self):
    # Expected values in this test simply taken from the function. For
    # some checks regarding correctness of implementation, see review in
    # gh-674
    data = [6, 9, 12, 7, 8, 8, 13]
    mean, var, std = stats.bayes_mvs(data)
    assert_almost_equal(mean.statistic, 9.0)
    assert_allclose(mean.minmax, (7.1036502226125329, 10.896349777387467),
                    rtol=1e-14)
    assert_almost_equal(var.statistic, 10.0)
    assert_allclose(var.minmax, (3.1767242068607087, 24.45910381334018),
                    rtol=1e-09)
    assert_almost_equal(std.statistic, 2.9724954732045084, decimal=14)
    assert_allclose(std.minmax, (1.7823367265645145, 4.9456146050146312),
                    rtol=1e-14)
def dados(): clf = AdaBoostClassifier(n_estimators=50, learning_rate=1, random_state=0) x, y = questao1() modelo = clf.fit(x, y) k = 10 scores = cross_val_score(modelo, x, y, cv=k) cont = 0 for score in scores: cont += 1 print("score " + str(cont) + ": ", score) taxa_de_acerto = np.mean(scores) print("Taxa de Acerto: ", taxa_de_acerto) desvio = scores.std() print("Desvio", desvio) print(bayes_mvs(scores, 0.95))
def update_figure8(n_clicks, value, start_date, end_date, my_ticker_symbol, confidence): month = [ "January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December" ] dataOpen = pdr.DataReader(value, 'yahoo', start_date, end_date)["Open"] dataClose = pdr.DataReader(value, 'yahoo', start_date, end_date)["Close"] dataLogReturns = np.log(dataClose) - np.log(dataOpen) for i in value: dataLogReturns = dataLogReturns.rename(columns={i: i + "_returns"}) dataLogReturns = dataLogReturns.assign(DateTime=dataLogReturns.index) filter1 = dataLogReturns["DateTime"].dt.day_name() dataLogReturns = dataLogReturns.assign(Day=filter1) filter2 = dataLogReturns["DateTime"].dt.month_name() dataLogReturns = dataLogReturns.assign(Month=filter2) filter3 = dataLogReturns["DateTime"].dt.year dataLogReturns = dataLogReturns.assign(Year=filter3) i = my_ticker_symbol + "_returns" res_mean, res_var, res_std = stats.bayes_mvs(dataLogReturns[i], alpha=confidence) res_mean1 = list(res_mean.minmax) res_var1 = list(res_var.minmax) table_header = [ html.Thead( html.Tr([ html.Th("Stock"), html.Th("Mean Range (Min,Max)"), html.Th("Variance Range (Min,Max)") ])) ] row1 = html.Tr([ html.Td(my_ticker_symbol), html.Td("Min:" + str(res_mean1[0]) + " Max:" + str(res_mean1[1])), html.Td("Min:" + str(res_var1[0]) + " Max:" + str(res_var1[1])) ]) table_body = [html.Tbody([row1])] table = dbc.Table(table_header + table_body, bordered=True, dark=True, hover=True) return table
def show_summary(self, mcmc, map_fit=None): """ Show summary statistics of MCMC and (optionally) MAP estimates. Parameters ---------- mcmc : pymc.MCMC MCMC samples map_fit : pymc.MAP, optional, default=None The MAP fit. TODO ---- * Automatically determine appropriate number of decimal places from statistical uncertainty. * Automatically adjust concentration units (e.g. pM, nM, uM) depending on estimated affinity. """ # Compute summary statistics alpha = 0.95 # confidence interval width from scipy.stats import bayes_mvs for group in self.parameter_names: print(group) for name in self.parameter_names[group]: try: if map_fit: mle = getattr(map_fit, name).value else: mle = getattr(mcmc, name).trace().mean() trace = getattr(mcmc, name).trace() mean_cntr, var_cntr, std_cntr = bayes_mvs(trace, alpha=alpha) (center, (lower, upper)) = mean_cntr if trace.std() == 0.0: lower = upper = trace[0] if ('concentration' in name) or ('volume' in name): print("%-64s : initial %7.1e final %7.1e : %7.1e [%7.1e, %7.1e]" % (name, trace[0], trace[-1], mle, lower, upper)) else: print("%-64s : initial %7.1f final %7.1f : %7.1f [%7.1f, %7.1f]" % (name, trace[0], trace[-1], mle, lower, upper)) except AttributeError as e: # Skip observed stochastics pass print('')
def confianca(self):
    '''
    Computes the confidence interval for each series of samples.
    '''
    self.juntaMesmoTipo()
    for download in self.dadosOrganizados:
        for serie in self.dadosOrganizados[download]:
            interador = 0
            confianca = []
            for i in self.dadosOrganizados[download][serie][0]:
                vetor = []
                for numero in self.dadosOrganizados[download][serie]:
                    vetor.append(numero[interador])
                if (len(list(set(vetor))) > 1):
                    confianca.append(stats.bayes_mvs(vetor, alpha=0.95)[0])
                else:
                    confianca.append((vetor[0], (0, 0)))
                interador += 1
            self.dadosOrganizados[download][serie].append({'confianca': confianca})
    return self.dadosOrganizados, self.quantidade
def mean_intervals(dataset, column_label="irony", alpha=0.05):
    """
    Confidence intervals for the mean of a bunch of samples (one for each different label).

    Args:
        DataFrame::dataset
        str::column_label: column containing sample labels
        float::alpha: first order risk
    Return:
        DataFrame
    """
    intervals = []
    for feature in dataset.columns:
        if feature != column_label:
            temp = {}
            for label, group in dataset.groupby(column_label):
                mean, (inf, sup) = stats.bayes_mvs(group[feature], alpha=1 - alpha)[0]
                temp[label] = {"inf": inf, "mean": mean, "sup": sup}
            intervals.append(DataFrame(temp).T)
    return concat(intervals, keys=dataset.columns)
num_sensors = 20 biases = array([0.3 * sensor for sensor in range(num_sensors)]) compensated_biases = biases - array([mean(biases)] * num_sensors) variances = array([(num_sensors - sensor + 1) / 2 for sensor in range(num_sensors)]) num_times = 10 true_value = lambda t: (t - num_times / 2) ** 4 num_readings = 500 reading_sampling = [readings_generator.readings(compensated_biases, variances, num_times, true_value) for i in range(num_readings)] bias_estimates = array([linear_solution_bias_estimate(r) for r in reading_sampling]) variance_estimates_with_estimated_biases = array( [linear_solution_variance_estimate(r, b) for r, b in zip(reading_sampling, bias_estimates)]) alpha = 0.95 #variance_estimates[i,s] gives the estimate of sensor s in reading i. #variance_estimates.transpose()[s, i] gives the same. variance_cis = [stats.bayes_mvs(v_est, alpha) for v_est in variance_estimates_with_estimated_biases.transpose()] v_mean_cis, v_var_cis, v_std_cis = zip(*variance_cis) v_mean_cntrs, v_mean_bounds = zip(*v_mean_cis) v_mean_lo, v_mean_hi = zip(*v_mean_bounds) v_mean_lo_diff = array(v_mean_cntrs) - array(v_mean_lo) v_mean_hi_diff = array(v_mean_hi) - array(v_mean_cntrs) fig = pyplot.figure() axes = fig.add_subplot(1, 1, 1) axes.set_title('Variance Estimation (p={}, n={})'.format(alpha, num_readings)) axes.plot(variances) axes.errorbar(range(num_sensors), v_mean_cntrs, vstack((v_mean_lo_diff, v_mean_hi_diff))) reading_sampling = [readings_generator.readings(compensated_biases, variances, num_times, true_value) for i in range(num_readings)] variance_estimates_with_true_biases = array(
variance = 1 bias = 0 truth = 0 times = 10 num_sensors = 10 variances = [variance] * num_sensors biases = [bias] * num_sensors iter_rms_errors = [] mle_rms_errors = [] def truth_fn(t): return truth for i in range(repeats): print ('{}/{}'.format(i, repeats)) readings = readings_generator.readings(biases, variances, times, truth_fn) estimate = robust_aggregate.estimate(readings, exponential) iter_rms_errors += [rms_error(estimate, [0]*num_sensors)] mle_estiamte = mle.estimate(readings, variances, biases) mle_rms_errors += [rms_error(mle_estiamte, [0]*num_sensors)] iter_mvs = bayes_mvs(iter_rms_errors) mle_mvs = bayes_mvs(mle_rms_errors) pp.bar([0, 1], [iter_mvs[0][0], mle_mvs[0][0]], yerr=[iter_mvs[0][0]-iter_mvs[0][1][0], mle_mvs[0][0]-mle_mvs[0][1][0]]) pp.show()
for line in tt: target_taxa.append(line.rstrip()) tt.close() #now read in a collection of trees, calc branch lengths over sample, summarise and print out branch_lengths = defaultdict(list) #key = taxa, value = list of brlens treefile = open(sys.argv[3]) for line in treefile: curr_tree = Tree(line.rstrip()) root_node = curr_tree.get_common_ancestor(outgroups) if curr_tree != root_node: curr_tree.set_outgroup(root_node) print curr_tree #bundle = curr_tree.check_monophyly(values=outgroups,target_attr='name') #print bundle #if bundle[0] == False: # continue #find common ancestor of the target taxa, and use this as the reference node for calculating branch lengths. This might not always be the measure you want! reference_node = curr_tree.get_common_ancestor(target_taxa) #if reference_node != curr_tree: # curr_tree.set_outgroup(reference_node) #calc distance from root to each branch of interest for taxon in target_taxa: dist = curr_tree.get_distance(taxon, reference_node) branch_lengths[taxon].append(dist) #now compute the credible intervals of the branch length for each of the target taxa for taxon in branch_lengths: mean, var, std = stats.bayes_mvs(branch_lengths[taxon], alpha=0.95) print taxon + "\t" + str(mean[0]) + "\t" + str(mean[1][0]) + "\t" + str(mean[1][1])
def intervaloConfianca(cop,limiteTempo,coeficientes,confidence = 0.99): """ Calcula o intervalo de confianca a partir da simulação usando a funcao e seus parametros A simulação para no tempoLimite = distancia entre o primeiro e último evento """ qtdeSimulacoes = 100 serieSimulacao = [] serieIntervaloEntreChegadas = [] serieQtdeIntervalo = [] serieProbIntervalo = [] for simulacao in range(0,qtdeSimulacoes): serieSimulacao.append([]) serieIntervaloEntreChegadas.append([]) serieQtdeIntervalo.append([]) serieProbIntervalo.append([]) tempoSimulacao = 0 # controlando por tempo while tempoSimulacao < limiteTempo: #tempoSimulacao = tempoSimulacao + invFuncWeibull(np.random.uniform(0,1),*coeficientes) #serieSimulacao[simulacao].append(tempoSimulacao) randomNumber = random.uniform(0,1) ts = 0 while ts < 60: if(funcWeibull(ts,*coeficientes)>randomNumber): tempoSimulacao = tempoSimulacao + ts serieSimulacao[simulacao].append(tempoSimulacao) ts = 61 ts = ts + 1 """ # por qtde for item in range(0,limiteTempo+1): serieSimulacao[simulacao].append(invFuncWeibull(np.random.uniform(0,1),*coeficientes)) """ # calculo do intervalo entre chegadas for i in range(0,len(serieSimulacao[simulacao])-1): serieIntervaloEntreChegadas[simulacao].append(serieSimulacao[simulacao][i+1] - serieSimulacao[simulacao][i]) print 'QTDE======= ',len(serieIntervaloEntreChegadas[simulacao]) #calculo da qtde de chegadas em cada intervalo for t in np.arange(0,61,1): serieQtdeIntervalo[simulacao].append(float(len([q for q in serieIntervaloEntreChegadas[simulacao] if (q <= t)]))) #transformando qtde em porcentagem for t in np.arange(0,61,1): serieProbIntervalo[simulacao] = [q/float(serieQtdeIntervalo[simulacao][-1]) for q in serieQtdeIntervalo[simulacao]] # calculo do IC para cada ponto da distribuicao media = [] upper = [] lower = [] valoresPorInstante = [] for tempo in np.arange(0,61,1): valoresPorInstante.append([]) for simulacao in range(0,qtdeSimulacoes): #valoresPorInstante[tempo].append(serieQtdeIntervalo[simulacao][tempo]) valoresPorInstante[tempo].append(serieProbIntervalo[simulacao][tempo]) # gerando media e limites do IC icmedia = str(bayes_mvs(valoresPorInstante[tempo],confidence)).split(')),')[0] icmedia = icmedia.replace(" ","") icmedia = icmedia.replace("(","") icmedia = icmedia.replace(")","") m,l,u = icmedia.split(',') #dados da populacao sizeData, (minimum,maximum),arithmeticMean,variance,skeness,kurtosis = stats.describe(valoresPorInstante[tempo]) #l,u = norm.interval(confidence, loc=arithmeticMean, scale=sqrt(variance)) #media.append(m) tmp = 2.58 * sqrt(variance)/sqrt(len(valoresPorInstante[tempo])) l = arithmeticMean - tmp u = arithmeticMean + tmp media.append(arithmeticMean) lower.append(l) upper.append(u) return serieProbIntervalo, media,lower,upper
if __name__ == '__main__': repeats = 1000 variance = 1 bias = 0 truth = 0 times = 10 num_sensors = 10 variances = [variance] * num_sensors biases = [bias] * num_sensors time_errors = [[] for t in range(times)] def truth_fn(t): return truth for i in range(repeats): print ('{}/{}'.format(i, repeats)) readings = readings_generator.readings(biases, variances, times, truth_fn) estimate = robust_aggregate.estimate(readings, exponential) for t in range(times): time_errors[t] += [estimate[t]] mvs = [bayes_mvs(t) for t in time_errors] pp.errorbar(range(times), [m[0][0] for m in mvs], yerr=[m[0][0]-m[0][1][0] for m in mvs]) pp.show()
def main(): parser = argparse.ArgumentParser() parser.add_argument("-i", "--infile", required=True, help="Tabular file.") parser.add_argument("-o", "--outfile", required=True, help="Path to the output file.") parser.add_argument("--sample_one_cols", help="Input format, like smi, sdf, inchi") parser.add_argument("--sample_two_cols", help="Input format, like smi, sdf, inchi") parser.add_argument("--sample_cols", help="Input format, like smi, sdf, inchi,separate arrays using ;") parser.add_argument("--test_id", help="statistical test method") parser.add_argument( "--mwu_use_continuity", action="store_true", default=False, help="Whether a continuity correction (1/2.) should be taken into account.", ) parser.add_argument( "--equal_var", action="store_true", default=False, help="If set perform a standard independent 2 sample test that assumes equal population variances. If not set, perform Welch's t-test, which does not assume equal population variance.", ) parser.add_argument( "--reta", action="store_true", default=False, help="Whether or not to return the internally computed a values." ) parser.add_argument("--fisher", action="store_true", default=False, help="if true then Fisher definition is used") parser.add_argument( "--bias", action="store_true", default=False, help="if false,then the calculations are corrected for statistical bias", ) parser.add_argument("--inclusive1", action="store_true", default=False, help="if false,lower_limit will be ignored") parser.add_argument( "--inclusive2", action="store_true", default=False, help="if false,higher_limit will be ignored" ) parser.add_argument("--inclusive", action="store_true", default=False, help="if false,limit will be ignored") parser.add_argument( "--printextras", action="store_true", default=False, help="If True, if there are extra points a warning is raised saying how many of those points there are", ) parser.add_argument( "--initial_lexsort", action="store_true", default="False", help="Whether to use lexsort or quicksort as the sorting method for the initial sort of the inputs.", ) parser.add_argument("--correction", action="store_true", default=False, help="continuity correction ") parser.add_argument( "--axis", type=int, default=0, help="Axis can equal None (ravel array first), or an integer (the axis over which to operate on a and b)", ) parser.add_argument( "--n", type=int, default=0, help="the number of trials. This is ignored if x gives both the number of successes and failures", ) parser.add_argument("--b", type=int, default=0, help="The number of bins to use for the histogram") parser.add_argument("--N", type=int, default=0, help="Score that is compared to the elements in a.") parser.add_argument("--ddof", type=int, default=0, help="Degrees of freedom correction") parser.add_argument("--score", type=int, default=0, help="Score that is compared to the elements in a.") parser.add_argument("--m", type=float, default=0.0, help="limits") parser.add_argument("--mf", type=float, default=2.0, help="lower limit") parser.add_argument("--nf", type=float, default=99.9, help="higher_limit") parser.add_argument( "--p", type=float, default=0.5, help="The hypothesized probability of success. 0 <= p <= 1. 
The default value is p = 0.5", ) parser.add_argument("--alpha", type=float, default=0.9, help="probability") parser.add_argument("--new", type=float, default=0.0, help="Value to put in place of values in a outside of bounds") parser.add_argument( "--proportiontocut", type=float, default=0.0, help="Proportion (in range 0-1) of total data set to trim of each end.", ) parser.add_argument( "--lambda_", type=float, default=1.0, help="lambda_ gives the power in the Cressie-Read power divergence statistic", ) parser.add_argument( "--imbda", type=float, default=0, help="If lmbda is not None, do the transformation for that value.If lmbda is None, find the lambda that maximizes the log-likelihood function and return it as the second output argument.", ) parser.add_argument("--base", type=float, default=1.6, help="The logarithmic base to use, defaults to e") parser.add_argument("--dtype", help="dtype") parser.add_argument("--med", help="med") parser.add_argument("--cdf", help="cdf") parser.add_argument("--zero_method", help="zero_method options") parser.add_argument("--dist", help="dist options") parser.add_argument("--ties", help="ties options") parser.add_argument("--alternative", help="alternative options") parser.add_argument("--mode", help="mode options") parser.add_argument("--method", help="method options") parser.add_argument("--md", help="md options") parser.add_argument("--center", help="center options") parser.add_argument("--kind", help="kind options") parser.add_argument("--tail", help="tail options") parser.add_argument("--interpolation", help="interpolation options") parser.add_argument("--statistic", help="statistic options") args = parser.parse_args() infile = args.infile outfile = open(args.outfile, "w+") test_id = args.test_id nf = args.nf mf = args.mf imbda = args.imbda inclusive1 = args.inclusive1 inclusive2 = args.inclusive2 sample0 = 0 sample1 = 0 sample2 = 0 if args.sample_cols != None: sample0 = 1 barlett_samples = [] for sample in args.sample_cols.split(";"): barlett_samples.append(map(int, sample.split(","))) if args.sample_one_cols != None: sample1 = 1 sample_one_cols = args.sample_one_cols.split(",") if args.sample_two_cols != None: sample_two_cols = args.sample_two_cols.split(",") sample2 = 1 for line in open(infile): sample_one = [] sample_two = [] cols = line.strip().split("\t") if sample0 == 1: b_samples = columns_to_values(barlett_samples, line) if sample1 == 1: for index in sample_one_cols: sample_one.append(cols[int(index) - 1]) if sample2 == 1: for index in sample_two_cols: sample_two.append(cols[int(index) - 1]) if test_id.strip() == "describe": size, min_max, mean, uv, bs, bk = stats.describe(map(float, sample_one)) cols.append(size) cols.append(min_max) cols.append(mean) cols.append(uv) cols.append(bs) cols.append(bk) elif test_id.strip() == "mode": vals, counts = stats.mode(map(float, sample_one)) cols.append(vals) cols.append(counts) elif test_id.strip() == "nanmean": m = stats.nanmean(map(float, sample_one)) cols.append(m) elif test_id.strip() == "nanmedian": m = stats.nanmedian(map(float, sample_one)) cols.append(m) elif test_id.strip() == "kurtosistest": z_value, p_value = stats.kurtosistest(map(float, sample_one)) cols.append(z_value) cols.append(p_value) elif test_id.strip() == "variation": ra = stats.variation(map(float, sample_one)) cols.append(ra) elif test_id.strip() == "itemfreq": freq = stats.itemfreq(map(float, sample_one)) for list in freq: elements = ",".join(map(str, list)) cols.append(elements) elif test_id.strip() == "nanmedian": m = 
stats.nanmedian(map(float, sample_one)) cols.append(m) elif test_id.strip() == "variation": ra = stats.variation(map(float, sample_one)) cols.append(ra) elif test_id.strip() == "boxcox_llf": IIf = stats.boxcox_llf(imbda, map(float, sample_one)) cols.append(IIf) elif test_id.strip() == "tiecorrect": fa = stats.tiecorrect(map(float, sample_one)) cols.append(fa) elif test_id.strip() == "rankdata": r = stats.rankdata(map(float, sample_one), method=args.md) cols.append(r) elif test_id.strip() == "nanstd": s = stats.nanstd(map(float, sample_one), bias=args.bias) cols.append(s) elif test_id.strip() == "anderson": A2, critical, sig = stats.anderson(map(float, sample_one), dist=args.dist) cols.append(A2) for list in critical: cols.append(list) cols.append(",") for list in sig: cols.append(list) elif test_id.strip() == "binom_test": p_value = stats.binom_test(map(float, sample_one), n=args.n, p=args.p) cols.append(p_value) elif test_id.strip() == "gmean": gm = stats.gmean(map(float, sample_one), dtype=args.dtype) cols.append(gm) elif test_id.strip() == "hmean": hm = stats.hmean(map(float, sample_one), dtype=args.dtype) cols.append(hm) elif test_id.strip() == "kurtosis": k = stats.kurtosis(map(float, sample_one), axis=args.axis, fisher=args.fisher, bias=args.bias) cols.append(k) elif test_id.strip() == "moment": n_moment = stats.moment(map(float, sample_one), n=args.n) cols.append(n_moment) elif test_id.strip() == "normaltest": k2, p_value = stats.normaltest(map(float, sample_one)) cols.append(k2) cols.append(p_value) elif test_id.strip() == "skew": skewness = stats.skew(map(float, sample_one), bias=args.bias) cols.append(skewness) elif test_id.strip() == "skewtest": z_value, p_value = stats.skewtest(map(float, sample_one)) cols.append(z_value) cols.append(p_value) elif test_id.strip() == "sem": s = stats.sem(map(float, sample_one), ddof=args.ddof) cols.append(s) elif test_id.strip() == "zscore": z = stats.zscore(map(float, sample_one), ddof=args.ddof) for list in z: cols.append(list) elif test_id.strip() == "signaltonoise": s2n = stats.signaltonoise(map(float, sample_one), ddof=args.ddof) cols.append(s2n) elif test_id.strip() == "percentileofscore": p = stats.percentileofscore(map(float, sample_one), score=args.score, kind=args.kind) cols.append(p) elif test_id.strip() == "bayes_mvs": c_mean, c_var, c_std = stats.bayes_mvs(map(float, sample_one), alpha=args.alpha) cols.append(c_mean) cols.append(c_var) cols.append(c_std) elif test_id.strip() == "sigmaclip": c, c_low, c_up = stats.sigmaclip(map(float, sample_one), low=args.m, high=args.n) cols.append(c) cols.append(c_low) cols.append(c_up) elif test_id.strip() == "kstest": d, p_value = stats.kstest( map(float, sample_one), cdf=args.cdf, N=args.N, alternative=args.alternative, mode=args.mode ) cols.append(d) cols.append(p_value) elif test_id.strip() == "chi2_contingency": chi2, p, dof, ex = stats.chi2_contingency( map(float, sample_one), correction=args.correction, lambda_=args.lambda_ ) cols.append(chi2) cols.append(p) cols.append(dof) cols.append(ex) elif test_id.strip() == "tmean": if nf is 0 and mf is 0: mean = stats.tmean(map(float, sample_one)) else: mean = stats.tmean(map(float, sample_one), (mf, nf), (inclusive1, inclusive2)) cols.append(mean) elif test_id.strip() == "tmin": if mf is 0: min = stats.tmin(map(float, sample_one)) else: min = stats.tmin(map(float, sample_one), lowerlimit=mf, inclusive=args.inclusive) cols.append(min) elif test_id.strip() == "tmax": if nf is 0: max = stats.tmax(map(float, sample_one)) else: max = 
stats.tmax(map(float, sample_one), upperlimit=nf, inclusive=args.inclusive) cols.append(max) elif test_id.strip() == "tvar": if nf is 0 and mf is 0: var = stats.tvar(map(float, sample_one)) else: var = stats.tvar(map(float, sample_one), (mf, nf), (inclusive1, inclusive2)) cols.append(var) elif test_id.strip() == "tstd": if nf is 0 and mf is 0: std = stats.tstd(map(float, sample_one)) else: std = stats.tstd(map(float, sample_one), (mf, nf), (inclusive1, inclusive2)) cols.append(std) elif test_id.strip() == "tsem": if nf is 0 and mf is 0: s = stats.tsem(map(float, sample_one)) else: s = stats.tsem(map(float, sample_one), (mf, nf), (inclusive1, inclusive2)) cols.append(s) elif test_id.strip() == "scoreatpercentile": if nf is 0 and mf is 0: s = stats.scoreatpercentile( map(float, sample_one), map(float, sample_two), interpolation_method=args.interpolation ) else: s = stats.scoreatpercentile( map(float, sample_one), map(float, sample_two), (mf, nf), interpolation_method=args.interpolation ) for list in s: cols.append(list) elif test_id.strip() == "relfreq": if nf is 0 and mf is 0: rel, low_range, binsize, ex = stats.relfreq(map(float, sample_one), args.b) else: rel, low_range, binsize, ex = stats.relfreq(map(float, sample_one), args.b, (mf, nf)) for list in rel: cols.append(list) cols.append(low_range) cols.append(binsize) cols.append(ex) elif test_id.strip() == "binned_statistic": if nf is 0 and mf is 0: st, b_edge, b_n = stats.binned_statistic( map(float, sample_one), map(float, sample_two), statistic=args.statistic, bins=args.b ) else: st, b_edge, b_n = stats.binned_statistic( map(float, sample_one), map(float, sample_two), statistic=args.statistic, bins=args.b, range=(mf, nf), ) cols.append(st) cols.append(b_edge) cols.append(b_n) elif test_id.strip() == "threshold": if nf is 0 and mf is 0: o = stats.threshold(map(float, sample_one), newval=args.new) else: o = stats.threshold(map(float, sample_one), mf, nf, newval=args.new) for list in o: cols.append(list) elif test_id.strip() == "trimboth": o = stats.trimboth(map(float, sample_one), proportiontocut=args.proportiontocut) for list in o: cols.append(list) elif test_id.strip() == "trim1": t1 = stats.trim1(map(float, sample_one), proportiontocut=args.proportiontocut, tail=args.tail) for list in t1: cols.append(list) elif test_id.strip() == "histogram": if nf is 0 and mf is 0: hi, low_range, binsize, ex = stats.histogram(map(float, sample_one), args.b) else: hi, low_range, binsize, ex = stats.histogram(map(float, sample_one), args.b, (mf, nf)) cols.append(hi) cols.append(low_range) cols.append(binsize) cols.append(ex) elif test_id.strip() == "cumfreq": if nf is 0 and mf is 0: cum, low_range, binsize, ex = stats.cumfreq(map(float, sample_one), args.b) else: cum, low_range, binsize, ex = stats.cumfreq(map(float, sample_one), args.b, (mf, nf)) cols.append(cum) cols.append(low_range) cols.append(binsize) cols.append(ex) elif test_id.strip() == "boxcox_normmax": if nf is 0 and mf is 0: ma = stats.boxcox_normmax(map(float, sample_one)) else: ma = stats.boxcox_normmax(map(float, sample_one), (mf, nf), method=args.method) cols.append(ma) elif test_id.strip() == "boxcox": if imbda is 0: box, ma, ci = stats.boxcox(map(float, sample_one), alpha=args.alpha) cols.append(box) cols.append(ma) cols.append(ci) else: box = stats.boxcox(map(float, sample_one), imbda, alpha=args.alpha) cols.append(box) elif test_id.strip() == "histogram2": h2 = stats.histogram2(map(float, sample_one), map(float, sample_two)) for list in h2: cols.append(list) elif test_id.strip() == 
"ranksums": z_statistic, p_value = stats.ranksums(map(float, sample_one), map(float, sample_two)) cols.append(z_statistic) cols.append(p_value) elif test_id.strip() == "ttest_1samp": t, prob = stats.ttest_1samp(map(float, sample_one), map(float, sample_two)) for list in t: cols.append(list) for list in prob: cols.append(list) elif test_id.strip() == "ansari": AB, p_value = stats.ansari(map(float, sample_one), map(float, sample_two)) cols.append(AB) cols.append(p_value) elif test_id.strip() == "linregress": slope, intercept, r_value, p_value, stderr = stats.linregress( map(float, sample_one), map(float, sample_two) ) cols.append(slope) cols.append(intercept) cols.append(r_value) cols.append(p_value) cols.append(stderr) elif test_id.strip() == "pearsonr": cor, p_value = stats.pearsonr(map(float, sample_one), map(float, sample_two)) cols.append(cor) cols.append(p_value) elif test_id.strip() == "pointbiserialr": r, p_value = stats.pointbiserialr(map(float, sample_one), map(float, sample_two)) cols.append(r) cols.append(p_value) elif test_id.strip() == "ks_2samp": d, p_value = stats.ks_2samp(map(float, sample_one), map(float, sample_two)) cols.append(d) cols.append(p_value) elif test_id.strip() == "mannwhitneyu": mw_stats_u, p_value = stats.mannwhitneyu( map(float, sample_one), map(float, sample_two), use_continuity=args.mwu_use_continuity ) cols.append(mw_stats_u) cols.append(p_value) elif test_id.strip() == "zmap": z = stats.zmap(map(float, sample_one), map(float, sample_two), ddof=args.ddof) for list in z: cols.append(list) elif test_id.strip() == "ttest_ind": mw_stats_u, p_value = stats.ttest_ind( map(float, sample_one), map(float, sample_two), equal_var=args.equal_var ) cols.append(mw_stats_u) cols.append(p_value) elif test_id.strip() == "ttest_rel": t, prob = stats.ttest_rel(map(float, sample_one), map(float, sample_two), axis=args.axis) cols.append(t) cols.append(prob) elif test_id.strip() == "mood": z, p_value = stats.mood(map(float, sample_one), map(float, sample_two), axis=args.axis) cols.append(z) cols.append(p_value) elif test_id.strip() == "shapiro": W, p_value, a = stats.shapiro(map(float, sample_one), map(float, sample_two), args.reta) cols.append(W) cols.append(p_value) for list in a: cols.append(list) elif test_id.strip() == "kendalltau": k, p_value = stats.kendalltau( map(float, sample_one), map(float, sample_two), initial_lexsort=args.initial_lexsort ) cols.append(k) cols.append(p_value) elif test_id.strip() == "entropy": s = stats.entropy(map(float, sample_one), map(float, sample_two), base=args.base) cols.append(s) elif test_id.strip() == "spearmanr": if sample2 == 1: rho, p_value = stats.spearmanr(map(float, sample_one), map(float, sample_two)) else: rho, p_value = stats.spearmanr(map(float, sample_one)) cols.append(rho) cols.append(p_value) elif test_id.strip() == "wilcoxon": if sample2 == 1: T, p_value = stats.wilcoxon( map(float, sample_one), map(float, sample_two), zero_method=args.zero_method, correction=args.correction, ) else: T, p_value = stats.wilcoxon( map(float, sample_one), zero_method=args.zero_method, correction=args.correction ) cols.append(T) cols.append(p_value) elif test_id.strip() == "chisquare": if sample2 == 1: rho, p_value = stats.chisquare(map(float, sample_one), map(float, sample_two), ddof=args.ddof) else: rho, p_value = stats.chisquare(map(float, sample_one), ddof=args.ddof) cols.append(rho) cols.append(p_value) elif test_id.strip() == "power_divergence": if sample2 == 1: stat, p_value = stats.power_divergence( map(float, sample_one), map(float, 
sample_two), ddof=args.ddof, lambda_=args.lambda_ ) else: stat, p_value = stats.power_divergence(map(float, sample_one), ddof=args.ddof, lambda_=args.lambda_) cols.append(stat) cols.append(p_value) elif test_id.strip() == "theilslopes": if sample2 == 1: mpe, met, lo, up = stats.theilslopes(map(float, sample_one), map(float, sample_two), alpha=args.alpha) else: mpe, met, lo, up = stats.theilslopes(map(float, sample_one), alpha=args.alpha) cols.append(mpe) cols.append(met) cols.append(lo) cols.append(up) elif test_id.strip() == "combine_pvalues": if sample2 == 1: stat, p_value = stats.combine_pvalues( map(float, sample_one), method=args.med, weights=map(float, sample_two) ) else: stat, p_value = stats.combine_pvalues(map(float, sample_one), method=args.med) cols.append(stat) cols.append(p_value) elif test_id.strip() == "obrientransform": ob = stats.obrientransform(*b_samples) for list in ob: elements = ",".join(map(str, list)) cols.append(elements) elif test_id.strip() == "f_oneway": f_value, p_value = stats.f_oneway(*b_samples) cols.append(f_value) cols.append(p_value) elif test_id.strip() == "kruskal": h, p_value = stats.kruskal(*b_samples) cols.append(h) cols.append(p_value) elif test_id.strip() == "friedmanchisquare": fr, p_value = stats.friedmanchisquare(*b_samples) cols.append(fr) cols.append(p_value) elif test_id.strip() == "fligner": xsq, p_value = stats.fligner(center=args.center, proportiontocut=args.proportiontocut, *b_samples) cols.append(xsq) cols.append(p_value) elif test_id.strip() == "bartlett": T, p_value = stats.bartlett(*b_samples) cols.append(T) cols.append(p_value) elif test_id.strip() == "levene": w, p_value = stats.levene(center=args.center, proportiontocut=args.proportiontocut, *b_samples) cols.append(w) cols.append(p_value) elif test_id.strip() == "median_test": stat, p_value, m, table = stats.median_test( ties=args.ties, correction=args.correction, lambda_=args.lambda_, *b_samples ) cols.append(stat) cols.append(p_value) cols.append(m) cols.append(table) for list in table: elements = ",".join(map(str, list)) cols.append(elements) outfile.write("%s\n" % "\t".join(map(str, cols))) outfile.close()
truth = [TRUTH * num_sensors for i in range(num_times)] for i in range(num_repetitions): readings = [[random.gauss(TRUTH, VARIANCE) for i in range(num_sensors)] for j in range(num_times)] for j in range(num_times): recip_iterative_result = iterative_filter.by_time(readings[0:j+1], iterative_filter.reciprocal) exp_iterative_result = iterative_filter.by_time(readings[0:j+1], iterative_filter.exponential) recip_avg_error = rms_error(recip_iterative_result[0:1], [TRUTH] * len(recip_iterative_result)) exp_avg_error = rms_error(exp_iterative_result[0:1], [TRUTH] * len(exp_iterative_result)) recip_error_calcs[j].append(recip_avg_error) exp_error_calcs[j].append(exp_avg_error) recip_error_bayes = [stats.bayes_mvs(x) for x in recip_error_calcs] exp_error_bayes = [stats.bayes_mvs(x) for x in exp_error_calcs] slice_length = range(1, num_times + 1) recip_mids = [t[0][0] for t in recip_error_bayes] recip_errors = [e[0][1][0]-m for (e, m) in zip(recip_error_bayes, recip_mids)] exp_mids = [t[0][0] for t in exp_error_bayes] exp_errors = [e[0][1][0]-m for (e, m) in zip(exp_error_bayes, exp_mids)] pp.errorbar(slice_length, recip_mids, yerr=recip_errors, label='reciprocal discriminant') pp.errorbar(slice_length, exp_mids, yerr=exp_errors, label='exponential discriminant') pp.xlabel('Instants') pp.ylabel('RMS Error') pp.title('RMS Error using discriminant (n={})'.format(num_repetitions))
def confidence_interval(self, fun, alpha=0.95):
    "will return mean, confidence interval"
    return bayes_mvs(self.map_doc_scores(fun), alpha)[0]
def get_conf_interval(dataset):
    if len(dataset) > 1:
        return stats.bayes_mvs(dataset, alpha=.9)[0][1]
    else:
        return (None, None)
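# Quick usage sketch for get_conf_interval (invented data, assuming `from scipy import stats`
# as in the snippet above): the [0][1] indexing selects the (lower, upper) credible bounds
# of the mean at alpha=0.9, and single-element inputs fall back to (None, None).
low, high = get_conf_interval([4.2, 3.9, 4.4, 4.1, 4.0])
print(low, high)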
def gMIC(distrib):
    """
    Bayesian estimate of the mean, with its lower and upper credible bounds.
    """
    estimates = spp.bayes_mvs(distrib, alpha=0.95)[0]
    return [estimates[0], estimates[1][0], estimates[1][1]]
iter_recip = [] iter_expo = [] robust_agg_recip = [] robust_agg_expo = [] for i in range(repeats): print (i) readings = readings_generator.readings(biases, variances, num_times, truth) #iter_recip += [rms_error(iterfilter(readings, reciprocal), [0]*num_sensors)] iter_expo += [rms_error(iterfilter(readings, exponential), [0]*num_sensors)] #robust_agg_recip += [rms_error(estimate(readings, reciprocal), [0]*num_sensors)] robust_agg_expo += [rms_error(estimate(readings, exponential), [0]*num_sensors)] #iter_recip_mean = bayes_mvs(iter_recip)[0] iter_expo_mean = bayes_mvs(iter_expo)[0] #robust_agg_recip_mean = bayes_mvs(robust_agg_recip)[0] robust_agg_expo_mean = bayes_mvs(robust_agg_expo)[0] #iter_recip_means += [iter_recip_mean[0]] iter_expo_means += [iter_expo_mean[0]] #robust_agg_recip_means += [robust_agg_recip_mean[0]] robust_agg_expo_means += [robust_agg_expo_mean[0]] #iter_recip_errors += [iter_recip_mean[0] - iter_recip_mean[1][0]] iter_expo_errors += [iter_expo_mean[0] - iter_expo_mean[1][0]] #robust_agg_recip_errors += [robust_agg_recip_mean[0] - robust_agg_recip_mean[1][0]] robust_agg_expo_errors += [robust_agg_expo_mean[0] - robust_agg_expo_mean[1][0]] #pp.errorbar(x_values, iter_recip_means, yerr=iter_recip_errors, label='Iterative Filtering - reciprocal') pp.errorbar(x_values, iter_expo_means, yerr=iter_expo_errors, label='Iterative Filtering - exponential')