def filterByMutualRemoval(data1, data2):
    nSTD = 1
    x = []
    y = []
    std1 = stats.tstd(data1)
    mean1 = stats.tmean(data1)
    std2 = stats.tstd(data2)
    mean2 = stats.tmean(data2)
    print('m1, std1: ', mean1, std1)
    print('m2, std2: ', mean2, std2)
    # Keep a pair only if both values fall within nSTD standard
    # deviations of their respective means.
    for i in range(len(data1)):
        if data1[i] > mean1 + (nSTD * std1):
            pass
        elif data1[i] < mean1 - (nSTD * std1):
            pass
        elif data2[i] > mean2 + (nSTD * std2):
            pass
        elif data2[i] < mean2 - (nSTD * std2):  # was `value` (a data1 element), a bug
            pass
        else:
            x.append(data1[i])
            y.append(data2[i])
    return x, y
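# Usage sketch for filterByMutualRemoval (hypothetical data; assumes
# `from scipy import stats` and the function above are in scope): a pair is
# dropped from BOTH series whenever either member is an outlier.
import numpy as np
from scipy import stats

rng = np.random.default_rng(0)
a = list(rng.normal(0.0, 1.0, 200))
b = list(rng.normal(5.0, 2.0, 200))
a[0], b[1] = 10.0, 30.0  # plant one outlier in each series
xf, yf = filterByMutualRemoval(a, b)
print(len(xf), len(yf))  # equal lengths; both flagged pairs removed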
def num_features_smarch(samplefile_, n_):
    _configs = list()
    if os.path.exists(samplefile_):
        with open(samplefile_, "r") as sf:
            for line in sf:
                raw = line.split(',')
                config = raw[:len(raw) - 1]
                _configs.append(config)
    else:
        return -1
    _samples = list()
    if n_ < 0:
        _samples = _configs.copy()
    else:
        rands = get_random(n_, len(_configs))
        for r in rands:
            _samples.append(_configs[r - 1])
    _fnums = list()
    for sample in _samples:
        fnum = 0
        for v in sample:
            if not v.startswith('-'):
                fnum += 1
        _fnums.append(fnum)
    return stats.tmean(_fnums), stats.tstd(_fnums)
def plot_acc_results(paths, name):
    plt.clf()
    for c, l, p in zip(['green', 'orange'],
                       ['Random Teacher', 'RL Teacher'], paths):
        rewards = []
        with open(p) as file:
            reader = csv.reader(file, delimiter=',')
            for t in reader:
                rewards.append([])
                for r in t:
                    rewards[-1].append(float(r))
        rewards = rewards[:200]
        avg = [sum(x) / len(x) for x in rewards]
        high = [sum(x) / len(x) + stats.tstd(x) / 4 for x in rewards]
        low = [sum(x) / len(x) - stats.tstd(x) / 4 for x in rewards]
        plt.plot(avg, color=c, label=l)
        plt.fill_between(list(range(len(low))), low, high, alpha=.2, color=c)
    plt.xlabel('Student Steps')
    plt.ylabel('Validation Accuracy')
    plt.title('Student Performance Comparison')
    plt.legend()
    plt.savefig(name, bbox_inches='tight')
def calc_stats(bonds, density_vals, integral_1, integral_2=[0]):
    av_densities, std_dens = [], []
    av_int_1_list, std_int_1_list = [], []
    av_int_2_list, std_int_2_list = [], []
    # iterate over bin edges pairwise; bonds[i + 1] must exist, so stop
    # one short of the end (the original enumerate over all of bonds
    # would raise IndexError on the last edge)
    for i in range(len(bonds) - 1):
        mask = np.logical_and(bonds[i] <= density_vals,
                              density_vals < bonds[i + 1])
        dens_bin = density_vals[mask]  # do not overwrite density_vals itself
        int_1_bin = integral_1[mask]
        int_2_bin = integral_2[mask]
        av_density, std_density = np.mean(dens_bin), tstd(dens_bin)
        av_int_1, std_int_1 = np.mean(int_1_bin), tstd(int_1_bin)
        av_int_2, std_int_2 = np.mean(int_2_bin), tstd(int_2_bin)
        av_densities.append(av_density)
        std_dens.append(std_density)
        av_int_1_list.append(av_int_1)  # was appending the list to itself, a bug
        std_int_1_list.append(std_int_1)
        av_int_2_list.append(av_int_2)  # same bug fixed here
        std_int_2_list.append(std_int_2)
    average_dict = {
        'density_vals': np.asarray(av_densities),
        'std_dens': np.asarray(std_dens),
        'int_1': np.asarray(av_int_1_list),
        'std_1': np.asarray(std_int_1_list),
        'int_2': np.asarray(av_int_2_list),
        'std_2': np.asarray(std_int_2_list)
    }
    return average_dict
def compute_ttest_for_col(self, p_thresh):
    res_4df = {'features': [], 'ttest': [], 'welch': []}
    res = dict()
    for col in self.ls_cols:
        group1 = self.df[self.df[self.group_col] == self.groups[0]][col]
        group2 = self.df[self.df[self.group_col] == self.groups[1]][col]
        ttest_eq_pop_var = stats.ttest_ind(group1, group2, equal_var=True)
        ttest_welch = stats.ttest_ind(group1, group2, equal_var=False)
        if ttest_eq_pop_var[1] < p_thresh:
            meas, struct = get_structure_measurement(
                col, self.ls_meas, self.ls_struct)
            # print('{:<15} {}'.format(meas, struct))
            res[col] = {
                '{}, mean'.format(self.groups[0]): stats.tmean(group1),
                # was keyed on groups[1] and computed from group2, which
                # duplicated the key below and dropped group1's std
                '{}, std'.format(self.groups[0]): stats.tstd(group1),
                '{}, mean'.format(self.groups[1]): stats.tmean(group2),
                '{}, std'.format(self.groups[1]): stats.tstd(group2),
                'ttest': ttest_eq_pop_var[1],
                'welch': ttest_welch[1],
                'kurtosis': stats.kurtosis(self.df[self.group_col]),
                'skewness': stats.skew(self.df[self.group_col])
            }
            res_4df['features'].append(struct + ' (' + meas + ')')
            res_4df['ttest'].append(ttest_eq_pop_var[1])
            res_4df['welch'].append(ttest_welch[1])
    self.save_res(res_4df)
    return res
def stdtrigger(img, framenum=1, threshold=threshold, message=False):
    try:
        global avg
        global std
        global xstart
        xstart = width // 4  # integer division: these are slice indices
        global ystart
        ystart = height // 4
        global sumlist
    except:
        sys.exit("error getting globals in stdtrigger")
    if message:
        if message == "update":
            sumlist[0] = img[xstart:(xstart * 3), ystart:(ystart * 3), :].sum()
            std = stats.tstd(sumlist)
            avg = sum(sumlist) / n
            return
        # any other truthy message triggers a full recalibration
        sumlist = [None] * n

        def calibcam(n, sumlist):
            stream = io.BytesIO()
            for i in range(n):
                yield stream
                stream.seek(0)
                fwidth = (width + 31) // 32 * 32
                fheight = (height + 15) // 16 * 16
                img = np.frombuffer(stream.getvalue(), dtype=np.uint8).reshape(
                    (fheight, fwidth, 3))[:height, :width, :]
                sumlist[i] = img[xstart:(xstart * 3),
                                 ystart:(ystart * 3), :].sum()
                stream.seek(0)
                stream.truncate()
                sys.stdout.write("\r" + str(float(i + 1) * 100 / n) + "% ")
                sys.stdout.flush()

        print("calibrating stdtrigger")
        with PiCamera() as cam:
            cam.resolution = (width, height)
            cam.framerate = 80
            time.sleep(.3)
            cam.capture_sequence(calibcam(n, sumlist), "rgb",
                                 use_video_port=True)
        std = stats.tstd(sumlist)
        avg = sum(sumlist) / n
        return
    thisz = (img[xstart:(xstart * 3), ystart:(ystart * 3), :].sum() - avg) / std
    if abs(thisz) > threshold:
        print("\nsomething weird, zscore=" + str(thisz) + " at "
              + time.strftime("%a, %d %b %H:%M:%S", time.localtime()))
        return True
    if framenum % 10 == 0:
        sumlist[0] = img[xstart:(xstart * 3), ystart:(ystart * 3), :].sum()
        std = stats.tstd(sumlist)
        avg = sum(sumlist) / n
    return False
def get_slope_error(x, y, students_t=2):
    """Return error of the slope of the line given by (x, y)"""
    assert len(x) == len(y), 'Different input lengths'
    slope = stats.linregress(x, y).slope
    D_y = stats.tstd(y)**2
    D_x = stats.tstd(x)**2
    # Formula from the MIPT lab manual
    return students_t * np.sqrt(1 / (len(x) - 2) * (D_y / D_x - slope**2))
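# Quick check of get_slope_error on noisy linear data (hypothetical values;
# assumes `numpy as np` and `scipy.stats as stats` are imported as above).
import numpy as np
from scipy import stats

x = np.linspace(0.0, 10.0, 50)
y = 2.0 * x + np.random.default_rng(1).normal(0.0, 0.5, x.size)
print(stats.linregress(x, y).slope, get_slope_error(x, y))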
def visualize(test_name, files, columns, step_win=1000, smooth_win=101):
    band_list = []
    file_list = []
    for f in files:
        if isinstance(f, list):
            band_list.append(len(f))
            file_list = file_list + f
        else:
            band_list.append(1)
            file_list.append(f)
    data = get_data(file_list, columns, step_win=step_win,
                    smooth_win=smooth_win)
    for col, d in data:
        plt.clf()
        count = 0
        i = 0
        while i < len(d):
            if band_list[count] == 1:
                n, v = d[i]
                plt.plot(v, label=n)
            else:
                n = d[i][0]
                data_lists = [x[1] for x in d[i:i + band_list[count]]]
                max_len = max(len(x) for x in data_lists)
                data_list = []
                for j in range(max_len):
                    data_list.append([])
                    for l in data_lists:
                        if j < len(l):
                            data_list[-1].append(l[j])
                high, low, avg = [], [], []
                for v in data_list:
                    avg.append(sum(v) / len(v))
                    high.append(avg[-1] + stats.tstd(v))
                    low.append(avg[-1] - stats.tstd(v))
                plt.plot(list(range(0, len(avg) * step_win, step_win)),
                         avg, label=n)
                plt.fill_between(list(range(0, len(avg) * step_win, step_win)),
                                 low, high, alpha=.2)
            i += band_list[count]
            count += 1
        plt.legend(loc=0)
        # plt.title(test_name)
        plt.ylabel(col)
        plt.xlabel('Steps')
        plt.savefig('images/' + test_name + '_' + col + '.png')
def write_transforms_to_file(transforms, filename="dummy_transforms.txt",
                             min_pairs=3, p_level=0.05, std_min=0.0,
                             id_assays=True, full_info=False):
    """
    Write selected transformations to file.

    min_pairs : Minimum number of pairs per transformation
    p_level   : Maximum p-value
    std_min   : Minimum standard deviation of differences within pairs
    id_assays : Separately output statistics using pairs from identical assays only
    """
    print("Writing significant transformations to file")
    if min_pairs < 2:
        print("At least 2 pairs per transformation are necessary for significance tests.")
        print("min_pairs set to 2")
        min_pairs = 2
    header = "Transformation\tAssay_specific\tp-value\tAverage_Activity_Difference\tSigma_Differences\tnpairs"
    if full_info:
        header = header + "\tLigand_IDs\tlog(Activities[nM])\tAssay_Identity"
    header = header + "\n"
    with open(filename, "w") as f:
        f.write(header)
        for transf, pairs in transforms.items():  # iteritems() is Python 2 only
            if len(pairs["ligand_ids"]) < min_pairs:
                continue
            diffs = pairs["deltas"]
            npairs_all = len(diffs)
            p_all = stats.ttest_rel(diffs, [0.0 for i in diffs])[1]
            av_all = sum(diffs) / npairs_all
            std_all = stats.tstd(diffs)
            if npairs_all >= min_pairs and p_all <= p_level and std_all >= std_min:
                f.write(transf + "\t" + "mixed_assays" + "\t"
                        + "{:4.2}".format(p_all) + "\t"
                        + "{:4.3}".format(av_all) + "\t"
                        + "{:4.2}".format(std_all) + "\t" + str(npairs_all))
                if full_info:
                    for i in range(npairs_all):
                        f.write("\t" + pairs["ligand_ids"][i][0] + ":"
                                + pairs["ligand_ids"][i][1])
                    for i in range(npairs_all):
                        f.write("\t" + "{:4.3}".format(pairs["activities1"][i])
                                + ":" + "{:4.3}".format(pairs["activities2"][i]))
                    for i in range(npairs_all):
                        f.write("\t" + str(pairs["assay_identity"][i]))
                f.write("\n")
            if not id_assays:
                continue
            diffs_id = list(set([pairs["deltas"][i] for i in range(npairs_all)
                                 if pairs["assay_identity"][i]]))
            npairs_id = len(diffs_id)
            if npairs_id < min_pairs:
                continue
            p_id = stats.ttest_rel(diffs_id, [0.0 for i in diffs_id])[1]
            av_id = sum(diffs_id) / npairs_id
            std_id = stats.tstd(diffs_id)
            if npairs_id >= min_pairs and p_id <= p_level and std_id >= std_min:
                f.write(transf + "\t" + "ident_assays" + "\t"
                        + "{:4.2}".format(p_id) + "\t"
                        + "{:4.3}".format(av_id) + "\t"
                        + "{:4.2}".format(std_id) + "\t" + str(npairs_id))
                if full_info:
                    for i in range(npairs_all):
                        if pairs["assay_identity"][i]:
                            f.write("\t" + pairs["ligand_ids"][i][0] + ":"
                                    + pairs["ligand_ids"][i][1])
                    for i in range(npairs_all):
                        if pairs["assay_identity"][i]:
                            f.write("\t" + "{:4.3}".format(pairs["activities1"][i])
                                    + ":" + "{:4.2}".format(pairs["activities2"][i]))
                    for i in range(npairs_all):
                        if pairs["assay_identity"][i]:
                            f.write("\t" + str(pairs["assay_identity"][i]))
                f.write("\n")
def get_std_deviation(img):
    path = images_path + img
    img = cv2.imread(path)
    hist_b = cv2.calcHist([img], [0], None, [256], [0, 256])
    hist_g = cv2.calcHist([img], [1], None, [256], [0, 256])
    hist_r = cv2.calcHist([img], [2], None, [256], [0, 256])
    # calcHist returns (256, 1) arrays, so tstd yields a length-1 array
    tstd_b = tstd(hist_b)
    tstd_g = tstd(hist_g)
    tstd_r = tstd(hist_r)
    return [tstd_r[0], tstd_g[0], tstd_b[0]]
def fit(self, x, y):
    self.x = x
    self.y = y
    self.mean_x = float('%.02f' % statistics.mean(self.x))
    self.mean_y = float('%.02f' % statistics.mean(self.y))
    self.stdev_x = float('%.02f' % stats.tstd(self.x))
    self.stdev_y = float('%.02f' % stats.tstd(self.y))
    self.corr_coeff, self.p_value = stats.pearsonr(self.x, self.y)
    self.corr_coeff = float('%.2f' % self.corr_coeff)
    self.slope = self.corr_coeff * (self.stdev_y / self.stdev_x)
    self.intercept = self.mean_y - (self.slope * self.mean_x)
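# The fit above uses the textbook identity slope = r * (s_y / s_x); a
# standalone sanity check against scipy's least-squares slope
# (hypothetical data, independent of the class above).
import numpy as np
from scipy import stats

x = np.arange(10.0)
y = 3.0 * x + 1.0 + np.random.default_rng(2).normal(0.0, 0.1, 10)
r, _ = stats.pearsonr(x, y)
print(r * stats.tstd(y) / stats.tstd(x),
      stats.linregress(x, y).slope)  # identical up to rounding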
def train_models(slope_history):
    if len(slope_history) < 3:
        return None
    # slope_history = list of tuples: (avg_outdoor, slope, std_err, temperature_profile[0, 0])
    sh = np.matrix(slope_history)
    lnmodel = LinearRegression()
    error_inverse = np.array(1 / sh[:, 2])[:, 0]
    lnfit = lnmodel.fit(sh[:, 0], sh[:, 1])
    # svr_rbf = SVR(kernel='linear', C=10, epsilon=0.5)
    # svrfit = svr_rbf.fit(sh[:, 0], sh[:, 1])
    ln_residue = []
    for i in range(len(slope_history)):
        p = lnfit.predict(slope_history[i][0])[0][0]
        ln_residue.append((p - slope_history[i][1])**2)
    ln_std = stats.tstd(ln_residue)
    ln_mean = stats.tmean(ln_residue)
    # keep only points whose squared residual is within 3 sigma of the mean
    new_sh = None
    for i in range(len(ln_residue)):
        if ln_residue[i] < ln_mean + 3 * ln_std:
            if new_sh is None:
                new_sh = sh[i, :]
            else:
                new_sh = np.vstack((new_sh, sh[i, :]))
    sh = new_sh
    # redo the fit
    error_inverse = np.array(1 / sh[:, 2])[:, 0]
    slope_mean = stats.tmean(sh[:, 1])
    slope_std = stats.tstd(sh[:, 1])
    lnfit = lnmodel.fit(sh[:, 0], sh[:, 1])
    ln_residue = []
    for i in range(len(sh)):
        p = lnfit.predict(sh[i, 0])[0][0]
        ln_residue.append((p - sh[i, 1])**2)
    ln_std = stats.tstd(ln_residue)
    ln_mean = stats.tmean(ln_residue)
    return {
        'ln_model': lnfit,
        'ln_residue': ln_residue,
        'ln_residue_std': ln_std,
        'ln_residue_mean': ln_mean,
        'slope_mean': slope_mean,
        'slope_std': slope_std,
        'data_matrix': sh
    }
def prediction_interval(x, alpha=0.05, type="two-sided", k_future_obs=1):
    """Compute an interval to contain future measurements with given confidence

    Parameters
    ----------
    x : array_like
        Input array or object that can be converted to an array.
    alpha : float
        Individual comparison false positive rate. Default value is 0.05.
    type : string
        The type of interval to be returned. Upper, lower, or two-sided.
        Default is two-sided.
    k_future_obs : int
        Number of total future comparisons (e.g., number of wells
        multiplied by the number of analytes).

    Returns
    -------
    lpl, upl : float
        Lower and upper prediction limits.

    Notes
    -----
    Uses the Bonferroni inequality method.
    """
    if not isinstance(x, np.ndarray):
        x = np.asarray(x)
    alpha_s = alpha
    if k_future_obs > 1:
        alpha = alpha_s / k_future_obs  # Bonferroni correction
    if type == "two-sided":
        lpl = x.mean() - stats.tstd(x) * stats.t.ppf(
            1 - alpha / 2, x.size - 1) * np.sqrt(1 + 1 / x.size)
        upl = x.mean() + stats.tstd(x) * stats.t.ppf(
            1 - alpha / 2, x.size - 1) * np.sqrt(1 + 1 / x.size)
    if type == "upper":
        lpl = -np.inf  # np.NINF was removed in NumPy 2.0
        upl = x.mean() + stats.tstd(x) * stats.t.ppf(
            1 - alpha, x.size - 1) * np.sqrt(1 + 1 / x.size)
    if type == "lower":
        lpl = x.mean() - stats.tstd(x) * stats.t.ppf(
            1 - alpha, x.size - 1) * np.sqrt(1 + 1 / x.size)
        upl = np.inf
    return lpl, upl
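# Hypothetical example: a 95% two-sided prediction interval for a single
# future observation, given eight background measurements (assumes `np` and
# `stats` are imported as in the function above).
import numpy as np

background = np.array([2.1, 2.4, 1.9, 2.2, 2.0, 2.5, 2.3, 2.1])
lpl, upl = prediction_interval(background, alpha=0.05, type="two-sided")
print(lpl, upl)  # future values are expected to fall inside (lpl, upl)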
def write_current_stats(log_file, link_bandwidth_usage_Mbps, switch_num_flows,
                        response_times, cur_group_index, group):
    # network_times and processing_times are assumed to be module-level lists
    link_bandwidth_list = []
    total_num_flows = 0
    for switch_dpid in link_bandwidth_usage_Mbps:
        for port_no in link_bandwidth_usage_Mbps[switch_dpid]:
            link_bandwidth_list.append(
                link_bandwidth_usage_Mbps[switch_dpid][port_no])
    for switch_dpid in switch_num_flows:
        total_num_flows += switch_num_flows[switch_dpid]
    avg_response_time = sum(response_times) / float(len(response_times))
    avg_network_time = sum(network_times) / float(len(network_times))
    avg_processing_time = sum(processing_times) / float(len(processing_times))
    average_link_bandwidth_usage = (sum(link_bandwidth_list)
                                    / float(len(link_bandwidth_list)))
    traffic_concentration = 0
    if average_link_bandwidth_usage != 0:
        traffic_concentration = (max(link_bandwidth_list)
                                 / average_link_bandwidth_usage)
    link_util_std_dev = tstd(link_bandwidth_list)
    log_file.write('Group:' + str(cur_group_index))
    log_file.write(' NumReceivers:' + str(len(group.dst_hosts)))
    log_file.write(' TotalNumFlows:' + str(total_num_flows))
    log_file.write(' MaxLinkUsageMbps:' + str(max(link_bandwidth_list)))
    log_file.write(' AvgLinkUsageMbps:' + str(average_link_bandwidth_usage))
    log_file.write(' TrafficConcentration:' + str(traffic_concentration))
    log_file.write(' LinkUsageStdDev:' + str(link_util_std_dev))
    log_file.write(' ResponseTime:' + str(avg_response_time))
    log_file.write(' NetworkTime:' + str(avg_network_time))
    log_file.write(' ProcessingTime:' + str(avg_processing_time))
    log_file.write('\n')
def print_stats(datums):
    print('Mean:', stats.tmean(datums))
    # stats.cmedian was removed from SciPy; np.median is the replacement
    print('Median:', np.median(datums))
    print('Std Dev:', stats.tstd(datums))
    print('Variation:', stats.variation(datums))
    print('Kurtosis:', stats.kurtosis(datums, fisher=False))
    print('Skewness:', stats.skew(datums))
def calculating_season_stats(df_X_train):
    bat_avg = (df_X_train.groupby(
        ['batsman_striker', 'season', 'career_age']).sum()).reset_index()
    bat_avg['runs_per_match_avg'] = bat_avg['runs_scored'] / bat_avg['matches']
    bat_avg = bat_avg[[
        'player_id', 'batsman_striker', 'season', 'age', 'runs_per_match_avg'
    ]]
    bat_avg = (bat_avg.pivot_table(index=['player_id', 'batsman_striker'],
                                   columns=['season'],
                                   values=['runs_per_match_avg', 'age']))
    bat_avg = bat_avg.reset_index()
    # calculating standard deviation of runs per match throughout the career
    # up to the target year
    std = bat_avg.reindex(sorted(bat_avg.columns),
                          axis=1)  # reindex_axis was removed from pandas
    std.drop(std.columns[[0, 1, 2, 3, 4]], axis=1, inplace=True)
    # fill seasons missed by players with the row mean
    std = std.T.fillna(bat_avg.mean(axis=1)).T
    std_l = []
    for i in range(len(std)):
        std_l.append(stats.tstd(std.iloc[i][3:10]))
    std['std'] = std_l
    std = std[['player_id', 'batsman_striker', 'std']]
    bat_avg_std = bat_avg.merge(std, on=['batsman_striker'])
    bat_avg_std = bat_avg_std.rename(columns={
        'player_id_x': 'player_id'
    }).drop('player_id_y', axis=1)
    return bat_avg_std
def passos(xa, ya, xmax, ymax, series=1000):
    '''Returns the mean, over the given number of series, of how many steps
    the drunkard takes to reach the given point.'''
    lista_passos = []
    for i in range(series):
        x, y = 0, 0
        passos = 1
        # loop until BOTH coordinates match the target (the original `and`
        # stopped the walk as soon as either coordinate matched)
        while x != xa or y != ya:
            a = direcao()
            if a == 'N':
                passos += 1
                y += 1
                if y >= ymax:
                    y = ymax
            elif a == 'S':
                passos += 1
                y -= 1
                if abs(y) >= ymax:
                    y = -ymax
            elif a == 'O':
                passos += 1
                x -= 1
                if abs(x) >= xmax:
                    x = -xmax
            else:
                passos += 1
                x += 1
                if x >= xmax:
                    x = xmax
        lista_passos.append(passos)
    print('Mean=' + str(round(stats.tmean(lista_passos), 5)) +
          '\nStandard deviation=' + str(round(stats.tstd(lista_passos), 5)))
def main():
    train_df = pd.read_csv('data/match_feature.csv')
    # Split X, y and scale
    X, y, min_max_scaler = get_X_y(train_df)
    print("Total dataset size : ", len(X))
    # check_train_size_curve(X, y)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.01, random_state=42)
    model = Lgbm_Model()
    # param = model.tune(X_train, y_train)
    model.train(X_train, y_train, X_test, y_test)
    # model.save("Lgbm")
    model.evaluate(X_train, y_train, cross_val=True)
    model.evaluate(X_test, y_test)
    res = model.predict(X_train)
    print(tstd(res))
    print(tmean(res))
    plt.hist(res, bins=100)
    plt.show()
    model.plot_importance()
def get_generation_stats(population, environment):
    """Gets the stats for the given population"""
    average_fitness = get_average_fitness(population, environment)
    best_org = get_best_organism(population, environment)
    best_fitness = best_org.fitness(environment)
    stdev = stats.tstd([org.fitness(environment) for org in population])
    return average_fitness, stdev, best_org, best_fitness
def std(self, **kwargs):
    """
    Unbiased standard deviation of time series.

    Parameters
    ----------
    kwargs : optional
        see documentation of :meth:`get()` method for available options

    Returns
    -------
    float
        Sample standard deviation

    Notes
    -----
    Computes the unbiased sample standard deviation, i.e. it uses a
    correction factor n / (n - ddof).

    See also
    --------
    scipy.stats.tstd
    """
    # get data array, time does not matter
    _, x = self.get(**kwargs)
    return tstd(x)
def calculation(self):
    self.clear_line()
    filename = self.entry_0_string.get()
    fileString = fisher.open_file(filename)  # get the file path
    if fileString[0] == 0:
        arr = list(map(float, fileString[1].split(' ')))
    else:
        self.show_warning(fileString[1])
        self.clear_line()
        return 1
    answer_3 = len(arr)  # number of elements in the sample
    answer_4 = fisher.normalizeFloat(np.mean(arr))  # arithmetic mean
    answer_5 = fisher.normalizeFloat(
        stats.tstd(arr))  # estimate of the standard deviation
    name = self.entry_1_string.get()
    countSplit = fisher.inputCountSplit(name, len(arr))
    if countSplit[0] == 0:
        arr = np.array_split(arr, countSplit[1])
    else:
        self.show_warning(countSplit[1])
        self.clear_line()
        return 1
    answer_6 = fisher.normalizeFloat(fisher.MSA(arr))  # between-group variance
    answer_7 = fisher.normalizeFloat(fisher.MSW(arr))  # within-group variance
    fCritery = fisher.getFishersCritery(arr)
    answer_8 = fisher.normalizeFloat(fCritery)  # Fisher's criterion
    significanceLevel = self.entry_2_string.get()
    significanceLevel = fisher.inputSignificanceLevel(significanceLevel)
    if significanceLevel[0] == 0:
        fCriticalCritery = fisher.getCriticalFishersCritery(
            (significanceLevel[1]), arr)
        answer_9 = fisher.normalizeFloat(
            fCriticalCritery)  # critical value of Fisher's criterion
    else:
        self.show_warning(significanceLevel[1])
        self.clear_line()
        return 1
    self.entry_3_string.set(answer_3)  # show the sample size
    self.entry_4_string.set(answer_4)  # arithmetic mean
    self.entry_5_string.set(answer_5)  # standard deviation estimate
    self.entry_6_string.set(answer_6)  # between-group variance
    self.entry_7_string.set(answer_7)  # within-group variance
    self.entry_8_string.set(answer_8)  # Fisher's criterion
    self.entry_9_string.set(answer_9)  # critical Fisher's criterion
    summary = fisher.summary(fCritery, fCriticalCritery)
    self.text_1.insert(1.0, summary)
    return filename
def __init__(self, parent, figure, sel, plot_t, cap):
    self.parent = parent
    self.cap = cap
    self.data = []
    self.x_labels = []
    self.selector = sel
    for day in Plot.day_order:
        data = self.parent.data.get(sel[0], day, sel[1], sel[2])
        if not len(data):
            text = "%s\nx̅: %s\nCol/img: %s\nσ: %s" % (day, 0, 0, 0)
            self.x_labels.append(text)
            self.data.append(Plot.cap([0], self.cap))
            continue
        c_normal = parent.image_n / len(
            parent.image_counter.out["%s,%s" % (sel[0], day)])
        current_data = normalize_data(data, c_normal)
        gm = round(float(stats.gmean(current_data)), 3)
        cpi = round(len(current_data) * c_normal, 3)
        sd = round(float(stats.tstd(current_data)), 3)
        text = "%s\nx̅: %s\nCol/img: %s\nσ: %s" % (day, gm, cpi, sd)
        self.x_labels.append(text)
        self.data.append(Plot.cap(current_data, self.cap))
    self.figure = figure
    self.plot_tuple = plot_t
    self.graph_label = "%s population, %s %s" % (
        sel[0], sel[1], sel[2].replace("supra", "supra-basal"))
def num_features_DDbS(samplefile_, n_):
    _configs = list()
    init = True
    if os.path.exists(samplefile_):
        with open(samplefile_, 'r') as sf:
            for line in sf:
                if not init:
                    raw = line.split(";")
                    if len(raw) != 0:
                        raw = raw[1:]
                        config = list()
                        for i in range(0, len(raw)):
                            if raw[i] == '1':
                                config.append(i + 1)
                        _configs.append(config)
                else:
                    init = False  # skip the first (header) line
    else:
        return -1
    _fnums = list()
    for sample in _configs:
        fnum = 0
        for v in sample:
            if v > 0:
                fnum += 1
        _fnums.append(fnum)
    return stats.tmean(_fnums), stats.tstd(_fnums)
def make_frame(t):
    """ returns an image of the frame at time t """
    # ... create the frame with any library
    fitness = fitness_list[int(t)]
    __sum_fit = sum(fitness)
    __mean_fit = float(__sum_fit) / float(len(fitness))
    from scipy.stats import tstd, iqr, variation, entropy
    __sd_fit = tstd(fitness)
    __iqr = iqr(fitness)
    __v = variation(fitness)
    __e = entropy(fitness)
    fig = plt.figure()
    plt.hist(fitness)  # , bins=int(params['POPULATION_SIZE']*0.1))
    plt.title("Moving Point - Population Fitness Histogram - Generation "
              + str(int(t)))
    plt.axis([0, 20000, 0, params['POPULATION_SIZE']])
    plt.ylabel('#Individuals')
    plt.xlabel('Fitness')
    plt.grid(True)
    __hist_text = (r"$\mu=" + "{0:.2f}".format(__mean_fit)
                   + r",\ \sigma=" + "{0:.2f}".format(__sd_fit)
                   + r",\ entropy=" + "{0:.2f}".format(__e)
                   + r",\ iqr=" + "{0:.2f}".format(__iqr) + "$")
    plt.text(1000, params['POPULATION_SIZE'] * .9, __hist_text)
    return mplfig_to_npimage(fig)  # (Height x Width x 3) Numpy array
def plot_gauss(no):
    seq = 1
    # the original had identical branches for no == 1 and no < 1000
    if no < 1000:
        n = str(no).zfill(3)
        imtype = 'IIM' + n
    else:
        imtype = 'II' + str(no)
    print('doing ' + imtype)
    testImg = WAIPSImage('J0528+2200', imtype, 1, seq, 43)
    dat = testImg.pixels.flatten()
    end_1, end_2 = np.array([dat.size * 0.1, dat.size * 0.9], dtype=int)
    mu = np.mean(dat[end_1:end_2])
    sigma = stats.tstd(dat[end_1:end_2])
    peak = np.max(dat)
    print('peak:', np.max(dat), 'rms:', sigma, 'snr:', peak / sigma)
    plt.figure()
    # hist(normed=...) and plt.normpdf are gone from modern matplotlib;
    # density=True and scipy.stats.norm.pdf are the replacements
    n, bins, patches = plt.hist(dat, 100, density=True, histtype='stepfilled')
    plt.setp(patches, 'facecolor', 'g', 'alpha', 0.75)
    y = stats.norm.pdf(bins, mu, sigma)
    plt.plot(bins, y, 'k--', linewidth=1.5)
    plt.show()
def determine_nbins1D(X, rule='Sturges'):
    '''
    Three common methods to determine the number of bins used to compute
    the entropy of ONE variable X.

    :param X: array-like of numbers
    :param rule:
        1) Freedman-Diaconis's rule: for unknown or non-parametric distributions
           nbins = ceil((max(X) - min(X)) / (2 * IQR * N^{-1/3}))
        2) Scott's rule: for normally distributed data
           nbins = ceil((max(X) - min(X)) / (3.5 * STD * N^{-1/3}))
        3) Sturges' rule
           nbins = ceil(1 + log2(N))
        default: Sturges' rule
    :return: the optimal number of bins used to calculate entropy
    '''
    maxmin_range = max(X) - min(X)
    n = len(X)
    n3 = n**(-1 / 3)
    # note: this rule string contains U+2010 rather than an ASCII hyphen
    if rule == 'Freedman‐Diaconis':
        return np.ceil(maxmin_range / (2.0 * iqr(X) * n3)).astype('int')
    if rule == 'Scott':
        return np.ceil(maxmin_range / (3.5 * tstd(X) * n3)).astype('int')
    if rule == 'Sturges':
        return np.ceil(1 + np.log2(n)).astype('int')
    return 0
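# Comparing the three rules on normally distributed data (hypothetical;
# assumes the function above and its imports are in scope). The
# 'Freedman‐Diaconis' key deliberately contains U+2010 to match the code.
import numpy as np

X = np.random.default_rng(3).normal(size=1000)
for rule in ('Freedman‐Diaconis', 'Scott', 'Sturges'):
    print(rule, determine_nbins1D(X, rule=rule))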
def compute_metric(self):
    tmid = (self.ts.t[:-1] + self.ts.t[1:]) / 2.0
    rng = range(1, len(tmid))  # Throw out first and last
    self.tmid = tmid[rng]
    maxval = numpy.zeros(len(rng))
    minval = numpy.ones(len(rng)) * 1e100
    self.rate = []
    for v in self.ts:
        self.rate.append(numpy.divide(numpy.diff(v)[rng],
                                      numpy.diff(self.ts.t)[rng]))
        maxval = numpy.maximum(maxval, self.rate[-1])
        minval = numpy.minimum(minval, self.rate[-1])
    vals = []
    mean = []
    std = []
    for j in range(len(rng)):
        vals.append([])
        for v in self.rate:
            vals[j].append(v[j])
        mean.append(tmean(vals[j]))
        std.append(tstd(vals[j]))
    imbl = maxval - minval
    self.ratio = numpy.divide(std, mean)
    self.ratio2 = numpy.divide(imbl, maxval)
    # mean of ratios is the threshold statistic
    self.metric = abs(tmean(self.ratio))
def plot_heatmap(matrix, ax, label):
    vmax = np.max(matrix)
    vmin = np.min(matrix)
    vextreme = max(abs(vmin), vmax)
    k = kurtosis(matrix.flat)
    try:
        std = tstd(matrix.flat)
    except ZeroDivisionError:
        std = 0
    args = {'vmax': vextreme, 'vmin': -vextreme, 'interpolation': 'none',
            'aspect': 'auto', 'origin': 'lower',
            'cmap': plt.get_cmap('RdBu')}  # Spectral
    if k > 15:
        # heavy-tailed data: switch to a symmetric log color scale
        norm = SymLogNorm(std / 3.0, vmin=-vextreme, vmax=vextreme)
        args['norm'] = norm
        label = "Symmetric log of " + label
    if len(matrix.shape) == 1:
        matrix = np.tile(matrix, (1, 2))
    plt.imshow(matrix, **args)
    ax.set_title(format_name(label))
    ax.set_frame_on(False)
    plt.axis('off')
    ax.invert_yaxis()
    cb = plt.colorbar()
    if k > 15:
        ticks = np.linspace(0, 1, 9)
        tick_map = norm.inverse(ticks)
        cb.set_ticks(tick_map)
        cb.set_ticklabels(["{:.4g}".format(t) for t in tick_map])
def iters(N):
    i = 0
    simulations = []
    while i < N:
        ith_iter = single('off')
        simulations.append(ith_iter)
        print('$', ith_iter)
        i += 1
    data = simulations
    # plot density histogram
    plt.figure()
    wts = np.ones_like(data) / float(len(data))
    plt.hist(data, stacked=True, weights=wts, edgecolor='k',
             color='darkorange')
    average = int(np.mean(data))
    plt.suptitle('<Closet value>: $ {} +- {} (std. dev.)'.format(
        average, int(tstd(data))), size=13)
    plt.xlabel(r"Total closet values for {} simulation(s) / $".format(N),
               size=13)
    plt.ylabel('P', size=13)
    plt.show()
def num_features_QS(samplefile_, n_):
    i = 0
    _configs = list()
    if os.path.exists(samplefile_):
        with open(samplefile_, 'r') as sf:
            for line in sf:
                raw = line.split(" ")
                if len(raw) != 0:
                    config = raw[:len(raw) - 1]
                    _configs.append(config)
                    i += 1
    else:
        return -1
    _samples = list()
    rands = get_random(n_, len(_configs))
    for r in rands:
        _samples.append(_configs[r - 1])
    _fnums = list()
    for sample in _samples:
        fnum = 0
        for v in sample:
            if not v.startswith('-'):
                fnum += 1
        _fnums.append(fnum)
    return stats.tmean(_fnums), stats.tstd(_fnums)
def calc_moments(self, periods=252):
    if self.model.current_step - 1 < periods:
        return None, None, None
    else:
        rets = self.model.datacollector.model_vars["Return"][-periods - 1:]
        rets.append(self.last_return)
        return tstd(rets), skew(rets), kurtosis(rets)
def player_info(accountId):
    try:
        summoner = watcher.summoner.by_account(my_region, accountId)
        summonerId = summoner['id']
        league = watcher.league.by_summoner(my_region, summonerId)[0]
    except:
        return None, None, None, None, None
    level = summoner['summonerLevel']
    total_win = league['wins']
    total_loss = league['losses']
    hot_streak = int(league['hotStreak'])
    data = [1] * total_win + [0] * total_loss
    win_skew = skew(data)
    win_std = tstd(data)
    win_mean = tmean(data)
    '''
    match_lst = watcher.match.matchlist_by_account(
        my_region, accountId, end_index=30, queue='420')
    for match in match_lst:
        print(match)
    '''
    return win_mean, win_std, win_skew, level, hot_streak
def norm_fit_sparsely(self, show_it=0, save_it=0, save_dir=None,
                      save_name=None, start=0, end=0):
    if not end:
        print('norm fit: assign end')
    _sparseness = 10**4
    _start = int(start)
    _end = int(end)
    _data_num = _end - _start
    print(223, 'int(data_num/sparseness + 1):{}, data_num:{}'.format(
        int(_data_num / _sparseness + 1), _data_num))
    _cur_x = [
        self.x[i]
        for i in range(_start, _end, int(_data_num / _sparseness + 1))
    ]
    _guess = [stats.tmean(self.x), stats.tstd(self.x)]
    _x = _cur_x
    _x.sort()
    self.norm_params, self.norm_params_covariance = optimize.curve_fit(
        self.norm_dist_CDF, _x,
        [(i + 1) / len(_x) for i in range(len(_x))], _guess)
    self.hist_norm_of_move_sparsely(show_it=show_it, save_it=save_it,
                                    save_dir=save_dir, save_name=save_name,
                                    start=start, end=end)
def sort_spikes(dataframe, analog_data, standardize=False):
    """ Sorts spikes in dataframe for the given analog_data in place.

    Spikes are sorted by the first two principal components after the
    waveforms have been smoothed and up-sampled. Cluster analysis is done
    using the OPTICS density based clustering algorithm. An appropriate
    epsilon is found by looking for significant peaks in the reachability
    plot.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        DataFrame of spike data.
    analog_data : MEARecording
        The MEARecording for the spikes given in dataframe.
    standardize : bool
        If True, standardize data before cluster finding.
    """
    for (tag, sdf) in dataframe.groupby('electrode'):
        waveforms = extract_waveforms(bandpass_filter(analog_data[tag]),
                                      sdf.time.values)
        with warnings.catch_warnings():
            warnings.simplefilter('ignore', category=RuntimeWarning)
            pcs = PCA(n_components=2).fit_transform(waveforms)
        if standardize:
            pcs = StandardScaler().fit_transform(pcs)
        opt = optics.OPTICS(300, 5)
        opt.fit(pcs)
        reach = opt._reachability[opt._ordered_list]
        rprime = reach[np.isfinite(reach)]
        if len(rprime) < 2:
            continue
        try:
            thresh = 8.5 * stats.tstd(
                rprime, (np.percentile(rprime, 15),
                         np.percentile(rprime, 85))) + np.median(rprime)  # noqa
        except:
            continue
        peaks = peak_local_max(reach, min_distance=4, threshold_abs=thresh,
                               threshold_rel=0).flatten()
        # Find largest peak for close neighbors
        min_dist = 0.05 * len(reach)
        splits = np.where(np.diff(peaks) > min_dist)[0] + 1
        peak_vals = [np.max(x) for x in np.split(reach[peaks], splits)
                     if len(x) > 0]
        try:
            eps = 0.90 * np.min(peak_vals)
        except:
            eps = 0.5 * reach[-1]
        opt.extract(eps)
        dataframe.loc[sdf.index, 'electrode'] = \
            sdf.electrode.str.cat(opt.labels_.astype(str), sep='.')
def sd(self, samples=100):
    vals = []
    for i in range(samples):
        solution = []
        for j in range(self.arglen):
            solution.append(random.randrange(*self.range_))
        vals.append(self.fitness1(solution))
    return stats.tstd(vals)
def post_credible_interval(data, confidence=0.95):
    # compute posterior credible interval
    a = 1.0 * np.array(data)
    n = len(a)
    m, se = np.mean(a), stats.tstd(a)
    # use the public t.ppf instead of the private t._ppf
    h = se * stats.t.ppf((1 + confidence) / 2., n - 1)
    return m, m - h, m + h
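# Hypothetical example for post_credible_interval (assumes `np` and `stats`
# are imported as above); the half-width h shrinks roughly as 1/sqrt(n).
import numpy as np

draws = np.random.default_rng(4).normal(loc=1.0, scale=0.5, size=50)
m, lo, hi = post_credible_interval(draws, confidence=0.95)
print(m, lo, hi)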
def norm_fit(self, show_it=0, save_it=0, save_dir=None, save_name=None):
    _guess = [stats.tmean(self.x), stats.tstd(self.x)]
    _x = self.x
    _x.sort()
    self.norm_params, self.norm_params_covariance = optimize.curve_fit(
        self.norm_dist_CDF, _x,
        [(i + 1) / len(_x) for i in range(len(_x))], _guess)
    self.hist_norm_of_move(show_it=show_it, save_it=save_it,
                           save_dir=save_dir, save_name=save_name)
def print_and_plot_results(count, results, verbose, plot_file_name):
    print("RPS calculated as 95% confidence interval")
    rps_mean_ar = []
    low_ar = []
    high_ar = []
    test_name_ar = []
    for test_name in sorted(results):
        data = results[test_name]
        rps = count / array(data)
        rps_mean = tmean(rps)
        rps_var = tvar(rps)
        low, high = norm.interval(0.95, loc=rps_mean, scale=rps_var**0.5)
        times = array(data) * 1000000 / count
        times_mean = tmean(times)
        times_stdev = tstd(times)
        print('Results for', test_name)
        print('RPS: {:d}: [{:d}, {:d}],\tmean: {:.3f} μs,'
              '\tstandard deviation {:.3f} μs'
              .format(int(rps_mean), int(low), int(high),
                      times_mean, times_stdev))
        test_name_ar.append(test_name)
        rps_mean_ar.append(rps_mean)
        low_ar.append(low)
        high_ar.append(high)
        if verbose:
            print(' from', times)
        print()
    if plot_file_name is not None:
        import matplotlib.pyplot as plt
        from matplotlib import cm
        fig = plt.figure()
        ax = fig.add_subplot(111)
        L = len(rps_mean_ar)
        color = [cm.autumn(float(c) / (L - 1)) for c in arange(L)]
        bars = ax.bar(arange(L), rps_mean_ar, color=color,
                      yerr=(low_ar, high_ar), ecolor='k')
        # order of legend is reversed for visual appeal
        ax.legend(reversed(bars), reversed(test_name_ar), loc='upper left')
        ax.get_xaxis().set_visible(False)
        plt.ylabel('Requests per Second', fontsize=16)
        print(plot_file_name)
        plt.savefig(plot_file_name, dpi=96)
        print("Plot is saved to {}".format(plot_file_name))
        if verbose:
            plt.show()
def detectchange(threshold, n):
    cam = cv2.VideoCapture(0)
    facedetector = cv2.CascadeClassifier(
        "/usr/local/Cellar/opencv/2.4.9/share/OpenCV/haarcascades/haarcascade_frontalface_alt2.xml")
    sumlist = calibcam(n, cam)
    std = stats.tstd(sumlist)
    avg = sum(sumlist) / n
    i = 0
    framenum = 0
    print("starting detection")
    while True:
        retval, img = cam.read()
        thisz = (img[240:720, 320:960, :].sum() - avg) / std
        if abs(thisz) > threshold:
            print("something weird, zscore=" + str(thisz))
            time.sleep(1)
            retval, newimg = cam.read()
            # sumlist = calibcam(n, cam)
            # avg = sum(sumlist) / n
            # std = stats.tstd(sumlist)
            faces = facedetector.detectMultiScale(newimg)
            if len(faces) > 0:
                print("FOUND A FACEZ!!!!!11!")
                for (x, y, h, w) in faces:
                    cv2.rectangle(newimg, (x, y), (x + w, y + h),
                                  (0, 255, 255), 1)
                # cv2.imshow("obj num " + str(i), newimg)
                cv2.imshow("obj found", newimg)
            else:
                print("no facez :(")
                cv2.imshow("obj not found", newimg)
            i = i + 1
            cv2.waitKey(1)
            sumlist[0] = img[240:720, 320:960, :].sum()
            std = stats.tstd(sumlist)
            avg = sum(sumlist) / n
        framenum = framenum + 1
        if framenum % 10 == 0:
            sumlist[0] = img[240:720, 320:960, :].sum()
            std = stats.tstd(sumlist)
            avg = sum(sumlist) / n
        if framenum % 100 == 0:
            print(framenum)
        # time.sleep(.5)
def NormXCorr(s1, s2):
    """
    Computes the normalized cross correlation distance between two vectors.

    Parameters:
        s1: `numpy array`
            The first vector.

        s2: `numpy array`
            The second vector.

    Returns: `float`
        NormXCorr distance between s1 and s2; the value lies in [-1, 1].
        A value of one indicates a perfect match.
    """
    # s1 and s2 have the same length
    import scipy.stats as ss
    s = s1.shape[0]
    corr = np.sum((s1 - np.mean(s1)) * (s2 - np.mean(s2))) / (
        ss.tstd(s1) * ss.tstd(s2))
    return corr * (1. / (s - 1))
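# Sanity check for NormXCorr (hypothetical vectors): self-correlation scores
# 1 and a sign flip scores -1, matching the [-1, 1] range documented above.
import numpy as np

v = np.array([1.0, 3.0, 2.0, 5.0, 4.0])
print(NormXCorr(v, v))   # ~1.0
print(NormXCorr(v, -v))  # ~-1.0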
def calculate_weights(prev_population, prev_weights, sim_theta):
    from scipy.stats import tstd
    weights = np.array([])
    for i in range(param_number):
        # note: scipy's uniform takes (loc, scale), so this is U(-5, 0);
        # U(-5, 5) would be uniform(-5, 10)
        rv = uniform(-5, 5)
        prior = rv.pdf(sim_theta[i])  # computed but unused below
        prod = []
        for w, th in zip(prev_weights[i],
                         prev_population[i]):  # izip is Python 2 only
            prod.append(w * norm(sim_theta[i],
                                 tstd(prev_population[i])).pdf(th))
        weights = np.append(weights, (0.1 / math.fsum(prod)))
    return weights
def sample_from_previous(prev_population, weights):
    from scipy.stats import tstd
    theta = np.array([])
    for i in range(param_number):
        weighted_mu = calc_weighted_mean(prev_population[i], weights[i])
        # sigma = 0.5 * (np.max(prev_population[i]) - np.min(prev_population[i]))
        sigma = tstd(prev_population[i])
        particle = np.random.normal(weighted_mu, sigma)
        pert_sigma = get_pert_sigma(prev_population[i])
        # pert_particle = np.random.normal(particle, pert_sigma)
        pert_particle = np.random.uniform(particle - pert_sigma,
                                          particle + pert_sigma)
        theta = np.append(theta, pert_particle)
    return theta
def getStd(flux, limits=(None, None), wave=None, wmin=None, wmax=None,
           minsize=20):
    '''
    Get the std of a flux array in a given wavelength range.

    If no min/max wavelengths are given, the std of the whole array is
    returned. If the array used for the std calculation is too small, None
    is returned. A 1-sigma clipping of the flux array can be done by
    providing limits.

    @param flux: The flux array
    @type flux: array

    @keyword limits: Flux limits if flux clipping (1 sigma!) is needed before
                     the STD calculation. None for both limits implies no
                     clipping. None for one of the limits implies a half-open
                     interval.
                     (default: (None,None))
    @type limits: (float,float)
    @keyword wave: The wavelength array. If default, the STD is calculated
                   over the whole flux array.
                   (default: None)
    @type wave: array
    @keyword wmin: The minimum wavelength. If not given, the minimum
                   wavelength is the first entry in the wave array.
                   (default: None)
    @type wmin: float
    @keyword wmax: The maximum wavelength. If not given, the maximum
                   wavelength is the last entry in the wave array.
                   (default: None)
    @type wmax: float
    @keyword minsize: The minimum size of the selected array before
                      proceeding with the noise calculation. 0 if no minimum
                      size is needed.
                      (default: 20)
    @type minsize: int

    @return: The flux std between the given wavelengths
    @rtype: float
    '''
    fsel = selectArray(flux, wave, wmin, wmax)
    if fsel.size <= minsize:
        return None
    if limits == (None, None):
        return std(fsel)
    else:
        return tstd(fsel, limits=limits)
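# The clipping above relies on scipy.stats.tstd's `limits` argument; a
# standalone illustration with hypothetical values (values outside the
# limits are ignored when computing the std):
import numpy as np
from scipy.stats import tstd

flux = np.array([1.0, 1.1, 0.9, 1.05, 50.0])  # one spike
print(tstd(flux))                     # inflated by the spike
print(tstd(flux, limits=(0.0, 2.0)))  # spike excluded from the std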
def getLineScoreStats(df, lineScoreCol, histScoreCol, binNumber=50):
    '''Return a DataFrame of line score stats for each bin.
    The relevant one is probably the mean.'''
    D = {}
    binnedScores = binLineScore(df, lineScoreCol, histScoreCol, binNumber)
    for bin in binnedScores:
        L = binnedScores[bin]
        if len(L) <= 1:
            # singleton bin: record it, then move on (the original
            # `continue` skipped the D[bin] assignment entirely)
            D[bin] = {"mean": L[0], "var": 0, "stanDev.": 0}
            continue
        mean = stats.tmean(L)
        var = stats.tvar(L)
        stanD = stats.tstd(L)
        D[bin] = {"mean": mean, "var": var, "stanDev.": stanD}
    return pd.DataFrame(D).T
def paired_diff_t_test(a, b):
    assert len(a) == len(b)
    diffs = [a[i] - b[i] for i in range(len(a))]  # xrange is Python 2 only
    x_d = stats.tmean(diffs)
    s_d = stats.tstd(diffs)
    n = len(diffs)
    dof = n - 1
    # standard error of the mean difference is s_d / sqrt(n);
    # the original divided by n, which understates the t-statistic
    t_d = x_d / (s_d / n ** 0.5)
    # sf() is the survival function (1-cdf)
    pval = stats.t.sf(abs(t_d), dof)
    print()
    print('t-statistic:\t%.4f' % (t_d))
    print('dof:\t%d' % (dof))
    print('p-value:\t%.4f' % (pval))
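# Cross-check against scipy's paired t-test (hypothetical data): ttest_rel
# reports a two-sided p-value, i.e. twice the one-sided value printed above.
from scipy import stats

a = [5.1, 4.9, 6.0, 5.5, 5.8]
b = [4.8, 4.7, 5.9, 5.0, 5.6]
paired_diff_t_test(a, b)
print(stats.ttest_rel(a, b))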
def background_data(imageName, center_coords, size=100):
    '''Calculate the background value and standard deviation and return them
    as a tuple (background, sigma).

    @param imageName: the path to the image to get the data for
    @param center_coords: the coordinates to center the sampling box around,
        probably the coordinates of the target object
    @param size: the size of the sampling box in pixels
    @returns: a modal value with bins of size 1 count, and a trimmed standard
        deviation that rejects values more than twice the background value'''
    with pyfits.open(imageName) as im:
        box = getBox(im[0], center_coords, size)
        # use bins of size 1 ranging from the minimum to maximum values
        # of the sample box
        bins = numpy.arange(box.min(), box.max(), 1)
        x, y = im_histogram(box, bins=bins)
        # compute the location of the peak of the histogram
        midx = numpy.argmax(y)
        center = x[midx]
        sigma = tstd(box, [0, 2 * center])  # trim to twice the peak value
        return (center, sigma)
def main(argv):
    args = ARGS.parse_args()
    count = args.count
    concurrency = args.concurrency
    verbose = args.verbose
    tries = args.tries
    loop = asyncio.get_event_loop()
    suite = [run_aiohttp, run_tornado, run_twisted]
    suite *= tries
    random.shuffle(suite)
    all_times = collections.defaultdict(list)
    all_rps = collections.defaultdict(list)
    for test in suite:
        test_name = test.__name__
        rps, times = loop.run_until_complete(
            run(test, count, concurrency, loop=loop, verbose=verbose,
                profile=args.profile))
        all_times[test_name].extend(times)
        all_rps[test_name].append(rps)
    if args.profile:
        profiler.dump_stats('out.prof')
    print()
    for test_name in sorted(all_rps):
        rps = array(all_rps[test_name])
        times = array(all_times[test_name]) * 1000
        rps_mean = tmean(rps)
        times_mean = tmean(times)
        times_stdev = tstd(times)
        times_median = float(median(times))
        print('Results for', test_name)
        print('RPS: {:d},\tmean: {:.3f} ms,'
              '\tstandard deviation {:.3f} ms\tmedian {:.3f} ms'
              .format(int(rps_mean), times_mean, times_stdev, times_median))
    return 0
def data():
    # EXPERIMENTAL DATA
    # Microsphere size measurements [microsphere diameters] matrix
    micsM = pickle.load(open("micsmat.p", "rb"))

    # STATISTICAL VALUES
    # mean vector
    meanV = np.zeros(8)
    for i in range(0, 8):
        meanV[i] = np.mean(micsM[i])
    # sample std. deviation (sigma) vector
    sigmaV = np.zeros(8)
    for j in range(0, 8):
        sigmaV[j] = tstd(micsM[j])
    return micsM, meanV, sigmaV
def get_merit(self, start_date, number_of_days_to_look_back,
              use_exponential_moving_average=False):
    adjusted_prices = self.get_adjusted_prices_in_range(
        start_date, number_of_days_to_look_back)
    if adjusted_prices is None:
        return None
    merit = None
    if use_exponential_moving_average:
        scaling_factor = 0.6
        ema_for_start_date = self.compute_exponential_moving_average(
            adjusted_prices, scaling_factor)
        # Compute EMA for the trading day |number_of_days_to_look_back| days ago.
        start_date_index = self.find_date_index_in_adjusted_prices(start_date)
        previous_start_date_index = (start_date_index
                                     + number_of_days_to_look_back)
        previous_months_adjusted_prices = self.get_adjusted_prices_in_range(
            previous_start_date_index, number_of_days_to_look_back, 'index')
        ema_for_previous_start_date = self.compute_exponential_moving_average(
            previous_months_adjusted_prices, scaling_factor)
        merit = ((ema_for_start_date - ema_for_previous_start_date)
                 / ema_for_previous_start_date) / stats.tstd(adjusted_prices)
    else:
        # Reverse adjusted_prices so index 0 is the oldest date
        adjusted_prices.reverse()
        # Compute best fit line's slope and y-intercept for adjusted_prices
        slope, y_intercept = numpy.polyfit(
            range(0, number_of_days_to_look_back), adjusted_prices, 1)
        # Build list for adjusted_prices best fit line
        best_fit_line_for_adjusted_prices = []
        actual_and_best_fit_difference = []
        for x in range(0, number_of_days_to_look_back):
            best_fit_line_for_adjusted_prices.append((slope * x) + y_intercept)
            diff = ((adjusted_prices[x] - best_fit_line_for_adjusted_prices[x])
                    / best_fit_line_for_adjusted_prices[x])
            actual_and_best_fit_difference.append(diff)
        # Compute gain of best_fit_line_for_adjusted_prices
        start_price = best_fit_line_for_adjusted_prices[0]
        end_price = best_fit_line_for_adjusted_prices[-1]
        best_fit_gain_percentage = 100.0 * (end_price - start_price) / start_price
        # Compute standard deviation of the relative differences
        actual_and_best_fit_difference_standard_deviation = stats.tstd(
            actual_and_best_fit_difference)
        merit = (best_fit_gain_percentage
                 / actual_and_best_fit_difference_standard_deviation)
    return merit
def get_ci(im_data, center='median', mod=3000.0, percentile=0.01):
    flattened = scipy.concatenate(im_data)
    flattened = flattened[scipy.nonzero(flattened)]
    if center == 'median':
        mu = scipy.median(flattened)
    elif center == 'mean':
        mu = scipy.average(flattened)
    sigma = stats.tstd(flattened)
    # was `mod == scipy.float_(mod)`, a comparison that discarded the cast
    mod = scipy.float_(mod)
    sigma = var_truncNormal(mu - mod, mu + mod, mu, sigma, flattened, mod=mod)
    ci = 2 * mu - stats.norm.ppf(percentile, mu, sigma)
    return ci
def id_cr(orig_img, kernel_size=9, thresh=300):
    img = orig_img.copy()
    filt_img = img.copy()
    l1 = pyplot.imshow(img, interpolation='nearest', cmap='bone',
                       vmin=0, vmax=1000)
    for irow in range(np.shape(img)[0]):
        filt_img[irow, :] = medfilt(filt_img[irow, :],
                                    kernel_size=kernel_size)
        stdev = tstd(img[irow, :])
        cr_pix = np.where(abs((img[irow, :] - filt_img[irow, :])) > stdev)
        img[irow, cr_pix[0]] = -999
    # What should I do with the error array
    x = np.where(img == -999)
    # Uncomment this block of code to see the flagged pixels
    # print(np.shape(x))
    # pyplot.plot(x[1], x[0], 'r.')
    # pdb.set_trace()
    # pyplot.close()
    return img
def _convert_time_series(time_series):
    """Establishes a symbolic library, normalizes the time series data, and
    represents the data (a list of floats) with that library: each data
    point is converted into a symbol to be used in the symbolic matrix for
    motif discovery, which allows similar data points to be compared by
    specifying a threshold range of values that each symbol represents.
    This function averages data points over intervals of specified length
    to determine the symbolic representations of those intervals.
    """
    differential = np.subtract(time_series[1:], time_series[:-1])
    std_dev = stats.tstd(differential)
    mean = stats.tmean(differential)
    norm_differential = (differential - mean) / std_dev
    percentiles = np.arange(len(symbol_list) + 1) * (100. / len(symbol_list))
    zscores = np.array([stats.scoreatpercentile(norm_differential, p)
                        for p in percentiles])
    return norm_differential, zscores
def filterByRemoval(data, ydata, time):
    std = stats.tstd(data)
    mean = stats.tmean(data)
    # mark outliers; note the asymmetric thresholds (1 std above, 2 std below)
    for i, x in enumerate(data):
        if x > mean + (1 * std):
            data[i] = 'x'
            ydata[i] = 'x'
            time[i] = 'x'
        elif x < mean - (2 * std):
            data[i] = 'x'
            ydata[i] = 'x'
            time[i] = 'x'
    # drop the marked entries in place
    time[:] = [x for x in time if x != 'x']
    data[:] = [x for x in data if x != 'x']
    ydata[:] = [x for x in ydata if x != 'x']
    return data, ydata, time
def get_ci(im_data, center='median', mod=3000.0, percentile=0.01):
    flattened = sp.concatenate(im_data)
    flattened = flattened[sp.nonzero(flattened)]
    if center == 'median':
        mu = sp.median(flattened)
    elif center == 'mean':
        mu = sp.average(flattened)
    old_sigma = stats.tstd(flattened)
    # mod = sp.float_(mod)
    sigma, x1, x2, cx, yhat, sigma2 = var_truncNormal(
        mu - mod, mu + mod, mu, old_sigma, flattened, mod=mod)
    ci = 2 * mu - stats.norm.ppf(percentile, mu, sigma)
    return (ci, mu, sigma, mu - mod, mu + mod, old_sigma,
            x1, x2, cx, yhat, sigma2)