def _get_means_stdevs(cls, x, y):
    x_y_counter_lin = cls._convert_x_y_to_counter(x, y)
    x_y_counter = cls._convert_x_y_to_counter(x, [ln(y_i) for y_i in y])
    # Guard against a zero standard deviation before taking the log;
    # the original `1 ** -10` always evaluates to 1, so use a small epsilon instead.
    st_dev = {x_i: ln(stdev(y_i) if stdev(y_i) > 0 else 1e-10)
              for x_i, y_i in x_y_counter_lin.items()}
    mean_ = {x_i: mean(y_i) for x_i, y_i in x_y_counter.items()}
    return cls._get_mean_stdev_from_counter(x_y_counter, st_dev, mean_)
def cohens_d(df, value='score_combined', group_col='patient_diagnosis_super_class'):
    groups = df[group_col].value_counts()
    print('warning: Cohen\'s d will be computed from the 2 groups with the most data:\n',
          groups.index[0], groups.index[1])
    rvs1 = df.loc[df[group_col] == groups.index[0], value]
    rvs2 = df.loc[df[group_col] == groups.index[1], value]
    # numpy has no `stdev`; use np.std with ddof=1 for the sample standard deviation
    pooled_sd = np.sqrt((np.std(rvs1, ddof=1) ** 2 + np.std(rvs2, ddof=1) ** 2) / 2)
    res = (np.mean(rvs1) - np.mean(rvs2)) / pooled_sd
    return res
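# A minimal usage sketch for cohens_d (hedged): the DataFrame and its values below
# are made up for illustration; it only assumes `import numpy as np` and
# `import pandas as pd`, as elsewhere in these snippets.
import numpy as np
import pandas as pd

example_df = pd.DataFrame({
    'score_combined': [1.0, 1.2, 0.9, 1.1, 2.0, 2.2, 1.9, 2.1],
    'patient_diagnosis_super_class': ['A', 'A', 'A', 'A', 'B', 'B', 'B', 'B'],
})
# With equal group sizes this reduces to (mean_A - mean_B) / pooled sample SD.
print(cohens_d(example_df))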
def add_pixel_gain(self, gain):
    ...

def add_nonlinear_gain(self, gain):
    ...

def add_background(self, bg):
    ...

@property
def ensemble_statistics(self):
    stats = {}
    # numpy has no `stdev`; np.std computes the standard deviation
    stats['raw intensity'] = (np.mean(self.raw_intensities),
                              np.std(self.raw_intensities))
    if self.modified_intensities:
        stats['modified intensity'] = (np.mean(self.modified_intensities),
                                       np.std(self.modified_intensities))
    if self.raw_correlations:
        stats['raw correlation'] = (np.mean(self.raw_correlations),
                                    np.std(self.raw_correlations))
    if self.modified_correlations:
        stats['modified correlation'] = (np.mean(self.modified_correlations),
                                         np.std(self.modified_correlations))
    return stats
def plot_peptide_avg_model_fits(dataset, model_pfs, num_models=100, outfile=None, show_plot=False):
    # Given a dataset and a set of models, plot the fit to the experimental data
    fig = plt.figure()
    ax = plt.gca()

    # First, calculate the average and SD of deuterium incorporation at each
    # peptide and timepoint
    model_deuts = {}
    for pep in dataset.get_peptides():
        pep_deuts = {}
        for tp in pep.get_timepoints():
            tp_deuts = []
            for pfs in model_pfs:
                tp_deuts.append(get_timepoint_deuteration(pep, tp.time, pfs))
            pep_deuts[tp.time] = tp_deuts
        model_deuts[pep.sequence] = pep_deuts

    for pep in dataset.get_peptides():
        pep_deut = model_deuts[pep.sequence]
        x = []
        yavg = []
        yerror = []
        for tp in pep.get_timepoints():
            x.append(tp.time)
            xt = [int(tp.time)] * len(tp.get_replicates())
            yt = [float(r.deut) for r in tp.get_replicates()]
            plt.scatter(xt, yt)
            yavg.append(numpy.average(pep_deut[tp.time]))
            # numpy has no `stdev`; numpy.std computes the standard deviation
            yerror.append(numpy.std(pep_deut[tp.time]))
        plt.errorbar(x, yavg, yerr=yerror)

    ax.set_xscale('log')
    ax.set_ylabel("%D Incorporation")
    ax.set_xlabel("Time (seconds)")
    #ax.set_xlim=(1,3600)
    plt.axis((1, 3600, 0, 100))
    #plt.text(1,1,chi)
    fig.suptitle(dataset.name + "_" + pep.sequence)  # +"_"+str(chi)
    if outfile is None:
        plt.show()
    elif not show_plot:
        plt.savefig(outfile, bbox_inches=0)
    else:
        plt.show()
        plt.savefig(outfile, bbox_inches=0)
def get_stocks(stocks):
    stock_list = []
    price_list = []
    mean_list = []
    std_list = []
    for stock in stocks:
        growth = np.array(stock.growth)
        returns = np.array(stock.returns)
        multiple = np.array(stock.multiple)
        value = np.array(stock.value)
        matrix_of_stock = np.column_stack((growth, returns, multiple))
        mean_list.append(np.mean(growth))
        mean_list.append(np.mean(returns))
        mean_list.append(np.mean(multiple))
        mean_list.append(np.mean(value))
        # numpy has no `stdev`; use np.std
        std_list.append(np.std(growth))
        std_list.append(np.std(returns))
        std_list.append(np.std(multiple))
        std_list.append(np.std(value))
        stock_list.append(matrix_of_stock)
        price_list.append(value)
    return stock_list, price_list, mean_list, std_list
def printme(self, verbose=False):
    try:
        print "\nHa", np.mean(self.Ha)
        if verbose:
            print self.Ha
    except (IndexError, TypeError):
        pass
    try:
        print "\nHb", np.mean(self.Hb)
        if verbose:
            print self.Hb
    except (IndexError, TypeError):
        pass
    try:
        print "\nO2", np.mean(self.O23727)
        if verbose:
            print self.O23727
    except (IndexError, TypeError):
        pass
    try:
        print "\nO3", np.mean(self.O35007)
        if verbose:
            print self.O35007
    except (IndexError, TypeError):
        pass
    try:
        print "\nO34959", np.mean(self.O34959)
        if verbose:
            print self.O34959
    except (IndexError, TypeError):
        pass
    try:
        print "\nZ94", np.mean(self.mds['Z94'])
        if verbose:
            print self.mds['Z94']
    except (IndexError, TypeError):
        pass
    try:
        print "\nR23", np.mean(self.R23)
        if verbose:
            print self.R23
    except (IndexError, TypeError):
        pass
    try:
        print "\nlog(R23)", np.mean(self.logR23)
        if verbose:
            print self.logR23
    except (TypeError, IndexError):
        pass
    try:
        print "\nlog([NII][OII])", stats.nanmean(self.logN2O2)
        if verbose:
            print self.logN2O2
    except (TypeError, IndexError):
        pass
    try:
        print "\nlog([OIII][OII])", stats.nanmean(self.logO3O2)
        if verbose:
            print self.logO3O2
    except (TypeError, IndexError):
        pass
    for k in self.mds.iterkeys():
        print "\n", k,
        try:
            # numpy has no `stdev`; np.std is the correct call
            print stats.nanmean(self.mds[k]), np.std(self.mds[k])
        except (IndexError, TypeError):
            if verbose:
                print self.mds[k]
def get_noise(self, data):
    """Calculates noise over last dim in data (time), using .noisemethod"""
    #print('calculating noise')
    #ncores = mp.cpu_count()
    #pool = threadpool.Pool(ncores)
    if self.noisemethod == 'median':
        #noise = pool.map(self.get_median, data) # multithreads over rows in data
        #noise = np.median(np.abs(data), axis=-1) / 0.6745 # see Quiroga2004
        # np.abs does a copy, so modifying the result in-place is safe:
        noise = util.median_inplace_2Dshort(np.abs(data)) / 0.6745  # see Quiroga2004
        #noise = np.mean(np.abs(data), axis=-1) / 0.6745 / 1.2
        #noise = util.mean_2Dshort(np.abs(data)) / 0.6745 # see Quiroga2004
    elif self.noisemethod == 'stdev':
        #noise = pool.map(self.get_stdev, data) # multithreads over rows in data
        # numpy has no `stdev`; np.std computes the standard deviation
        noise = np.std(data, axis=-1)
    else:
        raise ValueError
    #pool.terminate() # pool.close() doesn't allow Python to exit when spyke is closed
    #pool.join() # unnecessary, hangs
    #return np.asarray(noise)
    return noise
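# A standalone sketch of the same idea (hedged): the median of |signal| divided by
# 0.6745 is a robust estimate of the Gaussian noise standard deviation (the
# Quiroga 2004 estimator referenced above). This plain-numpy version only assumes
# `import numpy as np`; `util.median_inplace_2Dshort` is the project's optimized
# equivalent and is not reproduced here.
import numpy as np

rng = np.random.default_rng(0)
data = rng.normal(scale=2.0, size=(4, 10000))     # 4 channels of pure Gaussian noise
mad_noise = np.median(np.abs(data), axis=-1) / 0.6745
std_noise = np.std(data, axis=-1)
# Both estimates should be close to the true scale of 2.0 for Gaussian data;
# the median-based version is far less sensitive to spikes/outliers.
print(mad_noise, std_noise)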
def HasEnoughRuns(self, graph_config, confidence_level):
    """Checks if the mean of the results for a given trace config is within
    0.1% of the true value with the specified confidence level.

    This assumes a Gaussian distribution of the noise and is based on
    https://en.wikipedia.org/wiki/68%E2%80%9395%E2%80%9399.7_rule.

    Args:
      graph_config: An instance of GraphConfig.
      confidence_level: Number of standard deviations from the mean that all
          values must lie within. Typical values are 1, 2 and 3 and correspond
          to 68%, 95% and 99.7% probability that the measured value is within
          0.1% of the true value.

    Returns:
      True if the specified confidence level has been achieved.
    """
    if not isinstance(graph_config, TraceConfig):
        return all(self.HasEnoughRuns(child, confidence_level)
                   for child in graph_config.children)

    trace = self.traces.get(graph_config.name, {})
    results = trace.get('results', [])
    logging.debug('HasEnoughRuns for %s', graph_config.name)

    if len(results) < MIN_RUNS_FOR_CONFIDENCE:
        logging.debug('  Ran %d times, need at least %d',
                      len(results), MIN_RUNS_FOR_CONFIDENCE)
        return False

    logging.debug('  Results: %d entries', len(results))
    avg = mean(results)
    avg_stderr = stdev(results) / sqrt(len(results))
    logging.debug('  Mean: %.2f, mean_stderr: %.2f', avg, avg_stderr)
    logging.info('>>> Confidence level is %.2f', avg / (1000.0 * avg_stderr))
    return confidence_level * avg_stderr < avg / 1000.0
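# A minimal sketch of the stopping criterion above (hedged): keep adding runs until
# confidence_level standard errors of the mean fall below 0.1% of the mean. The
# names here (has_enough_runs, fake_results) are illustrative only and use the
# standard-library statistics module, as the method above appears to.
from math import sqrt
from statistics import mean, stdev

MIN_RUNS_FOR_CONFIDENCE = 10

def has_enough_runs(results, confidence_level=2):
    if len(results) < MIN_RUNS_FOR_CONFIDENCE:
        return False
    avg = mean(results)
    avg_stderr = stdev(results) / sqrt(len(results))
    return confidence_level * avg_stderr < avg / 1000.0

fake_results = [100.0 + 0.01 * i for i in range(20)]  # very low-noise measurements
print(has_enough_runs(fake_results))                  # True: stderr is well below 0.1% of the mean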
def exposureStatistics(self, statisticsDictionary):
    """Calculate exposure level statistics based on the existing
    per-amplifier and per-detector measurements.

    Parameters
    ----------
    statisticsDictionary : `dict` [`str`, `dict` [`str`, scalar]]
        Dictionary of measured statistics.  The top level dictionary is
        keyed on the detector names, and contains the measured statistics
        from the per-detector measurements.

    Returns
    -------
    outputStatistics : `dict` [`str`, scalar]
        A dictionary of the statistics measured and their values.
    """
    detectorMeans = []

    for detName, stats in statisticsDictionary.items():
        # Get detector stats:
        detectorMeans.append(stats['DET']['MEAN'])

    # numpy has no `stdev`; np.std gives the scatter of the per-detector means
    return {'SCATTER': np.std(detectorMeans)}
def tTest(self, a, b, alpha, hNull, hAlt):
    # remove NAs pairwise
    temp = pd.DataFrame({'a': a, 'b': b}).dropna()
    a, b = temp['a'], temp['b']
    n = len(a)
    # pooled standard deviation for two equal-sized samples
    # (numpy has no `stdev`; use the ddof=1 sample variance)
    varA = np.var(a, ddof=1)
    varB = np.var(b, ddof=1)
    pooled_sd = np.sqrt((varA + varB) / 2)
    # test statistic: mean difference divided by pooled_sd * sqrt(2/n)
    tStat = (np.mean(a) - np.mean(b)) / (pooled_sd * np.sqrt(2 / n))
    # calculate degrees of freedom and get the two-sided p-value
    dof = 2 * n - 2
    pVal = 2 * (1 - stats.t.cdf(np.abs(tStat), df=dof))
    # cross-check against scipy's implementation
    t2, p2 = stats.ttest_ind(a, b)
    if np.isclose(pVal, p2):
        return (tStat, pVal)
    else:
        print("incorrect p value")
        return (t2, p2)
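# A quick cross-check sketch (hedged): for two equal-sized samples the pooled-SD
# statistic used above should match scipy.stats.ttest_ind. The arrays are made up;
# only numpy and scipy.stats are assumed.
import numpy as np
from scipy import stats

a = np.array([5.1, 4.9, 5.3, 5.0, 5.2, 4.8])
b = np.array([4.5, 4.7, 4.4, 4.6, 4.8, 4.3])
n = len(a)
pooled_sd = np.sqrt((np.var(a, ddof=1) + np.var(b, ddof=1)) / 2)
t_manual = (np.mean(a) - np.mean(b)) / (pooled_sd * np.sqrt(2 / n))
t_scipy, p_scipy = stats.ttest_ind(a, b)   # equal-variance two-sample t-test
print(t_manual, t_scipy, p_scipy)          # t_manual and t_scipy should agree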
data = wave.read(song_file)
sample_rate = data[0]
data = numpy.asarray(data[1])
num_samples = len(data)
example_length = frame_length * seq_length

# Normalize?
if normalize == True:
    print "\nNormalizing -1 to 1..."
    data_mean = numpy.mean(data)
    data_min = numpy.min(data)
    data_max = numpy.max(data)
    data = rescale(data - data_mean, -1, 1, data_min, data_max)
    print "subtracted training mean and normalized data to [-1,1]"
elif normalize == "stdev":
    # numpy has no `stdev`; divide by numpy.std of the data (not an undefined name)
    data_stdev = numpy.std(data)
    data = data / data_stdev
    print "divided by standard deviation"
else:
    print "using unnormalized data"

# Make sure data will be the right length for reshaping
print "\nGetting examples..."
shift = 0
num_examples = 0
data_to_use = []
while len(data[shift:]) >= example_length + frame_length:
    num_ex_this_pass = len(data[shift:]) // example_length
    data_to_use.extend(data[shift : shift + (num_ex_this_pass * example_length)])
    num_examples += num_ex_this_pass
    shift += example_shift
def scan(self):
    initial_setpoint = self._get_control_f()
    scan_min = initial_setpoint + self._scan_min
    scan_max = initial_setpoint + self._scan_max
    #print 'initial_setpoint {:.2f},scan_min {:.2f},scan_max {:.2f}, steps {}'.format(initial_setpoint,scan_min,scan_max, steps)

    # First scan negative. See if it is going up or down (or unclear)
    # If going down, switch to positive
    # If going up, keep going until it goes down, then break
    # If unclear, go to edge of scan range and return
    # Next do the same for positive
    finished = 0

    ##########################################
    # Start with negative
    print "Scanning negative"
    steps = int((initial_setpoint - scan_min) / self._control_step_size)
    udrange_temp = np.linspace(initial_setpoint, scan_min, steps)
    values_temp = np.zeros(len(udrange_temp))
    true_udrange_temp = np.zeros(len(udrange_temp))
    smoothed_values = np.zeros(len(udrange_temp) - self.get_smoothing_N() + 1)
    max_value = 0
    max_frac_change = 0

    for i, sp in enumerate(udrange_temp):
        print i
        if (msvcrt.kbhit() and (msvcrt.getch() == 'q')):
            self._set_control_f(initial_setpoint)
            break
        #print 'sp',sp
        self._set_control_f(sp)
        if self.get_dwell_after_set():
            qt.msleep(self._dwell_time)
        true_udrange_temp[i] = self._get_control_f()
        values_temp[i] = self.get_value()

        # First of all, if above the threshold, that's us happy.
        if values_temp[i] > self.get_good_value():
            print 'Found good value!'
            finished = 1
            break

        if i >= (self.get_smoothing_N() - 1):
            # Need to build up some values at the start before considering whether decreasing or not
            smoothed_index = i - (self.get_smoothing_N() - 1)
            smoothed_values[smoothed_index] = np.mean(values_temp[smoothed_index:(i + 1)])  # very simple smoothing filter
            fractional_change = smoothed_values[smoothed_index] / smoothed_values[0]  # fractional change from first smoothed value

            if values_temp[i] > max_value:
                # Track the max value reached so far
                max_value = values_temp[i]
                max_frac_change = fractional_change

            # Try and measure the 'noise' in the signal
            if smoothed_index > 0:
                diff_smoothed = (smoothed_values[0:(smoothed_index - 1)] -
                                 smoothed_values[1:smoothed_index]) / smoothed_values[0]
            else:
                diff_smoothed = 0
            # If nothing is changing, noise in diff might be high
            # (numpy has no `stdev`; np.std computes the standard deviation)
            not_too_noisy_to_tell = np.std(diff_smoothed) < (1 - self.get_threshold_for_decreasing())

            if fractional_change < self.get_threshold_for_decreasing() and not_too_noisy_to_tell:
                # Therefore must be decreasing from start
                print "Decreasing in this direction!"
                break
            if (max_value > self.get_min_peak_value()) and \
               (fractional_change < max_frac_change * self.get_threshold_for_past_peak()):
                # Must have gone past peak
                print "Gone past peak!"
                finished = 1
                break

    if finished == 0:
        # If not finished yet
        print "Scanning positive"
        # Store measured values so far
        true_udrange = true_udrange_temp[0:i]
        values = values_temp[0:i]

        # Start by going back to start
        self._set_control_f(initial_setpoint)
        if self.get_dwell_after_set():
            for x in range(i + 4):
                qt.msleep(self._dwell_time)  # Wait for a length of time determined by how far we got
                if self._get_control_f() > initial_setpoint + 0.05 * self._scan_min:
                    break
            print x

        ##########################################
        # Do positive
        steps = int((scan_max - initial_setpoint) / self._control_step_size)
        udrange_temp = np.linspace(initial_setpoint, scan_max, steps)
        values_temp = np.zeros(len(udrange_temp))
        true_udrange_temp = np.zeros(len(udrange_temp))
        smoothed_values = np.zeros(len(udrange_temp) - self.get_smoothing_N() + 1)
        max_value = 0
        max_frac_change = 0
        finished = 0

        for i, sp in enumerate(udrange_temp):
            if (msvcrt.kbhit() and (msvcrt.getch() == 'q')):
                self._set_control_f(initial_setpoint)
                break
            #print 'sp',sp
            self._set_control_f(sp)
            if self.get_dwell_after_set():
                qt.msleep(self._dwell_time)
            true_udrange_temp[i] = self._get_control_f()
            values_temp[i] = self.get_value()

            # First of all, if above the threshold, that's us happy.
            if values_temp[i] > self.get_good_value():
                print 'Found good value!'
                finished = 1
                break

            if i >= (self.get_smoothing_N() - 1):
                # Need to build up some values at the start before considering whether decreasing or not
                smoothed_index = i - (self.get_smoothing_N() - 1)
                smoothed_values[smoothed_index] = np.mean(values_temp[smoothed_index:(i + 1)])  # very simple smoothing filter
                fractional_change = smoothed_values[smoothed_index] / smoothed_values[0]  # fractional change from first smoothed value

                if values_temp[i] > max_value:
                    # Track the max value reached so far
                    max_value = values_temp[i]
                    max_frac_change = fractional_change

                # Try and measure the 'noise' in the signal
                if smoothed_index > 0:
                    diff_smoothed = (smoothed_values[0:(smoothed_index - 1)] -
                                     smoothed_values[1:smoothed_index]) / smoothed_values[0]
                else:
                    diff_smoothed = 0
                # If nothing is changing, noise in diff might be high
                not_too_noisy_to_tell = np.std(diff_smoothed) < (1 - self.get_threshold_for_decreasing())
                #basically_dead = smoothed_values[smoothed_index] < self.get_dead_value()

                if fractional_change < self.get_threshold_for_decreasing() and not_too_noisy_to_tell:
                    # Therefore must be decreasing from start
                    print "Decreasing in this direction!"
                    break
                if (max_value > self.get_min_peak_value()) and \
                   (fractional_change < max_frac_change * self.get_threshold_for_past_peak()):
                    # Must have gone past peak
                    print "Gone past peak!"
                    finished = 1
                    break

        if finished == 0:
            # If necessary, end by going back to start
            self._set_control_f(initial_setpoint)
            if self.get_dwell_after_set():
                for x in range(i + 4):
                    qt.msleep(self._dwell_time)  # Wait for a length of time determined by how far we got
                    if self._get_control_f() > initial_setpoint + 0.05 * self._scan_min:
                        break
                print x

        # Add on extra values (np.append returns a new array, so re-assign)
        true_udrange = np.append(true_udrange, true_udrange_temp[0:i])
        values = np.append(values, values_temp[0:i])
    else:
        true_udrange = true_udrange_temp[0:i]
        values = values_temp[0:i]

    valid_i = np.where(values > self._min_value)
    print true_udrange
    print values
    true_udrange = true_udrange[valid_i]
    values = values[valid_i]
    print true_udrange
    print values

    if self.get_do_plot():
        p = plt.plot(name=self._plot_name)
        p.clear()
        plt.plot(true_udrange, values, 'O', name=self._plot_name)

    return (true_udrange, values)
def main(nV, sparseness, noise):
    number_of_vectors = nV
    dimension_of_cla_vectors = 1024
    dimension_of_mcr_vectors = 1024
    r = [0, 15]
    mcrV = list()
    claV = generateCLAVector(number_of_vectors, dimension_of_cla_vectors, sparseness)
    print(distance(claV[0], claV[1], [0, 1]))
    convSet = generateConversionSet(dimension_of_cla_vectors, dimension_of_mcr_vectors, r)
    idxSet = generateConversionSet(dimension_of_cla_vectors, dimension_of_cla_vectors, r)
    print(distance(convertCLAtoMCR(claV[0], convSet, r, idxSet),
                   convertCLAtoMCR(claV[1], convSet, r, idxSet), r))
    for i in range(0, number_of_vectors):
        mcrV.append(convertCLAtoMCR(claV[i], convSet, r, idxSet))

    CLA_dist = list()
    MCR_dist = list()
    random_MCR_dist = list()
    CLA_noisy_dist = list()
    MCR_noisy_dist = list()

    for i in range(0, number_of_vectors):
        for j in range(i + 1, number_of_vectors):
            CLA_dist.append(distance(claV[i], claV[j], [0, 1]))
            MCR_dist.append(distance(mcrV[i], mcrV[j], r))
            random_MCR_dist.append(distance(convSet[i], convSet[j], r))

    for i in range(0, number_of_vectors):
        noisyV = addNoise(claV[i], noise)
        CLA_noisy_dist.append(distance(claV[i], noisyV, [0, 1]))
        MCR_noisy_dist.append(distance(mcrV[i], convertCLAtoMCR(noisyV, convSet, r, idxSet), r))

    f = open('dist_datapoints', 'w')
    csvwriter = csv.writer(f)
    csvwriter.writerow(CLA_dist)
    csvwriter.writerow(MCR_dist)
    csvwriter.writerow(CLA_noisy_dist)
    csvwriter.writerow(MCR_noisy_dist)
    f.close()

    avg_CLA_dist = statistics.mean(CLA_dist)
    avg_MCR_dist = statistics.mean(MCR_dist)
    avg_MCR_noisy_dist = statistics.mean(MCR_noisy_dist)
    avg_CLA_noisy_dist = statistics.mean(CLA_noisy_dist)
    avg_random_MCR_dist = statistics.mean(random_MCR_dist)
    print("Vectors used=" + str(nV))
    print("Average random MCR distance=" + str(avg_random_MCR_dist))  # average distance between any random MCR vectors
    print("Average CLA Distance=" + str(avg_CLA_dist))  # average distance over all pairs of points (CLA vectors) in CLA space
    print("Average MCR Distance=" + str(avg_MCR_dist))  # average distance over all pairs of points (MCR vectors) in MCR space
    print("Average CLA Distance in " + str(noise) + " noisy CLA=" + str(avg_CLA_noisy_dist))  # average distance between a CLA vector and its noisy version
    print("Average MCR Distance from " + str(noise) + " noisy CLA=" + str(avg_MCR_noisy_dist))  # average distance between the MCR projection of a CLA vector and that of its noisy version

    sdv_CLA_dist = statistics.stdev(CLA_dist)
    sdv_MCR_dist = statistics.stdev(MCR_dist)
    sdv_MCR_noisy_dist = statistics.stdev(MCR_noisy_dist)
    sdv_CLA_noisy_dist = statistics.stdev(CLA_noisy_dist)
    sdv_random_MCR_dist = statistics.stdev(random_MCR_dist)
    print("Standard Deviation random MCR Distance=" + str(sdv_random_MCR_dist))  # std dev of distance between random MCR vectors
    print("Standard Deviation CLA Distance=" + str(sdv_CLA_dist))  # std dev of pairwise distances in CLA space
    print("Standard Deviation MCR Distance=" + str(sdv_MCR_dist))  # std dev of pairwise distances in MCR space
    print("Standard Deviation CLA Distance in " + str(noise) + " noisy CLA=" + str(sdv_CLA_noisy_dist))  # std dev of distance between a CLA vector and its noisy version
    print("Standard Deviation MCR Distance from " + str(noise) + " noisy CLA=" + str(sdv_MCR_noisy_dist))  # std dev of distance between MCR projections of a CLA vector and its noisy version
    return
def dumpStatistics(arglist):
    # numpy has no `stdev`; use np.std
    print("mean({:.4E}) median({:.4E}) stdev({:.4E}) min({:.4E}) max({:.4E})".format(
        np.mean(arglist), np.median(arglist), np.std(arglist),
        min(arglist), max(arglist)))

def dumpFullStatistics(arglist):
    print("mean({:.4E}) median({:.4E}) stdev({:.4E}) min({:.4E}) max({:.4E}) "
          "skew({:.4E}) fat_tail({:.4E}) kurtosis({:.4E})".format(
              np.mean(arglist), np.median(arglist), np.std(arglist),
              min(arglist), max(arglist),
              skew(arglist), fatTail(arglist), kurtosis(arglist)))
def error(g):
    # effective number of independent samples, given the autocorrelation time
    nEFF = len(g) / ACtime(g)
    # numpy has no `stdev`; numpy.std computes the standard deviation
    ERROR = numpy.std(g) / math.sqrt(nEFF)
    return ERROR
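# A self-contained sketch of the same error estimate (hedged): for correlated
# samples the standard error of the mean is std/sqrt(N_eff) with N_eff = N / tau,
# where tau is the integrated autocorrelation time. `ACtime` above is the
# project's estimator; here tau is simply the known value for an AR(1) toy
# series, so the numbers are purely illustrative.
import math
import numpy as np

rng = np.random.default_rng(1)
phi = 0.9
g = np.empty(20000)
g[0] = 0.0
for t in range(1, len(g)):            # AR(1) process: strongly autocorrelated
    g[t] = phi * g[t - 1] + rng.normal()

tau = (1 + phi) / (1 - phi)           # integrated autocorrelation time of AR(1)
n_eff = len(g) / tau
naive_err = np.std(g) / math.sqrt(len(g))
corrected_err = np.std(g) / math.sqrt(n_eff)
print(naive_err, corrected_err)       # the corrected error is ~sqrt(tau) larger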
def _get_means_stdevs(cls, x, y):
    x_y_counter = cls._convert_x_y_to_counter(x, y)
    st_dev = {x: stdev(y) for x, y in x_y_counter.items()}
    mean_ = {x: mean(y) for x, y in x_y_counter.items()}
    return cls._get_mean_stdev_from_counter(x_y_counter, st_dev, mean_)
def get_mean_stdev(x: list, y: list) -> tuple:
    x_y_counter = LinearDistributionFunction._convert_x_y_to_counter(x, y)
    st_dev = {x: stdev(y) for x, y in x_y_counter.items()}
    mean_ = {x: mean(y) for x, y in x_y_counter.items()}
    return mean_, st_dev
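# A usage sketch (hedged): _convert_x_y_to_counter presumably groups the y values
# by their x value; the equivalent grouping is spelled out with a plain dict so
# the example is self-contained. The sample data is made up.
from statistics import mean, stdev

xs = [1, 1, 1, 2, 2, 2]
ys = [10.0, 11.0, 9.0, 20.0, 22.0, 18.0]

grouped = {}
for x_i, y_i in zip(xs, ys):
    grouped.setdefault(x_i, []).append(y_i)

means = {x_i: mean(y_vals) for x_i, y_vals in grouped.items()}
stdevs = {x_i: stdev(y_vals) for x_i, y_vals in grouped.items()}  # needs >= 2 values per x
print(means, stdevs)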
import numpy as np
import pandas as pd
from sklearn import tree
from sklearn.tree import DecisionTreeClassifier
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler

# Prep dataset
print('Prepare data')
data = pd.read_csv('data/btcusd.csv')
data['returns'] = data['close'] - data['close'].shift(1)
# rolling statistics are taken over the close price
data['ma200'] = data['close'].rolling(window=200).apply(lambda values: np.average(values))

# Discrete values
# (numpy has no `stdev` or `arrange`; use np.std and np.arange)
data['std30'] = data['close'].rolling(window=30).apply(lambda values: np.round(np.std(values)))
data['std200'] = data['close'].rolling(window=200).apply(lambda values: np.round(np.std(values)))
data['mom10'] = data['close'] - data['close'].shift(10) > 0
data['moving_average'] = data['close'] - data['ma200'] > 0
data = data.dropna()

# Setup pipeline
print('Create pipeline')
pipe_steps = [('scaler', StandardScaler()), ('descT', DecisionTreeClassifier())]
# GridSearchCV parameters for a pipeline step use the '<step>__<param>' convention
check_params = {
    'descT__criterion': ['gini', 'entropy'],
    'descT__max_depth': np.arange(3, 15),
}
pipeline = Pipeline(pipe_steps)
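# A minimal sketch of how the pipeline and parameter grid above would be used
# (hedged): the feature and label choices here are illustrative, not taken from
# the original script, and assume `data` already has the engineered columns.
features = ['returns', 'std30', 'std200', 'mom10', 'moving_average']
target = (data['close'].shift(-1) > data['close']).astype(int)  # hypothetical label: next bar up?

X = data[features].iloc[:-1]
y = target.iloc[:-1]

# The 'descT__' prefix routes each grid parameter to the DecisionTreeClassifier step.
search = GridSearchCV(pipeline, check_params, cv=5)
search.fit(X, y)
print(search.best_params_, search.best_score_)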
def CreateImageProduct(vid, product):
    '''
    Create an image product from a (web)camera.

    Inputs:
        vid (str): The video file from which to create the image product.
        product (int): The code for the image product to create. Currently only
                       a code of 1, for timex, is supported.

    Returns:
        product (array): The image product data
    '''

    def addFrame(r, g, b, r_list, g_list, b_list):
        r = r + r_list[0]
        g = g + g_list[0]
        b = b + b_list[0]
        r_list = r_list[1:len(r_list)]
        g_list = g_list[1:len(g_list)]
        b_list = b_list[1:len(b_list)]
        return r, g, b, r_list, g_list, b_list

    # Read a frame to get the size #
    cap = cv2.VideoCapture(vid)
    cap.set(1, 1)
    test, im = cap.read()

    # Initialize arrays for each color channel; use zeros (not empty) since the
    # frames are accumulated into them #
    r = np.zeros((im.shape[0], im.shape[1]))
    g = np.zeros((im.shape[0], im.shape[1]))
    b = np.zeros((im.shape[0], im.shape[1]))

    # Loop through each frame to keep and add its color channel values to the growing array #
    r_list = list()
    g_list = list()
    b_list = list()
    numFrames = int(cap.get(7))
    for frame in range(0, numFrames):
        cap.set(1, frame)
        test, image = cap.read()
        r_list.append(image[:, :, 2])
        g_list.append(image[:, :, 1])
        b_list.append(image[:, :, 0])

    # Create the desired product #
    if product == 1:  # Timex #
        while len(r_list) > 0:
            r, g, b, r_list, g_list, b_list = addFrame(r, g, b, r_list, g_list, b_list)
        del r_list, g_list, b_list
        r = r / numFrames
        g = g / numFrames
        b = b / numFrames
        product = np.stack([r / 255, g / 255, b / 255], axis=2)
        return product
    elif product == 2:  # Variance #
        # numpy has no `stdev`; np.std would give the standard deviation here
        r = np.std(r)
def get_all_Qs_centre(elat, elon, slat, slon, evdp, half_bin_size=3.0, npoints=8):
    """
    OPTION 2: function draws a default 6 degree polygon around the point at the
    centre of a ray and randomly samples points within the polygon. Sized based
    on the spacing in the Q model.

    Also returns a standard deviation Q value calculated in log space.

    This option was chosen over the ray as there is less variation in the values,
    and the code is much quicker to run (smaller nested loop).
    """
    centre = get_centre_ray(elat, elon, slat, slon, evdp)
    lon, lat = centre[1], centre[0]
    #print(lon, lat)

    # create polygon around central point
    # arbitrarily use 3 degrees... ?
    minlon = lon - half_bin_size
    maxlon = lon + half_bin_size
    minlat = lat - half_bin_size
    maxlat = lat + half_bin_size

    # define polygon
    #poly = Polygon([left_lower, left_upper, right_lower, right_upper])
    #points = random_points_within(poly, npoints)
    #print(points)

    freqs = extract_Q_freq.get_freq_list()

    Q_list = []
    stdevQ_list = []
    #print('looping freqs')
    for freq in freqs:
        print(freq)
        qdat = extract_Q_freq.get_Q_data(freq)

        # get Q vals within region (note the '&' joining all four conditions)
        idx = where((qdat[:, 0] >= minlon) & (qdat[:, 0] <= maxlon) &
                    (qdat[:, 1] >= minlat) & (qdat[:, 1] <= maxlat))[0]
        Q_vals = qdat[:, 2][idx]

        # get mean & stdev (numpy has no `stdev`; use np.std)
        Q_list.append(np.mean(Q_vals))
        stdevQ_list.append(np.std(Q_vals))

    '''
    Q_cumulative = 0
    Q_each_point_list = []
    #print('looping points')
    for i, point in enumerate(points):
        point = points[i]
        lonp, latp = point.coords.xy[0][0], point.coords.xy[1][0]
        Q_dict = extract_Q_freq.get_Q_value(float(freq), lonp, latp)
        #print(Q_dict)
        Q_cumulative += Q_dict['Q']
        Q_each_point_list.append(Q_dict['Q'])

    average_Q = Q_cumulative / npoints

    # calculate stdev in log10 space
    diff_cumulative = 0
    for Q in Q_each_point_list:
        diff_sqrd = (np.log10(Q) - np.log10(average_Q))**2
        diff_cumulative += diff_sqrd

    # standard error calculation for sample
    stdev_Q = (np.sqrt(diff_cumulative / (npoints - 1))) / np.sqrt(npoints)

    Q_list.append(average_Q)
    stdevQ_list.append(stdev_Q)   # stdev in log10 space
    '''

    return Q_list, stdevQ_list
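# A small standalone sketch of the log-space spread mentioned in the docstring
# (hedged): for quantities like Q that vary over orders of magnitude, the scatter
# is often quoted as the standard deviation of log10(Q). The values below are
# made up; only numpy is assumed.
import numpy as np

Q_vals = np.array([150.0, 220.0, 90.0, 400.0, 180.0])
mean_Q = np.mean(Q_vals)
stdev_log10_Q = np.std(np.log10(Q_vals), ddof=1)        # sample stdev in log10 space
stderr_log10_Q = stdev_log10_Q / np.sqrt(len(Q_vals))   # standard error, as in the commented-out block above
print(mean_Q, stdev_log10_Q, stderr_log10_Q)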
def dgc_monthly(station, variable, flags, start, end, plots=False, diagnostics=False, idl=False):
    '''
    Original Distributional Gap Check

    :param obj station: station object
    :param str variable: variable to act on
    :param array flags: flags array
    :param datetime start: data start
    :param datetime end: data end
    :param bool plots: run plots
    :param bool diagnostics: run diagnostics
    :param bool idl: run IDL equivalent routines for median
    :returns: flags - updated flag array
    '''
    if plots:
        import matplotlib.pyplot as plt

    st_var = getattr(station, variable)

    month_ranges = utils.month_starts_in_pairs(start, end)

    # get monthly averages
    month_average = np.empty(month_ranges.shape[0])
    month_average.fill(st_var.mdi)
    month_average_filtered = np.empty(month_ranges.shape[0])
    month_average_filtered.fill(st_var.mdi)

    all_filtered = utils.apply_filter_flags(st_var)
    for m, month in enumerate(month_ranges):
        data = st_var.data[month[0]:month[1]]
        filtered = all_filtered[month[0]:month[1]]

        month_average[m] = dgc_get_monthly_averages(data, OBS_LIMIT, st_var.mdi, MEAN)
        month_average_filtered[m] = dgc_get_monthly_averages(filtered, OBS_LIMIT, st_var.mdi, MEAN)

    # get overall monthly climatologies - use filtered data
    month_average = month_average.reshape(-1, 12)
    month_average_filtered = month_average_filtered.reshape(-1, 12)

    standardised_months = np.empty(month_average.shape)
    standardised_months.fill(st_var.mdi)

    for m in range(12):
        valid_filtered = np.where(month_average_filtered[:, m] != st_var.mdi)

        if len(valid_filtered[0]) >= VALID_MONTHS:
            valid_data = month_average_filtered[valid_filtered, m][0]

            if MEAN:
                clim = np.mean(valid_data)
                # numpy has no `stdev`; np.std gives the spread of the valid data
                spread = np.std(valid_data)
            else:
                if idl:
                    clim = utils.idl_median(valid_data.compressed().reshape(-1))
                else:
                    clim = np.median(valid_data)
                spread = utils.IQR(valid_data)
                if spread <= SPREAD_LIMIT:
                    spread = SPREAD_LIMIT

            standardised_months[valid_filtered, m] = (month_average[valid_filtered, m] - clim) / spread

    standardised_months = standardised_months.reshape(month_ranges.shape[0])

    good_months = np.where(standardised_months != st_var.mdi)

    # must be able to do this with masked arrays
    if plots:
        bins, bincenters = utils.create_bins(standardised_months[good_months], BIN_SIZE)
        dummy, plot_bincenters = utils.create_bins(standardised_months[good_months], BIN_SIZE / 10.)

        hist, binEdges = np.histogram(standardised_months[good_months], bins=bins)

        fit = utils.fit_gaussian(bincenters, hist, max(hist),
                                 mu=np.mean(standardised_months[good_months]),
                                 sig=np.std(standardised_months[good_months]))
        plot_gaussian = utils.gaussian(plot_bincenters, fit)

        dgc_set_up_plot(plot_gaussian, standardised_months[good_months], variable, sub_par="Months")

    # remove all months with a large standardised offset
    if len(good_months[0]) >= MONTH_LIMIT:
        standardised_months = np.ma.masked_values(standardised_months, st_var.mdi)
        large_offsets = np.where(standardised_months >= LARGE_LIMIT)

        if len(large_offsets[0]) > 0:
            for lo in large_offsets[0]:
                flags[month_ranges[lo, 0]:month_ranges[lo, 1]] = 1

            if plots:
                hist, binEdges = np.histogram(standardised_months[large_offsets], bins=bins)
                plot_hist = np.array([0.01 if h == 0 else h for h in hist])
                plt.step(bincenters, plot_hist, 'g-', label='> %i' % LARGE_LIMIT, where='mid', zorder=5)
                plt.axvline(5, c='g')
                plt.axvline(-5, c='g')

        # walk distribution from centre and see if any asymmetry
        sort_order = standardised_months[good_months].argsort()
        # integer division so mid_point can be used as an index
        mid_point = len(good_months[0]) // 2

        good = True
        iter = 1
        while good:
            if standardised_months[good_months][sort_order][mid_point - iter] != \
               standardised_months[good_months][sort_order][mid_point + iter]:
                # using IDL notation
                tempvals = [np.abs(standardised_months[good_months][sort_order][mid_point - iter]),
                            np.abs(standardised_months[good_months][sort_order][mid_point + iter])]

                if min(tempvals) != 0:
                    if max(tempvals) / min(tempvals) >= 2. and min(tempvals) >= 1.5:
                        # substantial asymmetry in distribution -
                        # at least 1.5 from centre and difference of 2.
                        if tempvals[0] == max(tempvals):
                            # LHS
                            bad = good_months[0][sort_order][:mid_point - iter]
                            if plots:
                                badplot = standardised_months[good_months][sort_order][:mid_point - iter]
                        elif tempvals[1] == max(tempvals):
                            # RHS
                            bad = good_months[0][sort_order][mid_point + iter:]
                            if plots:
                                badplot = standardised_months[good_months][sort_order][mid_point + iter:]

                        for b in bad:
                            flags[month_ranges[b, 0]:month_ranges[b, 1]] = 1

                        if plots:
                            hist, binEdges = np.histogram(badplot, bins=bins)
                            plot_hist = np.array([0.01 if h == 0 else h for h in hist])
                            plt.step(bincenters, plot_hist, 'r-', label='Gap', where='mid', zorder=4)

                        good = False

            iter += 1
            if iter == mid_point:
                break

    if plots:
        plt.legend(loc='lower center', ncol=4, bbox_to_anchor=(0.5, -0.2), frameon=False, prop={'size': 13})
        plt.show()
        #plt.savefig(IMAGELOCATION+'/'+station.id+'_DistributionalGap.png')

    return flags  # dgc_monthly
times = defaultdict(dict)

case_str = ','.join(['{}'] * 7).format(*case)
string = "[{}] (Case {} of {}):".format(case_str, idx + 1, N)
print_to_output(string, tee=True)

for m1 in xrange(M1):
    instance = InstanceStructure(*case)
    instance.create_node_data()
    instance.create_scenario_data()
    string = 'INSTANCE ID: {}'.format(instance.ID)
    print_to_output(string)
    for m2 in xrange(M2):
        for mthd, app in (('RLT', 'PH'), ('BigM', 'PH')):
            obj, t = funcs[mthd](app)
            objs[mthd, app][m1, m2] = obj
            times[mthd, app][m1, m2] = t
            print '\tRUN TIME [{0:>4}/{1}]: {2} ({3})'.format(mthd, app, curr(obj), ptime(t))
    print

with open(output, 'ab') as f:
    # keys were stored as (mthd, app) above, so unpack them in that order
    for (mthd, app), d in times.iteritems():
        thymes = d.values()
        for t in thymes:
            f.write('\tRUN TIME [{}/{}]: {}\n'.format(mthd, app, t))
        else:
            # for/else: runs once the loop finishes (there is no break above)
            f.write('\n\tMEAN: {} seconds\n'.format(mean(thymes)))
            f.write('\tSTDEV: {} seconds\n'.format(stdev(thymes)))
            f.write('\tMIN: {} seconds\n'.format(min(thymes)))
            f.write('\tMAX: {} seconds\n\n'.format(max(thymes)))
    else:
        coord2_src1.append(row['y1'])
        coord2_src2.append(row['y2'])

    diff = coord1_src2 - coord1_src1
    diff_list.append(diff)

    if horiz:
        dx = row['x2'] - row['x1']
    else:
        dx = row['y2'] - row['y1']
    dx_list.append(dx)

    angle = np.arctan2(diff, dx) * 180. / np.pi
    angle_list.append(angle)

median_angle = np.median(angle_list)
# numpy has no `stdev`; np.std gives the scatter of the angles
stdev_angle = np.std(angle_list)
print(f'Median and stdev of trace angles relative to rows/cols: '
      f'{median_angle} +/- {stdev_angle} degrees.')

if horiz:
    results = Table([coord2_src1, src1_list, coord2_src2, src2_list,
                     diff_list, dx_list, angle_list],
                    names=('src1_x', 'src1_y', 'src2_x', 'src2_y',
                           'diff', 'trace_length', 'angle'))
else:
    results = Table([src1_list, coord2_src1, src2_list, coord2_src2,
                     diff_list, dx_list, angle_list
array = matrix[2, :]  # Get all columns of the third row and put them in an array
column_as_array = matrix[:, 3]  # Gets column 4 and turns it into a 1d array
column_as_column = matrix[:, 3:4]  # Gets column four and preserves it as a column
# hstack takes a single tuple/list of arrays
new_matrix_with_2_cols = numpy.hstack((matrix[:, 3:4], matrix[:, 4:5]))  # Stacks two columns horizontally
new_matrix_without_col4 = numpy.delete(matrix, 3, 1)  # Delete one column starting at column 4

# Array/matrix arithmetic
doublematrix = matrix * 2  # Every number in matrix gets multiplied by 2
doublematrix + matrix  # Every number in matrix is added to the corresponding number in doublematrix

# Summary statistics of matrices
import numpy
numpy.mean(matrix)  # Computes the mean of all values in the matrix
numpy.max(matrix)
numpy.min(matrix)
numpy.std(matrix)  # numpy's standard deviation is numpy.std, not numpy.stdev
numpy.mean(matrix, axis=0)  # Computes the average of each column, i.e. for each column, the average over all rows
numpy.mean(matrix, axis=1)  # Computes the average of each row, i.e. for each row, the average over all columns

# Get the dimensions of a matrix (or a 1d-array)
(rows, cols) = matrix.shape

# Sorting a matrix

### PLOTS ###
# Make plots
import matplotlib.pyplot