def _do_outer_iteration_stage(self):
    # Generate curve from points
    for inv_val in self._inv_points:
        # Apply one applied pressure and determine invaded pores
        logger.info('Applying capillary pressure: ' + str(inv_val))
        self._do_one_inner_iteration(inv_val)
    # Store results using the network's get/set method
    self['pore.inv_Pc'] = self._p_inv
    self['throat.inv_Pc'] = self._t_inv
    # Find invasion sequence values (to correspond with IP algorithm)
    self._p_seq = sp.searchsorted(sp.unique(self._p_inv), self._p_inv)
    self._t_seq = sp.searchsorted(sp.unique(self._t_inv), self._t_inv)
    self['pore.inv_seq'] = self._p_seq
    self['throat.inv_seq'] = self._t_seq
    # Calculate saturations
    v_total = sp.sum(self._net['pore.volume']) + sp.sum(self._net['throat.volume'])
    sat = 0.
    self['pore.inv_sat'] = 1.
    self['throat.inv_sat'] = 1.
    for i in range(self._npts):
        inv_pores = sp.where(self._p_seq == i)[0]
        inv_throats = sp.where(self._t_seq == i)[0]
        new_sat = (sum(self._net['pore.volume'][inv_pores]) +
                   sum(self._net['throat.volume'][inv_throats])) / v_total
        sat += new_sat
        self['pore.inv_sat'][inv_pores] = sat
        self['throat.inv_sat'][inv_throats] = sat

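# A standalone sketch (not from the original sources) of the dense-rank idiom
# used above: searchsorted(unique(x), x) maps each value to a 0-based rank, so
# equal invasion pressures share one invasion step.  Values are illustrative.
import numpy as np

pc = np.array([3000., 1000., 3000., 2000.])
seq = np.searchsorted(np.unique(pc), pc)
print(seq)  # [2 0 2 1]: equal pressures get the same sequence number
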
def incidence_matrices(relation):
    results = {}
    with tqdm(total=len(relation)) as pbar:
        for i, (subreddit, group) in enumerate(relation.groupby('subreddit')):
            links = sp.array(sorted(sp.concatenate(group.link_ids.tolist())))
            authors = sp.array(sorted(group.author))

            rs, cs = [], []
            for _, row in group.iterrows():
                r = sp.searchsorted(authors, row.author)
                c = sp.searchsorted(links, row.link_ids)
                rs.append(sp.full_like(c, r))
                cs.append(c)
            rs, cs = sp.concatenate(rs), sp.concatenate(cs)
            vals = sp.ones_like(rs)

            incidence = sp.sparse.csr_matrix((vals, (rs, cs)),
                                             (len(authors), len(links)))
            results[subreddit] = {
                'incidence': incidence,
                'authors': authors,
                'links': links}
            pbar.update(len(group))
    return results

def split_dt(ts, xss, dt, t_min):
    assert all(len(ts) == len(xs) for xs in xss)
    from scipy import searchsorted
    t1, t2 = t_min, t_min + dt
    i1, i2 = searchsorted(ts, t1), searchsorted(ts, t2)
    while True:
        yield ts[i1:i2], tuple(xs[i1:i2] for xs in xss)
        t2 += dt
        i1, i2 = i2, searchsorted(ts, t2)

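# A minimal usage sketch for split_dt (not from the original source).  It
# assumes an older SciPy where the top-level scipy.searchsorted re-export used
# inside split_dt still exists; ts and vals are illustrative.
import scipy as sp

ts = sp.linspace(0.0, 10.0, 101)   # sorted timestamps
vals = sp.sin(ts)                  # one signal aligned with ts

windows = split_dt(ts, [vals], dt=2.5, t_min=0.0)
for _ in range(4):                 # the generator never terminates, so bound the loop
    t_win, (v_win,) = next(windows)
    print(t_win[0], t_win[-1], len(v_win))
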
def run(self, npts=25, inv_pressures=None):
    r"""
    Run the algorithm for a specified number of points or at the given
    capillary pressures.

    Parameters
    ----------
    npts : scalar
        The number of points to obtain on the curve.  The points are
        automatically selected to span the range of capillary pressures
        using logarithmic spacing (placing more points at lower capillary
        pressure values).
    inv_pressures : array_like
        A list of capillary pressures to apply.  The list should contain
        increasing and unique values.
    """
    # If no invasion points are given then generate some
    if inv_pressures is None:
        logger.info('Generating list of invasion pressures')
        min_p = sp.amin(self['throat.entry_pressure']) * 0.98  # nudge down
        max_p = sp.amax(self['throat.entry_pressure']) * 1.02  # bump up
        inv_points = sp.logspace(sp.log10(min_p), sp.log10(max_p), npts)
    else:
        # Make sure the given invasion points are sensible
        inv_points = sp.unique(inv_pressures)
    self._inv_points = inv_points

    # Ensure inlets are set
    if sp.sum(self['pore.inlets']) == 0:
        raise Exception('Inlet pores have not been specified')
    # Ensure outlet pores are set if trapping is enabled
    if self._trapping:
        if sp.sum(self['pore.outlets']) == 0:
            raise Exception('Outlet pores have not been specified')

    # Generate curve from points
    for inv_val in self._inv_points:
        # Apply one applied pressure and determine invaded pores
        logger.info('Applying capillary pressure: ' + str(inv_val))
        self._apply_percolation(inv_val)
        if self._trapping:
            logger.info('Checking for trapping')
            self._check_trapping(inv_val)

    # Find invasion sequence values (to correspond with IP algorithm)
    Pinv = self['pore.inv_Pc']
    self['pore.inv_seq'] = sp.searchsorted(sp.unique(Pinv), Pinv)
    Tinv = self['throat.inv_Pc']
    self['throat.inv_seq'] = sp.searchsorted(sp.unique(Tinv), Tinv)

def _do_outer_iteration_stage(self, inv_points):
    # Generate curve from points
    for inv_val in inv_points:
        # Apply one applied pressure and determine invaded pores
        logger.info('Applying capillary pressure: ' + str(inv_val))
        self._do_one_inner_iteration(inv_val)
    # Find invasion sequence values (to correspond with IP algorithm)
    self['pore.inv_seq'] = sp.searchsorted(sp.unique(self['pore.inv_Pc']),
                                           self['pore.inv_Pc'])
    self['throat.inv_seq'] = sp.searchsorted(sp.unique(self['throat.inv_Pc']),
                                             self['throat.inv_Pc'])
    if self._trapping:
        self.evaluate_trapping(self['pore.outlets'])

def remove_outliers(self, points):
    points_filtered = points[:]
    empty_space_width = .02
    check_percent = .02
    edge_width = .005

    # Remove outliers in each dimension (x, y, z)
    for dim in range(3):
        # Sort the points by their values in that dimension
        num_points = scipy.shape(points_filtered)[1]
        points_sorted = points_filtered[:, points_filtered[dim, :].argsort().tolist()[0]]

        # Chop off the top points if they are more than empty_space_width
        # away from the bounding box edge
        ind = int(math.floor(num_points * (1 - check_percent)))
        if points_sorted[dim, -1] - points_sorted[dim, ind] > empty_space_width:
            # Find the first point that isn't within edge_width of the point
            # at ind, and lop off all points after it
            searcharr = scipy.array(points_sorted[dim, :]).flatten()
            searchval = points_sorted[dim, ind] + edge_width
            thres_ind = scipy.searchsorted(searcharr, searchval)
            if thres_ind != 0 and thres_ind != len(searcharr):
                points_filtered = points_sorted[:, 0:thres_ind]
                rospy.loginfo("chopped points off of dim %d, highest val = %5.3f, searchval = %5.3f"
                              % (dim, points_sorted[dim, -1], searchval))
        else:
            points_filtered = points_sorted

        # Do both sides for x and y
        if dim != 2:
            ind = int(math.floor(num_points * check_percent))
            if points_filtered[dim, ind] - points_filtered[dim, 0] > empty_space_width:
                # Find the first point that isn't within edge_width of the
                # point at ind, and lop off all points before it
                searcharr = scipy.array(points_sorted[dim, :]).flatten()
                searchval = points_sorted[dim, ind] - edge_width
                thres_ind = scipy.searchsorted(searcharr, searchval)
                if thres_ind != 0 and thres_ind != len(searcharr):
                    points_filtered = points_filtered[:, thres_ind:-1]
                    rospy.loginfo("chopped points off of dim -%d, lowest val = %5.3f, searchval = %5.3f"
                                  % (dim, points_sorted[dim, 0], searchval))
    return points_filtered

def divide_vector_to_chunks(v, p):
    """Split a sorted vector into chunks at given points.

    Input arguments are a vector 'v', [v_1, ..., v_n], and a vector 'p' of
    split points [p_1, ..., p_m].  The result is a list of vectors of the
    form [v_k, v_{k+1}, ..., v_{k+j}] where
    p_t <= v_k, ..., v_{k+j} < p_{t+1} for some t.

    Precondition: both 'v' and 'p' must be sorted in ascending order!
    """
    vectors = []
    last_i = 0
    for p_low in p:
        i = scipy.searchsorted(v, p_low)
        if i > last_i:
            vectors.append(v[last_i:i])
        last_i = i
    if last_i < len(v):
        vectors.append(v[last_i:])
    return vectors

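# A usage sketch for divide_vector_to_chunks (not from the original source);
# it assumes a SciPy old enough to still expose scipy.searchsorted, which the
# function above calls.  The values are illustrative.
import numpy as np

v = np.array([1, 2, 5, 7, 8, 12])   # sorted data
p = np.array([4, 8])                # sorted split points

print(divide_vector_to_chunks(v, p))
# [array([1, 2]), array([5, 7]), array([ 8, 12])]
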
def update_map(self, bar):
    thist = bar.get_value()
    xmin, xmax = self.axes.get_xlim()
    if thist < xmin or thist > xmax:
        return

    t, data = self.eegplot.eeg.get_data(xmin, xmax)
    if self._filterGM:
        data = filter_grand_mean(data)
    detrend = self._detrendd[self._detrend]
    data = detrend(self._filter(t, data))
    dt = t[1] - t[0]

    indTime = searchsorted(t, thist)
    if indTime == data.shape[0]:
        return

    # We do this here rather than in the eegview window so that we can use
    # this window's filter params
    slice = {}
    for trode, eoiInd in self.eegplot.eoiIndDict.items():
        indData = self.eegplot.indices[eoiInd]
        slice[trode] = -data[indTime, indData]
    self.view3.gridManager.set_scalar_data(slice)
    self.draw_vlines(thist)

def _assembleInit(xdata, ydata, bounds=None):
    """Assemble the initial values based on the spectrum."""
    # INITIALIZE VARIABLES
    init = scipy.zeros((3 * len(semiEmperical) + 1,))
    output = scipy.zeros((len(init), 2))
    ones = scipy.ones(semiEmperical.shape)

    # set baseline
    init[0] = ydata.min()  # _initial[0]
    output[0] = _bounds[0]

    # set peak values
    init[1::3] = ydata[scipy.searchsorted(xdata, semiEmperical)] - init[0]
    output[1::3] = scipy.array([_bounds[1, 0] * ones, _bounds[1, 1] * ones]).T

    # set offsets
    init[2::3] = semiEmperical
    output[2::3] = (scipy.array([_bounds[2, 0] * ones, _bounds[2, 1] * ones]) + semiEmperical).T

    # set width values
    init[3::3] = _initial[3] * ones
    output[3::3] = scipy.array([_bounds[3, 0] * ones, _bounds[3, 1] * ones]).T

    if bounds is not None:
        return init, output
    else:
        return init

def _assembleInit(xdata, ydata, bounds=None):
    # INITIALIZE VARIABLES
    init = scipy.zeros((3 * len(semiEmperical) + 1,))
    output = scipy.zeros((len(init), 2))
    ones = scipy.ones(semiEmperical.shape)

    # set baseline from the mode of the ADC histogram
    axis = scipy.mgrid[-32:65569:64]
    init[0] = (scipy.histogram(ydata, bins=axis)[0]).argmax() * 64  # ydata.min()  # _initial[0]
    output[0] = _bounds[0]

    # set peak values
    init[1::3] = ydata[scipy.searchsorted(xdata, semiEmperical)] - init[0]
    output[1::3] = scipy.array([_bounds[1, 0] * ones, _bounds[1, 1] * ones]).T

    # set offsets
    init[2::3] = semiEmperical
    output[2::3] = (scipy.array([_bounds[2, 0] * ones, _bounds[2, 1] * ones]) + semiEmperical).T

    # set width values
    init[3::3] = _initial[3] * ones
    output[3::3] = scipy.array([_bounds[3, 0] * ones, _bounds[3, 1] * ones]).T

    if bounds is not None:
        return init, output
    else:
        return init

def _tidy(self, x1, xnew, yi):
    i = sp.searchsorted(self.x, x1)
    # If x1 is in self.x, searchsorted returns the correct index.
    # Otherwise, it matches the index for the next greater self.x,
    # but we want the index for the next lesser.
    if self.x[i] != x1:
        i -= 1
    assert i.size == 1

    # Just do linear interpolation on the section spoiled by the
    # convolution
    if self.window == 'lanczos':
        edge = 11.
    elif self.window == 'cubic_conv':
        edge = 5.
    else:
        edge = 2 * self.kw + 1

    if i < edge:
        z = interp1d(self.x, self.y)
        yi[i] = z(x1)
    elif i > yi.size - edge:
        z = interp1d(self.x, self.y)
        yi[i] = z(x1)

    i2 = sp.where(xnew == x1)[0]
    assert i2.size == 1
    self.out[i2] = yi[i]

def _do_outer_iteration_stage(self):
    # Generate curve from points
    for inv_val in self._inv_points:
        # Apply one applied pressure and determine invaded pores
        self._logger.info('Applying capillary pressure: ' + str(inv_val))
        self._do_one_inner_iteration(inv_val)
    # Store results using the network's get/set methods
    self.set_pore_data(prop='inv_Pc', data=self._p_inv)
    self.set_throat_data(prop='inv_Pc', data=self._t_inv)
    # Find invasion sequence values (to correspond with IP algorithm)
    self._p_seq = sp.searchsorted(sp.unique(self._p_inv), self._p_inv)
    self._t_seq = sp.searchsorted(sp.unique(self._t_inv), self._t_inv)
    self.set_pore_data(prop='inv_seq', data=self._p_seq)
    self.set_throat_data(prop='inv_seq', data=self._t_seq)
    # Remove temporary arrays and adjacency matrices
    del self._net.adjacency_matrix['csr']['invaded']

def rerun(name='shots2016', startidx=0):
    conn = sqlite3.connect(_tablename)
    oldConn = sqlite3.connect(_olddb)
    oldC = oldConn.cursor()

    # extract necessary idx, time, shot, c_w_qc and c_w_l
    idx = scipy.squeeze((oldC.execute('SELECT id from ' + name)).fetchall())
    newstart = scipy.searchsorted(idx, [startidx])[0]
    time = scipy.squeeze(
        (oldC.execute('SELECT time from ' + name)).fetchall())[newstart:]
    c_w_qc = scipy.squeeze(
        (oldC.execute('SELECT c_W from ' + name)).fetchall())[newstart:]
    c_w_l = scipy.squeeze(
        (oldC.execute('SELECT c_w_l from ' + name)).fetchall())[newstart:]
    shots = scipy.squeeze(
        (oldC.execute('SELECT shot from ' + name)).fetchall())[newstart:]
    idx = idx[newstart:]

    uniq = scipy.unique(shots)
    for i in uniq:
        idxin = shots == i  # find indices which match the current shot
        reAnalyzeShot(i,
                      conn,
                      name,
                      idx[idxin],
                      time[idxin],
                      c_w_qc[idxin],
                      c_w_l[idxin],
                      bounds=True,
                      serial=False)

def drawpartitionmin2(G, S, xm, rm, n):
    # distance to xmin
    xm = sp.array(xm)
    ns, d = S.shape
    # R is the distance from xm for each row of S
    R = sp.empty(ns)
    for i in xrange(ns):
        R[i] = sp.linalg.norm(S[i, :] - xm)
    # O holds the indices sorted by distance from xm
    O = sp.argsort(R)
    split = sp.searchsorted(R[O], rm) + 1
    S_ = sp.vstack([xm, S[O, :]])
    Z = G.draw_post(S_, [[sp.NaN]] * (ns + 1), n)
    Res = sp.empty([n, 5])
    Res[:, 1] = Z[:, :split].min(axis=1)
    Res[:, 2] = Z[:, split:].min(axis=1)
    Res[:, 3] = Z[:, 0]
    Res[:, 0] = Res[:, 1:3].min(axis=1)
    Res[:, 4] = Res[:, 1:3].argmin(axis=1)

    argminin = Z[:, :split].argmin(axis=1)
    argminmax = argminin.max()
    maxRin = R[O[argminmax - 1]]
    # print(str(argminin)+'\n'+str(argminmax)+'\n'+str(maxRin)+' '+str(rm)+'\n'+str(R[O[argminin]]))
    print('from {} draws {} in rpve with rad {}. Furthest within rpve index {} rad {}'
          .format(ns, split, rm, argminmax - 1, maxRin))
    return Res, maxRin

def __call__(self, x_new):
    """Find linearly interpolated y_new = <name>(x_new).

    Inputs:
      x_new -- New independent variables.

    Outputs:
      y_new -- Linearly interpolated values corresponding to x_new.
    """
    # 1. Handle values in x_new that are outside of x.  Throw error,
    #    or return a list of mask array indicating the out-of-bounds
    #    values.  The behavior is set by the bounds_error variable.
    ## RHC -- was x_new = atleast_1d(x_new)
    x_new_1d = atleast_1d(x_new)
    out_of_bounds = self._check_bounds(x_new_1d)
    # 2. Find where in the original data the values to interpolate
    #    would be inserted.
    #    Note: If x_new[n] == x[m], then m is returned by searchsorted.
    x_new_indices = searchsorted(self.x, x_new_1d)
    # 3. Clip x_new_indices so that they are within the range of
    #    self.x indices and at least 1.  Removes mis-interpolation
    #    of x_new[n] = x[0].
    # RHC -- changed Int to Numeric_Int to avoid name clash with numarray
    x_new_indices = clip(x_new_indices, 1, len(self.x) - 1).astype(Numeric_Int)
    # 4. Calculate the slope of the region that each x_new value falls in.
    lo = x_new_indices - 1
    hi = x_new_indices
    # !! take() should default to the last axis (IMHO) and remove
    # !! the extra argument.
    x_lo = take(self.x, lo, axis=self.interp_axis)
    x_hi = take(self.x, hi, axis=self.interp_axis)
    y_lo = take(self.y, lo, axis=self.interp_axis)
    y_hi = take(self.y, hi, axis=self.interp_axis)
    slope = (y_hi - y_lo) / (x_hi - x_lo)
    # 5. Calculate the actual value for each entry in x_new.
    y_new = slope * (x_new_1d - x_lo) + y_lo
    # 6. Fill any values that were out of bounds with fill_value.
    # !! Need to think about how to do this efficiently for
    # !! multi-dimensional cases.
    yshape = y_new.shape
    y_new = y_new.flat
    new_shape = list(yshape)
    new_shape[self.interp_axis] = 1
    sec_shape = [1] * len(new_shape)
    sec_shape[self.interp_axis] = len(out_of_bounds)
    out_of_bounds.shape = sec_shape
    new_out = ones(new_shape) * out_of_bounds
    putmask(y_new, new_out.flat, self.fill_value)
    y_new.shape = yshape
    # Rotate the values of y_new back so that they correspond to the
    # correct x_new values.
    result = swapaxes(y_new, self.interp_axis, self.axis)
    try:
        len(x_new)
        return result
    except TypeError:
        return result[0]

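# The class method above is from an old SciPy interpolator; its core idiom
# (searchsorted to find bracketing samples, then a slope formula) fits in a
# few lines.  A minimal standalone sketch (not from the original source),
# with illustrative names and data:
import numpy as np

def lerp(x, y, x_new):
    """Linear interpolation of (x, y) samples at x_new; x must be sorted."""
    idx = np.clip(np.searchsorted(x, x_new), 1, len(x) - 1)
    lo, hi = idx - 1, idx
    frac = (x_new - x[lo]) / (x[hi] - x[lo])
    return y[lo] + frac * (y[hi] - y[lo])

x = np.array([0.0, 1.0, 2.0])
y = np.array([0.0, 10.0, 20.0])
print(lerp(x, y, np.array([0.5, 1.5])))  # [ 5. 15.]
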
def __call__(self, exp, band, lam):
    ll = self.ll[band][exp]
    fl = self.fl[band][exp]
    iv = self.iv[band][exp]
    wd = self.wd[band][exp]

    la = 10**ll

    i = sp.searchsorted(la, lam)
    w = i >= len(ll)
    i[w] = len(ll) - 1
    w = i == 0
    i[w] = 1

    flux = (la[i] - lam) * fl[i - 1] * iv[i - 1] + (lam - la[i - 1]) * fl[i] * iv[i]
    norm = (la[i] - lam) * iv[i - 1] + (lam - la[i - 1]) * iv[i]
    ivar = norm**2

    norm_ivar = (iv[i - 1] * (la[i] - lam)**2 + iv[i] * (lam - la[i - 1])**2)
    w = norm_ivar > 0
    ivar[w] /= norm_ivar[w]

    w = (iv[i - 1] == 0) | (iv[i] == 0)
    norm[w] = 0
    flux[w] = 0
    ivar[w] = 0

    w = norm > 0
    flux[w] /= norm[w]

    wdisp = (la[i] - lam) * wd[i - 1] + (lam - la[i - 1]) * wd[i]
    wdisp /= la[i] - la[i - 1]

    re = sp.exp(-(sp.arange(ndiag) - ndiag / 2)[:, None]**2 / 2. / wdisp**2)
    re /= sp.sum(re, axis=0)

    return flux, ivar, re

def initialize(self, state, chain):
    params = {}
    for key in self.scan_range.keys():
        # Check for single range
        if len(self.scan_range[key]) == 2:
            params[key] = sp.rand() * (self.scan_range[key][1] -
                                       self.scan_range[key][0]) + self.scan_range[key][0]
        else:
            # calculate weights of sub_regions
            sub_size = sp.array([])
            # Determine weights of each region
            for i in range(0, len(self.scan_range[key]), 2):
                sub_size = sp.append(sub_size,
                                     self.scan_range[key][i + 1] - self.scan_range[key][i])
            self.range_weight[key] = sub_size / float(sp.sum(sub_size))
            # sample a region based on its size
            i_sel = 2 * sp.searchsorted(sp.cumsum(self.range_weight[key]), sp.rand())
            # sample a point within the region
            params[key] = (sp.rand() * (self.scan_range[key][i_sel + 1] -
                                        self.scan_range[key][i_sel])
                           + self.scan_range[key][i_sel])

    # params = dict([(key, sp.rand()*(self.scan_range[key][1]-self.scan_range[key][0])+self.scan_range[key][0])
    #                for key in self.scan_range.keys() if type(self.scan_range[key]) == list])

    # Add constant parameters
    for key in self.constants.keys():
        params[key] = self.constants[key]
    for key in self.functions.keys():
        params[key] = self.functions[key](params)
    modelid = "%i%01i" % (self.rank, 0) + "%i" % chain.accepted
    return params, modelid

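# A standalone sketch (not from the original source) of the weighted-choice
# idiom above: searchsorted on a cumulative weight vector maps a uniform draw
# to a category with probability proportional to its weight.  (The 2* in the
# code above additionally maps the chosen region to its (low, high) pair in
# the flattened scan_range list.)  Weights are illustrative.
import numpy as np

weights = np.array([0.2, 0.5, 0.3])             # category weights, sum to 1
cdf = np.cumsum(weights)                        # [0.2, 0.7, 1.0]
draws = np.searchsorted(cdf, np.random.rand(10000))
print(np.bincount(draws) / 10000.0)             # ~[0.2, 0.5, 0.3]
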
def _midpoint(self, left, right):
    """Given two coordinates, return the middle coordinate in terms of
    actual distance."""
    return scipy.searchsorted(
        self._cld, self._cld[left] + (self._cld[right] - self._cld[left]) / 2)

def analyzeShot(shot, conn, name, idx, bounds=True, method=None, tol=None,
                serial=True):
    try:
        # find data
        lims = findTime(shot)
        data = SIFData(shot)
        if lims[1] > data.time[-1]:
            raise dd.PyddError  # timebase is way wrong, toss it

        # solve for indices of the data to be fitted
        indices = scipy.searchsorted(data.time, lims)

        # snip snip
        data.data = data.data[indices[0]:indices[1]]
        data.time = data.time[indices[0]:indices[1]]

        # fit the data
        output = fitData(data, bounds=bounds, method=method, tol=tol, serial=serial)

        # write to the sql database
        idx = writeData(shot, output, data.time, conn, name, idx)
        return idx
    except dd.PyddError:
        # if there is any error pulling the data (SIF or EQH), toss the shot
        return idx

def midpoint(self, guess=None, threshold=4, smooth=10, end=5):
    """Take some parameters and return the best guess of the midpoint.

    Works iteratively: it defines regions based on the current midpoint,
    then looks at the second-to-last set of regions in the shorter strand
    and recalculates the midpoint from that region and its corresponding
    region in the longer strand.  This repeats until convergence, or until
    it has tried as many times as there are regions in one long molecule
    (the upper bound is not critical; it just guards against
    non-convergence).
    """
    guess = guess or scipy.searchsorted(self._cld, (self._cld[-1] / 2))
    (ff, fl), (rf, rl) = self.regionify(guess, threshold=threshold,
                                        smooth=smooth, end=end)
    for i in range(min(len(ff), len(rf))):
        i = min(len(ff), len(rf)) - 2
        new_guess = self._midpoint(ff[i][1], rf[i][0])
        if guess == new_guess:
            return guess
        guess = new_guess
        (ff, fl), (rf, rl) = self.regionify(guess, threshold=threshold,
                                            smooth=smooth, end=end)
    return guess

def reAnalyzeShot2(shot, conn, idx, time, bounds=True, method=None, tol=None,
                   serial=True, offset=9):
    try:
        # find data
        curs = conn.cursor()
        data = SIFData(shot, offset=offset)

        # solve for indices of the data to be fitted; the -1e-5 works around
        # data.time being a double while time is a float
        indices = scipy.searchsorted(data.time, time - 1e-5)

        # snip snip
        data.data = data.data[indices]
        data.time = data.time[indices]

        # fit the data
        output = fitData(data, bounds=bounds, method=method, tol=tol, serial=serial)
        print(output.shape)

        # modify the sql database
        print(idx.shape)
        for i in xrange(len(idx)):
            changeValue(curs, idx[i], output[i])
        return idx
    except dd.PyddError:
        # if there is any error pulling the data (SIF or EQH), toss the shot
        return idx

def _assembleInit(xdata, ydata, bounds=None):
    """Assemble the initial values based on the spectrum."""
    # INITIALIZE VARIABLES
    init = scipy.zeros((3 * len(semiEmperical) + 1,))
    output = scipy.zeros((len(init), 2))
    ones = scipy.ones(semiEmperical.shape)

    # Set baseline.  Restricting the histogram argmax to the lower part of
    # the measured ADC range prevents saturation events from becoming the
    # 'baseline'.
    axis = scipy.mgrid[-32:65569:64]
    init[0] = (scipy.histogram(ydata, bins=axis)[0])[:pow(2, 9)].argmax() * 64  # ydata.min()  # _initial[0]
    output[0] = _bounds[0]

    # set peak values
    init[1::3] = ydata[scipy.searchsorted(xdata, semiEmperical)] - init[0]
    output[1::3] = scipy.array([_bounds[1, 0] * ones, _bounds[1, 1] * ones]).T

    # set offsets
    init[2::3] = semiEmperical
    output[2::3] = (scipy.array([_bounds[2, 0] * ones, _bounds[2, 1] * ones]) + semiEmperical).T

    # set width values
    init[3::3] = _initial[3] * ones
    output[3::3] = scipy.array([_bounds[3, 0] * ones, _bounds[3, 1] * ones]).T

    if bounds is not None:
        return init, output
    else:
        return init

def get_price(self, date):
    try:
        price = self.by_date[date]
    except KeyError:
        # Search for an earlier date.
        index = searchsorted(self.sorted, date) - 1
        if index < 0:
            raise KeyError("Delivery date {} not found in '{}' forward "
                           "curve.".format(date, self.name))
        price = self.by_date[self.sorted[index]]
    return price

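# A standalone illustration (not from the original source) of the fallback
# lookup in get_price above: searchsorted minus one finds the last date
# strictly before the query (exact hits are already served by the dict
# lookup).  The dates are illustrative.
import numpy as np

dates = np.array(['2020-01-01', '2020-02-01', '2020-03-01'], dtype='datetime64[D]')
query = np.datetime64('2020-02-15')

idx = np.searchsorted(dates, query) - 1
print(dates[idx])   # 2020-02-01
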
def plot_gridsearch_scores(self, grid_scores, filters={}):
    param_grid = self.get_param_grid()
    x, y = sp.meshgrid(param_grid[self.tau_key], param_grid[self.c_key])
    counts = sp.zeros_like(x)
    scores = sp.zeros_like(x)

    for params, s, unused in grid_scores:
        if all(params[key] == filters[key] for key in filters):
            i = sp.searchsorted(param_grid[self.c_key], params[self.c_key])
            j = sp.searchsorted(param_grid[self.tau_key], params[self.tau_key])
            counts[i, j] += 1
            scores[i, j] += s
    scores /= counts

    plt.contourf(x, y, scores)
    plt.loglog()
    plt.xlabel(self.tau_key)
    plt.ylabel(self.c_key)
    cb = plt.colorbar()
    cb.set_label("Accuracy")

def _assembleInit2(xdata, ydata, bounds=None):
    # internal fitting bounds and initial values for the baseline and the
    # gaussian height, offset, and width
    _bounds = scipy.array([[1, 65536], [0, 1e15], [-3e-5, 3e-5], [5e-5, 3e-4]])
    _bounds2 = scipy.array([[0, scipy.inf], [0, scipy.inf], [-3e-5, 3e-5], [0, scipy.inf]])
    _initial = scipy.array([0., 1., 1., .7e-4, 0.])

    # INITIALIZE VARIABLES
    init = scipy.zeros((3 * len(semiEmperical) + 1,))
    output = scipy.zeros((len(init), 2))
    ones = scipy.ones(semiEmperical.shape)

    # set baseline
    axis = scipy.mgrid[-32:65569:64]
    init[0] = (scipy.histogram(ydata, bins=axis)[0]).argmax() * 64  # ydata.min()  # _initial[0]
    output[0] = _bounds[0]

    # set peak values
    print(ydata[scipy.searchsorted(xdata, semiEmperical)])
    init[1::3] = ydata[scipy.searchsorted(xdata, semiEmperical)] - init[0]
    init[1::3] = (abs(init[1::3]) + init[1::3]) / 2.  # clip negative heights to zero
    output[1::3] = scipy.array([_bounds[1, 0] * ones, _bounds[1, 1] * ones]).T

    # set offsets
    init[2::3] = semiEmperical
    output[2::3] = (scipy.array([_bounds[2, 0] * ones, _bounds[2, 1] * ones]) + semiEmperical).T

    # set width values
    init[3::3] = _initial[3] * ones
    output[3::3] = scipy.array([_bounds[3, 0] * ones, _bounds[3, 1] * ones]).T

    init[1::3] *= init[3::3] * scipy.sqrt(scipy.pi)  # convert to integrated counts
    output[1::3] *= output[3::3] * scipy.sqrt(scipy.pi)  # same for the bounds
    print(init[1::3], 'initial vals')

    if bounds is not None:
        return init, output
    else:
        return init

def linear_interp_error(x, xinterp, z):
    """Does the actual calculation for error propagation on linear
    interpolation.

    x       = xold
    xinterp = grid on which x is interpolated
    z       = old error spectrum

    Note: returns the variance, i.e. (new error spectrum)**2.
    """
    i = sp.searchsorted(x, xinterp)
    f = (xinterp - x[i - 1]) / (x[i] - x[i - 1])
    return f**2 * z[i]**2 + (1 - f)**2 * z[i - 1]**2

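# Why linear_interp_error returns f**2*z[i]**2 + (1-f)**2*z[i-1]**2: the
# interpolated flux is y = f*y_i + (1-f)*y_{i-1}, so independent Gaussian
# errors add in quadrature with those same weights.  A quick numerical check
# (not from the original source; assumes the module's sp = scipy import is
# available and the values are illustrative):
import numpy as np

x = np.array([0.0, 1.0, 2.0])
z = np.array([0.1, 0.2, 0.4])                    # per-pixel 1-sigma errors
var = linear_interp_error(x, np.array([0.5]), z)
print(np.sqrt(var))                              # ~0.1118 = sqrt(0.25*0.04 + 0.25*0.01)
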
def exp_diff(file, ll):
    nexp_per_col = file[0].read_header()['NEXP'] // 2
    fltotodd = sp.zeros(ll.size)
    ivtotodd = sp.zeros(ll.size)
    fltoteven = sp.zeros(ll.size)
    ivtoteven = sp.zeros(ll.size)

    if nexp_per_col < 2:
        print("DBG : not enough exposures for diff")

    for iexp in range(nexp_per_col):
        for icol in range(2):
            llexp = file[4 + iexp + icol * nexp_per_col]["loglam"][:]
            flexp = file[4 + iexp + icol * nexp_per_col]["flux"][:]
            ivexp = file[4 + iexp + icol * nexp_per_col]["ivar"][:]
            mask = file[4 + iexp + icol * nexp_per_col]["mask"][:]
            bins = sp.searchsorted(ll, llexp)

            # exclude masks 25 (COMBINEREJ), 23 (BRIGHTSKY)?
            if iexp % 2 == 1:
                civodd = sp.bincount(bins, weights=ivexp * (mask & 2**25 == 0))
                cflodd = sp.bincount(bins, weights=ivexp * flexp * (mask & 2**25 == 0))
                fltotodd[:civodd.size - 1] += cflodd[:-1]
                ivtotodd[:civodd.size - 1] += civodd[:-1]
            else:
                civeven = sp.bincount(bins, weights=ivexp * (mask & 2**25 == 0))
                cfleven = sp.bincount(bins, weights=ivexp * flexp * (mask & 2**25 == 0))
                fltoteven[:civeven.size - 1] += cfleven[:-1]
                ivtoteven[:civeven.size - 1] += civeven[:-1]

    w = ivtotodd > 0
    fltotodd[w] /= ivtotodd[w]
    w = ivtoteven > 0
    fltoteven[w] /= ivtoteven[w]

    alpha = 1
    if nexp_per_col % 2 == 1:
        n_even = (nexp_per_col - 1) // 2
        alpha = sp.sqrt(4. * n_even * (n_even + 1)) / nexp_per_col
    diff = 0.5 * (fltoteven - fltotodd) * alpha  # CHECK THE * alpha (Nathalie)

    return diff

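# The coadding loop above relies on a common idiom: searchsorted assigns each
# sample to a grid bin, then bincount with weights accumulates a weighted sum
# per bin.  A standalone sketch (not from the original source), with
# illustrative arrays:
import numpy as np

grid = np.array([0.0, 1.0, 2.0, 3.0])    # target grid (sorted)
x = np.array([0.2, 0.9, 1.1, 2.5])       # sample positions
w = np.array([1.0, 2.0, 3.0, 4.0])       # sample weights

bins = np.searchsorted(grid, x)
print(np.bincount(bins, weights=w, minlength=grid.size))  # [0. 3. 3. 4.]
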
def _get_kernel(self, x1, func_name):
    # evaluate the pixel shift
    if func_name in func_dic.keys():
        func = func_dic[func_name]
    else:
        func = lambda kx: sp.sinc(kx) * get_window(func_name, kx.size)

    i = sp.searchsorted(self.x, x1)
    dpix = (x1 - self.x[i - 1]) / (self.x[i] - self.x[i - 1])
    if dpix == 1.0:
        dpix = 0
    assert sp.absolute(dpix) < 1.0

    kx = sp.r_[-self.kw:self.kw + 1] + dpix
    k = func(kx)
    k = k / sp.sum(k)
    return k

def histogram(series, nbins=100):
    """Generate a histogram from a series.

    Returns an array of histogram bins and bin counts.
    """
    assert len(series.shape) == 1, "Cannot histogram multidimensional arrays!"
    series = scipy.sort(series)
    mx = series[-1]
    mn = series[0]
    # 2.22044604925e-16 is scipy.limits.double_epsilon
    bins = mdp.utils.linspace(mn, mx + 2.22044604925e-16, nbins)
    n = scipy.searchsorted(series, bins)
    n = scipy.concatenate([n, [len(series)]])
    hist = (n[1:] - n[:-1]) / float(len(series))
    return bins, hist

def ppf(self, u):
    '''
    Evaluates the percentile function (inverse c.d.f.) for a given array
    of quantiles.

    :param u: Percentiles for which the ppf will be computed.
    :type u: numpy.array
    :returns: A Data object containing the values of the ppf.
    :rtype: natter.DataModule.Data
    '''
    b = self.param['b']
    dt = b[1] - b[0]
    P = hstack((0, cumsum(self.param['p']) * dt))
    ind = searchsorted(P, u)
    return Data(b[ind], 'Function values of the Histogram distribution')

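# The ppf above is inverse-transform sampling: searchsorted inverts a
# cumulative histogram so uniform quantiles map to bin edges.  A standalone
# sketch (not from the original source) with illustrative bins:
import numpy as np

edges = np.array([0.0, 1.0, 2.0, 3.0])   # histogram bin edges
probs = np.array([0.2, 0.5, 0.3])        # bin probabilities, sum to 1

cdf = np.concatenate(([0.0], np.cumsum(probs)))
u = np.random.rand(5)                    # uniform quantiles
print(edges[np.searchsorted(cdf, u)])    # samples from the histogram
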
def GenExtensionMat(y, x):
    m = y.size - 1
    n = x.size - 1
    mat = sp.zeros((m, n))
    mat = sp.mat(mat)
    indBase = sp.searchsorted(x, y)
    for i in range(0, m):
        ww = Weights(y[i], x, indBase[i])
        try:
            mat[i, indBase[i] - 2:indBase[i] + 2] = ww
        except (IndexError, ValueError):
            # wrap around when the 4-point stencil crosses the boundary
            mat[i, (indBase[i] - 2) % n] = ww[0]
            mat[i, (indBase[i] - 1) % n] = ww[1]
            mat[i, (indBase[i]) % n] = ww[2]
            mat[i, (indBase[i] + 1) % n] = ww[3]
    return mat

def indexes_after(seq, tm):
    """Return an array of indexes representing the index of seq that is on
    or after each member of tm.

    Parameters
    ----------
    seq : :ref:`time_sequence<time_sequence>`
        The sequence whose index will be searched
    tm : list of :py:class:`datetime.datetime`
        List of times whose index is sought

    When tm is bigger than the last item of seq, it will return len(seq).
    When tm is smaller than the first time of seq, it will return 0.
    """
    candidates = scipy.searchsorted(seq, tm)
    return candidates

def histogram(a, bins, relative=False):
    """Standard histogram straight from the Numeric manual.

    If relative is True, values will be normalised against the total and
    thus represent frequencies rather than counts.
    """
    n = searchsorted(sort(a), bins)
    n = concatenate([n, [len(a)]])
    hist = n[1:] - n[:-1]
    if relative is True:
        hist = hist / float(sum(hist))
    return hist

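# A quick check (not from the original source) of the sort-plus-searchsorted
# histogram idiom above: consecutive differences of insertion points count the
# values falling between consecutive bin edges.  Data are illustrative.
import numpy as np

a = np.array([0.1, 0.4, 0.5, 0.9, 1.5])
bins = np.array([0.0, 0.5, 1.0])

n = np.searchsorted(np.sort(a), bins)     # [0 2 4]
n = np.concatenate([n, [len(a)]])         # [0 2 4 5]
print(n[1:] - n[:-1])                     # [2 2 1]: counts in [0,0.5), [0.5,1), [1,inf)
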
def single_epoch(self, velocity, sigvel, mass,
                 log_minv=-3, log_maxv=None, log_stepv=0.02):
    """Returns a callable Basefitter which computes the log-likelihood of
    reproducing the observed single-epoch radial velocity distribution.

    Uses the current settings of the binary properties to calculate the
    distribution of radial velocity offsets due to binary orbital motions.

    Arguments:
    - `velocity`: 1D array-like giving velocities in km/s.
    - `sigvel`: 1D array-like (or single number) giving measurement
      uncertainties in km/s.
    - `mass`: 1D array-like (or single number) giving the best estimate for
      the mass of the observed stars in solar masses.
    - `log_minv`: 10-log of the lowest velocity bin in km/s (should be
      significantly smaller than the velocity dispersion).
    - `log_maxv`: 10-log of the largest velocity bin (default: logarithm of
      the maximum velocity).
    - `log_stepv`: step size in 10-log(velocity) space.
    """
    vel = sp.sort(sp.sum(self.velocity(1.)**2., 0)**.5)
    cum_weight = sp.cumsum(1. / vel[::-1])[::-1]
    if log_maxv is None:
        log_maxv = sp.log10(vel[-1])

    vbord = 10**sp.arange(log_minv, log_maxv, log_stepv)
    ixbound = sp.searchsorted(vel, vbord)
    pdist = []
    vtot = sp.append(0, vbord)
    for ix in range(len(vbord)):
        lower = vtot[ix]
        upper = vtot[ix + 1]
        if ix == 0:
            vuse = vel[:ixbound[ix]]
        else:
            vuse = vel[ixbound[ix - 1]:ixbound[ix]]
        if ixbound[ix] == len(vel):
            est = 0.
        else:
            est = cum_weight[ixbound[ix]]
        pdist.append(est + sp.sum((vuse - lower) / vuse) / (upper - lower))
    vbound = sp.append(-vbord[::-1], sp.append(0, vbord))
    prob = sp.append(pdist[::-1], pdist) / 2. / len(vel)
    return fitter.BinaryFit(velocity, sigvel, mass, vbound, prob)

def get_probit_endog(true_params, exog, noise_level):
    """
    Gets an endogenous response that is consistent with the true_params,
    perturbed by noise at noise_level.
    """
    N = exog.shape[0]

    ### Create the probability of entering the different classes,
    ### given exog and true_params
    Xdotparams = sp.dot(exog, true_params)
    noise = noise_level * sp.randn(*Xdotparams.shape)

    ### Create the endog
    cdf = stats.norm._cdf(-Xdotparams)
    endog = sp.zeros(N)
    for i in range(N):
        endog[i] = sp.searchsorted(cdf[i, :], sp.rand())

    return endog

def pdf(self, dat):
    '''
    Evaluates the probability density function on the data points in dat.

    :param dat: Data points for which the p.d.f. will be computed.
    :type dat: natter.DataModule.Data
    :returns: An array containing the values of the density.
    :rtype: numpy.array
    '''
    b = self.param['b']
    p = self.param['p']
    dt = b[1] - b[0]
    ind = searchsorted(b, squeeze(dat.X))
    ptmp = hstack((amin(p), self.param['p'], amin(p)))
    ptmp = ptmp / sum(ptmp) / dt
    return ptmp[ind]

def get_logit_endog(true_params, exog, noise_level):
    """
    Gets an endogenous response that is consistent with the true_params,
    perturbed by noise at noise_level.
    """
    N = exog.shape[0]

    ### Create the probability of entering the different classes,
    ### given exog and true_params
    Xdotparams = sp.dot(exog, true_params)
    noise = noise_level * sp.randn(*Xdotparams.shape)
    eXB = sp.column_stack((sp.ones(len(Xdotparams)), sp.exp(Xdotparams)))
    class_probabilities = eXB / eXB.sum(1)[:, None]

    ### Create the endog
    cdf = class_probabilities.cumsum(axis=1)
    endog = sp.zeros(N)
    for i in range(N):
        endog[i] = sp.searchsorted(cdf[i, :], sp.rand())

    return endog

def rebin(self, xnew):
    """Rebin the spectrum on a new grid named xnew."""
    # Does not need equally spaced bins, but why would you not?
    xnew.sort()

    fbin = sp.zeros(xnew.size)
    efbin = sp.zeros(xnew.size)

    # up-sampling is just interpolation
    m = (self.wv >= xnew[0]) * (self.wv <= xnew[-1])
    if self.wv[m].size <= xnew.size - 1:
        fbin, efbin = self.interp(xnew)
    else:
        # down-sampling:
        # 1) define bins so that xnew is at the center,
        # 2) interpolate to account for fractional pixel weights,
        # 3) take the mean within each bin
        db = 0.5 * sp.diff(xnew)
        b2 = xnew[1::] - db
        b2 = sp.insert(b2, 0, xnew[0])

        insert = sp.searchsorted(self.wv, b2)
        xinsert = sp.insert(self.wv, insert, xnew)
        xinsert = sp.unique(xinsert)
        yinsert, zinsert = self.interp(xinsert)

        i = sp.digitize(xinsert, b2)
        for j in range(b2.size):
            iuse = sp.where(i == j + 1)[0]
            fbin[j] = sp.mean(yinsert[iuse])
            efbin[j] = sp.mean(zinsert[iuse])

    self._wv = xnew
    if self.ef is not None:
        self._ef = efbin
    self.f = fbin
    assert self.wv.size == self.f.size

def propose(self, X_i, state, chain):
    params = {}
    for key in self.scan_range.keys():
        # sample a multi-range parameter
        if key in self.range_weight.keys():
            i_sel = 2 * sp.searchsorted(sp.cumsum(self.range_weight[key]), sp.rand())
            params[key] = (sp.rand() * (self.scan_range[key][i_sel + 1] -
                                        self.scan_range[key][i_sel])
                           + self.scan_range[key][i_sel])
        else:
            params[key] = sp.rand() * (self.scan_range[key][1] -
                                       self.scan_range[key][0]) + self.scan_range[key][0]

    # Add constant parameters
    for key in self.constants.keys():
        params[key] = self.constants[key]
    # functions
    for key in self.functions.keys():
        params[key] = self.functions[key](params)
    modelid = "%i%01i" % (self.rank, 0) + "%i" % chain.accepted
    return params, modelid

def cdf(self, dat, nonparametric=True):
    '''
    Evaluates the cumulative distribution function on the data points in dat.

    :param dat: Data points for which the c.d.f. will be computed.
    :type dat: natter.DataModule.Data
    :param nonparametric: Determines whether the cdf should be estimated
        non-parametrically.  This works well if the data points in dat
        represent a large sample from the whole range of values.
    :type nonparametric: boolean
    :returns: A numpy array containing the probabilities.
    :rtype: numpy.array
    '''
    if nonparametric:
        u = linspace(0., 1., dat.numex())[argsort(argsort(dat.X.ravel()))]
    else:
        b = self.param['b']
        dt = b[1] - b[0]
        P = cumsum(self.param['p']) * dt
        ind = searchsorted(b, squeeze(dat.X))
        P = hstack((0, P, 1))
        u = P[ind]
    return u

# Assign the positions of the two graphs
gs = gc.GridSpec(1, 2)
gs.update(wspace=0.15, hspace=0, bottom=0.15)
ax1 = fig.add_subplot(gs[0])
ax2 = fig.add_subplot(gs[1])

# histogram and fitted normal distribution of the Congruent data
binsC = range(8, max(C.astype(np.int64)) + 2)
probC = mlab.normpdf(binsC, muC, sigmaC) * 24
probCt = mlab.normpdf(binsC, muCt, sigmaCt) * 22
lC = sp.linspace(8, max(C.astype(np.int64)) + 1, 100)
sC = interpolate.spline(binsC, probC, lC)
sCt = interpolate.spline(binsC, probCt, lC)
xxC = sp.searchsorted(lC, muC + sigmaC)
vC = ((sC[xxC + 1] - sC[xxC]) / (lC[xxC + 1] - lC[xxC])) * (muC + sigmaC - lC[xxC])
vC += sC[xxC]

# histogram and fitted normal distribution of the Incongruent data
binsI = range(15, max(I.astype(np.int64)) + 2)
binsIt = range(15, max(It.astype(np.int64)) + 2)
probI = mlab.normpdf(binsI, muI, sigmaI) * 24
probIt = mlab.normpdf(binsI, muIt, sigmaIt) * 22
lI = sp.linspace(15, max(I.astype(np.int64)) + 1, 100)
sI = interpolate.spline(binsI, probI, lI)
sIt = interpolate.spline(binsI, probIt, lI)

ax1.hist(C, bins=binsC, align="left", histtype="step", color="black")
ax1.plot(binsC, probC, "o", color="black")
ax1.plot(binsC, probCt, "o", color="red")

def findClosestArray(input_array, target_array, tol):
    """
    Find the set of elements in input_array that are closest to elements
    in target_array.  For example, given an array of observations with
    irregular observation times along with an array of times of interest,
    this routine can be used to find those observations that are closest
    to the times of interest and within a given time tolerance.

    NOTE: input_array must be sorted!  The array target_array does not
    have to be sorted.

    Inputs:
      input_array:  a sorted Float64 numarray
      target_array: a Float64 numarray
      tol:          a tolerance

    Returns:
      closest_indices: the array of indices of elements in input_array
                       that are closest to elements in target_array
      (the bookkeeping for accept_indices/reject_indices -- the elements
      of target_array with and without a match within tolerance -- is
      present but commented out below)

    Author: Gerry Wiener, 2004
    Version 1.0
    """
    input_array_len = len(input_array)
    # locations of target_array values in input_array
    closest_indices = searchsorted(input_array, target_array)

    # acc_rej_indices = [-1] * len(target_array)
    curr_tol = [tol] * len(target_array)
    est_tol = 0.0

    for i in xrange(len(target_array)):
        # used to adjust closest_indices[i] for the best approximating
        # element in input_array
        best_off = 0

        if closest_indices[i] >= input_array_len:
            # target_array[i] is >= all elements in input_array, so check
            # whether it is within tolerance of the last element
            closest_indices[i] = input_array_len - 1
            est_tol = target_array[i] - input_array[closest_indices[i]]
            if est_tol < curr_tol[i]:
                curr_tol[i] = est_tol
                # acc_rej_indices[i] = i
        elif target_array[i] == input_array[closest_indices[i]]:
            # target_array[i] is in input_array
            est_tol = 0.0
            curr_tol[i] = 0.0
            # acc_rej_indices[i] = i
        elif closest_indices[i] == 0:
            # target_array[i] is <= all elements in input_array
            est_tol = input_array[0] - target_array[i]
            if est_tol < curr_tol[i]:
                curr_tol[i] = est_tol
                # acc_rej_indices[i] = i
        else:
            # target_array[i] is between input_array[closest_indices[i]-1]
            # and input_array[closest_indices[i]], and closest_indices[i]
            # must be > 0
            top_tol = input_array[closest_indices[i]] - target_array[i]
            bot_tol = target_array[i] - input_array[closest_indices[i] - 1]
            if bot_tol <= top_tol:
                est_tol = bot_tol
                best_off = -1  # this is the only place where best_off != 0
            else:
                est_tol = top_tol
            if est_tol < curr_tol[i]:
                curr_tol[i] = est_tol
                # acc_rej_indices[i] = i

        if est_tol <= tol:
            closest_indices[i] += best_off

    # accept_indices = compress(greater(acc_rej_indices, -1), acc_rej_indices)
    # reject_indices = compress(equal(acc_rej_indices, -1),
    #                           arange(len(acc_rej_indices)))

    return closest_indices  # , accept_indices, reject_indices

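# A usage sketch for findClosestArray (not from the original source).  It
# assumes the numarray-era names the function uses (searchsorted, xrange)
# resolve, e.g. on Python 2 with a star-import of the array package.  The
# arrays are illustrative.
import numpy as np

obs_times = np.array([0.0, 1.0, 2.0, 3.5])   # sorted observations
targets = np.array([0.9, 2.6, 10.0])

print(findClosestArray(obs_times, targets, tol=1.0))
# [1 2 3]: nearest matches are 1.0, 2.0, 3.5; the last lies outside tol,
# which the commented-out accept/reject bookkeeping would have flagged
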
def summed_dist_matrix(self, vectors, presorted=False):
    # This implementation is based on
    #
    # Houghton, C., & Kreuz, T. (2012). On the efficient calculation of van
    # Rossum distances. Network: Computation in Neural Systems, 23(1-2),
    # 48-58.
    #
    # Note that the cited paper contains some errors: In formula (9) the
    # left side of the equation should be divided by two and in the last
    # sum in this equation it should say `j|v_i >= u_i` instead of
    # `j|v_i > u_i`. Also, in equation (11) it should say `j|u_i >= v_i`
    # instead of `j|u_i > v_i`.
    #
    # Given N vectors with n entries on average the run-time complexity is
    # O(N^2 * n). O(N^2 + N * n) memory will be needed.

    if len(vectors) <= 0:
        return sp.zeros((0, 0))

    if not presorted:
        vectors = [v.copy() for v in vectors]
        for v in vectors:
            v.sort()

    sizes = sp.asarray([v.size for v in vectors])
    values = sp.empty((len(vectors), max(1, sizes.max())))
    values.fill(sp.nan)
    for i, v in enumerate(vectors):
        if v.size > 0:
            values[i, :v.size] = \
                (v / self.kernel_size * pq.dimensionless).simplified

    exp_diffs = sp.exp(values[:, :-1] - values[:, 1:])
    markage = sp.zeros(values.shape)
    for u in xrange(len(vectors)):
        markage[u, 0] = 0
        for i in xrange(sizes[u] - 1):
            markage[u, i + 1] = (markage[u, i] + 1.0) * exp_diffs[u, i]

    # Same vector terms
    D = sp.empty((len(vectors), len(vectors)))
    D[sp.diag_indices_from(D)] = sizes + 2.0 * sp.sum(markage, axis=1)

    # Cross vector terms
    for u in xrange(D.shape[0]):
        all_ks = sp.searchsorted(values[u], values, 'left') - 1
        for v in xrange(u):
            js = sp.searchsorted(values[v], values[u], 'right') - 1
            ks = all_ks[v]
            slice_j = sp.s_[sp.searchsorted(js, 0):sizes[u]]
            slice_k = sp.s_[sp.searchsorted(ks, 0):sizes[v]]
            D[u, v] = sp.sum(
                sp.exp(values[v][js[slice_j]] - values[u][slice_j]) *
                (1.0 + markage[v][js[slice_j]]))
            D[u, v] += sp.sum(
                sp.exp(values[u][ks[slice_k]] - values[v][slice_k]) *
                (1.0 + markage[u][ks[slice_k]]))
            D[v, u] = D[u, v]

    if self.normalize:
        normalization = self.normalization_factor(self.kernel_size)
    else:
        normalization = 1.0
    return normalization * D