def trim(self, points, radius):
    """Remove points too close to the cut curve.

    They don't add anything, and only lead to awkward faces.
    """
    # some precomputations
    tree = KDTree(points)
    cp = self.vertices[self.faces]
    normal = util.normalize(np.cross(cp[:, 0], cp[:, 1]))
    mid = util.normalize(cp.sum(axis=1))
    diff = np.diff(cp, axis=1)[:, 0, :]
    edge_radius = np.sqrt(util.dot(diff, diff) / 4 + radius**2)

    index = np.ones(len(points), bool)

    # eliminate points near edges
    def near_edge(e, p):
        return np.abs(np.dot(points[p] - mid[e], normal[e])) < radius
    for i, (p, r) in enumerate(zip(mid, edge_radius)):
        coarse = tree.query_ball_point(p, r)
        index[[c for c in coarse if near_edge(i, c)]] = False

    # eliminate points near vertices
    for p in self.vertices:
        coarse = tree.query_ball_point(p, radius)
        index[coarse] = False

    return points[index]
def find_pairs(cutoff, X, Y=None):
    """
    Find pairs with euclidean distance below C{cutoff}. Either between
    C{X} and C{Y}, or within C{X} if C{Y} is C{None}.

    Uses a KDTree and thus is memory efficient and reasonably fast.

    @type cutoff: float
    @type X: (m,n) numpy.array
    @type Y: (k,n) numpy.array
    @return: set of index tuples
    @rtype: iterable
    """
    try:
        from scipy.spatial import cKDTree as KDTree
        KDTree.query_pairs
        KDTree.query_ball_tree
    except (ImportError, AttributeError):
        from scipy.spatial import KDTree

    tree = KDTree(X, len(X))
    if Y is None:
        return tree.query_pairs(cutoff)

    other = KDTree(Y, len(Y))
    contacts = tree.query_ball_tree(other, cutoff)
    return ((i, j) for (i, js) in enumerate(contacts) for j in js)
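# Minimal usage sketch for find_pairs above; the random point sets are made up
# for illustration and the 0.1 cutoff is arbitrary.
import numpy as np

X = np.random.rand(100, 3)
Y = np.random.rand(50, 3)

pairs_within = set(find_pairs(0.1, X))          # index pairs within X
pairs_between = list(find_pairs(0.1, X, Y))     # (i, j) pairs between X and Y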
def group_vectors(vectors, angle=np.radians(10), include_negative=False):
    '''
    Group vectors based on an angle tolerance, with the option to
    include negative vectors.

    This is very similar to a group_rows(stack_negative(rows)).
    The main difference is that max_angle can be much looser, as we
    are doing actual distance queries.
    '''
    dist_max = np.tan(angle)
    unit_vectors, valid = unitize(vectors, check_valid=True)
    valid_index = np.nonzero(valid)[0]
    consumed = np.zeros(len(unit_vectors), dtype=bool)
    tree = KDTree(unit_vectors)

    unique_vectors = deque()
    aligned_index = deque()

    for index, vector in enumerate(unit_vectors):
        if consumed[index]:
            continue
        aligned = np.array(tree.query_ball_point(vector, dist_max))
        vectors = unit_vectors[aligned]
        if include_negative:
            aligned_neg = tree.query_ball_point(-1.0 * vector, dist_max)
            vectors = np.vstack((vectors, -unit_vectors[aligned_neg]))
            aligned = np.append(aligned, aligned_neg)
        aligned = aligned.astype(int)
        consumed[aligned] = True
        unique_vectors.append(np.median(vectors, axis=0))
        aligned_index.append(valid_index[aligned])
    return np.array(unique_vectors), np.array(aligned_index)
def swept_extrude(self, thickness):
    """
    outer is a copy of inner, possibly with added detail, but with an
    identical boundary.

    We seek to create a castable object with a constant thickness
    'thickness'. To that end, we need to match the boundary points to
    make a closed extrusion.

    Extrusion is done iteratively; we init by radially shrinking the
    inner mesh by thickness.
    """
    assert thickness > 0
    outer = self.vertices
    tree = KDTree(outer)
    outer_radius = np.linalg.norm(outer, axis=1)

    inner = outer
    # incremental updates
    while True:
        # find the nearest outer point for each inner point
        dist, idx = tree.query(inner, k=1)
        inner_radius = np.linalg.norm(inner, axis=1)
        radial_dist = inner_radius - outer_radius[idx]
        ortho_dist2 = dist**2 - radial_dist**2
        new_radius = outer_radius[idx] - np.sqrt(1 - ortho_dist2 / thickness ** 2) * thickness
        if np.allclose(inner_radius, new_radius):
            break
        inner = inner / (inner_radius / new_radius)[:, None]

    # return inner surface swept by thickness
    return self.extrude(inner)
def remove_close(points, radius):
    """
    Given an (n, m) set of points where n=(2|3) return a list of points
    where no point is closer than radius.

    Parameters
    ------------
    points : (n, dimension) float
      Points in space
    radius : float
      Minimum radius between result points

    Returns
    ------------
    culled : (m, dimension) float
      Points in space
    mask : (n,) bool
      Which points from the original set were returned
    """
    from scipy.spatial import cKDTree as KDTree

    tree = KDTree(points)
    consumed = np.zeros(len(points), dtype=bool)
    unique = np.zeros(len(points), dtype=bool)
    for i in range(len(points)):
        if consumed[i]:
            continue
        neighbors = tree.query_ball_point(points[i], r=radius)
        consumed[neighbors] = True
        unique[i] = True

    return points[unique], unique
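# Minimal usage sketch for remove_close above; the point cloud is synthetic.
import numpy as np

points = np.random.rand(1000, 2)
culled, mask = remove_close(points, radius=0.05)
# mask marks which of the original points survived the greedy culling
assert mask.sum() == len(culled)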
def binder(positions, orientations, bl, m=4, method='ball', margin=0):
    """Calculate the binder cumulant, given positions and orientations.

    bl: the binder length scale, such that B(bl) = 1 - .333 * S4 / S2^2
    where SN are <phibl^N> averaged over each block/cluster of size bl in frame.
    """
    if margin:
        if margin < ss:  # `ss` (particle size) is expected from the enclosing scope
            margin *= ss
        center = 0.5*(positions.max(0) + positions.min(0))
        # distance of each particle from the field-of-view center
        # (assumed here; `d` was undefined in the original snippet)
        d = np.linalg.norm(positions - center, axis=1)
        dmask = d < d.max() - margin
        positions = positions[dmask]
        orientations = orientations[dmask]
    if 'neigh' in method or 'ball' in method:
        tree = KDTree(positions)
        balls = tree.query_ball_tree(tree, bl)
        balls, ball_mask = helpy.pad_uneven(balls, 0, True, int)
        ball_orient = orientations[balls]
        ball_orient[ball_mask] = np.nan
        phis = np.nanmean(np.exp(m*ball_orient*1j), 1)
        phi2 = np.dot(phis, phis) / len(phis)
        phiphi = phis*phis
        phi4 = np.dot(phiphi, phiphi) / len(phiphi)
        return 1 - phi4 / (3*phi2*phi2)
    else:  # elif method=='block':
        raise ValueError("method {} not implemented".format(method))
def get_N_ngbs(positions, radii, N=12, maxdist=3.0, edge=None):
    """N first neighbours, with a maximum relative distance, such that
    $r_{ij} < maxdist (R_i + R_j)$.

    If a potential neighbour is further away than the distance to the edge
    of the field of view, the current particle of interest is considered
    as "on the edge" and the neighbour is not taken into account.

    Returns neighbours, inside"""
    assert len(positions) == len(radii)
    if edge is None:
        edge = (positions.min(0), positions.max(0))
    # initialize the geometry of each particle:
    # squared distance to the closest edge of the field of view
    # (the second term read `edge[0] - positions` in the original, which is
    # always non-positive; assumed here to be a typo for `edge[1]`)
    to_edge = np.minimum((positions - edge[0]).min(-1),
                         (edge[1] - positions).min(-1))**2
    inside = np.full(len(positions), True, dtype=bool)
    neighbours = np.full([len(positions), N], -1, dtype=int)
    tree = KDTree(positions)
    rmax = radii.max()
    for i, js in enumerate(tree.query_ball_tree(tree, 2*rmax*maxdist)):
        disq = np.sum((positions[js] - positions[i])**2, -1)
        ags = np.argsort(disq)[:N]
        if disq[ags[-1]] < to_edge[i]:
            # at most N neighbours are kept (`:len(js)` in the original
            # fails when more than N candidates are found)
            neighbours[i, :len(ags)] = np.array(js)[ags]
        else:
            inside[i] = False
            N2 = np.where(disq[ags] < to_edge[i])[0][0] + 1
            neighbours[i, :N2] = np.array(js)[ags[:N2]]
    return neighbours, inside
def find_neighbor_pixels(pix_x, pix_y, rad):
    """uses a KD-Tree to quickly find nearest neighbors of the pixels in a
    camera. This function can be used to find the neighbor pixels if such a
    list is not already present in the file.

    Parameters
    ----------
    pix_x : array_like
        x position of each pixel
    pix_y : array_like
        y position of each pixel
    rad : float
        radius to consider neighbor; it should be slightly larger than the
        pixel diameter.

    Returns
    -------
    array of neighbor indices in a list for each pixel
    """
    points = np.array([pix_x, pix_y]).T
    indices = np.arange(len(pix_x))
    kdtree = KDTree(points)
    neighbors = [kdtree.query_ball_point(p, r=rad) for p in points]
    for nn, ii in zip(neighbors, indices):
        nn.remove(ii)  # get rid of the pixel itself
    return neighbors
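# Minimal usage sketch for find_neighbor_pixels above, on a square 5x5 pixel
# grid with unit pitch; rad=1.1 is just above the pitch, so diagonals are excluded.
import numpy as np

xx, yy = np.meshgrid(np.arange(5.0), np.arange(5.0))
neighbors = find_neighbor_pixels(xx.ravel(), yy.ravel(), rad=1.1)
print(len(neighbors[12]))  # interior pixel of a square grid -> 4 neighbours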
def sht_isosurface(filename, l_max=20, prop='electric_potential', test=None): """Given an SBF, describe the set of vertices and their esp using sht. Will scale the mesh to be of unit mean radius. Arguments: filename -- name of the SBF file containing a surface Keyword arguments: prop -- the name of the vertex property to describe in combination with the shape (or radius) l_max -- maximum angular momenta test -- use to keep the actual shape and property values for examination of accuracy of descriptor """ name = Path(filename).stem LOG.debug('Describing %s surface with spherical harmonics', name) datafile = sbf.read_file(filename) pts = datafile['vertices'].data.transpose() LOG.debug('Loaded vertex data') # shift to be centered about the origin pts -= np.mean(pts, axis=0) # this is faster for some reason than np.apply_along_axis norms = np.sqrt(pts[:, 0] ** 2 + pts[:, 1] ** 2 + pts[:, 2] ** 2) mean_norm = np.mean(norms) pts /= mean_norm norms /= mean_norm pts_normalized = pts / np.reshape(norms, (pts.shape[0], 1)) LOG.debug('Normalized points') sht = SHT(l_max) grid_cartesian = spherical_to_cartesian( np.c_[np.ones(sht.grid.shape[0]), sht.grid[:, 1], sht.grid[:, 0]]) LOG.debug('Constructing tree') tree = KDTree(pts_normalized) LOG.debug('Done') LOG.debug('Interpolating values') nearest = tree.query(grid_cartesian, 1) LOG.debug('Done') shape = values_from_grid(norms, nearest[1]) property_values = values_from_grid(datafile[prop].data, nearest[1]) if test is not None: test['actual'] = shape # normalize property to be in [0,1], keep track of min and range prop_min = np.min(property_values) prop_scale = np.abs(np.max(property_values) - np.min(property_values)) property_values -= prop_min if prop_scale != 0: property_values /= prop_scale others = [mean_norm, prop_min, prop_scale] combined = np.zeros(property_values.shape, dtype=np.complex128) combined.real = shape combined.imag = property_values return name, others, sht.analyse(combined)
def cull_dataset(outdir, field_ra, field_dec, table): """ Efficiently finds all neighbors within 0.01 degrees using kdt.query_ball_point method to get points within radius d, where d is the cartesian distance equivalent to 0.01 degree separation resulting from calculation: ra1, ra2 = 0, 0.01 dec1, dec2 = 0, 0 c1 = spherical_to_cartesian(ra1, dec1) c2 = spherical_to_cartesian(ra2, dec2) d = np.sqrt(sum( [ (c1[i] - c2[i])**2 for i in range(3) ] )) If there are any neighbors within 0.01 degrees of a given source, and if any of these neighbors are brighter than 2 magnitudes fainter than the source, remove the source from the table. Also use the Wang et al. 2012 relations to get the angular size of each source and remove any sources with angular size greater than 0.01 arcsec. Returns a pandas DataFrame object containing the culled dataset. """ good = (table.array['V'] != 30.0) & (table.array['K'] != 30.0) arr = table.array[good] df = get_ang_size(pd.DataFrame.from_records(arr)) # ignore sources with theta >= 0.01 arcsec df = df[df.theta < 0.01] if df.shape[0] == 0: return None ra, dec, Vmag = [np.array(i) for i in [df.RA, df.DEC, df.V]] kdt = KDT(radec_to_coords(ra, dec)) d = 0.00017453292497790891 no_neighbors = np.ones(df.shape[0]) for i in range(df.shape[0]): coords = radec_to_coords(ra[i],dec[i]) # skip the first returned index - this is the query point itself idx = kdt.query_ball_point(coords,d)[0][1:] if len(idx) < 1: continue ds = great_circle_distance(ra[i],dec[i],ra[idx],dec[idx])[0] Vmag_i = Vmag[i] Vmag_neighbors = Vmag[idx] # flag sources that have bright nearby neighbors as bad for Vmag_j in Vmag_neighbors: if Vmag_j - Vmag_i < 2: no_neighbors[i] = 0 df = df[no_neighbors.astype('bool')] log(outdir, field_ra, field_dec, df.shape[0], arr.shape[0]) return df
def closest_index(sample_points, indices):
    r"""
    Find the nearest sample_point to each query point (along with the
    distance to that point).

    Input is an array of sample_points and an array of query points to
    test at. Output is an array of indices and distances.
    """
    kdtree = KDTree(sample_points)
    distance, index = kdtree.query(indices)
    return index, distance
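# Minimal usage sketch for closest_index above (assumes the module-level KDTree
# import the function relies on); the 2D data is synthetic.
import numpy as np

sample_points = np.random.rand(200, 2)
queries = np.array([[0.5, 0.5], [0.1, 0.9]])
index, distance = closest_index(sample_points, queries)
nearest = sample_points[index]  # the sample point closest to each query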
def KLdivTree(X1, X2):
    "fast KL estimation using KDTrees"
    n, d = X1.shape
    m, dy = X2.shape
    xtree = KDTree(X1)
    ytree = KDTree(X2)
    # k=2 for the first sample: the nearest neighbour of a point in its own
    # set is the point itself, so take the second-nearest distance
    r = xtree.query(X1, k=2, eps=.01, p=2)[0][:, 1]
    s = ytree.query(X1, k=1, eps=.01, p=2)[0]
    diff = r/s
    return -np.log(diff).sum() * d / n + np.log(m/(n-1))
def _fast_construct_edges(G, radius, p):
    """Construct edges for random geometric graph.

    Requires scipy to be installed.
    """
    pos = nx.get_node_attributes(G, 'pos')
    nodes, coords = list(zip(*pos.items()))
    kdtree = KDTree(coords)  # Cannot provide generator.
    edge_indexes = kdtree.query_pairs(radius, p)
    edges = ((nodes[u], nodes[v]) for u, v in edge_indexes)
    G.add_edges_from(edges)
def kdtree_clean(xx2d, yy2d, xS, yS, elevation2d):
    # REMOVE DODGY ADDED DATA FROM THE REGRIDDING BASED ON KDTREE.
    # dist is how far away the nearest neighbours are.
    # Need to decide on this threshold.
    # ONLY DO THIS FOR POINTS THAT HAVE ALREADY BEEN CLASSIFIED AS RIDGES
    grid_points = np.c_[xx2d.ravel(), yy2d.ravel()]
    tree = KDTree(np.c_[xS, yS])
    dist, _ = tree.query(grid_points, k=1)
    dist = dist.reshape(xx2d.shape)
    elevation2d_KD = ma.masked_where(dist > 4, elevation2d)
    return elevation2d_KD
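# Minimal usage sketch for kdtree_clean above (assumes the module-level numpy,
# numpy.ma and KDTree imports it relies on); the grid, scattered observations
# (xS, yS) and elevations are synthetic.
import numpy as np

xx2d, yy2d = np.meshgrid(np.linspace(0, 100, 50), np.linspace(0, 100, 50))
elevation2d = np.random.rand(*xx2d.shape)
xS = np.random.uniform(0, 100, 300)
yS = np.random.uniform(0, 100, 300)
elevation2d_clean = kdtree_clean(xx2d, yy2d, xS, yS, elevation2d)
# grid cells further than the hard-coded threshold (4) from any observation are masked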
def generate_galaxy(num_stars, spiral_arm_count, spiral_tightness, galaxy_radius, bulge_height, disk_height): #generate vertices star_dict = {} next_index = 0 #spiral stars for i in xrange(int(num_stars*0.65)): star_dict[next_index] = create_vertex_spiral(max_radius=galaxy_radius, arm_count=spiral_arm_count, beta=spiral_tightness, disk_height=disk_height) next_index += 1 #inner cluster stars for i in xrange(int(num_stars*0.15)): star_dict[next_index] = create_vertex_inner(max_radius=galaxy_radius * 0.8, bulge_height=bulge_height) next_index += 1 #outer "spread out" stars while(len(star_dict) < num_stars): star_dict[next_index] = create_vertex_outer(max_radius=galaxy_radius * 0.9, disk_height=disk_height) next_index += 1 #generate a KDTree from the star data in order to help with edges star_keys = star_dict.keys() star_values = star_dict.values() star_tree = KDTree(star_values) #compute the nearest neighbors for each vertex distance_data, index_data = star_tree.query(star_values, k=20, eps=0.1) #for each vertex, randomly add edges to its nearest neighbors edge_dict = {} for distances, indexes in zip(distance_data, index_data): v1 = star_keys[int(indexes[0])] if(v1 not in edge_dict): edge_dict[v1] = set() for distance, v2 in create_edges(zip(distances[1:],indexes[1:])): v2 = star_keys[int(v2)] edge_dict[v1].add(v2) if(v2 not in edge_dict): edge_dict[v2] = set() edge_dict[v2].add(v1) #remove disconnected components from the graph star_dict, edge_dict = remove_disconnected_stars(star_dict, edge_dict) #convert the star array to an array of dictionaries before returning, so other data can be added star_dict = {key:{'position':Vector3D(*p)} for key, p in star_dict.iteritems()} return star_dict, edge_dict
def match_model_masses(isoMasses, starMasses):
    kdt = KDTree(isoMasses.reshape((len(isoMasses), 1)))
    q_results = kdt.query(starMasses.reshape((len(starMasses), 1)), k=1)
    indices = q_results[1]

    # flag stars whose closest model mass differs by more than 10%
    dm_frac = np.abs(starMasses - isoMasses[indices]) / starMasses
    idx = np.where(dm_frac > 0.1)[0]
    indices[idx] = -1

    return indices
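# Minimal usage sketch for match_model_masses above; the mass grids are made up.
import numpy as np

isoMasses = np.linspace(0.1, 10.0, 500)            # model (isochrone) masses
starMasses = np.array([0.3, 1.02, 7.5, 25.0])      # observed star masses
indices = match_model_masses(isoMasses, starMasses)
# entries are -1 where no model mass lies within 10% of the star's mass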
def _fast_edges(G, radius, p):
    """Returns edge list of node pairs within `radius` of each other
    using scipy KDTree and Minkowski distance metric `p`

    Requires scipy to be installed.
    """
    pos = nx.get_node_attributes(G, 'pos')
    nodes, coords = list(zip(*pos.items()))
    kdtree = KDTree(coords)  # Cannot provide generator.
    edge_indexes = kdtree.query_pairs(radius, p)
    edges = ((nodes[u], nodes[v]) for u, v in edge_indexes)
    return edges
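# Minimal usage sketch for _fast_edges above (assumes the module-level networkx
# and KDTree imports it relies on); builds a small random geometric graph by hand.
import networkx as nx
import numpy as np

G = nx.Graph()
for i, p in enumerate(np.random.rand(50, 2)):
    G.add_node(i, pos=tuple(p))

G.add_edges_from(_fast_edges(G, radius=0.2, p=2))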
def point_find_nearest_businesses(df, point, k=5, loc_cols=['latitude', 'longitude']):
    """
    Return the k rows of df whose locations are nearest to point (lat, long).

    :param df: DataFrame with location columns given by loc_cols
    :param point: (latitude, longitude) query point
    :param k: number of nearest businesses to return
    :param loc_cols: names of the latitude/longitude columns
    :return: DataFrame of the k nearest rows
    """
    tree = KDTree(df[loc_cols])
    distance, indices = tree.query(point, k)
    # .ix is deprecated; the KDTree returns positional indices, so use .iloc
    return df.iloc[indices]
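# Minimal usage sketch for point_find_nearest_businesses above (assumes the
# module-level KDTree import it relies on); the business table is fabricated.
import pandas as pd

df = pd.DataFrame({'name': ['a', 'b', 'c', 'd'],
                   'latitude': [40.71, 40.73, 40.75, 40.77],
                   'longitude': [-74.00, -74.01, -73.99, -74.02]})
nearest = point_find_nearest_businesses(df, point=(40.72, -74.00), k=2)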
def spatialCorelation(points, fields, vectorColumns=None, Nbins=200, maxDist=50.0):
    """Compute the spatial correlation of each field.

    points -- 2D array of point coordinates. Shape is (N,d) with d the
        number of spatial dimensions.
    fields -- 2D array of scalar fields or of coordinates of vector fields.
        Shape is (N, F) with F the sum of the dimensions of each field.
    vectorColumns -- 1D array indexing the columns of fields into vector
        fields. For example [0, 1, 1, 1] means that the first column of
        fields is the scalar field 0 and the next 3 columns are the
        coordinates of a 3D vector field.
    Nbins -- The number of bins of the histogram
    maxDist -- The maximum distance between a pair of points taken into
        account in the histogram
    """
    # parameter parsing
    if len(points) != len(fields):
        raise ValueError(
            'You must have exactly one field value per point\n'
            'Here points is %i and fields is %i' % (len(points), len(fields)))
    if vectorColumns is None:
        vectorColumns = np.arange(fields.shape[1])
    if len(vectorColumns) != fields.shape[1]:
        vectorColumns = np.concatenate((
            vectorColumns,
            np.arange(vectorColumns.max() + 1, fields.shape[1])))
    slices = [np.where(vectorColumns == v)[0]
              for v in range(vectorColumns.max() + 1)]

    # spatial query: only points further than maxDist/2 from every boundary
    # are used as centres
    lowerBound = points.min(axis=0) + maxDist / 2
    upperBound = points.max(axis=0) - maxDist / 2
    inside_id = [i for i, p in enumerate(points)
                 if (p >= lowerBound).all() and (p <= upperBound).all()]
    tree = KDTree(points)
    inside_tree = KDTree(points[inside_id])
    pairs = inside_tree.query_ball_tree(tree, maxDist)

    # binning
    coord_bins = np.zeros((Nbins, fields.shape[1]))
    nb_bins = np.zeros((Nbins), dtype=int)
    for p, qs in zip(inside_id, pairs):
        qs.remove(p)
        rs = np.asarray(
            np.sqrt(((points[qs] - points[p])**2).sum(axis=1)) * Nbins / maxDist,
            dtype=int)
        nb_bins[rs] += 1
        coord_bins[rs] += fields[qs] * fields[p]
    bins = np.column_stack([coord_bins[:, cols].sum(axis=1) for cols in slices])
    bins[np.nonzero(nb_bins)] /= nb_bins[np.nonzero(nb_bins)][:, np.newaxis]
    return np.column_stack((np.arange(Nbins, dtype=float) / maxDist, bins))
def remove_close(points, radius):
    '''
    Given an (n, m) set of points where n=(2|3) return a list of points
    where no point is closer than radius.
    '''
    tree = KDTree(points)
    consumed = np.zeros(len(points), dtype=bool)
    unique = np.zeros(len(points), dtype=bool)
    for i in range(len(points)):
        if consumed[i]:
            continue
        neighbors = tree.query_ball_point(points[i], r=radius)
        consumed[neighbors] = True
        unique[i] = True
    return points[unique]
def kldivergence(x, y):
    """Compute the Kullback-Leibler divergence between two multivariate samples.

    Parameters
    ----------
    x : 2D array (n,d)
        Samples from distribution P, which typically represents the true
        distribution.
    y : 2D array (m,d)
        Samples from distribution Q, which typically represents the
        approximate distribution.

    Returns
    -------
    out : float
        The estimated Kullback-Leibler divergence D(P||Q).

    References
    ----------
    Perez-Cruz, F. Kullback-Leibler divergence estimation of continuous
    distributions. IEEE International Symposium on Information Theory, 2008.
    """
    from scipy.spatial import cKDTree as KDTree

    # Check the dimensions are consistent
    x = NP.atleast_2d(x)
    y = NP.atleast_2d(y)

    n, d = x.shape
    m, dy = y.shape

    assert(d == dy)

    # Build a KD tree representation of the samples and find the nearest
    # neighbour of each point in x.
    xtree = KDTree(x)
    ytree = KDTree(y)

    # Get the first two nearest neighbours for x, since the closest one is
    # the sample itself.
    r = xtree.query(x, k=2, eps=.01, p=2)[0][:, 1]
    s = ytree.query(x, k=1, eps=.01, p=2)[0]
    print(r)
    print(s)

    # There is a mistake in the paper. In Eq. 14, the right side misses a
    # negative sign on the first term of the right hand side.
    return -NP.log(r/s).sum() * d / n + NP.log(m / (n - 1.))
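# Minimal usage sketch for kldivergence above (assumes the module-level
# `import numpy as NP` the function relies on); with two 1-D Gaussian samples
# the estimate should approach the analytic value 0.5**2 / 2 = 0.125.
import numpy as np

rng = np.random.default_rng(0)
x = rng.normal(0.0, 1.0, size=(5000, 1))   # samples from P
y = rng.normal(0.5, 1.0, size=(5000, 1))   # samples from Q
est = kldivergence(x, y)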
def remove_close_set(points_fixed, points_reduce, radius):
    '''
    Given two sets of points and a radius, return a set of points that is
    the subset of points_reduce where no point is within radius of any
    point in points_fixed.
    '''
    tree_fixed = KDTree(points_fixed)
    tree_reduce = KDTree(points_reduce)
    reduce_duplicates = tree_fixed.query_ball_tree(tree_reduce, r=radius)
    reduce_duplicates = np.unique(np.hstack(reduce_duplicates).astype(int))
    reduce_mask = np.ones(len(points_reduce), dtype=bool)
    reduce_mask[reduce_duplicates] = False
    points_clean = points_reduce[reduce_mask]
    return points_clean
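# Minimal usage sketch for remove_close_set above (assumes the module-level
# numpy and KDTree imports it relies on); both point sets are synthetic.
import numpy as np

fixed = np.random.rand(100, 3)
candidates = np.random.rand(500, 3)
kept = remove_close_set(fixed, candidates, radius=0.05)
# 'kept' holds only candidates further than 0.05 from every fixed point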
def CartMatch(coord1, coord2, tol=None, nnearest=1):
    """
    Cartesian coordinate matching
    """
    # sanitize
    coord1 = np.array(coord1, ndmin=1)
    coord2 = np.array(coord2, ndmin=1)

    # check the dimensions of the coordinates
    npairs1 = len(coord1)
    ndim1 = 1 if len(np.shape(coord1)) == 1 else np.shape(coord1)[1]
    npairs2 = len(coord2)
    ndim2 = 1 if len(np.shape(coord2)) == 1 else np.shape(coord2)[1]

    # check whether coord1 and coord2 have the same shape
    if ndim1 != ndim2:
        raise RuntimeError("The dims of coord1/2 are not the same.")
    else:
        ndim = ndim1

    # make proper arrays if they are 1d arrays
    if ndim == 1:
        coord1 = np.array([coord1, np.zeros(len(coord1))]).T
        coord2 = np.array([coord2, np.zeros(len(coord2))]).T

    # kdtree the coord2
    kdt = KDT(coord2)
    if nnearest == 1:
        idxs2 = kdt.query(coord1)[1]
    elif nnearest > 1:
        idxs2 = kdt.query(coord1, nnearest)[1][:, -1]
    else:
        raise ValueError('invalid nnearest ' + str(nnearest))

    # distance - warning: this could overflow if the float precision is not
    # enough; we assume that case is beyond the distance of interest...
    ds = np.sqrt(np.sum((coord1 - coord2[idxs2])**2, axis=1))

    # index of coord1
    idxs1 = np.arange(npairs1)

    # distance filtering
    if tol is not None:
        msk = ds < tol
        idxs1 = idxs1[msk]
        idxs2 = idxs2[msk]
        ds = ds[msk]

    return idxs1, idxs2, ds
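# Minimal usage sketch for CartMatch above (assumes the module-level
# `from scipy.spatial import cKDTree as KDT` it relies on); 1-D catalogues.
import numpy as np

cat1 = np.array([0.0, 1.0, 2.0, 10.0])
cat2 = np.array([0.1, 2.2, 9.0])
idx1, idx2, ds = CartMatch(cat1, cat2, tol=0.5, nnearest=1)
# idx1/idx2 pair up entries of cat1 and cat2 that lie closer than tol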
def compute_errors(self, mag_err_lim=None, dx_lim=None): """Estimates errors and completeness per star. Load photometry from fake table (from same chip, ext as primary data. For each star in the phot table, get its magnitude. Use a kdtree to get the N most similar stars; compute statistics Parameters ---------- frac : float Scalar fractional level of completeness. For example, 0.5 is the 50% completeness limit. mag_err_lim : float Maximum absolute difference in magnitudes, in any band, for the star to be considered recovered. dx_lim : float Maximum distance between a fake star's input site and its observed site for the fake star to be considered recovered. """ mag_errors = self._f.mag_errors() # diffs nstars x nimages recovered = self._f.recovered(mag_err_lim=mag_err_lim, dx_lim=dx_lim) tree = KDTree(self._f.data['mag']) obs_mags = np.array([row['mag'] for row in self._p.photTable.iterrows()]) dists, indices = tree.query(obs_mags, k=100) # distance_upper_bound=mag_err_lim) nObs = obs_mags.shape[0] nImages = obs_mags.shape[1] sigmas = np.empty([nObs, nImages]) comps = np.empty(nObs) for i in xrange(nObs): if np.any(obs_mags[i] > 50.): for j in xrange(nImages): sigmas[i, j] = np.nan comps[i] = np.nan continue idx = indices[i, :].flatten() for j in xrange(nImages): # Estimate uncertainty in this band (image index) sigmas[i, j] = np.std(mag_errors[idx, j]) # Estimate completeness for this star c = recovered[indices[i, :]] comps[i] = np.float(c.sum()) / len(c) # insert errors into the HDF5 table (need to make a new column self._p.add_column("ast_mag_err", sigmas) # insert completeness for this star self._p.add_column("comp", comps)
def main():
    # read in the file
    try:
        ifs = open(sys.argv[1])
        sample, ext = os.path.splitext(sys.argv[1])
    except IndexError:
        ifs = sys.stdin
        sample = ''
    data = np.loadtxt(ifs, delimiter=',')
    if ifs is not sys.stdin:
        ifs.close()

    # view of the com
    com = data[:, 1:4]

    # construct a KD tree
    tree = KDTree(com)

    # query KD tree to find the first nearest neighbor
    # (k=2 because the nearest point is the query point itself)
    dist, idx = tree.query(com, k=2)
    nn = [(i, j, d2) for ((d1, d2), (i, j)) in zip(dist, idx)]

    # histogram of the nearest neighbor distance
    hist(np.array(nn)[:, 2],
         title='{} pore-pore distances'.format(sample),
         output='{}.pdf'.format(sample))

    # save the nearest neighbor distance to .json files
    ofile = '{}_pore-distribution.json'.format(sample)
    medianDist = np.median(np.array(nn)[:, 2])
    dist = {
        'Pore ID': list(data[:, 0].astype(int)),
        'center of mass X': {'units': '$\mu$m', 'values': list(data[:, 1])},
        'center of mass Y': {'units': '$\mu$m', 'values': list(data[:, 2])},
        'center of mass Z': {'units': '$\mu$m', 'values': list(data[:, 3])},
        'volume': {'units': '$\mu$m^3', 'values': list(data[:, 4])},
        'nearest neighbor distance': {
            'units': '$\mu$m',
            # sort by pore index; the Python 2 `cmp=` comparator is replaced
            # by an equivalent key function
            'values': [entry[2] for entry in sorted(nn, key=lambda t: t[0])]},
        'median nearest neighbor distance': {
            'units': '$\mu$m', 'values': medianDist}
    }
    json.dump(dist, open(ofile, 'w'))
def match(s, h, fits_image, tolerance=4):
    """
    Parameters
    ----------
    s, h : obj
        Catalog objects. Each must have `ra` and `dec` attributes as 1-D
        Numpy arrays.
    fits_image : string
        FITS image for conversion of RA,DEC to X,Y.
    tolerance : number
        Match tolerance in pixels.

    Returns
    -------
    xmatch, ymatch
        Matched X,Y from first catalog.
    xhmatch, yhmatch
        Matched X,Y from second catalog.
    """
    # Now use pywcs to put these on some sort of projection. I think as
    # long as you use the same for both data sets it's not really important
    # what the projection is. In my case I read in a fits image associated
    # with the first catalog and use that header info.
    hdu = io.fits.open(fits_image)
    wcs = pywcs.WCS(hdu['PRIMARY'].header)

    # Convert sky to x,y positions
    x, y = wcs.wcs_world2pix(s.ra, s.dec, 0)
    xh, yh = wcs.wcs_world2pix(h.ra, h.dec, 0)

    # Create a KD Tree (cKDTree needs a sequence, not a generator, so
    # materialize the zip for Python 3)
    tree = KDTree(list(zip(x.ravel(), y.ravel())))

    # Search it for the nearest neighbor:
    # d = distance of the nearest neighbor
    # i = index in x,y arrays of the nearest neighbor for each source in xh,yh
    d, i = tree.query(list(zip(xh.ravel(), yh.ravel())), k=1)

    # Keep just the matches within the tolerance
    j = d < tolerance
    ii = i[j]

    # match within N pixels; trickier to do this in ra,dec
    xmatch, ymatch = x[ii], y[ii]
    xhmatch, yhmatch = xh[j], yh[j]

    return xmatch, ymatch, xhmatch, yhmatch
def EstimateLatticeConstant(pos): """ Estimate the lattice constant of a point set that represent a square grid. Parameters ---------- pos : array like A 2D array of shape (N, 2) containing the coordinates of the points. Returns ------- kxy : array like [2x2] lattice constants """ # Find the closest 4 neighbours (excluding itself) for each point. tree = KDTree(pos) dd, ii = tree.query(pos, k=5) dr = dd[:, 1:] # Determine the median radial distance and filter all points beyond # 2*sigma. med = numpy.median(dr) std = numpy.std(dr) outliers = numpy.abs(dr - med) > (2 * std) # doesn't work well if std is very high # Determine horizontal and vertical distance (only radial distance is # returned by tree.query). dpos = pos[ii[:, 0, numpy.newaxis]] - pos[ii[:, 1:]] dx, dy = dpos[:, :, 0], dpos[:, :, 1] assert numpy.all(numpy.abs(dr - numpy.hypot(dx, dy)) < 1.0e-12) # Use k-means to group the points into two directions. X = numpy.column_stack((dx[~outliers], dy[~outliers])) X[X[:, 0] < -0.5 * med] *= -1 X[X[:, 1] < -0.5 * med] *= -1 centroids, _ = kmeans(X, 2) labels = numpy.argmin(cdist(X, centroids), axis=1) kxy = numpy.array([numpy.median(X[labels.ravel() == 0], axis=0), numpy.median(X[labels.ravel() == 1], axis=0)]) # The angle between the two directions should be close to 90 degrees. alpha = numpy.math.atan2(numpy.linalg.norm(numpy.cross(*kxy)), numpy.dot(*kxy)) if abs(alpha - math.pi / 2) > math.radians(2.5): logging.warning('Estimated lattice angle differs from 90 degrees by ' 'more than 2.5 degrees. Input data could be wrong') return kxy
def __init__(self, xdata, ydata):
    # Do some tests here

    # Find data covariance
    cov = np.cov(xdata.T)

    # Cholesky decompose to make new basis
    L_mat = np.linalg.cholesky(cov)
    self.L_mat = np.linalg.inv(L_mat)

    # Transform xdata into new basis
    self.xtrain = xdata
    self.transf_x = np.array([np.dot(self.L_mat, x) for x in xdata])

    # DEBUG
    # plt.plot(xdata[:,0], xdata[:,1], '.', color='r')
    # plt.plot(self.transf_x[:,0], self.transf_x[:,1], '.')
    # plt.show()
    # sys.exit()

    # Store training
    self.ytrain = ydata

    # Build KDTree for quick lookup
    self.transf_xtree = KDTree(self.transf_x)
def __init__(self, X, z, leafsize=10, stat=0):
    assert len(X) == len(z), "len(X) %d != len(z) %d" % (len(X), len(z))
    self.tree = KDTree(X, leafsize=leafsize)  # build the tree
    self.z = z
    self.stat = stat
    self.wn = 0
    self.wsum = None
class ShapeMatcher(object): def __init__(self, ids, invariants): """Match other shapes based on euclidean distance. Constructs a KDTree in order to do nearest neighbour queries. For large datasets it might take a second or two to build the tree. Arguments: ids -- set names/identifiers for the shapes invariants -- 2D array of invariants that describe the shapes """ self.ids = ids self.invariants = invariants LOG.debug('Constructing tree from %d invariants', len(invariants)) self.tree = KDTree(invariants) def search_invariants(self, invariants, n=10, df=False): """Search for matches based on invariants. Arguments: invariants -- N length array of shape descriptors Keyword arguments: n -- number of matches to return (default 10) df -- return matches as a pandas DataFrame (default False) """ if n == 'max': n = len(self.invariants) LOG.debug('Searching for %d closest points', n) distances, indexes = self.tree.query(invariants, n) invariants = self.invariants[indexes] # Need to handle case of n == 1 correctly if isinstance(indexes, int): ids = self.ids[indexes].decode('utf-8') return SearchResult(ids, distances, invariants) else: ids = [x.decode('utf-8') for x in self.ids[indexes]] if df: return pd.DataFrame({ 'ID': ids, 'Proximity': distances }).set_index('ID') else: return [ SearchResult(n, d, i) for n, d, i in zip(ids, distances, invariants) ] def search_shape(self, shape, **kwargs): """Search for matches based on a shape object. (convenience function) Arguments: shape -- a Shape object. Keyword arguments: n -- number of matches to return (default 10) df -- return matches as a pandas DataFrame (default False) """ LOG.debug('Searching for closest shapes to %s', shape.name) # delegate to search_invariants method return self.search_invariants(shape.invariants, **kwargs) @staticmethod def from_datafile(filename, l_max=20): """Construct a CSD matcher based on the bundled data Keyword arguments: l_max -- maximum angular momenta to use for invariants (default 20) use_radius -- use the mean radius as the first invariant (default True) """ names, invariants = load_data(filename) return ShapeMatcher(names, invariants) @staticmethod def from_shapes(shapes, l_max=20): """Construct a shapematcher object from a list of shapes Arguments: shapes -- A list of Shape objects Keyword arguments: l_max -- maximuma angular momenta to use for invariants (default 20) """ invariants, names = [], [] if isinstance(shapes, dict): for name, s in shapes.items(): invariants.append(s.invariants) names.append(name) else: for s in shapes: invariants.append(s.invariants) names.append(s.name) invariants = np.array(invariants) names = np.array(names, dtype='|S64') return ShapeMatcher(names, invariants) @staticmethod def from_surface_files(files, property_name='shape'): """Construct a CSD matcher based on the bundled data Keyword arguments: l_max -- maximum angular momenta to use for invariants (default 20) use_radius -- use the mean radius as the first invariant (default True) """ shapes = {} for f in files: shapes['f.stem'] = surface_description(f, property_name=property_name) return ShapeMatcher.from_shapes(shapes) def all(self): return self.search_invariants(self.invariants[0], n=len(self.invariants))
def __init__(self, xgrid, ygrid, invalid_mask=None):
    print("Generating tree")
    self.tree = KDTree(np.array(list(zip(xgrid.ravel(), ygrid.ravel()))))
    self.imask = None
    if invalid_mask is not None:
        self.imask = np.asarray(invalid_mask).astype(bool)
        n_clusters=number_of_cluster, random_state=41).fit(subset_data_unref)
    print('Kmeans done: Time elapsed: {} seconds'.format(time.time() - time_start))
    labels_unref = kmeans_unref.labels_
    centroids_unref = kmeans_unref.cluster_centers_
    counting_occurence_in_patient_compare = Counter(labels_unref)
    vals_unref = np.fromiter(counting_occurence_in_patient_compare.values(), dtype=float)

    # COMPARING USING KDTREE: for each reference centroid, find the nearest
    # unreferenced centroid and take over its cluster count
    k = KDTree(centroids_unref)
    (dists, idxs) = k.query(centroids_ref)
    reference_dataframe[f'Count_{name}'] = vals_unref[idxs]

    print(reference_dataframe.shape, reference_dataframe.columns)
    reference_dataframe.sort_values(by=['Cluster'], inplace=True)
    reference_dataframe.sort_index(axis=1, ascending=True, inplace=True)
    reference_dataframe.to_csv(
        path_to_store_frame +
        f'/Data_for_LDA_from_generate_data_with_n_{number_of_cluster}_configuration_{configuration}.csv')
class RGeocoder(object): """ The main reverse geocoder class """ def __init__(self, mode=2, verbose=False, stream=None): """ Class Instantiation Args: mode (int): Library supports the following two modes: - 1 = Single-threaded K-D Tree - 2 = Multi-threaded K-D Tree (Default) verbose (bool): For verbose output, set to True stream (io.StringIO): An in-memory stream of a custom data source """ self.mode = mode self.verbose = verbose if stream: coordinates, self.locations = self.load(stream) else: coordinates, self.locations = self.extract(rel_path(RG_FILE)) if mode == 1: # Single-process self.tree = KDTree(coordinates) else: # Multi-process self.tree = KDTree_MP.cKDTree_MP(coordinates) def query(self, coordinates): """ Function to query the K-D tree to find the nearest city Args: coordinates (list): List of tuple coordinates, i.e. [(latitude, longitude)] """ if self.mode == 1: _, indices = self.tree.query(coordinates, k=1) else: _, indices = self.tree.pquery(coordinates, k=1) return [self.locations[index] for index in indices] def load(self, stream): """ Function that loads a custom data source Args: stream (io.StringIO): An in-memory stream of a custom data source. The format of the stream must be a comma-separated file with header containing the columns defined in RG_COLUMNS. """ stream_reader = csv.DictReader(stream, delimiter=',') header = stream_reader.fieldnames if header != RG_COLUMNS: raise csv.Error('Input must be a comma-separated file with header containing ' + \ 'the following columns - %s. For more help, visit: ' % (','.join(RG_COLUMNS)) + \ 'https://github.com/thampiman/reverse-geocoder') # Load all the coordinates and locations geo_coords, locations = [], [] for row in stream_reader: geo_coords.append((row['lat'], row['lon'])) locations.append(row) return geo_coords, locations def extract(self, local_filename): """ Function loads the already extracted GeoNames cities file or downloads and extracts it if it doesn't exist locally Args: local_filename (str): Path to local RG_FILE """ if os.path.exists(local_filename): if self.verbose: print('Loading formatted geocoded file...', file=sys.stderr) rows = csv.DictReader(open(local_filename, 'rt')) else: gn_cities1000_url = GN_URL + GN_CITIES1000 + '.zip' gn_admin1_url = GN_URL + GN_ADMIN1 gn_admin2_url = GN_URL + GN_ADMIN2 cities1000_zipfilename = GN_CITIES1000 + '.zip' cities1000_filename = GN_CITIES1000 + '.txt' if not os.path.exists(cities1000_zipfilename): if self.verbose: print('Downloading files from Geoname...', file=sys.stderr) try: # Python 3 import urllib.request urllib.request.urlretrieve(gn_cities1000_url, cities1000_zipfilename) urllib.request.urlretrieve(gn_admin1_url, GN_ADMIN1) urllib.request.urlretrieve(gn_admin2_url, GN_ADMIN2) except ImportError: # Python 2 import urllib urllib.urlretrieve(gn_cities1000_url, cities1000_zipfilename) urllib.urlretrieve(gn_admin1_url, GN_ADMIN1) urllib.urlretrieve(gn_admin2_url, GN_ADMIN2) if self.verbose: print('Extracting cities1000...'file=sys.stderr) _z = zipfile.ZipFile(open(cities1000_zipfilename, 'rb')) open(cities1000_filename, 'wb').write(_z.read(cities1000_filename)) if self.verbose: print('Loading admin1 codes...', file=sys.stderr) admin1_map = {} t_rows = csv.reader(open(GN_ADMIN1, 'rt'), delimiter='\t') for row in t_rows: admin1_map[row[ADMIN_COLUMNS['concatCodes']]] = row[ADMIN_COLUMNS['asciiName']] if self.verbose: print('Loading admin2 codes...', file=sys.stderr) admin2_map = {} for row in csv.reader(open(GN_ADMIN2, 'rt'), delimiter='\t'): 
admin2_map[row[ADMIN_COLUMNS['concatCodes']]] = row[ADMIN_COLUMNS['asciiName']] if self.verbose: print('Creating formatted geocoded file...', file=sys.stderr) writer = csv.DictWriter(open(local_filename, 'wt'), fieldnames=RG_COLUMNS) rows = [] for row in csv.reader(open(cities1000_filename, 'rt'), \ delimiter='\t', quoting=csv.QUOTE_NONE): lat = row[GN_COLUMNS['latitude']] lon = row[GN_COLUMNS['longitude']] name = row[GN_COLUMNS['asciiName']] cc = row[GN_COLUMNS['countryCode']] admin1_c = row[GN_COLUMNS['admin1Code']] admin2_c = row[GN_COLUMNS['admin2Code']] cc_admin1 = cc+'.'+admin1_c cc_admin2 = cc+'.'+admin1_c+'.'+admin2_c admin1 = '' admin2 = '' if cc_admin1 in admin1_map: admin1 = admin1_map[cc_admin1] if cc_admin2 in admin2_map: admin2 = admin2_map[cc_admin2] write_row = {'lat':lat, 'lon':lon, 'name':name, 'admin1':admin1, 'admin2':admin2, 'cc':cc} rows.append(write_row) writer.writeheader() writer.writerows(rows) if self.verbose: print('Removing extracted cities1000 to save space...', file=sys.stderr) os.remove(cities1000_filename) # Load all the coordinates and locations geo_coords, locations = [], [] for row in rows: geo_coords.append((row['lat'], row['lon'])) locations.append(row) return geo_coords, locations
def gG_l(pos, qlms, is_center, Nbins, maxdist): """ Spatial correlation of the qlms (non normalized). For each particle i tagged as is_center, for each particle j closer than maxdist, do the cross product between their qlm and count, then bin each quantity with respect to distance. The two first sums need to be normalised by the last one. Periodic boundary conditions are not supported. Parameters ---------- pos : (N, 3) array of floats Spatial coordinates qlms : list A list of M (N, 2l+1) arrays of boo coordinates for l-fold symmetry. l can be different for each item. is_center : (N) array of bool. For example all particles further away than maxdist from any edge of the box. Nbins : int The number of bins along r maxdist : float The maximum distance considered. Returns ---------- hqQ : (Nbins, M) array of floats The sum of cross products for each distance and each qlm g : (Nbins) array of ints The number of pairs for each distance """ for qlm in qlms: assert len(pos) == len(qlm) assert len(is_center) == len(pos) #conversion factor between indices and bins l2r = Nbins / maxdist #result containers #an additional bin for the case where the distance is exactly equal to maxdist hqQ = np.zeros((Nbins + 1, len(qlms))) g = np.zeros(Nbins + 1, int) #compute ql for all particles qQ = np.array([ql(qlm) for qlm in qlms]) nonzero = qQ.min(0) + 1.0 > 1.0 #spatial indexing tree = KDTree(pos[nonzero], 12) centertree = KDTree(pos[is_center & nonzero], 12) #all pairs of points closer than maxdist with their distances in a record array query = centertree.sparse_distance_matrix(tree, maxdist, output_type='ndarray') #convert in original indices nonzeroindex = np.where(nonzero)[0] centerindex = np.where(is_center & nonzero)[0] query['i'] = centerindex[query['i']] query['j'] = nonzeroindex[query['j']] #keep only pairs where the points are distinct good = query['i'] != query['j'] query = query[good] #binning of distances rs = (query['v'] * l2r).astype(int) np.add.at(g, rs, 1) #binning of boo cross products pqQs = np.empty((len(rs), len(qlms))) for it, qlm in enumerate(qlms): pqQs[:, it] = product(qlm[query['i']], qlm[query['j']]) prodnorm = qQ[it, query['i']] * qQ[it, query['j']] pqQs[:, it] /= prodnorm np.add.at(hqQ, rs, pqQs) return hqQ[:-1], g[:-1]
def get_area_avg_from_erai_data(start_year=-np.Inf, end_year=np.Inf, var_folder="", varname="", mask=None, mask_lons=None, mask_lats=None): """ Interpolate the mask to the ERA-Interim grid using nearest neighbour approach :param start_year: :param end_year: :param var_folder: :param varname: :param mask: :return: """ def _get_year(fn): return int(fn.split(".")[0].split("_")[1]) flist = [ os.path.join(var_folder, fn) for fn in os.listdir(var_folder) if fn.startswith(varname) and (start_year <= _get_year(fn)) and ( _get_year(fn) <= end_year) ] print(flist) ktree = None mask_interpolated = None lons_target, lats_target = None, None ser_list = [] for fp in flist: with Dataset(fp) as ds: time_var = ds.variables["time"] times = num2date(time_var[:], time_var.units) print(times[0], times[-1]) # Determine nearest neighbours for interpolation (do it only once) if ktree is None: # get lons and lats from the bathymetry file data_folder_p = Path(var_folder).parent for f in data_folder_p.iterdir(): if f.name.lower().startswith("bathy_meter"): with Dataset(str(f)) as ds_bathy: lons_target, lats_target = [ ds_bathy.variables[k][:] for k in ["nav_lon", "nav_lat"] ] break x, y, z = lat_lon.lon_lat_to_cartesian(mask_lons.flatten(), mask_lats.flatten()) xt, yt, zt = lat_lon.lon_lat_to_cartesian( lons_target.flatten(), lats_target.flatten()) ktree = KDTree(list(zip(x, y, z))) dists, inds = ktree.query(list(zip(xt, yt, zt)), k=1) mask_interpolated = mask.flatten()[inds] mask_interpolated = mask_interpolated.reshape( lons_target.shape) vals = [ field[mask_interpolated].mean() for field in ds.variables[varname][:] ] ser = pd.Series(index=times, data=vals) if varname == "TT": ser -= 273.15 ser.sort_index(inplace=True) ser_list.append(ser) return pd.concat(ser_list)
def mosaic_texture(humfile, sonpath, cs2cs_args = "epsg:26949", res = 99, nn = 5, weight = 1): ''' Create mosaics of the spatially referenced sidescan echograms Syntax ---------- [] = PyHum.mosaic_texture(humfile, sonpath, cs2cs_args, res, nn, weight) Parameters ---------- humfile : str path to the .DAT file sonpath : str path where the *.SON files are cs2cs_args : int, *optional* [Default="epsg:26949"] arguments to create coordinates in a projected coordinate system this argument gets given to pyproj to turn wgs84 (lat/lon) coordinates into any projection supported by the proj.4 libraries res : float, *optional* [Default=0] grid resolution of output gridded texture map if res=99, res will be determined automatically from the spatial resolution of 1 pixel nn: int, *optional* [Default=5] number of nearest neighbours for gridding weight: int, *optional* [Default=1] specifies the type of pixel weighting in the gridding process weight = 1, based on grazing angle and inverse distance weighting weight = 2, based on grazing angle only weight = 3, inverse distance weighting only weight = 4, no weighting Returns ------- sonpath+'GroundOverlay.kml': kml file contains gridded (or point cloud) sidescan intensity map for importing into google earth of the pth chunk sonpath+'map.png' : image overlay associated with the kml file ''' # prompt user to supply file if no input file given if not humfile: print 'An input file is required!!!!!!' Tk().withdraw() # we don't want a full GUI, so keep the root window from appearing humfile = askopenfilename(filetypes=[("DAT files","*.DAT")]) # prompt user to supply directory if no input sonpath is given if not sonpath: print 'A *.SON directory is required!!!!!!' Tk().withdraw() # we don't want a full GUI, so keep the root window from appearing sonpath = askdirectory() # print given arguments to screen and convert data type where necessary if humfile: print 'Input file is %s' % (humfile) if sonpath: print 'Sonar file path is %s' % (sonpath) if cs2cs_args: print 'cs2cs arguments are %s' % (cs2cs_args) if res: res = np.asarray(res,float) print 'Gridding resolution: %s' % (str(res)) if nn: nn = int(nn) print 'Number of nearest neighbours for gridding: %s' % (str(nn)) if weight: weight = int(weight) print 'Weighting for gridding: %s' % (str(weight)) ##nn = 5 #number of nearest neighbours in gridding ##noisefloor=10 # noise threshold in dB W # start timer if os.name=='posix': # true if linux/mac or cygwin on windows start = time.time() else: # windows start = time.clock() trans = pyproj.Proj(init=cs2cs_args) # if son path name supplied has no separator at end, put one on if sonpath[-1]!=os.sep: sonpath = sonpath + os.sep base = humfile.split('.DAT') # get base of file name for output base = base[0].split(os.sep)[-1] # remove underscores, negatives and spaces from basename base = humutils.strip_base(base) meta = loadmat(os.path.normpath(os.path.join(sonpath,base+'meta.mat'))) esi = np.squeeze(meta['e']) nsi = np.squeeze(meta['n']) theta = np.squeeze(meta['heading'])/(180/np.pi) # load memory mapped scans shape_port = np.squeeze(meta['shape_port']) if shape_port!='': if os.path.isfile(os.path.normpath(os.path.join(sonpath,base+'_data_port_lar.dat'))): port_fp = io.get_mmap_data(sonpath, base, '_data_port_lar.dat', 'float32', tuple(shape_port)) else: port_fp = io.get_mmap_data(sonpath, base, '_data_port_la.dat', 'float32', tuple(shape_port)) shape_star = np.squeeze(meta['shape_star']) if shape_star!='': if 
os.path.isfile(os.path.normpath(os.path.join(sonpath,base+'_data_star_lar.dat'))): star_fp = io.get_mmap_data(sonpath, base, '_data_star_lar.dat', 'float32', tuple(shape_star)) else: star_fp = io.get_mmap_data(sonpath, base, '_data_star_la.dat', 'float32', tuple(shape_star)) # time varying gain tvg = ((8.5*10**-5)+(3/76923)+((8.5*10**-5)/4))*meta['c'] # depth correction dist_tvg = np.squeeze(((np.tan(np.radians(25)))*np.squeeze(meta['dep_m']))-(tvg)) # read in range data R_fp = io.get_mmap_data(sonpath, base, '_data_range.dat', 'float32', tuple(shape_star)) dx = np.arcsin(meta['c']/(1000*meta['t']*meta['f'])) pix_m = meta['pix_m'] c = meta['c'] if not os.path.isfile( os.path.normpath(os.path.join(sonpath,base+"S.p")) ): #if 2 > 1: inputfiles = [] if len(shape_star)>2: for p in xrange(len(star_fp)): e = esi[shape_port[-1]*p:shape_port[-1]*(p+1)] n = nsi[shape_port[-1]*p:shape_port[-1]*(p+1)] t = theta[shape_port[-1]*p:shape_port[-1]*(p+1)] d = dist_tvg[shape_port[-1]*p:shape_port[-1]*(p+1)] dat_port = port_fp[p] dat_star = star_fp[p] data_R = R_fp[p] print "writing chunk %s " % (str(p)) write_points(e, n, t, d, dat_port, dat_star, data_R, pix_m, res, cs2cs_args, sonpath, p, c, dx) inputfiles.append(os.path.normpath(os.path.join(sonpath,'x_y_class'+str(p)+'.asc'))) else: p=0 print "writing chunk %s " % (str(p)) write_points(esi, nsi, theta, dist_tvg, port_fp, star_fp, R_fp, meta['pix_m'], res, cs2cs_args, sonpath, 0, c, dx) inputfiles.append(os.path.normpath(os.path.join(sonpath,'x_y_class'+str(p)+'.asc'))) #trans = pyproj.Proj(init=cs2cs_args) # D, R, h, t print "reading points from %s files" % (str(len(inputfiles))) X,Y,S,D,R,h,t,i = getxys(inputfiles) print "%s points read from %s files" % (str(len(S)), str(len(inputfiles))) # remove values where sidescan intensity is zero ind = np.where(np.logical_not(S==0))[0] X = X[ind]; Y = Y[ind] S = S[ind]; D = D[ind] R = R[ind]; h = h[ind] t = t[ind]; i = i[ind] del ind # save to file for temporary storage pickle.dump( S, open( os.path.normpath(os.path.join(sonpath,base+"S.p")), "wb" ) ); del S pickle.dump( D, open( os.path.normpath(os.path.join(sonpath,base+"D.p")), "wb" ) ); del D pickle.dump( t, open( os.path.normpath(os.path.join(sonpath,base+"t.p")), "wb" ) ); del t pickle.dump( i, open( os.path.normpath(os.path.join(sonpath,base+"i.p")), "wb" ) ); del i pickle.dump( X, open( os.path.normpath(os.path.join(sonpath,base+"X.p")), "wb" ) ); del X pickle.dump( Y, open( os.path.normpath(os.path.join(sonpath,base+"Y.p")), "wb" ) ); del Y pickle.dump( R, open( os.path.normpath(os.path.join(sonpath,base+"R.p")), "wb" ) ); pickle.dump( h, open( os.path.normpath(os.path.join(sonpath,base+"h.p")), "wb" ) ); #grazing angle g = np.arctan(R.flatten(),h.flatten()) pickle.dump( g, open( os.path.normpath(os.path.join(sonpath,base+"g.p")), "wb" ) ); del g, R, h print "creating grids ..." 
if res==0: res=99 if res==99: #### prepare grids R = pickle.load( open( os.path.normpath(os.path.join(sonpath,base+"R.p")), "rb" ) ) ## actual along-track resolution is this: dx times dy = Af tmp = R * dx * (c*0.007 / 2) del R resg = np.min(tmp[tmp>0]) del tmp else: resg = res X = pickle.load( open( os.path.normpath(os.path.join(sonpath,base+"X.p")), "rb" ) ) Y = pickle.load( open( os.path.normpath(os.path.join(sonpath,base+"Y.p")), "rb" ) ) humlon, humlat = trans(X, Y, inverse=True) grid_x, grid_y = np.meshgrid( np.arange(np.min(X), np.max(X), resg), np.arange(np.min(Y), np.max(Y), resg) ) shape = np.shape(grid_x) tree = KDTree(zip(X.flatten(), Y.flatten())) del X, Y print "mosaicking ..." #k nearest neighbour try: dist, inds = tree.query(zip(grid_x.flatten(), grid_y.flatten()), k = nn, n_jobs=-1) except: #print ".... update your scipy installation to use faster kd-tree" dist, inds = tree.query(zip(grid_x.flatten(), grid_y.flatten()), k = nn) #del grid_x, grid_y if weight==1: g = pickle.load( open( os.path.normpath(os.path.join(sonpath,base+"g.p")), "rb" ) ) w = g[inds] + 1.0 / dist**2 del g elif weight==2: g = pickle.load( open( os.path.normpath(os.path.join(sonpath,base+"g.p")), "rb" ) ) w = g[inds] del g elif weight==3: w = 1.0 / dist**2 elif weight==4: w = 1.0 #g = pickle.load( open( os.path.normpath(os.path.join(sonpath,base+"g.p")), "rb" ) ) #w = g[inds] + 1.0 / dist**2 #del g if weight < 4: w[np.isinf(w)]=1 w[np.isnan(w)]=1 w[w>10000]=10000 w[w<=0]=1 # load in sidescan intensity S = pickle.load( open( os.path.normpath(os.path.join(sonpath,base+"S.p")), "rb" ) ) # filter out noise pixels S[S<noisefloor] = np.nan if nn==1: Sdat_g = (w * S.flatten()[inds]).reshape(shape) del w dist = dist.reshape(shape) else: if weight < 4: Sdat_g = (np.nansum(w * S.flatten()[inds], axis=1) / np.nansum(w, axis=1)).reshape(shape) else: Sdat_g = (np.nansum(S.flatten()[inds], axis=1)).reshape(shape) del w dist = np.nanmean(dist,axis=1).reshape(shape) del S Sdat_g[dist>1] = np.nan Sdat_g[Sdat_g<noisefloor] = np.nan dat = Sdat_g.copy() dat[dist>1] = 0 dat2 = replace_nans.RN(dat.astype('float64'),1000,0.01,2,'localmean').getdata() dat2[dat==0] = np.nan del dat dat2[dat2<noisefloor] = np.nan Sdat_g = dat2.copy() del dat2 Sdat_g[Sdat_g==0] = np.nan Sdat_g[np.isinf(Sdat_g)] = np.nan Sdat_gm = np.ma.masked_invalid(Sdat_g) del Sdat_g glon, glat = trans(grid_x, grid_y, inverse=True) del grid_x, grid_y # ========================================================= print "creating kmz file ..." 
## new way to create kml file pixels = 1024 * 10 fig, ax = humutils.gearth_fig(llcrnrlon=glon.min(), llcrnrlat=glat.min(), urcrnrlon=glon.max(), urcrnrlat=glat.max(), pixels=pixels) cs = ax.pcolormesh(glon, glat, Sdat_gm) ax.set_axis_off() fig.savefig(os.path.normpath(os.path.join(sonpath,'class_overlay1.png')), transparent=True, format='png') fig = plt.figure(figsize=(1.0, 4.0), facecolor=None, frameon=False) ax = fig.add_axes([0.0, 0.05, 0.2, 0.9]) cb = fig.colorbar(cs, cax=ax) cb.set_label('Texture lengthscale [m]', rotation=-90, color='k', labelpad=20) fig.savefig(os.path.normpath(os.path.join(sonpath,'class_legend.png')), transparent=False, format='png') humutils.make_kml(llcrnrlon=glon.min(), llcrnrlat=glat.min(), urcrnrlon=glon.max(), urcrnrlat=glat.max(), figs=[os.path.normpath(os.path.join(sonpath,'class_overlay1.png'))], colorbar=os.path.normpath(os.path.join(sonpath,'class_legend.png')), kmzfile=os.path.normpath(os.path.join(sonpath,'class_GroundOverlay.kmz')), name='Sidescan Intensity') # ========================================================= print "drawing and printing map ..." fig = plt.figure(frameon=False) map = Basemap(projection='merc', epsg=cs2cs_args.split(':')[1], resolution = 'i', #h #f llcrnrlon=np.min(humlon)-0.001, llcrnrlat=np.min(humlat)-0.001, urcrnrlon=np.max(humlon)+0.001, urcrnrlat=np.max(humlat)+0.001) gx,gy = map.projtran(glon, glat) try: map.arcgisimage(server='http://server.arcgisonline.com/ArcGIS', service='ESRI_Imagery_World_2D', xpixels=1000, ypixels=None, dpi=300) except: map.arcgisimage(server='http://server.arcgisonline.com/ArcGIS', service='World_Imagery', xpixels=1000, ypixels=None, dpi=300) #finally: # print "error: map could not be created..." ax = plt.Axes(fig, [0., 0., 1., 1.], ) ax.set_axis_off() fig.add_axes(ax) if Sdat_gm.size > 25000000: print "matrix size > 25,000,000 - decimating by factor of 5 for display" map.pcolormesh(gx[::5,::5], gy[::5,::5], Sdat_gm[::5,::5], vmin=np.nanmin(Sdat_gm), vmax=np.nanmax(Sdat_gm)) else: map.pcolormesh(gx, gy, Sdat_gm, vmin=np.nanmin(Sdat_gm), vmax=np.nanmax(Sdat_gm)) custom_save2(sonpath,'class_map_imagery') del fig if os.name=='posix': # true if linux/mac elapsed = (time.time() - start) else: # windows elapsed = (time.clock() - start) print "Processing took ", elapsed , "seconds to analyse" print "Done!"
def get_ref_coors_convex(field, coors, close_limit=0.1, cache=None, verbose=False): """ Get reference element coordinates and elements corresponding to given physical coordinates. Parameters ---------- field : Field instance The field defining the approximation. coors : array The physical coordinates. close_limit : float, optional The maximum limit distance of a point from the closest element allowed for extrapolation. cache : Struct, optional To speed up a sequence of evaluations, the field mesh and other data can be cached. Optionally, the cache can also contain the reference element coordinates as `cache.ref_coors`, `cache.cells` and `cache.status`, if the evaluation occurs in the same coordinates repeatedly. In that case the mesh related data are ignored. verbose : bool If False, reduce verbosity. Returns ------- ref_coors : array The reference coordinates. cells : array The cell indices corresponding to the reference coordinates. status : array The status: 0 is success, 1 is extrapolation within `close_limit`, 2 is extrapolation outside `close_limit`, 3 is failure, 4 is failure due to non-convergence of the Newton iteration in tensor product cells. Notes ----- Outline of the algorithm for finding xi such that X(xi) = P: 1. make inverse connectivity - for each vertex have cells it is in. 2. find the closest vertex V. 3. choose initial cell: i0 = first from cells incident to V. 4. while not P in C_i, change C_i towards P, check if P in new C_i. """ timer = Timer() ref_coors = get_default_attr(cache, 'ref_coors', None) if ref_coors is None: extrapolate = close_limit > 0.0 ref_coors = nm.empty_like(coors) cells = nm.empty((coors.shape[0], ), dtype=nm.int32) status = nm.empty((coors.shape[0], ), dtype=nm.int32) cmesh = get_default_attr(cache, 'cmesh', None) if cmesh is None: timer.start() mesh = field.create_mesh(extra_nodes=False) cmesh = mesh.cmesh gels = create_geometry_elements() cmesh.set_local_entities(gels) cmesh.setup_entities() centroids = cmesh.get_centroids(cmesh.tdim) if field.gel.name != '3_8': normals0 = cmesh.get_facet_normals() normals1 = None else: normals0 = cmesh.get_facet_normals(0) normals1 = cmesh.get_facet_normals(1) output('cmesh setup: %f s' % timer.stop(), verbose=verbose) else: centroids = cache.centroids normals0 = cache.normals0 normals1 = cache.normals1 kdtree = get_default_attr(cache, 'kdtree', None) if kdtree is None: from scipy.spatial import cKDTree as KDTree timer.start() kdtree = KDTree(cmesh.coors) output('kdtree: %f s' % timer.stop(), verbose=verbose) timer.start() ics = kdtree.query(coors)[1] output('kdtree query: %f s' % timer.stop(), verbose=verbose) ics = nm.asarray(ics, dtype=nm.int32) coors = nm.ascontiguousarray(coors) ctx = field.create_basis_context() timer.start() crc.find_ref_coors_convex(ref_coors, cells, status, coors, cmesh, centroids, normals0, normals1, ics, extrapolate, 1e-15, close_limit, ctx) output('ref. coordinates: %f s' % timer.stop(), verbose=verbose) else: cells = cache.cells status = cache.status return ref_coors, cells, status
def get_potential_cells(coors, cmesh, centroids=None, extrapolate=True): """ Get cells that potentially contain points with the given physical coordinates. Parameters ---------- coors : array The physical coordinates. cmesh : CMesh instance The cmesh defining the cells. centroids : array, optional The centroids of the cells. extrapolate : bool If True, even the points that are surely outside of the cmesh are considered and assigned potential cells. Returns ------- potential_cells : array The indices of the cells that potentially contain the points. offsets : array The offsets into `potential_cells` for each point: a point ``ip`` is potentially in cells ``potential_cells[offsets[ip]:offsets[ip+1]]``. """ from scipy.spatial import cKDTree as KDTree if centroids is None: centroids = cmesh.get_centroids(cmesh.tdim) kdtree = KDTree(coors) conn = cmesh.get_cell_conn() cc = conn.indices.reshape(cmesh.n_el, -1) cell_coors = cmesh.coors[cc] rays = cell_coors - centroids[:, None] radii = nm.linalg.norm(rays, ord=nm.inf, axis=2).max(axis=1) potential_cells = [[]] * coors.shape[0] for ic, centroid in enumerate(centroids): ips = kdtree.query_ball_point(centroid, radii[ic], p=nm.inf) if len(ips): for ip in ips: if not len(potential_cells[ip]): potential_cells[ip] = [] potential_cells[ip].append(ic) lens = nm.array([0] + [len(ii) for ii in potential_cells], dtype=nm.int32) if extrapolate: # Deal with the points outside of the field domain - insert elements # incident to the closest mesh vertex. iin = nm.where(lens[1:] == 0)[0] if len(iin): kdtree = KDTree(cmesh.coors) ics = kdtree.query(coors[iin])[1] cmesh.setup_connectivity(0, cmesh.tdim) conn = cmesh.get_conn(0, cmesh.tdim) oo = conn.offsets for ii, ip in enumerate(iin): ik = ics[ii] potential_cells[ip] = conn.indices[oo[ik]:oo[ik + 1]] lens[ip + 1] = len(potential_cells[ip]) offsets = nm.cumsum(lens, dtype=nm.int32) potential_cells = nm.concatenate(potential_cells).astype(nm.int32) return potential_cells, offsets
class NearestNeighborFinder(): """ Nearest neighbor search object for NEMO netCDF output files. """ def __init__(self, ncfilename): """ Create new instance. :arg str ncfilename: NEMO netCDF file name """ self.filename = ncfilename self.data_dim = None self.grid_type = None self._build_tree() def _build_tree(self): """ Construct nearest neighbor tree. """ def parse_grid_type(ncf): """ Figure out which discretization the file contains, T, U or V Reads the description attribute, e.g. "ocean T grid variables" returns 't', 'u', or 'v' """ return 't' # HACK assume always T grid desc = ncf.description words = desc.split() assert words[0] == 'ocean' assert words[2] == 'grid' return words[1].lower() with netCDF4.Dataset(self.filename) as ncf: self.grid_type = parse_grid_type(ncf) assert self.grid_type == 't', 'Only T grid is supported currently' # compute land mask self.data_dim = 3 if 'e3t' in ncf.variables else 2 if self.data_dim == 3: # NOTE does not take time-dependent wetting-drying into account e = ncf['e3t'][0, :, :, :] self.landmask = numpy.all(e.mask, axis=0) # 1D array of all wet points in raveled index self.wetmask = numpy.nonzero(~self.landmask.ravel())[0] # get coordinates self.lon = ncf['nav_lon'][:] self.lat = ncf['nav_lat'][:] depth = ncf['deptht'][:] self.z = -depth # 1D arrays of all wet points self.valid_lon = self.lon.ravel()[self.wetmask] self.valid_lat = self.lat.ravel()[self.wetmask] else: # read a field to get landmask for v in ncf.variables: var = ncf[v] if len(var.shape) == 3: # 2D time dependent field self.landmask = numpy.all(var[:].mask, axis=0) break self.wetmask = numpy.nonzero(~self.landmask.ravel())[0] # get coordinates self.lon = ncf['nav_lon'][:] self.lat = ncf['nav_lat'][:] self.z = 0.0 # 1D arrays of all wet points self.valid_lon = self.lon.ravel()[self.wetmask] self.valid_lat = self.lat.ravel()[self.wetmask] assert len(self.valid_lat) > 0, \ 'No valid points found in {:}'.format(self.filename) coords = numpy.vstack((self.valid_lon, self.valid_lat)).T self.tree = KDTree(coords) def find(self, lon, lat, z): """ Finds nearest neighbor index for point (lon, lat, z) :arg lon: longitude coordinate :arg lat: latitude coordinate :arg z: z coordinate (negative downwards) :returns: i, j, k indices of nearest neighbor indices """ dist, index = self.tree.query([lon, lat], k=1) index = self.wetmask[index] i, j = numpy.unravel_index(index, self.lat.shape) if self.data_dim == 3: k = numpy.abs(self.z - z).argmin() else: k = None return i, j, k
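The bookkeeping above (mask out land, build the tree on wet points only, then map a tree index back to 2-D grid indices through the raveled wet mask) can be exercised without a NEMO file; a minimal sketch with made-up lon/lat grids and land mask:

import numpy
from scipy.spatial import cKDTree as KDTree

# synthetic 4x5 grid with a dry corner standing in for the NEMO fields
lon, lat = numpy.meshgrid(numpy.linspace(0.0, 4.0, 5), numpy.linspace(60.0, 63.0, 4))
landmask = numpy.zeros(lon.shape, dtype=bool)
landmask[0, :2] = True

wetmask = numpy.nonzero(~landmask.ravel())[0]
valid_lon = lon.ravel()[wetmask]
valid_lat = lat.ravel()[wetmask]
tree = KDTree(numpy.vstack((valid_lon, valid_lat)).T)

# nearest wet point to an arbitrary (lon, lat) query
dist, index = tree.query([2.1, 61.4], k=1)
i, j = numpy.unravel_index(wetmask[index], lat.shape)
print(i, j)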
if 'snakemake' not in globals(): from vresutils.snakemake import MockSnakemake, Dict snakemake = MockSnakemake(input=Dict(base_network='networks/base.nc'), output=['resources/powerplants.csv']) logging.basicConfig(level=snakemake.config['logging_level']) n = pypsa.Network(snakemake.input.base_network) ppl = (ppm.collection.matched_data()[lambda df: ~df.Fueltype.isin( ('Solar', 'Wind'))].pipe(ppm.cleaning.clean_technology).assign( Fueltype=lambda df: (df.Fueltype.where( df.Fueltype != 'Natural Gas', df.Technology.replace('Steam Turbine', 'OCGT').fillna('OCGT')))). pipe(ppm.utils.fill_geoposition, parse=True, only_saved_locs=True).pipe(ppm.heuristics.fill_missing_duration)) # ppl.loc[(ppl.Fueltype == 'Other') & ppl.Technology.str.contains('CCGT'), 'Fueltype'] = 'CCGT' # ppl.loc[(ppl.Fueltype == 'Other') & ppl.Technology.str.contains('Steam Turbine'), 'Fueltype'] = 'CCGT' ppl = ppl.loc[ppl.lon.notnull() & ppl.lat.notnull()] substation_lv_i = n.buses.index[n.buses['substation_lv']] kdtree = KDTree(n.buses.loc[substation_lv_i, ['x', 'y']].values) ppl = ppl.assign( bus=substation_lv_i[kdtree.query(ppl[['lon', 'lat']].values)[1]]) ppl.to_csv(snakemake.output[0])
def match_arbitrary_translation_dilatation(x1,y1,x2,y2) : """ Match two catalogs in different coordinate systems, 1 and 2, related by a translation, a dilatation, and possibly a "small" rotation The orientation of triangles is used for the match so the rotation has to be small. Inspired from http://articles.adsabs.harvard.edu/pdf/1986AJ.....91.1244G Args: x1 : float numpy array of coordinates along first axis of cartesian coordinate system 1 y1 : float numpy array of coordinates along second axis of cartesian coordinate system 1 x2 : float numpy array of coordinates along first axis of cartesian coordinate system 2 y2 : float numpy array of coordinates along second axis of cartesian coordinate system 2 returns: indices_2 : integer numpy array. if ii is a index array for entries in the first catalog, indices_2[ii] is the index array of best matching entries in the second catalog. (one should compare x1[ii] with x2[indices_2[ii]]) negative values for unmatched entries. distance : distance between pairs of triangles. It can be used to discard bad matches. """ log = get_logger() # compute all possible triangles in both data sets # txyz are properties of the shape and orientation of the triangles log.debug("compute triangles") tk1,txyz1 = compute_triangles_with_fixed_orientation(x1,y1) tk2,txyz2 = compute_triangles_with_fixed_orientation(x2,y2) log.debug("match triangles") # match with kdtree triangles with same shape and orientation tree2=KDTree(txyz2) triangle_distances,triangle_indices_2 = tree2.query(txyz1,k=1) # now that we have match of triangles , need to match back catalog entries ranked_pairs = np.argsort(triangle_distances) indices_2 = -1*np.ones(x1.size,dtype=int) distances = np.zeros(x1.size) all_matched = False log.debug("match catalogs using pairs of triangles") for p in ranked_pairs : k1=tk1[p] # incides (in x1,y1) of vertices of this triangle (size=3) k2=tk2[triangle_indices_2[p]] # incides (in x2,y2) of vertices of other triangle # check unmatched or equal if np.any((indices_2[k1]>=0)&(indices_2[k1]!=k2)) : log.warning("skip {} <=> {}".format(k1,k2)) continue indices_2[k1]=k2 distances[k1]=triangle_distances[p] all_matched = (np.sum(indices_2>=0)==x1.size) if all_matched : log.debug("all matched") break # check duplicates for i2 in np.unique(indices_2[indices_2>=0]) : ii=(indices_2==i2) if np.sum(ii) > 1 : log.warning("{} duplicates for i2={}".format(np.sum(ii),i2)) indices_2[ii]=-1 return indices_2 , distances
def extract_edges_in_block(db_name, db_host, soft_mask_container, soft_mask_dataset, distance_threshold, evidence_threshold, graph_number, block): graph_provider = MongoDbGraphProvider( db_name, db_host, mode='r+', position_attribute=['z', 'y', 'x'], directed=False, edges_collection='edges_g{}'.format(graph_number)) if check_function(graph_provider.database, block, "edges_g{}".format(graph_number)): return 0 logger.debug("Finding edges in %s, reading from %s", block.write_roi, block.read_roi) start = time.time() soft_mask_array = daisy.open_ds(soft_mask_container, soft_mask_dataset) graph = graph_provider[block.read_roi.intersect(soft_mask_array.roi)] if graph.number_of_nodes() == 0: logger.info("No nodes in roi %s. Skipping", block.read_roi) write_done(graph_provider.database, block, 'edges_g{}'.format(graph_number)) return 0 logger.debug("Read %d candidates in %.3fs", graph.number_of_nodes(), time.time() - start) start = time.time() """ candidates = [(candidate_id, np.array([data[d] for d in ['z', 'y', 'x']])) for candidate_id, data in graph.nodes(data=True) if 'z' in data] """ candidates = np.array( [[candidate_id] + [data[d] for d in ['z', 'y', 'x']] for candidate_id, data in graph.nodes(data=True) if 'z' in data], dtype=np.uint64) kdtree_start = time.time() kdtree = KDTree([[candidate[1], candidate[2], candidate[3]] for candidate in candidates]) #kdtree = KDTree(candidates[]) pairs = kdtree.query_pairs(distance_threshold, p=2.0, eps=0) logger.debug("Query pairs in %.3fs", time.time() - kdtree_start) soft_mask_array = daisy.open_ds(soft_mask_container, soft_mask_dataset) voxel_size = np.array(soft_mask_array.voxel_size, dtype=np.uint32) soft_mask_roi = block.read_roi.snap_to_grid( voxel_size=voxel_size).intersect(soft_mask_array.roi) soft_mask_array_data = soft_mask_array.to_ndarray(roi=soft_mask_roi) sm_dtype = soft_mask_array_data.dtype if sm_dtype == np.uint8: # standard pipeline pm 0-255 pass elif sm_dtype == np.float32 or sm_dtype == np.float64: if not (soft_mask_array_data.min() >= 0 and soft_mask_array_data.max() <= 1): raise ValueError( "Provided soft_mask has dtype float but not in range [0,1], abort" ) else: soft_mask_array_data *= 255 else: raise ValueError("Soft mask dtype {} not understood".format(sm_dtype)) soft_mask_array_data = soft_mask_array_data.astype(np.float64) if evidence_threshold is not None: soft_mask_array_data = (soft_mask_array_data >= evidence_threshold * 255).astype(np.float64) * 255 offset = np.array(np.array(soft_mask_roi.get_offset()) / voxel_size, dtype=np.uint64) evidence_start = time.time() if pairs: pairs = np.array(list(pairs), dtype=np.uint64) evidence_array = cpp_get_evidence(candidates, pairs, soft_mask_array_data, offset, voxel_size) graph.add_weighted_edges_from(evidence_array, weight='evidence') logger.debug("Accumulate evidence in %.3fs", time.time() - evidence_start) logger.debug("Found %d edges", graph.number_of_edges()) logger.debug("Extracted edges in %.3fs", time.time() - start) start = time.time() graph.write_edges(block.write_roi) logger.debug("Wrote edges in %.3fs", time.time() - start) else: logger.debug("No pairs in block, skip") write_done(graph_provider.database, block, 'edges_g{}'.format(graph_number)) return 0
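The candidate-pair step in isolation: candidate ids together with their z, y, x coordinates in one uint64 array, and `query_pairs` returning index pairs within the distance threshold. A minimal sketch with invented candidates and a made-up threshold:

import numpy as np
from scipy.spatial import cKDTree as KDTree

distance_threshold = 100.0  # made-up value for the sketch
candidates = np.array([[10, 0, 0, 0],
                       [11, 0, 0, 90],
                       [12, 0, 120, 0],
                       [13, 5, 5, 5]], dtype=np.uint64)

kdtree = KDTree(candidates[:, 1:].astype(float))
pairs = kdtree.query_pairs(distance_threshold, p=2.0, eps=0)
# map row indices back to candidate ids
id_pairs = [(int(candidates[i, 0]), int(candidates[j, 0])) for i, j in pairs]
print(id_pairs)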
def fit(self, df, location_cols, y_col, **kwargs):
    super().fit(df, location_cols, y_col)
    self.model = KDTree(self.locations)
sys.stdout.flush() if name is not None: newargs = initargs + ['--imodes','flexm-'+str(nstruc+1)+name+'.dat'] if not os.path.exists('flexm-'+str(nstruc+1)+name+'.dat'): break collectlib.collect_iattract(newargs) result = collectlib.collect_next() if result: break nstruc += 1 coor = collectlib.collect_all_coor() pdbsizes2 = np.cumsum([0] + pdbsizes) coors = [coor[pdbsizes2[n]:pdbsizes2[n+1]] for n in range(len(pdbs))] energy = 0 eblock = 0 for n1 in range(len(pdbs)): c1 = coors[n1] tree1 = KDTree(c1) for n2 in range(n1+1, len(pdbs)): c2 = coors[n2] tree2 = KDTree(c2) energyblock = energyblocks[eblock] pairs = tree1.query_ball_tree(tree2, 10) ene = sum([sum(e[p]) for e,p in zip(energyblock,pairs) if len(p)]) energy += ene eblock += 1 f1.write("%.3f\n" % energy)
class Invdisttree: """ inverse-distance-weighted interpolation using KDTree: invdisttree = Invdisttree( X, z ) -- data points, values interpol = invdisttree( q, nnear=3, eps=0, p=1, weights=None, stat=0 ) interpolates z from the 3 points nearest each query point q; For example, interpol[ a query point q ] finds the 3 data points nearest q, at distances d1 d2 d3 and returns the IDW average of the values z1 z2 z3 (z1/d1 + z2/d2 + z3/d3) / (1/d1 + 1/d2 + 1/d3) = .55 z1 + .27 z2 + .18 z3 for distances 1 2 3 q may be one point, or a batch of points. eps: approximate nearest, dist <= (1 + eps) * true nearest p: use 1 / distance**p weights: optional multipliers for 1 / distance**p, of the same shape as q stat: accumulate wsum, wn for average weights How many nearest neighbors should one take ? a) start with 8 11 14 .. 28 in 2d 3d 4d .. 10d; see Wendel's formula b) make 3 runs with nnear= e.g. 6 8 10, and look at the results -- |interpol 6 - interpol 8| etc., or |f - interpol*| if you have f(q). I find that runtimes don't increase much at all with nnear -- ymmv. p=1, p=2 ? p=2 weights nearer points more, farther points less. In 2d, the circles around query points have areas ~ distance**2, so p=2 is inverse-area weighting. For example, (z1/area1 + z2/area2 + z3/area3) / (1/area1 + 1/area2 + 1/area3) = .74 z1 + .18 z2 + .08 z3 for distances 1 2 3 Similarly, in 3d, p=3 is inverse-volume weighting. Scaling: if different X coordinates measure different things, Euclidean distance can be way off. For example, if X0 is in the range 0 to 1 but X1 0 to 1000, the X1 distances will swamp X0; rescale the data, i.e. make X0.std() ~= X1.std() . A nice property of IDW is that it's scale-free around query points: if I have values z1 z2 z3 from 3 points at distances d1 d2 d3, the IDW average (z1/d1 + z2/d2 + z3/d3) / (1/d1 + 1/d2 + 1/d3) is the same for distances 1 2 3, or 10 20 30 -- only the ratios matter. In contrast, the commonly-used Gaussian kernel exp( - (distance/h)**2 ) is exceedingly sensitive to distance and to h. """ # anykernel( dj / av dj ) is also scale-free # error analysis, |f(x) - idw(x)| ? todo: regular grid, nnear ndim+1, 2*ndim def __init__(self, X, z, leafsize=10, stat=0): assert len(X) == len(z), "len(X) %d != len(z) %d" % (len(X), len(z)) self.tree = KDTree(X, leafsize=leafsize) # build the tree self.z = z self.stat = stat self.wn = 0 self.wsum = None def __call__(self, q, nnear=6, eps=0, p=1, weights=None): # nnear nearest neighbours of each query point -- q = np.asarray(q) qdim = q.ndim if qdim == 1: q = np.array([q]) if self.wsum is None: self.wsum = np.zeros(nnear) self.distances, self.ix = self.tree.query(q, k=nnear, eps=eps) interpol = np.zeros((len(self.distances), ) + np.shape(self.z[0])) jinterpol = 0 for dist, ix in zip(self.distances, self.ix): if nnear == 1: wz = self.z[ix] elif dist[0] < 1e-10: wz = self.z[ix[0]] else: # weight z s by 1/dist -- w = 1 / dist**p if weights is not None: w *= weights[ix] # >= 0 w /= np.sum(w) wz = np.dot(w, self.z[ix]) if self.stat: self.wn += 1 self.wsum += w interpol[jinterpol] = wz jinterpol += 1 return interpol if qdim > 1 else interpol[0]
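A quick self-contained check of the interpolator on random 2-D data (the sample values and query points here are invented), assuming the usual `from scipy.spatial import cKDTree as KDTree` import at module level:

import numpy as np
from scipy.spatial import cKDTree as KDTree

np.random.seed(0)
X = np.random.uniform(size=(200, 2))               # scattered data points
z = np.sin(6 * X[:, 0]) * np.cos(6 * X[:, 1])      # values at those points

invdisttree = Invdisttree(X, z, leafsize=10, stat=1)
q = np.random.uniform(size=(5, 2))                 # query points
zq = invdisttree(q, nnear=6, eps=0, p=2)
print(zq)                                          # IDW estimates at the query points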
def init_subproblems(self, conf, **kwargs): from sfepy.discrete.state import State from sfepy.discrete import Problem from sfepy.base.conf import ProblemConf, get_standard_keywords from scipy.spatial import cKDTree as KDTree # init subproblems problem = self.context pb_vars = problem.get_variables() # get "master" DofInfo and last index pb_adi_indx = problem.equations.variables.adi.indx self.adi_indx = pb_adi_indx.copy() last_indx = -1 for ii in six.itervalues(self.adi_indx): last_indx = nm.max([last_indx, ii.stop]) # coupling variables self.cvars_to_pb = {} for jj in conf.coupling_variables: self.cvars_to_pb[jj] = [None, None] if jj in pb_vars.names: if pb_vars[jj].dual_var_name is not None: self.cvars_to_pb[jj][0] = -1 else: self.cvars_to_pb[jj][1] = -1 # init subproblems self.subpb = [] required, other = get_standard_keywords() master_prefix = output.get_output_prefix() for ii, ifname in enumerate(conf.others): sub_prefix = master_prefix[:-1] + '-sub%d:' % (ii + 1) output.set_output_prefix(sub_prefix) kwargs['master_problem'] = problem confi = ProblemConf.from_file(ifname, required, other, define_args=kwargs) pbi = Problem.from_conf(confi, init_equations=True) sti = State(pbi.equations.variables) pbi.equations.set_data(None, ignore_unknown=True) pbi.time_update() pbi.update_materials() sti.apply_ebc() pbi_vars = pbi.get_variables() output.set_output_prefix(master_prefix) self.subpb.append([pbi, sti, None]) # append "slave" DofInfo for jj in pbi_vars.names: if not(pbi_vars[jj].is_state()): continue didx = pbi.equations.variables.adi.indx[jj] ndof = didx.stop - didx.start if jj in self.adi_indx: if ndof != \ (self.adi_indx[jj].stop - self.adi_indx[jj].start): raise ValueError('DOFs do not match!') else: self.adi_indx.update({ jj: slice(last_indx, last_indx + ndof, None)}) last_indx += ndof for jj in conf.coupling_variables: if jj in pbi_vars.names: if pbi_vars[jj].dual_var_name is not None: self.cvars_to_pb[jj][0] = ii else: self.cvars_to_pb[jj][1] = ii self.subpb.append([problem, None, None]) self.cvars_to_pb_map = {} for varname, pbs in six.iteritems(self.cvars_to_pb): # match field nodes coors = [] for ii in pbs: pbi = self.subpb[ii][0] pbi_vars = pbi.get_variables() fcoors = pbi_vars[varname].field.coors dc = nm.abs(nm.max(fcoors, axis=0)\ - nm.min(fcoors, axis=0)) ax = nm.where(dc > 1e-9)[0] coors.append(fcoors[:,ax]) if len(coors[0]) != len(coors[1]): raise ValueError('number of nodes does not match!') kdtree = KDTree(coors[0]) map_12 = kdtree.query(coors[1])[1] pbi1 = self.subpb[pbs[0]][0] pbi1_vars = pbi1.get_variables() eq_map_1 = pbi1_vars[varname].eq_map pbi2 = self.subpb[pbs[1]][0] pbi2_vars = pbi2.get_variables() eq_map_2 = pbi2_vars[varname].eq_map dpn = eq_map_2.dpn nnd = map_12.shape[0] map_12_nd = nm.zeros((nnd * dpn,), dtype=nm.int32) if dpn > 1: for ii in range(dpn): map_12_nd[ii::dpn] = map_12 * dpn + ii else: map_12_nd = map_12 idx = nm.where(eq_map_2.eq >= 0)[0] self.cvars_to_pb_map[varname] = eq_map_1.eq[map_12[idx]]
def KLdivergence(x, y): """Compute the Kullback-Leibler divergence between two multivariate samples. Parameters ---------- x : 2D array (n,d) Samples from distribution P, which typically represents the true distribution. y : 2D array (m,d) Samples from distribution Q, which typically represents the approximate distribution. Returns ------- out : float The estimated Kullback-Leibler divergence D(P||Q). References ---------- Pérez-Cruz, F. Kullback-Leibler divergence estimation of continuous distributions IEEE International Symposium on Information Theory, 2008. https://gist.github.com/atabakd/ed0f7581f8510c8587bc2f41a094b518 """ eta = 0.0000000001 # Check the dimensions are consistent x = np.atleast_2d(x) y = np.atleast_2d(y) n,d = x.shape m,dy = y.shape assert d == dy assert n != 0 assert n != 1 # Build a KD tree representation of the samples and find the nearest neighbour # of each point in x. xtree = KDTree(x) ytree = KDTree(y) # Get the first two nearest neighbours for x, since the closest one is the # sample itself. r = xtree.query(x, k=2, eps=.01, p=2)[0][:,1] s = ytree.query(x, k=1, eps=.01, p=2)[0] s[s == 0] = eta #np.seterr(all='raise') #try: # ratio = r / s # _ = np.log(ratio, where=ratio > 0).sum() #except Exception as ex: # print(ex) # print(np.sum(s==0)) # print(np.sum(np.isclose(s, 0))) # assert False, "log(r/s) produces 'divide by zero' error or other exception." if np.any(s == 0): return "ERR: s=0" else: # There is a mistake in the paper. In Eq. 14, the right side misses a negative sign # on the first term of the right hand side. ratio = r/s return -np.log(ratio, where=ratio > 0).sum() * d / n + np.log(m / (n - 1.))
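A small sanity check with samples from two unit-variance Gaussians, for which the analytic divergence D(N(0,1) || N(1,1)) is 0.5; the sample sizes are arbitrary and the estimate is only approximate:

import numpy as np
from scipy.spatial import cKDTree as KDTree

rng = np.random.RandomState(0)
x = rng.normal(0.0, 1.0, size=(5000, 1))   # samples from P = N(0, 1)
y = rng.normal(1.0, 1.0, size=(5000, 1))   # samples from Q = N(1, 1)
print(KLdivergence(x, y))                  # should come out roughly 0.5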
def runPixMatch(outpre, filter): if filter == 'f606w': let = 'v' else: let = 'i' if outpre == 'lower': x_drc_low = drc_low['x_' + let] y_drc_low = drc_low['y_' + let] xm_flc_low = flc_all['xdrc_low_' + filter] ym_flc_low = flc_all['ydrc_low_' + filter] coords1low = np.empty((xm_flc_low.size, 2)) coords2low = np.empty((x_drc_low.size, 2)) coords1low[:, 0] = xm_flc_low coords1low[:, 1] = ym_flc_low coords2low[:, 0] = x_drc_low coords2low[:, 1] = y_drc_low kdt = KDT(coords2low) idxs2 = kdt.query(coords1low)[1] ds = distArr(xm_flc_low, ym_flc_low, x_drc_low[idxs2], y_drc_low[idxs2]) idxs1 = np.arange(xm_flc_low.size) msk = ds < matchtol idxs1 = idxs1[msk] idxs2 = idxs2[msk] ds = ds[msk] else: x_drc_up = drc_up['x_' + let] y_drc_up = drc_up['y_' + let] xm_flc_up = flc_all['xdrc_up_' + filter] ym_flc_up = flc_all['ydrc_up_' + filter] coords1up = np.empty((xm_flc_up.size, 2)) coords2up = np.empty((x_drc_up.size, 2)) coords1up[:, 0] = xm_flc_up coords1up[:, 1] = ym_flc_up coords2up[:, 0] = x_drc_up coords2up[:, 1] = y_drc_up kdt = KDT(coords2up) idxs2 = kdt.query(coords1up)[1] ds = distArr(xm_flc_up, ym_flc_up, x_drc_up[idxs2], y_drc_up[idxs2]) idxs1 = np.arange(xm_flc_up.size) msk = ds < matchtol idxs1 = idxs1[msk] idxs2 = idxs2[msk] ds = ds[msk] print(len(idxs1)) outfile = main_dir + 'hor-I-cut_drc_' + outpre + '_' + filter + '_tol{0}_magCuts.txt'.format( matchtol) np.savetxt(outfile, idxs2, fmt='%4i') outfile = main_dir + 'hor-I-cut_flc_' + outpre + '_' + filter + '_tol{0}_magCuts.txt'.format( matchtol) np.savetxt(outfile, idxs1, fmt='%4i') # outfile = main_dir+'hor-I-cut_ds_'+outpre+'_'+filter+'_tol{0}.txt'.format(matchtol) # np.savetxt(outfile, ds, fmt='%1.4f') return None
def xymatch(x1, y1, x2, y2, tol=None, nnearest=1): """ Finds matches in one catalog to another. Parameters x1 : array-like X-coordinates of first catalog y1 : array-like Y-coordinates of first catalog x2 : array-like X-coordinates of second catalog y2 : array-like Y-coordinates of second catalog tol : float or None, optional How close a match has to be to count as a match. If None, all nearest neighbors for the first catalog will be returned. nnearest : int, optional The nth neighbor to find. E.g., 1 for the nearest nearby, 2 for the second nearest neighbor, etc. Particularly useful if you want to get the nearest *non-self* neighbor of a catalog. To do this, use: ``spherematch(x, y, x, y, nnearest=2)`` Returns ------- idx1 : int array Indecies into the first catalog of the matches. Will never be larger than `x1`/`y1`. idx2 : int array Indecies into the second catalog of the matches. Will never be larger than `x1`/`y1`. ds : float array Distance between the matches """ x1 = np.array(x1, copy=False) y1 = np.array(y1, copy=False) x2 = np.array(x2, copy=False) y2 = np.array(y2, copy=False) if x1.shape != y1.shape: raise ValueError('x1 and y1 do not match!') if x2.shape != y2.shape: raise ValueError('x2 and y2 do not match!') # this is equivalent to, but faster than just doing np.array([x1, y1]) coords1 = np.empty((x1.size, 2)) coords1[:, 0] = x1 coords1[:, 1] = y1 # this is equivalent to, but faster than just doing np.array([x2, y2]) coords2 = np.empty((x2.size, 2)) coords2[:, 0] = x2 coords2[:, 1] = y2 kdt = KDT(coords2) if nnearest == 1: ds,idxs2 = kdt.query(coords1) elif nnearest > 1: retval = kdt.query(coords1, nnearest) ds = retval[0] idxs2 = retval[1][:, -1] else: raise ValueError('invalid nnearest ' + str(nnearest)) idxs1 = np.arange(x1.size) if tol is not None: msk = ds < tol idxs1 = idxs1[msk] idxs2 = idxs2[msk] ds = ds[msk] return idxs1, idxs2, ds
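An illustrative call on two small synthetic catalogs (the second catalog plus a noisy subset of it as the first), assuming `KDT` is bound to `scipy.spatial.cKDTree` as in the surrounding snippets:

import numpy as np
from scipy.spatial import cKDTree as KDT

rng = np.random.RandomState(1)
x2 = rng.uniform(0, 100, 50)
y2 = rng.uniform(0, 100, 50)
x1 = x2[:20] + rng.normal(0, 0.1, 20)   # first catalog: jittered subset of the second
y1 = y2[:20] + rng.normal(0, 0.1, 20)

idx1, idx2, ds = xymatch(x1, y1, x2, y2, tol=0.5)
print(len(idx1), ds.max())              # number of matches and the worst separation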
class Dataset: """ SELFE Model Binary IO Functions Presently enables reading SELFE dataformat version 5.0 binary output files. Can read 2D & 3D scalar and vector variables. Usage Example: model = pyselfe.Dataset('1_hvel.64') [t,t_iter,eta,dp,data] = model.read_time_series() t = time in seconds t_iter = iteration number eta = water surface elevation dp = bathymetric depth data = 2D/3D variables @author Dharhas Pothina @version 0.2 """ def __init__(self, fname, nfiles=1): "Initialise by reading header information from file." self.fname = fname fid = open(fname, 'rb') self.read_header(fid) self.read_hgrid(fid) self.data_start_pos = fid.tell() self.compute_step_size() self.datadir = os.path.split(fname)[0] self.nfiles = nfiles def read_header(self, fid): """Read header information from SELFE binary output file.""" # Read misc header info. self.data_format = fid.read(48) self.version = fid.read(48) self.start_time = fid.read(48) self.var_type = fid.read(48) self.var_dimension = fid.read(48) self.nsteps = io.fread(fid, 1, 'i') self.dt = io.fread(fid, 1, 'f') self.skip = io.fread(fid, 1, 'i') self.flag_sv = io.fread(fid, 1, 'i') self.flag_dm = io.fread(fid, 1, 'i') # @todo check when zDes needs to be read # self.zDes = io.fread(fid, 1, 'f'). # Read vert grid info. self.nlevels = io.fread(fid, 1, 'i') self.kz = io.fread(fid, 1, 'i') self.h0 = io.fread(fid, 1, 'f') self.hs = io.fread(fid, 1, 'f') self.hc = io.fread(fid, 1, 'f') self.theta_b = io.fread(fid, 1, 'f') self.theta = io.fread(fid, 1, 'f') self.zlevels = io.fread(fid, self.kz, 'f') self.slevels = io.fread(fid, self.nlevels - self.kz, 'f') def read_hgrid(self, fid): """Read horizontal grid info from SELFE binary output file.""" # Read dimensions. self.np = io.fread(fid, 1, 'i') self.ne = io.fread(fid, 1, 'i') # Read grid and bathymetry. pos = fid.tell() hgridtmp = io.fread(fid, 4 * self.np, 'f') self.x, self.y, self.dp, tmp1 = hgridtmp.reshape(self.np, 4).T # Read bottom index. fid.seek(pos) hgridtmp = io.fread(fid, 4 * self.np, 'i') tmp1, tmp2, tmp3, self.bot_idx = hgridtmp.reshape(self.np, 4).T # Read element connectivity list. self.elem = io.fread(fid, 4 * self.ne, 'i') self.elem = self.elem.reshape(self.ne, 4)[:, 1:4] # Create kdtree. self.kdtree = KDTree(list(zip(self.x, self.y))) def compute_step_size(self): """ Compute the data block size to move one timestep within the file. """ # Calculate grid size depending on whether dataset is 3D or 2D. if self.flag_dm == 3: # @todo check what needs to be done with bIdx (==0?)for dry nodes. bIdx = self.bot_idx bIdx[bIdx < 1] = 1 self.grid_size = sum(self.nlevels - bIdx + 1) elif self.flag_dm == 2: self.grid_size = self.np # Compute step size. self.step_size = 2 * 4 + self.np * 4 + self.grid_size * 4 * self.flag_sv def read_time_series(self, fname, nodes=None, levels=None, xy=np.array([]), nfiles=3, sfile=1, datadir=None): """ Main function to extract a spatial and temporal slice of entire 3D Time series. Returns [t,t_iter,eta,dp,data] where: t : time in seconds from simulation start t_iter : iteration number from simulation start eta : Surface water elevation time series dp : Bathymetry (depth of sea bed from MSL) data[t,nodes,levels,vars] : extracted data slice (i.e. 
Salinity, Temp, Velocity etc) Options: nodes : list of nodes to extract (default is all nodes) level : list of levels to extract (default is all levels) xy : array of x,y coordinates to extract (default is none) sfile : serial number of starting file (default is one) nfiles : number of files in data sequence (default is one) NOTE : node index starts at zero so add one to match up with node numbers in SELFE hgrid.gr3 file. """ # Initialize vars. t = np.array([]) t_iter = np.array([]) eta = [] data = [] if nfiles is None: nfiles = self.nfiles if datadir is None: datadir = self.datadir # Convert xy points to list of nodes, # find parent elements & calculate interpolation weights. if xy.size != 0: if xy.shape[1] != 2: sys.exit('xy array shape wrong.') nodes = np.array([], dtype='int32') arco = np.array([]) for xy00 in xy: parent, tmparco, node3 = self.find_parent_element( xy00[0], xy00[1]) # noqa nodes = np.append(nodes, node3 - 1) arco = np.append(arco, tmparco) # Set default for nodes to be all nodes. # Node index starts at zero. elif nodes is None: nodes = np.arange(self.np) # Set default for level to be all levels. if levels is None: levels = np.arange(self.nlevels) # Check whether 2D or 3D variable is being read. if self.flag_dm == 2: nlevs = 1 levels = np.array([0]) else: nlevs = self.nlevels # Read time series slice. for files in np.arange(sfile, sfile + nfiles): try: fname1 = datadir + '/' + str(files) + '_' + fname fid = open(fname1, 'rb') fid.seek(self.data_start_pos) for i in np.arange(self.nsteps): t = np.append(t, io.fread(fid, 1, 'f')) t_iter = np.append(t_iter, io.fread(fid, 1, 'i')) eta.append(io.fread(fid, self.np, 'f')) tmpdata = io.fread(fid, self.flag_sv * self.grid_size, 'f') tmpdata = tmpdata.reshape(self.np, nlevs, self.flag_sv) # Only keep requested slice of tmpdata. # i.e. tmpdata[nodes, levels, var] tmpdata = tmpdata[nodes, :, :] tmpdata = tmpdata[:, levels, :] data.append(tmpdata) except: continue # import pdb; pdb.set_trace() eta = np.column_stack(eta[:]).T eta = eta[:, nodes] data = np.array(data) dp = self.dp[nodes] # Convert nodal values back to xy point values if needed. if xy.size != 0: # Not sure about this. Need to look at it on more detail put in to # remove shape error. # try: tmpdata = np.zeros((data.shape[0], data.shape[1] // 3, data.shape[2], data.shape[3])) / 0. # noqa # except: # tmpdata = np.zeros((data.shape[0], data.shape[1]//3, data.shape[2]))/0. # noqa tmpeta = np.zeros((eta.shape[0], eta.shape[1] // 3)) / 0. tmpdp = np.zeros(dp.shape[0] // 3) / 0. for i in range(xy.shape[0]): n1 = i * 3 n2 = n1 + 1 n3 = n2 + 1 tmpdata[:, i, :, :] = (data[:, n1, :, :] * arco[n1] + data[:, n2, :, :] * arco[n2] + data[:, n3, :, :] * arco[n3]) tmpeta[:, i] = (eta[:, n1] * arco[n1] + eta[:, n2] * arco[n2] + eta[:, n3] * arco[n3]) tmpdp[i] = (dp[n1] * arco[n1] + dp[n2] * arco[n2] + dp[n3] * arco[n3]) data = tmpdata eta = tmpeta dp = tmpdp return t, t_iter, eta, dp, data def find_parent_element(self, x00, y00): """ Find Parent Element of a given (x,y) point and calculate interpolation weights. Uses brute force search through all elements. Calculates whether point is internal/external to element by comparing summed area of sub triangles with area of triangle element. @todo implement binary tree search for efficiency Returns: parent, arco, node3 : parent element number, interp wieghts and element node numbers. """ def signa(x1, x2, x3, y1, y2, y3): "Return signed area of triangle." 
return (((x1 - x3) * (y2 - y3) - (x2 - x3) * (y1 - y3)) / 2) parent = -1 nm = self.elem.view() out = np.zeros(3) / 0. x = self.x.view() y = self.y.view() for i in np.arange(self.ne): aa = 0 ar = 0 # Area. for j in np.arange(3): j1 = j + 1 j2 = j + 2 if (j1 > 2): j1 = j1 - 3 if (j2 > 2): j2 = j2 - 3 n0 = nm[i, j] - 1 # Zero based index rather than 1 based index. n1 = nm[i, j1] - 1 n2 = nm[i, j2] - 1 # Temporary storage. out[j] = signa(x[n1], x[n2], x00, y[n1], y[n2], y00) aa = aa + abs(out[j]) if (j == 0): ar = signa(x[n1], x[n2], x[n0], y[n1], y[n2], y[n0]) if (ar <= 0): sys.exit('Negative area:' + str(ar)) ae = abs(aa - ar) / ar if (ae <= 1.e-5): parent = i node3 = nm[i, 0:3] arco = out[0:3] / ar arco[1] = max(0., min(1., arco[1])) arco[2] = max(0., min(1., arco[2])) if (arco[0] + arco[1] > 1): arco[2] = 0 arco[1] = 1 - arco[0] else: arco[2] = 1 - arco[0] - arco[1] break if (parent == -1): sys.exit('Cannot find a parent:' + str(x00) + ',' + str(y00)) else: print('Parent Element :', parent + 1, ' ,Nodes: ', node3) return parent, arco, node3 def compute_relative_rec(self, node, level): """ Computes offset for extracting particular node/level. NOTE THIS FUNCTION NOT COMPLETE/TESTED. """ count = 0 step_size = np.zeros(self.np, self.nlevels, self.flag_sv) / 0. for i in range(self.np): for k in range(max(1, self.bot_idx[i]), self.nlevels): for m in range(self.flag_sv): count = count + 1 step_size[i, k, m] = count def read_time_series_xy(self, variable, x, y, sigma_level='middle', return_eta=False): """ Finds nearest 3 nodes to x,y and returns the average value. """ xy = np.hstack((x, y)) dist, nodes = self.kdtree.query(xy, k=3) data = [] if sigma_level == 'average': t, t_iter, eta, dp, data = self.read_time_series( variable, nodes=nodes) # noqa eta = eta.mean(axis=1) data = data[:, :, :, 0].mean(axis=2).mean(axis=1) # Take average of all levels and then 3 nodes for now. # Implement idw or area weighted a average later. data = data.mean(axis=1).mean(axis=1) if return_eta: return np.column_stack((t, data)), np.column_stack((t, eta)) else: return np.column_stack((t, data)) elif sigma_level == 'top': sigma_level = 0 elif sigma_level == 'bottom': sigma_level = self.nlevels - 1 elif sigma_level == 'middle': sigma_level = self.nlevels // 2 t, t_iter, eta, dp, data = self.read_time_series(variable, nodes=nodes, levels=sigma_level) eta = eta.mean(axis=1) data = data[:, :, 0, :].mean(axis=1) # data.mean(axis=1).shape[:, 0, :] # Take average of all levels and then 3 nodes for now. # Implement idw or area weighted average later/ # data = data.mean(axis=1) # import pdb; pdb.set_trace() if return_eta: return np.column_stack((t, data)), np.column_stack((t, eta)) else: return np.column_stack((t, data))
filter = filters[ff] x_drc_low = low_x[ff] y_drc_low = low_y[ff] xm_flc_low = flc_all['xdrc_low_'+filter] ym_flc_low = flc_all['ydrc_low_'+filter] coords1low = np.empty((xm_flc_low.size,2)) coords2low = np.empty((x_drc_low.size,2)) coords1low[:,0] = xm_flc_low coords1low[:,1] = ym_flc_low coords2low[:,0] = x_drc_low coords2low[:,1] = y_drc_low kdt = KDT(coords2low) idxs2 = kdt.query(coords1low)[1] ds = distArr(xm_flc_low,ym_flc_low,x_drc_low[idxs2],y_drc_low[idxs2]) idxs1 = np.arange(xm_flc_low.size) msk = ds < matchtol idxs1 = idxs1[msk] idxs2 = idxs2[msk] ds = ds[msk] outfile = outDir+'hor-I-cut_drc_low_'+filter+'_tol1.txt' np.savetxt(outfile, idxs2, fmt='%4i') outfile = outDir+'hor-I-cut_flc_low_'+filter+'_tol1.txt'
def spherematch(ra1, dec1, ra2, dec2, tol=None, nnearest=1): """ Finds matches in one catalog to another. Parameters ra1 : array-like Right Ascension in degrees of the first catalog dec1 : array-like Declination in degrees of the first catalog (shape of array must match `ra1`) ra2 : array-like Right Ascension in degrees of the second catalog dec2 : array-like Declination in degrees of the second catalog (shape of array must match `ra2`) tol : float or None, optional How close (in degrees) a match has to be to count as a match. If None, all nearest neighbors for the first catalog will be returned. nnearest : int, optional The nth neighbor to find. E.g., 1 for the nearest nearby, 2 for the second nearest neighbor, etc. Particularly useful if you want to get the nearest *non-self* neighbor of a catalog. To do this, use: ``spherematch(ra, dec, ra, dec, nnearest=2)`` Returns ------- idx1 : int array Indecies into the first catalog of the matches. Will never be larger than `ra1`/`dec1`. idx2 : int array Indecies into the second catalog of the matches. Will never be larger than `ra1`/`dec1`. ds : float array Distance (in degrees) between the matches """ ra1 = np.array(ra1, copy=False) dec1 = np.array(dec1, copy=False) ra2 = np.array(ra2, copy=False) dec2 = np.array(dec2, copy=False) if ra1.shape != dec1.shape: raise ValueError('ra1 and dec1 do not match!') if ra2.shape != dec2.shape: raise ValueError('ra2 and dec2 do not match!') x1, y1, z1 = _spherical_to_cartesian(ra1.ravel(), dec1.ravel()) # this is equivalent to, but faster than just doing np.array([x1, y1, z1]) coords1 = np.empty((x1.size, 3)) coords1[:, 0] = x1 coords1[:, 1] = y1 coords1[:, 2] = z1 x2, y2, z2 = _spherical_to_cartesian(ra2.ravel(), dec2.ravel()) # this is equivalent to, but faster than just doing np.array([x1, y1, z1]) coords2 = np.empty((x2.size, 3)) coords2[:, 0] = x2 coords2[:, 1] = y2 coords2[:, 2] = z2 kdt = KDT(coords2) if nnearest == 1: idxs2 = kdt.query(coords1)[1] elif nnearest > 1: idxs2 = kdt.query(coords1, nnearest)[1][:, -1] else: raise ValueError('invalid nnearest ' + str(nnearest)) ds = _great_circle_distance(ra1, dec1, ra2[idxs2], dec2[idxs2]) idxs1 = np.arange(ra1.size) if tol is not None: msk = ds < tol idxs1 = idxs1[msk] idxs2 = idxs2[msk] ds = ds[msk] return idxs1, idxs2, ds
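`spherematch` relies on two helpers that are not shown here, `_spherical_to_cartesian` and `_great_circle_distance`; a minimal sketch of what they are commonly taken to be (unit-sphere conversion and the arctan2 form of the great-circle formula), not necessarily the author's exact implementation:

import numpy as np

def _spherical_to_cartesian(ra, dec):
    """Convert RA/Dec in degrees to x, y, z on the unit sphere."""
    rar = np.radians(ra)
    decr = np.radians(dec)
    return np.cos(rar) * np.cos(decr), np.sin(rar) * np.cos(decr), np.sin(decr)

def _great_circle_distance(ra1, dec1, ra2, dec2):
    """Angular separation in degrees (arctan2 form, stable for small angles)."""
    lam1, lam2 = np.radians(ra1), np.radians(ra2)
    phi1, phi2 = np.radians(dec1), np.radians(dec2)
    dlam = lam2 - lam1
    num = np.hypot(np.cos(phi2) * np.sin(dlam),
                   np.cos(phi1) * np.sin(phi2)
                   - np.sin(phi1) * np.cos(phi2) * np.cos(dlam))
    den = (np.sin(phi1) * np.sin(phi2)
           + np.cos(phi1) * np.cos(phi2) * np.cos(dlam))
    return np.degrees(np.arctan2(num, den))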
def keypoints_match_geometry(src_frame, dest_frame): """ Finds keypoint matches based on scene geometry @return pairs, angles `pairs[n] = (src_pt_n, dest_pt_n)` `angles[n]` = angle btw rays of src_pt_n and dest_pt_n """ if hasattr(src_frame, 'kpt_proj_cloud'): helper_cloud = src_frame.kpt_proj_cloud else: helper_cloud = build_point_cloud_for_projection(src_frame) # spatial and perspective projection src_frame -> dest_frame spatial_mat = dest_frame.world_to_camera @ src_frame.camera_to_world full_projection_mat = dest_frame.intrinsic_mat @ spatial_mat[:3, :] # project and retrieve keypoints helper_projected = projection_apply_rowvec(full_projection_mat, helper_cloud) proj_pts, proj_sizes, proj_orientations = kptproj_interpret_projected_vectors( helper_projected) # find pairs of neighbours in radius of MATCH_DISTANCE tree_proj = KDTree(proj_pts) tree_dest = KDTree(dest_frame.kpt_locs) match_suggestions = tree_dest.query_ball_tree(tree_proj, r=MATCH_DISTANCE) matches = [] # kpt_matched_id[n] = id of point in src_frame which matches n #frame.kpt_matched_id = np.zeros(frame.pt_count, dtype=np.int32) #frame.kpt_matched_id[:] = -1 # no match #print(src_frame.pt_count, dest_frame.pt_count, len(match_suggestions)) for dest_pt_idx, suggestions in enumerate(match_suggestions): dest_pt_size = dest_frame.kpt_sizes[dest_pt_idx] for src_pt_idx in suggestions: proj_size = proj_sizes[src_pt_idx] if ((max(dest_pt_size / proj_size, proj_size / dest_pt_size) < MATCH_SIZE_DIFF_RELATIVE) and (angular_distance_abs( dest_frame.kpt_orientations[dest_pt_idx], proj_orientations[src_pt_idx]) < MATCH_ANGLE_DIFF)): matches.append((src_pt_idx, dest_pt_idx)) #frame.kpt_matched_id[pt_idx] = src_pt_idx break if len(matches) == 0: return AttrDict( pairs=np.zeros((0, 2), dtype=np.int32), angles=np.zeros(0, dtype=np.float32), ) else: # store matched pairs match_pairs = np.array(matches, dtype=np.int32) # sort by src_point_id match_pairs = match_pairs[np.argsort(match_pairs[:, 0]), :] view_angle_changes = derive_keypoint_view_angle_change( src_frame, dest_frame, match_pairs) return AttrDict( pairs=match_pairs, angles=view_angle_changes, )
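The core neighbour search in isolation: `query_ball_tree` between a tree over the projected keypoints and a tree over the destination keypoints, returning candidate matches within `MATCH_DISTANCE`. A minimal sketch with invented 2-D points and a made-up radius:

import numpy as np
from scipy.spatial import cKDTree as KDTree

MATCH_DISTANCE = 0.5  # pixels, made-up value for the sketch

proj_pts = np.array([[10.0, 10.0], [20.0, 20.0], [30.0, 5.0]])
dest_pts = np.array([[10.2, 9.9], [25.0, 25.0], [30.1, 5.3]])

tree_proj = KDTree(proj_pts)
tree_dest = KDTree(dest_pts)
# match_suggestions[n] = indices of projected points near destination point n
match_suggestions = tree_dest.query_ball_tree(tree_proj, r=MATCH_DISTANCE)
print(match_suggestions)  # [[0], [], [2]]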
def remove_ind(reference_pop, removal_size, removal_type): begin_time = time.time() if removal_type == 'random': # reference_pop is a numpy array of size (n_reference_pop, pop_dim) reference_pop = list(reference_pop) # now reference_pop is a list of numpy arrays (each defining one individual) random.shuffle(reference_pop) # shuffle the list # pop last removal_size individuals for _ in range(removal_size): reference_pop.pop() # turn back to numpy array reference_pop = np.array(reference_pop) if removal_type == 'least_novel': # compute novelties of reference_pop inside reference_pop novelties = assess_novelties(reference_pop, reference_pop) removal_indices = np.argpartition(novelties, removal_size)[:removal_size] # # plot the reference pop # fig = plt.figure(figsize=(5, 5)) # ax = fig.add_subplot(111) # ax.scatter(reference_pop[:, 0], reference_pop[:, 1], label='reference') # ax.scatter(reference_pop[removal_indices, 0], reference_pop[removal_indices, 1], label='removed', # marker='x', color='red') # ax.set_facecolor("#ffebb8") # ax.set_title('Least novel removal', fontsize=15) # plt.xlim(0, 1) # plt.ylim(0, 1) # plt.legend() # plt.show() reference_pop = np.delete(reference_pop, removal_indices, 0) if removal_type == 'least_novel_iter': removal_indices = [] temp_ref_pop = copy.deepcopy(reference_pop) for j in range(removal_size): # compute novelties of reference_pop inside reference_pop novelties = assess_novelties(temp_ref_pop, temp_ref_pop) remov_idx = np.argmin(novelties) remov_ind = temp_ref_pop[remov_idx] removal_indices.append(np.where(reference_pop == remov_ind)[0][0]) temp_ref_pop = np.vstack( (temp_ref_pop[:remov_idx], temp_ref_pop[remov_idx + 1:])) # # plot the reference pop # fig = plt.figure(figsize=(5, 5)) # ax = fig.add_subplot(111) # ax.scatter(reference_pop[:, 0], reference_pop[:, 1], label='reference') # ax.scatter(reference_pop[removal_indices, 0], reference_pop[removal_indices, 1], label='removed', # marker='x', color='red') # ax.set_facecolor("#ffebb8") # ax.set_title('Least novel removal', fontsize=15) # plt.xlim(0, 1) # plt.ylim(0, 1) # plt.legend() # plt.show() reference_pop = np.delete(reference_pop, removal_indices, 0) if removal_type == 'most_novel': # compute novelties of reference_pop inside reference_pop novelties = assess_novelties(reference_pop, reference_pop) removal_indices = np.argpartition(novelties, -removal_size)[-removal_size:] # # plot the reference pop # fig = plt.figure(figsize=(5, 5)) # ax = fig.add_subplot(111) # ax.scatter(reference_pop[:, 0], reference_pop[:, 1], label='reference') # ax.scatter(reference_pop[removal_indices, 0], reference_pop[removal_indices, 1], label='removed', # marker='x', color='red') # ax.set_facecolor("#ffebb8") # ax.set_title('Least novel removal', fontsize=15) # plt.xlim(0, 1) # plt.ylim(0, 1) # plt.legend() # plt.show() reference_pop = np.delete(reference_pop, removal_indices, 0) if removal_type == 'most_novel_iter': removal_indices = [] temp_ref_pop = copy.deepcopy(reference_pop) for j in range(removal_size): # compute novelties of reference_pop inside reference_pop novelties = assess_novelties(temp_ref_pop, temp_ref_pop) remov_idx = np.argmax(novelties) remov_ind = temp_ref_pop[remov_idx] removal_indices.append(np.where(reference_pop == remov_ind)[0][0]) temp_ref_pop = np.vstack( (temp_ref_pop[:remov_idx], temp_ref_pop[remov_idx + 1:])) # # plot the reference pop # fig = plt.figure(figsize=(5, 5)) # ax = fig.add_subplot(111) # ax.scatter(reference_pop[:, 0], reference_pop[:, 1], label='reference') # 
ax.scatter(reference_pop[removal_indices, 0], reference_pop[removal_indices, 1], label='removed', # marker='x', color='red') # ax.set_facecolor("#ffebb8") # ax.set_title('Least novel removal', fontsize=15) # plt.xlim(0, 1) # plt.ylim(0, 1) # plt.legend() # plt.show() reference_pop = np.delete(reference_pop, removal_indices, 0) if removal_type == 'gmm_sampling': # hypothesis: n_components equals generative number of components n_comp = N gmix = mixture.GaussianMixture(n_components=n_comp, covariance_type='full') gmix.fit(reference_pop) nodes = gmix.sample(removal_size)[0] k_tree = KDTree(reference_pop) removal_indices = [] for node in nodes: # for each node, find the closest point in the reference pop cond = True closest = 1 # make sure removal indivual was not already chosen while cond: if closest == 1: possible_removal_index = k_tree.query(node, closest)[1] else: possible_removal_index = k_tree.query( node, closest)[1][closest - 1] if possible_removal_index not in removal_indices: removal_indices.append(possible_removal_index) cond = False else: closest += 1 # # plot the reference pop # fig = plt.figure(figsize=(5, 5)) # ax = fig.add_subplot(111) # ax.scatter(reference_pop[:, 0], reference_pop[:, 1], label='reference') # ax.scatter(reference_pop[removal_indices, 0], reference_pop[removal_indices, 1], label='removed', # marker='x', color='red') # ax.set_facecolor("#ffebb8") # ax.set_title('GMM removal', fontsize=15) # plt.xlim(0, 1) # plt.ylim(0, 1) # plt.legend() # plt.show() reference_pop = np.delete(reference_pop, removal_indices, 0) if removal_type == 'grid': n_dim = reference_pop.shape[1] # compute maximums and minimums on each dimension maximums = np.max(reference_pop, 0) minimums = np.min(reference_pop, 0) ranges = maximums - minimums bins_per_dim = math.floor(math.exp(math.log(removal_size) / n_dim)) + 1 grid_positions = [] for i in range(n_dim): # important choice on how we make the grid grid_position = [ minimums[i] + ((j + 1) * ranges[i] / bins_per_dim) for j in range(bins_per_dim) ] grid_position.pop() grid_positions.append(grid_position) mesh = np.meshgrid(*grid_positions) nodes = list(zip(*(dim.flat for dim in mesh))) nodes = np.array(nodes) k_tree = KDTree(reference_pop) removal_indices = [] for node in nodes: # for each node, find the closest point in the reference pop cond = True closest = 1 # make sure removal indivual was not already chosen while cond: if closest == 1: possible_removal_index = k_tree.query(node, closest)[1] else: possible_removal_index = k_tree.query( node, closest)[1][closest - 1] if possible_removal_index not in removal_indices: removal_indices.append(possible_removal_index) cond = False else: closest += 1 # dealing with the missing removals nb_missing_removals = removal_size - len(nodes) for _ in range(nb_missing_removals): query = random.choice(nodes) cond = True # start with second closest since closest is for sure in removal indices closest = 2 # make sure removal indivual was not already chosen while cond: possible_removal_index = k_tree.query(query, closest)[1][closest - 1] if possible_removal_index not in removal_indices: removal_indices.append(possible_removal_index) cond = False else: closest += 1 # # plot the reference pop # fig = plt.figure(figsize=(5, 5)) # ax = fig.add_subplot(111) # ax.scatter(reference_pop[:, 0], reference_pop[:, 1], label='reference') # ax.scatter(nodes[:, 0], nodes[:, 1], label='grid', marker='+', color='black') # ax.scatter(reference_pop[removal_indices, 0], reference_pop[removal_indices, 1], label='removed', # 
marker='x', color='red') # ax.set_facecolor("#ffebb8") # ax.set_title('Grid removal', fontsize=15) # plt.xlim(0, 1) # plt.ylim(0, 1) # plt.legend() # plt.show() reference_pop = np.delete(reference_pop, removal_indices, 0) if removal_type == 'grid_density': n_dim = reference_pop.shape[1] # compute maximums and minimums on each dimension maximums = np.max(reference_pop, 0) minimums = np.min(reference_pop, 0) ranges = maximums - minimums bins_per_dim = math.floor(math.exp(math.log(N_CELLS) / n_dim)) + 1 grid_positions = [] for i in range(n_dim): # important choice on how we make the grid grid_position = [ minimums[i] + (j * ranges[i] / (bins_per_dim - 1)) for j in range(bins_per_dim) ] grid_positions.append(grid_position) mesh = np.meshgrid(*grid_positions) nodes = list(zip(*(dim.flat for dim in mesh))) nodes = np.array(nodes) removal_indices = [] nb_cells = (bins_per_dim - 1)**n_dim grid_density = np.zeros(nb_cells) cells = [[] for _ in range(nb_cells)] for ind_idx, ind in enumerate(reference_pop): dim_indexs = np.zeros(n_dim) for i, dim in enumerate(ind): grid_pos = grid_positions[i] for j in range(bins_per_dim - 1): if dim >= grid_pos[j] and dim < grid_pos[j + 1]: dim_indexs[i] = j + 1 if 0 not in dim_indexs: # indivudal is inside the grid dim_indexs = dim_indexs - 1 cell_idx = 0 for k, dim_idx in enumerate(dim_indexs): cell_idx += int(dim_idx * ((bins_per_dim - 1)**k)) grid_density[cell_idx] += 1 cells[cell_idx].append(ind_idx) grid_density = grid_density / np.sum(grid_density) # TEST: square the grid_density to biase more towards high density cells # grid_density = np.square(grid_density) grid_law = np.cumsum(grid_density) for _ in range(removal_size): dice = random.random() * grid_law[-1] cell_to_remove_from = np.searchsorted(grid_law, dice) cond = True n = 0 while cond: if n < LIMIT_DENSITY_ITER: removal_idx = random.choice(cells[cell_to_remove_from]) else: removal_idx = random.choice(list(range( len(reference_pop)))) if removal_idx not in removal_indices: removal_indices.append(removal_idx) cond = False n += 1 # # plot the reference pop # fig = plt.figure(figsize=(5, 5)) # ax = fig.add_subplot(111) # ax.scatter(reference_pop[:, 0], reference_pop[:, 1], label='reference') # ax.scatter(nodes[:, 0], nodes[:, 1], label='grid', marker='+', color='black') # ax.scatter(reference_pop[removal_indices, 0], reference_pop[removal_indices, 1], label='removed', # marker='x', color='red') # ax.set_facecolor("#ffebb8") # ax.set_title('Grid density removal', fontsize=15) # plt.xlim(0, 1) # plt.ylim(0, 1) # plt.legend() # plt.show() reference_pop = np.delete(reference_pop, removal_indices, 0) end_time = time.time() removal_time = end_time - begin_time return reference_pop, removal_time
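The duplicate-avoiding nearest-neighbour lookup shared by the 'gmm_sampling' and 'grid' branches (query the k-th closest reference individual until one not already chosen is found) can be exercised on its own; a minimal sketch with an invented reference population and removal nodes:

import numpy as np
from scipy.spatial import cKDTree as KDTree

rng = np.random.RandomState(0)
reference_pop = rng.uniform(size=(50, 2))
nodes = rng.uniform(size=(5, 2))          # points to remove around

k_tree = KDTree(reference_pop)
removal_indices = []
for node in nodes:
    closest = 1
    while True:
        result = k_tree.query(node, closest)[1]
        idx = result if closest == 1 else result[closest - 1]
        if idx not in removal_indices:
            removal_indices.append(idx)
            break
        closest += 1
print(removal_indices)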
def tsne(fdarray, new_label='tsne', channels=None, transform='arcsinh', sample=6000, verbose=False, backgate=True): """Perform t-SNE/viSNE on the FlowData object """ fdarray = util.make_list(fdarray) # If the user has not provided a list of channels to use, # use the intersection of all isotope channels if channels is None: channel_set = [] for fd in fdarray: channel_set.append(set(fd.isotopes)) channels = list(set.intersection(*channel_set)) # Make a copy of the data in files that we want points = [] for fd in fdarray: points.append(np.vstack([fd[ch] for ch in channels]).T) # transform if transform == 'arcsinh': for pts in points: # Apply the transform inplace to the data np.arcsinh(5 * pts, pts) # Randomly sample to reduce the number of points sample_masks = [] for pts in points: if sample < pts.shape[0]: # If we have enough points to subsample sample_masks.append( np.random.choice(pts.shape[0], sample, replace=False)) else: # Otherwise we add all the points sample_masks.append(np.array(range(pts.shape[0]))) # Sample the points, and construct a large matrix sample_points = [] for mask, pts in zip(sample_masks, points): sample_points.append(pts[mask, :]) X = np.vstack(sample_points) # Perform t-SNE Y = lib_tsne.tsne(X, verbose=verbose) assert Y is not None, ('t-SNE failed to return') # Split Y into a matrix for each dataset splits = np.cumsum( np.array([mask.shape[0] for mask in sample_masks], dtype=int)) Y_split = np.split(Y, splits, axis=0) # now expand data to reassign these points back into the dataset tsne_coords = [] for (pts, mask, Yspt) in zip(points, sample_masks, Y_split): npoints = pts.shape[0] Z = np.zeros((npoints, 2)) * float('NaN') Z[mask, :] = Yspt tsne_coords.append(Z) # If a point didn't get sampled, place its t-SNE coordinates at its nearest # neighbor. if backgate: kd = KDTree(X) # select points not assigned values with t-SNE for pts, mask, coords, j in zip(points, sample_masks, tsne_coords, range(len(points))): nan_points = np.argwhere(np.isnan(coords[:, 0])) d, near = kd.query(pts[nan_points], 1) # convert back to coordinates on the whole dataset coords[nan_points, :] = Y[near, :] tsne_coords[j] = coords # add to data to FlowData structure for fd, coords in zip(fdarray, tsne_coords): fd[new_label + '1'] = coords[:, 0] fd[new_label + '2'] = coords[:, 1]
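The 'backgate' step alone, placing events that were not subsampled at the embedding coordinates of their nearest sampled neighbour; a minimal sketch with made-up channel data and a fake 2-D embedding standing in for the t-SNE output:

import numpy as np
from scipy.spatial import cKDTree as KDTree

rng = np.random.RandomState(0)
pts = rng.normal(size=(1000, 5))              # all events, 5 channels
mask = rng.choice(1000, 200, replace=False)   # indices of the sampled events
Y = rng.normal(size=(200, 2))                 # pretend embedding of the sample

coords = np.full((1000, 2), np.nan)
coords[mask, :] = Y

kd = KDTree(pts[mask])                        # tree over the sampled points only
nan_rows = np.flatnonzero(np.isnan(coords[:, 0]))
d, near = kd.query(pts[nan_rows], 1)
coords[nan_rows, :] = Y[near, :]
assert not np.isnan(coords).any()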
class VoronoiClosestPolytope: def __init__(self, polytopes, key_vertices_count=0, process_count=8, max_number_key_points=None): ''' Compute the closest polytope using Voronoi cells :param polytopes: ''' self.init_start_time = default_timer() self.section_start_time = self.init_start_time self.polytopes = np.asarray(polytopes, dtype='object') self.type = self.polytopes[0].type self.process_count = process_count self.key_vertices_count = key_vertices_count if self.type == 'AH_polytope': self.dim = self.polytopes[0].t.shape[0] elif self.type == 'zonotope': self.dim = self.polytopes[0].x.shape[0] else: raise NotImplementedError if self.key_vertices_count > 0: self.key_points = np.zeros([ len(self.polytopes) * (1 + 2**self.key_vertices_count), self.dim ]) else: self.key_points = np.zeros([len(self.polytopes), self.dim]) for i, z in enumerate(polytopes): if self.type == 'AH_polytope': if self.key_vertices_count > 0: raise NotImplementedError else: self.key_points[i, :] = self.polytopes[i].t[:, 0] elif self.type == 'zonotope': if self.key_vertices_count > 0: self.key_points[i * (2**self.key_vertices_count + 1), :] = self.polytopes[i].x[:, 0] self.key_points[ i * (2**self.key_vertices_count + 1) + 1:(i + 1) * (2**self.key_vertices_count + 1), :] = get_k_random_edge_points_in_zonotope( self.polytopes[i], self.key_vertices_count) else: self.key_points[i, :] = self.polytopes[i].x[:, 0] else: raise NotImplementedError if max_number_key_points: # sample the key points n = self.key_points.shape[0] chosen_key_points = np.random.choice(n, size=min( n, max_number_key_points), replace=False) self.key_points = self.key_points[chosen_key_points, :] # print(self.key_points.shape) self.key_point_to_polytope_map = dict( ) # stores the potential closest polytopes associated with each Voronoi (centroid) for key_point in self.key_points: ds = np.zeros(self.polytopes.shape[0]) self.key_point_to_polytope_map[str(key_point)] = np.rec.fromarrays( [self.polytopes, ds], names=('polytopes', 'distances')) self.build_cell_polytope_map_default() #build kd-tree for centroids self.key_point_tree = KDTree(self.key_points) print(('Completed precomputation in %f seconds' % (default_timer() - self.init_start_time))) def build_cell_polytope_map_default(self): polytope_key_point_indices = np.array( np.meshgrid(np.arange(self.polytopes.shape[0]), np.arange(self.key_points.shape[0]))).T.reshape(-1, 2) arguments = [] for i in polytope_key_point_indices: arguments.append( (self.key_points, self.key_point_to_polytope_map, i[0], i[1])) p = Pool(self.process_count) pca = p.map(set_polytope_pair_distance, arguments) polytope_key_point_arrays = np.asarray(pca).reshape( (self.polytopes.shape[0]), self.key_points.shape[0]) # print(polytope_centroid_arrays) # compute pairwise distances of the centroids and the polytopes #fixme for key_point_index, key_point in enumerate(self.key_points): key_point_string = str(key_point) for polytope_index, polytope in enumerate( self.key_point_to_polytope_map[key_point_string] ['polytopes']): self.key_point_to_polytope_map[str(key_point)].distances[ polytope_index] = polytope_key_point_arrays[ polytope_index, key_point_index] # print(polytope_key_point_arrays[polytope_index, key_point_index]) self.key_point_to_polytope_map[key_point_string].sort( order='distances') # print(self.centroid_to_polytope_map[centroid_string]) def find_closest_polytope(self, query_point, return_intermediate_info=False): #find the closest centroid d, i = self.key_point_tree.query(query_point) closest_key_point = 
self.key_point_tree.data[i] # print('closest key point', closest_key_point) closest_key_point_polytope = self.key_point_to_polytope_map[str( closest_key_point)]['polytopes'][0] # print('closest polytope centroid' + str(closest_key_point_polytope.x)) dist_query_centroid_polytope = distance_point_polytope( closest_key_point_polytope, query_point, ball='l2')[0] dist_query_key_point = np.linalg.norm(query_point - closest_key_point) # print(dist_query_key_point, dist_query_centroid_polytope) cutoff_index = np.searchsorted( self.key_point_to_polytope_map[str(closest_key_point)].distances, dist_query_key_point + dist_query_centroid_polytope) # print(cutoff_index) # print(self.key_point_to_polytope_map[str(closest_key_point)]['distances'][0:cutoff_index]) # print(self.key_point_to_polytope_map[str(closest_key_point)]['distances'][cutoff_index:]) # print('dqc',dist_query_key_point) # print(self.centroid_to_polytope_map[str(closest_key_point)].distances) closest_polytope_candidates = self.key_point_to_polytope_map[str( closest_key_point)].polytopes[0:cutoff_index] # print(closest_polytope_candidates) best_polytope = None best_distance = np.inf for polytope in closest_polytope_candidates: if best_distance < 1e-9: break dist = distance_point_polytope(polytope, query_point, ball='l2')[0] if best_distance > dist: best_distance = dist best_polytope = polytope # print('best distance', best_distance) if return_intermediate_info: return best_polytope, best_distance, closest_polytope_candidates return best_polytope
def correlate_neighbourhood(calcium_signal: np.ndarray, kd_tree: cKDTree, center_ix: int, init_radius=0.02, max_radius=.08, min_corr=.5, step=0.01, measure=correlation, verbose=True): """ Given a center neuron and parameters of the neighbourhood definition, tries to group neurons The basic idea is: 1. Look at all neurons within a given radius of the center neurons, 2. Correlate their calcium signal to the center's. 3. Keep sufficiently highly correlated neurons as being part of the group. 4. Compute the fraction correlated / all neighboring neurons 5. Move the center to the neuron closest to the center of mass of this group 6. Increase slightly the radius and start again. 7. As long as the fraction of correlated neurons is not droppping significantly, keep on increasing the radius 8. Label the neurons as being part of this group. If some were already part of another group, they belong to the biggest group Parameters ---------- calcium_signal kd_tree center_ix init_radius max_radius min_corr step measure verbose Returns ------- """ FRAC_DEC = .95 radii = np.arange(init_radius, max_radius, step) radius = radii[0] # not necessary due to loop? frac_corr = 0 w_correlated = np.array([]) for radius in radii: neighbors_ix, _ = get_neighbors(kd_tree, center_ix, radius) if len(neighbors_ix) == 0: # one neuron left so no neighbours break corr_neigh = measure(calcium_signal, center_ix, neighbors_ix) # Fraction of correlated neurons in the neighboorhod correlated = corr_neigh >= min_corr n_correlated = np.sum(correlated) new_frac_corr = n_correlated / len(corr_neigh) if verbose: print( f'Number of neurons: {len(corr_neigh)} ; fraction correlated: {new_frac_corr * 100:.2f}% ;' f' Correlated neurons: {np.sum(correlated)}') # More correlations than before if new_frac_corr >= FRAC_DEC * frac_corr and n_correlated > 2: # 100 frac_corr = new_frac_corr w_correlated = neighbors_ix[correlated] centroid = np.mean(kd_tree.data[w_correlated, :], 0) _, center_ix = kd_tree.query(centroid, 1) else: break if radius == radii[-1]: # print('\t >>> Reached maximum radius <<<') pass return w_correlated
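`get_neighbors` is not shown here; one plausible minimal implementation that is consistent with how it is called above (indices and coordinates of all neurons within `radius` of the centre, excluding the centre itself) might be:

import numpy as np
from scipy.spatial import cKDTree

def get_neighbors(kd_tree: cKDTree, center_ix: int, radius: float):
    """Return (indices, coordinates) of points within `radius` of point `center_ix`."""
    center = kd_tree.data[center_ix]
    ix = np.array([i for i in kd_tree.query_ball_point(center, radius)
                   if i != center_ix], dtype=int)
    return ix, kd_tree.data[ix]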
coordsP[:, 0] = x_psf coordsP[:, 1] = y_psf coordsP[:, 2] = z_psf coordsF[:, 0] = x_flc coordsF[:, 1] = y_flc coordsF[:, 2] = z_flc ######################################################################## # kdt = KDT(coordsF) # idxsF = kdt.query(coordsP)[1] # ds = distArr(x_psf,y_psf,z_psf,x_flc[idxsF],y_flc[idxsF],z_flc[idxsF]) kdt = KDT(coordsP) idxsP = kdt.query(coordsF)[1] ds = distArr(x_flc, y_flc, z_flc, x_psf[idxsP], y_psf[idxsP], z_psf[idxsP]) # print(len(ds)) idxsF = np.arange(x_flc.size) msk = ds < matchtol idxsF = idxsF[msk] idxsP = idxsP[msk] ds = ds[msk] # print(len(idxs1))