import numpy as np
from scipy.spatial import cKDTree as KDTree


def KLdivTree(X1, X2):
    """Fast KL divergence estimation using KD-trees."""
    n, d = X1.shape
    m, dy = X2.shape
    xtree = KDTree(X1)
    ytree = KDTree(X2)
    # k=2 within X1 because the closest hit is the sample itself
    r = xtree.query(X1, k=2, eps=.01, p=2)[0][:, 1]
    s = ytree.query(X1, k=1, eps=.01, p=2)[0]
    diff = r / s
    return -np.log(diff).sum() * d / n + np.log(m / (n - 1))
import numpy as NP


def kldivergence(x, y):
    """Compute the Kullback-Leibler divergence between two multivariate samples.

    Parameters
    ----------
    x : 2D array (n,d)
        Samples from distribution P, which typically represents the true
        distribution.
    y : 2D array (m,d)
        Samples from distribution Q, which typically represents the
        approximate distribution.

    Returns
    -------
    out : float
        The estimated Kullback-Leibler divergence D(P||Q).

    References
    ----------
    Perez-Cruz, F. Kullback-Leibler divergence estimation of continuous
    distributions. IEEE International Symposium on Information Theory, 2008.
    """
    from scipy.spatial import cKDTree as KDTree

    # Check the dimensions are consistent
    x = NP.atleast_2d(x)
    y = NP.atleast_2d(y)

    n, d = x.shape
    m, dy = y.shape

    assert d == dy

    # Build a KD tree representation of the samples and find the nearest
    # neighbour of each point in x.
    xtree = KDTree(x)
    ytree = KDTree(y)

    # Get the first two nearest neighbours for x, since the closest one is
    # the sample itself.
    r = xtree.query(x, k=2, eps=.01, p=2)[0][:, 1]
    s = ytree.query(x, k=1, eps=.01, p=2)[0]

    # There is a mistake in the paper: in Eq. (14), the first term on the
    # right-hand side is missing a negative sign.
    return -NP.log(r / s).sum() * d / n + NP.log(m / (n - 1.))
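# A minimal sanity check of the estimator above (a sketch, assuming only that
# numpy is available as NP, matching the function's alias): for two samples
# drawn from the same distribution the estimate should be near zero, and it
# should grow once one sample is shifted.
if __name__ == '__main__':
    NP.random.seed(0)
    p = NP.random.randn(2000, 2)
    q = NP.random.randn(2000, 2)
    print(kldivergence(p, q))        # close to 0 for identical distributions
    print(kldivergence(p, q + 2.0))  # clearly positive for a shifted Q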
import numpy as np
from scipy.spatial import cKDTree as KDT


def CartMatch(coord1, coord2, tol=None, nnearest=1):
    """Cartesian coordinate matching."""
    # sanitize
    coord1 = np.array(coord1, ndmin=1)
    coord2 = np.array(coord2, ndmin=1)

    # check the dimensions of the coordinates
    npairs1 = len(coord1)
    ndim1 = 1 if len(np.shape(coord1)) == 1 else np.shape(coord1)[1]
    npairs2 = len(coord2)
    ndim2 = 1 if len(np.shape(coord2)) == 1 else np.shape(coord2)[1]

    # check whether coord1 and coord2 have the same dimensionality
    if ndim1 != ndim2:
        raise RuntimeError("The dims of coord1/2 are not the same.")
    else:
        ndim = ndim1

    # make proper 2d arrays if they are 1d arrays
    if ndim == 1:
        coord1 = np.array([coord1, np.zeros(len(coord1))]).T
        coord2 = np.array([coord2, np.zeros(len(coord2))]).T

    # build a kd-tree on coord2
    kdt = KDT(coord2)
    if nnearest == 1:
        idxs2 = kdt.query(coord1)[1]
    elif nnearest > 1:
        idxs2 = kdt.query(coord1, nnearest)[1][:, -1]
    else:
        raise ValueError('invalid nnearest ' + str(nnearest))

    # distance -- warning: this could overflow if the floating-point precision
    # is not enough; we assume that case is beyond the distance of interest...
    ds = np.sqrt(np.sum((coord1 - coord2[idxs2]) ** 2, axis=1))

    # index of coord1
    idxs1 = np.arange(npairs1)

    # distance filtering
    if tol is not None:
        msk = ds < tol
        idxs1 = idxs1[msk]
        idxs2 = idxs2[msk]
        ds = ds[msk]

    return idxs1, idxs2, ds
def swept_extrude(self, thickness):
    """
    outer is a copy of inner, possibly with added detail, but with an
    identical boundary

    we seek to create a castable object with a constant thickness
    'thickness'; to that end, we need to match the boundary points to make
    a closed extrusion

    extrusion is done iteratively; we init by radially shrinking the inner
    mesh by thickness
    """
    assert thickness > 0
    outer = self.vertices
    tree = KDTree(outer)
    outer_radius = np.linalg.norm(outer, axis=1)

    inner = outer
    # incremental updates
    while True:
        # find the nearest outer point for each inner point
        dist, idx = tree.query(inner, k=1)
        inner_radius = np.linalg.norm(inner, axis=1)
        radial_dist = inner_radius - outer_radius[idx]
        ortho_dist2 = dist ** 2 - radial_dist ** 2
        new_radius = outer_radius[idx] - np.sqrt(1 - ortho_dist2 / thickness ** 2) * thickness
        if np.allclose(inner_radius, new_radius):
            break
        inner = inner / (inner_radius / new_radius)[:, None]

    # return inner surface swept by thickness
    return self.extrude(inner)
def sht_isosurface(filename, l_max=20, prop='electric_potential', test=None):
    """Given an SBF, describe the set of vertices and their esp using sht.
    Will scale the mesh to be of unit mean radius.

    Arguments:
    filename -- name of the SBF file containing a surface

    Keyword arguments:
    prop -- the name of the vertex property to describe in combination
        with the shape (or radius)
    l_max -- maximum angular momenta
    test -- use to keep the actual shape and property values for
        examination of accuracy of descriptor
    """
    name = Path(filename).stem
    LOG.debug('Describing %s surface with spherical harmonics', name)
    datafile = sbf.read_file(filename)
    pts = datafile['vertices'].data.transpose()
    LOG.debug('Loaded vertex data')
    # shift to be centered about the origin
    pts -= np.mean(pts, axis=0)
    # this is faster for some reason than np.apply_along_axis
    norms = np.sqrt(pts[:, 0] ** 2 + pts[:, 1] ** 2 + pts[:, 2] ** 2)
    mean_norm = np.mean(norms)
    pts /= mean_norm
    norms /= mean_norm
    pts_normalized = pts / np.reshape(norms, (pts.shape[0], 1))
    LOG.debug('Normalized points')
    sht = SHT(l_max)
    grid_cartesian = spherical_to_cartesian(
        np.c_[np.ones(sht.grid.shape[0]), sht.grid[:, 1], sht.grid[:, 0]])
    LOG.debug('Constructing tree')
    tree = KDTree(pts_normalized)
    LOG.debug('Done')
    LOG.debug('Interpolating values')
    nearest = tree.query(grid_cartesian, 1)
    LOG.debug('Done')
    shape = values_from_grid(norms, nearest[1])
    property_values = values_from_grid(datafile[prop].data, nearest[1])
    if test is not None:
        test['actual'] = shape
    # normalize property to be in [0,1], keep track of min and range
    prop_min = np.min(property_values)
    prop_scale = np.abs(np.max(property_values) - np.min(property_values))
    property_values -= prop_min
    if prop_scale != 0:
        property_values /= prop_scale
    others = [mean_norm, prop_min, prop_scale]
    combined = np.zeros(property_values.shape, dtype=np.complex128)
    combined.real = shape
    combined.imag = property_values
    return name, others, sht.analyse(combined)
from scipy.spatial import cKDTree as KDTree


def closest_index(sample_points, indices):
    r"""
    Find the nearest sample_point to each query point (along with the
    distance to that point).

    Input is an array of sample_points and an array of indices (query
    points) to test at. Output is an array of indices and distances.
    """
    kdtree = KDTree(sample_points)
    distance, index = kdtree.query(indices)
    return index, distance
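# Hypothetical usage sketch (the point values are made up): query two 2-D
# test points against three sample points.
import numpy as np

pts = np.array([[0., 0.], [1., 0.], [0., 1.]])
queries = np.array([[0.1, 0.1], [0.9, 0.2]])
idx, dist = closest_index(pts, queries)
# idx -> [0, 1]; dist holds the distances to the matched sample points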
import numpy as np
import numpy.ma as ma
from scipy.spatial import cKDTree as KDTree


def kdtree_clean(xx2d, yy2d, xS, yS, elevation2d):
    # Remove dodgy added data from the regridding, based on a KD-tree.
    # dist is how far away the nearest neighbours are; we need to decide
    # on this threshold.
    # Only do this for points that have already been classified as ridges.
    grid_points = np.c_[xx2d.ravel(), yy2d.ravel()]
    tree = KDTree(np.c_[xS, yS])
    dist, _ = tree.query(grid_points, k=1)
    dist = dist.reshape(xx2d.shape)
    elevation2d_KD = ma.masked_where(dist > 4, elevation2d)
    return elevation2d_KD
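# Assumed-scale demo of the masking above: grid cells more than 4 units from
# the single sample point at (5, 5) get masked out.
import numpy as np

xx2d, yy2d = np.meshgrid(np.arange(0., 20.), np.arange(0., 20.))
elevation2d = np.ones_like(xx2d)
masked = kdtree_clean(xx2d, yy2d, np.array([5.0]), np.array([5.0]), elevation2d)
print(masked.count(), 'cells survive the 4-unit cutoff')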
import numpy as np
from scipy.spatial import cKDTree as KDTree


def match_model_masses(isoMasses, starMasses):
    kdt = KDTree(isoMasses.reshape((len(isoMasses), 1)))
    q_results = kdt.query(starMasses.reshape((len(starMasses), 1)), k=1)
    indices = q_results[1]

    # flag matches whose fractional mass difference exceeds 10%
    dm_frac = np.abs(starMasses - isoMasses[indices]) / starMasses
    idx = np.where(dm_frac > 0.1)[0]
    indices[idx] = -1

    return indices
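# Small self-contained check with made-up masses: stars within 10% of an
# isochrone mass get that entry's index, everything else gets -1.
import numpy as np

isoMasses = np.array([0.5, 1.0, 2.0])
starMasses = np.array([0.52, 1.9, 5.0])
print(match_model_masses(isoMasses, starMasses))  # expected: [0 2 -1]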
def generate_galaxy(num_stars, spiral_arm_count, spiral_tightness,
                    galaxy_radius, bulge_height, disk_height):
    # generate vertices
    star_dict = {}
    next_index = 0

    # spiral stars
    for i in range(int(num_stars * 0.65)):
        star_dict[next_index] = create_vertex_spiral(max_radius=galaxy_radius,
                                                     arm_count=spiral_arm_count,
                                                     beta=spiral_tightness,
                                                     disk_height=disk_height)
        next_index += 1

    # inner cluster stars
    for i in range(int(num_stars * 0.15)):
        star_dict[next_index] = create_vertex_inner(max_radius=galaxy_radius * 0.8,
                                                    bulge_height=bulge_height)
        next_index += 1

    # outer "spread out" stars
    while len(star_dict) < num_stars:
        star_dict[next_index] = create_vertex_outer(max_radius=galaxy_radius * 0.9,
                                                    disk_height=disk_height)
        next_index += 1

    # generate a KDTree from the star data in order to help with edges
    star_keys = list(star_dict.keys())
    star_values = list(star_dict.values())
    star_tree = KDTree(star_values)

    # compute the nearest neighbors for each vertex
    distance_data, index_data = star_tree.query(star_values, k=20, eps=0.1)

    # for each vertex, randomly add edges to its nearest neighbors
    edge_dict = {}
    for distances, indexes in zip(distance_data, index_data):
        v1 = star_keys[int(indexes[0])]
        if v1 not in edge_dict:
            edge_dict[v1] = set()
        for distance, v2 in create_edges(zip(distances[1:], indexes[1:])):
            v2 = star_keys[int(v2)]
            edge_dict[v1].add(v2)
            if v2 not in edge_dict:
                edge_dict[v2] = set()
            edge_dict[v2].add(v1)

    # remove disconnected components from the graph
    star_dict, edge_dict = remove_disconnected_stars(star_dict, edge_dict)

    # convert the star array to an array of dictionaries before returning,
    # so other data can be added
    star_dict = {key: {'position': Vector3D(*p)} for key, p in star_dict.items()}

    return star_dict, edge_dict
def sample_colors(img, sample_points, n):
    h, w = img.shape[:2]

    print("Sampling colors...")
    tree = KDTree(np.array(sample_points))
    color_samples = collections.defaultdict(list)
    img_lab = rgb2lab(img)
    xx, yy = np.meshgrid(np.arange(h), np.arange(w))
    pixel_coords = np.c_[xx.ravel(), yy.ravel()]
    nearest = tree.query(pixel_coords)[1]

    i = 0
    for pixel_coord in pixel_coords:
        color_samples[tuple(tree.data[nearest[i]])].append(
            img_lab[tuple(pixel_coord)])
        i += 1

    print("Computing color means...")
    samples = []
    for point, colors in color_samples.items():
        avg_color = np.sum(colors, axis=0) / len(colors)
        samples.append(np.append(point, avg_color))

    if len(samples) > n:
        print("Downsampling {} to {} points...".format(len(samples), n))

    while len(samples) > n:
        tree = KDTree(np.array(samples))
        dists, neighbours = tree.query(np.array(samples), 2)
        dists = dists[:, 1]
        worst_idx = min(range(len(samples)), key=lambda i: dists[i])
        samples[neighbours[worst_idx][1]] += samples[neighbours[worst_idx][0]]
        samples[neighbours[worst_idx][1]] /= 2
        samples.pop(neighbours[worst_idx][0])

    color_samples = []
    for sample in samples:
        color = lab2rgb([[sample[2:]]])[0][0]
        color_samples.append(tuple(sample[:2][::-1]) + tuple(color))

    return color_samples
from scipy.spatial import cKDTree as KDTree


def point_find_nearest_businesses(df, point, k=5, loc_cols=['latitude', 'longitude']):
    """
    Find the k businesses nearest to a point (lat, long).

    :param df: pd.DataFrame with loc_cols
    :param point: (lat, long) tuple to query at
    :param k: number of neighbors to return
    :param loc_cols: list of columns to compare for nearest neighbors
    :return: the k nearest rows of df
    """
    tree = KDTree(df[loc_cols])
    distance, indices = tree.query(point, k)
    return df.iloc[indices]
def neighborDistances(self, neighbors=64):
    """
    Find the N-th nearest neighbors to each particle

    :param neighbors: neighbor order
    :type neighbors: int.

    :returns: array with units
    """
    # Get the particle positions; if not available, compute them
    if hasattr(self, "positions"):
        positions = self.positions.copy()
    else:
        positions = self.getPositions(save=False)

    # Build the KD-Tree
    particle_tree = KDTree(positions.value)

    # For memory reasons, with large datasets it's better to proceed in
    # chunks with nearest neighbors queries
    numPart = positions.shape[0]
    rp = np.zeros(numPart)

    # Split the particles in chunks
    chunkSize = numPart // neighbors
    remaining = numPart % neighbors

    # Cycle over the chunks, querying the tree
    for i in range(neighbors):
        rp[i * chunkSize:(i + 1) * chunkSize] = particle_tree.query(
            positions[i * chunkSize:(i + 1) * chunkSize].value,
            k=neighbors)[0][:, neighbors - 1]

    if remaining:
        rp[neighbors * chunkSize:] = particle_tree.query(
            positions[neighbors * chunkSize:].value,
            k=neighbors)[0][:, neighbors - 1]

    # Return
    return rp * positions.unit
def test_ridges(self, name):
    # Check that the ridges computed by Voronoi indeed separate
    # the regions of nearest neighborhood, by comparing the result
    # to KDTree.
    points = DATASETS[name]

    tree = KDTree(points)
    vor = qhull.Voronoi(points)

    for p, v in vor.ridge_dict.items():
        # consider only finite ridges
        if not np.all(np.asarray(v) >= 0):
            continue

        ridge_midpoint = vor.vertices[v].mean(axis=0)
        d = 1e-6 * (points[p[0]] - ridge_midpoint)

        dist, k = tree.query(ridge_midpoint + d, k=1)
        assert_equal(k, p[0])

        dist, k = tree.query(ridge_midpoint - d, k=1)
        assert_equal(k, p[1])
def compute_errors(self, mag_err_lim=None, dx_lim=None):
    """Estimates errors and completeness per star.

    Load photometry from fake table (from same chip, ext as primary data).
    For each star in the phot table, get its magnitude. Use a kdtree to
    get the N most similar stars; compute statistics.

    Parameters
    ----------
    mag_err_lim : float
        Maximum absolute difference in magnitudes, in any band, for
        the star to be considered recovered.
    dx_lim : float
        Maximum distance between a fake star's input site and its
        observed site for the fake star to be considered recovered.
    """
    mag_errors = self._f.mag_errors()  # diffs nstars x nimages
    recovered = self._f.recovered(mag_err_lim=mag_err_lim, dx_lim=dx_lim)
    tree = KDTree(self._f.data['mag'])
    obs_mags = np.array([row['mag'] for row in self._p.photTable.iterrows()])
    dists, indices = tree.query(obs_mags, k=100)
    # distance_upper_bound=mag_err_lim)
    nObs = obs_mags.shape[0]
    nImages = obs_mags.shape[1]
    sigmas = np.empty([nObs, nImages])
    comps = np.empty(nObs)
    for i in range(nObs):
        if np.any(obs_mags[i] > 50.):
            for j in range(nImages):
                sigmas[i, j] = np.nan
            comps[i] = np.nan
            continue
        idx = indices[i, :].flatten()
        for j in range(nImages):
            # Estimate uncertainty in this band (image index)
            sigmas[i, j] = np.std(mag_errors[idx, j])
        # Estimate completeness for this star
        c = recovered[indices[i, :]]
        comps[i] = float(c.sum()) / len(c)
    # insert errors into the HDF5 table (need to make a new column)
    self._p.add_column("ast_mag_err", sigmas)
    # insert completeness for this star
    self._p.add_column("comp", comps)
def main():
    # read in the file
    try:
        ifs = open(sys.argv[1])
        sample, ext = os.path.splitext(sys.argv[1])
    except IndexError:
        ifs = sys.stdin
        sample = ''
    data = np.loadtxt(ifs, delimiter=',')
    if ifs is not sys.stdin:
        ifs.close()

    # view of the com
    com = data[:, 1:4]

    # construct a KD tree
    tree = KDTree(com)

    # query KD tree to find the first nearest neighbor
    dist, idx = tree.query(com, k=2)
    nn = [(i, j, d2) for ((d1, d2), (i, j)) in zip(dist, idx)]

    # histogram of the nearest neighbor distance
    hist(np.array(nn)[:, 2],
         title='{} pore-pore distances'.format(sample),
         output='{}.pdf'.format(sample))

    # save the nearest neighbor distance to .json files
    ofile = '{}_pore-distribution.json'.format(sample)
    medianDist = np.median(np.array(nn)[:, 2])
    dist = {
        'Pore ID': list(data[:, 0].astype(int)),
        'center of mass X': {'units': r'$\mu$m', 'values': list(data[:, 1])},
        'center of mass Y': {'units': r'$\mu$m', 'values': list(data[:, 2])},
        'center of mass Z': {'units': r'$\mu$m', 'values': list(data[:, 3])},
        'volume': {'units': r'$\mu$m^3', 'values': list(data[:, 4])},
        'nearest neighbor distance': {
            'units': r'$\mu$m',
            'values': [entry[2] for entry in sorted(nn, key=lambda e: e[0])]},
        'median nearest neighbor distance': {
            'units': r'$\mu$m', 'values': medianDist}
    }
    json.dump(dist, open(ofile, 'w'))
def match(s, h, fits_image, tolerance=4):
    """
    Parameters
    ----------
    s, h : obj
        Catalog objects. Each must have `ra` and `dec` attributes as
        1-D Numpy arrays.
    fits_image : string
        FITS image for conversion of RA,DEC to X,Y.
    tolerance : number
        Match tolerance in pixels.

    Returns
    -------
    xmatch, ymatch
        Matched X,Y from first catalog.
    xhmatch, yhmatch
        Matched X,Y from second catalog.
    """
    # Now use pywcs to put these on some sort of projection. I think as
    # long as you use the same for both data sets it's not really important
    # what the projection is. In my case I read in a fits image associated
    # with the first catalog and use that header info.
    hdu = io.fits.open(fits_image)
    wcs = pywcs.WCS(hdu['PRIMARY'].header)

    # Convert sky to x,y positions
    x, y = wcs.wcs_world2pix(s.ra, s.dec, 0)
    xh, yh = wcs.wcs_world2pix(h.ra, h.dec, 0)

    # Create a KD Tree
    tree = KDTree(list(zip(x.ravel(), y.ravel())))

    # Search it for the nearest neighbor
    # d = distance of the nearest neighbor
    # i = index in x,y arrays of the nearest neighbor for each source in xh,yh
    d, i = tree.query(list(zip(xh.ravel(), yh.ravel())), k=1)

    # Give me just the matches within a tolerance
    j = d < tolerance
    ii = i[j]

    # match within N pixels; trickier to do this in ra,dec
    xmatch, ymatch = x[ii], y[ii]
    xhmatch, yhmatch = xh[j], yh[j]

    return xmatch, ymatch, xhmatch, yhmatch
class FStrategy(object):
    '''Class implements a connection strategy based on nearest neighbors.

    The class keeps track of states using a k-d tree which is polled for
    nearest states and needs to be updated whenever new states are added
    to the transition system.
    '''

    def __init__(self, no_neighbors, radius):
        # nearest neighbors data structure
        self.nn = None
        self.max_no_neighbors = no_neighbors
        self.max_dist = radius
        self.states = []  # TODO: can I get rid of this? maybe take it from ts

    def add(self, state):
        self.states.append(state)
        self.nn = KDTree([s.conf[:2] for s in self.states])

    def nearest(self, state, ret_dist=False):
        s, d = None, -1
        if self.nn:
            d, idx = self.nn.query(state.conf[:2])
            assert idx == int(idx)
            p = self.nn.data[idx]
            s = self.states[idx]
            assert np.all(s.conf[:2] == p)
        if ret_dist:
            return s, d
        return s

    def near(self, state):
        if self.nn:
            _, idxs = self.nn.query(state.conf[:2],
                                    k=self.max_no_neighbors,
                                    distance_upper_bound=self.max_dist)
            return [self.states[idx] for idx in idxs if idx < len(self.states)]
        return
import logging
import math

import numpy
from scipy.cluster.vq import kmeans
from scipy.spatial import cKDTree as KDTree
from scipy.spatial.distance import cdist


def EstimateLatticeConstant(pos):
    """
    Estimate the lattice constant of a point set that represents a square grid.

    Parameters
    ----------
    pos : array like
        A 2D array of shape (N, 2) containing the coordinates of the points.

    Returns
    -------
    kxy : array like
        [2x2] lattice constants
    """
    # Find the closest 4 neighbours (excluding itself) for each point.
    tree = KDTree(pos)
    dd, ii = tree.query(pos, k=5)
    dr = dd[:, 1:]

    # Determine the median radial distance and filter all points beyond
    # 2*sigma.
    med = numpy.median(dr)
    std = numpy.std(dr)
    outliers = numpy.abs(dr - med) > (2 * std)  # doesn't work well if std is very high

    # Determine horizontal and vertical distance (only radial distance is
    # returned by tree.query).
    dpos = pos[ii[:, 0, numpy.newaxis]] - pos[ii[:, 1:]]
    dx, dy = dpos[:, :, 0], dpos[:, :, 1]
    assert numpy.all(numpy.abs(dr - numpy.hypot(dx, dy)) < 1.0e-12)

    # Use k-means to group the points into two directions.
    X = numpy.column_stack((dx[~outliers], dy[~outliers]))
    X[X[:, 0] < -0.5 * med] *= -1
    X[X[:, 1] < -0.5 * med] *= -1
    centroids, _ = kmeans(X, 2)
    labels = numpy.argmin(cdist(X, centroids), axis=1)
    kxy = numpy.array([numpy.median(X[labels.ravel() == 0], axis=0),
                       numpy.median(X[labels.ravel() == 1], axis=0)])

    # The angle between the two directions should be close to 90 degrees.
    alpha = math.atan2(numpy.linalg.norm(numpy.cross(*kxy)), numpy.dot(*kxy))
    if abs(alpha - math.pi / 2) > math.radians(2.5):
        logging.warning('Estimated lattice angle differs from 90 degrees by '
                        'more than 2.5 degrees. Input data could be wrong')

    return kxy
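# Hedged usage sketch with synthetic data: points on a slightly perturbed
# 10x10 unit grid should yield lattice vectors close to (1, 0) and (0, 1),
# up to sign and ordering.
import numpy

xx, yy = numpy.meshgrid(numpy.arange(10.), numpy.arange(10.))
grid = numpy.column_stack((xx.ravel(), yy.ravel()))
grid += 0.01 * numpy.random.randn(*grid.shape)  # small jitter off the grid
print(EstimateLatticeConstant(grid))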
def render(img, color_samples):
    print("Rendering...")
    h, w = [2 * x for x in img.shape[:2]]
    xx, yy = np.meshgrid(np.arange(h), np.arange(w))
    pixel_coords = np.c_[xx.ravel(), yy.ravel()]

    colors = np.empty([h, w, 3])
    coords = []
    for color_sample in color_samples:
        # pixel indices must be integers
        coord = tuple(int(x * 2) for x in color_sample[:2][::-1])
        colors[coord] = color_sample[2:]
        coords.append(coord)

    tree = KDTree(coords)
    idxs = tree.query(pixel_coords)[1]
    data = colors[tuple(tree.data[idxs].astype(int).T)].reshape((w, h, 3))
    data = np.transpose(data, (1, 0, 2))

    return downscale_local_mean(data, (2, 2, 1))
from scipy.spatial import cKDTree as KDTree


def row_find_nearest_businesses(df, row, k=5, loc_cols=['latitude', 'longitude']):
    """
    Finds the k nearest neighbors of a given row.

    :param df: pd.DataFrame with loc_cols
    :param row: Row that we are interested in finding the nearest neighbors for
    :param k: number of neighbors to return
    :param loc_cols: List of columns that we are comparing to for nearest neighbors
    :return: the k nearest rows of df

    Example
    ---
    >>> row_find_nearest_businesses(businesses, 1)
    """
    tree = KDTree(df[loc_cols])
    distance, indices = tree.query(df[loc_cols], k + 1)
    neighbors = df.iloc[indices[row][1:]]  # Start at 1 to ignore the current index
    return neighbors
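# Hypothetical usage with a tiny DataFrame (column names chosen to match the
# default loc_cols; the coordinates are made up):
import pandas as pd

businesses = pd.DataFrame({
    'latitude':  [40.0, 40.1, 40.2, 41.0],
    'longitude': [-74.0, -74.1, -74.2, -75.0],
})
print(row_find_nearest_businesses(businesses, row=0, k=2))
# returns the two rows nearest to row 0, excluding row 0 itself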
def FindGridSpots(image, repetition):
    """
    Find a grid of spots in an image.

    Parameters
    ----------
    image : array like
        Data array containing the greyscale image.
    repetition : tuple of ints
        Number of expected spots in (X, Y).

    Returns
    -------
    spot_coordinates : array like
        A 2D array of shape (N, 2) containing the coordinates of the spots.
    translation : tuple of two floats
    scaling : tuple of two floats
    rotation : float
    """
    spot_positions = MaximaFind(image, repetition[0] * repetition[1])
    if len(spot_positions) < repetition[0] * repetition[1]:
        logging.warning('Not enough spots found, returning only the found spots.')
        return spot_positions, None, None, None
    # Estimate transformation
    lattice_constants = EstimateLatticeConstant(spot_positions)
    transformation_matrix = numpy.transpose(lattice_constants)
    if numpy.linalg.det(lattice_constants) < 0.:
        transformation_matrix = numpy.fliplr(transformation_matrix)
    translation = numpy.mean(spot_positions, axis=0)
    transform_to_spot_positions = Transform(translation=translation)
    transform_to_spot_positions.transformation_matrix = transformation_matrix
    # Iterative closest point algorithm - single iteration, to fit a grid
    # to the found spot positions
    grid = GridPoints(*repetition)
    spot_grid = transform_to_spot_positions.apply(grid)
    tree = KDTree(spot_positions)
    dd, ii = tree.query(spot_grid, k=1)
    pos_sorted = spot_positions[ii.ravel(), :]
    transformation = Transform.from_pointset(grid, pos_sorted)
    spot_coordinates = transformation.apply(grid)
    return spot_coordinates, translation, transformation.scaling, transformation.rotation
def main():
    image = Image.open(sys.argv[1])
    image2 = Image.new('RGB', image.size, BACKGROUND)

    draw_image = ImageDraw.Draw(image2)

    width, height = image.size
    min_diameter = (width + height) / 200
    max_diameter = (width + height) / 75

    circle = generate_circle(width, height, min_diameter, max_diameter)
    circles = [circle]

    circle_draw(draw_image, image, circle)

    try:
        for i in range(TOTAL_CIRCLES):
            tries = 0
            if IMPORTED_SCIPY:
                kdtree = KDTree([(x, y) for (x, y, _) in circles])
                while True:
                    circle = generate_circle(width, height, min_diameter, max_diameter)
                    elements, indexes = kdtree.query([(circle[0], circle[1])], k=12)
                    for element, index in zip(elements[0], indexes[0]):
                        if not np.isinf(element) and circle_intersection(circle, circles[index]):
                            break
                    else:
                        break
                    tries += 1
            else:
                while any(circle_intersection(circle, circle2)
                          for circle2 in circles):
                    tries += 1
                    circle = generate_circle(width, height, min_diameter, max_diameter)

            print('{}/{} {}'.format(i, TOTAL_CIRCLES, tries))

            circles.append(circle)
            circle_draw(draw_image, image, circle)
    except (KeyboardInterrupt, SystemExit):
        pass

    image2.show()
from functools import partial

import numpy as np
from scipy.spatial import cKDTree as KDT


def spherematch(ra1, dec1, ra2, dec2, tolerance=1 / 3600.):
    """
    Uses a k-d tree to efficiently match two pairs of coordinates in
    spherical geometry, with a tolerance in degrees.
    """
    args = ra1, dec1, ra2, dec2
    ra1, dec1, ra2, dec2 = map(partial(np.array, copy=False), args)
    coords1 = radec_to_coords(ra1, dec1)
    coords2 = radec_to_coords(ra2, dec2)
    kdt = KDT(coords2)
    idx2 = kdt.query(coords1)[1]
    ds = great_circle_distance(ra1, dec1, ra2[idx2], dec2[idx2])
    idx1 = np.arange(ra1.size)
    msk = ds < tolerance
    idx1 = idx1[msk]
    idx2 = idx2[msk]
    ds = ds[msk]
    return idx1, idx2, ds
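# The helpers used above are not shown in this snippet. A plausible sketch of
# radec_to_coords, assuming it maps RA/Dec (degrees) to unit-sphere Cartesian
# coordinates so that the k-d tree matches on chord distance:
import numpy as np

def radec_to_coords(ra, dec):
    ra_r, dec_r = np.radians(ra), np.radians(dec)
    return np.column_stack((np.cos(ra_r) * np.cos(dec_r),
                            np.sin(ra_r) * np.cos(dec_r),
                            np.sin(dec_r)))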
def _find_nearest_neighbors(self):
    """
    Internal function to compute the nearest neighbors of all
    collided/unresolved galaxies
    """
    # we can double count using any uncollided galaxies (for which
    # we have a redshift)
    cond = (self._data.collided == 0) | (self._data.resolved == 1)
    uncollided_gals = self._data[cond]

    # initialize the kdtree for NN calculations
    tree = KDTree(uncollided_gals[self.coord_keys])

    # find the NN for only the collided galaxies
    cond = (self.sample.collided == 1) & (self.sample.resolved == 0)
    collided_gals = self.sample[cond]
    dists, inds = tree.query(collided_gals[self.coord_keys], k=1)

    self._collided_unresolved_ids = collided_gals.index
    self._nearest_neighbor_ids = uncollided_gals.iloc[inds].index
    self._metadata += ['_collided_unresolved_ids', '_nearest_neighbor_ids']
def lab2ind(im, colors=256):
    """convert a Lab image to indexed colors

    :param im: nparray (x,y,n) containing image
    :param colors: int number of colors or predefined Palette

    :ref: http://scikit-learn.org/stable/auto_examples/cluster/plot_color_quantization.html
    """
    # http://stackoverflow.com/questions/10818546/finding-index-of-nearest-point-in-numpy-arrays-of-x-and-y-coordinates
    if isinstance(colors, int):
        p = palette(im, colors)
        pal = [Color(c, 'lab') for c in p]
    else:
        pal = colors
        p = [c.lab for c in flatten(pal)]

    w, h, d = im.shape
    s = w * h  # number of pixels
    flat = np.reshape(im, (s, d))

    from scipy.spatial import cKDTree as KDTree  # compiled is MUCH faster
    mytree = KDTree(p)
    _, indexes = mytree.query(flat)
    im = indexes.reshape(w, h)
    return im, pal
import numpy as np
from scipy.spatial import cKDTree as KDTree


def initial_weights(data, k, min_dist=None):
    """
    Calculate a matrix of weights for the k nearest points.

    For N data points in D dimensions, data has shape (N, D).
    min_dist (c_dist in K06) is 1.0/(some known minimum distance to a
    nearest neighbor).
    k is the number of nearest neighbors for which weights (K06 eq. 1)
    should be calculated.
    """
    knn = KDTree(data)
    # D, i contain the point itself and the associated zero distance, of course.
    # D are distances (0, d1, d2, d3, ..., dk-1)
    # i are indices (i0, i1, i2, i3, ..., ik-1) where i[:, 0] increment by one
    # Both have shape (N points, k neighbors)
    D, conn = knn.query(knn.data, k)
    if min_dist is None:
        min_dist = D[D > 0].min()
    W = np.exp(-min_dist * D)
    return W, conn
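# Quick check on random 2-D data (a sketch; the only assumption is that the
# self-match at distance 0 should get weight exp(0) = 1):
import numpy as np

pts = np.random.rand(100, 2)
W, conn = initial_weights(pts, k=4)
assert np.allclose(W[:, 0], 1.0)  # zero self-distance -> weight 1
print(W.shape, conn.shape)        # (100, 4) (100, 4)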
class Parsec(StarKitModel):
    mh = Parameter()
    mass = Parameter()
    age = Parameter()

    inputs = ()
    outputs = ('teff', 'logg', 'lum')

    def __init__(self, parsec_store, mh=0.0, mass=1.0, age=5e9):
        super(Parsec, self).__init__(mh, mass, age)
        try:
            self.parsec_store = pd.HDFStore(parsec_store)
        except TypeError:
            self.parsec_store = parsec_store

        self.evolution_data = [self.parsec_store[key]
                               for key in self.parsec_store.keys()]
        self.parsec_store.close()

        self.mh_mass = np.empty((len(self.evolution_data), 2))
        for i, ev_data in enumerate(self.evolution_data):
            mh = ev_data['MH'][0]
            mass = ev_data['MASS'][0]
            self.mh_mass[i] = mh, mass

        self.mh_mass_kd_tree = KDTree(self.mh_mass)

    def evaluate(self, mh, mass, age):
        distance, idx = self.mh_mass_kd_tree.query(
            np.array([mh, mass]).squeeze())
        ev_data = self.evolution_data[idx]
        age_ev_data = ev_data['AGE'].values
        out_ev_data = ev_data[['TEFF', 'LOG_G', 'LOG_L']].values
        teff, logg, log_l = interpolate.interp1d(
            age_ev_data, out_ev_data.T, bounds_error=False)(np.squeeze(age))
        return teff, logg, 10 ** log_l
def get_potential_cells(coors, cmesh, centroids=None, extrapolate=True):
    """
    Get cells that potentially contain points with the given physical
    coordinates.

    Parameters
    ----------
    coors : array
        The physical coordinates.
    cmesh : CMesh instance
        The cmesh defining the cells.
    centroids : array, optional
        The centroids of the cells.
    extrapolate : bool
        If True, even the points that are surely outside of the cmesh are
        considered and assigned potential cells.

    Returns
    -------
    potential_cells : array
        The indices of the cells that potentially contain the points.
    offsets : array
        The offsets into `potential_cells` for each point: a point ``ip`` is
        potentially in cells ``potential_cells[offsets[ip]:offsets[ip+1]]``.
    """
    from scipy.spatial import cKDTree as KDTree

    if centroids is None:
        centroids = cmesh.get_centroids(cmesh.tdim)

    kdtree = KDTree(coors)

    conn = cmesh.get_cell_conn()
    cc = conn.indices.reshape(cmesh.n_el, -1)
    cell_coors = cmesh.coors[cc]

    rays = cell_coors - centroids[:, None]
    radii = nm.linalg.norm(rays, ord=nm.inf, axis=2).max(axis=1)

    potential_cells = [[]] * coors.shape[0]
    for ic, centroid in enumerate(centroids):
        ips = kdtree.query_ball_point(centroid, radii[ic], p=nm.inf)
        if len(ips):
            for ip in ips:
                if not len(potential_cells[ip]):
                    potential_cells[ip] = []
                potential_cells[ip].append(ic)

    lens = nm.array([0] + [len(ii) for ii in potential_cells], dtype=nm.int32)

    if extrapolate:
        # Deal with the points outside of the field domain - insert elements
        # incident to the closest mesh vertex.
        iin = nm.where(lens[1:] == 0)[0]
        if len(iin):
            kdtree = KDTree(cmesh.coors)
            ics = kdtree.query(coors[iin])[1]

            cmesh.setup_connectivity(0, cmesh.tdim)
            conn = cmesh.get_conn(0, cmesh.tdim)
            oo = conn.offsets

            for ii, ip in enumerate(iin):
                ik = ics[ii]
                potential_cells[ip] = conn.indices[oo[ik]:oo[ik + 1]]
                lens[ip + 1] = len(potential_cells[ip])

    offsets = nm.cumsum(lens, dtype=nm.int32)
    potential_cells = nm.concatenate(potential_cells).astype(nm.int32)

    return potential_cells, offsets
import numpy as np
from scipy.spatial import cKDTree as KDT


def xymatch(x1, y1, x2, y2, tol=None, nnearest=1):
    """
    Finds matches in one catalog to another.

    Parameters
    ----------
    x1 : array-like
        X-coordinates of first catalog
    y1 : array-like
        Y-coordinates of first catalog
    x2 : array-like
        X-coordinates of second catalog
    y2 : array-like
        Y-coordinates of second catalog
    tol : float or None, optional
        How close a match has to be to count as a match. If None, all
        nearest neighbors for the first catalog will be returned.
    nnearest : int, optional
        The nth neighbor to find. E.g., 1 for the nearest neighbor, 2 for
        the second nearest neighbor, etc. Particularly useful if you want
        to get the nearest *non-self* neighbor of a catalog. To do this,
        use: ``xymatch(x, y, x, y, nnearest=2)``

    Returns
    -------
    idx1 : int array
        Indices into the first catalog of the matches. Will never be
        larger than `x1`/`y1`.
    idx2 : int array
        Indices into the second catalog of the matches. Will never be
        larger than `x1`/`y1`.
    ds : float array
        Distance between the matches.
    """
    x1 = np.array(x1, copy=False)
    y1 = np.array(y1, copy=False)
    x2 = np.array(x2, copy=False)
    y2 = np.array(y2, copy=False)

    if x1.shape != y1.shape:
        raise ValueError('x1 and y1 do not match!')
    if x2.shape != y2.shape:
        raise ValueError('x2 and y2 do not match!')

    # this is equivalent to, but faster than just doing np.array([x1, y1])
    coords1 = np.empty((x1.size, 2))
    coords1[:, 0] = x1
    coords1[:, 1] = y1

    # this is equivalent to, but faster than just doing np.array([x2, y2])
    coords2 = np.empty((x2.size, 2))
    coords2[:, 0] = x2
    coords2[:, 1] = y2

    kdt = KDT(coords2)
    if nnearest == 1:
        ds, idxs2 = kdt.query(coords1)
    elif nnearest > 1:
        retval = kdt.query(coords1, nnearest)
        ds = retval[0][:, -1]
        idxs2 = retval[1][:, -1]
    else:
        raise ValueError('invalid nnearest ' + str(nnearest))

    idxs1 = np.arange(x1.size)

    if tol is not None:
        msk = ds < tol
        idxs1 = idxs1[msk]
        idxs2 = idxs2[msk]
        ds = ds[msk]

    return idxs1, idxs2, ds
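# Usage sketch for the nearest non-self neighbour trick mentioned in the
# docstring (coordinates are made up):
import numpy as np

x = np.array([0., 1., 5.])
y = np.array([0., 0., 0.])
idx1, idx2, ds = xymatch(x, y, x, y, nnearest=2)
# each point matches its nearest *other* point:
# idx2 -> [1, 0, 1], ds -> [1., 1., 4.]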
from numpy import ndarray
from scipy.spatial import cKDTree as KDTree


class KDicTree(dict):
    '''Wrapper around the scipy.spatial.KDTree for labelled points.

    Use like dict to register or update points:

        tree = KDicTree({'1': (0, 0), 2: (2, 2), '3': (45, 45)})
        tree['1'] = (1, 1)
        tree['2'] = (5, 5)
        tree['3'] = (50, 50)

    Then use KDTree queries:

        tree.query_ball_point((3, 3), 10)
        ['1', 2, '2']

    Parameters
    ----------
    data : labelled (N,K) dict
        The data points to be indexed, labelled in a dictionary.
    leafsize : int, optional
        The number of points at which the algorithm switches over to
        brute-force. Has to be positive.

    See Also
    --------
    scipy.spatial.KDTree
    scipy.spatial.cKDTree
    '''

    def __init__(self, data, leafsize=16):
        self.tree = None
        self.ids = []  # maps tree to dict keys
        self.altered = True
        self.leafsize = leafsize
        super().__init__(data)

    def __setitem__(self, key, point):
        '''Set point for self[key].'''
        super().__setitem__(key, point)
        self.altered = True

    def __delitem__(self, key):
        '''Delete self[key].'''
        super().__delitem__(key)
        self.altered = True

    def build_tree(self):
        '''Gets called automatically by a query.'''
        if not self.altered:
            return
        self.tree = KDTree(list(self.values()), leafsize=self.leafsize)
        self.ids = list(self.keys())
        self.altered = False

    def map_ids(self, ids):
        '''Maps the result of queries to dict keys.'''
        if isinstance(ids, (tuple, list, ndarray)):
            return tuple(map(self.map_ids, ids))
        return self.ids[ids]

    def query(self, x, k=1, eps=0, p=2, distance_upper_bound=float("inf")):
        '''Query the kd-tree for nearest neighbors.'''
        self.build_tree()
        dists, ids = self.tree.query(x, k, eps, p, distance_upper_bound)
        return (dists, self.map_ids(ids))

    def query_ball_point(self, x, r, p=2., eps=0):
        '''Find all points within distance r of point(s) x.'''
        self.build_tree()
        return self.map_ids(self.tree.query_ball_point(x, r, p, eps))

    def query_pairs(self, r, p=2., eps=0):
        '''Find all pairs of points within a distance r.'''
        self.build_tree()
        return [tuple(self.map_ids(pair))
                for pair in self.tree.query_pairs(r, p=p, eps=eps)]
def mosaic_texture(humfile, sonpath, cs2cs_args="epsg:26949", res=99, nn=5, weight=1):
    '''
    Create mosaics of the spatially referenced sidescan echograms

    Syntax
    ----------
    [] = PyHum.mosaic_texture(humfile, sonpath, cs2cs_args, res, nn, weight)

    Parameters
    ----------
    humfile : str
        path to the .DAT file
    sonpath : str
        path where the *.SON files are
    cs2cs_args : int, *optional* [Default="epsg:26949"]
        arguments to create coordinates in a projected coordinate system
        this argument gets given to pyproj to turn wgs84 (lat/lon) coordinates
        into any projection supported by the proj.4 libraries
    res : float, *optional* [Default=0]
        grid resolution of output gridded texture map
        if res=99, res will be determined automatically from the spatial
        resolution of 1 pixel
    nn : int, *optional* [Default=5]
        number of nearest neighbours for gridding
    weight : int, *optional* [Default=1]
        specifies the type of pixel weighting in the gridding process
        weight = 1, based on grazing angle and inverse distance weighting
        weight = 2, based on grazing angle only
        weight = 3, inverse distance weighting only
        weight = 4, no weighting

    Returns
    -------
    sonpath+'GroundOverlay.kml': kml file
        contains gridded (or point cloud) sidescan intensity map for
        importing into google earth of the pth chunk
    sonpath+'map.png' : image overlay associated with the kml file
    '''

    # prompt user to supply file if no input file given
    if not humfile:
        print('An input file is required!!!!!!')
        Tk().withdraw()  # we don't want a full GUI, so keep the root window from appearing
        humfile = askopenfilename(filetypes=[("DAT files", "*.DAT")])

    # prompt user to supply directory if no input sonpath is given
    if not sonpath:
        print('A *.SON directory is required!!!!!!')
        Tk().withdraw()  # we don't want a full GUI, so keep the root window from appearing
        sonpath = askdirectory()

    # print given arguments to screen and convert data type where necessary
    if humfile:
        print('Input file is %s' % (humfile))

    if sonpath:
        print('Sonar file path is %s' % (sonpath))

    if cs2cs_args:
        print('cs2cs arguments are %s' % (cs2cs_args))

    if res:
        res = np.asarray(res, float)
        print('Gridding resolution: %s' % (str(res)))

    if nn:
        nn = int(nn)
        print('Number of nearest neighbours for gridding: %s' % (str(nn)))

    if weight:
        weight = int(weight)
        print('Weighting for gridding: %s' % (str(weight)))

    ##nn = 5 #number of nearest neighbours in gridding
    noisefloor = 10  # noise threshold in dB W

    # start timer
    if os.name == 'posix':  # true if linux/mac or cygwin on windows
        start = time.time()
    else:  # windows
        start = time.clock()

    trans = pyproj.Proj(init=cs2cs_args)

    # if son path name supplied has no separator at end, put one on
    if sonpath[-1] != os.sep:
        sonpath = sonpath + os.sep

    base = humfile.split('.DAT')  # get base of file name for output
    base = base[0].split(os.sep)[-1]

    # remove underscores, negatives and spaces from basename
    base = humutils.strip_base(base)

    meta = loadmat(os.path.normpath(os.path.join(sonpath, base + 'meta.mat')))

    esi = np.squeeze(meta['e'])
    nsi = np.squeeze(meta['n'])

    theta = np.squeeze(meta['heading']) / (180 / np.pi)

    # load memory mapped scans
    shape_port = np.squeeze(meta['shape_port'])
    if shape_port != '':
        if os.path.isfile(os.path.normpath(os.path.join(sonpath, base + '_data_port_lar.dat'))):
            port_fp = io.get_mmap_data(sonpath, base, '_data_port_lar.dat', 'float32', tuple(shape_port))
        else:
            port_fp = io.get_mmap_data(sonpath, base, '_data_port_la.dat', 'float32', tuple(shape_port))

    shape_star = np.squeeze(meta['shape_star'])
    if shape_star != '':
        if os.path.isfile(os.path.normpath(os.path.join(sonpath, base + '_data_star_lar.dat'))):
            star_fp = io.get_mmap_data(sonpath, base, '_data_star_lar.dat', 'float32', tuple(shape_star))
        else:
            star_fp = io.get_mmap_data(sonpath, base, '_data_star_la.dat', 'float32', tuple(shape_star))

    # time varying gain
    tvg = ((8.5 * 10**-5) + (3 / 76923) + ((8.5 * 10**-5) / 4)) * meta['c']

    # depth correction
    dist_tvg = np.squeeze(((np.tan(np.radians(25))) * np.squeeze(meta['dep_m'])) - (tvg))

    # read in range data
    R_fp = io.get_mmap_data(sonpath, base, '_data_range.dat', 'float32', tuple(shape_star))

    dx = np.arcsin(meta['c'] / (1000 * meta['t'] * meta['f']))
    pix_m = meta['pix_m']
    c = meta['c']

    if not os.path.isfile(os.path.normpath(os.path.join(sonpath, base + "S.p"))):
        #if 2 > 1:
        inputfiles = []
        if len(shape_star) > 2:
            for p in range(len(star_fp)):
                e = esi[shape_port[-1]*p:shape_port[-1]*(p+1)]
                n = nsi[shape_port[-1]*p:shape_port[-1]*(p+1)]
                t = theta[shape_port[-1]*p:shape_port[-1]*(p+1)]
                d = dist_tvg[shape_port[-1]*p:shape_port[-1]*(p+1)]
                dat_port = port_fp[p]
                dat_star = star_fp[p]
                data_R = R_fp[p]
                print("writing chunk %s " % (str(p)))
                write_points(e, n, t, d, dat_port, dat_star, data_R,
                             pix_m, res, cs2cs_args, sonpath, p, c, dx)
                inputfiles.append(os.path.normpath(os.path.join(sonpath, 'x_y_class' + str(p) + '.asc')))
        else:
            p = 0
            print("writing chunk %s " % (str(p)))
            write_points(esi, nsi, theta, dist_tvg, port_fp, star_fp, R_fp,
                         meta['pix_m'], res, cs2cs_args, sonpath, 0, c, dx)
            inputfiles.append(os.path.normpath(os.path.join(sonpath, 'x_y_class' + str(p) + '.asc')))

        #trans = pyproj.Proj(init=cs2cs_args)

        # D, R, h, t
        print("reading points from %s files" % (str(len(inputfiles))))
        X, Y, S, D, R, h, t, i = getxys(inputfiles)

        print("%s points read from %s files" % (str(len(S)), str(len(inputfiles))))

        # remove values where sidescan intensity is zero
        ind = np.where(np.logical_not(S == 0))[0]

        X = X[ind]; Y = Y[ind]
        S = S[ind]; D = D[ind]
        R = R[ind]; h = h[ind]
        t = t[ind]; i = i[ind]

        del ind

        # save to file for temporary storage
        pickle.dump(S, open(os.path.normpath(os.path.join(sonpath, base + "S.p")), "wb")); del S
        pickle.dump(D, open(os.path.normpath(os.path.join(sonpath, base + "D.p")), "wb")); del D
        pickle.dump(t, open(os.path.normpath(os.path.join(sonpath, base + "t.p")), "wb")); del t
        pickle.dump(i, open(os.path.normpath(os.path.join(sonpath, base + "i.p")), "wb")); del i
        pickle.dump(X, open(os.path.normpath(os.path.join(sonpath, base + "X.p")), "wb")); del X
        pickle.dump(Y, open(os.path.normpath(os.path.join(sonpath, base + "Y.p")), "wb")); del Y
        pickle.dump(R, open(os.path.normpath(os.path.join(sonpath, base + "R.p")), "wb"))
        pickle.dump(h, open(os.path.normpath(os.path.join(sonpath, base + "h.p")), "wb"))

        # grazing angle
        g = np.arctan2(R.flatten(), h.flatten())
        pickle.dump(g, open(os.path.normpath(os.path.join(sonpath, base + "g.p")), "wb")); del g, R, h

    print("creating grids ...")

    if res == 0:
        res = 99

    if res == 99:
        #### prepare grids
        R = pickle.load(open(os.path.normpath(os.path.join(sonpath, base + "R.p")), "rb"))

        ## actual along-track resolution is this: dx times dy = Af
        tmp = R * dx * (c * 0.007 / 2)
        del R

        resg = np.min(tmp[tmp > 0])
        del tmp
    else:
        resg = res

    X = pickle.load(open(os.path.normpath(os.path.join(sonpath, base + "X.p")), "rb"))
    Y = pickle.load(open(os.path.normpath(os.path.join(sonpath, base + "Y.p")), "rb"))

    humlon, humlat = trans(X, Y, inverse=True)

    grid_x, grid_y = np.meshgrid(np.arange(np.min(X), np.max(X), resg),
                                 np.arange(np.min(Y), np.max(Y), resg))

    shape = np.shape(grid_x)

    tree = KDTree(np.column_stack((X.flatten(), Y.flatten())))
    del X, Y

    print("mosaicking ...")

    # k nearest neighbour
    try:
        dist, inds = tree.query(np.column_stack((grid_x.flatten(), grid_y.flatten())), k=nn, n_jobs=-1)
    except:
        #print(".... update your scipy installation to use faster kd-tree")
        dist, inds = tree.query(np.column_stack((grid_x.flatten(), grid_y.flatten())), k=nn)

    #del grid_x, grid_y

    if weight == 1:
        g = pickle.load(open(os.path.normpath(os.path.join(sonpath, base + "g.p")), "rb"))
        w = g[inds] + 1.0 / dist**2
        del g
    elif weight == 2:
        g = pickle.load(open(os.path.normpath(os.path.join(sonpath, base + "g.p")), "rb"))
        w = g[inds]
        del g
    elif weight == 3:
        w = 1.0 / dist**2
    elif weight == 4:
        w = 1.0
    #g = pickle.load(open(os.path.normpath(os.path.join(sonpath, base + "g.p")), "rb"))
    #w = g[inds] + 1.0 / dist**2
    #del g

    if weight < 4:
        w[np.isinf(w)] = 1
        w[np.isnan(w)] = 1
        w[w > 10000] = 10000
        w[w <= 0] = 1

    # load in sidescan intensity
    S = pickle.load(open(os.path.normpath(os.path.join(sonpath, base + "S.p")), "rb"))

    # filter out noise pixels
    S[S < noisefloor] = np.nan

    if nn == 1:
        Sdat_g = (w * S.flatten()[inds]).reshape(shape)
        del w
        dist = dist.reshape(shape)
    else:
        if weight < 4:
            Sdat_g = (np.nansum(w * S.flatten()[inds], axis=1) / np.nansum(w, axis=1)).reshape(shape)
        else:
            Sdat_g = (np.nansum(S.flatten()[inds], axis=1)).reshape(shape)
        del w
        dist = np.nanmean(dist, axis=1).reshape(shape)

    del S

    Sdat_g[dist > 1] = np.nan
    Sdat_g[Sdat_g < noisefloor] = np.nan

    dat = Sdat_g.copy()
    dat[dist > 1] = 0
    dat2 = replace_nans.RN(dat.astype('float64'), 1000, 0.01, 2, 'localmean').getdata()
    dat2[dat == 0] = np.nan
    del dat

    dat2[dat2 < noisefloor] = np.nan

    Sdat_g = dat2.copy()
    del dat2

    Sdat_g[Sdat_g == 0] = np.nan
    Sdat_g[np.isinf(Sdat_g)] = np.nan

    Sdat_gm = np.ma.masked_invalid(Sdat_g)
    del Sdat_g

    glon, glat = trans(grid_x, grid_y, inverse=True)
    del grid_x, grid_y

    # =========================================================
    print("creating kmz file ...")
    ## new way to create kml file
    pixels = 1024 * 10

    fig, ax = humutils.gearth_fig(llcrnrlon=glon.min(),
                                  llcrnrlat=glat.min(),
                                  urcrnrlon=glon.max(),
                                  urcrnrlat=glat.max(),
                                  pixels=pixels)
    cs = ax.pcolormesh(glon, glat, Sdat_gm)
    ax.set_axis_off()
    fig.savefig(os.path.normpath(os.path.join(sonpath, 'class_overlay1.png')),
                transparent=True, format='png')

    fig = plt.figure(figsize=(1.0, 4.0), facecolor=None, frameon=False)
    ax = fig.add_axes([0.0, 0.05, 0.2, 0.9])
    cb = fig.colorbar(cs, cax=ax)
    cb.set_label('Texture lengthscale [m]', rotation=-90, color='k', labelpad=20)
    fig.savefig(os.path.normpath(os.path.join(sonpath, 'class_legend.png')),
                transparent=False, format='png')

    humutils.make_kml(llcrnrlon=glon.min(), llcrnrlat=glat.min(),
                      urcrnrlon=glon.max(), urcrnrlat=glat.max(),
                      figs=[os.path.normpath(os.path.join(sonpath, 'class_overlay1.png'))],
                      colorbar=os.path.normpath(os.path.join(sonpath, 'class_legend.png')),
                      kmzfile=os.path.normpath(os.path.join(sonpath, 'class_GroundOverlay.kmz')),
                      name='Sidescan Intensity')

    # =========================================================
    print("drawing and printing map ...")
    fig = plt.figure(frameon=False)
    map = Basemap(projection='merc', epsg=cs2cs_args.split(':')[1],
                  resolution='i',  #h #f
                  llcrnrlon=np.min(humlon) - 0.001,
                  llcrnrlat=np.min(humlat) - 0.001,
                  urcrnrlon=np.max(humlon) + 0.001,
                  urcrnrlat=np.max(humlat) + 0.001)

    gx, gy = map.projtran(glon, glat)

    try:
        map.arcgisimage(server='http://server.arcgisonline.com/ArcGIS',
                        service='ESRI_Imagery_World_2D',
                        xpixels=1000, ypixels=None, dpi=300)
    except:
        map.arcgisimage(server='http://server.arcgisonline.com/ArcGIS',
                        service='World_Imagery',
                        xpixels=1000, ypixels=None, dpi=300)
    #finally:
    #   print("error: map could not be created...")

    ax = plt.Axes(fig, [0., 0., 1., 1.], )
    ax.set_axis_off()
    fig.add_axes(ax)

    if Sdat_gm.size > 25000000:
        print("matrix size > 25,000,000 - decimating by factor of 5 for display")
        map.pcolormesh(gx[::5, ::5], gy[::5, ::5], Sdat_gm[::5, ::5],
                       vmin=np.nanmin(Sdat_gm), vmax=np.nanmax(Sdat_gm))
    else:
        map.pcolormesh(gx, gy, Sdat_gm,
                       vmin=np.nanmin(Sdat_gm), vmax=np.nanmax(Sdat_gm))

    custom_save2(sonpath, 'class_map_imagery')
    del fig

    if os.name == 'posix':  # true if linux/mac
        elapsed = (time.time() - start)
    else:  # windows
        elapsed = (time.clock() - start)
    print("Processing took", elapsed, "seconds to analyse")

    print("Done!")
def runPixMatch(outpre, filter):
    if filter == 'f606w':
        let = 'v'
    else:
        let = 'i'

    if outpre == 'lower':
        x_drc_low = drc_low['x_' + let]
        y_drc_low = drc_low['y_' + let]
        xm_flc_low = flc_all['xdrc_low_' + filter]
        ym_flc_low = flc_all['ydrc_low_' + filter]

        coords1low = np.empty((xm_flc_low.size, 2))
        coords2low = np.empty((x_drc_low.size, 2))
        coords1low[:, 0] = xm_flc_low
        coords1low[:, 1] = ym_flc_low
        coords2low[:, 0] = x_drc_low
        coords2low[:, 1] = y_drc_low

        kdt = KDT(coords2low)
        idxs2 = kdt.query(coords1low)[1]
        ds = distArr(xm_flc_low, ym_flc_low, x_drc_low[idxs2], y_drc_low[idxs2])

        idxs1 = np.arange(xm_flc_low.size)
        msk = ds < matchtol
        idxs1 = idxs1[msk]
        idxs2 = idxs2[msk]
        ds = ds[msk]
    else:
        x_drc_up = drc_up['x_' + let]
        y_drc_up = drc_up['y_' + let]
        xm_flc_up = flc_all['xdrc_up_' + filter]
        ym_flc_up = flc_all['ydrc_up_' + filter]

        coords1up = np.empty((xm_flc_up.size, 2))
        coords2up = np.empty((x_drc_up.size, 2))
        coords1up[:, 0] = xm_flc_up
        coords1up[:, 1] = ym_flc_up
        coords2up[:, 0] = x_drc_up
        coords2up[:, 1] = y_drc_up

        kdt = KDT(coords2up)
        idxs2 = kdt.query(coords1up)[1]
        ds = distArr(xm_flc_up, ym_flc_up, x_drc_up[idxs2], y_drc_up[idxs2])

        idxs1 = np.arange(xm_flc_up.size)
        msk = ds < matchtol
        idxs1 = idxs1[msk]
        idxs2 = idxs2[msk]
        ds = ds[msk]

    print(len(idxs1))

    outfile = main_dir + 'hor-I-cut_drc_' + outpre + '_' + filter + '_tol{0}_magCuts.txt'.format(matchtol)
    np.savetxt(outfile, idxs2, fmt='%4i')

    outfile = main_dir + 'hor-I-cut_flc_' + outpre + '_' + filter + '_tol{0}_magCuts.txt'.format(matchtol)
    np.savetxt(outfile, idxs1, fmt='%4i')

    # outfile = main_dir + 'hor-I-cut_ds_' + outpre + '_' + filter + '_tol{0}.txt'.format(matchtol)
    # np.savetxt(outfile, ds, fmt='%1.4f')

    return None
import numpy as np
from scipy.spatial import cKDTree as KDTree


class Invdisttree:
    """inverse-distance-weighted interpolation using KDTree:

        invdisttree = Invdisttree(X, z)  -- data points, values
        interpol = invdisttree(q, nnear=3, eps=0, p=1, weights=None, stat=0)
            interpolates z from the 3 points nearest each query point q;

    For example, interpol[a query point q] finds the 3 data points nearest q,
    at distances d1 d2 d3, and returns the IDW average of the values z1 z2 z3

        (z1/d1 + z2/d2 + z3/d3) / (1/d1 + 1/d2 + 1/d3)
        = .55 z1 + .27 z2 + .18 z3  for distances 1 2 3

    q may be one point, or a batch of points.
    eps: approximate nearest, dist <= (1 + eps) * true nearest
    p: use 1 / distance**p
    weights: optional multipliers for 1 / distance**p, of the same shape as q
    stat: accumulate wsum, wn for average weights

    How many nearest neighbors should one take?
    a) start with 8 11 14 .. 28 in 2d 3d 4d .. 10d; see Wendel's formula
    b) make 3 runs with nnear= e.g. 6 8 10, and look at the results --
       |interpol 6 - interpol 8| etc., or |f - interpol*| if you have f(q).
       I find that runtimes don't increase much at all with nnear -- ymmv.

    p=1, p=2?
    p=2 weights nearer points more, farther points less.
    In 2d, the circles around query points have areas ~ distance**2,
    so p=2 is inverse-area weighting. For example,

        (z1/area1 + z2/area2 + z3/area3) / (1/area1 + 1/area2 + 1/area3)
        = .74 z1 + .18 z2 + .08 z3  for distances 1 2 3

    Similarly, in 3d, p=3 is inverse-volume weighting.

    Scaling:
    if different X coordinates measure different things, Euclidean distance
    can be way off. For example, if X0 is in the range 0 to 1 but X1 0 to
    1000, the X1 distances will swamp X0; rescale the data, i.e. make
    X0.std() ~= X1.std().

    A nice property of IDW is that it's scale-free around query points:
    if I have values z1 z2 z3 from 3 points at distances d1 d2 d3,
    the IDW average

        (z1/d1 + z2/d2 + z3/d3) / (1/d1 + 1/d2 + 1/d3)

    is the same for distances 1 2 3, or 10 20 30 -- only the ratios matter.
    In contrast, the commonly-used Gaussian kernel exp(-(distance/h)**2)
    is exceedingly sensitive to distance and to h.
    """

    def __init__(self, X, z, leafsize=10, stat=0):
        assert len(X) == len(z), "len(X) %d != len(z) %d" % (len(X), len(z))
        self.tree = KDTree(X, leafsize=leafsize)  # build the tree
        self.z = z
        self.stat = stat
        self.wn = 0
        self.wsum = None

    def __call__(self, q, nnear=6, eps=0, p=1, weights=None):
        # nnear nearest neighbours of each query point --
        q = np.asarray(q)
        qdim = q.ndim
        if qdim == 1:
            q = np.array([q])
        if self.wsum is None:
            self.wsum = np.zeros(nnear)

        self.distances, self.ix = self.tree.query(q, k=nnear, eps=eps)
        interpol = np.zeros((len(self.distances),) + np.shape(self.z[0]))
        jinterpol = 0
        for dist, ix in zip(self.distances, self.ix):
            if nnear == 1:
                wz = self.z[ix]
            elif dist[0] < 1e-10:
                wz = self.z[ix[0]]
            else:  # weight z s by 1/dist --
                w = 1 / dist**p
                if weights is not None:
                    w *= weights[ix]  # >= 0
                w /= np.sum(w)
                wz = np.dot(w, self.z[ix])
                if self.stat:
                    self.wn += 1
                    self.wsum += w
            interpol[jinterpol] = wz
            jinterpol += 1
        return interpol if qdim > 1 else interpol[0]
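# Usage sketch on synthetic data (the test function is made up for the demo):
import numpy as np

np.random.seed(1)
X = np.random.uniform(size=(200, 2))
z = np.sin(6 * X[:, 0]) * np.cos(6 * X[:, 1])
invdisttree = Invdisttree(X, z)
queries = np.array([[0.5, 0.5], [0.1, 0.9]])
print(invdisttree(queries, nnear=6, p=2))  # IDW estimates at the two queries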
def get_area_avg_from_erai_data(start_year=-np.Inf, end_year=np.Inf,
                                var_folder="", varname="",
                                mask=None, mask_lons=None, mask_lats=None):
    """
    Interpolate the mask to the ERA-Interim grid using nearest neighbour
    approach

    :param start_year:
    :param end_year:
    :param var_folder:
    :param varname:
    :param mask:
    :return:
    """

    def _get_year(fn):
        return int(fn.split(".")[0].split("_")[1])

    flist = [
        os.path.join(var_folder, fn) for fn in os.listdir(var_folder)
        if fn.startswith(varname)
        and (start_year <= _get_year(fn)) and (_get_year(fn) <= end_year)
    ]

    print(flist)

    ktree = None
    mask_interpolated = None
    lons_target, lats_target = None, None

    ser_list = []
    for fp in flist:
        with Dataset(fp) as ds:
            time_var = ds.variables["time"]
            times = num2date(time_var[:], time_var.units)
            print(times[0], times[-1])

            # Determine nearest neighbours for interpolation (do it only once)
            if ktree is None:
                # get lons and lats from the bathymetry file
                data_folder_p = Path(var_folder).parent
                for f in data_folder_p.iterdir():
                    if f.name.lower().startswith("bathy_meter"):
                        with Dataset(str(f)) as ds_bathy:
                            lons_target, lats_target = [
                                ds_bathy.variables[k][:]
                                for k in ["nav_lon", "nav_lat"]
                            ]
                        break

                x, y, z = lat_lon.lon_lat_to_cartesian(mask_lons.flatten(),
                                                       mask_lats.flatten())
                xt, yt, zt = lat_lon.lon_lat_to_cartesian(lons_target.flatten(),
                                                          lats_target.flatten())

                ktree = KDTree(list(zip(x, y, z)))
                dists, inds = ktree.query(list(zip(xt, yt, zt)), k=1)

                mask_interpolated = mask.flatten()[inds]
                mask_interpolated = mask_interpolated.reshape(lons_target.shape)

            vals = [
                field[mask_interpolated].mean()
                for field in ds.variables[varname][:]
            ]
            ser = pd.Series(index=times, data=vals)
            if varname == "TT":
                ser -= 273.15
            ser.sort_index(inplace=True)
            ser_list.append(ser)

    return pd.concat(ser_list)
class NearestNeighborFinder():
    """
    Nearest neighbor search object for NEMO netCDF output files.
    """

    def __init__(self, ncfilename):
        """
        Create new instance.

        :arg str ncfilename: NEMO netCDF file name
        """
        self.filename = ncfilename
        self.data_dim = None
        self.grid_type = None
        self._build_tree()

    def _build_tree(self):
        """
        Construct nearest neighbor tree.
        """

        def parse_grid_type(ncf):
            """
            Figure out which discretization the file contains, T, U or V

            Reads the description attribute, e.g. "ocean T grid variables"

            returns 't', 'u', or 'v'
            """
            return 't'  # HACK assume always T grid
            desc = ncf.description
            words = desc.split()
            assert words[0] == 'ocean'
            assert words[2] == 'grid'
            return words[1].lower()

        with netCDF4.Dataset(self.filename) as ncf:
            self.grid_type = parse_grid_type(ncf)
            assert self.grid_type == 't', 'Only T grid is supported currently'
            # compute land mask
            self.data_dim = 3 if 'e3t' in ncf.variables else 2
            if self.data_dim == 3:
                # NOTE does not take time-dependent wetting-drying into account
                e = ncf['e3t'][0, :, :, :]
                self.landmask = numpy.all(e.mask, axis=0)
                # 1D array of all wet points in raveled index
                self.wetmask = numpy.nonzero(~self.landmask.ravel())[0]
                # get coordinates
                self.lon = ncf['nav_lon'][:]
                self.lat = ncf['nav_lat'][:]
                depth = ncf['deptht'][:]
                self.z = -depth
                # 1D arrays of all wet points
                self.valid_lon = self.lon.ravel()[self.wetmask]
                self.valid_lat = self.lat.ravel()[self.wetmask]
            else:
                # read a field to get landmask
                for v in ncf.variables:
                    var = ncf[v]
                    if len(var.shape) == 3:
                        # 2D time dependent field
                        self.landmask = numpy.all(var[:].mask, axis=0)
                        break
                self.wetmask = numpy.nonzero(~self.landmask.ravel())[0]
                # get coordinates
                self.lon = ncf['nav_lon'][:]
                self.lat = ncf['nav_lat'][:]
                self.z = 0.0
                # 1D arrays of all wet points
                self.valid_lon = self.lon.ravel()[self.wetmask]
                self.valid_lat = self.lat.ravel()[self.wetmask]
        assert len(self.valid_lat) > 0, \
            'No valid points found in {:}'.format(self.filename)
        coords = numpy.vstack((self.valid_lon, self.valid_lat)).T
        self.tree = KDTree(coords)

    def find(self, lon, lat, z):
        """
        Finds nearest neighbor index for point (lon, lat, z)

        :arg lon: longitude coordinate
        :arg lat: latitude coordinate
        :arg z: z coordinate (negative downwards)
        :returns: i, j, k indices of nearest neighbor indices
        """
        dist, index = self.tree.query([lon, lat], k=1)
        index = self.wetmask[index]
        i, j = numpy.unravel_index(index, self.lat.shape)
        if self.data_dim == 3:
            k = numpy.abs(self.z - z).argmin()
        else:
            k = None
        return i, j, k
def remove_ind(reference_pop, removal_size, removal_type):
    begin_time = time.time()

    if removal_type == 'random':
        # reference_pop is a numpy array of size (n_reference_pop, pop_dim)
        reference_pop = list(reference_pop)  # list of numpy arrays (one per individual)
        random.shuffle(reference_pop)
        # pop the last removal_size individuals
        for _ in range(removal_size):
            reference_pop.pop()
        # turn back into a numpy array
        reference_pop = np.array(reference_pop)

    if removal_type == 'least_novel':
        # compute novelties of reference_pop inside reference_pop
        novelties = assess_novelties(reference_pop, reference_pop)
        removal_indices = np.argpartition(novelties, removal_size)[:removal_size]
        reference_pop = np.delete(reference_pop, removal_indices, 0)

    if removal_type == 'least_novel_iter':
        removal_indices = []
        temp_ref_pop = copy.deepcopy(reference_pop)
        for j in range(removal_size):
            # compute novelties of temp_ref_pop inside temp_ref_pop
            novelties = assess_novelties(temp_ref_pop, temp_ref_pop)
            remov_idx = np.argmin(novelties)
            remov_ind = temp_ref_pop[remov_idx]
            # NOTE: mapping back to reference_pop relies on exact float equality
            removal_indices.append(np.where(reference_pop == remov_ind)[0][0])
            temp_ref_pop = np.vstack(
                (temp_ref_pop[:remov_idx], temp_ref_pop[remov_idx + 1:]))
        reference_pop = np.delete(reference_pop, removal_indices, 0)

    if removal_type == 'most_novel':
        # compute novelties of reference_pop inside reference_pop
        novelties = assess_novelties(reference_pop, reference_pop)
        removal_indices = np.argpartition(novelties, -removal_size)[-removal_size:]
        reference_pop = np.delete(reference_pop, removal_indices, 0)

    if removal_type == 'most_novel_iter':
        removal_indices = []
        temp_ref_pop = copy.deepcopy(reference_pop)
        for j in range(removal_size):
            # compute novelties of temp_ref_pop inside temp_ref_pop
            novelties = assess_novelties(temp_ref_pop, temp_ref_pop)
            remov_idx = np.argmax(novelties)
            remov_ind = temp_ref_pop[remov_idx]
            removal_indices.append(np.where(reference_pop == remov_ind)[0][0])
            temp_ref_pop = np.vstack(
                (temp_ref_pop[:remov_idx], temp_ref_pop[remov_idx + 1:]))
        reference_pop = np.delete(reference_pop, removal_indices, 0)

    if removal_type == 'gmm_sampling':
        # hypothesis: n_components equals the generative number of components
        n_comp = N
        gmix = mixture.GaussianMixture(n_components=n_comp,
                                       covariance_type='full')
        gmix.fit(reference_pop)
        nodes = gmix.sample(removal_size)[0]
        k_tree = KDTree(reference_pop)
        removal_indices = []
        for node in nodes:
            # for each sampled node, find the closest point in the reference pop
            cond = True
            closest = 1
            # make sure the removal individual was not already chosen
            while cond:
                if closest == 1:
                    possible_removal_index = k_tree.query(node, closest)[1]
                else:
                    possible_removal_index = k_tree.query(
                        node, closest)[1][closest - 1]
                if possible_removal_index not in removal_indices:
                    removal_indices.append(possible_removal_index)
                    cond = False
                else:
                    closest += 1
        reference_pop = np.delete(reference_pop, removal_indices, 0)

    if removal_type == 'grid':
        n_dim = reference_pop.shape[1]
        # compute maximums and minimums on each dimension
        maximums = np.max(reference_pop, 0)
        minimums = np.min(reference_pop, 0)
        ranges = maximums - minimums
        bins_per_dim = math.floor(math.exp(math.log(removal_size) / n_dim)) + 1
        grid_positions = []
        for i in range(n_dim):
            # important choice on how we make the grid
            grid_position = [
                minimums[i] + ((j + 1) * ranges[i] / bins_per_dim)
                for j in range(bins_per_dim)
            ]
            grid_position.pop()
            grid_positions.append(grid_position)
        mesh = np.meshgrid(*grid_positions)
        nodes = np.array(list(zip(*(dim.flat for dim in mesh))))
        k_tree = KDTree(reference_pop)
        removal_indices = []
        for node in nodes:
            # for each grid node, find the closest point in the reference pop
            cond = True
            closest = 1
            # make sure the removal individual was not already chosen
            while cond:
                if closest == 1:
                    possible_removal_index = k_tree.query(node, closest)[1]
                else:
                    possible_removal_index = k_tree.query(
                        node, closest)[1][closest - 1]
                if possible_removal_index not in removal_indices:
                    removal_indices.append(possible_removal_index)
                    cond = False
                else:
                    closest += 1
        # deal with the missing removals
        nb_missing_removals = removal_size - len(nodes)
        for _ in range(nb_missing_removals):
            query = random.choice(nodes)
            cond = True
            # start with the second closest since the closest is already in
            # removal_indices
            closest = 2
            # make sure the removal individual was not already chosen
            while cond:
                possible_removal_index = k_tree.query(query,
                                                      closest)[1][closest - 1]
                if possible_removal_index not in removal_indices:
                    removal_indices.append(possible_removal_index)
                    cond = False
                else:
                    closest += 1
        reference_pop = np.delete(reference_pop, removal_indices, 0)

    if removal_type == 'grid_density':
        n_dim = reference_pop.shape[1]
        # compute maximums and minimums on each dimension
        maximums = np.max(reference_pop, 0)
        minimums = np.min(reference_pop, 0)
        ranges = maximums - minimums
        bins_per_dim = math.floor(math.exp(math.log(N_CELLS) / n_dim)) + 1
        grid_positions = []
        for i in range(n_dim):
            # important choice on how we make the grid
            grid_position = [
                minimums[i] + (j * ranges[i] / (bins_per_dim - 1))
                for j in range(bins_per_dim)
            ]
            grid_positions.append(grid_position)
        mesh = np.meshgrid(*grid_positions)
        nodes = np.array(list(zip(*(dim.flat for dim in mesh))))
        removal_indices = []
        nb_cells = (bins_per_dim - 1)**n_dim
        grid_density = np.zeros(nb_cells)
        cells = [[] for _ in range(nb_cells)]
        for ind_idx, ind in enumerate(reference_pop):
            dim_indexs = np.zeros(n_dim)
            for i, dim in enumerate(ind):
                grid_pos = grid_positions[i]
                for j in range(bins_per_dim - 1):
                    if dim >= grid_pos[j] and dim < grid_pos[j + 1]:
                        dim_indexs[i] = j + 1
            if 0 not in dim_indexs:
                # individual is inside the grid
                dim_indexs = dim_indexs - 1
                cell_idx = 0
                for k, dim_idx in enumerate(dim_indexs):
                    cell_idx += int(dim_idx * ((bins_per_dim - 1)**k))
                grid_density[cell_idx] += 1
                cells[cell_idx].append(ind_idx)
        grid_density = grid_density / np.sum(grid_density)
        # TEST: square the grid_density to bias more towards high-density cells
        # grid_density = np.square(grid_density)
        grid_law = np.cumsum(grid_density)
        for _ in range(removal_size):
            dice = random.random() * grid_law[-1]
            cell_to_remove_from = np.searchsorted(grid_law, dice)
            cond = True
            n = 0
            while cond:
                if n < LIMIT_DENSITY_ITER:
                    removal_idx = random.choice(cells[cell_to_remove_from])
                else:
                    removal_idx = random.choice(list(range(len(reference_pop))))
                if removal_idx not in removal_indices:
                    removal_indices.append(removal_idx)
                    cond = False
                n += 1
        reference_pop = np.delete(reference_pop, removal_indices, 0)

    end_time = time.time()
    removal_time = end_time - begin_time
    return reference_pop, removal_time
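# The "closest point not already chosen" loop above appears verbatim in the
# 'gmm_sampling' and 'grid' branches. As a sketch (the helper name is ours,
# not from the source), it could be factored out by widening the query until
# an unused index turns up:
import numpy as np

def nearest_unused_index(k_tree, point, used):
    """Index of the tree point nearest to `point` that is not in `used`."""
    k = 1
    while True:
        idxs = np.atleast_1d(k_tree.query(point, k)[1])
        for idx in idxs:
            if idx not in used:
                return idx
        k += 1

# each branch would then reduce to:
#     removal_indices.append(nearest_unused_index(k_tree, node, removal_indices))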
class Dataset: """ SELFE Model Binary IO Functions Presently enables reading SELFE dataformat version 5.0 binary output files. Can read 2D & 3D scalar and vector variables. Usage Example: model = pyselfe.Dataset('1_hvel.64') [t,t_iter,eta,dp,data] = model.read_time_series() t = time in seconds t_iter = iteration number eta = water surface elevation dp = bathymetric depth data = 2D/3D variables @author Dharhas Pothina @version 0.2 """ def __init__(self, fname, nfiles=1): "Initialise by reading header information from file." self.fname = fname fid = open(fname, 'rb') self.read_header(fid) self.read_hgrid(fid) self.data_start_pos = fid.tell() self.compute_step_size() self.datadir = os.path.split(fname)[0] self.nfiles = nfiles def read_header(self, fid): """Read header information from SELFE binary output file.""" # Read misc header info. self.data_format = fid.read(48) self.version = fid.read(48) self.start_time = fid.read(48) self.var_type = fid.read(48) self.var_dimension = fid.read(48) self.nsteps = io.fread(fid, 1, 'i') self.dt = io.fread(fid, 1, 'f') self.skip = io.fread(fid, 1, 'i') self.flag_sv = io.fread(fid, 1, 'i') self.flag_dm = io.fread(fid, 1, 'i') # @todo check when zDes needs to be read # self.zDes = io.fread(fid, 1, 'f'). # Read vert grid info. self.nlevels = io.fread(fid, 1, 'i') self.kz = io.fread(fid, 1, 'i') self.h0 = io.fread(fid, 1, 'f') self.hs = io.fread(fid, 1, 'f') self.hc = io.fread(fid, 1, 'f') self.theta_b = io.fread(fid, 1, 'f') self.theta = io.fread(fid, 1, 'f') self.zlevels = io.fread(fid, self.kz, 'f') self.slevels = io.fread(fid, self.nlevels - self.kz, 'f') def read_hgrid(self, fid): """Read horizontal grid info from SELFE binary output file.""" # Read dimensions. self.np = io.fread(fid, 1, 'i') self.ne = io.fread(fid, 1, 'i') # Read grid and bathymetry. pos = fid.tell() hgridtmp = io.fread(fid, 4 * self.np, 'f') self.x, self.y, self.dp, tmp1 = hgridtmp.reshape(self.np, 4).T # Read bottom index. fid.seek(pos) hgridtmp = io.fread(fid, 4 * self.np, 'i') tmp1, tmp2, tmp3, self.bot_idx = hgridtmp.reshape(self.np, 4).T # Read element connectivity list. self.elem = io.fread(fid, 4 * self.ne, 'i') self.elem = self.elem.reshape(self.ne, 4)[:, 1:4] # Create kdtree. self.kdtree = KDTree(list(zip(self.x, self.y))) def compute_step_size(self): """ Compute the data block size to move one timestep within the file. """ # Calculate grid size depending on whether dataset is 3D or 2D. if self.flag_dm == 3: # @todo check what needs to be done with bIdx (==0?)for dry nodes. bIdx = self.bot_idx bIdx[bIdx < 1] = 1 self.grid_size = sum(self.nlevels - bIdx + 1) elif self.flag_dm == 2: self.grid_size = self.np # Compute step size. self.step_size = 2 * 4 + self.np * 4 + self.grid_size * 4 * self.flag_sv def read_time_series(self, fname, nodes=None, levels=None, xy=np.array([]), nfiles=3, sfile=1, datadir=None): """ Main function to extract a spatial and temporal slice of entire 3D Time series. Returns [t,t_iter,eta,dp,data] where: t : time in seconds from simulation start t_iter : iteration number from simulation start eta : Surface water elevation time series dp : Bathymetry (depth of sea bed from MSL) data[t,nodes,levels,vars] : extracted data slice (i.e. 
Salinity, Temp, Velocity etc) Options: nodes : list of nodes to extract (default is all nodes) level : list of levels to extract (default is all levels) xy : array of x,y coordinates to extract (default is none) sfile : serial number of starting file (default is one) nfiles : number of files in data sequence (default is one) NOTE : node index starts at zero so add one to match up with node numbers in SELFE hgrid.gr3 file. """ # Initialize vars. t = np.array([]) t_iter = np.array([]) eta = [] data = [] if nfiles is None: nfiles = self.nfiles if datadir is None: datadir = self.datadir # Convert xy points to list of nodes, # find parent elements & calculate interpolation weights. if xy.size != 0: if xy.shape[1] != 2: sys.exit('xy array shape wrong.') nodes = np.array([], dtype='int32') arco = np.array([]) for xy00 in xy: parent, tmparco, node3 = self.find_parent_element( xy00[0], xy00[1]) # noqa nodes = np.append(nodes, node3 - 1) arco = np.append(arco, tmparco) # Set default for nodes to be all nodes. # Node index starts at zero. elif nodes is None: nodes = np.arange(self.np) # Set default for level to be all levels. if levels is None: levels = np.arange(self.nlevels) # Check whether 2D or 3D variable is being read. if self.flag_dm == 2: nlevs = 1 levels = np.array([0]) else: nlevs = self.nlevels # Read time series slice. for files in np.arange(sfile, sfile + nfiles): try: fname1 = datadir + '/' + str(files) + '_' + fname fid = open(fname1, 'rb') fid.seek(self.data_start_pos) for i in np.arange(self.nsteps): t = np.append(t, io.fread(fid, 1, 'f')) t_iter = np.append(t_iter, io.fread(fid, 1, 'i')) eta.append(io.fread(fid, self.np, 'f')) tmpdata = io.fread(fid, self.flag_sv * self.grid_size, 'f') tmpdata = tmpdata.reshape(self.np, nlevs, self.flag_sv) # Only keep requested slice of tmpdata. # i.e. tmpdata[nodes, levels, var] tmpdata = tmpdata[nodes, :, :] tmpdata = tmpdata[:, levels, :] data.append(tmpdata) except: continue # import pdb; pdb.set_trace() eta = np.column_stack(eta[:]).T eta = eta[:, nodes] data = np.array(data) dp = self.dp[nodes] # Convert nodal values back to xy point values if needed. if xy.size != 0: # Not sure about this. Need to look at it on more detail put in to # remove shape error. # try: tmpdata = np.zeros((data.shape[0], data.shape[1] // 3, data.shape[2], data.shape[3])) / 0. # noqa # except: # tmpdata = np.zeros((data.shape[0], data.shape[1]//3, data.shape[2]))/0. # noqa tmpeta = np.zeros((eta.shape[0], eta.shape[1] // 3)) / 0. tmpdp = np.zeros(dp.shape[0] // 3) / 0. for i in range(xy.shape[0]): n1 = i * 3 n2 = n1 + 1 n3 = n2 + 1 tmpdata[:, i, :, :] = (data[:, n1, :, :] * arco[n1] + data[:, n2, :, :] * arco[n2] + data[:, n3, :, :] * arco[n3]) tmpeta[:, i] = (eta[:, n1] * arco[n1] + eta[:, n2] * arco[n2] + eta[:, n3] * arco[n3]) tmpdp[i] = (dp[n1] * arco[n1] + dp[n2] * arco[n2] + dp[n3] * arco[n3]) data = tmpdata eta = tmpeta dp = tmpdp return t, t_iter, eta, dp, data def find_parent_element(self, x00, y00): """ Find Parent Element of a given (x,y) point and calculate interpolation weights. Uses brute force search through all elements. Calculates whether point is internal/external to element by comparing summed area of sub triangles with area of triangle element. @todo implement binary tree search for efficiency Returns: parent, arco, node3 : parent element number, interp wieghts and element node numbers. """ def signa(x1, x2, x3, y1, y2, y3): "Return signed area of triangle." 
            return (((x1 - x3) * (y2 - y3) - (x2 - x3) * (y1 - y3)) / 2)

        parent = -1
        nm = self.elem.view()
        out = np.zeros(3) / 0.
        x = self.x.view()
        y = self.y.view()
        for i in np.arange(self.ne):
            aa = 0
            ar = 0  # Area.
            for j in np.arange(3):
                j1 = j + 1
                j2 = j + 2
                if (j1 > 2):
                    j1 = j1 - 3
                if (j2 > 2):
                    j2 = j2 - 3
                n0 = nm[i, j] - 1  # Zero based index rather than 1 based index.
                n1 = nm[i, j1] - 1
                n2 = nm[i, j2] - 1
                # Temporary storage.
                out[j] = signa(x[n1], x[n2], x00, y[n1], y[n2], y00)
                aa = aa + abs(out[j])
                if (j == 0):
                    ar = signa(x[n1], x[n2], x[n0], y[n1], y[n2], y[n0])
            if (ar <= 0):
                sys.exit('Negative area:' + str(ar))
            ae = abs(aa - ar) / ar
            if (ae <= 1.e-5):
                parent = i
                node3 = nm[i, 0:3]
                arco = out[0:3] / ar
                arco[1] = max(0., min(1., arco[1]))
                arco[2] = max(0., min(1., arco[2]))
                if (arco[0] + arco[1] > 1):
                    arco[2] = 0
                    arco[1] = 1 - arco[0]
                else:
                    arco[2] = 1 - arco[0] - arco[1]
                break
        if (parent == -1):
            sys.exit('Cannot find a parent:' + str(x00) + ',' + str(y00))
        else:
            print('Parent Element :', parent + 1, ' ,Nodes: ', node3)
        return parent, arco, node3

    def compute_relative_rec(self, node, level):
        """
        Computes offset for extracting a particular node/level.
        NOTE: THIS FUNCTION IS NOT COMPLETE/TESTED.
        """
        count = 0
        # np.zeros takes the shape as a single tuple argument
        step_size = np.zeros((self.np, self.nlevels, self.flag_sv)) / 0.
        for i in range(self.np):
            for k in range(max(1, self.bot_idx[i]), self.nlevels):
                for m in range(self.flag_sv):
                    count = count + 1
                    step_size[i, k, m] = count

    def read_time_series_xy(self, variable, x, y, sigma_level='middle',
                            return_eta=False):
        """
        Finds the nearest 3 nodes to (x, y) and returns their average value.
        """
        xy = np.hstack((x, y))
        dist, nodes = self.kdtree.query(xy, k=3)
        data = []
        if sigma_level == 'average':
            t, t_iter, eta, dp, data = self.read_time_series(
                variable, nodes=nodes)  # noqa
            eta = eta.mean(axis=1)
            # Take the average over all levels, then over the 3 nodes, for
            # now. Implement IDW or area-weighted averaging later.
            data = data[:, :, :, 0].mean(axis=2).mean(axis=1)
            if return_eta:
                return np.column_stack((t, data)), np.column_stack((t, eta))
            else:
                return np.column_stack((t, data))
        elif sigma_level == 'top':
            sigma_level = 0
        elif sigma_level == 'bottom':
            sigma_level = self.nlevels - 1
        elif sigma_level == 'middle':
            sigma_level = self.nlevels // 2
        t, t_iter, eta, dp, data = self.read_time_series(variable,
                                                         nodes=nodes,
                                                         levels=sigma_level)
        eta = eta.mean(axis=1)
        # Take the average over the 3 nodes for now. Implement IDW or
        # area-weighted averaging later.
        data = data[:, :, 0, :].mean(axis=1)
        if return_eta:
            return np.column_stack((t, data)), np.column_stack((t, eta))
        else:
            return np.column_stack((t, data))
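# For reference, the `arco` weights computed by find_parent_element are
# standard barycentric (area) coordinates. A standalone check on a toy
# triangle (not from the source):
import numpy as np

def signa(x1, x2, x3, y1, y2, y3):
    "Signed area of a triangle, as in find_parent_element."
    return ((x1 - x3) * (y2 - y3) - (x2 - x3) * (y1 - y3)) / 2

tx = np.array([0.0, 1.0, 0.0])  # triangle vertices
ty = np.array([0.0, 0.0, 1.0])
x00, y00 = 0.25, 0.25           # interior query point
ar = signa(tx[0], tx[1], tx[2], ty[0], ty[1], ty[2])
w = np.array([signa(tx[1], tx[2], x00, ty[1], ty[2], y00),
              signa(tx[2], tx[0], x00, ty[2], ty[0], y00),
              signa(tx[0], tx[1], x00, ty[0], ty[1], y00)]) / ar
print(w, w.sum())  # (0.5, 0.25, 0.25), summing to 1 for an interior point
# a nodal field f interpolates at (x00, y00) as w @ f[node3]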
def KLdivergence(x, y):
    """Compute the Kullback-Leibler divergence between two multivariate samples.

    Parameters
    ----------
    x : 2D array (n,d)
        Samples from distribution P, which typically represents the true
        distribution.
    y : 2D array (m,d)
        Samples from distribution Q, which typically represents the
        approximate distribution.

    Returns
    -------
    out : float
        The estimated Kullback-Leibler divergence D(P||Q).

    References
    ----------
    Pérez-Cruz, F. Kullback-Leibler divergence estimation of continuous
    distributions. IEEE International Symposium on Information Theory, 2008.
    https://gist.github.com/atabakd/ed0f7581f8510c8587bc2f41a094b518
    """
    eta = 1e-10

    # Check the dimensions are consistent
    x = np.atleast_2d(x)
    y = np.atleast_2d(y)

    n, d = x.shape
    m, dy = y.shape

    assert d == dy
    assert n > 1

    # Build a KD tree representation of the samples and find the nearest
    # neighbour of each point in x.
    xtree = KDTree(x)
    ytree = KDTree(y)

    # Get the first two nearest neighbours for x, since the closest one is
    # the sample itself.
    r = xtree.query(x, k=2, eps=.01, p=2)[0][:, 1]
    s = ytree.query(x, k=1, eps=.01, p=2)[0]

    # Guard against zero distances (duplicate points across x and y), which
    # would otherwise make log(r/s) blow up.
    s[s == 0] = eta

    # There is a mistake in the paper. In Eq. 14, the right side misses a
    # negative sign on the first term of the right hand side.
    ratio = r / s
    return -np.log(ratio, where=ratio > 0).sum() * d / n + np.log(m / (n - 1.))
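# Sanity-check sketch: for samples from N(0, I) and N(mu, I) in 2D the
# analytic divergence is ||mu||^2 / 2, so with mu = (0.5, 0.5) the estimate
# should hover near 0.25 (np and the KDTree import are assumed in scope).
rng = np.random.default_rng(0)
p = rng.normal(0.0, 1.0, size=(5000, 2))
q = rng.normal(0.5, 1.0, size=(5000, 2))
print(KLdivergence(p, q))  # roughly 0.25 for these sample sizes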
.assign(Fueltype=lambda df: ( df.Fueltype .where(df.Fueltype != 'Natural Gas', df.Technology.replace('Steam Turbine', 'OCGT').fillna('OCGT'))))) ppl_query = snakemake.config['electricity']['powerplants_filter'] if isinstance(ppl_query, str): ppl.query(ppl_query, inplace=True) ppl = add_custom_powerplants(ppl) # add carriers from own powerplant files cntries_without_ppl = [c for c in countries if c not in ppl.Country.unique()] for c in countries: substation_i = n.buses.query('substation_lv and country == @c').index kdtree = KDTree(n.buses.loc[substation_i, ['x','y']].values) ppl_i = ppl.query('Country == @c').index tree_i = kdtree.query(ppl.loc[ppl_i, ['lon','lat']].values)[1] ppl.loc[ppl_i, 'bus'] = substation_i.append(pd.Index([np.nan]))[tree_i] if cntries_without_ppl: logging.warning(f"No powerplants known in: {', '.join(cntries_without_ppl)}") bus_null_b = ppl["bus"].isnull() if bus_null_b.any(): logging.warning(f"Couldn't find close bus for {bus_null_b.sum()} powerplants") ppl.to_csv(snakemake.output[0])
def phase_detection(self): self.solids = [] self.detect = [] self.liquid_density = list() self.solid_density = list() self.solid_fraction = list() self.liquid_fraction = list() self.solid_molecular_order = list() self.liquid_molecular_order = list() self.solid_local_bond4 = list() self.solid_local_bond6 = list() self.solid_local_mole4 = list() self.solid_polar = list() self.solid_polar_fraction = list() self.radi_polar = list() self.radi_polar_fraction = list() self.xys = dict() self.final_id = dict() self.vor_area = list() self.vornoi_history = list() self.inter_boundary_number = list() #self.liquid_vor_area = np.empty(0) plot_number = 0 sorted_keys = sorted(self.config_vdata.keys()) for startframe in sorted_keys: #pids = v_data.keys() v_data = self.config_vdata[startframe] qualify_id, order_para_mean, vr_mean, vomega_list = self.single_config_detect(v_data) self.detect.append(len(qualify_id)) qualify_id_set = set(qualify_id) order_mask, vr_mask, vomega_mask = self.solid_criteria(order_para_mean, vr_mean, vomega_list) fdata = helpy.load_framesets(v_data) # find the frame where contains all the qualified particles count = 0 while (count < 49) & (not set(fdata[startframe+count]['t']).issuperset(qualify_id_set)): count+=1 if count == 49: break startframe += count # for the idtentified frame, make TRUE if t in qualified_id fdata_track = fdata[startframe]['t'] track_mask = list() for t in fdata_track: track_mask.append(t in qualify_id) track_mask = np.asarray(track_mask) #build KDTree to query the nearest neighbor xys = helpy.consecutive_fields_view(fdata[startframe][track_mask], 'xy') ors = helpy.consecutive_fields_view(fdata[startframe][track_mask], 'o') disp = xys - [self.x0, self.y0] # displacement to the center radial = np.hypot(*disp.T) criteria = self.R - 1.4*self.side_len radial_mask = radial >= criteria #switch x, y coordinate into the regular orientation xys = xys[:,::-1] xys[:,1] = 1024 - xys[:,1] self.xys[startframe] = xys ftree = KDTree(xys, leafsize = 16) ##################################################################### # 1st iteration, # find at least two particles within 1.5 particle size radius # 3 of your neighbor must satisfy vr_criteria. 
# need to meet vomega criteria ##################################################################### final_mask = [] for pt_id in range(len(xys)): if not vr_mask[pt_id]: final_mask.append(False) continue dists, ids = ftree.query(xys[pt_id], self.nnn) #if np.all(dists < self.side_len * 2.0): if np.sum(dists < self.side_len*1.5) > 2: final_mask.append(np.sum(vr_mask[ids]) > 3) else: final_mask.append(False) temp_mask = np.array(final_mask) & np.array(vomega_mask) ############################################################################## # if you neighbors qualified then you will be solid ,exclude detection error # # qualified_id is a True or False mask ############################################################################## qualified_solid = list() for pt_id in range(len(xys)): dists, ids = ftree.query(xys[pt_id], self.nnn) qualified_solid.append(temp_mask[pt_id] or np.sum(temp_mask[ids[1:]]) >= 3) self.final_id[startframe] = qualified_solid solid_number = np.sum(qualified_solid) self.solids.append(solid_number) plot_vor = startframe < 550 voronoi = self.density_calculation(solid_number, len(qualify_id), xys, ors, disp,\ qualified_solid, plot_vor, radial_mask) #print(qualified_solid) self.liquid_density.append(voronoi.liquid_density) self.solid_density.append(voronoi.solid_density) self.solid_local_bond4.append(np.nanmean(voronoi.solid_local_bond4)) self.solid_local_bond6.append(np.nanmean(voronoi.solid_local_bond6)) self.solid_local_mole4.append(np.nanmean(voronoi.solid_local_mole4)) self.solid_polar.append(np.nanmean(voronoi.solid_polar)) self.solid_polar_fraction.append(voronoi.solid_polar_fraction) self.radi_polar.append(np.nanmean(voronoi.radius_polar)) self.radi_polar_fraction.append(voronoi.R_polar_fraction) self.inter_boundary_number.append(voronoi.interface_number) self.vornoi_history.append(voronoi) if plot_number < self.plot_check: xs = helpy.consecutive_fields_view(fdata[startframe][track_mask],'x') ys = helpy.consecutive_fields_view(fdata[startframe][track_mask],'y') self.plot_check_solid(xs, ys, vr_mean, vr_mask, order_para_mean, \ order_mask,vomega_list, vomega_mask, final_mask,\ qualified_solid) plot_number += 1 self.save_phase() return len(qualify_id)
class RGeocoder(metaclass=Singleton): """ The main reverse geocoder class """ def __init__(self, mode=2, verbose=True, stream=None): """ Class Instantiation Args: mode (int): Library supports the following two modes: - 1 = Single-threaded K-D Tree - 2 = Multi-threaded K-D Tree (Default) verbose (bool): For verbose output, set to True stream (io.StringIO): An in-memory stream of a custom data source """ self.mode = mode self.verbose = verbose if stream: coordinates, self.locations = self.load(stream) else: coordinates, self.locations = self.extract(rel_path(RG_FILE)) if mode == 1: # Single-process self.tree = KDTree(coordinates) else: # Multi-process self.tree = KDTree_MP.cKDTree_MP(coordinates) def query(self, coordinates): """ Function to query the K-D tree to find the nearest city Args: coordinates (list): List of tuple coordinates, i.e. [(latitude, longitude)] """ if self.mode == 1: _, indices = self.tree.query(coordinates, k=1) else: _, indices = self.tree.pquery(coordinates, k=1) return [self.locations[index] for index in indices] @staticmethod def load(stream): """ Function that loads a custom data source Args: stream (io.StringIO): An in-memory stream of a custom data source. The format of the stream must be a comma-separated file with header containing the columns defined in RG_COLUMNS. """ stream_reader = csv.DictReader(stream, delimiter=',') header = stream_reader.fieldnames if header != RG_COLUMNS: raise csv.Error('Input must be a comma-separated file with header containing ' + \ 'the following columns - %s. For more help, visit: ' % (','.join(RG_COLUMNS)) + \ 'https://github.com/thampiman/reverse-geocoder') # Load all the coordinates and locations geo_coords, locations = [], [] for row in stream_reader: geo_coords.append((row['lat'], row['lon'])) locations.append(row) return geo_coords, locations def extract(self, local_filename): """ Function loads the already extracted GeoNames cities file or downloads and extracts it if it doesn't exist locally Args: local_filename (str): Path to local RG_FILE """ if os.path.exists(local_filename): if self.verbose: print('Loading formatted geocoded file...') rows = csv.DictReader(open(local_filename, 'rt')) else: gn_cities1000_url = GN_URL + GN_CITIES1000 + '.zip' gn_admin1_url = GN_URL + GN_ADMIN1 gn_admin2_url = GN_URL + GN_ADMIN2 cities1000_zip_filename = GN_CITIES1000 + '.zip' cities1000_filename = GN_CITIES1000 + '.txt' if not os.path.exists(cities1000_zip_filename): if self.verbose: print('Downloading files from Geoname...') urllib.request.urlretrieve(gn_cities1000_url, cities1000_zip_filename) urllib.request.urlretrieve(gn_admin1_url, GN_ADMIN1) urllib.request.urlretrieve(gn_admin2_url, GN_ADMIN2) if self.verbose: print('Extracting cities1000...') _z = zipfile.ZipFile(open(cities1000_zip_filename, 'rb')) open(cities1000_filename, 'wb').write(_z.read(cities1000_filename)) if self.verbose: print('Loading admin1 codes...') admin1_map = {} t_rows = csv.reader(open(GN_ADMIN1, 'rt'), delimiter='\t') for row in t_rows: admin1_map[row[ADMIN_COLUMNS['concatCodes']]] = row[ ADMIN_COLUMNS['asciiName']] if self.verbose: print('Loading admin2 codes...') admin2_map = {} for row in csv.reader(open(GN_ADMIN2, 'rt'), delimiter='\t'): admin2_map[row[ADMIN_COLUMNS['concatCodes']]] = row[ ADMIN_COLUMNS['asciiName']] if self.verbose: print('Creating formatted geocoded file...') writer = csv.DictWriter(open(local_filename, 'wt'), fieldnames=RG_COLUMNS) rows = [] for row in csv.reader(open(cities1000_filename, 'rt'), \ delimiter='\t', 
quoting=csv.QUOTE_NONE): lat = row[GN_COLUMNS['latitude']] lon = row[GN_COLUMNS['longitude']] name = row[GN_COLUMNS['asciiName']] cc = row[GN_COLUMNS['countryCode']] admin1_c = row[GN_COLUMNS['admin1Code']] admin2_c = row[GN_COLUMNS['admin2Code']] cc_admin1 = cc + '.' + admin1_c cc_admin2 = cc + '.' + admin1_c + '.' + admin2_c admin1 = '' admin2 = '' if cc_admin1 in admin1_map: admin1 = admin1_map[cc_admin1] if cc_admin2 in admin2_map: admin2 = admin2_map[cc_admin2] write_row = { 'lat': lat, 'lon': lon, 'name': name, 'admin1': admin1, 'admin2': admin2, 'cc': cc } rows.append(write_row) writer.writeheader() writer.writerows(rows) if self.verbose: print('Removing extracted cities1000 to save space...') os.remove(cities1000_filename) # Load all the coordinates and locations geo_coords, locations = [], [] for row in rows: geo_coords.append((row['lat'], row['lon'])) locations.append(row) return geo_coords, locations
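# A usage sketch of the class as defined above; on first use the GeoNames
# files are downloaded and reformatted, which takes a while.
geo = RGeocoder(mode=1, verbose=False)       # single-threaded K-D tree
results = geo.query([(51.5074, -0.1278)])    # list of (lat, lon) tuples
print(results[0]['name'], results[0]['cc'])  # nearest city and country code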
coordsP[:, 0] = x_psf
coordsP[:, 1] = y_psf
coordsP[:, 2] = z_psf

coordsF[:, 0] = x_flc
coordsF[:, 1] = y_flc
coordsF[:, 2] = z_flc

########################################################################
# Matching in the other direction would build the tree on coordsF:
#     kdt = KDT(coordsF)
#     idxsF = kdt.query(coordsP)[1]
#     ds = distArr(x_psf, y_psf, z_psf,
#                  x_flc[idxsF], y_flc[idxsF], z_flc[idxsF])

kdt = KDT(coordsP)
idxsP = kdt.query(coordsF)[1]
ds = distArr(x_flc, y_flc, z_flc,
             x_psf[idxsP], y_psf[idxsP], z_psf[idxsP])

idxsF = np.arange(x_flc.size)

msk = ds < matchtol
idxsF = idxsF[msk]
idxsP = idxsP[msk]
ds = ds[msk]
def init_subproblems(self, conf, **kwargs): from sfepy.discrete.state import State from sfepy.discrete import Problem from sfepy.base.conf import ProblemConf, get_standard_keywords from scipy.spatial import cKDTree as KDTree # init subproblems problem = self.context pb_vars = problem.get_variables() # get "master" DofInfo and last index pb_adi_indx = problem.equations.variables.adi.indx self.adi_indx = pb_adi_indx.copy() last_indx = -1 for ii in six.itervalues(self.adi_indx): last_indx = nm.max([last_indx, ii.stop]) # coupling variables self.cvars_to_pb = {} for jj in conf.coupling_variables: self.cvars_to_pb[jj] = [None, None] if jj in pb_vars.names: if pb_vars[jj].dual_var_name is not None: self.cvars_to_pb[jj][0] = -1 else: self.cvars_to_pb[jj][1] = -1 # init subproblems self.subpb = [] required, other = get_standard_keywords() master_prefix = output.get_output_prefix() for ii, ifname in enumerate(conf.others): sub_prefix = master_prefix[:-1] + '-sub%d:' % (ii + 1) output.set_output_prefix(sub_prefix) kwargs['master_problem'] = problem confi = ProblemConf.from_file(ifname, required, other, define_args=kwargs) pbi = Problem.from_conf(confi, init_equations=True) sti = State(pbi.equations.variables) pbi.equations.set_data(None, ignore_unknown=True) pbi.time_update() pbi.update_materials() sti.apply_ebc() pbi_vars = pbi.get_variables() output.set_output_prefix(master_prefix) self.subpb.append([pbi, sti, None]) # append "slave" DofInfo for jj in pbi_vars.names: if not(pbi_vars[jj].is_state()): continue didx = pbi.equations.variables.adi.indx[jj] ndof = didx.stop - didx.start if jj in self.adi_indx: if ndof != \ (self.adi_indx[jj].stop - self.adi_indx[jj].start): raise ValueError('DOFs do not match!') else: self.adi_indx.update({ jj: slice(last_indx, last_indx + ndof, None)}) last_indx += ndof for jj in conf.coupling_variables: if jj in pbi_vars.names: if pbi_vars[jj].dual_var_name is not None: self.cvars_to_pb[jj][0] = ii else: self.cvars_to_pb[jj][1] = ii self.subpb.append([problem, None, None]) self.cvars_to_pb_map = {} for varname, pbs in six.iteritems(self.cvars_to_pb): # match field nodes coors = [] for ii in pbs: pbi = self.subpb[ii][0] pbi_vars = pbi.get_variables() fcoors = pbi_vars[varname].field.coors dc = nm.abs(nm.max(fcoors, axis=0)\ - nm.min(fcoors, axis=0)) ax = nm.where(dc > 1e-9)[0] coors.append(fcoors[:,ax]) if len(coors[0]) != len(coors[1]): raise ValueError('number of nodes does not match!') kdtree = KDTree(coors[0]) map_12 = kdtree.query(coors[1])[1] pbi1 = self.subpb[pbs[0]][0] pbi1_vars = pbi1.get_variables() eq_map_1 = pbi1_vars[varname].eq_map pbi2 = self.subpb[pbs[1]][0] pbi2_vars = pbi2.get_variables() eq_map_2 = pbi2_vars[varname].eq_map dpn = eq_map_2.dpn nnd = map_12.shape[0] map_12_nd = nm.zeros((nnd * dpn,), dtype=nm.int32) if dpn > 1: for ii in range(dpn): map_12_nd[ii::dpn] = map_12 * dpn + ii else: map_12_nd = map_12 idx = nm.where(eq_map_2.eq >= 0)[0] self.cvars_to_pb_map[varname] = eq_map_1.eq[map_12[idx]]
def correlate_neighbourhood(calcium_signal: np.ndarray, kd_tree: cKDTree,
                            center_ix: int, init_radius=0.02, max_radius=.08,
                            min_corr=.5, step=0.01, measure=correlation,
                            verbose=True):
    """
    Given a center neuron and parameters of the neighbourhood definition,
    tries to group neurons.

    The basic idea is:
    1. Look at all neurons within a given radius of the center neuron.
    2. Correlate their calcium signal to the center's.
    3. Keep sufficiently highly correlated neurons as being part of the group.
    4. Compute the fraction correlated / all neighboring neurons.
    5. Move the center to the neuron closest to the center of mass of this group.
    6. Increase slightly the radius and start again.
    7. As long as the fraction of correlated neurons is not dropping
       significantly, keep on increasing the radius.
    8. Label the neurons as being part of this group. If some were already
       part of another group, they belong to the biggest group.

    Parameters
    ----------
    calcium_signal : np.ndarray
        Calcium traces, one row per neuron.
    kd_tree : cKDTree
        Tree built on the neuron positions.
    center_ix : int
        Index of the center neuron.
    init_radius : float
        Radius of the first neighbourhood query.
    max_radius : float
        Largest radius to try.
    min_corr : float
        Correlation threshold above which a neighbour joins the group.
    step : float
        Radius increment between iterations.
    measure : callable
        Similarity measure between the center and its neighbours.
    verbose : bool
        Print per-radius statistics.

    Returns
    -------
    w_correlated : np.ndarray
        Indices of the neurons grouped with the center.
    """
    FRAC_DEC = .95
    radii = np.arange(init_radius, max_radius, step)
    radius = radii[0]  # not necessary due to loop?
    frac_corr = 0
    w_correlated = np.array([])
    for radius in radii:
        neighbors_ix, _ = get_neighbors(kd_tree, center_ix, radius)
        if len(neighbors_ix) == 0:  # one neuron left so no neighbours
            break
        corr_neigh = measure(calcium_signal, center_ix, neighbors_ix)
        # Fraction of correlated neurons in the neighbourhood
        correlated = corr_neigh >= min_corr
        n_correlated = np.sum(correlated)
        new_frac_corr = n_correlated / len(corr_neigh)
        if verbose:
            print(
                f'Number of neurons: {len(corr_neigh)} ; fraction correlated: {new_frac_corr * 100:.2f}% ;'
                f' Correlated neurons: {np.sum(correlated)}')
        # More correlations than before
        if new_frac_corr >= FRAC_DEC * frac_corr and n_correlated > 2:
            frac_corr = new_frac_corr
            w_correlated = neighbors_ix[correlated]
            centroid = np.mean(kd_tree.data[w_correlated, :], 0)
            _, center_ix = kd_tree.query(centroid, 1)
        else:
            break
    if radius == radii[-1]:
        # print('\t >>> Reached maximum radius <<<')
        pass
    return w_correlated
def spherematch(ra1, dec1, ra2, dec2, tol=None, nnearest=1):
    """
    Finds matches in one catalog to another.

    Parameters
    ----------
    ra1 : array-like
        Right Ascension in degrees of the first catalog
    dec1 : array-like
        Declination in degrees of the first catalog (shape of array must match `ra1`)
    ra2 : array-like
        Right Ascension in degrees of the second catalog
    dec2 : array-like
        Declination in degrees of the second catalog (shape of array must match `ra2`)
    tol : float or None, optional
        How close (in degrees) a match has to be to count as a match. If None,
        all nearest neighbors for the first catalog will be returned.
    nnearest : int, optional
        The nth neighbor to find. E.g., 1 for the nearest neighbor, 2 for the
        second nearest neighbor, etc. Particularly useful if you want to get
        the nearest *non-self* neighbor of a catalog. To do this, use:
        ``spherematch(ra, dec, ra, dec, nnearest=2)``

    Returns
    -------
    idx1 : int array
        Indices into the first catalog of the matches. Will never be larger
        than `ra1`/`dec1`.
    idx2 : int array
        Indices into the second catalog of the matches. Will never be larger
        than `ra1`/`dec1`.
    ds : float array
        Distance (in degrees) between the matches
    """
    ra1 = np.array(ra1, copy=False)
    dec1 = np.array(dec1, copy=False)
    ra2 = np.array(ra2, copy=False)
    dec2 = np.array(dec2, copy=False)

    if ra1.shape != dec1.shape:
        raise ValueError('ra1 and dec1 do not match!')
    if ra2.shape != dec2.shape:
        raise ValueError('ra2 and dec2 do not match!')

    x1, y1, z1 = _spherical_to_cartesian(ra1.ravel(), dec1.ravel())
    # this is equivalent to, but faster than, np.array([x1, y1, z1]).T
    coords1 = np.empty((x1.size, 3))
    coords1[:, 0] = x1
    coords1[:, 1] = y1
    coords1[:, 2] = z1

    x2, y2, z2 = _spherical_to_cartesian(ra2.ravel(), dec2.ravel())
    coords2 = np.empty((x2.size, 3))
    coords2[:, 0] = x2
    coords2[:, 1] = y2
    coords2[:, 2] = z2

    kdt = KDT(coords2)
    if nnearest == 1:
        idxs2 = kdt.query(coords1)[1]
    elif nnearest > 1:
        idxs2 = kdt.query(coords1, nnearest)[1][:, -1]
    else:
        raise ValueError('invalid nnearest ' + str(nnearest))

    ds = _great_circle_distance(ra1, dec1, ra2[idxs2], dec2[idxs2])

    idxs1 = np.arange(ra1.size)

    if tol is not None:
        msk = ds < tol
        idxs1 = idxs1[msk]
        idxs2 = idxs2[msk]
        ds = ds[msk]

    return idxs1, idxs2, ds
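# Usage sketch: matching a catalog against itself with nnearest=2 pairs each
# source with its nearest non-self neighbour, and tol drops isolated sources.
import numpy as np

ra = np.array([10.0, 10.001, 200.0])
dec = np.array([-5.0, -5.001, 30.0])
idx1, idx2, ds = spherematch(ra, dec, ra, dec, tol=0.1, nnearest=2)
# the close pair (0, 1) survives; the isolated source at (200, 30) is dropped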
random_state=41).fit(subset_data_unref)
print('Kmeans done: Time elapsed: {} seconds'.format(time.time() - time_start))
labels_unref = kmeans_unref.labels_
centroids_unref = kmeans_unref.cluster_centers_
counting_occurence_in_patient_compare = Counter(labels_unref)
# order the counts by cluster index so that vals_unref[i] is the population
# of cluster i (Counter iterates in insertion order, not label order)
vals_unref = np.array(
    [counting_occurence_in_patient_compare.get(i, 0)
     for i in range(len(centroids_unref))],
    dtype=float)

# COMPARING USING KDTREE
k = KDTree(centroids_unref)
dists, idxs = k.query(centroids_ref)
reference_dataframe[f'Count_{name}'] = vals_unref[idxs]

print(reference_dataframe.shape, reference_dataframe.columns)
reference_dataframe.sort_values(by=['Cluster'], inplace=True)
reference_dataframe.sort_index(axis=1, ascending=True, inplace=True)
reference_dataframe.to_csv(
    path_to_store_frame +
    f'/Data_for_LDA_from_generate_data_with_n_{number_of_cluster}_configuration_{configuration}.csv'
)
class VoronoiClosestPolytope: def __init__(self, polytopes, key_vertices_count=0, process_count=8, max_number_key_points=None): ''' Compute the closest polytope using Voronoi cells :param polytopes: ''' self.init_start_time = default_timer() self.section_start_time = self.init_start_time self.polytopes = np.asarray(polytopes, dtype='object') self.type = self.polytopes[0].type self.process_count = process_count self.key_vertices_count = key_vertices_count if self.type == 'AH_polytope': self.dim = self.polytopes[0].t.shape[0] elif self.type == 'zonotope': self.dim = self.polytopes[0].x.shape[0] else: raise NotImplementedError if self.key_vertices_count > 0: self.key_points = np.zeros([ len(self.polytopes) * (1 + 2**self.key_vertices_count), self.dim ]) else: self.key_points = np.zeros([len(self.polytopes), self.dim]) for i, z in enumerate(polytopes): if self.type == 'AH_polytope': if self.key_vertices_count > 0: raise NotImplementedError else: self.key_points[i, :] = self.polytopes[i].t[:, 0] elif self.type == 'zonotope': if self.key_vertices_count > 0: self.key_points[i * (2**self.key_vertices_count + 1), :] = self.polytopes[i].x[:, 0] self.key_points[ i * (2**self.key_vertices_count + 1) + 1:(i + 1) * (2**self.key_vertices_count + 1), :] = get_k_random_edge_points_in_zonotope( self.polytopes[i], self.key_vertices_count) else: self.key_points[i, :] = self.polytopes[i].x[:, 0] else: raise NotImplementedError if max_number_key_points: # sample the key points n = self.key_points.shape[0] chosen_key_points = np.random.choice(n, size=min( n, max_number_key_points), replace=False) self.key_points = self.key_points[chosen_key_points, :] # print(self.key_points.shape) self.key_point_to_polytope_map = dict( ) # stores the potential closest polytopes associated with each Voronoi (centroid) for key_point in self.key_points: ds = np.zeros(self.polytopes.shape[0]) self.key_point_to_polytope_map[str(key_point)] = np.rec.fromarrays( [self.polytopes, ds], names=('polytopes', 'distances')) self.build_cell_polytope_map_default() #build kd-tree for centroids self.key_point_tree = KDTree(self.key_points) print(('Completed precomputation in %f seconds' % (default_timer() - self.init_start_time))) def build_cell_polytope_map_default(self): polytope_key_point_indices = np.array( np.meshgrid(np.arange(self.polytopes.shape[0]), np.arange(self.key_points.shape[0]))).T.reshape(-1, 2) arguments = [] for i in polytope_key_point_indices: arguments.append( (self.key_points, self.key_point_to_polytope_map, i[0], i[1])) p = Pool(self.process_count) pca = p.map(set_polytope_pair_distance, arguments) polytope_key_point_arrays = np.asarray(pca).reshape( (self.polytopes.shape[0]), self.key_points.shape[0]) # print(polytope_centroid_arrays) # compute pairwise distances of the centroids and the polytopes #fixme for key_point_index, key_point in enumerate(self.key_points): key_point_string = str(key_point) for polytope_index, polytope in enumerate( self.key_point_to_polytope_map[key_point_string] ['polytopes']): self.key_point_to_polytope_map[str(key_point)].distances[ polytope_index] = polytope_key_point_arrays[ polytope_index, key_point_index] # print(polytope_key_point_arrays[polytope_index, key_point_index]) self.key_point_to_polytope_map[key_point_string].sort( order='distances') # print(self.centroid_to_polytope_map[centroid_string]) def find_closest_polytope(self, query_point, return_intermediate_info=False): #find the closest centroid d, i = self.key_point_tree.query(query_point) closest_key_point = 
self.key_point_tree.data[i] # print('closest key point', closest_key_point) closest_key_point_polytope = self.key_point_to_polytope_map[str( closest_key_point)]['polytopes'][0] # print('closest polytope centroid' + str(closest_key_point_polytope.x)) dist_query_centroid_polytope = distance_point_polytope( closest_key_point_polytope, query_point, ball='l2')[0] dist_query_key_point = np.linalg.norm(query_point - closest_key_point) # print(dist_query_key_point, dist_query_centroid_polytope) cutoff_index = np.searchsorted( self.key_point_to_polytope_map[str(closest_key_point)].distances, dist_query_key_point + dist_query_centroid_polytope) # print(cutoff_index) # print(self.key_point_to_polytope_map[str(closest_key_point)]['distances'][0:cutoff_index]) # print(self.key_point_to_polytope_map[str(closest_key_point)]['distances'][cutoff_index:]) # print('dqc',dist_query_key_point) # print(self.centroid_to_polytope_map[str(closest_key_point)].distances) closest_polytope_candidates = self.key_point_to_polytope_map[str( closest_key_point)].polytopes[0:cutoff_index] # print(closest_polytope_candidates) best_polytope = None best_distance = np.inf for polytope in closest_polytope_candidates: if best_distance < 1e-9: break dist = distance_point_polytope(polytope, query_point, ball='l2')[0] if best_distance > dist: best_distance = dist best_polytope = polytope # print('best distance', best_distance) if return_intermediate_info: return best_polytope, best_distance, closest_polytope_candidates return best_polytope
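# The pruning in find_closest_polytope is a triangle-inequality bound: a
# polytope whose precomputed distance to the key point exceeds
# d(query, key point) + d(query, first candidate) can never be the closest,
# so searchsorted on the pre-sorted distances gives the candidate cutoff.
# A toy numeric check of that cutoff:
import numpy as np

key_dists = np.array([0.0, 1.0, 3.0, 7.0])  # sorted d(key_point, P_i)
d_query_key = 0.5                           # d(query, key point)
d_query_best = 0.8                          # d(query, P_0), first candidate
cutoff = np.searchsorted(key_dists, d_query_key + d_query_best)
print(cutoff)  # 2: only P_0 and P_1 can still contain the closest point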
def tsne(fdarray, new_label='tsne', channels=None, transform='arcsinh', sample=6000, verbose=False, backgate=True): """Perform t-SNE/viSNE on the FlowData object """ fdarray = util.make_list(fdarray) # If the user has not provided a list of channels to use, # use the intersection of all isotope channels if channels is None: channel_set = [] for fd in fdarray: channel_set.append(set(fd.isotopes)) channels = list(set.intersection(*channel_set)) # Make a copy of the data in files that we want points = [] for fd in fdarray: points.append(np.vstack([fd[ch] for ch in channels]).T) # transform if transform == 'arcsinh': for pts in points: # Apply the transform inplace to the data np.arcsinh(5 * pts, pts) # Randomly sample to reduce the number of points sample_masks = [] for pts in points: if sample < pts.shape[0]: # If we have enough points to subsample sample_masks.append( np.random.choice(pts.shape[0], sample, replace=False)) else: # Otherwise we add all the points sample_masks.append(np.array(range(pts.shape[0]))) # Sample the points, and construct a large matrix sample_points = [] for mask, pts in zip(sample_masks, points): sample_points.append(pts[mask, :]) X = np.vstack(sample_points) # Perform t-SNE Y = lib_tsne.tsne(X, verbose=verbose) assert Y is not None, ('t-SNE failed to return') # Split Y into a matrix for each dataset splits = np.cumsum( np.array([mask.shape[0] for mask in sample_masks], dtype=int)) Y_split = np.split(Y, splits, axis=0) # now expand data to reassign these points back into the dataset tsne_coords = [] for (pts, mask, Yspt) in zip(points, sample_masks, Y_split): npoints = pts.shape[0] Z = np.zeros((npoints, 2)) * float('NaN') Z[mask, :] = Yspt tsne_coords.append(Z) # If a point didn't get sampled, place its t-SNE coordinates at its nearest # neighbor. if backgate: kd = KDTree(X) # select points not assigned values with t-SNE for pts, mask, coords, j in zip(points, sample_masks, tsne_coords, range(len(points))): nan_points = np.argwhere(np.isnan(coords[:, 0])) d, near = kd.query(pts[nan_points], 1) # convert back to coordinates on the whole dataset coords[nan_points, :] = Y[near, :] tsne_coords[j] = coords # add to data to FlowData structure for fd, coords in zip(fdarray, tsne_coords): fd[new_label + '1'] = coords[:, 0] fd[new_label + '2'] = coords[:, 1]
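# The backgating step generalises to any embedding: rows left out of the
# subsample inherit the coordinates of their nearest sampled neighbour in
# the original space. A standalone sketch of the idea (toy arrays, not the
# FlowData API):
import numpy as np
from scipy.spatial import cKDTree as KDTree

X = np.random.rand(1000, 5)   # sampled rows in marker space
Y = np.random.rand(1000, 2)   # their 2-D embedding (e.g. from t-SNE)
rest = np.random.rand(50, 5)  # rows that were not sampled

_, near = KDTree(X).query(rest, 1)
rest_embedding = Y[near]      # inherit the neighbour's embedding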
def kldiv(x, y, *, k=1):
    r"""
    Compute the Kullback-Leibler divergence between two multivariate samples.

    .. math::

        D(P||Q) = \frac{d}{n} \sum_i^n \log{\frac{r_k(x_i)}{s_k(x_i)}}
                  + \log{\frac{m}{n-1}}

    where :math:`r_k(x_i)` and :math:`s_k(x_i)` are, respectively, the
    euclidean distance to the kth neighbour of :math:`x_i` in the x array
    (excepting :math:`x_i`) and in the y array.

    Parameters
    ----------
    x : ndarray (n,d)
        Samples from distribution P, which typically represents the true
        distribution (reference).
    y : ndarray (m,d)
        Samples from distribution Q, which typically represents the
        approximate distribution (candidate).
    k : int or sequence
        The kth neighbours to look for when estimating the density of the
        distributions. Defaults to 1, which can be noisy.

    Returns
    -------
    out : float or sequence
        The estimated Kullback-Leibler divergence D(P||Q) computed from the
        distances to the kth neighbour.

    Notes
    -----
    In information theory, the Kullback–Leibler divergence is a non-symmetric
    measure of the difference between two probability distributions P and Q,
    where P is the "true" distribution and Q an approximation. This nuance is
    important because D(P||Q) is not equal to D(Q||P).

    For probability distributions P and Q of a continuous random variable,
    the K–L divergence is defined as:

    .. math::

        D_{KL}(P||Q) = \int p(x) \log{\frac{p(x)}{q(x)}} \, dx

    This formula assumes we have a representation of the probability
    densities p(x) and q(x). In many cases, we only have samples from the
    distribution, and most methods first estimate the densities from the
    samples and then proceed to compute the K-L divergence. In Perez-Cruz,
    the authors propose an algorithm to estimate the K-L divergence directly
    from the sample using an empirical CDF. Even though the CDFs do not
    converge to their true values, the paper proves that the K-L divergence
    almost surely does converge to its true value.

    References
    ----------
    Kullback-Leibler Divergence Estimation of Continuous Distributions (2008).
    Fernando Pérez-Cruz.
    """
    mk = np.iterable(k)
    ka = np.atleast_1d(k)

    nx, d = x.shape
    ny, d = y.shape

    # Limit the number of dimensions to 10, too slow otherwise.
    if d > 10:
        raise ValueError("Too many dimensions: {}.".format(d))

    # Not enough data to draw conclusions.
    if nx < 5 or ny < 5:
        return np.nan if not mk else [np.nan] * len(ka)

    # Build a KD tree representation of the samples.
    xtree = KDTree(x)
    ytree = KDTree(y)

    # Get the k'th nearest neighbour from each point in x for both x and y.
    # We query k + 1 neighbours to make sure the output is a 2D array.
    kmax = max(ka) + 1
    r, _ = xtree.query(x, k=kmax, eps=0, p=2, n_jobs=2)
    s, _ = ytree.query(x, k=kmax, eps=0, p=2, n_jobs=2)

    # There is a mistake in the paper. In Eq. 14, the right side misses a
    # negative sign on the first term of the right hand side.
    out = []
    for ki in ka:
        # The 0th nearest neighbour of x[i] in x is x[i] itself.
        # Hence we take the k'th + 1, which in 0-based indexing is given by
        # index k.
        out.append(-np.log(r[:, ki] / s[:, ki - 1]).sum() * d / nx
                   + np.log(ny / (nx - 1.0)))

    if mk:
        return out
    return out[0]
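# Usage sketch: for N(0,1) against N(1,1) the analytic divergence is 0.5;
# passing a sequence of k returns one estimate per neighbour order.
rng = np.random.default_rng(1)
p = rng.normal(0.0, 1.0, size=(2000, 1))
q = rng.normal(1.0, 1.0, size=(2000, 1))
print(kldiv(p, q))            # single k=1 estimate, near 0.5 but noisy
print(kldiv(p, q, k=[1, 4]))  # one estimate per requested k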
class GeocodeData:
    def __init__(self, geocode_filename='geocode.csv',
                 country_filename='countries.csv'):
        coordinates, self.__locations = self.__extract(rel_path(geocode_filename))
        self.__tree = KDTree(coordinates)
        self.__load_countries(rel_path(country_filename))

    def __load_countries(self, country_filename):
        """Load a map of country code to name
        """
        self.__countries = {}
        with open(country_filename, 'r') as handler:
            for code, name in csv.reader(handler):
                self.__countries[code] = name

    def query(self, coordinates):
        """Find closest match to this list of coordinates
        """
        try:
            distances, indices = self.__tree.query(coordinates, k=1)
        except ValueError as e:
            logging.info('Unable to parse coordinates: {}'.format(coordinates))
            raise e
        else:
            results = [self.__locations[index] for index in indices]
            for result in results:
                result['country'] = self.__countries.get(result['country_code'], '')
            return results

    def __download(self):
        """Download geocode file
        """
        local_filename = os.path.abspath(os.path.basename(GEOCODE_URL))
        if not os.path.exists(local_filename):
            logging.info('Downloading: {}'.format(GEOCODE_URL))
            urlretrieve(GEOCODE_URL, local_filename)
        return local_filename

    def __extract(self, local_filename):
        """Extract geocode data from zip
        """
        if os.path.exists(local_filename):
            # open compact CSV
            rows = csv.reader(open(local_filename, 'r'))
        else:
            downloadedFile = None
            if not os.path.exists(GEOCODE_FILENAME):
                # remove GEOCODE_FILENAME to get updated data
                downloadedFile = self.__download()
                z = zipfile.ZipFile(downloadedFile)
                logging.info('Extracting: {}'.format(GEOCODE_FILENAME))
                open(GEOCODE_FILENAME, 'wb').write(z.read(GEOCODE_FILENAME))
                z.close()

            # extract coordinates into more compact CSV for faster loading
            writer = csv.writer(open(local_filename, 'w'))
            rows = []
            for row in csv.reader(open(GEOCODE_FILENAME, 'r'), delimiter='\t'):
                latitude, longitude = row[4:6]
                country_code = row[8]
                if latitude and longitude and country_code:
                    city = row[1]
                    row = latitude, longitude, country_code, city
                    writer.writerow(row)
                    rows.append(row)

            # cleanup downloaded files; guard against the case where the raw
            # GeoNames file already existed and nothing was downloaded
            if downloadedFile is not None:
                os.remove(downloadedFile)
            os.remove(GEOCODE_FILENAME)

        # load a list of known coordinates and corresponding locations
        coordinates, locations = [], []
        for latitude, longitude, country_code, city in rows:
            coordinates.append((latitude, longitude))
            locations.append(dict(country_code=country_code, city=city))
        return coordinates, locations
class ShapeMatcher(object):
    def __init__(self, ids, invariants):
        """Match other shapes based on euclidean distance.

        Constructs a KDTree in order to do nearest neighbour queries.
        For large datasets it might take a second or two to build the tree.

        Arguments:
        ids -- set names/identifiers for the shapes
        invariants -- 2D array of invariants that describe the shapes
        """
        self.ids = ids
        self.invariants = invariants
        LOG.debug('Constructing tree from %d invariants', len(invariants))
        self.tree = KDTree(invariants)

    def search_invariants(self, invariants, n=10, df=False):
        """Search for matches based on invariants.

        Arguments:
        invariants -- N length array of shape descriptors

        Keyword arguments:
        n -- number of matches to return (default 10)
        df -- return matches as a pandas DataFrame (default False)
        """
        if n == 'max':
            n = len(self.invariants)
        LOG.debug('Searching for %d closest points', n)
        distances, indexes = self.tree.query(invariants, n)
        invariants = self.invariants[indexes]
        # Handle the n == 1 case correctly: the query then returns scalars
        # rather than arrays (np.ndim also covers numpy integer scalars,
        # which are not instances of the builtin int).
        if np.ndim(indexes) == 0:
            ids = self.ids[indexes].decode('utf-8')
            return SearchResult(ids, distances, invariants)
        else:
            ids = [x.decode('utf-8') for x in self.ids[indexes]]
        if df:
            return pd.DataFrame({
                'ID': ids,
                'Proximity': distances
            }).set_index('ID')
        else:
            return [
                SearchResult(n, d, i)
                for n, d, i in zip(ids, distances, invariants)
            ]

    def search_shape(self, shape, **kwargs):
        """Search for matches based on a shape object. (convenience function)

        Arguments:
        shape -- a Shape object.

        Keyword arguments:
        n -- number of matches to return (default 10)
        df -- return matches as a pandas DataFrame (default False)
        """
        LOG.debug('Searching for closest shapes to %s', shape.name)
        # delegate to search_invariants method
        return self.search_invariants(shape.invariants, **kwargs)

    @staticmethod
    def from_datafile(filename, l_max=20):
        """Construct a matcher from a bundled data file.

        Keyword arguments:
        l_max -- maximum angular momenta to use for invariants (default 20)
        """
        names, invariants = load_data(filename)
        return ShapeMatcher(names, invariants)

    @staticmethod
    def from_shapes(shapes, l_max=20):
        """Construct a ShapeMatcher object from a list of shapes.

        Arguments:
        shapes -- a list (or dict keyed by name) of Shape objects

        Keyword arguments:
        l_max -- maximum angular momenta to use for invariants (default 20)
        """
        invariants, names = [], []
        if isinstance(shapes, dict):
            for name, s in shapes.items():
                invariants.append(s.invariants)
                names.append(name)
        else:
            for s in shapes:
                invariants.append(s.invariants)
                names.append(s.name)
        invariants = np.array(invariants)
        names = np.array(names, dtype='|S64')
        return ShapeMatcher(names, invariants)

    @staticmethod
    def from_surface_files(files, property_name='shape'):
        """Construct a ShapeMatcher from a list of surface files.

        Keyword arguments:
        property_name -- surface property to describe (default 'shape')
        """
        shapes = {}
        for f in files:
            # key by the file stem, not the literal string 'f.stem'
            shapes[f.stem] = surface_description(f, property_name=property_name)
        return ShapeMatcher.from_shapes(shapes)

    def all(self):
        return self.search_invariants(self.invariants[0],
                                      n=len(self.invariants))
def get_ref_coors_convex(field, coors, close_limit=0.1, cache=None, verbose=False): """ Get reference element coordinates and elements corresponding to given physical coordinates. Parameters ---------- field : Field instance The field defining the approximation. coors : array The physical coordinates. close_limit : float, optional The maximum limit distance of a point from the closest element allowed for extrapolation. cache : Struct, optional To speed up a sequence of evaluations, the field mesh and other data can be cached. Optionally, the cache can also contain the reference element coordinates as `cache.ref_coors`, `cache.cells` and `cache.status`, if the evaluation occurs in the same coordinates repeatedly. In that case the mesh related data are ignored. verbose : bool If False, reduce verbosity. Returns ------- ref_coors : array The reference coordinates. cells : array The cell indices corresponding to the reference coordinates. status : array The status: 0 is success, 1 is extrapolation within `close_limit`, 2 is extrapolation outside `close_limit`, 3 is failure, 4 is failure due to non-convergence of the Newton iteration in tensor product cells. Notes ----- Outline of the algorithm for finding xi such that X(xi) = P: 1. make inverse connectivity - for each vertex have cells it is in. 2. find the closest vertex V. 3. choose initial cell: i0 = first from cells incident to V. 4. while not P in C_i, change C_i towards P, check if P in new C_i. """ timer = Timer() ref_coors = get_default_attr(cache, 'ref_coors', None) if ref_coors is None: extrapolate = close_limit > 0.0 ref_coors = nm.empty_like(coors) cells = nm.empty((coors.shape[0], ), dtype=nm.int32) status = nm.empty((coors.shape[0], ), dtype=nm.int32) cmesh = get_default_attr(cache, 'cmesh', None) if cmesh is None: timer.start() mesh = field.create_mesh(extra_nodes=False) cmesh = mesh.cmesh gels = create_geometry_elements() cmesh.set_local_entities(gels) cmesh.setup_entities() centroids = cmesh.get_centroids(cmesh.tdim) if field.gel.name != '3_8': normals0 = cmesh.get_facet_normals() normals1 = None else: normals0 = cmesh.get_facet_normals(0) normals1 = cmesh.get_facet_normals(1) output('cmesh setup: %f s' % timer.stop(), verbose=verbose) else: centroids = cache.centroids normals0 = cache.normals0 normals1 = cache.normals1 kdtree = get_default_attr(cache, 'kdtree', None) if kdtree is None: from scipy.spatial import cKDTree as KDTree timer.start() kdtree = KDTree(cmesh.coors) output('kdtree: %f s' % timer.stop(), verbose=verbose) timer.start() ics = kdtree.query(coors)[1] output('kdtree query: %f s' % timer.stop(), verbose=verbose) ics = nm.asarray(ics, dtype=nm.int32) coors = nm.ascontiguousarray(coors) ctx = field.create_basis_context() timer.start() crc.find_ref_coors_convex(ref_coors, cells, status, coors, cmesh, centroids, normals0, normals1, ics, extrapolate, 1e-15, close_limit, ctx) output('ref. coordinates: %f s' % timer.stop(), verbose=verbose) else: cells = cache.cells status = cache.status return ref_coors, cells, status
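# A hedged caching sketch following the docstring's contract; Struct is
# assumed to be sfepy's generic container, and field/coors come from the
# surrounding evaluation code.
from sfepy.base.base import Struct

# First call builds the cmesh, kdtree and reference coordinates.
ref_coors, cells, status = get_ref_coors_convex(field, coors)

# Re-evaluating at the same physical points can then skip the search.
cache = Struct(ref_coors=ref_coors, cells=cells, status=status)
ref_coors, cells, status = get_ref_coors_convex(field, coors, cache=cache)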
def match_arbitrary_translation_dilatation(x1,y1,x2,y2) : """ Match two catalogs in different coordinate systems, 1 and 2, related by a translation, a dilatation, and possibly a "small" rotation The orientation of triangles is used for the match so the rotation has to be small. Inspired from http://articles.adsabs.harvard.edu/pdf/1986AJ.....91.1244G Args: x1 : float numpy array of coordinates along first axis of cartesian coordinate system 1 y1 : float numpy array of coordinates along second axis of cartesian coordinate system 1 x2 : float numpy array of coordinates along first axis of cartesian coordinate system 2 y2 : float numpy array of coordinates along second axis of cartesian coordinate system 2 returns: indices_2 : integer numpy array. if ii is a index array for entries in the first catalog, indices_2[ii] is the index array of best matching entries in the second catalog. (one should compare x1[ii] with x2[indices_2[ii]]) negative values for unmatched entries. distance : distance between pairs of triangles. It can be used to discard bad matches. """ log = get_logger() # compute all possible triangles in both data sets # txyz are properties of the shape and orientation of the triangles log.debug("compute triangles") tk1,txyz1 = compute_triangles_with_fixed_orientation(x1,y1) tk2,txyz2 = compute_triangles_with_fixed_orientation(x2,y2) log.debug("match triangles") # match with kdtree triangles with same shape and orientation tree2=KDTree(txyz2) triangle_distances,triangle_indices_2 = tree2.query(txyz1,k=1) # now that we have match of triangles , need to match back catalog entries ranked_pairs = np.argsort(triangle_distances) indices_2 = -1*np.ones(x1.size,dtype=int) distances = np.zeros(x1.size) all_matched = False log.debug("match catalogs using pairs of triangles") for p in ranked_pairs : k1=tk1[p] # incides (in x1,y1) of vertices of this triangle (size=3) k2=tk2[triangle_indices_2[p]] # incides (in x2,y2) of vertices of other triangle # check unmatched or equal if np.any((indices_2[k1]>=0)&(indices_2[k1]!=k2)) : log.warning("skip {} <=> {}".format(k1,k2)) continue indices_2[k1]=k2 distances[k1]=triangle_distances[p] all_matched = (np.sum(indices_2>=0)==x1.size) if all_matched : log.debug("all matched") break # check duplicates for i2 in np.unique(indices_2[indices_2>=0]) : ii=(indices_2==i2) if np.sum(ii) > 1 : log.warning("{} duplicates for i2={}".format(np.sum(ii),i2)) indices_2[ii]=-1 return indices_2 , distances
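# Usage sketch on a toy catalog related by a pure translation and
# dilatation (assumes the module's compute_triangles_with_fixed_orientation
# helper is importable alongside this function).
import numpy as np

x1 = np.array([0.0, 2.0, 0.5, 1.7])
y1 = np.array([0.0, 0.1, 1.3, 2.0])
x2 = 10.0 + 3.0 * x1  # coordinate system 2: shifted and scaled
y2 = -4.0 + 3.0 * y1
indices_2, distances = match_arbitrary_translation_dilatation(x1, y1, x2, y2)
# expect indices_2 == [0, 1, 2, 3], since entry i maps to entry i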
import numpy as np
from scipy.spatial import cKDTree as KDT

# build (N, 2) coordinate arrays for the low-resolution FLC and DRC catalogs
x_drc_low = low_x[ff]
y_drc_low = low_y[ff]
xm_flc_low = flc_all['xdrc_low_' + filter]
ym_flc_low = flc_all['ydrc_low_' + filter]

coords1low = np.column_stack((xm_flc_low, ym_flc_low))
coords2low = np.column_stack((x_drc_low, y_drc_low))

# nearest DRC source for each FLC source
kdt = KDT(coords2low)
idxs2 = kdt.query(coords1low)[1]
ds = distArr(xm_flc_low, ym_flc_low, x_drc_low[idxs2], y_drc_low[idxs2])

# keep only pairs closer than the matching tolerance
idxs1 = np.arange(xm_flc_low.size)
msk = ds < matchtol
idxs1 = idxs1[msk]
idxs2 = idxs2[msk]
ds = ds[msk]

outfile = outDir + 'hor-I-cut_drc_low_' + filter + '_tol1.txt'
np.savetxt(outfile, idxs2, fmt='%4i')
outfile = outDir + 'hor-I-cut_flc_low_' + filter + '_tol1.txt'
np.savetxt(outfile, idxs1, fmt='%4i')
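
# `distArr` is not defined in this snippet; a plausible implementation,
# assuming it returns element-wise plane Euclidean separations, would be:
import numpy as np

def distArr(x1, y1, x2, y2):
    """Element-wise Euclidean distance between (x1, y1) and (x2, y2)."""
    return np.hypot(x1 - x2, y1 - y2)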
import logging

import pypsa
import powerplantmatching as ppm
from scipy.spatial import cKDTree as KDTree

if 'snakemake' not in globals():
    from vresutils.snakemake import MockSnakemake, Dict

    snakemake = MockSnakemake(input=Dict(base_network='networks/base.nc'),
                              output=['resources/powerplants.csv'])

logging.basicConfig(level=snakemake.config['logging_level'])

n = pypsa.Network(snakemake.input.base_network)

# collect matched power plant data, relabel gas plants by technology and
# fill in missing positions and durations
ppl = (ppm.collection.matched_data()
       [lambda df: ~df.Fueltype.isin(('Solar', 'Wind'))]
       .pipe(ppm.cleaning.clean_technology)
       .assign(Fueltype=lambda df: (
           df.Fueltype.where(df.Fueltype != 'Natural Gas',
                             df.Technology.replace('Steam Turbine',
                                                   'OCGT').fillna('OCGT'))))
       .pipe(ppm.utils.fill_geoposition, parse=True, only_saved_locs=True)
       .pipe(ppm.heuristics.fill_missing_duration))

# ppl.loc[(ppl.Fueltype == 'Other') & ppl.Technology.str.contains('CCGT'), 'Fueltype'] = 'CCGT'
# ppl.loc[(ppl.Fueltype == 'Other') & ppl.Technology.str.contains('Steam Turbine'), 'Fueltype'] = 'CCGT'

# drop plants without coordinates
ppl = ppl.loc[ppl.lon.notnull() & ppl.lat.notnull()]

# assign each plant to the nearest low-voltage substation bus
substation_lv_i = n.buses.index[n.buses['substation_lv']]
kdtree = KDTree(n.buses.loc[substation_lv_i, ['x', 'y']].values)
ppl = ppl.assign(
    bus=substation_lv_i[kdtree.query(ppl[['lon', 'lat']].values)[1]])

ppl.to_csv(snakemake.output[0])
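
# The last step maps positional kd-tree results back to bus labels through a
# pandas Index. A minimal self-contained illustration with hypothetical data:
# query() returns positions into the coordinate array, and indexing the
# pandas Index with those positions recovers the bus names.
import numpy as np
import pandas as pd
from scipy.spatial import cKDTree as KDTree

buses = pd.DataFrame({'x': [0.0, 1.0, 2.0],
                      'y': [0.0, 0.0, 0.0]},
                     index=['bus_a', 'bus_b', 'bus_c'])

points = np.array([[0.2, 0.1],    # closest to bus_a
                   [1.9, -0.2]])  # closest to bus_c

kdtree = KDTree(buses[['x', 'y']].values)
nearest = buses.index[kdtree.query(points)[1]]
# Index(['bus_a', 'bus_c'], dtype='object')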
import functools
from timeit import timeit

import numpy as np
from scipy.spatial import cKDTree as KDTree

# Pure-python reference implementation; `rejection_sample_ragged` is the
# cython version being benchmarked against it and is imported elsewhere.
# The original snippet began mid-function, so the signature below is
# reconstructed from the call site.
def rejection_sample_ragged_base(indices, row_splits, max_size):
    N = row_splits.size - 1
    consumed = np.zeros((N,), dtype=bool)
    out = []
    for i in range(N):
        if len(out) >= max_size:
            break
        if not consumed[i]:
            # greedily keep point i and reject all of its neighbours
            consumed[indices[row_splits[i]:row_splits[i + 1]]] = True
            out.append(i)
    return np.array(out, dtype=np.uint32)

np.random.seed(123)
in_size = 10000   # assumed benchmark parameters; not given in the snippet
k = 8
max_dist = 0.05
max_size = 1000

x = np.random.uniform(size=(in_size, 2)).astype(dtype=np.float32)
tree = KDTree(x)
dists, indices = tree.query(x, k)
valid = dists < max_dist
indices = indices[valid]
row_lengths = np.count_nonzero(valid, axis=1)
row_splits = np.pad(np.cumsum(row_lengths), [[1, 0]], 'constant')

kwargs = dict(indices=indices, row_splits=row_splits, max_size=max_size)
num_runs = 100
print('cython implementation')
print(
    timeit(functools.partial(rejection_sample_ragged, **kwargs),
           number=num_runs) / num_runs)
print('python implementation')
print(
    timeit(functools.partial(rejection_sample_ragged_base, **kwargs),
           number=num_runs) / num_runs)
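
# Toy check (not from the original source): with three points where 0 and 1
# are mutual neighbours, the sampler keeps 0, rejects 1, then keeps 2.
toy_indices = np.array([0, 1, 0, 1, 2])  # flattened neighbour lists
toy_splits = np.array([0, 2, 4, 5])      # rows: [0, 1], [0, 1], [2]
print(rejection_sample_ragged_base(toy_indices, toy_splits, max_size=10))
# -> [0 2]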
class cholesky_NN(object):

    def __init__(self, xdata, ydata):
        #Do some tests here

        #Find data covariance
        cov = np.cov(xdata.T)
        #Cholesky decompose to make new basis
        L_mat = np.linalg.cholesky(cov)
        self.L_mat = np.linalg.inv(L_mat)
        #Transform xdata into new basis
        self.xtrain = xdata
        self.transf_x = np.array([np.dot(self.L_mat, x) for x in xdata])

        #DEBUG
        #plt.plot(xdata[:,0],xdata[:,1],'.',color='r')
        #plt.plot(self.transf_x[:,0],self.transf_x[:,1],'.')
        #plt.show()
        #sys.exit()

        #Store training
        self.ytrain = ydata

        #Build KDTree for quick lookup
        self.transf_xtree = KDTree(self.transf_x)

    def __call__(self, x, k=5):
        if k < 2:
            raise Exception("Need k>1")
        if x.ndim != self.xtrain[0].ndim:
            raise Exception("Requested x and training set do not have the "
                            "same number of dimensions.")

        #Change basis
        x0 = np.dot(self.L_mat, x)

        #Get nearest neighbors
        dist, loc = self.transf_xtree.query(x0, k=k)

        #Protect against division by zero
        dist = np.maximum(dist, 1e-15)
        weight = 1.0 / dist
        nearest_y = self.ytrain[loc]

        #Interpolate with weighted average
        if self.ytrain.ndim > 1:
            y_predict = np.array([np.average(y0, weights=weight)
                                  for y0 in nearest_y.T])
            testgood = all([test_good(y) for y in y_predict])
        elif self.ytrain.ndim == 1:
            y_predict = np.average(nearest_y, weights=weight)
            testgood = test_good(y_predict)
        else:
            raise Exception('The y training data has an unexpected dimension.')

        if not testgood:
            raise Exception('y prediction went wrong')

        return y_predict

    def train_dist_error_model(self, xtrain, ytrain, k=5):
        """Rather than learning a non-parametric error model, we can define
        a parametric error model instead and learn its parameters."""

        if xtrain.shape[0] != ytrain.shape[0]:
            raise TypeError('Xtrain and Ytrain do not have the same shape.')

        dist_list = []
        for x0 in xtrain:
            #Change basis
            x0 = np.dot(self.L_mat, x0)
            #Get nearest neighbors in original training set
            dist, loc = self.transf_xtree.query(x0, k=k)

            #Weighted density in ball for NN
            #dist = np.array([np.max([1e-15,d]) for d in dist])
            #weight = 1.0/dist
            #dist_list.append(np.sum(weight))

            dist_list.append(np.mean(dist))

        dist_list = np.array(dist_list)

        def error_model(dist, a, b, c):
            return a*dist + b*dist**c

        bestfit, cov = opt.curve_fit(error_model, dist_list, np.abs(ytrain),
                                     #bounds=((0.0,0.0,0.0),(np.inf,np.inf,np.inf)))
                                     bounds=((0.0, 0.0, 0.0), (1e1, 1e1, 1e1)))

        #print("this is bestfit:", bestfit)

        def new_error_model(xval):
            xval = np.dot(self.L_mat, xval)
            #Get nearest neighbors in original training set
            dist, loc = self.transf_xtree.query(xval, k=k)
            #Mean distance to NN
            dist = np.mean(dist)
            #dist = dist/bestfit[2]
            err_guess = bestfit[0]*dist + bestfit[1]*dist**bestfit[2]
            #rand_sign = np.random.rand() - 0.5
            #err_guess *= 1.0 if rand_sign>0.0 else -1.0
            return err_guess

        #DEBUG
        #plt.plot(dist_list, np.abs(ytrain),'bo')
        #plt.plot(dist_list, list(map(new_error_model, xtrain)),'ro')
        #plt.show()

        return new_error_model
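
# Hypothetical usage sketch (not from the original source). `test_good` is
# not defined in the snippet; a simple finiteness check is assumed here
# purely for illustration.
import numpy as np
from scipy.spatial import cKDTree as KDTree

def test_good(y):
    return np.isfinite(y)

rng = np.random.default_rng(1)
xdata = rng.normal(size=(200, 2))
ydata = xdata[:, 0] + 0.1 * rng.normal(size=200)

model = cholesky_NN(xdata, ydata)
print(model(np.array([0.5, -0.2]), k=5))  # inverse-distance weighted estimate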
class InvDistTree:
    """ As seen in
    http://stackoverflow.com/questions/3104781/inverse-distance-weighted-idw-interpolation-with-python

    inverse-distance-weighted interpolation using KDTree:

    invdisttree = InvDistTree( X, z )  -- data points, values
    interpol = invdisttree( q, num_near=3, eps=0, p=1, weights=None )
        interpolates z from the 3 points nearest each query point q;
        For example, interpol[ a query point q ]
        finds the 3 data points nearest q, at distances d1 d2 d3
        and returns the IDW average of the values z1 z2 z3
            (z1/d1 + z2/d2 + z3/d3)
            / (1/d1 + 1/d2 + 1/d3)
            = .55 z1 + .27 z2 + .18 z3  for distances 1 2 3

        q may be one point, or a batch of points.
        eps: approximate nearest, dist <= (1 + eps) * true nearest
        p: use 1 / distance**p
        weights: optional multipliers for 1 / distance**p, of the same shape as q
        stat (constructor argument): accumulate wsum, wn for average weights

    How many nearest neighbors should one take ?
    a) start with 8 11 14 .. 28 in 2d 3d 4d .. 10d; see Wendel's formula
    b) make 3 runs with num_near= e.g. 6 8 10, and look at the results --
        |interpol 6 - interpol 8| etc., or |f - interpol*| if you have f(q).
        I find that runtimes don't increase much at all with num_near -- ymmv.

    p=1, p=2 ?
        p=2 weights nearer points more, farther points less.
        In 2d, the circles around query points have areas ~ distance**2,
        so p=2 is inverse-area weighting. For example,
            (z1/area1 + z2/area2 + z3/area3)
            / (1/area1 + 1/area2 + 1/area3)
            = .74 z1 + .18 z2 + .08 z3  for distances 1 2 3
        Similarly, in 3d, p=3 is inverse-volume weighting.

    Scaling:
        if different X coordinates measure different things, Euclidean distance
        can be way off.  For example, if X0 is in the range 0 to 1
        but X1 0 to 1000, the X1 distances will swamp X0;
        rescale the data, i.e. make X0.std() ~= X1.std() .

    A nice property of IDW is that it's scale-free around query points:
    if I have values z1 z2 z3 from 3 points at distances d1 d2 d3,
    the IDW average
        (z1/d1 + z2/d2 + z3/d3)
        / (1/d1 + 1/d2 + 1/d3)
    is the same for distances 1 2 3, or 10 20 30 -- only the ratios matter.
    In contrast, the commonly-used Gaussian kernel exp( - (distance/h)**2 )
    is exceedingly sensitive to distance and to h.
    """
    # anykernel( dj / av dj ) is also scale-free
    # error analysis, |f(x) - idw(x)| ?

    def __init__(self, measured_points, measured_values, leafsize=10, stat=0):
        """
        @param measured_points: data point coordinates, shape (n, d)
        @param measured_values: values at the data points, shape (n,) or (n, ...)
        @param leafsize: KDTree leaf size
        @param stat: if nonzero, accumulate wsum, wn for average weights
        """
        assert len(measured_points) == len(measured_values), \
            "len(X) %d != len(z) %d" % (len(measured_points),
                                        len(measured_values))
        self.tree = KDTree(measured_points, leafsize=leafsize)  # build the tree
        self.z = measured_values
        self.stat = stat
        self.wn = 0
        self.wsum = None

    def __call__(self, new_points, num_near=6, eps=0, p=1, weights=None):
        """
        Call an interpolation with the trained data

        @param new_points: query point(s), one point or a batch
        @param num_near: number of nearby points to use
        @param eps: tolerance for approximate nearest neighbours
        @param p: 1<=p<=infinity. Which Minkowski p-norm to use.
            1 is the sum-of-absolute-values "Manhattan" distance
            2 is the usual Euclidean distance
            infinity is the maximum-coordinate-difference distance
        @param weights: optional multipliers for 1 / distance**p
        @return: interpolated values, one per query point
        """
        # query points must be real; the values z may be complex
        new_points = np.asarray(new_points, dtype=float)
        qdim = new_points.ndim
        if qdim == 1:
            new_points = np.array([new_points], dtype=float)
        if self.wsum is None:
            self.wsum = np.zeros(num_near)

        # num_near nearest neighbours of each query point:
        # self.distances : array of floats. The distances to the nearest
        #     neighbors. If x has shape tuple+(self.m,), then d has shape
        #     tuple+(k,). Missing neighbors are indicated with infinite
        #     distances.
        # self.ix : ndarray of ints. The locations of the neighbors in
        #     self.data. If x has shape tuple+(self.m,), then i has shape
        #     tuple+(k,). Missing neighbors are indicated with self.n.
        self.distances, self.ix = self.tree.query(new_points, k=num_near,
                                                  eps=eps)

        # declare the interpolation array
        interpol = np.empty((len(self.distances),) + np.shape(self.z[0]),
                            dtype=complex)

        # Perform the interpolation
        idx = 0
        for dist, ix in zip(self.distances, self.ix):
            if num_near == 1:
                wz = self.z[ix]
            elif dist[0] < 1e-10:
                # a query point coincides with a data point: take its value
                wz = self.z[ix[0]]
            else:
                # weight z s by 1/dist --
                w = 1 / np.power(dist, p)
                if weights is not None:
                    w *= weights[ix]  # >= 0
                w /= np.sum(w)
                wz = np.dot(w, self.z[ix])
                if self.stat:
                    self.wn += 1
                    self.wsum += w
            interpol[idx] = wz
            idx += 1

        return interpol if qdim > 1 else interpol[0]
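
# Hypothetical usage sketch (not from the original source): interpolate a
# smooth function from scattered 2-D samples.
import numpy as np
from scipy.spatial import cKDTree as KDTree

rng = np.random.default_rng(2)
X = rng.uniform(size=(500, 2))
z = np.sin(2 * np.pi * X[:, 0]) * np.cos(2 * np.pi * X[:, 1])

idw = InvDistTree(X, z)
q = np.array([[0.25, 0.25], [0.5, 0.75]])
print(idw(q, num_near=8, p=2).real)  # values come back as a complex array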