def center_overdensity(center, galaxy_data, max_velocity):
    """
    Compute the overdensity D of a cluster center against random sky positions.

    300 random (RA, dec) positions are drawn uniformly inside the bounding box
    of ``galaxy_data``. For each position, the galaxies of the velocity bin
    lying within the angular equivalent of 0.5 Mpc/h (at ``center.z``) are
    counted. The result is ``D = (center.N - mean(counts)) / rms(counts)``.

    Parameters
    ----------
    center: object exposing ``z`` and ``N`` attributes
        Center of interest (presumably a cluster.Cluster -- confirm).
    galaxy_data: ndarray
        Columns 0, 1 and 2 are read as ra, dec and z.
    max_velocity: velocity threshold
        Galaxies whose ``redshift_to_velocity`` value relative to ``center.z``
        exceeds this in absolute value are excluded.

    Returns
    -------
    D: float
        Overdensity of the center.
    """
    n_samples = 300

    # Random sky positions spanning the RA / dec extent of the galaxy sample.
    ra_column = galaxy_data[:, 0]
    dec_column = galaxy_data[:, 1]
    ra_random = np.random.uniform(
        low=min(ra_column), high=max(ra_column), size=n_samples)
    dec_random = np.random.uniform(
        low=min(dec_column), high=max(dec_column), size=n_samples)
    points = np.array([ra_random, dec_random]).transpose()
    assert points.shape == (n_samples, 2)

    # Keep only the galaxies within max_velocity of the center.
    relative_velocity = redshift_to_velocity(galaxy_data[:, 2], center.z)
    within_velocity = abs(relative_velocity) <= max_velocity
    velocity_bin = galaxy_data[within_velocity]

    # Count neighbours of every random position within the search radius.
    grid = GriSPy(velocity_bin[:, :2], metric="haversine")
    search_radius = linear_to_angular_dist(0.5 * u.Mpc / u.littleh,
                                           center.z).value
    _, neighbour_idx = grid.bubble_neighbors(
        points, distance_upper_bound=search_radius)

    # Mean and rms of N(0.5) over the random positions.
    counts = np.array([len(found) for found in neighbour_idx])
    mean_count = np.mean(counts)
    rms_count = np.sqrt(np.mean(counts**2))

    return (center.N - mean_count) / rms_count
def run(self, positions, delay=50, length=150):
    """Advance the simulation by one frame.

    Builds a periodic grid over ``positions``, finds every boid's neighbours
    within the larger of the alignment/cohesion and separation radii, runs
    each boid against its neighbours, and appends the values each boid
    returns to ``self.lists``. Then updates the frame counter and the
    record / stop / capture flags.

    Parameters
    ----------
    positions: array of boid positions passed to GriSPy
        Presumably one row per entry of ``self.boids`` -- confirm.
    delay: int, default 50
        Frame count at which ``self.record`` is switched on.
    length: int, default 150
        ``self.stop`` is switched on at frame ``delay + length``.
    """
    # Build the grid search and find nearby neighbours.
    wrap_around = {0: (0, self.width), 1: (0, self.height)}
    grid = GriSPy(positions, N_cells=10, periodic=wrap_around)
    reach = max(self.alignCohRadius, self.sepRadius)
    _, self.neighbor_indices = grid.bubble_neighbors(
        positions, distance_upper_bound=reach)

    # Run every boid and record the attributes it reports.
    for boid_no, boid in enumerate(self.boids):
        flock = [self.boids[k] for k in self.neighbor_indices[boid_no]]
        for slot, value in enumerate(boid.run(flock)):
            self.lists[slot].append(value)

    # Frame bookkeeping and halting criteria.
    self.frames += 1
    if self.frames == delay:
        self.record = True
    if self.frames == delay + length:
        self.stop = True

    # Screen capture is switched on for frame 100 only.
    if self.screen_cap and self.frames == 100:
        self.capture = True
    if self.screen_cap and self.frames == 101:
        self.capture = False
def consolidateSchools(schoolData, radius, verbose):
    """Drop one school from each pair of schools closer than ``radius``.

    For every school, all schools within the radius are looked up. When
    exactly one other school is found, the first index returned by the
    neighbour query is dropped from ``schoolData``. Locations with more than
    one neighbour are left untouched (probably a high density area).

    Parameters
    ----------
    schoolData: pandas.DataFrame
        Must contain 'Lat' and 'Lon' columns. Rows are dropped by index
        label, so this presumably expects a default 0..n-1 index -- confirm.
    radius: number
        Search radius. It is divided by 1000 and then converted with a
        degrees-per-km factor, so presumably given in metres -- confirm.
    verbose: bool
        Print progress messages.

    Returns
    -------
    pandas.DataFrame
        ``schoolData`` without the dropped rows and with a fresh index.
    """
    radius = radius / 1000

    if verbose:
        print('Reading.....')
    latlonArray = schoolData[['Lat', 'Lon']].to_numpy()

    # Build grid
    if verbose:
        print('Building Grid.....')
    gsp = GriSPy(latlonArray)

    degPerKM = 0.0089  # Degrees per 1 km
    upper_radii = radius * degPerKM

    numberDropped = 0
    for row in latlonArray:
        center = row.reshape(1, 2)

        # Indices of all schools within upper_radii of the current school.
        _, bubble_ind = gsp.bubble_neighbors(
            center, distance_upper_bound=upper_radii)
        bubble_ind = np.array(bubble_ind)
        schoolsInRadius = bubble_ind.size - 1

        # A bit of a hack: only drop when exactly one other school is found.
        # For later -- use multiple column matching (names etc).
        if schoolsInRadius == 1:
            # NOTE(review): the counter is incremented even when the label
            # was already dropped and errors='ignore' makes drop() a no-op.
            numberDropped += 1
            schoolData = schoolData.drop([bubble_ind[0][0]], errors='ignore')

    schoolData = schoolData.reset_index(drop=True)
    print('NUMBER DROPPED = ', numberDropped)

    # The original paused on a bare input() here, which blocks the caller and
    # raises EOFError when stdin is not interactive; it has been removed.
    return schoolData
def test_custom_distance_lev():
    """Check bubble_neighbors with a user-supplied Levenshtein metric.

    Uses a fixed seed so the neighbour indices found for the two centres can
    be compared against hard-coded expectations.
    """

    def levenshtein(c0, centres, dim):
        # Distance from the single point c0 to every row of centres.
        reference = tuple(c0)
        distances = np.empty(len(centres))
        for idx, c1 in enumerate(centres):
            distances[idx] = textdistance.levenshtein(reference, tuple(c1))
        return distances

    rng = np.random.RandomState(42)

    n_points = 10**3
    n_centres = 2
    n_dim = 2
    box_size = 100.0
    data = rng.uniform(0, box_size, size=(n_points, n_dim))
    centres = rng.uniform(0, box_size, size=(n_centres, n_dim))

    gsp = GriSPy(data, N_cells=20, metric=levenshtein)

    lev_dist, lev_ind = gsp.bubble_neighbors(
        centres, distance_upper_bound=10.0)

    expected_ind = (
        [
            648, 516, 705, 910, 533, 559, 61, 351, 954, 214, 90, 645, 846,
            818, 39, 433, 7, 700, 2, 364, 547, 427, 660, 548, 333, 246, 193,
            55, 83, 159, 684, 310, 777, 112, 535, 780, 334, 300, 467, 30,
            613, 564, 134, 534, 435, 901, 296, 800, 391, 321, 763, 208, 42,
            413, 97
        ],
        [
            580, 740, 498, 89, 610, 792, 259, 647, 58, 722, 360, 685, 552,
            619, 6, 555, 935, 268, 615, 661, 680, 817, 75, 919, 922, 927, 52,
            77, 859, 70, 544, 189, 340, 691, 453, 570, 126, 140, 67, 284,
            662, 590, 527
        ],
    )

    assert len(centres) == len(lev_dist) == len(lev_ind)
    for k in range(len(expected_ind)):
        assert np.all(lev_dist[k] == 2)
    for k, wanted in enumerate(expected_ind):
        assert np.all(lev_ind[k] == wanted)
def test_floatX_precision(self, floatX):
    """Check bubble_neighbors distances for data of dtype ``floatX``.

    Every returned neighbour must lie within the search radius (up to the
    dtype resolution), and the distance reported by GriSPy must match a
    directly computed Euclidean distance to within that resolution.
    """
    rng = np.random.default_rng(1234)
    points = rng.random(size=(1000, 3), dtype=floatX)
    queries = rng.random(size=(100, 3), dtype=floatX)

    radius = 0.2
    grid = GriSPy(points)
    found_dist, found_ind = grid.bubble_neighbors(
        queries, distance_upper_bound=radius
    )

    tolerance = np.finfo(floatX).resolution
    for q, neighbours in enumerate(found_ind):
        for slot, point_idx in enumerate(neighbours):
            offset = points[point_idx] - queries[q]
            direct = np.sqrt(np.sum(offset**2))
            assert_(direct <= radius + tolerance)

            reported = found_dist[q][slot]
            assert_(abs(direct - reported) <= tolerance)
def find_number_count(center, galaxies, distance=0.5 * u.Mpc / u.littleh):
    """
    Count the galaxies lying within a distance d of a center of interest.

    The number count N(d) is the number of rows of ``galaxies`` found around
    the center within ``distance``.

    Note: the search uses the haversine metric, so angular coordinates must
    be used.

    Parameters
    ----------
    center: ndarray or cluster.Cluster
        Center of interest. For an array, the first two entries are taken as
        the coordinates and the third as the redshift; for a Cluster the
        ``ra``, ``dec`` and ``z`` attributes are used.
    galaxies: ndarray
        Sample of galaxies to search; the first two columns are the
        coordinates.
    distance: quantity in [Mpc/littleh], default: 0.5*u.Mpc/u.littleh
        Distance d, converted to an angular distance at the center redshift.

    Returns
    -------
    int
        N(d), number of points around the center within distance d.
    """
    if not isinstance(center, Cluster):
        coords, z = center[:2], center[2]
    else:
        coords, z = [center.ra, center.dec], center.z

    grid = GriSPy(galaxies[:, :2], metric="haversine")

    # Convert the linear distance to an angular one at redshift z.
    angular_radius = linear_to_angular_dist(distance, z).value

    _, found_idx = grid.bubble_neighbors(
        np.array([coords]), distance_upper_bound=angular_radius)

    return len(galaxies[tuple(found_idx)])
def test_custom_distance_hamming():
    """Check bubble_neighbors with a user-supplied Hamming metric.

    Uses a fixed seed so the neighbour indices found for the two centres can
    be compared against hard-coded expectations.
    """

    def hamming(c0, centres, dim):
        # Distance from the single point c0 to every row of centres.
        origin = c0.reshape((-1, dim))
        return cdist(origin, centres, metric="hamming").reshape((-1, ))

    rng = np.random.RandomState(42)

    n_points = 10**3
    n_centres = 2
    n_dim = 2
    box_size = 100.0
    data = rng.uniform(0, box_size, size=(n_points, n_dim))
    centres = rng.uniform(0, box_size, size=(n_centres, n_dim))

    gsp = GriSPy(data, N_cells=20, metric=hamming)

    ham_dist, ham_ind = gsp.bubble_neighbors(
        centres, distance_upper_bound=10.0)

    expected_ind = (
        [
            648, 516, 705, 910, 533, 559, 61, 351, 954, 214, 90, 645, 846,
            818, 39, 433, 7, 700, 2, 364, 547, 427, 660, 548, 333, 246, 193,
            55, 83, 159, 684, 310, 777, 112, 535, 780, 334, 300, 467, 30,
            613, 564, 134, 534, 435, 901, 296, 800, 391, 321, 763, 208, 42,
            413, 97
        ],
        [
            580, 740, 498, 89, 610, 792, 259, 647, 58, 722, 360, 685, 552,
            619, 6, 555, 935, 268, 615, 661, 680, 817, 75, 919, 922, 927, 52,
            77, 859, 70, 544, 189, 340, 691, 453, 570, 126, 140, 67, 284,
            662, 590, 527
        ],
    )

    assert len(centres) == len(ham_dist) == len(ham_ind)
    for k in range(len(expected_ind)):
        assert np.all(ham_dist[k] == 1)
    for k, wanted in enumerate(expected_ind):
        assert np.all(ham_ind[k] == wanted)
def test_floatX_precision(dim, N_cells, floatX):
    """Check bubble_neighbors distances for ``floatX`` data in ``dim`` dimensions.

    Every returned neighbour must lie within the search radius, and the
    distance reported by GriSPy must agree with the directly computed norm
    to the number of decimals given by the dtype resolution.
    """
    rng = np.random.default_rng(1234)
    points = rng.random(size=(100, dim), dtype=floatX)
    queries = rng.random(size=(10, dim), dtype=floatX)

    radius = 0.2
    grid = GriSPy(points, N_cells=N_cells)
    found_dist, found_ind = grid.bubble_neighbors(
        queries, distance_upper_bound=radius)

    # Number of decimals to compare, derived from the dtype resolution.
    resolution = np.finfo(floatX).resolution
    n_decimals = np.abs(int(np.log10(resolution)))

    for q, neighbours in enumerate(found_ind):
        for slot, point_idx in enumerate(neighbours):
            reported = found_dist[q][slot]
            offset = points[point_idx] - queries[q]
            direct = np.linalg.norm(offset)

            assert (direct <= radius).all()
            npt.assert_almost_equal(direct, reported, n_decimals)
# Example script section: generate random data, run the three GriSPy neighbor
# queries on it and start the results figure.
# Lbox, Npoints, Ncentres and dim are expected to be defined earlier in the
# script.

# Fixed seed so the random data and centres are reproducible
np.random.seed(2)
data = np.random.uniform(0, Lbox, size=(Npoints, dim))
centres = np.random.uniform(0, Lbox, size=(Ncentres, dim))

# Grispy params
upper_radii = 15.0
lower_radii = 10.0
n_nearest = 100
# Axes 0 and 1 are both periodic over (0, Lbox)
periodic = {0: (0, Lbox), 1: (0, Lbox)}

# Build the grid with the data
gsp = GriSPy(data, periodic=periodic)

# Query for neighbors within upper_radii
bubble_dist, bubble_ind = gsp.bubble_neighbors(
    centres, distance_upper_bound=upper_radii)

# Query for neighbors in a shell within lower_radii and upper_radii
shell_dist, shell_ind = gsp.shell_neighbors(centres,
                                            distance_lower_bound=lower_radii,
                                            distance_upper_bound=upper_radii)

# Query for nth nearest neighbors
near_dist, near_ind = gsp.nearest_neighbors(centres, n=n_nearest)

# Plot results
plt.figure(4, figsize=(10, 3.2))

# First of three panels: all data points as small black dots
plt.subplot(1, 3, 1, aspect="equal")
plt.title("Bubble query")
plt.scatter(data[:, 0], data[:, 1], c="k", marker=".", s=3)
k2 = (i[j - 1][3:] + i[j][3:] + i[j + 1][3:] + (k[0, :] + k[1, :] + k[2, :]) * k1) * 0.33333 for x in range(3): if k2[x] >= k1[x]: k2[x] -= k1[x] vector.append([ k2, (i[j + 1][3:] - i[j - 1][3:] + k[0, :] * k1[0] - k[2, :] * k1[2]) ]) vector_np.append(k2) vector_np = np.array(vector_np) periodic = {0: (0, float(k1[0])), 1: (0, float(k1[1])), 2: (0, float(k1[2]))} gsp = GriSPy(vector_np) gsp.set_periodicity(periodic) bubble_dist, bubble_ind = gsp.bubble_neighbors(vector_np, distance_upper_bound=R) density_map = np.zeros((len(vector), 4), dtype=float) for i in range(len(vector)): D = 0 print(i) for j in range(len(bubble_ind[i])): if bubble_ind[i][j] != i: unit_point = vector[i][1] / np.linalg.norm(vector[i][1]) unit_neighbor = vector[bubble_ind[i][j]][1] / np.linalg.norm( vector[bubble_ind[i][j]][1]) dot = np.dot(unit_point, unit_neighbor) if dot > 1: dot = 1 elif dot < -1: dot = -1 factor = 1
def FoF(
    galaxy_data,
    candidate_centers,
    richness,
    overdensity,
    max_velocity=2000 * u.km / u.s,
    linking_length_factor=0.1,
    virial_radius=1.5 * u.Mpc / u.littleh,
):
    """
    The Friends-of-Friends algorithm is a clustering algorithm used to identify groups of particles.
    In this instance, FoF is used to identify clusters of galaxies.

    FoF uses a linking length, l, whereby galaxies within a distance l from another galaxy are
    linked directly (as friends) and galaxies within a distance l from its friends are linked
    indirectly (as friends of friends). This network of particles is considered a cluster.

    After locating all candidate clusters, overlapping clusters (centers lying within the virial
    radius and velocity bin of one another) have their shared galaxies reassigned through
    ``Cluster.remove_overlap``.

    A new cluster center is then defined as the brightest galaxy (by absolute magnitude) found
    within 0.25 * virial radius of the current center, provided it is not already a center.

    Finally, a cluster is only kept if it has N(0.5) >= 8 and has met the threshold richness
    and overdensity.

    The algorithm is sped up with:
    - numpy vectorization
    - grispy nearest neighbor implementation, which uses cell techniques to efficiently locate
      neighbors. This is preferred as it allows the use of the haversine metric for spherical
      coordinates.

    Parameters
    ----------
    galaxy_data: ndarray, shape (n,7)
        Galaxy data with compulsory properties: ['ra', 'dec', 'z', 'abs mag', 'id', 'LR', 'N']

    candidate_centers: ndarray, shape (m,7)
        Array of candidate centers with compulsory properties: ['ra', 'dec', 'z', 'abs mag', 'id', 'LR', 'N']
        Expected to be sorted by N(0.5) beforehand so larger clusters are prioritized.

    richness: integer
        Minimum number of member galaxies for a cluster to be kept.

    overdensity: float
        Minimum overdensity D for a cluster to be kept.

    max_velocity: float, units [km/s]
        Default value: 2000 km/s

    linking_length_factor: float
        Default value: 0.1

    virial_radius: float, units [Mpc/littleh]
        Default value: 1.5 hMpc

    Returns
    -------
    final_clusters: list of cluster.Cluster object
    """
    candidates = []
    # sep_arr = [] # tracks change in linking length with redshift

    # tracker identifies galaxies that have been included in another cluster previously to speed up algorithm.
    # candidate_centers was sorted by N(0.5) before to ensure larger clusters are prioritized
    tracker = np.ones(len(candidate_centers))

    # identify cluster candidates
    for i, center in enumerate(
            candidate_centers
    ):  # each row is a candidate center to search around
        if tracker[i]:
            velocity_bin = galaxy_data[
                abs(redshift_to_velocity(galaxy_data[:, 2], center[2])) <=
                max_velocity]  # select galaxies within max velocity

            virial_gsp = GriSPy(velocity_bin[:, :2], metric="haversine")

            # given virial radius is in proper distances, we convert to comoving distance to account for cosmological expansion.
            ang_virial_radius = linear_to_angular_dist(
                virial_radius, center[2]
            ).to("rad")  # convert proper virial radius to angular separation
            max_dist = (
                ang_virial_radius *
                cosmo.comoving_transverse_distance(center[2])).to(
                    u.Mpc, u.dimensionless_angles())  # convert to comoving distance
            max_dist = linear_to_angular_dist(
                max_dist, center[2]
            ).value  # convert comoving distance to angular separation

            virial_dist, virial_idx = virial_gsp.bubble_neighbors(
                np.array([center[:2]]), distance_upper_bound=max_dist
            )  # center must be a ndarray of (n,2)
            virial_points = velocity_bin[tuple(
                virial_idx)]  # convert to tuple for deprecation warning

            if (
                    len(virial_points) >= 12
            ):  # reject if <12 galaxies within virial radius (to save time)
                mean_sep = mean_separation(
                    len(virial_points),
                    center[2],
                    max_dist * u.degree,
                    max_velocity,
                    survey_area=1.7,
                )  # Mpc
                linking_length = (
                    linking_length_factor * mean_sep
                )  # determine transverse LL from local mean separation
                # sep_arr.append([linking_length.value, center[2]])
                linking_length = linear_to_angular_dist(
                    linking_length, center[2]).value  # fix linking length here

                # direct friends: galaxies within one linking length of the center
                f_gsp = GriSPy(virial_points[:, :2], metric="haversine")
                f_dist, f_idx = f_gsp.bubble_neighbors(
                    np.array([center[:2]]), distance_upper_bound=linking_length
                )  # select galaxies within linking length
                f_points = virial_points[tuple(f_idx)]
                member_galaxies = f_points

                # friends of friends: neighbours of every direct friend
                fof_dist, fof_idx = f_gsp.bubble_neighbors(
                    f_points[:, :2], distance_upper_bound=linking_length
                )  # select all galaxies within 2 linking lengths

                for idx in fof_idx:
                    fof_points = virial_points[idx]

                    # ensure no repeated points in cluster
                    mask = np.isin(
                        fof_points, member_galaxies, invert=True
                    )  # filter for points not already accounted for
                    # keep only rows where every column was flagged as new
                    vec_mask = np.isin(mask.sum(axis=1), center.shape[0])
                    fof_points = fof_points[vec_mask].reshape(
                        (-1, center.shape[0]))  # points of 2 linking lengths (FoF)

                    if len(fof_points):
                        member_galaxies = np.concatenate(
                            (member_galaxies, fof_points)
                        )  # merge all FoF points within 2 linking lengths

                if len(member_galaxies) >= richness:  # must have >= richness
                    c = Cluster(center, member_galaxies)
                    candidates.append(c)

                    if not i % 100:
                        logging.info(f"{i} " + c.__str__())

                    # locate centers within member_galaxies (centers of interest)
                    member_gal_id = member_galaxies[:, 4]
                    luminous_gal_id = candidate_centers[:, 4]
                    # coi_idx indexes into luminous_gal_id, i.e. into candidate_centers
                    coi, _, coi_idx = np.intersect1d(member_gal_id,
                                                     luminous_gal_id,
                                                     return_indices=True)

                    # update tracker to 0 for these points
                    # NOTE(review): this reuses the outer loop variable i; the
                    # outer enumerate rebinds it on the next iteration.
                    for i in coi_idx:
                        tracker[i] = 0

        # if len(candidates) >= 100: # for quick testing
        #     break

    # plot_clusters(candidates, flagging=False) # for quick check of clusters

    # tracks mean separation across redshift
    # sep_arr = np.array(sep_arr)
    # plt.plot(sep_arr[:,1], sep_arr[:,0], '.')
    # plt.show()

    # perform overlap removal and merger
    print("Performing overlap removal")
    candidate_clusters = np.array([
        [c.ra, c.dec, c.z, c.gal_id] for c in candidates
    ])  # get specific attributes from candidate center sample
    candidates = np.array(candidates)
    merged_candidates = candidates.copy()
    # galaxy id of each candidate, aligned with candidates / merged_candidates
    gal_id_space = candidate_clusters[:, 3]

    for center in candidates:
        # identify overlapping centers (centers lying within virial radius of current cluster)
        velocity_bin = candidate_clusters[
            abs(redshift_to_velocity(candidate_clusters[:, 2], center.z)) <=
            max_velocity]  # select galaxies within max velocity
        center_gsp = GriSPy(velocity_bin[:, :2], metric="haversine")
        c_coords = [center.ra, center.dec]
        max_dist = linear_to_angular_dist(
            virial_radius,
            center.z).value  # convert virial radius to angular distance
        c_dist, c_idx = center_gsp.bubble_neighbors(
            np.array([c_coords]),
            distance_upper_bound=max_dist)  # center must be a ndarray of (n,2)
        c_points = velocity_bin[tuple(c_idx)]

        # merge each overlapping cluster
        if len(c_points):
            for c in c_points:
                # look up the Cluster object whose gal_id matches this row
                c = candidates[gal_id_space == c[-1]][0]

                if center.gal_id == c.gal_id:  # if same center, ignore
                    continue

                # modify the cluster's galaxies in merged_candidates array
                if len(c.galaxies) and len(
                        center.galaxies):  # check both clusters are not empty
                    S = setdiff2d(
                        c.galaxies,
                        center.galaxies)  # identify overlapping galaxies
                    if len(S):
                        new_c = merged_candidates[gal_id_space == c.gal_id][
                            0]  # c from merged_candidates
                        new_center = merged_candidates[
                            gal_id_space == center.gal_id][
                                0]  # center from merged_candidates

                        c_galaxies, center_galaxies = c.remove_overlap(center)
                        new_c.galaxies = c_galaxies
                        new_center.galaxies = center_galaxies

    merged_candidates = np.array([
        c for c in merged_candidates if c.richness >= richness
    ])  # select only clusters >= richness

    if len(merged_candidates) >= len(candidates):
        logging.warning("No candidates were merged!")

    bcg_clusters = merged_candidates.copy()

    # replace candidate center with brightest galaxy in cluster
    print("Searching for BCGs")
    merged_candidates = sorted(merged_candidates,
                               key=lambda x: x.N,
                               reverse=True)  # sort by N

    for center in merged_candidates:
        # ids of all current centers, refreshed because bcg_clusters changes below
        bcg_space_gal_id = np.array([c.gal_id for c in bcg_clusters])

        # identify galaxies within 0.25*virial radius
        cluster_gsp = GriSPy(
            center.galaxies[:, :2],
            metric="haversine")  # for galaxies within a cluster
        c_coords = [center.ra, center.dec]
        max_dist = 0.25 * (linear_to_angular_dist(virial_radius,
                                                  center.z).value
                           )  # convert virial radius to angular distance
        c_dist, c_idx = cluster_gsp.bubble_neighbors(
            np.array([c_coords]),
            distance_upper_bound=max_dist)  # center must be a ndarray of (n,2)
        bcg_arr = center.galaxies[tuple(c_idx)]

        if len(bcg_arr) and len(
                center.galaxies
        ):  # check for galaxies within 0.25*virial radius
            mag_sort = bcg_arr[bcg_arr[:, 3].argsort(
            )]  # sort selected galaxies by abs mag (brightness)
            mask = np.isin(
                mag_sort[:, 4], bcg_space_gal_id, invert=True
            )  # filter for galaxies that are not existing centers
            mag_sort = mag_sort[mask]

            if len(mag_sort):
                bcg = mag_sort[0]  # brightest cluster galaxy (bcg)

                # if bcg brighter than current center, replace it as center
                if (abs(bcg[3]) > abs(center.bcg_absMag)) and (bcg[4] !=
                                                               center.gal_id):
                    new_cluster = Cluster(
                        bcg, center.galaxies)  # initialize new center

                    # NOTE(review): the comparison below is list == gal_id; it is
                    # only elementwise if gal_id is a numpy scalar -- confirm.
                    bcg_clusters = np.delete(
                        bcg_clusters,
                        np.where([c.gal_id
                                  for c in bcg_clusters] == center.gal_id),
                    )
                    bcg_clusters = np.concatenate(
                        (bcg_clusters,
                         np.array([new_cluster])))  # add new center to array

    bcg_clusters = np.array([
        c for c in bcg_clusters if c.richness >= richness
    ])  # select only clusters >= richness

    final_clusters = []

    # N(0.5) and galaxy overdensity
    print("Selecting appropriate clusters")
    for center in bcg_clusters:
        center.N = find_number_count(center,
                                     center.galaxies,
                                     distance=0.5 * u.Mpc /
                                     u.littleh)  # find number count N(0.5)
        center.D = center_overdensity(center, galaxy_data,
                                      max_velocity)  # find overdensity D

        # Initialize the cluster only if N(0.5) >= 8 and D >= overdensity
        if ((center.N >= 8) and (center.richness >= richness)
                and (center.D >= overdensity)):
            final_clusters.append(center)

    return final_clusters
def findNumberOfNeighbors(schoolData, radius, verbose):
    """Count nearby schools and find the nearest-neighbour distance per school.

    For every row of ``schoolData`` the number of other schools within
    ``radius`` and the distance returned by a 1-nearest-neighbour query are
    collected.

    Parameters
    ----------
    schoolData: pandas.DataFrame
        Must contain 'Lat' and 'Lon' columns.
    radius: number
        Search radius; multiplied by a degrees-per-km factor, so presumably
        given in km -- confirm.
    verbose: bool
        Print progress and per-school diagnostics.

    Returns
    -------
    pandas.DataFrame
        One row per school with columns 'numPoints' (schools within the
        radius, excluding the school itself) and 'nearestNeighbor' (nearest
        distance converted back with the degrees-per-km factor).
    """
    if verbose:
        print('Reading.....')
    latlonArray = schoolData[['Lat', 'Lon']].to_numpy()

    # Build grid
    if verbose:
        print('Building Grid.....')
    gsp = GriSPy(latlonArray)

    degPerKM = 0.0089  # Degrees per 1 km
    upper_radii = radius * degPerKM
    n_nearest = 1

    resultsList = []
    for i, row in enumerate(latlonArray):
        center = row.reshape(1, 2)

        # Find all other points in latlonArray which are within upper_radii
        # from the current center point
        _, bubble_ind = gsp.bubble_neighbors(
            center, distance_upper_bound=upper_radii)

        # Compute the distance to the nearest location
        # NOTE(review): the query point is itself part of the grid, so with
        # n=1 the nearest hit may be the school itself at distance 0 --
        # confirm against GriSPy's behaviour.
        near_dist, near_ind = gsp.nearest_neighbors(center, n=n_nearest)

        bubble_ind = np.array(bubble_ind)
        numNearby = bubble_ind.size - 1

        # near_dist holds one array per centre; take the scalar explicitly
        # because float() on a non-0-d array is deprecated in NumPy.
        nearestKM = float(near_dist[0][0] / degPerKM)

        # Append number of locations within certain distance, closest point
        resultsList.append([numNearby, nearestKM])

        if i == 0 and verbose:
            print("Number of Nearby Schools", numNearby)
            print("Nearest School Location:", latlonArray[tuple(near_ind)])

        if verbose:
            if bubble_ind.size == 1:
                print("----->School Index:", i)
                print("School Location:", latlonArray[i])
                print("Number of Nearby Schools", numNearby)
                # Index with a tuple, as in the first-school print above; a
                # bare list of arrays is interpreted differently across
                # NumPy versions.
                print("Nearest School Location:", latlonArray[tuple(near_ind)])
                print("Nearest School Distance:", near_dist[0] / degPerKM)

    resultDF = pd.DataFrame(resultsList,
                            columns=['numPoints', 'nearestNeighbor'])
    return resultDF