def FoF(
    galaxy_data,
    candidate_centers,
    richness,
    overdensity,
    max_velocity=2000 * u.km / u.s,
    linking_length_factor=0.1,
    virial_radius=1.5 * u.Mpc / u.littleh,
):
    """
    Identify galaxy clusters with a Friends-of-Friends (FoF) search.

    FoF uses a linking length, l: galaxies within a distance l of the
    candidate center are linked directly (friends), and galaxies within a
    distance l of those friends are linked indirectly (friends of friends).
    The linked galaxies form a candidate cluster.

    The function runs four stages:

    1. For each candidate center, collect friends and friends-of-friends
       among the galaxies inside the velocity window and the virial radius.
       Groups with at least ``richness`` members become candidates.
    2. Remove overlaps between candidates whose centers lie within one
       virial radius (and within ``max_velocity``) of each other.
    3. Re-center each cluster on its brightest member within
       0.25 * ``virial_radius`` of the current center, when that member is
       brighter than the current center and is not already a center.
    4. Recompute N(0.5) and the overdensity D and keep clusters with
       N(0.5) >= 8, richness >= ``richness`` and D >= ``overdensity``.

    Neighbor searches use GriSPy with the haversine metric.

    Parameters
    ----------
    galaxy_data: ndarray, shape (n,7)
        Galaxy data with compulsory properties:
        ['ra', 'dec', 'z', 'abs mag', 'id', 'LR', 'N']

    candidate_centers: ndarray, shape (m,7)
        Array of candidate centers with compulsory properties:
        ['ra', 'dec', 'z', 'abs mag', 'id', 'LR', 'N'].
        Expected to be pre-sorted by N(0.5) so that larger clusters are
        visited first.

    richness: integer
        Minimum number of member galaxies for a cluster.

    overdensity: float
        Minimum overdensity D for a cluster to be kept.

    max_velocity: float, units [km/s]
        Default value: 2000 km/s

    linking_length_factor: float
        Default value: 0.1

    virial_radius: float, units [Mpc/littleh]
        Default value: 1.5 hMpc

    Returns
    -------
    final_clusters: list of cluster.Cluster object
        Empty list when no candidate reaches the richness threshold.
    """
    candidates = _find_fof_candidates(
        galaxy_data,
        candidate_centers,
        richness,
        max_velocity,
        linking_length_factor,
        virial_radius,
    )
    if not candidates:
        # Without this guard the overlap stage indexes a 1-D empty array
        # with [:, 3] and raises IndexError.
        logging.warning("No cluster candidates met the richness threshold.")
        return []

    # perform overlap removal and merger
    print("Performing overlap removal")
    merged_candidates = _remove_cluster_overlaps(
        candidates, richness, max_velocity, virial_radius
    )

    # replace candidate center with brightest galaxy in cluster
    print("Searching for BCGs")
    bcg_clusters = _recenter_on_bcgs(merged_candidates, richness, virial_radius)

    # N(0.5) and galaxy overdensity
    print("Selecting appropriate clusters")
    final_clusters = []
    for center in bcg_clusters:
        center.N = find_number_count(
            center, center.galaxies, distance=0.5 * u.Mpc / u.littleh
        )  # number count N(0.5)
        center.D = center_overdensity(center, galaxy_data, max_velocity)

        # keep the cluster only if N(0.5) >= 8, it is still rich enough
        # and D >= overdensity
        if (
            center.N >= 8
            and center.richness >= richness
            and center.D >= overdensity
        ):
            final_clusters.append(center)

    return final_clusters


def _find_fof_candidates(
    galaxy_data,
    candidate_centers,
    richness,
    max_velocity,
    linking_length_factor,
    virial_radius,
):
    """Return Cluster candidates built from friends and friends-of-friends."""
    candidates = []

    # tracker[i] == 0 marks a candidate center already absorbed as a member
    # of an earlier candidate, so it is not searched again.
    # NOTE(review): the tracker is updated only for groups that passed the
    # richness cut -- confirm this matches the intended nesting.
    tracker = np.ones(len(candidate_centers))

    for i, center in enumerate(candidate_centers):
        if not tracker[i]:
            continue

        n_props = center.shape[0]  # number of columns per galaxy row

        # select galaxies within max velocity of the center
        in_velocity = (
            abs(redshift_to_velocity(galaxy_data[:, 2], center[2]))
            <= max_velocity
        )
        velocity_bin = galaxy_data[in_velocity]
        virial_gsp = GriSPy(velocity_bin[:, :2], metric="haversine")

        # The virial radius is given as a proper distance; convert it to a
        # comoving distance to account for cosmological expansion, then
        # back to an angular separation for the neighbor search.
        ang_virial_radius = linear_to_angular_dist(
            virial_radius, center[2]
        ).to("rad")
        comoving_radius = (
            ang_virial_radius * cosmo.comoving_transverse_distance(center[2])
        ).to(u.Mpc, u.dimensionless_angles())
        max_dist = linear_to_angular_dist(comoving_radius, center[2]).value

        center_coords = np.array([center[:2]])  # query must be shape (n,2)
        _, virial_idx = virial_gsp.bubble_neighbors(
            center_coords, distance_upper_bound=max_dist
        )
        # index with a tuple to avoid NumPy's list-indexing deprecation
        virial_points = velocity_bin[tuple(virial_idx)]

        # reject if <12 galaxies within virial radius (to save time)
        if len(virial_points) < 12:
            continue

        mean_sep = mean_separation(
            len(virial_points),
            center[2],
            max_dist * u.degree,
            max_velocity,
            survey_area=1.7,
        )  # Mpc
        # transverse linking length from the local mean separation,
        # expressed as an angular separation at the center's redshift
        linking_length = linear_to_angular_dist(
            linking_length_factor * mean_sep, center[2]
        ).value

        # friends: galaxies within one linking length of the center
        f_gsp = GriSPy(virial_points[:, :2], metric="haversine")
        _, f_idx = f_gsp.bubble_neighbors(
            center_coords, distance_upper_bound=linking_length
        )
        f_points = virial_points[tuple(f_idx)]
        member_galaxies = f_points

        # friends of friends: galaxies within one linking length of a friend
        _, fof_idx = f_gsp.bubble_neighbors(
            f_points[:, :2], distance_upper_bound=linking_length
        )
        for idx in fof_idx:
            fof_points = virial_points[idx]

            # Keep only rows not already accounted for.
            # NOTE(review): np.isin compares element-wise against every
            # value in member_galaxies, so a row is kept only if none of
            # its columns matches any member value; a new galaxy sharing
            # e.g. the same N with a member is dropped. Matching on the id
            # column would be stricter, but changes cluster membership.
            not_member = np.isin(fof_points, member_galaxies, invert=True)
            is_new_row = not_member.sum(axis=1) == n_props
            fof_points = fof_points[is_new_row].reshape((-1, n_props))

            if len(fof_points):
                member_galaxies = np.concatenate((member_galaxies, fof_points))

        if len(member_galaxies) < richness:  # must have >= richness
            continue

        cluster = Cluster(center, member_galaxies)
        candidates.append(cluster)

        if i % 100 == 0:
            logging.info("%s %s", i, cluster)

        # Candidate centers that are members of this cluster do not need
        # their own search: zero their tracker entries. The third return
        # value indexes into candidate_centers.
        _, _, coi_idx = np.intersect1d(
            member_galaxies[:, 4], candidate_centers[:, 4], return_indices=True
        )
        tracker[coi_idx] = 0

    return candidates


def _remove_cluster_overlaps(candidates, richness, max_velocity, virial_radius):
    """Resolve overlaps between nearby candidates; return the rich survivors."""
    # [ra, dec, z, gal_id] of every candidate, for the neighbor search
    candidate_clusters = np.array(
        [[c.ra, c.dec, c.z, c.gal_id] for c in candidates]
    )
    candidates = np.array(candidates)
    # NOTE: ndarray.copy() is shallow for object arrays, so this array
    # refers to the same Cluster instances as `candidates`.
    merged_candidates = candidates.copy()
    gal_id_space = candidate_clusters[:, 3]

    for center in candidates:
        # centers lying within max velocity and one virial radius of
        # the current cluster
        in_velocity = (
            abs(redshift_to_velocity(candidate_clusters[:, 2], center.z))
            <= max_velocity
        )
        velocity_bin = candidate_clusters[in_velocity]
        center_gsp = GriSPy(velocity_bin[:, :2], metric="haversine")
        max_dist = linear_to_angular_dist(virial_radius, center.z).value
        _, c_idx = center_gsp.bubble_neighbors(
            np.array([[center.ra, center.dec]]), distance_upper_bound=max_dist
        )  # query must be shape (n,2)

        for row in velocity_bin[tuple(c_idx)]:
            other = candidates[gal_id_space == row[-1]][0]

            if center.gal_id == other.gal_id:  # same center, ignore
                continue
            if not (len(other.galaxies) and len(center.galaxies)):
                continue  # one of the clusters is already empty
            if not len(setdiff2d(other.galaxies, center.galaxies)):
                continue

            # update the galaxies of both clusters in merged_candidates
            new_other = merged_candidates[gal_id_space == other.gal_id][0]
            new_center = merged_candidates[gal_id_space == center.gal_id][0]
            other_galaxies, center_galaxies = other.remove_overlap(center)
            new_other.galaxies = other_galaxies
            new_center.galaxies = center_galaxies

    # select only clusters >= richness
    merged_candidates = np.array(
        [c for c in merged_candidates if c.richness >= richness]
    )
    if len(merged_candidates) >= len(candidates):
        logging.warning("No candidates were merged!")
    return merged_candidates


def _recenter_on_bcgs(merged_candidates, richness, virial_radius):
    """Re-center clusters on their brightest galaxy; return the rich ones."""
    bcg_clusters = merged_candidates.copy()

    # visit clusters in order of decreasing N
    for center in sorted(merged_candidates, key=lambda x: x.N, reverse=True):
        bcg_space_gal_id = np.array([c.gal_id for c in bcg_clusters])

        # member galaxies within 0.25 * virial radius of the center
        cluster_gsp = GriSPy(center.galaxies[:, :2], metric="haversine")
        max_dist = 0.25 * linear_to_angular_dist(virial_radius, center.z).value
        _, c_idx = cluster_gsp.bubble_neighbors(
            np.array([[center.ra, center.dec]]), distance_upper_bound=max_dist
        )  # query must be shape (n,2)
        bcg_arr = center.galaxies[tuple(c_idx)]

        if not (len(bcg_arr) and len(center.galaxies)):
            continue

        # sort by abs mag (ascending) and drop galaxies that already are
        # the center of another cluster
        mag_sort = bcg_arr[bcg_arr[:, 3].argsort()]
        is_free = np.isin(mag_sort[:, 4], bcg_space_gal_id, invert=True)
        mag_sort = mag_sort[is_free]
        if not len(mag_sort):
            continue

        bcg = mag_sort[0]  # brightest cluster galaxy (bcg)

        # if the bcg is brighter than the current center, make it the center
        if abs(bcg[3]) > abs(center.bcg_absMag) and bcg[4] != center.gal_id:
            new_cluster = Cluster(bcg, center.galaxies)
            # Drop the old center with an explicit boolean mask. Comparing
            # a Python list with a scalar only yields a mask when the
            # scalar is a NumPy type; otherwise nothing would be removed.
            bcg_clusters = bcg_clusters[bcg_space_gal_id != center.gal_id]
            bcg_clusters = np.concatenate(
                (bcg_clusters, np.array([new_cluster]))
            )

    # select only clusters >= richness
    return np.array([c for c in bcg_clusters if c.richness >= richness])