コード例 #1
0
def FoF(
    galaxy_data,
    candidate_centers,
    richness,
    overdensity,
    max_velocity=2000 * u.km / u.s,
    linking_length_factor=0.1,
    virial_radius=1.5 * u.Mpc / u.littleh,
):
    """
    Identify galaxy clusters with a Friends-of-Friends (FoF) search.

    FoF uses a linking length, l: galaxies within a distance l of the
    candidate center are linked directly (friends), and galaxies within a
    distance l of any friend are linked indirectly (friends of friends).
    The linked galaxies form one cluster candidate.

    The function runs four stages:

    1. Candidate search. For every candidate center that has not already been
       absorbed as a member of an earlier candidate, galaxies inside the
       velocity window and the virial radius are selected, a linking length is
       derived from their mean separation, and friends plus
       friends-of-friends are collected. A candidate is kept when it has at
       least ``richness`` members.
    2. Overlap removal. For every pair of candidates whose centers lie within
       the virial radius (and velocity window) of each other, overlapping
       galaxies are reassigned through ``Cluster.remove_overlap``. Candidates
       that fall below ``richness`` afterwards are dropped.
    3. BCG search. Within 0.25 * ``virial_radius`` of each center, the
       brightest member that is not already a center replaces the current
       center if it is brighter.
    4. Final selection. ``N`` (number count within 0.5 Mpc/h) and ``D``
       (overdensity) are recomputed; a cluster is returned only if
       ``N >= 8``, ``richness`` is met and ``D >= overdensity``.

    Neighbor queries use GriSPy with the haversine metric.

    Parameters
    ----------
    galaxy_data: ndarray, shape (n,7)
        Galaxy data with compulsory properties, in this column order:
        ['ra', 'dec', 'z', 'abs mag', 'id', 'LR', 'N']

    candidate_centers: ndarray, shape (m,7)
        Array of candidate centers with the same columns as ``galaxy_data``.
        Per the original author's note, it is expected to be pre-sorted by
        N(0.5) so that larger clusters are processed first.

    richness: integer
        Minimum number of member galaxies a cluster must have.

    overdensity: float
        Minimum overdensity ``D`` a cluster must have.

    max_velocity: astropy quantity, units [km/s]
        Half-width of the velocity window around a center.
        Default value: 2000 km/s

    linking_length_factor: float
        Fraction of the local mean separation used as linking length.
        Default value: 0.1

    virial_radius: astropy quantity, units [Mpc/littleh]
        Default value: 1.5 Mpc/h

    Returns
    -------
    final_clusters: list of cluster.Cluster object
        Clusters passing the final N, richness and overdensity cuts. An empty
        list is returned when no candidate is found.

    """
    candidates = []

    # tracker[k] == 1 while candidate_centers[k] is still eligible as a
    # center; it is zeroed once that galaxy is a member of an accepted
    # candidate, so it is not searched again.
    tracker = np.ones(len(candidate_centers))
    luminous_gal_id = candidate_centers[:, 4]  # ids of all candidate centers

    # ---- Stage 1: identify cluster candidates ----
    for i, center in enumerate(candidate_centers):
        if not tracker[i]:
            continue

        # select galaxies within max velocity of the center
        velocity_bin = galaxy_data[
            abs(redshift_to_velocity(galaxy_data[:, 2], center[2])) <=
            max_velocity]

        virial_gsp = GriSPy(velocity_bin[:, :2], metric="haversine")

        # The given virial radius is a proper distance; it is converted to a
        # comoving distance to account for cosmological expansion, then back
        # to an angular separation for the neighbor query.
        ang_virial_radius = linear_to_angular_dist(
            virial_radius, center[2]).to("rad")
        max_dist = (
            ang_virial_radius *
            cosmo.comoving_transverse_distance(center[2])).to(
                u.Mpc, u.dimensionless_angles())
        max_dist = linear_to_angular_dist(max_dist, center[2]).value

        # the query center must be an ndarray of shape (n,2)
        _, virial_idx = virial_gsp.bubble_neighbors(
            np.array([center[:2]]), distance_upper_bound=max_dist)
        # index with a tuple to avoid a numpy deprecation warning
        virial_points = velocity_bin[tuple(virial_idx)]

        # reject if <12 galaxies within virial radius (to save time)
        if len(virial_points) < 12:
            continue

        mean_sep = mean_separation(
            len(virial_points),
            center[2],
            max_dist * u.degree,
            max_velocity,
            survey_area=1.7,
        )  # Mpc
        # transverse linking length from the local mean separation,
        # expressed as an angular separation at the center's redshift
        linking_length = linking_length_factor * mean_sep
        linking_length = linear_to_angular_dist(
            linking_length, center[2]).value

        f_gsp = GriSPy(virial_points[:, :2], metric="haversine")

        # friends: galaxies within one linking length of the center
        _, f_idx = f_gsp.bubble_neighbors(
            np.array([center[:2]]), distance_upper_bound=linking_length)
        f_points = virial_points[tuple(f_idx)]

        # friends of friends: galaxies within one linking length of any friend
        member_galaxies = f_points
        _, fof_idx = f_gsp.bubble_neighbors(
            f_points[:, :2], distance_upper_bound=linking_length)

        for idx in fof_idx:
            fof_points = virial_points[idx]

            # Keep only galaxies not yet in the cluster. Membership is decided
            # by the id column: comparing every column element-wise would
            # discard a galaxy that merely shares one value (e.g. the same N)
            # with an existing member.
            is_new = np.isin(
                fof_points[:, 4], member_galaxies[:, 4], invert=True)
            fof_points = fof_points[is_new]

            if len(fof_points):
                member_galaxies = np.concatenate(
                    (member_galaxies, fof_points))

        if len(member_galaxies) < richness:
            continue

        c = Cluster(center, member_galaxies)
        candidates.append(c)

        if not i % 100:
            logging.info("%s %s", i, c)

        # Candidate centers that are members of this cluster must not seed
        # their own search later; coi_idx indexes into candidate_centers.
        _, _, coi_idx = np.intersect1d(
            member_galaxies[:, 4], luminous_gal_id, return_indices=True)
        tracker[coi_idx] = 0

    # Without candidates the 2-D indexing below would fail on an empty array.
    if not candidates:
        logging.warning("No cluster candidates were found!")
        return []

    # ---- Stage 2: overlap removal and merger ----
    print("Performing overlap removal")
    candidate_clusters = np.array([
        [c.ra, c.dec, c.z, c.gal_id] for c in candidates
    ])  # columns: ra, dec, z, gal_id of every candidate center
    candidates = np.array(candidates)
    # NOTE(review): copying an object array copies references only, so the
    # entries of merged_candidates are presumably the same Cluster objects as
    # in candidates -- confirm this aliasing is intended.
    merged_candidates = candidates.copy()
    gal_id_space = candidate_clusters[:, 3]

    for center in candidates:
        # overlapping centers: centers lying within the velocity window and
        # the virial radius of the current cluster
        velocity_bin = candidate_clusters[
            abs(redshift_to_velocity(candidate_clusters[:, 2], center.z)) <=
            max_velocity]

        center_gsp = GriSPy(velocity_bin[:, :2], metric="haversine")
        max_dist = linear_to_angular_dist(
            virial_radius, center.z).value  # virial radius as angular distance
        _, c_idx = center_gsp.bubble_neighbors(
            np.array([[center.ra, center.dec]]),
            distance_upper_bound=max_dist)
        c_points = velocity_bin[tuple(c_idx)]

        for row in c_points:
            other = candidates[gal_id_space == row[-1]][0]

            if center.gal_id == other.gal_id:  # same center, ignore
                continue
            if not (len(other.galaxies) and len(center.galaxies)):
                continue  # one of the clusters is empty
            if not len(setdiff2d(other.galaxies, center.galaxies)):
                continue

            # write the de-overlapped member lists to merged_candidates
            new_other = merged_candidates[gal_id_space == other.gal_id][0]
            new_center = merged_candidates[gal_id_space == center.gal_id][0]

            other_galaxies, center_galaxies = other.remove_overlap(center)
            new_other.galaxies = other_galaxies
            new_center.galaxies = center_galaxies

    merged_candidates = np.array([
        c for c in merged_candidates if c.richness >= richness
    ])  # select only clusters >= richness
    if len(merged_candidates) >= len(candidates):
        logging.warning("No candidates were merged!")

    # ---- Stage 3: replace candidate center with brightest cluster galaxy ----
    bcg_clusters = merged_candidates.copy()

    print("Searching for BCGs")
    merged_candidates = sorted(merged_candidates,
                               key=lambda x: x.N,
                               reverse=True)  # sort by N, largest first

    for center in merged_candidates:
        # ids of all current centers, aligned with bcg_clusters
        bcg_space_gal_id = np.array([c.gal_id for c in bcg_clusters])

        # member galaxies within 0.25*virial radius of the center
        cluster_gsp = GriSPy(center.galaxies[:, :2], metric="haversine")
        max_dist = 0.25 * linear_to_angular_dist(virial_radius,
                                                 center.z).value
        _, c_idx = cluster_gsp.bubble_neighbors(
            np.array([[center.ra, center.dec]]),
            distance_upper_bound=max_dist)
        bcg_arr = center.galaxies[tuple(c_idx)]

        if not (len(bcg_arr) and len(center.galaxies)):
            continue

        # sort by abs mag ascending, then drop galaxies that already are
        # centers of some cluster
        mag_sort = bcg_arr[bcg_arr[:, 3].argsort()]
        mag_sort = mag_sort[np.isin(mag_sort[:, 4],
                                    bcg_space_gal_id,
                                    invert=True)]
        if not len(mag_sort):
            continue

        bcg = mag_sort[0]  # brightest cluster galaxy (bcg)

        # if bcg brighter than current center, replace it as center
        if (abs(bcg[3]) > abs(center.bcg_absMag)) and (bcg[4] !=
                                                       center.gal_id):
            new_cluster = Cluster(bcg, center.galaxies)

            # Drop the old center with a boolean mask over the id array;
            # comparing a plain list with a scalar is only element-wise when
            # the scalar happens to be a numpy type.
            bcg_clusters = bcg_clusters[bcg_space_gal_id != center.gal_id]
            bcg_clusters = np.concatenate(
                (bcg_clusters, np.array([new_cluster])))

    bcg_clusters = np.array([
        c for c in bcg_clusters if c.richness >= richness
    ])  # select only clusters >= richness

    # ---- Stage 4: N(0.5) and galaxy overdensity cuts ----
    final_clusters = []

    print("Selecting appropriate clusters")
    for center in bcg_clusters:
        center.N = find_number_count(center,
                                     center.galaxies,
                                     distance=0.5 * u.Mpc /
                                     u.littleh)  # number count N(0.5)
        center.D = center_overdensity(center, galaxy_data,
                                      max_velocity)  # overdensity D

        # keep the cluster only if N(0.5) >= 8 and D >= overdensity
        if ((center.N >= 8) and (center.richness >= richness)
                and (center.D >= overdensity)):
            final_clusters.append(center)

    return final_clusters