Beispiel #1
def center_overdensity(center, galaxy_data,
                       max_velocity):  # calculate overdensity of cluster
    """Return the overdensity D of a cluster center relative to random fields.

    300 random (RA, dec) positions are drawn uniformly inside the bounding
    box of ``galaxy_data``; for each one the number of galaxies within the
    angular equivalent of 0.5 Mpc/h (at ``center.z``) is counted, using only
    galaxies whose velocity offset from ``center.z`` is within
    ``max_velocity``.

    Parameters
    ----------
    center : object exposing ``z`` and ``N`` attributes
        Cluster center; ``N`` is compared against the random-field counts.
    galaxy_data : ndarray
        Column 0 is used as RA, column 1 as dec and column 2 as redshift.
    max_velocity : same type ``redshift_to_velocity`` returns
        Upper bound on the absolute velocity offset of selected galaxies.

    Returns
    -------
    D : float
        ``(center.N - mean(N_random)) / rms(N_random)``. Note the denominator
        is the root-mean-square of the counts, not their standard deviation;
        if every random field is empty it is zero and D is inf/nan.
    """
    # NOTE(review): the `size=n` arguments and the `distance_upper_bound`
    # keyword below were missing from the truncated source and have been
    # restored from context (the shape assertion and the unused `max_dist`).

    # select 300 random points (RA and dec)
    n = 300
    ra_random = np.random.uniform(low=min(galaxy_data[:, 0]),
                                  high=max(galaxy_data[:, 0]),
                                  size=n)
    dec_random = np.random.uniform(low=min(galaxy_data[:, 1]),
                                   high=max(galaxy_data[:, 1]),
                                   size=n)
    points = np.vstack((ra_random, dec_random)).T
    assert points.shape == (n, 2)

    # select all galaxies within max velocity
    velocity_bin = galaxy_data[
        abs(redshift_to_velocity(galaxy_data[:, 2], center.z)) <= max_velocity]

    virial_gsp = GriSPy(velocity_bin[:, :2], metric="haversine")
    max_dist = linear_to_angular_dist(0.5 * u.Mpc / u.littleh, center.z).value
    _, v_idx = virial_gsp.bubble_neighbors(points,
                                           distance_upper_bound=max_dist)

    # find the N(0.5) mean and rms for the 300 points
    N_arr = np.array([len(idx) for idx in v_idx])

    N_mean = np.mean(N_arr)
    N_rms = np.sqrt(np.mean(N_arr**2))  # rms
    D = (center.N - N_mean) / N_rms
    return D
Beispiel #2
    # Advance the simulation by one step: find each boid's neighbours with a
    # periodic grid search, process every boid, then update the
    # record/stop/capture flags from the frame counter.
    #
    # positions: passed both as the grid data and as the query centres.
    # delay:     frame number at which self.record is switched on.
    # length:    self.stop is set once self.frames reaches delay + length.
    #
    # NOTE(review): this excerpt is truncated -- the right-hand side of
    # `ret_atts =` and the body of the inner `for` loop are missing, so the
    # method does not parse as shown.
    def run(self,positions,delay=50,length=150):

        ## Builds gridsearch and finds nearby neighbors
        # The grid is periodic on axis 0 over (0, self.width) and on axis 1
        # over (0, self.height).
        gsp = GriSPy(positions, N_cells = 10, periodic={0:(0,self.width), 1:(0,self.height)})
        # A single query with the larger of the two radii covers both uses.
        dub = max(self.alignCohRadius, self.sepRadius)
        _, self.neighbor_indices = gsp.bubble_neighbors(positions,distance_upper_bound=dub)

        ## Calculates and records data for each individual boid
        for i in range(len(self.boids)):
            this_boid = self.boids[i]
            neighbors = [self.boids[j] for j in self.neighbor_indices[i]]
            # NOTE(review): expression lost in extraction; presumably a call
            # involving this_boid and neighbors -- confirm against the
            # original source.
            ret_atts =
            # NOTE(review): this inner loop reuses the name `i` of the outer
            # loop; its body is missing from the excerpt.
            for i,att in enumerate(ret_atts):

        ## Manages the number of instances calculated and
        #  halting criteria.
        if self.frames == delay:
            self.record = True
        if self.frames == delay+length:
            self.stop = True
        # With screen_cap enabled, capture is on for frame 100 only.
        if self.screen_cap and self.frames==100:
            self.capture = True
        if self.screen_cap and self.frames==101:
            self.capture = False
Beispiel #3
def consolidateSchools(schoolData, radius, verbose):
    """Drop one row for every school that has exactly one neighbour in range.

    A grid is built over the 'Lat'/'Lon' columns and, for every school, all
    schools within ``radius`` are looked up. When exactly one *other* school
    is found, the first index returned by the neighbour query is scheduled
    for removal. Schools with several neighbours are left alone (probably a
    high-density area).

    Parameters
    ----------
    schoolData : pandas.DataFrame
        Must contain 'Lat' and 'Lon' columns. Rows are dropped by label using
        positional indices, so a default 0..n-1 index is assumed; labels that
        do not exist are ignored.
    radius : number
        Search radius. It is divided by 1000 before being converted with a
        degrees-per-kilometre factor, so it is presumably given in metres.
    verbose : bool
        Print progress messages.

    Returns
    -------
    pandas.DataFrame
        ``schoolData`` without the dropped rows and with a fresh 0..m-1 index.
    """
    radius = radius / 1000

    numSchools = len(schoolData.index)
    if numSchools == 0:
        # Nothing to index or drop; keep the summary line callers may expect.
        print('NUMBER DROPPED = ', 0)
        return schoolData.reset_index(drop=True)

    latlonArray = schoolData[['Lat', 'Lon']].to_numpy()

    #Build grid
    if verbose:
        print('Building Grid.....')
    gsp = GriSPy(latlonArray)

    degPerKM = 0.0089  #Degrees per 1 km
    upper_radii = radius * degPerKM

    #Find, for every school at once, all points in latlonArray which are
    #within upper_radii of it (the school itself is part of its own result).
    _, bubble_ind = gsp.bubble_neighbors(
        latlonArray, distance_upper_bound=upper_radii)

    #A bit of a hack but don't drop *multiple* schools -- only one. If more
    #than one school is found that probably means it's a high density area.
    #For later -- use multiple column matching (names etc)
    labelsToDrop = {
        int(neighbours[0])
        for neighbours in bubble_ind
        if len(neighbours) - 1 == 1
    }

    #Drop everything in one pass; labels that are not present are ignored.
    schoolData = schoolData.drop(sorted(labelsToDrop), errors='ignore')

    #Report rows that were really removed, not how often the rule fired.
    numberDropped = numSchools - len(schoolData.index)

    schoolData = schoolData.reset_index(drop=True)
    print('NUMBER DROPPED = ', numberDropped)

    return (schoolData)
Beispiel #4
def test_custom_distance_lev():
    """Bubble query with a user-supplied Levenshtein metric.

    Builds a grid over 1000 seeded random 2-D points, queries two seeded
    centres with a radius of 10 and checks the returned distances and indices
    against stored reference values.
    """
    # NOTE(review): the `distance_upper_bound=upper_radii` argument and the
    # closing `])` of both index assertions were missing from the truncated
    # source and have been restored.

    def levenshtein(c0, centres, dim):
        """Edit distance between the tuple of ``c0`` and each row of ``centres``."""
        c0 = tuple(c0)
        distances = np.empty(len(centres))
        for idx, c1 in enumerate(centres):
            c1 = tuple(c1)
            dis = textdistance.levenshtein(c0, c1)
            distances[idx] = dis
        return distances

    random = np.random.RandomState(42)

    Npoints = 10**3
    Ncentres = 2
    dim = 2
    Lbox = 100.0

    data = random.uniform(0, Lbox, size=(Npoints, dim))
    centres = random.uniform(0, Lbox, size=(Ncentres, dim))

    gsp = GriSPy(data, N_cells=20, metric=levenshtein)

    upper_radii = 10.0
    lev_dist, lev_ind = gsp.bubble_neighbors(
        centres, distance_upper_bound=upper_radii)

    assert len(centres) == len(lev_dist) == len(lev_ind)
    assert np.all(lev_dist[0] == 2)
    assert np.all(lev_dist[1] == 2)

    assert np.all(lev_ind[0] == [
        648, 516, 705, 910, 533, 559, 61, 351, 954, 214, 90, 645, 846, 818, 39,
        433, 7, 700, 2, 364, 547, 427, 660, 548, 333, 246, 193, 55, 83, 159,
        684, 310, 777, 112, 535, 780, 334, 300, 467, 30, 613, 564, 134, 534,
        435, 901, 296, 800, 391, 321, 763, 208, 42, 413, 97
    ])

    assert np.all(lev_ind[1] == [
        580, 740, 498, 89, 610, 792, 259, 647, 58, 722, 360, 685, 552, 619, 6,
        555, 935, 268, 615, 661, 680, 817, 75, 919, 922, 927, 52, 77, 859, 70,
        544, 189, 340, 691, 453, 570, 126, 140, 67, 284, 662, 590, 527
    ])
Beispiel #5
    def test_floatX_precision(self, floatX):
        """Check bubble-query distances for data stored as ``floatX``.

        For every neighbour returned, the Euclidean distance is recomputed
        directly from the coordinates and must (a) lie within the search
        radius and (b) agree with the distance GriSPy reports, both up to the
        resolution of ``floatX``.
        """
        # NOTE(review): the closing parenthesis of the bubble_neighbors call
        # was missing from the truncated source and has been restored.

        rng = np.random.default_rng(1234)
        data_floatX = rng.random(size=(1000, 3), dtype=floatX)
        centres_floatX = rng.random(size=(100, 3), dtype=floatX)
        upper_radii = 0.2

        gsp_floatX = GriSPy(data_floatX)
        dist_floatX, ind_floatX = gsp_floatX.bubble_neighbors(
            centres_floatX, distance_upper_bound=upper_radii
        )

        # Tolerance: decimal resolution of the dtype under test.
        eps = np.finfo(floatX).resolution

        for i, ind_list in enumerate(ind_floatX):
            for j, il in enumerate(ind_list):
                delta = data_floatX[il] - centres_floatX[i]
                dist = np.sqrt(np.sum(delta**2))
                assert_(dist <= upper_radii + eps)
                gsp_dist = dist_floatX[i][j]
                assert_(abs(dist - gsp_dist) <= eps)
Beispiel #6
def find_number_count(center, galaxies, distance=0.5 * u.Mpc / u.littleh):
    """
    Computes number count of the center of interest, within a distance d.

    Number count, N(d) is the number of galaxies surrounding the center of interest.

    Note: Uses haversine metric so angular coordinates must be used.

    Parameters
    ----------
    center: 1-D sequence ``[ra, dec, z, ...]`` or cluster.Cluster.
        Center of interest. For a Cluster its ``ra``, ``dec`` and ``z``
        attributes are used; otherwise elements 0-1 are the coordinates and
        element 2 the redshift.

    galaxies: ndarray, shape (n, >=2)
        Sample of galaxies to search; only the first two columns are used.

    distance: float with units of [Mpc/littleh], default: 0.5*u.Mpc/u.littleh
        distance, d. It is converted to an angular distance at the redshift
        of the center before querying.

    Returns
    -------
    int
        N(d), Number of points around center of interest, within distance d.
    """
    # NOTE(review): the docstring delimiters, the `else:` below and the
    # `distance_upper_bound` keyword were missing from the truncated source
    # and have been restored from context.
    if isinstance(center, Cluster):
        coords = [center.ra, center.dec]
        z = center.z
    else:
        coords = center[:2]
        z = center[2]

    N_gsp = GriSPy(galaxies[:, :2], metric="haversine")
    distance = linear_to_angular_dist(distance,
                                      z).value  # convert to angular distance
    _, n_idx = N_gsp.bubble_neighbors(np.array([coords]),
                                      distance_upper_bound=distance)

    # One centre was queried, so n_idx holds one index array; its length is
    # the neighbour count (no need to materialise the matching rows).
    return len(n_idx[0])
Beispiel #7
def test_custom_distance_hamming():
    """Bubble query with a user-supplied Hamming metric.

    Builds a grid over 1000 seeded random 2-D points, queries two seeded
    centres with a radius of 10 and checks the returned distances and indices
    against stored reference values.
    """
    # NOTE(review): the `distance_upper_bound=upper_radii` argument and the
    # closing `])` of both index assertions were missing from the truncated
    # source and have been restored.

    def hamming(c0, centres, dim):
        """Hamming distance from ``c0`` to each row of ``centres`` (flat array)."""
        c0 = c0.reshape((-1, dim))
        d = cdist(c0, centres, metric="hamming").reshape((-1, ))
        return d

    random = np.random.RandomState(42)

    Npoints = 10**3
    Ncentres = 2
    dim = 2
    Lbox = 100.0

    data = random.uniform(0, Lbox, size=(Npoints, dim))
    centres = random.uniform(0, Lbox, size=(Ncentres, dim))

    gsp = GriSPy(data, N_cells=20, metric=hamming)

    upper_radii = 10.0
    ham_dist, ham_ind = gsp.bubble_neighbors(
        centres, distance_upper_bound=upper_radii)

    assert len(centres) == len(ham_dist) == len(ham_ind)
    assert np.all(ham_dist[0] == 1)
    assert np.all(ham_dist[1] == 1)

    assert np.all(ham_ind[0] == [
        648, 516, 705, 910, 533, 559, 61, 351, 954, 214, 90, 645, 846, 818, 39,
        433, 7, 700, 2, 364, 547, 427, 660, 548, 333, 246, 193, 55, 83, 159,
        684, 310, 777, 112, 535, 780, 334, 300, 467, 30, 613, 564, 134, 534,
        435, 901, 296, 800, 391, 321, 763, 208, 42, 413, 97
    ])

    assert np.all(ham_ind[1] == [
        580, 740, 498, 89, 610, 792, 259, 647, 58, 722, 360, 685, 552, 619, 6,
        555, 935, 268, 615, 661, 680, 817, 75, 919, 922, 927, 52, 77, 859, 70,
        544, 189, 340, 691, 453, 570, 126, 140, 67, 284, 662, 590, 527
    ])
Beispiel #8
def test_floatX_precision(dim, N_cells, floatX):
    """Bubble-query distances must match a direct norm at ``floatX`` precision.

    Seeded random data (100 points) and centres (10 points) of dimension
    ``dim`` are generated with dtype ``floatX``. Every neighbour returned
    within radius 0.2 is re-measured with ``np.linalg.norm`` and compared to
    the distance GriSPy reports, to as many decimals as the dtype resolves.
    """
    generator = np.random.default_rng(1234)
    points = generator.random(size=(100, dim), dtype=floatX)
    queries = generator.random(size=(10, dim), dtype=floatX)
    radius = 0.2

    grid = GriSPy(points, N_cells=N_cells)
    reported_dists, reported_inds = grid.bubble_neighbors(
        queries, distance_upper_bound=radius)

    # Number of decimals the dtype can resolve.
    resolution = np.finfo(floatX).resolution
    n_decimals = np.abs(int(np.log10(resolution)))

    for query, dists, inds in zip(queries, reported_dists, reported_inds):
        for reported, point_idx in zip(dists, inds):
            expected = np.linalg.norm(points[point_idx] - query)

            assert (expected <= radius).all()
            npt.assert_almost_equal(expected, reported, n_decimals)
Beispiel #9
# Example: bubble, shell and nearest-neighbour queries on a periodic 2-D box.
# NOTE(review): Lbox, Npoints, Ncentres and dim are used but not defined in
# this excerpt; they are presumably set just above -- confirm.
data = np.random.uniform(0, Lbox, size=(Npoints, dim))
centres = np.random.uniform(0, Lbox, size=(Ncentres, dim))

# Grispy params
upper_radii = 15.0
lower_radii = 10.0
n_nearest = 100
periodic = {0: (0, Lbox), 1: (0, Lbox)}

# Build the grid with the data
gsp = GriSPy(data, periodic=periodic)

# Query for neighbors within upper_radii
bubble_dist, bubble_ind = gsp.bubble_neighbors(
    centres, distance_upper_bound=upper_radii)

# Query for neighbors in a shell within lower_radii and upper_radii
# NOTE(review): both bounds were missing from the truncated source and have
# been restored from the two radii defined above.
shell_dist, shell_ind = gsp.shell_neighbors(
    centres,
    distance_lower_bound=lower_radii,
    distance_upper_bound=upper_radii)

# Query for nth nearest neighbors
near_dist, near_ind = gsp.nearest_neighbors(centres, n=n_nearest)

# Plot results
plt.figure(4, figsize=(10, 3.2))

# First of three side-by-side panels: all data points as small black dots.
plt.subplot(1, 3, 1, aspect="equal")
plt.title("Bubble query")
plt.scatter(data[:, 0], data[:, 1], c="k", marker=".", s=3)
        # NOTE(review): from here on the text belongs to a different example
        # whose beginning (enclosing loops, definitions of i, j, k, k1,
        # vector, vector_np) is not part of this excerpt.
        # k2: sum of elements [3:] of three consecutive entries of `i`, plus
        # the summed rows of `k` scaled by k1, multiplied by 0.33333
        # (presumably an average over the three entries -- confirm).
        k2 = (i[j - 1][3:] + i[j][3:] + i[j + 1][3:] +
              (k[0, :] + k[1, :] + k[2, :]) * k1) * 0.33333
        # Subtract k1 from any component that reaches or exceeds it
        # (looks like wrapping back into a periodic box -- TODO confirm).
        for x in range(3):
            if k2[x] >= k1[x]:
                k2[x] -= k1[x]
            # NOTE(review): bare expression whose value is discarded; the
            # assignment target was presumably lost in extraction.
            (i[j + 1][3:] - i[j - 1][3:] + k[0, :] * k1[0] - k[2, :] * k1[2])

vector_np = np.array(vector_np)
# NOTE(review): `periodic` is built here but is not passed to GriSPy on the
# next line, so as shown the grid is not periodic -- confirm intent.
periodic = {0: (0, float(k1[0])), 1: (0, float(k1[1])), 2: (0, float(k1[2]))}
gsp = GriSPy(vector_np)
# NOTE(review): call truncated; the remaining argument(s) are missing.
bubble_dist, bubble_ind = gsp.bubble_neighbors(vector_np,
# One row of four floats per entry of `vector`, initialised to zero.
density_map = np.zeros((len(vector), 4), dtype=float)
for i in range(len(vector)):
    D = 0
    for j in range(len(bubble_ind[i])):
        # Skip the point itself in its own neighbour list.
        if bubble_ind[i][j] != i:
            # Normalise element [1] of the point (and of the neighbour) to
            # unit length.
            unit_point = vector[i][1] / np.linalg.norm(vector[i][1])
            # NOTE(review): the next two lines are garbled; presumably the
            # norm argument and a dot product of the two unit vectors.
            unit_neighbor = vector[bubble_ind[i][j]][1] / np.linalg.norm(
            dot =, unit_neighbor)
            # Clamp to [-1, 1].
            if dot > 1:
                dot = 1
            elif dot < -1:
                dot = -1
            # NOTE(review): the excerpt ends here; the rest of the loop body
            # is not visible.
            factor = 1
Beispiel #11
# FoF: Friends-of-Friends cluster finder. The visible code runs four stages:
#   1. for every untracked candidate center, collect friends and
#      friends-of-friends inside the virial radius and build a Cluster when
#      the member count reaches `richness`;
#   2. remove galaxies shared between overlapping candidate clusters;
#   3. look for a brighter galaxy near each center and re-center on it;
#   4. compute N(0.5) and the overdensity D for each remaining cluster.
#
# NOTE(review): this excerpt is heavily truncated -- parameters are missing
# from the signature, the docstring delimiters are gone, several calls have
# lost arguments and some `if` bodies are absent. The code is left exactly as
# found; the notes below mark each gap.
def FoF(
    # NOTE(review): parameters used in the body (galaxy_data,
    # candidate_centers, richness, overdensity, linking_length_factor) are
    # missing here, and a unit token was lost between `*` and `/`
    # (presumably u.km, given "km/s" in the description below).
    max_velocity=2000 * / u.s,
    virial_radius=1.5 * u.Mpc / u.littleh,
    # NOTE(review): the prose down to "candidates: list of cluster.Cluster
    # object" reads like the docstring; its delimiters and the closing
    # `):` of the signature are missing.
    The Friends-of-Friends algorithm is a clustering algorithm used to identify groups of particles. In this instance, FoF is used to identify clusters of galaxies.

    FoF uses a linking length, l, whereby galaxies within a distance l from another galaxy are linked directly (as friends) and galaxies within a distance l from its friends are linked indirectly (as friends of friends). This network of particles are considered a cluster.
    After locating all candidate clusters, overlapping clusters are merged, with preference towards the center with larger N(d) and abs magnitude.
    A new cluster center is then defined as the brightess galaxy within 0.5 Mpc away from the current center.
    Finally, a cluster is only initialized if it has met the threshold richness and overdensity.

    The algorithm is sped up with:
    - numpy vectorization
    - grispy nearest neighbor implementation, which uses cell techniques to efficiently locate neighbors. This is preferred as it allows the use of the haversine metric for spherical coordinates.

    galaxy_data: ndarray, shape (n,7)
        Galaxy data with compulsory properties: ['ra', 'dec', 'z', 'abs mag', 'id', 'LR', 'N']

    candidate_centers: ndarray, shape (m,7)
        Array of candidate centers with compulsory properties: ['ra', 'dec', 'z', 'abs mag', 'id', 'LR', 'N']

    max_velocity: float, units [km/s]
        Default value: 2000 km/s

    linking_length_factor: float
        Default value: 0.1

    virial_radius: float, units [Mpc/littleh]
        Default value: 1.5 hMpc

    richness: integer

    overdensity: float

    candidates: list of cluster.Cluster object

    candidates = []
    # sep_arr = [] # tracks change in linking length with redshift

    # tracker identifies galaxies that have been included in another cluster previously to speed up algorithm.
    # candidate_centers was sorted by N(0.5) before to ensure larger clusters are prioritized
    tracker = np.ones(len(candidate_centers))

    # identify cluster candidates
    # NOTE(review): the iterable passed to enumerate() is missing; presumably
    # candidate_centers, since tracker has one entry per candidate center.
    for i, center in enumerate(
    ):  # each row is a candidate center to search around

        if tracker[i]:
            velocity_bin = galaxy_data[
                abs(redshift_to_velocity(galaxy_data[:, 2], center[2])) <=
                max_velocity]  # select galaxies within max velocity

            virial_gsp = GriSPy(velocity_bin[:, :2], metric="haversine")

            # given virial radius is in proper distances, we convert to comoving distance to account for cosmological expansion.
            ang_virial_radius = linear_to_angular_dist(
                virial_radius, center[2]
            ).to("rad")  # convert proper virial radius to angular separation
            # NOTE(review): the factor(s) multiplying ang_virial_radius and
            # the head of the call taking u.dimensionless_angles() are
            # missing from the next statement.
            max_dist = (
                ang_virial_radius *
                    u.dimensionless_angles())  # convert to comoving distance
            max_dist = linear_to_angular_dist(
                max_dist, center[2]
            ).value  # convert comoving distance to angular separation

            virial_dist, virial_idx = virial_gsp.bubble_neighbors(
                np.array([center[:2]]), distance_upper_bound=max_dist
            )  # center must be a ndarray of (n,2)
            virial_points = velocity_bin[tuple(
                virial_idx)]  # convert to tuple for deprecation warning

            if (
                    len(virial_points) >= 12
            ):  # reject if <12 galaxies within virial radius (to save time)
                # NOTE(review): the arguments of mean_separation are
                # truncated.
                mean_sep = mean_separation(
                    max_dist *,
                )  # Mpc
                linking_length = (
                    linking_length_factor * mean_sep
                )  # determine transverse LL from local mean separation
                # sep_arr.append([linking_length.value, center[2]])
                linking_length = linear_to_angular_dist(
                    linking_length, center[2]).value  # fix linking length here

                f_gsp = GriSPy(virial_points[:, :2], metric="haversine")
                # NOTE(review): the arguments of this query are missing;
                # presumably the center and linking_length, as in the
                # friends-of-friends query below.
                f_dist, f_idx = f_gsp.bubble_neighbors(
                )  # select galaxies within linking length
                f_points = virial_points[tuple(f_idx)]

                # Start from the direct friends, then query around each of
                # them with the same linking length.
                member_galaxies = f_points
                fof_dist, fof_idx = f_gsp.bubble_neighbors(
                    f_points[:, :2], distance_upper_bound=linking_length
                )  # select all galaxies within 2 linking lengths

                for idx in fof_idx:
                    fof_points = virial_points[idx]

                    # ensure no repeated points in cluster
                    mask = np.isin(
                        fof_points, member_galaxies, invert=True
                    )  # filter for points not already accounted for
                    # Keep rows whose count of unmatched entries equals
                    # center.shape[0] (every column unmatched, assuming each
                    # row has as many columns as `center` -- TODO confirm).
                    vec_mask = np.isin(mask.sum(axis=1), center.shape[0])
                    # NOTE(review): the reshape arguments are truncated.
                    fof_points = fof_points[vec_mask].reshape(
                         center.shape[0]))  # points of 2 linking lengths (FoF)

                    if len(fof_points):
                        member_galaxies = np.concatenate(
                            (member_galaxies, fof_points)
                        )  # merge all FoF points within 2 linking lengths

                if len(member_galaxies) >= richness:  # must have >= richness
                    c = Cluster(center, member_galaxies)
                    # NOTE(review): no visible line adds `c` to `candidates`;
                    # presumably a `candidates.append(c)` was lost.

                    # Progress output for every 100th candidate; the head of
                    # the print/log call is missing.
                    if not i % 100:
              "{i} " + c.__str__())

                    # locate centers within member_galaxies (centers of interest)
                    member_gal_id = member_galaxies[:, 4]
                    luminous_gal_id = candidate_centers[:, 4]
                    # NOTE(review): remaining intersect1d arguments are
                    # truncated (three return values imply
                    # return_indices=True).
                    coi, _, coi_idx = np.intersect1d(member_gal_id,

                    # update tracker to 0 for these points
                    # NOTE(review): this loop reuses the outer loop variable
                    # name `i`.
                    for i in coi_idx:
                        tracker[i] = 0

            # if len(candidates) >= 100: # for quick testing
            #     break

    # plot_clusters(candidates, flagging=False) # for quick check of clusters

    # tracks mean separation across redshift
    # sep_arr = np.array(sep_arr)
    # plt.plot(sep_arr[:,1], sep_arr[:,0], '.')

    # perform overlap removal and merger
    print("Performing overlap removal")
    candidate_clusters = np.array([
        [c.ra, c.dec, c.z, c.gal_id] for c in candidates
    ])  # get specific attributes from candidate center sample
    candidates = np.array(candidates)
    merged_candidates = candidates.copy()
    # Column 3 holds each candidate's gal_id; used below to look clusters up.
    gal_id_space = candidate_clusters[:, 3]

    for center in candidates:

        # identify overlapping centers (centers lying within virial radius of current cluster)
        velocity_bin = candidate_clusters[
            abs(redshift_to_velocity(candidate_clusters[:, 2], center.z)) <=
            max_velocity]  # select galaxies within max velocity

        center_gsp = GriSPy(velocity_bin[:, :2], metric="haversine")
        c_coords = [center.ra, center.dec]
        # NOTE(review): the distance argument (presumably virial_radius) and
        # the query centres (presumably built from c_coords) are missing from
        # the next two calls.
        max_dist = linear_to_angular_dist(
            center.z).value  # convert virial radius to angular distance
        c_dist, c_idx = center_gsp.bubble_neighbors(
            distance_upper_bound=max_dist)  # center must be a ndarray of (n,2)
        c_points = velocity_bin[tuple(c_idx)]

        # merge each overlapping cluster
        if len(c_points):
            for c in c_points:
                # Map the row back to its Cluster object via the gal_id in
                # the last column.
                c = candidates[gal_id_space == c[-1]][0]

                # NOTE(review): the body of this `if` is missing (presumably
                # `continue`).
                if center.gal_id == c.gal_id:  # if same center, ignore

                # modify the cluster's galaxies in merged_candidates array
                if len(c.galaxies) and len(
                        center.galaxies):  # check both clusters are not empty
                    # NOTE(review): the first argument of setdiff2d is
                    # missing (presumably c.galaxies).
                    S = setdiff2d(
                        center.galaxies)  # identify overlapping galaxies
                    if len(S):
                        new_c = merged_candidates[gal_id_space == c.gal_id][
                            0]  # c from merged_candidates
                        new_center = merged_candidates[
                            gal_id_space == center.gal_id][
                                0]  # center from merged_candidates

                        c_galaxies, center_galaxies = c.remove_overlap(center)
                        new_c.galaxies = c_galaxies
                        new_center.galaxies = center_galaxies

    merged_candidates = np.array([
        c for c in merged_candidates if c.richness >= richness
    ])  # select only clusters >= richness
    # Warn when the richness filter removed nothing.
    if len(merged_candidates) >= len(candidates):
        logging.warning("No candidates were merged!")

    bcg_clusters = merged_candidates.copy()

    # replace candidate center with brightest galaxy in cluster
    print("Searching for BCGs")
    merged_candidates = sorted(merged_candidates,
                               key=lambda x: x.N,
                               reverse=True)  # sort by N

    for center in merged_candidates:
        # Recomputed each iteration because bcg_clusters changes in the loop.
        bcg_space_gal_id = np.array([c.gal_id for c in bcg_clusters])

        # identify galaxies within 0.25*virial radius
        cluster_gsp = GriSPy(
            center.galaxies[:, :2],
            metric="haversine")  # for galaxies within a cluster
        c_coords = [center.ra, center.dec]
        # NOTE(review): the remaining arguments of linear_to_angular_dist and
        # the query centres of bubble_neighbors are truncated.
        max_dist = 0.25 * (linear_to_angular_dist(virial_radius,
                           )  # convert virial radius to angular distance
        c_dist, c_idx = cluster_gsp.bubble_neighbors(
            distance_upper_bound=max_dist)  # center must be a ndarray of (n,2)
        bcg_arr = center.galaxies[tuple(c_idx)]

        # NOTE(review): the second len() argument is missing.
        if len(bcg_arr) and len(
        ):  # check for galaxies within 0.25*virial radius

            mag_sort = bcg_arr[bcg_arr[:, 3].argsort(
            )]  # sort selected galaxies by abs mag (brightness)
            mask = np.isin(
                mag_sort[:, 4], bcg_space_gal_id, invert=True
            )  # filter for galaxies that are not existing centers
            mag_sort = mag_sort[mask]

            if len(mag_sort):
                bcg = mag_sort[0]  # brightest cluster galaxy (bcg)

                # if bcg brighter than current center, replace it as center
                # NOTE(review): the right-hand side of `bcg[4] !=` and the
                # end of this condition are truncated.
                if (abs(bcg[3]) > abs(center.bcg_absMag)) and (bcg[4] !=
                    new_cluster = Cluster(
                        bcg, center.galaxies)  # initialize new center

                    # NOTE(review): both calls below are truncated;
                    # apparently the old center is deleted from bcg_clusters
                    # and new_cluster appended.
                    bcg_clusters = np.delete(
                                  for c in bcg_clusters] == center.gal_id),
                    bcg_clusters = np.concatenate(
                         np.array([new_cluster])))  # add new center to array

    bcg_clusters = np.array([
        c for c in bcg_clusters if c.richness >= richness
    ])  # select only clusters >= richness
    final_clusters = []

    # N(0.5) and galaxy overdensity
    print("Selecting appropriate clusters")
    for center in bcg_clusters:
        # NOTE(review): the galaxy-sample argument of find_number_count is
        # missing from this call.
        center.N = find_number_count(center,
                                     distance=0.5 * u.Mpc /
                                     u.littleh)  # find number count N(0.5)
        center.D = center_overdensity(center, galaxy_data,
                                      max_velocity)  # find overdensity D

        # Initialize the cluster only if N(0.5) >= 8 and D >= overdensity
        # NOTE(review): the body of this `if` is missing (presumably
        # appending `center` to final_clusters).
        if ((center.N >= 8) and (center.richness >= richness)
                and (center.D >= overdensity)):

    return final_clusters
Beispiel #12
def findNumberOfNeighbors(schoolData, radius, verbose):
    """Count nearby schools and measure the distance to the closest one.

    Parameters
    ----------
    schoolData : pandas.DataFrame
        Must contain 'Lat' and 'Lon' columns.
    radius : number
        Search radius; it is multiplied by a degrees-per-kilometre factor, so
        it is taken to be in kilometres.
    verbose : bool
        Print progress and per-school diagnostics.

    Returns
    -------
    pandas.DataFrame
        One row per school with columns 'numPoints' (other schools within
        ``radius``) and 'nearestNeighbor' (distance to the closest other
        school, converted back with the same degrees-per-kilometre factor).
        With a single school there is no other school and the distance is 0.
    """
    # NOTE(review): the `resultsList.append(` call head was missing from the
    # truncated source and has been restored from the list/DataFrame around it.
    resultColumns = ['numPoints', 'nearestNeighbor']

    numSchools = len(schoolData.index)
    if numSchools == 0:
        # No points to index; return an empty frame with the usual columns.
        return pd.DataFrame([], columns=resultColumns)

    latlonArray = schoolData[['Lat', 'Lon']].to_numpy()

    #Build grid
    if verbose:
        print('Building Grid.....')
    gsp = GriSPy(latlonArray)

    degPerKM = 0.0089  #Degrees per 1 km
    upper_radii = radius * degPerKM

    #Every school is itself a grid point, so its single nearest neighbour is
    #always itself at distance 0. Ask for two and use the farther one.
    n_nearest = min(2, numSchools)

    #Find, for every school at once, all points in latlonArray which are
    #within upper_radii of it (the school itself is part of its own result).
    _, bubble_ind = gsp.bubble_neighbors(
        latlonArray, distance_upper_bound=upper_radii)

    #Compute the distance to the nearest location
    near_dist, near_ind = gsp.nearest_neighbors(latlonArray, n=n_nearest)

    resultsList = []
    for i in range(numSchools):
        numNearby = len(bubble_ind[i]) - 1

        #Pick the farther of the (up to) two hits without relying on order.
        farthest = int(np.argmax(near_dist[i]))
        nearestKM = float(near_dist[i][farthest] / degPerKM)
        nearestLocation = latlonArray[near_ind[i][farthest]]

        #Append number of locations within certain distance, closest point
        resultsList.append([numNearby, nearestKM])

        if i == 0 and verbose:
            print("Number of Nearby Schools", numNearby)
            print("Nearest School Location:", nearestLocation)

        #Report schools that have no other school inside the radius.
        if verbose and numNearby == 0:
            print("----->School Index:", i)
            print("School Location:", latlonArray[i])
            print("Number of Nearby Schools", numNearby)
            print("Nearest School Location:", nearestLocation)
            print("Nearest School Distance:", nearestKM)

    resultDF = pd.DataFrame(resultsList, columns=resultColumns)
    return (resultDF)