Exemple #1
0
def knn_cond_mutual_information(x, y, z, k, standardize = True, dualtree = False):
    """
    Estimates the conditional mutual information between two time series
    x and y conditioned on a third one, z (which can be multi-dimensional), as
        I(x; y | z) = sum( p(x,y,z) * log( p(z)*p(x,y,z) / (p(x,z)*p(y,z)) ) ),
    where p(z), p(x,z), p(y,z) and p(x,y,z) are probability distributions.
    The estimate is based on a k-nearest neighbours search in the joint
    (x, y, z) space followed by neighbour counting in the marginal spaces,
    using sklearn.neighbors.KDTree with the maximum (Chebyshev) norm.

    x, y - 1D time series of equal length
    z - conditioning series: either a numpy array or a list of 1D series
    k - number of nearest neighbours used in the joint space
    standardize - whether to pass every series through _center_ts before
      the estimation (described by the original author as a transform to
      zero mean and unit variance)
    dualtree - whether to use dualtree formalism in k-d tree for the k-NN search
      could lead to better performance with large N

    Returns the estimated conditional mutual information as a float.

    According to Frenzel S. and Pompe B., Phys. Rev. Lett., 99, 2007.
    """

    from sklearn.neighbors import KDTree

    # prepare data
    if standardize:
        x = _center_ts(x)
        y = _center_ts(y)
        if isinstance(z, np.ndarray):
            z = _center_ts(z)
        elif isinstance(z, list):
            # build a new list: re-binding the loop variable (as the previous
            # implementation did) silently left the list un-standardized
            z = [_center_ts(cond_ts) for cond_ts in z]
    z = np.atleast_2d(z)
    # rows are samples, columns are [x, y, z_1, ..., z_m]
    data = np.vstack([x, y, z]).T

    # build k-d tree using the maximum (Chebyshev) norm
    tree = KDTree(data, leaf_size = 15, metric = "chebyshev")
    # k + 1 because every point is returned as its own nearest neighbour
    dist, _ = tree.query(data, k = k + 1, return_distance = True, dualtree = dualtree)
    # distance from each point to its k-th neighbour in the joint space
    eps = dist[:, -1]

    # prepare marginal vectors xz, yz and z
    n_x_z_data = np.delete(data, 1, axis = 1)
    n_y_z_data = np.delete(data, 0, axis = 1)
    n_z_data = np.delete(data, [0, 1], axis = 1)

    # build and query k-d trees in marginal spaces for number of points in a given dist from a point
    # NOTE(review): query_radius counts the query point itself; the second
    # subtracted unit presumably discounts the k-th neighbour lying exactly
    # on the radius - confirm against the reference paper
    tree_x_z = KDTree(n_x_z_data, leaf_size = 15, metric = "chebyshev")
    n_x_z = tree_x_z.query_radius(n_x_z_data, r = eps, count_only = True) - 2
    tree_y_z = KDTree(n_y_z_data, leaf_size = 15, metric = "chebyshev")
    n_y_z = tree_y_z.query_radius(n_y_z_data, r = eps, count_only = True) - 2
    tree_z = KDTree(n_z_data, leaf_size = 15, metric = "chebyshev")
    n_z = tree_z.query_radius(n_z_data, r = eps, count_only = True) - 2

    # accumulate the per-point contributions
    sum_ = 0
    for c_xz, c_yz, c_z in zip(n_x_z, n_y_z, n_z):
        sum_ += _neg_harmonic(c_xz) + _neg_harmonic(c_yz) - _neg_harmonic(c_z)

    sum_ /= data.shape[0]

    return sum_ - _neg_harmonic(k-1)
def match_bright(x,y,x2,y2,mags,dist=1./3600.):
    """Match detected objects to the brightest nearby truth object.

    For every detected position the truth objects lying within ``dist``
    (Euclidean distance in the (x, y) plane) are collected and the one with
    the smallest magnitude is selected.

    Args:
    ----
        x: `float` RA of the truth objects to match (in degrees)
        y: `float` dec of the truth objects to match (in degrees)
        x2: `float` RA of detected objects to match (in degrees)
        y2: `float` dec of detected objects to match (in degrees)
        mags: `float` array containing the true input magnitudes
        dist: `float` maximum distance in degrees considered to match
            the objects, the default is 1 arcsecond.
    Returns:
    -------
        `int` array with one entry per detected object: the index of the
        matched truth object, or -1 if no truth object lies within ``dist``.
    """
    truth_xy = np.zeros((len(x), 2))
    truth_xy[:, 0], truth_xy[:, 1] = x, y
    detected_xy = np.zeros((len(x2), 2))
    detected_xy[:, 0], detected_xy[:, 1] = x2, y2

    # one array of candidate truth indices per detected object
    candidates = KDTree(truth_xy, leaf_size=40).query_radius(detected_xy, r=dist)

    # start from "no match" everywhere and fill in the matches found
    matched = np.full(len(candidates), -1, dtype=np.int64)
    for row, truth_ids in enumerate(candidates):
        order = np.argsort(mags[truth_ids])
        if len(order) > 0:
            # smallest magnitude first, i.e. the brightest candidate
            matched[row] = truth_ids[order[0]]
    return matched
def concat_features_by_neighbors(df_labels, df_features,
                                 X_names=['Offense Type'],
                                 grid=["Latitude", "Longitude"],
                                 radius=1./500.,
                                 scale=np.array([1.,1.])):
    """Append neighbourhood value counts from ``df_features`` to ``df_labels``.

    For every row of ``df_labels`` the rows of ``df_features`` lying within
    ``radius`` (in the scaled ``grid`` coordinates) are looked up, and the
    values found in their ``X_names`` columns are counted. Each distinct
    value becomes one new column holding its count (0 where absent).

    Args:
        df_labels: DataFrame whose rows receive the new count columns.
        df_features: DataFrame providing the values to count.
        X_names: columns of ``df_features`` whose values are counted.
        grid: the two coordinate columns present in both frames.
        radius: search radius, in scaled coordinate units.
        scale: per-axis multiplier applied to the coordinates before the
            neighbour search.

    Returns:
        ``df_labels`` (without rows lacking coordinates) concatenated
        column-wise with the count columns.
    """
    df_labels = df_labels.dropna(subset=grid)
    df_features = df_features.dropna(subset=grid)

    # .values instead of DataFrame.as_matrix, which was removed in pandas 1.0
    X = df_features[X_names].values
    xy_features = df_features[grid].values
    xy_labels = df_labels[grid].values
    tree = KDTree(xy_features*scale)

    # distinct values in first-seen order; the set only speeds up membership
    vocabulary = []
    seen = set()
    features = []
    for nei in tree.query_radius(xy_labels*scale, radius):
        values, counts = np.unique(X[nei], return_counts=True)
        D = dict(zip(values, counts))
        # explicit loop: a bare map() is lazy on Python 3 and would never
        # register anything in the vocabulary
        for value in D:
            if value not in seen:
                seen.add(value)
                vocabulary.append(value)
        features.append(D)

    # missing values come back as None and are turned into 0 by fillna
    rows = [[fi.get(value) for value in vocabulary] for fi in features]
    counts_frame = pd.DataFrame(rows,
                                index=df_labels.index,
                                columns=vocabulary).fillna(0.)

    return pd.concat([df_labels, counts_frame], axis=1)
Exemple #4
0
def study_redmapper_lrg_3d(hemi='north'):
    """Interactive study: count SDSS objects around each redMaPPer position.

    Loads the SDSS and redMaPPer catalogs for hemisphere ``hemi``, converts
    both to 3D positions through a ``grid3d`` object, counts the SDSS
    positions within a radius of 100 (in the units returned by
    ``xyz_from_radecz``) of every redMaPPer position, and histograms those
    counts. The function returns nothing; it ends in an ``ipdb`` breakpoint
    so that every local variable can be inspected interactively.
    """
    # create 3d grid object
    grid = grid3d(hemi=hemi)
    
    # load SDSS data
    sdss = load_sdss_data_both_catalogs(hemi)
    
    # load redmapper catalog
    rm = load_redmapper(hemi=hemi)
    
    # get XYZ positions (Mpc) of both datasets
    x_sdss, y_sdss, z_sdss = grid.xyz_from_radecz(sdss['ra'], sdss['dec'], sdss['z'], applyzcut=False)
    x_rm, y_rm, z_rm = grid.xyz_from_radecz(rm['ra'], rm['dec'], rm['z_spec'], applyzcut=False)
    # one row per object, columns are (x, y, z)
    pos_sdss = np.vstack([x_sdss, y_sdss, z_sdss]).T
    pos_rm = np.vstack([x_rm, y_rm, z_rm]).T

    # build a couple of KDTree's, one for SDSS, one for RM.
    from sklearn.neighbors import KDTree
    tree_sdss = KDTree(pos_sdss, leaf_size=30)
    # tree_rm is not queried below; it stays available in the debugger session
    tree_rm = KDTree(pos_rm, leaf_size=30)

    # number of SDSS positions within radius 100 of each redMaPPer position
    lrg_counts = tree_sdss.query_radius(pos_rm, 100., count_only=True)
    pl.clf()
    pl.hist(lrg_counts, bins=50)
    
    
    # deliberate breakpoint: hands all locals above over for interactive inspection
    ipdb.set_trace()
def count_close(x,y,x2,y2,distances):
    """Routine that counts the number of
    objects that are within certain radius

    Args:
    ----
        x: `float` position X of objects to count
        y: `float` position Y of objects to count
        x2: `float` position X of the objects that serve as the center
            of the circle where we look for neighbors
        y2: `float` position Y of the objects that serve as the center
            of the circle where we look for neighbors
        distances: `float` array of radii where to count the objects
    Returns:
    -------
        neighbors: `float` array, the mean number of neighbors in a circle
        of radius corresponding to each entry of distances
        err: `float` array, standard deviation of the number of neighbors
        in a circle of radius corresponding to each entry of distances
    """
    X = np.zeros((len(x),2))
    X[:,0]=x
    X[:,1]=y
    Y = np.zeros((len(x2),2))
    Y[:,0]=x2
    Y[:,1]=y2
    tree = KDTree(X,leaf_size=40)
    neighbors = np.zeros(len(distances))
    err = np.zeros(len(distances))
    for i,distance in enumerate(distances):
        # query the tree once per radius and derive both statistics from it
        counts = tree.query_radius(Y, r=distance, count_only=True)
        neighbors[i] = np.nanmean(counts)
        err[i] = np.nanstd(counts)
    return neighbors, err
Exemple #6
0
def approximate_surface_node(atlas, node_id):
    """Approximate the surface of the region labelled ``node_id`` in ``atlas``.

    Every voxel of the region is looked up together with the region voxels
    at index distance <= 1 from it (the voxel itself plus its axis-aligned
    neighbours). The returned value is the sum over all region voxels of
    ``7 - count``; for a 3D atlas this is the number of voxel faces that are
    not shared with another voxel of the same region.
    """
    # integer coordinates of all voxels carrying the requested label
    voxels = np.argwhere(atlas == node_id)
    same_region = KDTree(voxels).query_radius(voxels, r=1.0, count_only=True)
    return np.sum(7 - same_region)
Exemple #7
0
def constructQueryDict(df_centroids, filename):
    """Build and pickle the positives/negatives dictionary for every centroid.

    For the centroid at row position ``i`` the entry ``queries[i]`` holds
    its ``filename`` value, the shuffled positions of all other centroids
    within distance 10 ("positives") and the shuffled positions of all
    centroids farther away than 50 ("negatives"), measured in the
    (posX, posY) plane. The dictionary is printed and written to
    ``filename`` with pickle.

    All ids are row positions (0..len-1), which is also how the dictionary
    itself is keyed.
    """
    coords = df_centroids[['posX', 'posY']]
    tree = KDTree(coords)
    ind_nn = tree.query_radius(coords, r=10)
    ind_r = tree.query_radius(coords, r=50)
    # The tree reports row positions, so the complement must be taken over
    # positions as well; using the index labels only works for a default
    # RangeIndex and silently mixes two id spaces otherwise.
    all_positions = np.arange(len(df_centroids))
    queries = {}
    for i in range(len(ind_nn)):
        query = df_centroids.iloc[i]["filename"]
        positives = np.setdiff1d(ind_nn[i], [i]).tolist()
        negatives = np.setdiff1d(all_positions, ind_r[i]).tolist()
        random.shuffle(positives)
        random.shuffle(negatives)
        queries[i] = {"query": query, "positives": positives, "negatives": negatives}

    with open(filename, 'wb') as handle:
        print(queries)
        pickle.dump(queries, handle, protocol=pickle.HIGHEST_PROTOCOL)

    print("Construct Training Baseline Done: " + filename + "!")
Exemple #8
0
def approx_sw2_vr_from_local_bases(pointcloud, bases, max_eps) :
    """Compute a 2-cochain on the triangles of a point cloud from local bases.

    A triangle is a triple i < j < k of point indices whose pairwise
    distances are all strictly below ``max_eps``. For every edge (a, b) of
    such a triangle the transformation ``best_orth_trans(bases[a], bases[b])``
    is lifted with ``lift_to_pin``; the lifts along i->j, j->k and the
    inverse of i->k are multiplied, and the triangle is recorded when the
    first component of the product is negative.

    Parameters:
        pointcloud - array of points, one per row
        bases - one local basis per point; len(bases[0]) is used as the
            dimension d passed to the helper functions
        max_eps - distance threshold (strict) for edges

    Returns:
        numpy array with one row [i, j, k, 1] per recorded triangle (empty
        array if there is none).
    """
    cocycle = []
    n = len(pointcloud)
    d = len(bases[0])

    kd_tree = KDTree(pointcloud, leaf_size=2)
    within_eps = kd_tree.query_radius(pointcloud, r = max_eps)
    # force j to be larger than i and the distance to be strictly below max_eps.
    # This stays a plain list: the per-point arrays have different lengths,
    # and wrapping a ragged list in np.array() raises on NumPy >= 1.24.
    close_neighbors = [ np.array([j for j in within_eps[i]
                                  if j > i and np.linalg.norm(pointcloud[i] - pointcloud[j]) < max_eps])
                        for i in range(n) ]

    # cache of lifted transformations, keyed by the ordered pair of indices
    pin_lifts = {}

    def _pin_lift(a, b) :
        # lift of the transformation between bases a and b, computed once
        if (a,b) not in pin_lifts :
            omega = best_orth_trans(bases[a], bases[b])
            pin_lifts[(a,b)] = lift_to_pin(d, omega)
        return pin_lifts[(a,b)]

    for i in range(n) :
        for j in close_neighbors[i] :
            ij_pin = _pin_lift(i, j)

            for k in close_neighbors[j] :
                # i-j and j-k are close by construction; check the third edge
                if np.linalg.norm(pointcloud[i] - pointcloud[k]) < max_eps :
                    jk_pin = _pin_lift(j, k)
                    ik_pin = _pin_lift(i, k)
                    ki_pin = invert_pin(d, ik_pin)

                    simplex_approx_val = mults(d,vects_to_cliff(d,ij_pin + jk_pin + ki_pin))[0]
                    simplex_val = simplex_approx_val < 0

                    if simplex_val != 0 :
                        cocycle.append([i,j,k, simplex_val])

    return np.array(cocycle)
def overlapped_points(Truth, Predictions, radius):
    """Select the predicted points that have a truth point within ``radius``.

    Parameters:
        Truth - array of reference points, one per row
        Predictions - array of predicted points, one per row
        radius - maximum distance between a prediction and a truth point
            for the prediction to be kept

    Returns:
        (output, count): the list of kept prediction rows, in their original
        order, and its length.
    """
    # nothing to match; also avoids building a tree with leaf_size 0
    if len(Predictions) == 0:
        return [], 0

    tree = KDTree(Truth, leaf_size = 2*len(Predictions))
    # honour the caller's radius (it used to be ignored in favour of a
    # hard-coded 15) and query all predictions in one call
    hits = tree.query_radius(Predictions, r = radius, count_only = True)
    output = [point for point, n_hits in zip(Predictions, hits) if n_hits != 0]
    return output, len(output)
Exemple #10
0
def construct_query_dict(df_centroids, filename):
    """Build and pickle the positives/negatives dictionary for every centroid.

    For the centroid at row position ``i`` the entry ``queries[i]`` holds
    its ``file`` value, the positions of all other centroids within
    distance 10 ("positives", sorted) and the shuffled positions of all
    centroids farther away than 50 ("negatives"), measured in the
    (northing, easting) plane. The dictionary is written to ``filename``
    with pickle.

    All ids are row positions (0..len-1), which is also how the dictionary
    itself is keyed.
    """
    coords = df_centroids[['northing','easting']]
    tree = KDTree(coords)
    ind_nn = tree.query_radius(coords,r=10)
    ind_r = tree.query_radius(coords, r=50)
    # The tree reports row positions, so the complement must be taken over
    # positions as well; using the index labels only works for a default
    # RangeIndex and silently mixes two id spaces otherwise.
    all_positions = np.arange(len(df_centroids))
    queries = {}
    for i in range(len(ind_nn)):
        query = df_centroids.iloc[i]["file"]
        positives = np.setdiff1d(ind_nn[i],[i]).tolist()
        negatives = np.setdiff1d(all_positions,ind_r[i]).tolist()
        random.shuffle(negatives)
        queries[i] = {"query":query,
                      "positives":positives,"negatives":negatives}

    with open(filename, 'wb') as handle:
        pickle.dump(queries, handle, protocol=pickle.HIGHEST_PROTOCOL)

    print("Done ", filename)
Exemple #11
0
def euclidean_analysis_rank_pre(root_path, dataset):
    """Analyse how "far in rank" the next visited venue is from the current one.

    Loads the pickled data object ``root_path + 'dl_' + dataset + '.pk'``.
    For every record returned by ``get_records(1)`` that is not flagged
    ``is_last``, the Euclidean distance between the normalised coordinates
    of the current venue and of the next venue is computed, and the number
    of venues within that distance of the current venue is counted. These
    counts are histogrammed in bins of ``K_bin`` venues, both overall and
    split by the time gap to the next record (half-hour slots, 12 rows).

    The per-time-gap histogram is shown as a heat map and the cumulative
    distributions are printed. Nothing is returned.

    The code runs unchanged on Python 2 and Python 3 and prints the same
    text on both.
    """
    # NOTE(review): pickle executes arbitrary code on load - only use with
    # trusted files.
    with open(root_path + 'dl_' + dataset + '.pk', 'rb') as handle:
        dl = pickle.load(handle)
    K = min(5000, dl.nv)
    K_bin = 10
    # explicit floor division: array sizes and indices must be integers
    n_bins = K // K_bin
    dl.show_info()
    coor_nor = [dl.vid_coor_nor[vid] for vid in range(dl.nv)]
    tree = KDTree(coor_nor)
    time_rank_pre = np.zeros((12, n_bins))
    rank_pre = np.zeros(n_bins)
    time_cnt = np.zeros(12)
    for uid, records_u in dl.uid_records.items():
        vid_cnt = {}
        # all visable
        records_u.summarize()
        for record in records_u.get_records(0):
            if record.vid not in vid_cnt:
                vid_cnt[record.vid] = 0
            vid_cnt[record.vid] += 1
        records_al_test = records_u.get_records(1)
        for rid, record in enumerate(records_al_test):
            if record.is_last:
                continue
            if record.vid not in vid_cnt:
                vid_cnt[record.vid] = 0
            vid_cnt[record.vid] += 1
            # time to the next record, in half-hour slots
            time_gap = int((records_al_test[rid + 1].dt -
                            records_al_test[rid].dt).total_seconds() / 60 / 30)
            # NOTE(review): only a gap of exactly 12 slots is folded into the
            # last row; larger gaps would index past time_cnt - presumably
            # the data never contains them, confirm.
            if time_gap == 12:
                record.peek()
                time_gap = 11
            time_cnt[time_gap] += 1
            dist = np.sqrt(
                np.sum((dl.vid_coor_nor[record.vid] -
                        dl.vid_coor_nor[record.vid_next])**2))
            # venues at most as far from the current venue as the next one
            ids = tree.query_radius([dl.vid_coor_nor[record.vid]], r=dist)
            idx = len(ids[0]) // K_bin
            if idx >= n_bins:
                idx = n_bins - 1
            time_rank_pre[time_gap, idx] += 1
            rank_pre[idx] += 1
    # normalise every time-gap row by its number of records
    for i in range(0, len(time_cnt)):
        time_rank_pre[i] /= time_cnt[i]
    rank_pre /= np.sum(rank_pre)
    plt.imshow(time_rank_pre, cmap='hot', interpolation='nearest')
    plt.show()
    # turn the histograms into cumulative distributions while printing them
    for i in range(0, len(time_cnt)):
        for j in range(1, len(time_rank_pre[i])):
            time_rank_pre[i, j] += time_rank_pre[i, j - 1]
        print("%s %s" % (i, time_rank_pre[i]))
    print("%s %s" % (0, rank_pre[0]))
    for j in range(1, len(rank_pre)):
        rank_pre[j] += rank_pre[j - 1]
        print("%s %s" % (j * 100, rank_pre[j]))
Exemple #12
0
def spatial_neighbours(coords,
                       n_sp_neighbors=7,
                       radius=None,
                       include_source_location=True,
                       sample_id=None):
    """
    Find spatial neighbours using the number of neighbours or radius (KDTree approach).

    Neighbours are searched within each sample separately; all returned
    indices are global, i.e. they refer to rows of `coords`.

    :param coords: numpy.ndarray with x,y positions of spots.
    :param n_sp_neighbors: how many spatially-adjacent neighbors to report for each spot (including the source spot).
     Use 7 for hexagonal grid.
    :param radius: Supersedes `n_sp_neighbors` - radius within which to report spatially-adjacent neighbors for each spot. Pick radius based on spot size.
    :param include_source_location: include the observation itself into the list of neighbours.
    :param sample_id: pd.Series or np.array listing sample membership for each observation (each row of coords).
    :return: when `radius` is None, an integer array of shape
     (n_obs, n_sp_neighbors) - or (n_obs, n_sp_neighbors - 1) when the source
     location is excluded. When `radius` is given, an object array of length
     n_obs whose entries are integer arrays of varying length.
    """
    n_obs = coords.shape[0]

    if sample_id is None:
        sample_id = np.array(["sample" for i in range(n_obs)])
    sample_id = np.asarray(sample_id)

    total_ind = np.arange(0, n_obs).astype(int)

    if radius is None:
        n_cols = n_sp_neighbors if include_source_location else n_sp_neighbors - 1
        coord_ind = np.zeros((n_obs, n_cols), dtype=int)
    else:
        # neighbourhoods differ in size, so they cannot live in a numeric array
        coord_ind = np.empty(n_obs, dtype=object)

    # create and query spatial proximity tree within each sample
    for sam in np.unique(sample_id):
        sam_ind = np.isin(sample_id, [sam])
        sam_global = total_ind[sam_ind]
        sam_coords = coords[sam_ind, :]
        coord_tree = KDTree(sam_coords)

        if radius is None:
            local = coord_tree.query(sam_coords,
                                     k=n_sp_neighbors,
                                     return_distance=False)
            # replace sample-specific indices with a global index
            n_list = sam_global[np.asarray(local)]

            if not include_source_location:
                # compare against the *global* index of each source spot
                keep = n_list != sam_global[:, np.newaxis]
                # a spot can be missing from its own list when other spots
                # share its coordinates; drop the farthest neighbour instead
                # so that every row keeps the same length
                source_absent = keep.all(axis=1)
                keep[source_absent, -1] = False
                n_list = n_list[keep].reshape(
                    (len(sam_global), n_sp_neighbors - 1))

            coord_ind[sam_ind, :] = n_list

        else:
            hits = coord_tree.query_radius(sam_coords,
                                           radius,
                                           count_only=False)
            for source, local in zip(sam_global, hits):
                neighbours = sam_global[local]
                if not include_source_location:
                    neighbours = neighbours[neighbours != source]
                coord_ind[source] = neighbours

    return coord_ind
Exemple #13
0
def impute_work_locations_same_zone(hts_trips, df_ag, df_candidates, df_travel,
                                    name):
    """Assign a work location to every agent by sampling a commute distance.

    A weighted histogram (500 bins) of ``hts_trips["crowfly_distance"]`` is
    turned into a cumulative distribution from which one distance per agent
    is drawn with ``np.random.rand``. Each agent then receives the candidate
    facility that is farthest from home while still within the drawn
    distance; when no facility lies within it, the nearest one is used.

    Parameters:
        hts_trips: table with "crowfly_distance" and "weight" columns.
        df_ag: agents table with "home_x" and "home_y" columns.
        df_candidates: facilities with "x", "y" and "location_id" columns.
        df_travel: not used by this function.
        name: not used by this function.

    Returns:
        A copy of ``df_ag`` with the columns "x", "y" and "location_id" of
        the selected facility added. The inputs are not modified.
    """
    hist_cp, bins_cp = np.histogram(hts_trips["crowfly_distance"],
                                    weights=hts_trips["weight"],
                                    bins=500)

    home_coordinates = np.column_stack((df_ag["home_x"], df_ag["home_y"]))
    work_coordinates = np.column_stack(
        (df_candidates["x"], df_candidates["y"]))

    # inverse-CDF sampling of one distance per agent from the histogram
    bin_midpoints = bins_cp[:-1] + np.diff(bins_cp) / 2
    cdf = np.cumsum(hist_cp)
    cdf = cdf / cdf[-1]
    values = np.random.rand(len(df_ag))
    value_bins = np.searchsorted(cdf, values)
    sampled_distances = bin_midpoints[value_bins]  # same unit as crowfly_distance

    tree = KDTree(work_coordinates)
    # sort_results needs return_distance=True; the distances are not used
    within_radius, _ = tree.query_radius(home_coordinates,
                                         r=sampled_distances,
                                         return_distance=True,
                                         sort_results=True)

    chosen = np.empty(len(within_radius), dtype=int)
    for i, facilities in enumerate(within_radius):
        if len(facilities) > 0:
            # results are sorted by distance: the last one is the farthest
            # facility still inside the sampled radius
            chosen[i] = facilities[-1]
        else:
            # No work facility within the sampled radius: take the nearest
            # one. (The home point is not part of the tree, so the first
            # hit is the nearest facility; asking for two neighbours and
            # taking the second skipped it and failed with one candidate.)
            nearest = tree.query(home_coordinates[i].reshape(1, -1),
                                 k=1,
                                 return_distance=False)
            chosen[i] = nearest[0][0]

    selected = df_candidates.iloc[chosen]
    df_return = df_ag.copy()
    df_return["x"] = selected["x"].values
    df_return["y"] = selected["y"].values
    df_return["location_id"] = selected["location_id"].values
    return df_return
    def test_random_cpu(self):
        """Check ball_query in PARTIAL_DENSE mode on random CPU tensors.

        Verifies that two sorted queries agree, that two unsorted queries
        are expected not to agree, that the outputs have one row per query
        point, and that the neighbours reported for one random query point
        are also found by a KDTree radius search.
        """
        a = torch.randn(100, 3).to(torch.float)
        b = torch.randn(50, 3).to(torch.float)
        # first half of each cloud belongs to batch 0, the rest to batch 1
        half_a = a.shape[0] // 2
        half_b = b.shape[0] // 2
        batch_a = torch.tensor([0] * half_a + [1] * (a.shape[0] - half_a))
        batch_b = torch.tensor([0] * half_b + [1] * (b.shape[0] - half_b))
        R = 1

        def run_query(sort):
            # identical query every time, only the `sort` flag varies
            return ball_query(R,
                              15,
                              a,
                              b,
                              mode="PARTIAL_DENSE",
                              batch_x=batch_a,
                              batch_y=batch_b,
                              sort=sort)

        # sorted results must be reproducible
        idx, dist = run_query(True)
        idx1, dist = run_query(True)
        torch.testing.assert_allclose(idx1, idx)

        # unsorted results are expected to differ between two runs
        with self.assertRaises(AssertionError):
            idx, dist = run_query(False)
            idx1, dist = run_query(False)
            torch.testing.assert_allclose(idx1, idx)

        self.assertEqual(idx.shape[0], b.shape[0])
        self.assertEqual(dist.shape[0], b.shape[0])
        self.assertLessEqual(idx.max().item(), len(batch_a))

        # Comparison to see if we have the same result
        reference = KDTree(a.detach().numpy())
        idx3_sk = reference.query_radius(b.detach().numpy(), r=R)
        i = np.random.randint(len(batch_b))
        for p in idx[i].detach().numpy():
            # entries outside [0, len(batch_a)) are skipped
            if 0 <= p < len(batch_a):
                assert p in idx3_sk[i]
Exemple #15
0
class Sampler:
    '''
    Sample points in free space at a certain height
    '''
    def __init__(self, data, safety_distance, zmin = 10, zmax = 20):
        '''
        Parameters:
            data - 2D array of obstacles; columns 0 and 1 are used as the
                x/y centre and columns 3 and 4 as the x/y half widths
            safety_distance - margin forwarded to extract_polygons and
                added to the polygon search radius
            zmin, zmax - bounds of the sampled altitude
        '''
        self._polygons, self._heights = np.transpose(extract_polygons(data, safety_distance))
        self._xmin = np.min(data[:, 0] - data[:, 3])
        self._xmax = np.max(data[:, 0] + data[:, 3])

        self._ymin = np.min(data[:, 1] - data[:, 4])
        self._ymax = np.max(data[:, 1] + data[:, 4])

        self._zmin = zmin
        # limit z-axis
        self._zmax = zmax
        # Record maximum polygon dimension in the xy plane
        # multiply by 2 since given sizes are half widths
        # This is still rather clunky but will allow us to
        # cut down the number of polygons we compare with by a lot.
        self._max_poly_xy = 2 * np.max((data[:, 3], data[:, 4])) + 2 * safety_distance
        centers = np.array([(p.centroid.x, p.centroid.y) for p in self._polygons]).reshape(-1, 2)
        self._tree = KDTree(centers, metric='euclidean')

    def sample(self, num_samples):
        """Draw num_samples random points and return those in free space.

        Implemented with a k-d tree for efficiency: only polygons whose
        centre lies within the maximum polygon dimension of a sample are
        tested. A sample collides when one of these polygons contains it in
        the xy plane and is at least as high as the sample.

        Returns a list of (x, y, z) tuples; it can hold fewer than
        num_samples points.
        """
        xvals = np.random.uniform(self._xmin, self._xmax, num_samples)
        yvals = np.random.uniform(self._ymin, self._ymax, num_samples)
        zvals = np.random.uniform(self._zmin, self._zmax, num_samples)
        samples = list(zip(xvals, yvals, zvals))

        pts = []
        for s in samples:
            idxs = self._tree.query_radius(np.array([s[0], s[1]]).reshape(1, -1), r=self._max_poly_xy)[0]
            # The condition used to be negated, which rejected every sample
            # that merely had a polygon nearby.
            in_collision = any(
                self._polygons[int(ind)].contains(Point(s)) and self._heights[int(ind)] >= s[2]
                for ind in idxs
            )
            if not in_collision:
                pts.append(s)

        return pts

    @property
    def polygons(self):
        """Obstacle polygons returned by extract_polygons."""
        return self._polygons

    @property
    def heights(self):
        """Heights matching the entries of `polygons`."""
        return self._heights
Exemple #16
0
def create_density_plot(X, Y, embedding):
    """Evaluate a normalised density on the grid given by ``X`` and ``Y``.

    For every grid node the embedding points whose first two coordinates lie
    within distance 2 of the node are passed, together with the node, to
    ``eval_density_at_point``. The resulting grid is divided by its sum.

    ``X`` and ``Y`` are 2D arrays of equal shape holding the node
    coordinates; the result has the shape and dtype of ``X`` before the
    final division.
    """
    Z = np.zeros_like(X)
    tree = KDTree(embedding[:, :2])
    n_rows, n_cols = X.shape[0], X.shape[1]
    for i, j in np.ndindex(n_rows, n_cols):
        node = [X[i, j], Y[i, j]]
        # indices of the embedding points close to this grid node
        close = tree.query_radius([node], r=2)[0]
        Z[i, j] = eval_density_at_point(np.array(node), embedding[close])
    return Z / Z.sum()
Exemple #17
0
def get_direct_neighbors(site, radius):
    """
    Return the indices of all sites within `radius` of `site`.

    The coordinates are read from 'Coords.npy' in `wind_data_path` on every
    call; the result includes `site` itself.

    :param site: row index of the site in the coordinate array
    :param radius: search radius, in the units of the coordinates
    :return: array of row indices
    """
    coords = np.load(wind_data_path + '/Coords.npy')
    tree = KDTree(coords, leaf_size=1)
    # the tree expects a 2D query array, hence the single-row reshape
    origin = coords[site, :].reshape(1, -1)
    hits = tree.query_radius(origin,
                             r=radius,
                             count_only=False,
                             return_distance=False)
    return hits[0]
Exemple #18
0
def eps_neighbor_count(X, X_test, h):
    """Count, for every row of ``X_test``, the rows of ``X`` within distance ``h``.

    Returns an integer array with one count per row of ``X_test``.
    """
    return KDTree(X).query_radius(X_test, r=h, count_only=True)
def estimator1(x, y, k):
    """
    Estimator 1 of Estimating mutual information, A. Kraskov et al., Physical
    Review E 69, 2004.
    x, y: Arrays of shape (N, Dx) and (N, Dy), where N is the number of samples
    and Dx, Dy are the dimensions of the random variables X and Y.
    k: k for the k-th nearest neighbors

    returns the estimate for the mutual information
    """
    n_samples = x.shape[0]
    joint = np.concatenate([x, y], axis=-1)

    # The chebyshev/maximum metric is used in the joint and in both marginal
    # spaces. One extra neighbour is requested because every point is
    # returned as its own nearest neighbour.
    joint_dist, _ = KDTree(joint, metric="chebyshev").query(joint, k + 1)
    kth_dist = joint_dist[:, -1]

    # counts[i, 0] / counts[i, 1]: points of the x / y marginal space lying
    # strictly closer to sample i than its k-th joint neighbour. query_radius
    # is inclusive, so the strict comparison is redone on the distances.
    counts = np.empty((n_samples, 2))
    for col, marginal in enumerate((x, y)):
        _, marginal_dist = KDTree(marginal, metric="chebyshev").query_radius(
            marginal, kth_dist, return_distance=True, count_only=False)
        counts[:, col] = [(d < eps).sum()
                          for d, eps in zip(marginal_dist, kth_dist)]

    # Each count already contains the sample itself (distance 0), which
    # provides the "+ 1" of the estimator.
    digamma_x_mean = digamma(counts[:, 0]).mean()
    digamma_y_mean = digamma(counts[:, 1]).mean()
    return digamma(k) + digamma(n_samples) - digamma_x_mean - digamma_y_mean
Exemple #20
0
class Sampler:
    """Draw random integer points and keep those outside all obstacles."""

    def __init__(self, data):
        """Extract the obstacle polygons and index their centres.

        `data` is a 2D array; columns 0 and 1 are used as the x/y centre and
        columns 3 and 4 as the x/y extent of each obstacle.
        """
        self._polygons = extract_polygons(data)

        x_center, y_center = data[:, 0], data[:, 1]
        x_extent, y_extent = data[:, 3], data[:, 4]
        self._xmin = np.min(x_center - x_extent)
        self._xmax = np.max(x_center + x_extent)
        self._ymin = np.min(y_center - y_extent)
        self._ymax = np.max(y_center + y_extent)

        # limit z-axis
        self._zmin, self._zmax = 20, 30

        print("Extract Polygons..")
        # search radius for candidate polygons around a sample
        self._max_poly_xy = 2 * np.max((x_extent, y_extent))
        print("Extract Polygons..")
        print(len(self._polygons))
        centers = np.array([p.center for p in tqdm(self._polygons)])
        print("Extract Polygons..")
        self._tree = KDTree(centers, metric='euclidean')
        print("Sampler Initialized..")

    def sample(self, num_samples):
        """Draw `num_samples` integer points and return the collision-free ones.

        Only polygons whose centre lies within the search radius of a sample
        are tested; a sample collides when such a polygon contains it and is
        at least as high as the sample. Returns a list of (x, y, z) tuples.
        """
        xvals = np.random.randint(int(self._xmin),
                                  int(self._xmax),
                                  size=num_samples)
        yvals = np.random.randint(int(self._ymin),
                                  int(self._ymax),
                                  size=num_samples)
        zvals = np.random.randint(self._zmin, self._zmax, size=num_samples)

        pts = []
        for s in zip(xvals, yvals, zvals):
            xy = np.array([s[0], s[1]]).reshape(1, -1)
            nearby = self._tree.query_radius(xy, r=self._max_poly_xy)[0]
            candidates = (self._polygons[int(ind)] for ind in nearby)
            blocked = any(p.contains(s) and p.height >= s[2]
                          for p in candidates)
            if not blocked:
                pts.append(s)

        return pts

    @property
    def polygons(self):
        """Obstacle polygons returned by extract_polygons."""
        return self._polygons
Exemple #21
0
def matrix_from_vertices_gen_cocycle(pointcloud, coh_gen, deaths, cocycle,
                                     co_death):
    """Assemble the edge-by-generator matrix of a family of 1-cochains.

    Rows are the ordered pairs (i, j), i < j, of points at distance strictly
    below ``co_death``. Columns are, in this order:

    * the entries of ``coh_gen``, each restricted to its edges shorter than
      ``min(co_death, d)`` with ``d`` the matching entry of ``deaths``;
    * for every point with at least one close neighbour, the cochain with
      value -1 on each edge leaving that point;
    * ``cocycle`` itself.

    Every cochain is a sequence of ``[i, j, v]`` entries; an entry with
    i > j is stored on row (j, i) with the sign flipped.

    Returns:
        Integer numpy array of shape (number of rows, number of columns).
    """
    N = len(pointcloud)

    kd_tree = KDTree(pointcloud, leaf_size=2)
    within_radius = kd_tree.query_radius(pointcloud, r=co_death)
    # This stays a plain list: the per-point arrays have different lengths,
    # and wrapping a ragged list in np.array() raises on NumPy >= 1.24.
    close_neighbors = [
        np.array([
            j for j in within_radius[i]
            if j != i and np.linalg.norm(pointcloud[i] -
                                         pointcloud[j]) < co_death
        ]) for i in range(N)
    ]

    # start by having as generators only the coboundaries of vertices
    vertex_gens = []
    for i in range(N):
        gen = [[i, j, -1] for j in close_neighbors[i]]
        if len(gen) > 0:
            vertex_gens.append(gen)

    # keep only the edges of each given generator that are short enough
    true_coh_gen = []
    for g, d in zip(coh_gen, deaths):
        cutoff = min(co_death, d)
        g_ = []
        for e in g:
            i, j, v = e
            if np.linalg.norm(pointcloud[i] - pointcloud[j]) < cutoff:
                g_.append(e)
        true_coh_gen.append(g_)

    gens = list(true_coh_gen) + vertex_gens + [cocycle]

    ### rows should be indexed by pairs of ordered and distinct points within distance co_death
    rows = [(i, j) for i in range(N) for j in close_neighbors[i] if i < j]
    edge_to_row = {edge: n for n, edge in enumerate(rows)}

    M = np.zeros((len(rows), len(gens)), dtype=int)

    for col, g in enumerate(gens):
        for e in g:
            i, j, v = e
            if i < j:
                M[edge_to_row[(i, j)], col] = v
            else:
                # reversed edge: same row, opposite sign
                M[edge_to_row[(j, i)], col] = -v

    return M
def compute_neighbours(data, radius, sort=False):
    """Return, for every row of ``data``, the indices of the rows within ``radius``.

    The result is an array holding one index array per row of ``data``.
    With ``sort=True`` every index array is sorted in place in ascending
    order.
    """
    neighbourhoods = KDTree(data).query_radius(data,
                                               r=radius,
                                               return_distance=False)
    if not sort:
        return neighbourhoods

    for members in neighbourhoods:
        members.sort()
    return neighbourhoods
Exemple #23
0
def count(data,distance):
    """Count the points within ``distance`` of the last point of ``data``.

    The coordinates returned by ``getcoord(data)`` are stored in the 'x' and
    'y' columns of ``data`` (the argument is modified in place). The last
    row is then used as query point.

    Returns the number of other rows within ``distance`` of the last row
    (the last row itself is not counted).
    """
    data['x'],data['y']= getcoord(data)
    treedt= data[['x','y']]
    buildTree = KDTree(treedt)

    # only the count of the last point is needed, so query just that row
    # instead of every point
    last_point = treedt.iloc[[-1]]
    res = buildTree.query_radius(last_point,r=distance,count_only=True)

    # minus one: the query point is always found within its own radius
    return res[0]-1
Exemple #24
0
def GetPeriodicDuplicatePoints(inPoints, intNumberOfNeighbours: int, fltRadius: float,inCellVectors):
    """Return the indices of the points with too few neighbours within fltRadius.

    The points are extended with AddPeriodicWrapper (using a margin of
    4*fltRadius) and, for every original point, the wrapped points within
    fltRadius are counted. A point is reported when that count, which
    includes the point itself, is below intNumberOfNeighbours + 1.

    Assumes a lattice configuration with a fixed number of neighbours.
    Returns an index array, or an empty list when inPoints is empty.
    """
    intLength = len(inPoints)
    if intLength <= 0:
        return []

    arrWrappedPoints = AddPeriodicWrapper(inPoints, inCellVectors, 4*fltRadius)
    arrCounts = KDTree(arrWrappedPoints).query_radius(inPoints, fltRadius, count_only=True)
    arrBoundaryIndices = np.flatnonzero(arrCounts < intNumberOfNeighbours+1)
    # keep only indices referring to the original (unwrapped) points
    return arrBoundaryIndices[arrBoundaryIndices < intLength]
Exemple #25
0
 def find_neighbors(self, x, X):
     """Return the indices of the points of X within self.eps of x.

     The search uses an sklearn KDTree built from X on every call. Points
     whose coordinates are identical to x are left out of the result.
     """
     within_eps = KDTree(X, leaf_size=5).query_radius([x], r=self.eps)[0]
     target = tuple(x)
     return [i for i in within_eps if tuple(X[i]) != target]
Exemple #26
0
def density(features, objectives, density_radius=1):
    """Count objectives within ``density_radius`` of each feature point.

    Arguments that are not NumPy arrays are replaced by their ``.locs``
    attribute.  The raw counts are passed through ``minmax`` before being
    returned.
    """
    feature_locs = features if isinstance(features, np.ndarray) else features.locs
    objective_locs = (objectives if isinstance(objectives, np.ndarray)
                      else objectives.locs)

    counts = KDTree(objective_locs).query_radius(
        feature_locs, count_only=True, r=density_radius)
    return minmax(counts)
Exemple #27
0
    def fit_predict(self, data):
        """Cluster ``data`` with a DBSCAN-style region-growing pass.

        A KDTree (``self.metric``, leaf_size=10) answers the radius queries.
        A point with fewer than ``self.min_samples`` neighbours within
        ``self.eps`` is labelled ``-1`` (noise); otherwise it seeds a new
        cluster that is grown through every neighbour that is itself dense.

        :param data: indexable collection of points, one row per sample.
        :returns: NumPy array of labels ordered by row index; ``-1`` is noise.
        """
        cluster_counter = 0
        labels = dict()
        tree = KDTree(data, metric=self.metric, leaf_size=10)

        for index_row, row in enumerate(data):
            if index_row in labels:
                continue

            neighbors = list(tree.query_radius([row], r=self.eps)[0])

            if len(neighbors) < self.min_samples:
                labels[index_row] = -1
                continue

            labels[index_row] = cluster_counter
            # Mirror of `neighbors` for O(1) membership tests.
            queued = set(neighbors)

            # `neighbors` grows while it is being iterated: newly reached
            # points are appended and visited later in this same loop.
            for neighbor_index in neighbors:
                if labels.get(neighbor_index) == -1:
                    # Former noise becomes a border point of this cluster.
                    labels[neighbor_index] = cluster_counter

                if neighbor_index in labels:
                    # Already assigned (just now or earlier): do not expand.
                    continue

                labels[neighbor_index] = cluster_counter

                add_neighbors = list(
                    tree.query_radius([data[neighbor_index]], r=self.eps)[0])
                if len(add_neighbors) >= self.min_samples:
                    new_add_neighbors = [x for x in add_neighbors
                                         if x not in queued]
                    neighbors.extend(new_add_neighbors)
                    queued.update(new_add_neighbors)

            cluster_counter += 1

        res = np.array([labels[key] for key in sorted(labels)])
        return res
Exemple #28
0
def sequential_addition(X, r, order=None):
    """Yield indices of points of ``X`` that are pairwise more than ``r`` apart.

    Points are visited in ``order``; each yielded point marks every point
    within radius ``r`` of it (itself included) as visited, so those are
    skipped later.

    :param X: array of points, one row per point.
    :param r: exclusion radius (Euclidean).
    :param order: iterable of row indices giving the visiting order.  When
        ``None``, points are ordered by increasing mean distance to their
        nearest neighbours (10-NN query, the zero self-distance dropped).
        An explicitly empty ``order`` yields nothing.
    """
    tree = KDTree(X, metric='euclidean')

    # `order or ...` raised "truth value is ambiguous" for NumPy arrays, and
    # the 10-NN query is only needed for the default ordering.
    if order is None:
        D, _ = tree.query(X, 10)
        order = D[:, 1:].mean(axis=1).argsort()

    # The `np.bool` alias was removed from NumPy; the builtin is equivalent.
    visited = np.zeros(len(X), dtype=bool)

    for i in order:
        if not visited[i]:
            yield i
            iis, = tree.query_radius([X[i]], r, return_distance=False)
            visited[iis] = True
Exemple #29
0
def calc_spatial_neighbor(X_spatial, eps, leaf_size):
    '''
        Compute spatial neighbours with a kd-tree.
        The kd-tree is used mainly to cope with large data volumes.

        Returns the index arrays from ``query_radius`` for every row of
        X_spatial (no distances, no counting, results unsorted).
    '''
    query_options = dict(return_distance=False,
                         count_only=False,
                         sort_results=False)
    kd_tree = KDTree(X_spatial, leaf_size=leaf_size)
    return kd_tree.query_radius(X_spatial, eps, **query_options)
Exemple #30
0
class hierarchical_search:
    def __init__(self, points, leaf_size):
        """Keep ``points`` and build a KDTree over them with ``leaf_size``."""
        self.points = points
        self.tree = KDTree(points, leaf_size=leaf_size)

    def query_radius(self, queries, radius):
        """Return, for each query, the stored points within ``radius`` of it.

        The result is a list with one entry per query, obtained by indexing
        ``self.points`` with the index array found by the tree.
        """
        return [self.points[found]
                for found in self.tree.query_radius(queries, radius)]
    def _mi1(self, x, y):
        """Estimate mutual information of ``x`` and ``y`` from k-NN counts.

        ``x`` and ``y`` are 2-D sample arrays with the same number of rows.
        Identical inputs short-circuit to ``self._entropy(x)``.  Otherwise
        the max-norm distance to the ``self.k``-th neighbour in the joint
        space sets, per sample, the radius used to count neighbours in each
        marginal space.  For ``self.p == 2`` the difference of
        ``self._log_cd`` of the two dimensions is added to the estimate.
        """
        assert x.shape[0] == y.shape[0]

        if np.array_equal(x, y):
            return self._entropy(x)

        N, dim_x = x.shape
        dim_y = y.shape[1]

        joint = np.hstack((x, y))
        joint_tree = scsp.cKDTree(joint)

        # k + 1 neighbours are requested since the first is the point itself.
        joint_dists, _ = joint_tree.query(joint, k=self.k + 1, p=float("inf"))

        tree_x = KDTree(x, metric='chebyshev')
        tree_y = KDTree(y, metric='chebyshev')

        # Shrink the radii slightly so that only points strictly closer than
        # the k-th neighbour distance are counted.
        radii = (1 - 1e-10) * joint_dists[:, self.k]

        n_x = tree_x.query_radius(x, radii, count_only=True)
        n_y = tree_y.query_radius(y, radii, count_only=True)

        mi = psi(self.k) + psi(N) - (np.mean(psi(n_x)) + np.mean(psi(n_y)))

        if self.p == 2:
            return mi + (self._log_cd(dim_x) - self._log_cd(dim_y))

        # Every other value of p (including infinity) uses the plain estimate.
        return mi
Exemple #32
0
def cpec_density(total_cpec_coords, radius=310.559):
    """Count, for every coordinate, the coordinates within ``radius`` of it.

    :param total_cpec_coords: points used both to build the KDTree and as
        query points, one row per point.
    :type total_cpec_coords: array-like
    :param radius: search radius, in the units of the coordinates.
    :type radius: float
    :return: one neighbour count per input row, as produced by
        ``query_radius(..., count_only=True)``.
    :rtype: numpy.ndarray
    """
    tc = KDTree(total_cpec_coords)
    total = tc.query_radius(total_cpec_coords, r=radius, count_only=True)
    # The counts used to be computed and then dropped by a bare `return`.
    return total
Exemple #33
0
def find_density_reachable_points(dataset, maximum_distance):
    """Build the neighbourhood map of ``dataset``.

    Returns a dict keyed by zero-based point index; each value is a tuple of
    the indices found by a Euclidean KDTree radius query of
    ``maximum_distance`` around that point.
    """
    n_points = dataset.shape[0]
    tree = KDTree(dataset, metric="euclidean")
    neighborhoods = tree.query_radius(X=dataset, r=maximum_distance)
    return {index: tuple(neighborhoods[index]) for index in range(n_points)}
Exemple #34
0
    def find_tree_neighbors(self, atoms, probe):
        """Return, per atom, the indices of atoms within its search radius.

        :param atoms: 2-D array whose columns 0-2 are coordinates and whose
            column 3 is added to ``2 * probe`` to give each atom's radius
            (presumably the atomic radius -- confirm with the caller).
        :param probe: probe size, added twice to every atom's column-3 value.
        :returns: array of index arrays, one per atom, from ``query_radius``.
        """
        radius = atoms[:, 3] + probe + probe

        # Coordinates are the first three columns; no per-row copy is needed.
        points = atoms[:, :3]
        tree = KDTree(points, leaf_size=2)

        # Single-argument call form prints the same text on Python 2 and 3.
        print('RADIUS= %s \n' % (radius,))
        all_nn_indices = tree.query_radius(points,
                                           r=np.transpose(radius))  # NNs
        return all_nn_indices
Exemple #35
0
def samples_within_sphere(raw_data, radius):
    """Count, for every sample, the samples lying within a sphere around it.

    :param raw_data: 2-D array, one row per sample.
    :param radius: a single radius, or a list of radii.
    :returns: float array of shape ``(num_samples, n_radii)`` (one column for
        a scalar radius) holding the neighbour counts from the KDTree.
    """
    print("Creating Features based on samples within sphere")
    num_samples, _ = raw_data.shape

    # A scalar radius is treated as a one-element list so that both cases
    # share one code path.  Previously a list radius was also passed to a
    # bulk query, where it was interpreted as per-point radii and failed.
    radii = radius if isinstance(radius, list) else [radius]

    # Initialize feature output matrix
    features = np.zeros((num_samples, len(radii)))

    # Make KDTree for nearest neighbor queries
    tree = KDTree(raw_data)

    # Create a progress bar
    pbar = create_pbar(num_samples)

    # One vectorised query per radius instead of one query per point.
    for column, r in enumerate(radii):
        features[:, column] = tree.query_radius(raw_data, r=r, count_only=True)
        # Advance proportionally to the radii processed so far; assumes the
        # bar accepts values up to num_samples -- TODO confirm.
        pbar.update(((column + 1) * num_samples) // len(radii))
    pbar.finish()

    return features
Exemple #36
0
def counts_2d_2pt_from_pos(pos_sdss, pos_rm, lam, rmax=220., reso=2.):
    """Bin SDSS galaxies around RM clusters in (r_pi, r_sigma).

    :param pos_sdss: array of SDSS positions, one row per galaxy.
    :param pos_rm: array of RM cluster positions, one row per cluster.
    :param lam: per-cluster values handed to ``get_lambda_bin``, whose result
        is used as an index into the returned list.
    :param rmax: maximum separation searched around each cluster.
    :param reso: bin width of the r_pi and r_sigma grids.
    :returns: list with one 2-D count array per distinct lambda bin, scaled by
        ``1 / (nrm * nsdss)``.
    """
    from sklearn.neighbors import KDTree

    # preliminaries
    nsdss = pos_sdss.shape[0]
    nrm = pos_rm.shape[0]

    # figure out which lambda bins each RM cluster goes into
    lam_bin = get_lambda_bin(lam)
    n_lam_bin = len(set(lam_bin))

    tree_sdss = KDTree(pos_sdss, leaf_size=30)

    # define grids for r_pi and r_sigma.
    rpigrid = np.arange(-rmax, rmax, reso)
    rsigmagrid = np.arange(0, rmax, reso)

    # Single-argument print calls behave the same on Python 2 and 3.
    print('...querying tree...')
    print('...done querying tree...')

    # `np.float` was removed from NumPy; the builtin float is equivalent.
    counts_rpi_rsigma = [
        np.zeros((len(rpigrid) + 1, len(rsigmagrid) + 1), dtype=float)
        for _ in range(n_lam_bin)]

    # loop over clusters, calculate (r_pi, r_sigma) for all nearby galaxies
    # and bin those counts.
    for irm in range(nrm):
        print('%i/%i' % (irm, nrm))
        this_pos_rm = pos_rm[irm, :]
        # query_radius expects a 2-D array of query points.
        ind, dist = tree_sdss.query_radius(this_pos_rm.reshape(1, -1), rmax,
                                           count_only=False,
                                           return_distance=True)
        these_ind = ind[0]
        these_s = dist[0]
        if len(these_ind) == 0:
            continue
        these_pos_sdss = pos_sdss[these_ind, :]
        these_mu = dot_los2(this_pos_rm, these_pos_sdss)
        these_rpi = these_s * these_mu
        these_rsigma = these_s * np.sqrt((1. - these_mu**2.))
        ind_rpi = np.digitize(these_rpi, rpigrid)
        ind_rsigma = np.digitize(these_rsigma, rsigmagrid)
        # Unbuffered add so that repeated (r_pi, r_sigma) cells accumulate.
        np.add.at(counts_rpi_rsigma[lam_bin[irm]], (ind_rpi, ind_rsigma), 1.)

    # normalize
    # ok, really you'd want to normalize by nrm *per lambda bin*,
    # but i don't think it will make any material difference.
    for i in range(n_lam_bin):
        counts_rpi_rsigma[i] *= (1. / nrm / nsdss)
    return counts_rpi_rsigma
	def current_datapoints_threshold_filter(self, neighbour_points = 5):
		"""
		Filter from current datapoints, those that do not have enough neighbour points in the 2*self.eps radius.
		Assumption: if there is less than neighbour_points around the data point, it can't be a part of event.
		Method doesn't take into account networks: neighbours are counted in one tree built from all networks.
		This method is computationally cheaper, than self.current_datapoints_outliers_filter, so it is used as a prefilter.
		Method updates self.current_datapoints dict in place and returns nothing.

		Args:
			neighbour_points (int): minimal number of neighbours, every point should have.
		"""
		# Nothing to filter; concatenate() would raise on an empty list.
		if not self.current_datapoints:
			return
		coords = concatenate([net_data['array'] for net_data in self.current_datapoints.values()])
		megatree = KDTree(coords)
		for net_data in self.current_datapoints.values():
			neighbours_number = megatree.query_radius(net_data['array'], r=self.eps*2, count_only=True)
			# One mask, applied to coordinates and ids alike, keeps them aligned.
			keep = neighbours_number >= neighbour_points
			net_data['array'] = net_data['array'][keep]
			net_data['ids'] = net_data['ids'][keep]
Exemple #38
0
class BigStarBasis(StarBasis):

    def __init__(self, libname='', verbose=False, log_interp=True,
                 n_neighbors=0,  driver=None, in_memory=False,
                 use_params=None, **kwargs):
        """An object which holds the stellar spectral library, performs
        interpolations of that library, and has methods to return attenuated,
        normalized, smoothed stellar spectra.

        This object is set up to work with large grids, so the models file is
        kept open for access from disk.  scikit-learn kd-trees are required
        for model access.  Ideally the grid should be regular (though the
        spacings need not be equal along a given dimension).

        :param libname:
            Path to the hdf5 file to use for the spectral library. Must have
            "ckc" or "ykc" in the filename (to specify which kind of loader to
            use)

        :param n_neighbors: (default:0)
            Number of nearest neighbors to use when requested parameters are
            outside the convex hull of the library parameters.  If ``0`` then
            a ValueError is raised instead of the nearest spectrum.
            NOTE(review): within this class the value is only stored;
            confirm where it is actually consumed.

        :param verbose:
            If True, print information about the parameters used when a point
            is outside the convex hull

        :param log_interp: (default: True)
            Interpolate in log(flux) instead of flux.

        :param driver:
            Forwarded to ``load_lib``, which passes it to ``h5py.File``.

        :param in_memory: (default: False)
            Switch to determine whether the grid is loaded in memory or read
            from disk each time a model is constructed (like you'd want for
            very large grids).

        :param use_params:
            Sequence of strings. If given, only use the listed parameters
            (which must be present in the `_libparams` structure) to build the
            grid and construct spectra.  Otherwise all fields of `_libparams`
            will be used.

        Extra keyword arguments are accepted but not used by this constructor.
        """
        self.verbose = verbose
        self.logarithmic = log_interp
        self._libname = libname
        self.n_neighbors = n_neighbors
        self._in_memory = in_memory

        self.load_lib(libname, driver=driver)
        # Do some important bookkeeping
        if use_params is None:
            self.stellar_pars = self._libparams.dtype.names
        else:
            self.stellar_pars = tuple(use_params)
        self.ndim = len(self.stellar_pars)
        self.lib_as_grid()
        self.params = {}

    def load_lib(self, libname='', driver=None):
        """Read a ykc library which has been preconvolved to be close to your
        data resolution. This library should be stored as an HDF5 file, with
        the datasets ``wavelengths``, ``parameters`` and ``spectra``.  These
        are ndarrays of shape (nwave,), (nmodels,), and (nmodels, nwave)
        respectively.  The ``parameters`` array is a structured array.

        When ``self._in_memory`` is true the spectra are copied into an array
        and the file is closed; otherwise the h5 file object is left open so
        that spectra can be accessed from disk.
        """
        import h5py
        f = h5py.File(libname, "r", driver=driver)
        self._wave = np.array(f['wavelengths'])
        self._libparams = np.array(f['parameters'])
        if self._in_memory:
            self._spectra = np.array(f['spectra'])
            f.close()
        else:
            # Keep the dataset handle; the file must stay open for reads.
            self._spectra = f['spectra']

    def get_star_spectrum(self, **kwargs):
        """Given stellar parameters, obtain an interpolated spectrum at those
        parameters.

        :param **kwargs:
            Keyword arguments must include values for the ``stellar_pars``
            parameters that are stored in ``_libparams``.

        :returns wave:
            The wavelengths at which the spectrum is defined.

        :returns spec:
            The spectrum interpolated to the requested parameters

        :returns unc:
            The uncertainty spectrum, where the uncertainty is due to
            interpolation error.  Currently unimplemented (i.e. it is a None
            type object)
        """
        inds, wghts = self.weights(**kwargs)
        if self.logarithmic:
            # Weighted mean of log(flux), mapped back to flux.
            spec = np.exp(np.dot(wghts, np.log(self._spectra[inds, :])))
        else:
            spec = np.dot(wghts, self._spectra[inds, :])
        spec_unc = None
        return self._wave, spec, spec_unc

    def weights(self, **params):
        """Return the library indices and interpolation weights for a target.

        The candidate models come from ``knearest_inds`` and their weights
        from ``linear_weights``.  Only models with a strictly positive weight
        are kept, and the kept weights are rescaled to sum to one.

        :param params:
            The target parameter location, as keyword arguments.

        :returns inds:
            Indices of the contributing models.

        :returns wghts:
            The normalized weight of each contributing model.
        """
        inds = self.knearest_inds(**params)
        wghts = self.linear_weights(inds, **params)
        # if wghts.sum() < 1.0:
        #     raise ValueError("Something is wrong with the weights")
        good = wghts > 0
        # if good.sum() < 2**self.ndim:
        #     raise ValueError("Did not find all vertices of the hypercube, "
        #                      "or there is no enclosing hypercube in the library.")
        inds = inds[good]
        wghts = wghts[good]
        wghts /= wghts.sum()
        return inds, wghts

    def lib_as_grid(self):
        """Convert the library parameters to pixel indices in each dimension,
        and build and store a KDTree for the pixel coordinates.
        """
        # Get the unique gridpoints in each param
        self.gridpoints = {}
        for p in self.stellar_pars:
            self.gridpoints[p] = np.unique(self._libparams[p])
        # Digitize the library parameters
        X = np.array([np.digitize(self._libparams[p], bins=self.gridpoints[p],
                                  right=True) for p in self.stellar_pars])
        # One row per model, one column per parameter.
        self.X = X.T
        # Build the KDTree
        self._kdt = KDTree(self.X)  # , metric='euclidean')

    def params_to_grid(self, **targ):
        """Convert a set of parameters to grid pixel coordinates.

        :param targ:
            The target parameter location, as keyword arguments.  The elements
            of ``stellar_pars`` must be present as keywords.

        :returns x:
            The target parameter location in pixel coordinates.

        :raises ValueError:
            If looking up the upper grid point of the enclosing bin fails with
            an IndexError, i.e. at least one parameter is outside the grid.
        """
        # bin index
        inds = np.array([np.digitize([targ[p]], bins=self.gridpoints[p], right=False) - 1
                         for p in self.stellar_pars])
        inds = np.squeeze(inds)
        # fractional index.  Could use stored denominator to be slightly faster
        try:
            find = [(targ[p] - self.gridpoints[p][i]) /
                    (self.gridpoints[p][i+1] - self.gridpoints[p][i])
                    for i, p in zip(inds, self.stellar_pars)]
        except(IndexError):
            pstring = "{0}: min={2} max={3} targ={1}\n"
            s = [pstring.format(p, targ[p], *self.gridpoints[p][[0, -1]])
                 for p in self.stellar_pars]
            raise ValueError("At least one parameter outside grid.\n{}".format(' '.join(s)))
        return inds + np.squeeze(find)

    def knearest_inds(self, **params):
        """Find all parameter ``vertices`` within a sphere of radius
        sqrt(ndim).  The parameter values are converted to pixel coordinates
        before a search of the KDTree.

        :param params:
             Keyword arguments which must include keys corresponding to
             ``stellar_pars``, the parameters of the grid.

        :returns inds:
             The sorted indices of all vertices within sqrt(ndim) of the pixel
             coordinates, corresponding to **params.
        """
        # Convert from physical space to grid index space
        xtarg = self.params_to_grid(**params)
        # Query the tree within radius sqrt(ndim)
        try:
            inds = self._kdt.query_radius(xtarg.reshape(1, -1),
                                          r=np.sqrt(self.ndim))
        except(AttributeError):
            # Fallback for a tree object without ``query_radius``
            # (presumably a scipy KDTree -- confirm).
            inds = self._kdt.query_ball_point(xtarg.reshape(1, -1),
                                              np.sqrt(self.ndim))
        return np.sort(inds[0])

    def linear_weights(self, knearest, **params):
        """Use ND-linear interpolation over the knearest neighbors.

        :param knearest:
            The indices of the ``vertices`` for which to calculate weights.

        :param params:
            The target parameter location, as keyword arguments.

        :returns wght:
            The weight for each vertex, computed as the volume of the hypercube
            formed by the target parameter and each vertex.  Vertices more than
            1 away from the target in any dimension are given a weight of zero.
        """
        xtarg = self.params_to_grid(**params)
        x = self.X[knearest, :]
        dx = xtarg - x
        # Fractional pixel weights
        wght = ((1 - dx) * (dx >= 0) + (1 + dx) * (dx < 0))
        # set weights to zero if model is more than a pixel away
        wght *= (dx > -1) * (dx < 1)
        # compute hyperarea for each model and return
        return wght.prod(axis=-1)

    def triangle_weights(self, knearest, **params):
        """Triangulate the k-nearest models, then use the barycenter of the
        enclosing simplex to interpolate.

        NOTE(review): relies on ``self.model_points`` and on ``Delaunay``,
        neither of which is defined in this class -- presumably supplied by
        the base class and a module-level import; confirm.
        """
        inparams = np.array([params[p] for p in self.stellar_pars])
        dtri = Delaunay(self.model_points[knearest, :])
        triangle_ind = dtri.find_simplex(inparams)
        inds = dtri.simplices[triangle_ind, :]
        transform = dtri.transform[triangle_ind, :, :]
        Tinv = transform[:self.ndim, :]
        x_r = inparams - transform[self.ndim, :]
        bary = np.dot(Tinv, x_r)
        # The last barycentric coordinate makes the weights sum to one.
        last = 1.0 - bary.sum()
        wghts = np.append(bary, last)
        oo = inds.argsort()
        return inds[oo], wghts[oo]
Exemple #39
0
def get_pairwise_velocities_one_hemi(hemi, kmax=0.1, rmax=50.):
    """Estimate a line-of-sight velocity for every redMaPPer cluster.

    For each cluster, the SDSS galaxies between ``rmin`` (5 Mpc) and ``rmax``
    are weighted by ``corr_delta_vel`` evaluated at their separation (at the
    tabulated redshift closest to the cluster's) times the line-of-sight
    projection from ``dot_los``, and the products are summed.  Clusters whose
    galaxy count within ``rmax`` is below the 10th percentile keep a value of
    zero.

    :param hemi: hemisphere selector passed to the grid and catalog loaders.
    :param kmax: forwarded to ``corr_delta_vel``.
    :param rmax: maximum cluster-galaxy separation (Mpc).
    :returns: the redMaPPer catalog with ``vlos`` and ``weight`` entries added.
    """
    from scipy import interpolate
    from sklearn.neighbors import KDTree

    # create 3d grid object
    grid = grid3d(hemi=hemi)

    # load SDSS data
    sdss = load_sdss_data_both_catalogs(hemi)

    # load redmapper catalog
    rm = load_redmapper(hemi=hemi)

    # get XYZ positions (Mpc) of both datasets
    x_sdss, y_sdss, z_sdss = grid.xyz_from_radecz(sdss['ra'], sdss['dec'], sdss['z'], applyzcut=False)
    x_rm, y_rm, z_rm = grid.xyz_from_radecz(rm['ra'], rm['dec'], rm['z_spec'], applyzcut=False)
    pos_sdss = np.vstack([x_sdss, y_sdss, z_sdss]).T
    pos_rm = np.vstack([x_rm, y_rm, z_rm]).T

    # build a KDTree for SDSS LRG's.
    tree_sdss = KDTree(pos_sdss, leaf_size=30)
    # One query gives both neighbours and distances; the per-cluster counts
    # follow from the lengths instead of walking the tree a second time.
    ind, dist = tree_sdss.query_radius(pos_rm, rmax, count_only=False, return_distance=True)
    lrg_counts = np.array([len(these_ind) for these_ind in ind])
    min_counts = np.percentile(lrg_counts, 10)

    # loop over RM clusters, get vlos
    ncl = len(rm['ra'])
    vlos = np.zeros(ncl)
    rmin = 5.  # Mpc, tmpp, worth exploring

    redshift_grid = np.arange(0.05, 0.7, 0.01)
    rfine = np.arange(rmin-1, rmax+1, 1.)
    # create a dictionary containing interpolator objects, keyed on redshift
    corr_delta_vel_dict = {}
    for redshift in redshift_grid:
        corr_delta_vel_dict[redshift] = interpolate.interp1d(rfine, corr_delta_vel(rfine, z=redshift, kmax=kmax))

    # Single-argument print calls behave the same on Python 2 and 3.
    print('*********** using kmax=%0.2f, rmax=%i' % (kmax, rmax))
    for i in range(ncl):
        print('%s %s' % (i, ncl))
        if (lrg_counts[i] < min_counts):
            continue
        wh_not_too_close = np.where(dist[i] > rmin)[0]
        these_dist = dist[i][wh_not_too_close]
        these_ind = ind[i][wh_not_too_close]
        # get 3d positions
        these_pos_sdss = pos_sdss[these_ind, :]
        this_pos_rm = pos_rm[i, :]

        # dot with line of sight
        these_dot_los = dot_los(this_pos_rm, these_pos_sdss)
        this_redshift = rm['z_spec'][i]
        # The key is taken from redshift_grid itself, so the float lookup
        # in the dictionary is exact.
        closest_redshift = redshift_grid[np.argmin(np.abs(redshift_grid-this_redshift))]
        this_corr_delta_vel = corr_delta_vel_dict[closest_redshift]
        these_vel = this_corr_delta_vel(these_dist)
        these_vlos = these_vel*these_dot_los
        this_vlos = np.sum(these_vlos)  # tmpp, sum or mean?
        vlos[i] = this_vlos
    rm['vlos'] = vlos
    rm['weight'] = np.ones(ncl)
    return rm
Exemple #40
0
 def avgdigamma(points,dvec,metric='minkowski', p=float('inf')):
     """Average digamma of the neighbour counts around each point.

     For every row of ``points`` the number of points within the matching
     entry of ``dvec`` (reduced by 1e-15) is counted with a KDTree using the
     requested metric; the digamma of those counts, each divided by the
     number of points, is summed.
     """
     dist_metric = DistanceMetric.get_metric(metric, p=p)
     kd_tree = KDTree(points, metric=dist_metric)
     neighbour_counts = kd_tree.query_radius(points, dvec - 1e-15,
                                             count_only=True)
     return np.sum(digamma(neighbour_counts) / len(points))
Exemple #41
0
                prospective_centers.append([xvc, yvc, zvc])

print('Number of empty cells: ' + repr(nvtot))
print('Proceeding to construct KD-Tree...')
print('')

tree = KDTree(pos, leaf_size=5)

print('KD-Tree successfully constructed')
print('')

# For every candidate centre, shrink a sphere from maxvoid_radius inwards
# until the enclosed density falls below thresh * rhomed, then record the
# centre, that radius and the number of points strictly inside it.
counter = 0
for center in prospective_centers:
    counter = counter + 1
    print('Center ' + repr(counter) + ' of ' + repr(len(prospective_centers)))
    # query_radius expects a 2-D array of query points, hence [center].
    ind, dist = tree.query_radius([center], r=maxvoid_radius,
                                  return_distance=True, sort_results=True)

    # `dist` is an array of per-query arrays; it is indexed, not called.
    countfail = len(dist[0])
    doIwrite = False
    for i in reversed(range(len(dist[0]))):
        if countfail / (4./3 * numpy.pi * dist[0][i] ** 3) < thresh * rhomed:
            radiofail = dist[0][i]
            iifail = len(dist[0][:i])
            doIwrite = True
            break
        # Moving one neighbour inwards leaves one point fewer inside the
        # sphere.  The decrement used to sit just before `break`, so every
        # radius was tested against the full count.
        countfail = countfail - 1

    if doIwrite:
        numpy.savetxt(outfile, (center[0], center[1], center[2],
                                radiofail, iifail), newline=' ')
Exemple #42
0
def _total_correlation_ksg_sklearn(data, rvs, crvs=None, k=4, noise=1e-10):
    """
    Compute the total correlation from observations. The total correlation is computed between the columns
    specified in `rvs`, given the columns specified in `crvs`. This utilizes the KSG kNN density estimator,
    and works on discrete, continuous, and mixed data.

    Parameters
    ----------
    data : np.array
        Real valued time series data.
    rvs : iterable of iterables
        The columns for which the total correlation is to be computed.
    crvs : iterable
        The columns upon which the total correlation should be conditioned.
    k : int
        The number of nearest neighbors to use in estimating the local kernel density.
    noise : float
        The standard deviation of the normally-distributed noise to add to the data.

    Returns
    -------
    tc : float
        The total correlation of `rvs` given `crvs`.

    Notes
    -----
    The total correlation is computed in bits, not nats as most KSG estimators do.

    This implementation uses scikit-learn.
    """
    # KSG suggest adding noise (to break symmetries?)
    data = _fuzz(data, noise)

    # Materialise the column specifications as lists: they are concatenated
    # with `+` below, which fails for tuples or other iterables even though
    # those are documented as accepted.
    crvs = [] if crvs is None else list(crvs)
    rvs = [list(rv) for rv in rvs]

    digamma_N = digamma(len(data))
    log_2 = np.log(2)

    all_rvs = list(flatten(rvs)) + crvs
    # Each variable is estimated jointly with the conditioning columns.
    rvs = [rv + crvs for rv in rvs]

    d_rvs = [len(data[0, rv]) for rv in rvs]

    tree = KDTree(data[:, all_rvs], metric="chebyshev")
    tree_rvs = [KDTree(data[:, rv], metric="chebyshev") for rv in rvs]

    epsilons = tree.query(data[:, all_rvs], k + 1)[0][:, -1]  # k+1 because of self

    n_rvs = [t.query_radius(data[:, rv], epsilons, count_only=True) for rv, t in zip(rvs, tree_rvs)]

    log_epsilons = np.log(epsilons)

    h_rvs = [-digamma(n_rv).mean() for n_rv in n_rvs]

    h_all = -digamma(k)

    if crvs:
        tree_crvs = KDTree(data[:, crvs], metric="chebyshev")
        n_crvs = tree_crvs.query_radius(data[:, crvs], epsilons, count_only=True)
        h_crvs = -digamma(n_crvs).mean()
    else:
        h_rvs = [h_rv + digamma_N + d * (log_2 - log_epsilons).mean() for h_rv, d in zip(h_rvs, d_rvs)]
        h_all += digamma_N + sum(d_rvs) * (log_2 - log_epsilons).mean()
        h_crvs = 0

    tc = sum([h_rv - h_crvs for h_rv in h_rvs]) - (h_all - h_crvs)

    # Convert from nats to bits.
    return tc / log_2
Exemple #43
0
    def __init__(self, pts, k=None, r=None, kmax=None, rmax=None):
        """
        Parameters
        ----------
        pts : array, shape(n, d)
            Data points. Should be already normalized if necessary.
        k : int
            Neighbors used to estimate the local density rho.
        r : float
            Radius used to estimate the local density rho by counting the
            points within it. Exactly one of `k` and `r` must be given.
        kmax : int
            If given, only search the nearest kmax neighbors to calculate delta.
            kmax is equivalent to search a sphere of size about kmax**(1/d) times
            the local average separation between points.
            Default is to search all points.
        rmax : float
            If given, only search the neighbors within rmax to calculate delta.
            Default is to search all points.

        Raises
        ------
        ValueError
            If both or neither of `k` and `r` are given, or if both `kmax`
            and `rmax` are given.

        Todos
        -----
        Optimal choice of k and gamma
        Performance optimization with Cython or Numba
        Substructure within density saddle point
        Labeling the noise
        """
        if (k is not None) and (r is not None):
            raise ValueError("Only one of 'k' or 'r' can be specified!")
        if (k is None) and (r is None):
            # Previously this surfaced later as an opaque TypeError.
            raise ValueError("One of 'k' or 'r' must be specified!")
        if (kmax is not None) and (rmax is not None):
            raise ValueError("Only one of 'kmax' or 'rmax' can be specified!")

        # np.asfarray was removed in NumPy 2.0; this is its equivalent.
        pts = np.asarray(pts, dtype=float)
        npts, ndim = pts.shape
        Rmax = np.linalg.norm(pts.max(0) - pts.min(0))
        tree = KDTree(pts)

        # density
        if r is not None:
            k = tree.query_radius(pts, r, count_only=True)
        else:
            r = tree.query(pts, k)[0][:, -1]

        sphere_coeff = np.pi**(0.5 * ndim) / gamma_func(0.5 * ndim + 1)
        rho = k / (sphere_coeff * r**ndim)
        rho[rho == 0] = rho[rho > 0].min() / 2  # reduce by an arbitrary factor

        # delta
        # Defaults (Rmax, -1) stay in place for every point that has no
        # denser neighbour among the points searched.
        delta = np.full(npts, Rmax, dtype='float')
        chief = np.full(npts, -1, dtype='int')  # superior neighbor
        if kmax is not None or rmax is not None:
            if kmax is not None:
                dists, index = tree.query(
                    pts, kmax, return_distance=True, sort_results=True)
            else:
                index, dists = tree.query_radius(
                    pts, rmax, return_distance=True, sort_results=True)
            for i in range(npts):
                rho_i = rho[i]
                # Neighbours are sorted by distance, so the first denser one
                # is the nearest denser one.  Writing straight into the
                # arrays avoids carrying values over from a previous point.
                for j, dist in zip(index[i], dists[i]):
                    if rho[j] > rho_i:
                        chief[i], delta[i] = j, dist
                        break
        else:
            dists = squareform(pdist(pts))
            for i in range(npts):
                rho_i = rho[i]
                for j, dist in enumerate(dists[i]):
                    if (rho[j] > rho_i) and (dist < delta[i]):
                        chief[i], delta[i] = j, dist

        # gamma
        gamma = sphere_coeff * rho * delta**ndim  # need sphere_coeff?
        sorted_index = np.argsort(gamma)
        sorted_gamma = gamma[sorted_index]

        # properties
        self.npts = npts
        self.ndim = ndim
        self.pts = pts
        self.rho = rho
        self.delta = delta
        self.gamma = gamma
        self.chief = chief
        self.sorted_index = sorted_index
        self.sorted_gamma = sorted_gamma
Exemple #44
0
class GlobalSynthCat(object):
    """
    Synthetic-source catalog paired with a KDTree for fast positional queries.

    The catalog is kept on ``self.cat``; the tree (``self.kdt``) is built
    from the output of ``ra_dec_to_xyz`` applied to the catalog's ``ra`` and
    ``dec`` columns.
    """

    def __init__(self, cat_fn=None, catalog=None, cat_params=None):
        """
        Load, adopt, or build the catalog, then index it with a KDTree.

        Parameters
        ----------
        cat_fn : optional
            File name handed to ``Table.read``. Takes precedence over the
            other two arguments when given.
        catalog : optional
            Existing catalog to use when ``cat_fn`` is None. It is not
            copied, so the ``synth_id`` column is added to the caller's
            object.
        cat_params : dict, optional
            Keyword arguments for ``build_synthetic_galaxy_catalog``; only
            used when neither ``cat_fn`` nor ``catalog`` is given. Defaults
            to ``{'nsynths': 100}``.
        """
        from sklearn.neighbors import KDTree

        if cat_fn is not None:
            self.cat = Table.read(cat_fn)
        elif catalog is not None:
            self.cat = catalog
        else:
            # Build the default per call instead of sharing one dict object
            # across every instance via the function signature.
            if cat_params is None:
                cat_params = {'nsynths': 100}
            self.cat = build_synthetic_galaxy_catalog(**cat_params)

        # 1-based running identifier for every synthetic source.
        self.cat['synth_id'] = np.arange(1, len(self.cat) + 1)

        xyz = ra_dec_to_xyz(self.cat['ra'], self.cat['dec'])
        self.kdt = KDTree(np.asarray(xyz).T)

    def query_radius(self, ra, dec, r):
        """
        Search for sources around a coordinate within a circle of
        radius r in arcseconds.

        ``ra`` and ``dec`` describe a single position (the query is reshaped
        to one row); ``get_exp_synths`` passes them in degrees. Returns the
        rows of ``self.cat`` selected by the tree.
        """
        xyz = np.array(ra_dec_to_xyz(ra, dec)).T.reshape(1, -1)
        # r is divided by 3600 (arcsec -> deg) before being converted to the
        # chord length used by the tree.
        idx = self.kdt.query_radius(
            xyz, angular_dist_to_euclidean_dist(r / 3600.0),
            count_only=False, return_distance=False)[0]
        return self.cat[idx]

    def get_exp_synths(self, exp, search_radius=720):
        """
        Get the synths that fall within the given exposure.

        Parameters
        ----------
        exp
            Exposure providing ``getWcs``, ``getDimensions``, ``getXY0`` and
            ``getBBox``.
        search_radius : optional
            Radius (arcseconds, see ``query_radius``) of the preliminary
            search around the exposure center.

        Returns
        -------
        Copy of the matching catalog rows. When the preliminary search finds
        anything, the result carries ``x`` and ``y`` columns holding each
        source's pixel position relative to ``exp.getXY0()`` and is
        restricted to sources inside the exposure's bounding box.
        """

        wcs = exp.getWcs()
        xc, yc = exp.getDimensions()//2 + exp.getXY0()
        coord = wcs.pixelToSky(lsst.afw.geom.Point2D(xc, yc))
        ra_c, dec_c = coord.getRa().asDegrees(), coord.getDec().asDegrees()
        cat = self.query_radius(ra_c, dec_c, search_radius).copy()

        if len(cat) > 0:
            mask = np.zeros(len(cat), dtype=bool)
            # Float placeholders: an integer -1 would create integer columns
            # and truncate the sub-pixel positions assigned below.
            cat['x'] = -1.0
            cat['y'] = -1.0

            # Loop-invariant exposure geometry.
            bbox = exp.getBBox()
            xy0 = exp.getXY0()

            for i, src in enumerate(cat):
                sky_coord = lsst.afw.geom.SpherePoint(
                    src['ra'] * lsst.afw.geom.degrees,
                    src['dec'] * lsst.afw.geom.degrees)
                xy_coord = wcs.skyToPixel(sky_coord)
                if bbox.contains(lsst.afw.geom.Point2I(xy_coord)):
                    mask[i] = True
                    x0, y0 = xy_coord - xy0
                    cat[i]['x'] = x0
                    cat[i]['y'] = y0

            # Drop candidates that landed outside the exposure.
            cat = cat[mask]

        return cat

    def write(self, fn):
        """Write the catalog to ``fn``, overwriting any existing file."""
        self.cat.write(fn, overwrite=True)