def spatial_linkages(data_source, max_d, data_target=None, chunksize=2**18, remove_coincident_pairs=False):
    """
    Compute the indices of datapoints that lie within distance max_d of one another.
    Distances are whatever ``CartesianData.distance`` returns (documented by the
    original author as Euclidean).

    :param data_source: numpy or CartesianData array of source data.
    :param max_d: Maximum distance between points (inclusive).
    :param data_target: optional array accepted by the CartesianData constructor.
    If supplied, the linkage indices are between data_source and data_target,
    otherwise the source data are linked with themselves using the index pairs
    produced by pairwise_differences_indices.
    :param chunksize: Maximum number of candidate pairs evaluated per iteration.
    Must be at least 1.
    :param remove_coincident_pairs: If True, remove links between pairs of points
    with Delta d == 0
    :return: tuple (idx_array_source, idx_array_target, dist_between) of equal-length
    1D arrays. The index arrays keep the dtype of the candidate index arrays, also
    when no link is found.
    :raises ValueError: if chunksize is smaller than 1.
    """
    if chunksize < 1:
        raise ValueError("chunksize must be a positive integer")

    data_source = CartesianData(data_source)
    ndata_source = data_source.ndata

    if data_target is not None:
        data_target = CartesianData(data_target)
        ndata_target = data_target.ndata
        n_pairs = ndata_source * ndata_target
        idx_i, idx_j = np.meshgrid(range(ndata_source), range(ndata_target), copy=False)
    else:
        # self-linkage
        data_target = data_source
        # floor division keeps this an integer on Python 3 as well as Python 2
        n_pairs = ndata_source * (ndata_source - 1) // 2
        idx_i, idx_j = pairwise_differences_indices(ndata_source)

    # Never let the step drop to zero: with no candidate pairs range() would
    # otherwise raise "arg 3 must not be zero" instead of yielding an empty result.
    chunksize = max(1, int(min(chunksize, n_pairs)))

    link_i = []
    link_j = []
    link_d = []

    for start in range(0, idx_i.size, chunksize):
        stop = start + chunksize
        i = idx_i.flat[start:stop]
        j = idx_j.flat[start:stop]
        dd = data_target.getrows(j).distance(data_source.getrows(i)).toarray()
        mask = (dd <= max_d)
        if remove_coincident_pairs:
            mask[dd == 0] = False
        # keep whole arrays per chunk; they are joined once at the end
        link_i.append(i[mask])
        link_j.append(j[mask])
        link_d.append(dd[mask])

    if not link_i:
        # No chunk was processed: return correctly typed empty arrays so the
        # indices remain usable for fancy indexing by callers.
        return (
            np.empty(0, dtype=idx_i.dtype),
            np.empty(0, dtype=idx_j.dtype),
            np.empty(0, dtype=float),
        )

    return np.concatenate(link_i), np.concatenate(link_j), np.concatenate(link_d)
class RipleyK(object):
    """
    Edge-corrected estimate of Ripley's K function for 2D point data inside a
    polygonal domain.

    Typical use: instantiate, call process() once to compute the point linkages
    and edge-correction factors, then call compute_k / compute_l / compute_lhat.
    run_permutation repeats the estimate on randomly generated point sets.
    """

    kde_class = kde_models.VariableBandwidthNnKdeSeparable
    # passed through to the edge-correction routine as n_quad
    n_quad = 32

    def __init__(self, data, max_d, domain, clip_data=True):
        """
        :param data: Spatial data, anything accepted by the CartesianData constructor
        :param max_d: Maximum linkage distance
        :param domain: Polygon-like object providing area and exterior, usable with
        geometry.Point(...).within (presumably a shapely polygon)
        :param clip_data: If True (default), remove data that lie outside of the domain.
        This is recommended as they will result in a division by zero later on
        :return:
        """
        self.data = CartesianData(data)
        if clip_data:
            to_keep = [
                i for i, xy in enumerate(self.data.data)
                if geometry.Point(xy).within(domain)
            ]
            self.data = self.data.getrows(to_keep)
        assert self.data.nd == 2, "Input data must be 2D (i.e. purely spatial)"

        # Count the points actually retained. Using the length of the raw input
        # would overstate n (and the intensity) whenever clipping removed points.
        self.n = self.data.ndata
        self.max_d = max_d
        self.domain = domain
        self.S = self.domain.area

        # populated by process()
        self.ii = self.jj = self.dd = self.dtheta = None
        self.near_exterior = None
        self.edge_correction = None

        self.intensity = self.n / self.S

    def process(self):
        """
        Call after instantiation to prepare all data and compute edge corrections.
        """
        self.ii, self.jj, self.dd, self.near_exterior = prepare_data(
            self.data, self.domain, self.max_d)
        self.edge_correction = self.compute_edge_correction(
            self.data, self.ii, self.dd, self.domain, self.n_quad)

    @staticmethod
    def compute_edge_correction(data, source_idx, dd, domain, n_quad, *args, **kwargs):
        """
        Compute one edge correction factor per linkage. Linkages whose distance does
        not exceed the source point's distance to the domain exterior keep a factor
        of one; the remainder are evaluated in a multiprocessing pool.
        :param data: Full array or DataArray of spatial data
        :param source_idx: Array of source indices, one per linkage
        :param dd: Array of distances, i.e. the circle's radius, one per linkage
        :param domain: Polygon-like domain providing an exterior
        :param n_quad: Forwarded to isotropic_edge_correction_wrapper
        :return: Array with the same shape as dd
        """
        data = CartesianData(data, copy=False)

        # compute distance to edge and therefore which points/distances need edge corrections
        d_to_ext = np.array([
            geometry.Point(data[i]).distance(domain.exterior)
            for i in range(data.ndata)
        ])
        near_exterior = np.where(d_to_ext[source_idx] < dd)[0]

        # output array of same size as dd
        ec = np.ones_like(dd)

        # can switch the method here
        mappable_func = partial(isotropic_edge_correction_wrapper,
                                n_quad=n_quad, domain=domain, method='area')

        print("Computing edge correction terms...")
        tic = time()
        with closing(mp.Pool()) as pool:
            # NOTE(review): the huge timeout on get() is presumably the usual
            # workaround that keeps the parent responsive to Ctrl-C - confirm.
            res = pool.map_async(
                mappable_func,
                ((data[source_idx[i]], dd[i]) for i in near_exterior)
            ).get(1e100)
        print("Completed in %f seconds" % (time() - tic))

        ec[near_exterior] = np.array(res)
        return ec

    def compute_k(self, u, dd=None, edge_corr=None, *args, **kwargs):
        """
        Evaluate the K estimate at one or more distance thresholds.
        :param u: Distance threshold, scalar or iterable of scalars
        :param dd: Optional linkage distances; defaults to those stored by process()
        :param edge_corr: Optional edge correction factors matching dd; defaults to
        those stored by process()
        :return: 1D array with one value per threshold
        """
        if not hasattr(u, '__iter__'):
            u = [u]
        dd = dd if dd is not None else self.dd
        edge_corr = edge_corr if edge_corr is not None else self.edge_correction

        res = []
        for t in u:
            ind = (dd <= t)
            # each linkage is weighted by the reciprocal of its edge correction
            w = 1.0 / edge_corr[ind]
            res.append(w.sum() / float(self.n) / self.intensity)
        return np.array(res)

    def compute_l(self, u):
        """
        Compute the difference between K and the CSR model value pi * u^2
        :param u: Distance threshold, scalar or array-like
        :return: Array of K(u) - pi * u^2
        """
        k = self.compute_k(u)
        # asarray so that a plain list of thresholds can be squared
        csr = np.pi * np.asarray(u) ** 2
        return k - csr

    def compute_lhat(self, u):
        """
        Lhat is defined as (K / pi) ^ 0.5
        :param u: Distance threshold, scalar or iterable of scalars
        :return: Array of Lhat values
        """
        k = self.compute_k(u)
        return np.sqrt(k / np.pi)

    def run_permutation(self, u, niter=20):
        """
        Repeat the K estimate on niter sets of self.n random points drawn within the
        domain.
        :param u: Distance threshold(s); none may exceed max_d
        :param niter: Number of random point sets to generate
        :return: Array holding one compute_k result per completed iteration
        :raises AttributeError: if any value of u exceeds max_d
        """
        # asarray so that list input is compared elementwise
        if np.any(np.asarray(u) > self.max_d):
            raise AttributeError('No values of u may be > max_d')

        k = []
        try:
            for _ in range(niter):
                data = CartesianData.from_args(
                    *spatial.random_points_within_poly(self.domain, self.n))
                ii, _jj, dd, _near_exterior = prepare_data(
                    data, self.domain, self.max_d)
                ec = self.compute_edge_correction(data, ii, dd, self.domain, self.n_quad)
                k.append(self.compute_k(u, dd=dd, edge_corr=ec))
        except Exception as exc:
            print(repr(exc))
        finally:
            # Returning from finally hands back whatever iterations completed; it
            # also suppresses any in-flight exception, including KeyboardInterrupt.
            return np.array(k)