def run_permutation(self, u, niter=20):
    """ Run random permutations under CSR to test the significance of the K values. """
    if np.any(u > self.max_d):
        raise AttributeError('No values of u may be > max_d')
    mappable_func = partial(edge_correction_wrapper, n_quad=32, domain=self.domain)
    k = []
    with closing(mp.Pool()) as pool:
        try:
            for i in range(niter):
                # generate a CSR realisation with the same number of points
                data = CartesianData.from_args(
                    *spatial.random_points_within_poly(self.domain, self.n))
                ii, jj, dd, near_exterior = prepare_data(data, self.domain, self.max_d)
                # only points whose circle extends beyond the boundary need a correction != 1
                edge_corr_circ = np.ones(dd.size)
                edge_corr_area = np.ones(dd.size)
                res = pool.map_async(
                    mappable_func,
                    ((data[ii[j]], dd[j]) for j in near_exterior)).get(1e100)
                res = np.array(res)
                edge_corr_circ[near_exterior] = res[:, 0]
                edge_corr_area[near_exterior] = res[:, 1]
                k.append(self.compute_k(u, dd=dd, edge_corr=edge_corr_area))
        finally:
            # return any completed iterations, even if interrupted
            return np.array(k)
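# `edge_correction_wrapper` is not defined in this module. A minimal sketch of what
# it is assumed to return, using shapely directly: for a circle of radius r centred
# at loc, the first element is the fraction of the circle's perimeter inside the
# domain and the second is the fraction of the disc's area inside the domain. The
# name and the handling of `n_quad` (quadrature resolution, absorbed here by
# shapely's own buffer discretisation) are assumptions.
def edge_correction_sketch(loc, r, domain, n_quad=32):
    disc = geometry.Point(loc).buffer(r)
    frac_circ = domain.intersection(disc.exterior).length / disc.exterior.length
    frac_area = domain.intersection(disc).area / disc.area
    return frac_circ, frac_area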
def __init__(self, data, max_d, domain):
    self.data = CartesianData(data)
    assert self.data.nd == 2, "Input data must be 2D (i.e. purely spatial)"
    self.n = len(data)
    self.max_d = max_d
    self.domain = domain
    self.S = self.domain.area
    self.ii = self.jj = self.dd = self.dphi = None
    self.near_exterior = None
    self.edge_corr_circ = self.edge_corr_area = None
    self.intensity = self.n / self.S
def run_permutation(self, u, niter=20):
    if np.any(u > self.max_d):
        raise AttributeError('No values of u may be > max_d')
    k = []
    try:
        for i in range(niter):
            data = CartesianData.from_args(
                *spatial.random_points_within_poly(self.domain, self.n))
            ii, jj, dd, dtheta, near_exterior = prepare_data(
                data, self.domain, self.max_d, compute_angles=True)
            ec = self.compute_edge_correction(data, ii, dd, self.segments,
                                              self.domain, self.n_quad)
            k.append(self.compute_k(u, dd=dd, dtheta=dtheta, edge_corr=ec))
    except Exception as exc:
        print repr(exc)
    finally:
        return np.array(k)
def compute_edge_correction(data, source_idx, dd, segments, domain, *args, **kwargs):
    """
    Compute the edge correction factors for the data centred at each source point,
    with spatial extent given by dd, separately for each angular segment.
    :param data: Full array or DataArray of spatial data
    :param source_idx: Array of source indices
    :param dd: Array of distances, i.e. the circle's radius
    :param segments: Iterable of angular segment objects, each supplying an
    area_correction(loc, r, domain) method
    :param domain: Shapely polygon defining the study area
    :return: Array of shape (dd.size, len(segments)) of correction factors
    """
    data = CartesianData(data, copy=False)

    # compute distance to the domain boundary; only points whose circle extends
    # beyond the boundary need a correction != 1
    d_to_ext = np.array([
        geometry.Point(data[i]).distance(domain.exterior)
        for i in range(data.ndata)
    ])
    near_exterior = np.where(d_to_ext[source_idx] < dd)[0]

    # output array: one row per distance, one column per segment
    ec = np.ones((dd.size, len(segments)))

    print "Computing edge correction terms..."
    tic = time()
    for j, seg in enumerate(segments):
        iter_args = ((data[source_idx[i]], dd[i], domain) for i in near_exterior)
        ec[near_exterior, j] = np.array([seg.area_correction(*x) for x in iter_args])
    print "Completed in %f seconds" % (time() - tic)
    return ec
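# The segment objects' `area_correction` method is not defined in this module. A
# minimal sketch of the assumed computation for one angular segment [phi0, phi1),
# assuming phi1 - phi0 < 2 * pi: the correction is the fraction of the circular
# sector (not the full disc) that lies inside the domain, with the sector
# approximated as a polygon built from points along the arc. The name and the
# `n_arc` resolution parameter are illustrative assumptions.
def sector_area_correction_sketch(loc, r, domain, phi0, phi1, n_arc=64):
    x0, y0 = loc
    angles = np.linspace(phi0, phi1, n_arc)
    arc = [(x0 + r * np.cos(a), y0 + r * np.sin(a)) for a in angles]
    sector = geometry.Polygon([(x0, y0)] + arc)
    return domain.intersection(sector).area / sector.area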
def run_permutation(self, u, phi=None, niter=20):
    """
    Run random permutation to test the significance of the K values.
    :param u: Array of distance thresholds.
    :param phi: Array of filter functions. These should accept a vector of angles
    and return a masked array that is True for angles within the segment.
    :param niter: Number of permutations.
    :return: Array of simulated K values, one row per permutation.
    """
    if phi is None:
        raise AttributeError("phi is a required input argument")
    if np.any(u > self.max_d):
        raise AttributeError('No values of u may be > max_d')
    k = []
    with closing(mp.Pool()) as pool:
        # generate all CSR realisations up front
        mappable_func = partial(spatial.random_points_within_poly, npts=self.n)
        all_randomisations = pool.map_async(
            mappable_func, (self.domain for i in range(niter))).get(1e100)

        mappable_func = partial(edge_correction_wrapper, n_quad=32, domain=self.domain)
        try:
            for i in range(niter):
                data = CartesianData.from_args(*all_randomisations[i])
                ii, jj, dd, dphi, near_exterior = prepare_data(
                    data, self.domain, self.max_d, compute_angles=True)
                edge_corr_circ = np.ones(dd.size)
                edge_corr_area = np.ones(dd.size)
                res = pool.map_async(
                    mappable_func,
                    ((data[ii[j]], dd[j]) for j in near_exterior)).get(1e100)
                res = np.array(res)
                edge_corr_circ[near_exterior] = res[:, 0]
                edge_corr_area[near_exterior] = res[:, 1]
                k.append(self.compute_k(u, phi=phi, dd=dd, dphi=dphi,
                                        edge_corr=edge_corr_area))
        finally:
            return np.array(k)
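# A minimal sketch of suitable `phi` filters, following the docstring above: each
# is a plain callable that takes a vector of angles (assumed in radians) and
# returns a boolean mask that is True inside its segment. The factory name is an
# illustrative assumption.
def make_phi_filter(phi_min, phi_max):
    def phi_filter(theta):
        # wrap into [0, 2 * pi) so negative angles are handled
        theta = np.asarray(theta) % (2 * np.pi)
        return (theta >= phi_min) & (theta < phi_max)
    return phi_filter

# e.g. four quadrants covering the full circle
phi = [make_phi_filter(i * np.pi / 2, (i + 1) * np.pi / 2) for i in range(4)]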
def spatial_linkages(data_source,
                     max_d,
                     data_target=None,
                     chunksize=2**18,
                     remove_coincident_pairs=False):
    """
    Compute the indices of datapoints that lie within distance max_d of one another.
    Distances are Euclidean.
    :param data_source: Numpy or CartesianData array of source data.
    :param max_d: Maximum distance between points.
    :param data_target: Optional numpy or CartesianData array. If supplied, the
    linkage indices run from data_source to data_target; otherwise the source is
    linked against itself.
    :param chunksize: The number of pairs to process per iteration, to limit memory usage.
    :param remove_coincident_pairs: If True, remove links between pairs of points
    separated by a distance of exactly zero.
    :return: Tuple (idx_array_source, idx_array_target, dist_between).
    """
    data_source = CartesianData(data_source)
    ndata_source = data_source.ndata
    if data_target is not None:
        data_target = CartesianData(data_target)
        ndata_target = data_target.ndata
        chunksize = min(chunksize, ndata_source * ndata_target)
        idx_i, idx_j = np.meshgrid(range(ndata_source),
                                   range(ndata_target),
                                   copy=False)
    else:
        # self-linkage: consider each unordered pair only once
        data_target = data_source
        chunksize = min(chunksize, ndata_source * (ndata_source - 1) // 2)
        idx_i, idx_j = pairwise_differences_indices(ndata_source)
    link_i = []
    link_j = []
    link_d = []
    for k in range(0, idx_i.size, chunksize):
        i = idx_i.flat[k:(k + chunksize)]
        j = idx_j.flat[k:(k + chunksize)]
        dd = (data_target.getrows(j).distance(data_source.getrows(i))).toarray()
        mask = (dd <= max_d)
        if remove_coincident_pairs:
            mask[dd == 0] = False
        link_i.extend(i[mask])
        link_j.extend(j[mask])
        link_d.extend(dd[mask])
    return np.array(link_i), np.array(link_j), np.array(link_d)
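# A minimal usage sketch with synthetic data: find every pair of points lying
# within distance 0.5 of one another (self-linkage, coincident pairs removed).
pts = np.random.rand(1000, 2) * 10.
link_i, link_j, link_d = spatial_linkages(pts, 0.5, remove_coincident_pairs=True)
print("%d linked pairs; largest linked distance %.3f" % (link_i.size, link_d.max()))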
def test_point_process(self):
    """
    Tests the output of the PP stochastic method based on a given random seed.
    The tests are all based on KNOWN results, NOT on the ideal results. Failing
    some of these tests may still indicate an improvement.
    """
    r = models.SeppStochasticNn(self.data, max_delta_d=0.75, max_delta_t=80)
    r.set_seed(42)
    r.p = estimation.estimator_bowers(self.data, r.linkage, ct=1, cd=10)
    ps = r.train(niter=15, verbose=False)
    self.assertEqual(r.ndata, self.data.shape[0])
    self.assertEqual(len(r.num_bg), 15)
    self.assertAlmostEqual(r.l2_differences[0], 0.0011281, places=3)
    self.assertAlmostEqual(r.l2_differences[-1], 0.0001080, places=3)
    num_bg_true = self.c.number_bg
    # BG count should agree with the simulation to within 5 per cent
    self.assertTrue(
        np.abs(r.num_bg[-1] - num_bg_true) / float(num_bg_true) < 0.05)
    self.assertListEqual(r.num_trig, [r.ndata - x for x in r.num_bg])
    self.assertEqual(len(r.linkage[0]), 6927)

    # mean BG_t
    bg_intensity = self.c.bg_params[0]['intensity']
    t = np.linspace(0, max(self.data[:, 0]), 10000)
    zt = r.bg_kde.marginal_pdf(t, dim=0, normed=False)
    mt = np.mean(zt)
    self.assertTrue(np.abs(mt - bg_intensity) / float(bg_intensity) < 0.05)

    # integrated squared error; the bound is set manually from previous experiments
    # and should be as low as possible (no time variation in the simulation)
    ise = np.sum((zt - bg_intensity) ** 2) * (t[1] - t[0])
    self.assertTrue(ise < 250)

    # mean BG_x, BG_y: should be (0, 0)
    x, y = np.meshgrid(np.linspace(-15, 15, 200), np.linspace(-15, 15, 200))
    xy = CartesianData.from_meshgrid(x, y)
    zxy = r.bg_kde.partial_marginal_pdf(xy, normed=False)
    mx = np.sum(x * zxy) / x.size
    my = np.sum(y * zxy) / y.size
    # bounds set manually
    self.assertTrue(np.abs(mx) < 0.25)
    self.assertTrue(np.abs(my) < 0.25)

    # stdev BG_x, BG_y
    bg_sx = self.c.bg_params[0]['sigma'][0]
    bg_sy = self.c.bg_params[0]['sigma'][1]
    stdevx = np.sqrt((np.sum(x ** 2 * zxy) / (x.size - 1)) - mx ** 2)
    stdevy = np.sqrt((np.sum(y ** 2 * zxy) / (y.size - 1)) - my ** 2)
    self.assertTrue(np.abs(stdevx - bg_sx) / bg_sx < 0.4)  # agreement here isn't great
    self.assertTrue(np.abs(stdevy - bg_sy) / bg_sy < 0.4)  # agreement here isn't great

    # measure of asymmetry: should be 0
    self.assertTrue(2 * np.abs(stdevx - stdevy) / (stdevx + stdevy) < 0.012)

    # trigger t
    t = np.linspace(0, r.max_delta_t, 1000)
    gt = r.trigger_kde.marginal_pdf(t, normed=False) / r.ndata
    w = self.c.trigger_decay
    th = self.c.trigger_intensity
    gt_true = th * w * np.exp(-w * t)
    ise = np.sum((gt - gt_true) ** 2) * (t[1] - t[0])
    self.assertTrue(ise < 0.001)

    # trigger x and y marginals against the true Gaussians
    x = np.linspace(-r.max_delta_d, r.max_delta_d, 10000)
    gx = r.trigger_kde.marginal_pdf(x, dim=1, normed=False) / r.ndata
    gy = r.trigger_kde.marginal_pdf(x, dim=2, normed=False) / r.ndata
    sx = self.c.trigger_sigma[0]
    gx_true = th / (np.sqrt(2 * np.pi) * sx) * np.exp(-(x ** 2) / (2 * sx ** 2))
    ise = np.sum((gx - gx_true) ** 2) * (x[1] - x[0])
    self.assertTrue(ise < 0.1)
    sy = self.c.trigger_sigma[1]
    gy_true = th / (np.sqrt(2 * np.pi) * sy) * np.exp(-(x ** 2) / (2 * sy ** 2))
    ise = np.sum((gy - gy_true) ** 2) * (x[1] - x[0])
    self.assertTrue(ise < 0.01)
def network_paths_source_targets(net_obj,
                                 source,
                                 targets,
                                 max_search_distance,
                                 max_split=None,
                                 verbose=False,
                                 logger=None):
    if logger is None:
        logger = logging.getLogger('null')
        logger.handlers = []
        logger.addHandler(logging.NullHandler())

    target_points = NetworkData(targets)
    paths = defaultdict(list)

    g = network_walker_from_net_point(net_obj,
                                      source,
                                      max_distance=max_search_distance,
                                      max_split=max_split,
                                      repeat_edges=True,
                                      verbose=verbose,
                                      logger=logger)

    # Cartesian filtering by nodes
    target_nodes_pos = CartesianData(
        [t.edge.node_pos_coords for t in target_points.toarray(0)])
    target_nodes_neg = CartesianData(
        [t.edge.node_neg_coords for t in target_points.toarray(0)])

    # Find the targets on the source edge and include these explicitly.
    # This is required for longer edges, where neither of the edge nodes is
    # within max_search_distance.
    on_this_edge = np.array(
        [t.edge == source.edge for t in target_points.toarray(0)])
    logger.debug("Found %d points on the starting edge" % on_this_edge.sum())

    source_xy_tiled = CartesianData([source.cartesian_coords] *
                                    target_points.ndata)
    target_distance_pos = target_nodes_pos.distance(source_xy_tiled)
    target_distance_neg = target_nodes_neg.distance(source_xy_tiled)
    reduced_target_idx = np.where(
        (target_distance_pos.toarray(0) <= max_search_distance) |
        (target_distance_neg.toarray(0) <= max_search_distance) |
        on_this_edge)[0]
    reduced_targets = target_points.getrows(reduced_target_idx)
    logger.debug(
        "Initial filtering reduces number of targets from {0} to {1}".format(
            target_points.ndata, reduced_targets.ndata))

    # alternative: Cartesian filtering by NetPoint
    # source_xy_tiled = CartesianData([source.cartesian_coords] * target_points.ndata)
    # target_distance = target_points.to_cartesian().distance(source_xy_tiled)
    # reduced_target_idx = np.where(target_distance.toarray(0) <= max_search_distance)[0]
    # reduced_targets = target_points.getrows(reduced_target_idx)

    # alternative: keep ALL targets
    # reduced_targets = target_points
    # reduced_target_idx = range(target_points.ndata)

    for path, edge in g:
        # test whether any targets lie on the new edge
        for i, t in enumerate(reduced_targets.toarray(0)):
            if t.edge == edge:
                # get distance from current node to this point
                if not len(path.nodes):
                    # this only happens at the starting edge
                    dist_between = (t - source).length
                else:
                    # all other situations
                    dist_along = t.node_dist[path.nodes[-1]]
                    dist_between = path.distance_total + dist_along
                logger.debug("Target %d is on this edge at a distance of %.2f" %
                             (reduced_target_idx[i], dist_between))
                if dist_between <= max_search_distance:
                    logger.debug("Adding target %d to paths" %
                                 reduced_target_idx[i])
                    this_path = NetPath(net_obj,
                                        start=path.start,
                                        end=t,
                                        nodes=list(path.nodes),
                                        distance=dist_between,
                                        split=path.splits_total)
                    paths[reduced_target_idx[i]].append(this_path)

    return dict(paths)
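# A minimal usage sketch, assuming `itn_net`, `sources` and `targets` as set up in
# the script below. The result is a dict keyed by target index, each value a list
# of NetPath objects; reading `p.distance` off a NetPath is assumed from the
# constructor keyword above.
paths = network_paths_source_targets(itn_net,
                                     sources.getone(0),
                                     targets,
                                     max_search_distance=400.)
for idx, path_list in paths.items():
    print("target %d: %d path(s), shortest %.1f" %
          (idx, len(path_list), min(p.distance for p in path_list)))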
import numpy as np
from matplotlib import pyplot as plt

from data.models import CartesianData, DataArray, NetworkData
# `load_test_network`, `utils` and `NetworkKernelEqualSplitLinear` are assumed to
# be importable from this package's network modules.

itn_net = load_test_network()
nodes = np.array([t['loc'] for t in itn_net.g.node.values()])
xmin, ymin, xmax, ymax = itn_net.extent

# sample points uniformly along the network (spacing parameter 10)
targets, n_per_edge = utils.network_walker_uniform_sample_points(itn_net, 10)

# lay down some random points within the bounding box
num_pts = 100
x_pts = np.random.rand(num_pts) * (xmax - xmin) + xmin
y_pts = np.random.rand(num_pts) * (ymax - ymin) + ymin
xy = CartesianData.from_args(x_pts, y_pts)
sources = NetworkData.from_cartesian(itn_net, xy, grid_size=50)  # grid_size defaults to 50
# not all points necessarily snap successfully
num_pts = sources.ndata

radius = 200.
nw = utils.NetworkWalker(itn_net, targets, max_distance=radius, max_split=1e4)
k = NetworkKernelEqualSplitLinear(sources.getone(0), radius)
k.set_walker(nw)
z = k.pdf()
zn = z / max(z)
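# A short plotting sketch for the normalised density computed above: draw the
# kernel value at each target location, converted back to Cartesian coordinates.
# `to_cartesian()` appears elsewhere in this module; `toarray(1)` for the second
# coordinate is assumed by analogy with the `toarray(0)` calls above.
target_xy = targets.to_cartesian()
plt.figure()
plt.scatter(target_xy.toarray(0), target_xy.toarray(1), c=zn, s=10, edgecolor='none')
plt.colorbar()
plt.title("Normalised network kernel density around one source")
plt.show()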
class RipleyK(object):
    kde_class = kde_models.VariableBandwidthNnKdeSeparable
    n_quad = 32

    def __init__(self, data, max_d, domain, clip_data=True):
        """
        :param data: Spatial data array (n x 2).
        :param max_d: Maximum distance at which K will be evaluated.
        :param domain: Shapely polygon defining the study area.
        :param clip_data: If True (default), remove data that lie outside of the
        domain. This is recommended, as such points result in a division by zero
        later on.
        """
        self.data = CartesianData(data)
        if clip_data:
            to_keep = [
                i for i, xy in enumerate(self.data.data)
                if geometry.Point(xy).within(domain)
            ]
            self.data = self.data.getrows(to_keep)
        assert self.data.nd == 2, "Input data must be 2D (i.e. purely spatial)"
        # use the post-clipping count, otherwise the intensity estimate is biased
        self.n = self.data.ndata
        self.max_d = max_d
        self.domain = domain
        self.S = self.domain.area
        self.ii = self.jj = self.dd = self.dtheta = None
        self.edge_correction = None
        self.intensity = self.n / self.S

    def process(self):
        # call after instantiation to prepare all data and compute edge corrections
        self.ii, self.jj, self.dd, self.near_exterior = prepare_data(
            self.data, self.domain, self.max_d)
        self.edge_correction = self.compute_edge_correction(
            self.data, self.ii, self.dd, self.domain, self.n_quad)

    @staticmethod
    def compute_edge_correction(data, source_idx, dd, domain, n_quad, *args, **kwargs):
        """
        Compute the edge correction factors for the data centred at each source
        point, with spatial extent given by dd.
        :param data: Full array or DataArray of spatial data
        :param source_idx: Array of source indices
        :param dd: Array of distances, i.e. the circle's radius
        :return: Array of correction factors, same size as dd
        """
        data = CartesianData(data, copy=False)

        # compute distance to the domain boundary; only points whose circle
        # extends beyond the boundary need a correction != 1
        d_to_ext = np.array([
            geometry.Point(data[i]).distance(domain.exterior)
            for i in range(data.ndata)
        ])
        near_exterior = np.where(d_to_ext[source_idx] < dd)[0]

        # output array of same size as dd
        ec = np.ones_like(dd)

        # can switch the method here
        mappable_func = partial(isotropic_edge_correction_wrapper,
                                n_quad=n_quad,
                                domain=domain,
                                method='area')
        print "Computing edge correction terms..."
        tic = time()
        with closing(mp.Pool()) as pool:
            res = pool.map_async(
                mappable_func,
                ((data[source_idx[i]], dd[i]) for i in near_exterior)).get(1e100)
        print "Completed in %f seconds" % (time() - tic)
        ec[near_exterior] = np.array(res)
        return ec

    def compute_k(self, u, dd=None, edge_corr=None, *args, **kwargs):
        if not hasattr(u, '__iter__'):
            u = [u]
        dd = dd if dd is not None else self.dd
        edge_corr = edge_corr if edge_corr is not None else self.edge_correction
        res = []
        for t in u:
            ind = (dd <= t)
            w = 1 / edge_corr[ind]
            res.append(w.sum() / float(self.n) / self.intensity)
        return np.array(res)

    def compute_l(self, u):
        """
        Compute the difference between K and the CSR (homogeneous Poisson) model.
        :param u: Distance threshold(s).
        :return: K(u) - pi * u^2
        """
        k = self.compute_k(u)
        csr = np.pi * u ** 2
        return k - csr

    def compute_lhat(self, u):
        r"""
        Lhat is defined as (K / \pi) ^ 0.5
        :param u: Distance threshold(s).
        """
        k = self.compute_k(u)
        return np.sqrt(k / np.pi)

    def run_permutation(self, u, niter=20):
        if np.any(u > self.max_d):
            raise AttributeError('No values of u may be > max_d')
        k = []
        try:
            for i in range(niter):
                data = CartesianData.from_args(
                    *spatial.random_points_within_poly(self.domain, self.n))
                ii, jj, dd, near_exterior = prepare_data(
                    data, self.domain, self.max_d)
                ec = self.compute_edge_correction(data, ii, dd, self.domain,
                                                  self.n_quad)
                k.append(self.compute_k(u, dd=dd, edge_corr=ec))
        except Exception as exc:
            print repr(exc)
        finally:
            return np.array(k)
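# A minimal end-to-end usage sketch, assuming shapely for the domain: estimate
# K(u) for synthetic CSR data, then bracket it with a permutation envelope. For
# CSR data, K(u) should track pi * u ** 2 and sit inside the envelope.
from shapely import geometry

domain = geometry.Polygon([(0, 0), (10, 0), (10, 10), (0, 10)])
data = np.random.rand(500, 2) * 10.
u = np.linspace(0.1, 2., 20)

rk = RipleyK(data, max_d=2., domain=domain)
rk.process()
k_obs = rk.compute_k(u)

k_sim = rk.run_permutation(u, niter=50)  # shape (50, u.size)
lo, hi = np.percentile(k_sim, [2.5, 97.5], axis=0)
outside = (k_obs < lo) | (k_obs > hi)
print("K outside the 95%% envelope at %d of %d distances" % (outside.sum(), u.size))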