def prediction_array(self, time, space_array):
    """
    Combine a single time with an array of spatial points.

    :param time: Scalar time, repeated once per spatial point.
    :param space_array: Spatial points; wrapped in ``self.space_class``
        with ``copy=False``.
    :return: The result of ``adddim`` on the time array and the spatial
        array, requested with ``type=self.data_class``.
    """
    spatial = self.space_class(space_array, copy=False)
    times = DataArray(time * np.ones(spatial.ndata))
    if spatial.original_shape is not None:
        # carry the spatial array's recorded shape over to the time component
        times.original_shape = spatial.original_shape
    return times.adddim(spatial, type=self.data_class)
def test_cutoff(self):
    """
    Check that kernels 1 and 2 give zero density where the first
    dimension is negative, and (for kernel 2) positive density elsewhere.
    """
    # 1D case
    line = DataArray(np.linspace(-5, 5, 100))
    density_1d = self.kernels[1].pdf(line)
    self.assertTrue(np.all(density_1d[line.toarray(0) < 0] == 0))

    # 2D case on a regular grid
    mesh = np.meshgrid(np.linspace(-5, 5, 50), np.linspace(-5, 5, 50))
    grid = DataArray.from_meshgrid(*mesh)
    density_2d = self.kernels[2].pdf(grid)
    self.assertTrue(np.all(density_2d[grid.toarray(0) < 0] == 0))
    self.assertTrue(np.all(density_2d[grid.toarray(0) >= 0] > 0.))
def test_linkage_function(self):
    """
    Exercise the separable linkage function with scalar and array inputs,
    then repeat the array comparison with a locally defined radial linkage.
    """
    separable = utils.linkage_func_separable(5., 10.)

    # scalar inputs
    self.assertTrue(separable(4, 9))
    self.assertFalse(separable(5.01, 9))
    self.assertFalse(separable(4, 10.01))

    # array inputs: compare against the thresholds applied directly
    dt = DataArray(np.random.rand(10))
    dd = DataArray(np.random.rand(10))
    observed = separable(dt, dd)
    expected = (dt <= 5.) & (dd <= 10.)
    self.assertTrue(np.all(observed == expected))

    def radial(dt, dd):
        return (dt**2 + dd**2)**0.5 < 0.5

    observed = radial(dt, dd)
    expected = (dt**2 + dd**2)**0.5 < 0.5
    self.assertTrue(np.all(observed == expected))
def plot_sepp_bg_surface(sepp_obj, domain=None, **kwargs):
    """
    Plot the partial marginal PDF of a SEPP object's background KDE over a
    polygon.

    :param sepp_obj: Object exposing ``bg_kde`` and ``data``.
    :param domain: Polygon to plot on. If None, a rectangle is built from
        the min/max of dimensions 1 and 2 of ``sepp_obj.data``.
    :param kwargs: Forwarded to ``plot_surface_function_on_polygon``.
    """
    bg_kde = sepp_obj.bg_kde

    def func(x, y):
        return bg_kde.partial_marginal_pdf(DataArray.from_meshgrid(x, y))

    if domain is None:
        # fall back to the bounding rectangle of the data
        xmin = min(sepp_obj.data.toarray(1))
        xmax = max(sepp_obj.data.toarray(1))
        ymin = min(sepp_obj.data.toarray(2))
        ymax = max(sepp_obj.data.toarray(2))
        domain = shapely_rectangle_from_vertices(xmin, ymin, xmax, ymax)

    plot_surface_function_on_polygon(domain, func=func, **kwargs)
    plt.axis('equal')
def predict(self, time, space_array, force_update=False):
    """
    Generate a prediction from the trained model.

    A single time and the spatial sample points are supplied separately.
    When the KDE already has targets set and force_update is False, the
    spatial component is assumed unchanged and only the target times are
    updated. This matters: updating the spatial sample points loses all
    cached net paths.

    :param time: Scalar prediction time.
    :param space_array: Spatial sample points (only used when the targets
        are rebuilt).
    :param force_update: If True, always rebuild the full target array.
    :return: The value returned by ``self.kde.pdf``.
    """
    reuse_targets = self.kde.targets_set and not force_update
    if reuse_targets:
        self.kde.update_target_times(time)
        return self.kde.pdf()

    # build a fresh space-time target array
    spatial = self.space_class(space_array, copy=False)
    times = DataArray(np.ones(spatial.ndata) * time)
    targets = times.adddim(spatial, type=self.data_class)
    return self.kde.pdf(targets=targets)
def test_marginal_cdf_values(self):
    """
    Compare the marginal CDF of kernel 3 with the expected values along
    each of its three dimensions, to within ``self.tol``.
    """
    kernel = self.kernels[3]
    grid = DataArray(np.linspace(-5, 5, 100))
    for dim in range(3):
        computed = kernel.marginal_cdf(grid, dim=dim)
        expected = self.expected_marginal_cdf(grid, dim)
        within_tol = np.abs(computed - expected) < self.tol
        self.assertTrue(np.all(within_tol))
def marginal_icdf_optimise(k, y, dim=0, tol=1e-8):
    """
    Numerically invert the marginal CDF of a kernel by repeated grid search.

    On each pass the absolute error ``|k.marginal_cdf(x, dim) - y|`` is
    evaluated on a regular grid. If the minimum lies on a grid edge the
    search window is widened by the mean bandwidth; otherwise the window is
    narrowed to the two grid points either side of the minimum.

    :param k: Kernel object exposing ``marginal_cdf(x, dim=...)``,
        ``bandwidths`` and ``data`` (both indexed as ``[:, dim]``).
    :param y: Target CDF value.
    :param dim: Dimension along which the marginal CDF is inverted.
    :param tol: Absolute error on the CDF value at which to stop.
    :return: The located x value as a float.
    :raises RuntimeError: If the search does not converge within the
        permitted number of refinement passes, or the window has to be
        widened too many times (e.g. when ``y`` cannot be reached).
    """
    n = 100  # grid points per pass
    max_iter = 50  # limit on narrowing passes
    max_expand = 1000  # limit on widening passes; guards against an endless loop

    def abs_error(x):
        return np.abs(k.marginal_cdf(x, dim=dim) - y)

    mean_bd = np.mean(k.bandwidths[:, dim])
    minx = np.min(k.data[:, dim])
    maxx = np.max(k.data[:, dim])
    err = 1.
    niter = 0
    nexpand = 0
    x0 = 0.
    while err > tol:
        if niter > max_iter:
            raise RuntimeError(
                "Failed to converge to optimum after %u iterations" % max_iter)
        xe = DataArray(np.linspace(minx, maxx, n))
        ye = abs_error(xe)
        idx = np.argmin(ye)
        if idx == 0 or idx == (n - 1):
            # minimum is on the edge of the window: widen it and retry
            if nexpand >= max_expand:
                raise RuntimeError(
                    "Failed to bracket optimum after %u window expansions" %
                    max_expand)
            nexpand += 1
            if idx == 0:
                minx -= mean_bd
            else:
                maxx += mean_bd
            continue
        # minimum is interior: record it and narrow the window around it
        err = ye[idx]
        x0 = xe[idx]
        minx = xe[idx - 1]
        maxx = xe[idx + 1]
        niter += 1
    return float(x0)
def test_assess(self):
    """
    Run a single validation iteration against an autospecced RocGrid and
    check which ROC methods were called, how often, and with what arguments.
    """
    stk = hotspot.SKernelHistoric(1, bdwidth=0.3)
    vb = validation.ValidationBase(self.data, stk)
    vb.train_model()
    # mock roc object with grid
    mocroc = mock.create_autospec(roc.RocGrid)
    mocroc.centroids = np.array([[0., 0.], [1., 1.]])
    mocroc.sample_units = range(
        2)  # needs to have the correct length, contents not used
    mocroc.sample_points = DataArray([[0., 0.], [1., 1.]])
    # swap the real ROC object for the mock so that calls can be inspected
    vb.roc = mocroc
    res = vb._iterate_run(pred_dt_plus=0.2,
                          true_dt_plus=None,
                          true_dt_minus=0.)
    # set data: called exactly once, compared against the testing data with
    # its first column dropped
    self.assertTrue(vb.roc.set_data.called)
    self.assertEqual(vb.roc.set_data.call_count, 1)
    self.assertTrue(
        np.all(vb.roc.set_data.call_args[0] == vb.testing(
            dt_plus=0.2)[:, 1:]))
    # set prediction: called exactly once with a single positional argument
    self.assertTrue(vb.roc.set_prediction.called)
    self.assertEqual(vb.roc.set_prediction.call_count, 1)
    self.assertEqual(len(vb.roc.set_prediction.call_args[0]), 1)
    t = (vb.cutoff_t + 0.2)
    # expected prediction values at (0,0), (1,1)
    # NOTE(review): pred_arg is not used by any assertion below
    pred_arg = DataArray(np.array([[t, 0., 0.], [t, 1., 1.]]))
    pred_expctd = vb.model.predict(t, mocroc.sample_points)
    self.assertTrue(
        np.all(vb.roc.set_prediction.call_args[0][0] == pred_expctd))
    # set grid: not called until set_sample_units is invoked explicitly
    self.assertFalse(vb.roc.set_sample_units.called)
    vb.set_sample_units(0.1)
    self.assertTrue(vb.roc.set_sample_units.called)
    self.assertEqual(vb.roc.set_sample_units.call_count, 1)
    self.assertTupleEqual(vb.roc.set_sample_units.call_args[0], (0.1, ))
    # evaluate: called exactly once
    self.assertTrue(vb.roc.evaluate.called)
    self.assertEqual(vb.roc.evaluate.call_count, 1)
def set_sample_points(self,
                      n_sample_per_grid,
                      respect_boundary=True,
                      *args,
                      **kwargs):
    """
    Generate n_sample_per_grid random sample points per grid unit.

    Nothing is returned. The points are stored in ``self.sample_points``
    (built from the flattened x and y coordinates) and the number of points
    per unit is stored in ``self.n_sample_point_per_unit``.

    :param n_sample_per_grid: Number of sample points drawn in each unit.
    :param respect_boundary: If True, points drawn in incomplete grid
        squares that fall outside ``self.poly`` are replaced by copies of
        the unit's centroid.
    :param args: Not used in this method.
    :param kwargs: Not used in this method.
    """
    # pick the point class matching the geometry library of self.poly
    if HAS_GEODJANGO and isinstance(self.poly, geos.GEOSGeometry):
        point_class = geos.Point
    else:
        point_class = Point
    if self.side_length is None:
        # grid was supplied as an array
        # slow version: need to iterate over the polygons
        xres = np.empty((n_sample_per_grid, self.n_sample_units),
                        dtype=float)
        yres = np.empty((n_sample_per_grid, self.n_sample_units),
                        dtype=float)
        for i, p in enumerate(self.grid_polys):
            xres[:, i], yres[:, i] = random_points_within_poly(
                p, n_sample_per_grid)
    else:
        # regular grid: draw uniformly in each square, offset from the first
        # two entries of each sample unit (used as the lower x and y bounds)
        xmins = np.array([x[0] for x in self.sample_units])
        ymins = np.array([x[1] for x in self.sample_units])
        xres = np.random.rand(n_sample_per_grid, self.n_sample_units
                              ) * self.side_length + xmins
        yres = np.random.rand(n_sample_per_grid, self.n_sample_units
                              ) * self.side_length + ymins
        if respect_boundary:
            # loop over grid squares that are incomplete
            for i in np.where(np.array(self.full_grid_square) == False)[0]:
                inside_idx = np.array([
                    point_class(x, y).within(self.poly)
                    for x, y in zip(xres[:, i], yres[:, i])
                ])
                # pad empty parts with repeats of the centroid location
                num_empty = n_sample_per_grid - sum(inside_idx)
                if num_empty:
                    cx, cy = self.centroids[i]
                    # points inside the boundary come first, centroid
                    # copies fill the remainder of the column
                    rem_x = np.concatenate((xres[inside_idx, i],
                                            cx * np.ones(num_empty)))
                    rem_y = np.concatenate((yres[inside_idx, i],
                                            cy * np.ones(num_empty)))
                    xres[:, i] = rem_x
                    yres[:, i] = rem_y
    # column-major flatten: all points of unit 0 first, then unit 1, ...
    xres = xres.flatten(order='F')
    yres = yres.flatten(order='F')
    self.sample_points = DataArray.from_args(xres, yres)
    self.n_sample_point_per_unit = np.ones(
        self.n_sample_units) * n_sample_per_grid
def test_pdf_values(self):
    """
    Compare each kernel's PDF with the expected PDF on a regular grid, for
    every dimensionality from ``self.min_nd`` up to 3.
    """
    for ndim in range(self.min_nd, 4):
        axes = [np.linspace(-5, 5, 50)] * ndim
        grid = DataArray.from_meshgrid(*np.meshgrid(*axes))
        computed = self.kernels[ndim].pdf(grid)
        expected = self.expected_pdf(grid)
        within_tol = np.abs(computed - expected) < self.tol
        self.assertTrue(np.all(within_tol))
def plot_xy_kde(k, x_range, y_range, npt_1d=50, **kwargs):
    """
    Draw a filled contour plot of a KDE's un-normalised PDF on a 2D grid.

    :param k: Object exposing ``pdf(data, normed=...)``.
    :param x_range: (min, max) of the x axis.
    :param y_range: (min, max) of the y axis.
    :param npt_1d: Number of grid points along each axis.
    :param kwargs: Optional ``n_contours`` (default 40), ``clim`` (passed
        to ``set_clim``) and ``colorbar`` (default True).
    :return: The created matplotlib figure.
    """
    x_axis = np.linspace(x_range[0], x_range[1], npt_1d)
    y_axis = np.linspace(y_range[0], y_range[1], npt_1d)
    x, y = np.meshgrid(x_axis, y_axis)
    density = k.pdf(DataArray.from_meshgrid(x, y), normed=False)

    fig = plt.figure()
    ax = fig.add_subplot(111)
    levels = kwargs.pop('n_contours', 40)
    filled = ax.contourf(x, y, density, levels, cmap='binary')
    if 'clim' in kwargs:
        filled.set_clim(kwargs.pop('clim'))
    ax.set_xlabel('X (m)')
    ax.set_ylabel('Y (m)')

    show_colorbar = kwargs.pop('colorbar', True)
    if show_colorbar:
        fig.colorbar(filled)
    return fig
def test_mvn3d(self):
    """
    Check a 3D MultivariateNormal: dimensionality, unit integral over
    [-5, 5]^3, and agreement with scipy's multivariate normal PDF for both
    plain arrays and DataArray input.
    """
    mvn = kernels.MultivariateNormal([0, 0, 0], [1, 1, 1])
    self.assertEqual(mvn.ndim, 3)

    integrand = partial(quad_pdf_fun, func=mvn.pdf)
    q = tplquad(integrand, -5, 5,
                lambda x: -5, lambda x: 5,
                lambda x, y: -5, lambda x, y: 5)
    self.assertAlmostEqual(q[0], 1.0, places=5)

    # test with absolute values
    mesh = np.meshgrid(*([np.linspace(-1, 1, 10)] * 3))
    columns = (mesh[0].flatten(), mesh[1].flatten(), mesh[2].flatten())
    pts = np.vstack(columns).transpose().reshape(1000, 3)
    expected = multivariate_normal.pdf(pts, mean=[0, 0, 0], cov=np.eye(3))

    def count_mismatches(values):
        # number of entries differing from the reference by more than 1e-12
        return np.sum(np.abs((values - expected)).flatten() > 1e-12)

    self.assertEqual(count_mismatches(mvn.pdf(pts)), 0)

    # repeat with Data type
    self.assertEqual(count_mismatches(mvn.pdf(DataArray(pts))), 0)
def update_target_times(self, target_times):
    """
    Update the times attached to the (spatial) targets.

    :param target_times: Either an iterable (which must be castable to a
        DataArray and have as many entries as the existing targets) or a
        scalar (in which case every target time is set to this value).
    :raises AttributeError: If an iterable is supplied whose length differs
        from the number of existing targets.
    """
    if not hasattr(target_times, '__iter__'):
        # scalar: overwrite the first column of the target data in place
        self.logger.info("update_target_times with a fixed time %f",
                         target_times)
        self.targets.data[:, 0] = target_times
        return

    new_times = DataArray(target_times, copy=False)
    self.logger.info("update_target_times with an iterable of len %d",
                     new_times.ndata)
    if new_times.ndata != self.targets.ndata:
        raise AttributeError(
            "The number of data points does not match existing data in the supplied array"
        )
    self.targets.time = new_times
def test_partials(self):
    """
    Check partial_marginal_pdf of a 3D MultivariateNormal against the PDF
    restricted to the remaining dimensions and against the product of the
    corresponding 1D marginals.
    """
    mvn = kernels.MultivariateNormal([0, 0, 0], [1, 2, 3])
    mesh = np.meshgrid(np.linspace(-15, 15, 20), np.linspace(-15, 15, 20))
    stacked = np.concatenate([t[..., np.newaxis] for t in mesh], axis=2)
    xy = DataArray(stacked)

    def assert_close(actual, expected):
        self.assertTrue(np.all(np.abs(actual - expected) < 1e-14))

    # attempt to call with data of too few dims
    with self.assertRaises(AttributeError):
        p = mvn.partial_marginal_pdf(xy.getdim(0), dim=0)

    # dim=0 removed: should be marginal in 2nd and 3rd dims
    p = mvn.partial_marginal_pdf(xy, dim=0)
    assert_close(p, mvn.pdf(xy, dims=[1, 2]))
    product = mvn.marginal_pdf(xy.getdim(0), dim=1) * mvn.marginal_pdf(
        xy.getdim(1), dim=2)
    assert_close(p, product)

    # dim=1 removed: should be marginal in 1st and 3rd dims
    p = mvn.partial_marginal_pdf(xy, dim=1)
    assert_close(p, mvn.pdf(xy, dims=[0, 2]))
    # test directly
    product = mvn.marginal_pdf(xy.getdim(0), dim=0) * mvn.marginal_pdf(
        xy.getdim(1), dim=2)
    assert_close(p, product)
def test_mvn1d(self):
    """
    Check a 1D MultivariateNormal: dimensionality, unit integral over
    [-5, 5], agreement with scipy's normal PDF for array and DataArray
    input, and the marginal PDF/CDF.
    """
    mvn = kernels.MultivariateNormal([0], [1])
    self.assertEqual(mvn.ndim, 1)

    q = quad(partial(quad_pdf_fun, func=mvn.pdf), -5., 5.)
    self.assertAlmostEqual(q[0], 1.0, places=5)

    # test with absolute values
    column = np.linspace(-1, 1, 10).reshape(10, 1)
    computed = mvn.pdf(column)
    expected = norm.pdf(column)
    for got, want in zip(computed, expected):
        self.assertAlmostEqual(got, want)

    # repeat with Data type
    data = DataArray(np.linspace(-1, 1, 10))
    computed = mvn.pdf(data)
    for got, want in zip(computed, expected):
        self.assertAlmostEqual(got, want)

    # in 1D the marginal PDF is compared element-by-element with the PDF
    marginal = mvn.marginal_pdf(data)
    self.assertListEqual(list(computed), list(marginal))

    half = mvn.marginal_cdf(np.array(0.))
    self.assertAlmostEqual(half, 0.5)
def radial_spatial_triggering_plots(ppobj,
                                    simobj=None,
                                    xmax=None,
                                    ymax=None,
                                    cbar=True,
                                    fmax=None,
                                    vmax=None):
    """
    Contour plot of the spatial part of the trigger KDE, optionally next to
    the Gaussian implied by a simulation object.

    :param ppobj: Object exposing ``trigger_kde`` and ``ndata``.
    :param simobj: Optional object exposing ``trigger_params['sigma']``; if
        given, a second panel shows the 2D Gaussian with those widths.
    :param xmax: Half-width of the x axis. Defaults to the 0.99 marginal
        inverse CDF of the trigger KDE in dimension 1.
    :param ymax: Half-width of the y axis. Defaults to xmax.
    :param cbar: If True, add a colourbar and return its handle.
    :param fmax: Fraction in [0, 1]; the colour maximum is taken at this
        position in the sorted KDE values. Mutually exclusive with vmax.
    :param vmax: Explicit colour maximum. Mutually exclusive with fmax.
    :return: The colourbar handle if cbar is True, otherwise None.
    """
    npt = 500
    ci = 0.99
    fig_kwargs = {
        'figsize': (10, 5),
        'dpi': 100,
        'facecolor': 'w',
    }
    assert fmax is None or vmax is None, "Can specify EITHER vmax OR fmax"
    xmax = xmax or ppobj.trigger_kde.marginal_icdf(ci, dim=1)
    ymax = ymax or xmax
    xy = DataArray.from_meshgrid(*np.meshgrid(np.linspace(-xmax, xmax, npt),
                                              np.linspace(-ymax, ymax, npt)))
    zxy1 = ppobj.trigger_kde.partial_marginal_pdf(
        xy, normed=False) / ppobj.ndata

    if fmax is not None:
        ordered = np.sort(np.asarray(zxy1), axis=None)
        # clamp the index so that fmax == 1.0 selects the largest value
        # instead of indexing one past the end
        vmax = ordered[min(int(ordered.size * fmax), ordered.size - 1)]
    elif vmax is None:
        vmax = zxy1.max()

    if simobj:
        sx = simobj.trigger_params['sigma'][0]
        sy = simobj.trigger_params['sigma'][1]
        zxy2 = 1 / (2 * np.pi * sx * sy) * np.exp(
            -(xy.toarray(0)**2) / (2 * sx**2) - xy.toarray(1)**2 /
            (2 * sy**2))
        # both panels share one colour scale
        vmax = max(zxy2.max(), vmax)
    # small headroom above the largest plotted value
    vmax *= 1.02

    fig = plt.figure(**fig_kwargs)
    if simobj:
        ax1 = fig.add_subplot(121)
    else:
        ax1 = fig.add_subplot(111)
    cont1 = ax1.contourf(xy.toarray(0),
                         xy.toarray(1),
                         zxy1,
                         50,
                         cmap=cm.coolwarm,
                         vmin=0,
                         vmax=vmax)
    ax1.set_xlim([-xmax, xmax])
    ax1.set_ylim([-ymax, ymax])
    ax1.set_xlabel('X (metres)')
    ax1.set_ylabel('Y (metres)')
    ax1.set_aspect('equal')

    hbar = None
    if simobj:
        ax2 = fig.add_subplot(122)
        cont2 = ax2.contourf(xy.toarray(0),
                             xy.toarray(1),
                             zxy2,
                             50,
                             cmap=cm.coolwarm,
                             vmin=0,
                             vmax=vmax)
        ax2.yaxis.set_ticklabels([])
        ax2.set_xlabel('X (metres)')
        ax2.set_xlim([-xmax, xmax])
        ax2.set_ylim([-ymax, ymax])
        ax2.set_aspect('equal')
        if cbar:
            hbar = fig.colorbar(cont2, ax=[ax1, ax2])
    elif cbar:
        hbar = fig.colorbar(cont1, ax=ax1)

    # hbar is still None when no colourbar was requested
    return hbar
def prediction_array(self, t):
    """
    Attach the time t to every ROC sample point.

    :param t: Scalar time, repeated once per sample point.
    :return: The result of ``adddim`` on the time array and
        ``self.roc.sample_points``, requested with
        ``type=self.data_class``.
    """
    n_points = self.roc.sample_points.ndata
    times = DataArray.from_args(np.ones(n_points) * t)
    return times.adddim(self.roc.sample_points, type=self.data_class)
def weighted_bg_mean_density(x, y):
    """
    Mean, over all weighted background KDEs, of the partial marginal PDF
    evaluated on the (x, y) mesh.

    Reads ``res['weighted_backgrounds']`` from the enclosing scope.

    :return: Array reshaped to the original shape of the mesh.
    """
    grid = DataArray.from_meshgrid(x, y)
    densities = []
    for background in res['weighted_backgrounds']:
        densities.append(background.partial_marginal_pdf(grid))
    mean_density = np.array(densities).mean(axis=0)
    return mean_density.reshape(grid.original_shape)
def weighted_bg_rel_range(x, y):
    """
    Relative range (peak-to-peak divided by mean), over all weighted
    background KDEs, of the partial marginal PDF on the (x, y) mesh.

    Reads ``res['weighted_backgrounds']`` from the enclosing scope.

    :return: Array reshaped to the original shape of the mesh.
    """
    xy = DataArray.from_meshgrid(x, y)
    fxy = np.array(
        [a.partial_marginal_pdf(xy) for a in res['weighted_backgrounds']])
    fxy_mean = fxy.mean(axis=0)
    # use the function form: the ndarray.ptp method was removed in NumPy 2.0
    fxy_range = np.ptp(fxy, axis=0)
    return (fxy_range / fxy_mean).reshape(xy.original_shape)
# Purely spatial network kernel: one source (the first), evaluated through a
# walker built over the targets.
nw = utils.NetworkWalker(itn_net,
                         targets,
                         max_distance=radius,
                         max_split=1e4)
k = NetworkKernelEqualSplitLinear(sources.getone(0), 200.)
k.set_walker(nw)
z = k.pdf()
# scale so that the largest value is 1
zn = z / max(z)
# plt.figure()
# itn_net.plot_network()
# plt.scatter(nodes[:, 0], nodes[:, 1], marker='x', s=15, c='k')
# plt.scatter(*targets.to_cartesian().separate, s=(zn * 20) ** 2)
# plt.scatter(*sources.getone(0).cartesian_coords, c='r', s=50)
# add time to sources and targets
# sources get random times, every target gets time 1
times = DataArray(np.random.rand(num_pts))
sources_st = times.adddim(sources, type=NetworkSpaceTimeData)
targets_st = DataArray(np.ones(targets.ndata)).adddim(
    targets, type=NetworkSpaceTimeData)
# space-time kernel centred on the first source, sharing the same walker
kst = NetworkTemporalKernelEqualSplit(
    [sources_st.time.getone(0),
     sources_st.space.getone(0)], [0.5, 200.])
kst.set_walker(nw)
zst = kst.pdf(target_times=targets_st.time)
# NOTE(review): scaled by the maximum of the spatial-only result z, not of
# zst - confirm that a shared scale is intended
zstn = zst / z.max()
# plt.figure()
# itn_net.plot_network()
# plt.scatter(nodes[:, 0], nodes[:, 1], marker='x', s=15, c='k')
# plt.scatter(*targets.to_cartesian().separate, s=(zstn * 20) ** 2)
def test_network_roc(self):
    """
    Run network validation with three configurations (Bowers kernel, fixed
    radius kernel, fixed radius kernel with the mean ROC) and check the
    resulting prediction ranks / non-zero prediction values.
    """
    itn_net = load_test_network()
    # lay down a few events on the network
    net_point_array = []
    edge_idx = [20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 100]
    for i in edge_idx:
        net_point_array.append(
            itn_net.edges()[i].centroid)  # midway along the edge
    net_point_array = NetworkData(net_point_array)
    # append to times to get full dataset
    # times are evenly spaced: 0, 1/11, 2/11, ... in the order of edge_idx
    st_net_array = DataArray.from_args(
        np.arange(net_point_array.ndata) / float(len(edge_idx))).adddim(
            net_point_array, type=NetworkSpaceTimeData)
    # test 1: Bowers kernel (decrease with time and distance)
    stk = hotspot.STNetworkBowers(a=10., b=1.)
    vb = validation.NetworkValidationBase(st_net_array, stk)
    vb.set_sample_units(None)  # the argument makes no difference here
    vb.set_t_cutoff(0.5)
    res = vb.run(time_step=0.1)
    # on each step, the segment corresponding to the most recent event should have the highest prediction rank
    for i in range(5):
        self.assertTrue(
            np.all(res['prediction_rank'][i][:(
                i + 1)] == np.array(edge_idx[5:(i + 6)])[::-1]))
    # test 2: binary mask kernel in space, decr in time
    # exponential decrease with dt; presumably zero when dd > radius
    # NOTE(review): this comment previously said "dd > 10" but radius is 50
    radius = 50.
    stk = hotspot.STNetworkFixedRadius(radius, 1.0)
    vb = validation.NetworkValidationBase(st_net_array, stk)
    vb.set_sample_units(None)  # the argument makes no difference here
    vb.set_t_cutoff(0.5)
    res = vb.run(time_step=0.1, n_iter=1)
    pvals = vb.roc.prediction_values
    # check network distance from centroid to sources and verify non zero values
    pvals_expctd_nonzero = []
    for i, e in enumerate(vb.roc.sample_units):
        if np.any([(e.centroid - x).length <= radius
                   for x in vb.training.toarray(1)]):
            pvals_expctd_nonzero.append(i)
    pvals_expctd_nonzero = np.array(pvals_expctd_nonzero)
    self.assertTrue(
        np.all(pvals.nonzero()[0] == np.array(pvals_expctd_nonzero)))
    # test 3: repeat but with a mean version of the ROC
    vb = validation.NetworkValidationMean(st_net_array, stk)
    vb.set_sample_units(None, 10)  # points are spaced ~10m apart
    vb.set_t_cutoff(0.5)
    res = vb.run(time_step=0.1, n_iter=1)  # just one run this time
    pvals2 = vb.roc.prediction_values
    # check network distance from centroid to sources and verify non zero values
    pvals_expctd_nonzero2 = []
    for i, pt in enumerate(vb.sample_points.toarray(0)):
        if np.any([(pt - x).length <= radius
                   for x in vb.training.toarray(1)]):
            # find sample unit from sample point
            # the cumulative point count per unit maps point index i to
            # the index of the unit containing it
            this_sample_unit = bisect.bisect_right(
                vb.roc.n_sample_point_per_unit.cumsum(), i)
            if this_sample_unit not in pvals_expctd_nonzero2:
                pvals_expctd_nonzero2.append(this_sample_unit)
    self.assertTrue(
        np.all(pvals2.nonzero()[0] == np.array(pvals_expctd_nonzero2)))
l_target = k.pdf(res_all.getrows(this_idx_target), normed=False) l_sources.append(l_source) l_targets.append(l_target) kst[i, j] = n / (nv * S * T) * np.sum(1 / (omega * l_source * l_target)) # plots plt.figure() plt.contourf(uu, vv, kst - np.pi * uu**2 * vv, 50) plt.colorbar() xy = DataArray.from_meshgrid(*np.meshgrid( np.linspace(res_all.toarray(1).min(), res_all.toarray(1).max(), 50), np.linspace(res_all.toarray(2).min(), res_all.toarray(2).max(), 50))) zz = k.partial_marginal_pdf(xy, normed=False) plt.figure() plt.contourf(xy.toarray(0), xy.toarray(1), zz, 50) plt.colorbar() t = np.linspace(res_all.toarray(0).min(), res_all.toarray(0).max(), 500) plt.figure() plt.plot(t, k.marginal_pdf(t, dim=0, normed=False)) # simulation ONLY if b_sim: true_int = lambda xyz: 250 / ( (1 - np.exp(-1)) *
def create_network_with_crime_counts(
        start_date=datetime.date(2011, 3, 1), domain_name='South'):
    """
    Load a street network, count the crimes snapped to each edge in a
    6 month and a 12 month window, and save the network as a shapefile.

    :param start_date: First date of crime data to load; data are loaded
        for the 365 days that follow.
    :param domain_name: Key into the Chicago side polygons, also used to
        locate the pickled network and to name the output.
    """
    domain = chicago.get_chicago_side_polys()[domain_name]
    end_date = start_date + datetime.timedelta(days=365)
    crime_types = (
        'THEFT',
        'BURGLARY',
        'HOMICIDE',
        'BATTERY',
        'ARSON',
        'MOTOR VEHICLE THEFT',
        'ASSAULT',
        'ROBBERY',
    )
    # upper limit on the event time for each counting window
    # (presumably in days from start_date - TODO confirm)
    window_limits = (('6mo', 183), ('12mo', 365))

    # get crime data
    data, _t0, _cid = chicago.get_crimes_by_type(crime_type=crime_types,
                                                 start_date=start_date,
                                                 end_date=end_date,
                                                 domain=domain)

    # get network
    region = consts.FILE_FRIENDLY_REGIONS[domain_name]
    osm_file = os.path.join(DATA_DIR, 'osm_chicago',
                            '%s_clipped.net' % region)
    net = osm.OSMStreetNet.from_pickle(osm_file)

    # snap crime data to network with maximum distance cutoff
    netdata, failed = NetworkData.from_cartesian(net,
                                                 data[:, 1:],
                                                 radius=50,
                                                 return_failure_idx=True)

    # keep the times of the records that snapped successfully
    failed_rows = set(failed)
    kept_rows = [
        row for row in range(data.shape[0]) if row not in failed_rows
    ]
    netdata = DataArray(data[kept_rows, 0]).adddim(
        netdata, type=NetworkSpaceTimeData)

    # 0/1 flag per record for each time window
    in_window = {
        label: (netdata.toarray(0) <= limit).astype(int)
        for label, limit in window_limits
    }

    # add count attributes to all edges
    for edge in net.edges():
        edge.attrs['crimes_6mo'] = 0
        edge.attrs['crimes_12mo'] = 0

    # accumulate the flags on the edge each record was snapped to
    for row, net_point in enumerate(netdata.space.toarray()):
        net_point.edge.attrs['crimes_6mo'] += in_window['6mo'][row]
        net_point.edge.attrs['crimes_12mo'] += in_window['12mo'][row]

    net.save(consts.FILE_FRIENDLY_REGIONS[domain_name] +
             '_network_crime_counts',
             fmt='shp')