def test_linkage_function(self):
    """
    Exercise the separable linkage helper and a hand-written linkage callable.

    The separable function built with thresholds (5, 10) is checked on scalars
    just inside and just outside each threshold, then on random arrays against
    the equivalent element-wise comparison. A custom callable is finally
    applied to the same arrays and compared with the same expression written
    inline.
    """
    linkage = utils.linkage_func_separable(5., 10.)

    # scalar inputs: inside both thresholds, then beyond each one in turn
    self.assertTrue(linkage(4, 9))
    self.assertFalse(linkage(5.01, 9))
    self.assertFalse(linkage(4, 10.01))

    # array inputs
    delta_t = DataArray(np.random.rand(10))
    delta_d = DataArray(np.random.rand(10))
    linked = linkage(delta_t, delta_d)
    expected = (delta_t <= 5.) & (delta_d <= 10.)
    self.assertTrue(np.all(linked == expected))

    # non-separable linkage supplied as a plain callable
    def radial_linkage(dt, dd):
        return (dt**2 + dd**2)**0.5 < 0.5

    linked = radial_linkage(delta_t, delta_d)
    expected = (delta_t**2 + delta_d**2)**0.5 < 0.5
    self.assertTrue(np.all(linked == expected))
def test_marginal_cdf_values(self):
    """
    Compare the marginal CDF of ``self.kernels[3]`` with the expected values.

    For each of the first three dimensions the kernel's ``marginal_cdf`` is
    evaluated on 100 evenly spaced points in [-5, 5] and must agree with
    ``self.expected_marginal_cdf`` to within ``self.tol`` at every point.
    """
    kernel = self.kernels[3]
    grid = DataArray(np.linspace(-5, 5, 100))
    for dim in range(3):
        observed = kernel.marginal_cdf(grid, dim=dim)
        expected = self.expected_marginal_cdf(grid, dim)
        within_tol = np.abs(observed - expected) < self.tol
        self.assertTrue(np.all(within_tol))
def marginal_icdf_optimise(k, y, dim=0, tol=1e-8):
    """
    Numerically invert the marginal CDF of a kernel by repeated grid search.

    A grid of 100 points is laid over a bracket that initially spans the range
    of ``k.data[:, dim]``. On every pass the grid point minimising
    ``|k.marginal_cdf(x, dim=dim) - y|`` is located:

    * if it is the first (last) grid point, the bracket is widened downwards
      (upwards) by the mean of ``k.bandwidths[:, dim]`` and the search repeats;
    * otherwise the bracket shrinks to the two neighbouring grid points and
      the absolute error at the best point becomes the current error.

    :param k: Kernel-like object exposing ``marginal_cdf(x, dim=...)``,
        ``bandwidths`` and ``data`` (both indexed as ``[:, dim]``).
    :param y: Target CDF value to invert.
    :param dim: Dimension along which the marginal CDF is taken.
    :param tol: Absolute error at which the search stops.
    :return: The located abscissa as a float. If ``tol`` is not below the
        initial error of 1.0 the loop never runs and 0.0 is returned.
    :raises RuntimeError: if the error is still above ``tol`` after more than
        50 passes. Every pass is counted, including bracket expansions, so the
        search always terminates.
    """
    n = 100
    max_iter = 50

    def abs_error(x):
        return np.abs(k.marginal_cdf(x, dim=dim) - y)

    mean_bd = np.mean(k.bandwidths[:, dim])
    minx = np.min(k.data[:, dim])
    maxx = np.max(k.data[:, dim])

    err = 1.
    niter = 0
    x0 = 0.
    while err > tol:
        if niter > max_iter:
            raise RuntimeError(
                "Failed to converge to optimum after %u iterations" % max_iter)
        # Count the pass up front so the expansion branches below cannot spin
        # forever when the minimum keeps landing on a bracket edge.
        niter += 1

        xe = DataArray(np.linspace(minx, maxx, n))
        ye = abs_error(xe)
        idx = np.argmin(ye)

        if idx == 0:
            # minimum on the lower edge: the optimum may lie below the bracket
            minx -= mean_bd
            continue
        if idx == (n - 1):
            # minimum on the upper edge: the optimum may lie above the bracket
            maxx += mean_bd
            continue

        err = ye[idx]
        x0 = xe[idx]
        # refine: the optimum lies between the neighbours of the best point
        minx = xe[idx - 1]
        maxx = xe[idx + 1]

    return float(x0)
def prediction_array(self, time, space_array):
    """
    Combine a single time with an array of spatial points.

    The spatial input is wrapped in ``self.space_class`` (without copying) and
    a constant time column of matching length is prepended to it. When the
    spatial array carries an ``original_shape``, it is propagated to the time
    column before the two are joined.

    :param time: Scalar time applied to every spatial point.
    :param space_array: Spatial sample points accepted by ``self.space_class``.
    :return: Result of ``adddim`` with ``type=self.data_class``.
    """
    spatial = self.space_class(space_array, copy=False)
    unit_column = np.ones(spatial.ndata)
    times = DataArray(time * unit_column)

    shape = spatial.original_shape
    if shape is not None:
        times.original_shape = shape

    return times.adddim(spatial, type=self.data_class)
def test_assess(self):
    """
    Check which ROC methods a single validation iteration calls, and with what.

    A ``ValidationBase`` is trained on ``self.data``, its ``roc`` attribute is
    replaced by an autospec'd ``RocGrid`` mock, and ``_iterate_run`` is
    executed once. The mock's call records are then inspected. The order of
    the statements matters: ``set_sample_units`` must not have been called
    until ``vb.set_sample_units`` is invoked explicitly.
    """
    stk = hotspot.SKernelHistoric(1, bdwidth=0.3)
    vb = validation.ValidationBase(self.data, stk)
    vb.train_model()

    # mock roc object with grid
    mocroc = mock.create_autospec(roc.RocGrid)
    mocroc.centroids = np.array([[0., 0.], [1., 1.]])
    mocroc.sample_units = range(
        2)  # needs to have the correct length, contents not used
    mocroc.sample_points = DataArray([[0., 0.], [1., 1.]])
    vb.roc = mocroc

    # NOTE(review): `res` is not inspected below; only the calls recorded on
    # the mock are checked.
    res = vb._iterate_run(pred_dt_plus=0.2,
                          true_dt_plus=None,
                          true_dt_minus=0.)

    # set data: called exactly once, compared against the testing data with
    # its first column dropped
    self.assertTrue(vb.roc.set_data.called)
    self.assertEqual(vb.roc.set_data.call_count, 1)
    self.assertTrue(
        np.all(vb.roc.set_data.call_args[0] == vb.testing(
            dt_plus=0.2)[:, 1:]))

    # set prediction: called exactly once with a single positional argument
    self.assertTrue(vb.roc.set_prediction.called)
    self.assertEqual(vb.roc.set_prediction.call_count, 1)
    self.assertEqual(len(vb.roc.set_prediction.call_args[0]), 1)
    t = (vb.cutoff_t + 0.2)

    # expected prediction values at (0,0), (1,1)
    # NOTE(review): `pred_arg` is built but never used in this test.
    pred_arg = DataArray(np.array([[t, 0., 0.], [t, 1., 1.]]))
    pred_expctd = vb.model.predict(t, mocroc.sample_points)
    self.assertTrue(
        np.all(vb.roc.set_prediction.call_args[0][0] == pred_expctd))

    # set grid: not touched by _iterate_run, only by an explicit call
    self.assertFalse(vb.roc.set_sample_units.called)
    vb.set_sample_units(0.1)
    self.assertTrue(vb.roc.set_sample_units.called)
    self.assertEqual(vb.roc.set_sample_units.call_count, 1)
    self.assertTupleEqual(vb.roc.set_sample_units.call_args[0], (0.1, ))

    # evaluate: called exactly once
    self.assertTrue(vb.roc.evaluate.called)
    self.assertEqual(vb.roc.evaluate.call_count, 1)
def test_cutoff(self):
    """
    Check that the kernels' densities vanish where the first coordinate is negative.

    ``self.kernels[1]`` is evaluated on a 1D line and ``self.kernels[2]`` on a
    2D mesh. Both must return exactly zero wherever dimension 0 is negative;
    the 2D kernel must additionally be strictly positive elsewhere.
    """
    # 1D case
    line = DataArray(np.linspace(-5, 5, 100))
    density_1d = self.kernels[1].pdf(line)
    negative_side = line.toarray(0) < 0
    self.assertTrue(np.all(density_1d[negative_side] == 0))

    # 2D case on a 50 x 50 mesh
    xs = np.linspace(-5, 5, 50)
    ys = np.linspace(-5, 5, 50)
    mesh = DataArray.from_meshgrid(*np.meshgrid(xs, ys))
    density_2d = self.kernels[2].pdf(mesh)
    first_coord = mesh.toarray(0)
    self.assertTrue(np.all(density_2d[first_coord < 0] == 0))
    self.assertTrue(np.all(density_2d[first_coord >= 0] > 0.))
def predict(self, time, space_array, force_update=False):
    """
    Evaluate the trained KDE at one time over a set of spatial sample points.

    When the KDE already has targets and ``force_update`` is false, only the
    target times are refreshed and ``space_array`` is ignored. This matters
    because replacing the spatial sample points loses all cached net paths.
    Otherwise a fresh space-time target array is built and passed to the KDE.

    :param time: Single prediction time.
    :param space_array: Spatial sample points accepted by ``self.space_class``.
    :param force_update: If true, always rebuild the targets from
        ``space_array``.
    :return: The value returned by ``self.kde.pdf``.
    """
    reuse_cached_targets = self.kde.targets_set and not force_update
    if not reuse_cached_targets:
        spatial = self.space_class(space_array, copy=False)
        unit_column = np.ones(spatial.ndata)
        times = DataArray(unit_column * time)
        targets = times.adddim(spatial, type=self.data_class)
        return self.kde.pdf(targets=targets)

    # spatial component unchanged: only shift the time attached to the targets
    self.kde.update_target_times(time)
    return self.kde.pdf()
def test_mvn3d(self):
    """
    Validate the 3D multivariate normal kernel.

    Checks the reported dimensionality, that the density integrates to one
    over the cube [-5, 5]^3, and that point values agree with scipy's
    ``multivariate_normal`` to within 1e-12 for both plain arrays and
    ``DataArray`` input.
    """

    def count_exceeding(actual, expected):
        # number of entries whose absolute difference is above 1e-12
        return np.sum(np.abs((actual - expected)).flatten() > 1e-12)

    mvn = kernels.MultivariateNormal([0, 0, 0], [1, 1, 1])
    self.assertEqual(mvn.ndim, 3)

    # normalisation
    integrand = partial(quad_pdf_fun, func=mvn.pdf)
    integral = tplquad(
        integrand,
        -5, 5,
        lambda x: -5, lambda x: 5,
        lambda x, y: -5, lambda x, y: 5,
    )
    self.assertAlmostEqual(integral[0], 1.0, places=5)

    # test with absolute values on a 10 x 10 x 10 lattice
    axis = np.linspace(-1, 1, 10)
    grids = np.meshgrid(*([axis] * 3))
    points = np.vstack(
        (grids[0].flatten(), grids[1].flatten(), grids[2].flatten())
    ).transpose().reshape(1000, 3)
    density = mvn.pdf(points)
    reference = multivariate_normal.pdf(points, mean=[0, 0, 0], cov=np.eye(3))
    # no single difference > 1e-12
    self.assertEqual(count_exceeding(density, reference), 0)

    # repeat with Data type
    points = DataArray(points)
    density = mvn.pdf(points)
    # no single difference > 1e-12
    self.assertEqual(count_exceeding(density, reference), 0)
def update_target_times(self, target_times):
    """
    Replace the times attached to the (spatial) targets.

    :param target_times: Either a scalar, in which case the first column of
        the target data is set to that value for every target, or an iterable
        that can be cast to a DataArray with one entry per existing target.
    :raises AttributeError: if an iterable is supplied whose length differs
        from the number of existing targets.
    """
    if not hasattr(target_times, '__iter__'):
        # scalar: broadcast the same time onto every target in place
        self.logger.info("update_target_times with a fixed time %f",
                         target_times)
        self.targets.data[:, 0] = target_times
        return

    times = DataArray(target_times, copy=False)
    self.logger.info("update_target_times with an iterable of len %d",
                     times.ndata)
    if times.ndata != self.targets.ndata:
        raise AttributeError(
            "The number of data points does not match existing data in the supplied array"
        )
    self.targets.time = times
def test_partials(self):
    """
    Check ``partial_marginal_pdf`` of a 3D multivariate normal kernel.

    Integrating out one dimension must leave the joint density over the other
    two, which is compared both with ``pdf(..., dims=...)`` and with the
    product of the two one-dimensional marginals. Supplying data with too few
    dimensions must raise ``AttributeError``.
    """
    atol = 1e-14

    def assert_close(actual, expected):
        self.assertTrue(np.all(np.abs(actual - expected) < atol))

    mvn = kernels.MultivariateNormal([0, 0, 0], [1, 2, 3])
    mesh = np.meshgrid(np.linspace(-15, 15, 20), np.linspace(-15, 15, 20))
    stacked = np.concatenate([g[..., np.newaxis] for g in mesh], axis=2)
    xy = DataArray(stacked)

    # attempt to call with data of too few dims
    with self.assertRaises(AttributeError):
        partial_pdf = mvn.partial_marginal_pdf(xy.getdim(0), dim=0)

    # dim 0 integrated out: should be marginal in 2nd and 3rd dims
    partial_pdf = mvn.partial_marginal_pdf(xy, dim=0)
    assert_close(partial_pdf, mvn.pdf(xy, dims=[1, 2]))
    product = (mvn.marginal_pdf(xy.getdim(0), dim=1)
               * mvn.marginal_pdf(xy.getdim(1), dim=2))
    assert_close(partial_pdf, product)

    # dim 1 integrated out: should be marginal in 1st and 3rd dims
    partial_pdf = mvn.partial_marginal_pdf(xy, dim=1)
    assert_close(partial_pdf, mvn.pdf(xy, dims=[0, 2]))
    # test directly
    product = (mvn.marginal_pdf(xy.getdim(0), dim=0)
               * mvn.marginal_pdf(xy.getdim(1), dim=2))
    assert_close(partial_pdf, product)
def test_mvn1d(self):
    """
    Validate the 1D multivariate normal kernel.

    Checks the reported dimensionality, that the density integrates to one
    over [-5, 5], that point values agree with scipy's ``norm.pdf`` for plain
    arrays and ``DataArray`` input, that the marginal pdf equals the pdf, and
    that the marginal CDF at zero is one half.
    """
    mvn = kernels.MultivariateNormal([0], [1])
    self.assertEqual(mvn.ndim, 1)

    # normalisation
    integrand = partial(quad_pdf_fun, func=mvn.pdf)
    integral = quad(integrand, -5., 5.)
    self.assertAlmostEqual(integral[0], 1.0, places=5)

    # test with absolute values
    column = np.linspace(-1, 1, 10).reshape(10, 1)
    density = mvn.pdf(column)
    reference = norm.pdf(column)
    for got, want in zip(density, reference):
        self.assertAlmostEqual(got, want)

    # repeat with Data type
    points = DataArray(np.linspace(-1, 1, 10))
    density = mvn.pdf(points)
    for got, want in zip(density, reference):
        self.assertAlmostEqual(got, want)

    # in one dimension the marginal is the density itself
    marginal = mvn.marginal_pdf(points)
    self.assertListEqual(list(density), list(marginal))

    cdf_at_zero = mvn.marginal_cdf(np.array(0.))
    self.assertAlmostEqual(cdf_at_zero, 0.5)
# Purely spatial network kernel: build a walker over the network and targets,
# attach it to a kernel centred on the first source, and evaluate the density.
nw = utils.NetworkWalker(itn_net, targets, max_distance=radius, max_split=1e4)
k = NetworkKernelEqualSplitLinear(sources.getone(0), 200.)
k.set_walker(nw)
z = k.pdf()
# density rescaled by its maximum; only referenced by the disabled plot below
zn = z / max(z)

# Disabled visual check of the spatial kernel:
# plt.figure()
# itn_net.plot_network()
# plt.scatter(nodes[:, 0], nodes[:, 1], marker='x', s=15, c='k')
# plt.scatter(*targets.to_cartesian().separate, s=(zn * 20) ** 2)
# plt.scatter(*sources.getone(0).cartesian_coords, c='r', s=50)

# add time to sources and targets
# sources receive random times; every target receives time 1
times = DataArray(np.random.rand(num_pts))
sources_st = times.adddim(sources, type=NetworkSpaceTimeData)
targets_st = DataArray(np.ones(targets.ndata)).adddim(
    targets, type=NetworkSpaceTimeData)

# Space-time network kernel centred on the first source, reusing the walker
# built above.
kst = NetworkTemporalKernelEqualSplit(
    [sources_st.time.getone(0), sources_st.space.getone(0)], [0.5, 200.])
kst.set_walker(nw)
zst = kst.pdf(target_times=targets_st.time)
# NOTE(review): normalised by the maximum of the purely spatial density `z`,
# not of `zst` - presumably to keep both plots on one scale; confirm intent.
zstn = zst / z.max()

# Disabled visual check of the space-time kernel:
# plt.figure()
# itn_net.plot_network()
# plt.scatter(nodes[:, 0], nodes[:, 1], marker='x', s=15, c='k')
# plt.scatter(*targets.to_cartesian().separate, s=(zstn * 20) ** 2)
def create_network_with_crime_counts(
        start_date=datetime.date(2011, 3, 1), domain_name='South'):
    """
    Count crimes per street-network edge and export the network as a shapefile.

    Crimes of a fixed set of types are loaded for the 365 days following
    ``start_date`` within the named Chicago side, snapped onto the pickled OSM
    street network (points further than the snapping radius are dropped), and
    counted per edge for each time window in ``time_window_filters``. Each
    window ``name`` is stored on every edge as the attribute
    ``crimes_<name>``, and the network is saved in 'shp' format.

    :param start_date: First day of the period for which crimes are loaded.
    :param domain_name: Key into the side polygons and into
        ``consts.FILE_FRIENDLY_REGIONS``.
    """
    # load network, count crimes in 6mo and 12mo window, output shapefile
    domains = chicago.get_chicago_side_polys()
    domain = domains[domain_name]
    end_date = start_date + datetime.timedelta(days=365)
    crime_types = (
        'THEFT',
        'BURGLARY',
        'HOMICIDE',
        'BATTERY',
        'ARSON',
        'MOTOR VEHICLE THEFT',
        'ASSAULT',
        'ROBBERY',
    )
    # window name -> predicate on the time column
    # (presumably days since start_date - TODO confirm against the loader)
    time_window_filters = {
        '6mo': lambda t: t <= 183,
        '12mo': lambda t: t <= 365,
    }

    # get crime data
    data, _t0, _cid = chicago.get_crimes_by_type(crime_type=crime_types,
                                                 start_date=start_date,
                                                 end_date=end_date,
                                                 domain=domain)

    # get network
    osm_file = os.path.join(
        DATA_DIR, 'osm_chicago',
        '%s_clipped.net' % consts.FILE_FRIENDLY_REGIONS[domain_name])
    net = osm.OSMStreetNet.from_pickle(osm_file)

    # snap crime data to network with maximum distance cutoff
    netdata, failed = NetworkData.from_cartesian(net,
                                                 data[:, 1:],
                                                 radius=50,
                                                 return_failure_idx=True)

    # get non-failed times and attach them to the snapped points
    idx = sorted(set(range(data.shape[0])) - set(failed))
    netdata = DataArray(data[idx, 0]).adddim(netdata,
                                             type=NetworkSpaceTimeData)

    # per-window 0/1 flag for every snapped crime
    crime_times = netdata.toarray(0)
    filters = {
        filt_name: filt_func(crime_times).astype(int)
        for filt_name, filt_func in time_window_filters.items()
    }

    # add count attributes to all edges, so edges without crimes report zero
    for e in net.edges():
        for filt_name in filters:
            e.attrs['crimes_%s' % filt_name] = 0

    # run over snapped crimes, incrementing the counters of the owning edge
    for i, t in enumerate(netdata.space.toarray()):
        for filt_name, flags in filters.items():
            t.edge.attrs['crimes_%s' % filt_name] += flags[i]

    net.save(consts.FILE_FRIENDLY_REGIONS[domain_name] +
             '_network_crime_counts',
             fmt='shp')