def _iterate_run(self, pred_dt_plus, true_dt_plus, true_dt_minus, **kwargs):
    """Run one validation iteration, always supplying fresh training data.

    The current ``self.training`` is wrapped in a ``CartesianSpaceTimeData``
    and passed to the parent implementation as the ``source_data`` keyword,
    replacing any ``source_data`` value the caller may have supplied.

    :param pred_dt_plus: forwarded positionally to the parent method.
    :param true_dt_plus: forwarded to the parent method by keyword.
    :param true_dt_minus: forwarded to the parent method by keyword.
    :param kwargs: further keyword arguments forwarded to the parent method.
    :return: whatever the parent ``_iterate_run`` returns.
    """
    # Rebuild the source data on every call so the parent never reuses data
    # from a previous iteration.
    kwargs['source_data'] = CartesianSpaceTimeData(self.training)
    parent = super(SeppValidationFixedModel, self)
    return parent._iterate_run(
        pred_dt_plus,
        true_dt_plus=true_dt_plus,
        true_dt_minus=true_dt_minus,
        **kwargs
    )
def test_cross_linkage(self):
    """Check linkages computed between two distinct datasets.

    Random source and target datasets are linked with a separable linkage
    function; the returned index pairs are then checked directly against the
    time and distance restrictions.
    """
    n_source, n_target = 5000, 1000
    source = CartesianSpaceTimeData(np.random.randn(n_source, 3))
    target = CartesianSpaceTimeData(np.random.randn(n_target, 3))
    max_t = max_d = 0.5

    separable_link = utils.linkage_func_separable(max_t, max_d)
    src_idx, tgt_idx = utils.linkages(source, separable_link, data_target=target)

    # Returned indices must address valid rows of their own dataset.
    self.assertTrue(np.all(src_idx < n_source))
    self.assertTrue(np.all(tgt_idx < n_target))

    # Manually test the restrictions.
    t_from = source[src_idx, 0]
    t_to = target[tgt_idx, 0]
    # every time difference is positive ...
    self.assertTrue(np.all(t_to > t_from))
    # ... and no larger than max_t
    self.assertTrue(np.all(t_to - t_from <= max_t))

    # every spatial separation is no larger than max_d
    dx = target[tgt_idx, 1] - source[src_idx, 1]
    dy = target[tgt_idx, 2] - source[src_idx, 2]
    separation = np.sqrt(dx ** 2 + dy ** 2)
    self.assertTrue(np.all(separation <= max_d))
def pairwise_distance_histogram(data,
                                max_t,
                                max_d,
                                remove_coincident_pairs=False,
                                nbin=40,
                                vmax=None,
                                fmax=None):
    """Draw a seaborn hexbin joint plot of pairwise spatial offsets.

    The data are linked with ``linkage_func_separable(max_t, max_d)`` and the
    difference ``data[j] - data[i]`` is taken for every linked pair. Columns
    1 and 2 of that difference are plotted against each other as 'x (m)' and
    'y (m)'.

    :param data: input accepted by ``CartesianSpaceTimeData``.
    :param max_t: first argument to ``linkage_func_separable``
        (presumably the maximum time difference - confirm against that helper).
    :param max_d: second argument to ``linkage_func_separable``
        (presumably the maximum distance - confirm against that helper).
    :param remove_coincident_pairs: forwarded to ``linkages``.
    :param nbin: number of bins for the marginal histograms.
    :param vmax: optional upper colour limit for the hexbin plot. Ignored
        when ``fmax`` is given.
    :param fmax: optional fraction of the sorted hexbin counts; the count at
        that position becomes the colour limit. ``fmax=1.0`` selects the
        largest count.
    :return: the grid object returned by ``sns.jointplot``.
    """
    import seaborn as sns

    data = CartesianSpaceTimeData(data)
    linkage_fun = linkage_func_separable(max_t, max_d)
    i, j = linkages(data, linkage_fun, remove_coincident_pairs=remove_coincident_pairs)
    interpoint = data[j] - data[i]
    df = pandas.DataFrame(interpoint[:, 1:], columns=('x (m)', 'y (m)'))

    def _draw(joint_kws):
        # Single place for the jointplot configuration shared by both passes.
        return sns.jointplot(x='x (m)',
                             y='y (m)',
                             data=df,
                             kind='hex',
                             color='k',
                             stat_func=None,
                             space=0,
                             marginal_kws=dict(bins=nbin),
                             joint_kws=joint_kws,
                             size=8)

    sns.set_context("paper", font_scale=2.)
    with sns.axes_style("white"):
        grid = _draw(dict(vmax=vmax) if vmax is not None else dict())

        if fmax is not None:
            # The first plot is only needed to read the hexbin counts; it is
            # closed and redrawn once the colour limit is known.
            pc = [
                t for t in grid.ax_joint.get_children()
                if isinstance(t, collections.PolyCollection)
            ][0]
            cdata = pc.get_array()
            plt.close(grid.fig)
            cdata.sort()
            # Clamp so that fmax == 1.0 picks the maximum instead of indexing
            # one past the end of the array.
            idx = min(int(len(cdata) * fmax), len(cdata) - 1)
            vmax = cdata[idx]
            grid = _draw(dict(vmax=vmax))

    return grid
def txy_to_cartesian_data_array(t, x, y):
    """Combine matching t, x and y arrays into a ``CartesianSpaceTimeData``.

    Each input gains a new trailing axis and the three are concatenated along
    it, so the result has the inputs' shape plus a final axis of length 3,
    ordered (t, x, y).

    :param t: array exposing ``ndim`` and ``shape``; the reference for checks.
    :param x: array with the same ``ndim`` and ``shape`` as ``t``.
    :param y: array with the same ``ndim`` and ``shape`` as ``t``.
    :return: ``CartesianSpaceTimeData`` built from the concatenated array.
    :raises AttributeError: if the number of dimensions or the shapes differ.
    """
    if not (x.ndim == t.ndim and y.ndim == t.ndim):
        raise AttributeError("Ndim does not match")
    if not (x.shape == t.shape and y.shape == t.shape):
        raise AttributeError("Shape does not match")

    # Append a length-1 axis to each component, then join along that axis.
    components = [arr[..., np.newaxis] for arr in (t, x, y)]
    return CartesianSpaceTimeData(np.concatenate(components, axis=t.ndim))
def test_self_linkage(self):
    """Check linkages computed within a single dataset.

    A random dataset is linked with itself using a separable linkage
    function; the returned index pairs are then checked directly against the
    time and distance restrictions.
    """
    points = CartesianSpaceTimeData(np.random.randn(5000, 3))
    max_t = max_d = 0.5

    separable_link = utils.linkage_func_separable(max_t, max_d)
    from_idx, to_idx = utils.linkages(points, separable_link)

    # Manually test the restrictions.
    t_from = points[from_idx, 0]
    t_to = points[to_idx, 0]
    # every time difference is positive ...
    self.assertTrue(np.all(t_to > t_from))
    # ... and no larger than max_t
    self.assertTrue(np.all(t_to - t_from <= max_t))

    # every spatial separation is no larger than max_d
    dx = points[to_idx, 1] - points[from_idx, 1]
    dy = points[to_idx, 2] - points[from_idx, 2]
    separation = np.sqrt(dx ** 2 + dy ** 2)
    self.assertTrue(np.all(separation <= max_d))
def pairwise_distance_histogram_manual(data,
                                       max_t,
                                       max_d,
                                       ax=None,
                                       remove_coincident_pairs=False,
                                       nbin=40,
                                       vmax=None,
                                       fmax=None,
                                       cmap='binary',
                                       colorbar=True,
                                       mask=True):
    """Plot a 2D histogram of pairwise spatial offsets with ``pcolormesh``.

    The data are linked with ``linkage_func_separable(max_t, max_d)`` and the
    difference ``data[j] - data[i]`` is taken for every linked pair. Columns
    1 and 2 of that difference are binned on a square grid spanning
    ``[-max_d, max_d]`` in both directions.

    :param data: input accepted by ``CartesianSpaceTimeData``.
    :param max_t: first argument to ``linkage_func_separable``
        (presumably the maximum time difference - confirm against that helper).
    :param max_d: second argument to ``linkage_func_separable``; also sets
        the histogram extent.
    :param ax: axes to draw on; a new figure and axes are created if None.
    :param remove_coincident_pairs: forwarded to ``linkages``.
    :param nbin: number of bin edges per axis; odd values are bumped up to
        the next even number.
    :param vmax: optional upper colour limit. Ignored when ``fmax`` is given.
    :param fmax: optional fraction of the sorted bin counts (empty bins
        included); the count at that position becomes the colour limit.
        ``fmax=1.0`` selects the largest count.
    :param cmap: colour map passed to ``pcolormesh``.
    :param colorbar: if True, add a colour bar labelled 'Counts'.
    :param mask: if True, mask bins whose count is zero.
    """
    # nbin must be EVEN to ensure no weird origin effects
    if np.mod(nbin, 2):
        nbin += 1

    data = CartesianSpaceTimeData(data)
    linkage_fun = linkage_func_separable(max_t, max_d)
    i, j = linkages(data, linkage_fun, remove_coincident_pairs=remove_coincident_pairs)
    interpoint = data[j] - data[i]

    xe = np.linspace(-max_d, max_d, nbin)
    H, xedges, yedges = np.histogram2d(interpoint[:, 1], interpoint[:, 2], [xe, xe])

    # H needs to be rotated and flipped (together these amount to a
    # transpose, so that rows follow y and columns follow x).
    H = np.rot90(H)
    H = np.flipud(H)

    # Mask pixels with a value of zero, if requested
    if mask:
        H = np.ma.masked_where(H == 0, H)

    # Plot 2D histogram using pcolor
    if ax is None:
        fig = plt.figure()
        ax = fig.add_subplot(111)

    if fmax is not None:
        # Sort the raw (unmasked) counts; axis=None flattens before sorting.
        dd_sorted = np.sort(np.ma.getdata(H), axis=None)
        # Clamp so that fmax == 1.0 picks the maximum instead of indexing
        # one past the end of the array.
        idx = min(int(dd_sorted.size * fmax), dd_sorted.size - 1)
        vmax = dd_sorted[idx]

    quadmesh = ax.pcolormesh(xedges, yedges, H, vmax=vmax, cmap=cmap)
    if colorbar:
        cbar = plt.colorbar(quadmesh, ax=ax)
        cbar.ax.set_ylabel('Counts')
def estimate_intensity(self):
    """Evaluate the KDE intensity once per unique index and cache the result.

    The pairs ``(self.ii, self.i_data)`` and then ``(self.jj, self.j_data)``
    are scanned in order, keeping the first data point seen for each index.
    The spatial part of those points is passed to
    ``self.kde.partial_marginal_pdf(..., normed=False)`` and the result is
    divided by ``self.T``.

    On completion ``self.intensity`` maps each unique index to its intensity
    value. It is reset to an empty dict first, so it stays empty if the
    evaluation raises.
    """
    self.intensity = {}

    lookup_ind = []     # unique indices, in first-seen order
    lookup_data = []    # data point for each entry of lookup_ind
    seen = set()        # constant-time membership test alongside the list

    for indices, points in ((self.ii, self.i_data), (self.jj, self.j_data)):
        for idx, txy in zip(indices, points):
            if idx in seen:
                continue
            seen.add(idx)
            lookup_ind.append(idx)
            lookup_data.append(txy)

    # Alternative kept from the original (full space-time density):
    # intensity = self.kde.pdf(CartesianSpaceTimeData(np.array(lookup_data)), normed=False)
    locations = CartesianSpaceTimeData(np.array(lookup_data)).space
    intensity = self.kde.partial_marginal_pdf(locations, normed=False) / self.T

    self.intensity = dict(zip(lookup_ind, intensity))
cid_all = np.arange(res_all.ndata) else: max_t = 90 max_d = 500 du = 50 dv = 5 number_nn = [15, 100] simplification_tol = 20 # for speedup in intersection etc. full_poly = cad.get_camden_region() poly = full_poly.simplify(simplification_tol) bdy = poly.boundary res_all, t0, cid_all = cad.get_crimes_by_type(nicl_type=3) res_all = CartesianSpaceTimeData(res_all) cid_all = np.array(sorted(cid_all)) S = poly.area T = np.ptp(res_all.time) n = res_all.ndata # estimate intensity with KDE scott_spatial_bandwidth = res_all.space.data.std( axis=0, ddof=1) * res_all.ndata**(-1. / float(2 + 4)) my_temporal_bandwidth = 0.1 k = kde_models.FixedBandwidthKdeSeparable( res_all, bandwidths=list(scott_spatial_bandwidth) + [my_temporal_bandwidth]) # k = kde_models.VariableBandwidthNnKdeSeparable(res_all, number_nn=number_nn)