Esempio n. 1
0
    def test_linkage_function(self):
        """Check the separable linkage function on scalars and arrays, then a hand-written one."""
        separable = utils.linkage_func_separable(5., 10.)

        # scalar inputs: one accepted pair, then one rejected pair per argument
        self.assertTrue(separable(4, 9))
        self.assertFalse(separable(5.01, 9))
        self.assertFalse(separable(4, 10.01))

        # array inputs must agree element-wise with the explicit threshold expression
        # NOTE(review): np.random.rand draws from [0, 1), so presumably every element
        # passes both thresholds here and only the all-True case is exercised - confirm
        delta_t = DataArray(np.random.rand(10))
        delta_d = DataArray(np.random.rand(10))
        observed = separable(delta_t, delta_d)
        expected = (delta_t <= 5.) & (delta_d <= 10.)
        self.assertTrue(np.all(observed == expected))

        # a non-separable linkage applied to the same arrays
        def radial(dt, dd):
            return (dt**2 + dd**2)**0.5 < 0.5

        observed = radial(delta_t, delta_d)
        expected = (delta_t**2 + delta_d**2)**0.5 < 0.5
        self.assertTrue(np.all(observed == expected))
Esempio n. 2
0
 def test_marginal_cdf_values(self):
     """Compare the marginal CDF of self.kernels[3] with the expected values in dims 0, 1, 2."""
     kernel = self.kernels[3]
     grid = DataArray(np.linspace(-5, 5, 100))
     for dim in range(3):
         actual = kernel.marginal_cdf(grid, dim=dim)
         expected = self.expected_marginal_cdf(grid, dim)
         abs_err = np.abs(actual - expected)
         # every grid point must agree to within the fixture tolerance
         self.assertTrue(np.all(abs_err < self.tol))
Esempio n. 3
0
def marginal_icdf_optimise(k, y, dim=0, tol=1e-8):
    """
    Numerically invert the marginal CDF of kernel ``k`` along ``dim`` by repeated grid search.

    Each pass evaluates ``|k.marginal_cdf(x, dim=dim) - y|`` on an evenly spaced grid of
    100 points over the current search interval. If the minimum lies on an interval edge,
    the interval is widened on that side by the mean bandwidth; otherwise it is narrowed
    to the two grid points either side of the minimum.

    :param k: Object exposing ``marginal_cdf(x, dim=...)`` and 2D-indexable ``bandwidths``
    and ``data`` attributes (column ``dim`` of each is read).
    :param y: Target CDF value.
    :param dim: Dimension along which the marginal CDF is inverted.
    :param tol: Search stops once the absolute CDF residual is <= tol.
    :return: The located x value as a float (0.0 if no pass is needed because tol >= 1).
    :raises RuntimeError: if the residual has not dropped to ``tol`` within the pass budget.
    Widening passes count towards the budget, so a ``y`` the CDF can never reach
    terminates instead of looping forever.
    """
    n = 100
    max_iter = 50

    def abs_residual(x):
        return np.abs(k.marginal_cdf(x, dim=dim) - y)

    mean_bd = np.mean(k.bandwidths[:, dim])
    # initial search interval is the extent of the kernel's data in this dimension
    minx = np.min(k.data[:, dim])
    maxx = np.max(k.data[:, dim])
    err = 1.
    niter = 0
    x0 = 0.
    while err > tol:
        if niter > max_iter:
            raise RuntimeError(
                "Failed to converge to optimum after %u iterations" % max_iter)
        # count every pass (widening as well as narrowing) so the loop is bounded
        niter += 1
        xe = DataArray(np.linspace(minx, maxx, n))
        ye = abs_residual(xe)
        idx = np.argmin(ye)
        if idx == 0:
            # minimum on the lower edge: the optimum may lie below the interval
            minx -= mean_bd
            continue
        if idx == (n - 1):
            # minimum on the upper edge: the optimum may lie above the interval
            maxx += mean_bd
            continue
        err = ye[idx]
        x0 = xe[idx]
        # bracket the minimum with its two neighbours for the next pass
        minx = xe[idx - 1]
        maxx = xe[idx + 1]
    return float(x0)
Esempio n. 4
0
 def prediction_array(self, time, space_array):
     """
     Combine a single time with spatial sample points into one array of type self.data_class.
     The time is repeated once per spatial point; if the spatial array carries an
     original_shape, it is copied onto the time column before the two are joined.
     """
     space = self.space_class(space_array, copy=False)
     times = DataArray(time * np.ones(space.ndata))
     shape = space.original_shape
     if shape is not None:
         times.original_shape = shape
     return times.adddim(space, type=self.data_class)
Esempio n. 5
0
    def test_assess(self):
        """Run one validation iteration against a mocked ROC grid and check the calls made on it."""
        stk = hotspot.SKernelHistoric(1, bdwidth=0.3)
        vb = validation.ValidationBase(self.data, stk)
        vb.train_model()

        # mock roc object with grid
        mocroc = mock.create_autospec(roc.RocGrid)
        mocroc.centroids = np.array([[0., 0.], [1., 1.]])
        # needs to have the correct length, contents not used
        mocroc.sample_units = range(2)
        mocroc.sample_points = DataArray([[0., 0.], [1., 1.]])
        vb.roc = mocroc

        # the return value is not inspected; only the calls recorded on the mock matter
        vb._iterate_run(pred_dt_plus=0.2,
                        true_dt_plus=None,
                        true_dt_minus=0.)

        # set data
        self.assertTrue(vb.roc.set_data.called)
        self.assertEqual(vb.roc.set_data.call_count, 1)
        self.assertTrue(
            np.all(vb.roc.set_data.call_args[0] == vb.testing(
                dt_plus=0.2)[:, 1:]))

        # set prediction
        self.assertTrue(vb.roc.set_prediction.called)
        self.assertEqual(vb.roc.set_prediction.call_count, 1)
        self.assertEqual(len(vb.roc.set_prediction.call_args[0]), 1)
        t = (vb.cutoff_t + 0.2)
        # expected prediction values at the mocked sample points (0,0), (1,1)
        pred_expctd = vb.model.predict(t, mocroc.sample_points)
        self.assertTrue(
            np.all(vb.roc.set_prediction.call_args[0][0] == pred_expctd))

        # set grid: not touched by the run itself, only by the explicit call below
        self.assertFalse(vb.roc.set_sample_units.called)
        vb.set_sample_units(0.1)
        self.assertTrue(vb.roc.set_sample_units.called)
        self.assertEqual(vb.roc.set_sample_units.call_count, 1)
        self.assertTupleEqual(vb.roc.set_sample_units.call_args[0], (0.1, ))

        # evaluate
        self.assertTrue(vb.roc.evaluate.called)
        self.assertEqual(vb.roc.evaluate.call_count, 1)
Esempio n. 6
0
 def test_cutoff(self):
     """Check that kernels 1 and 2 give zero density wherever the first dimension is negative."""
     # one-dimensional case
     line = DataArray(np.linspace(-5, 5, 100))
     density = self.kernels[1].pdf(line)
     self.assertTrue(np.all(density[line.toarray(0) < 0] == 0))

     # two-dimensional case on a 50 x 50 mesh
     axis_a = np.linspace(-5, 5, 50)
     axis_b = np.linspace(-5, 5, 50)
     mesh = DataArray.from_meshgrid(*np.meshgrid(axis_a, axis_b))
     res = self.kernels[2].pdf(mesh)
     below = mesh.toarray(0) < 0
     self.assertTrue(np.all(res[below] == 0))
     # ... and strictly positive everywhere else
     at_or_above = mesh.toarray(0) >= 0
     self.assertTrue(np.all(res[at_or_above] > 0.))
Esempio n. 7
0
 def predict(self, time, space_array, force_update=False):
     """
     Generate a prediction from the trained model.
     Supply a single time and the spatial sample points in separate arrays.
     If targets are already set on the KDE and force_update=False, the spatial component
     is assumed unchanged: only the target times are updated and space_array is ignored.
     This is important: updating the spatial sample points loses all cached net paths.
     """
     reuse_targets = self.kde.targets_set and not force_update
     if not reuse_targets:
         # build fresh space-time targets from scratch
         space = self.space_class(space_array, copy=False)
         times = DataArray(np.ones(space.ndata) * time)
         targets = times.adddim(space, type=self.data_class)
         return self.kde.pdf(targets=targets)

     self.kde.update_target_times(time)
     return self.kde.pdf()
Esempio n. 8
0
    def test_mvn3d(self):
        """Check a 3D MultivariateNormal: dimension count, normalisation and pointwise values."""
        mvn = kernels.MultivariateNormal([0, 0, 0], [1, 1, 1])
        self.assertEqual(mvn.ndim, 3)

        # the pdf should integrate to 1 (to 5 places) over the cube [-5, 5]^3
        integrand = partial(quad_pdf_fun, func=mvn.pdf)
        integral = tplquad(integrand, -5, 5,
                           lambda x: -5, lambda x: 5,
                           lambda x, y: -5, lambda x, y: 5)
        self.assertAlmostEqual(integral[0], 1.0, places=5)

        # test with absolute values on a 10 x 10 x 10 grid, flattened to 1000 points
        axis = np.linspace(-1, 1, 10)
        grids = np.meshgrid(axis, axis, axis)
        flat = np.vstack((grids[0].flatten(), grids[1].flatten(), grids[2].flatten()))
        pts = flat.transpose().reshape(1000, 3)
        y = mvn.pdf(pts)
        y_expct = multivariate_normal.pdf(pts, mean=[0, 0, 0], cov=np.eye(3))
        n_off = np.sum(np.abs((y - y_expct)).flatten() > 1e-12)
        self.assertEqual(n_off, 0)  # no single difference > 1e-12

        # repeat with Data type
        y = mvn.pdf(DataArray(pts))
        n_off = np.sum(np.abs((y - y_expct)).flatten() > 1e-12)
        self.assertEqual(n_off, 0)  # no single difference > 1e-12
Esempio n. 9
0
 def update_target_times(self, target_times):
     """
     Update the times attached to the (spatial) targets.
     :param target_times: Either an iterable (in which case it must be possible to cast to a DataArray) or a scalar
     (in which case all target times are set to this value)
     :raises AttributeError: if an iterable is supplied whose length differs from the number of targets
     """
     if not hasattr(target_times, '__iter__'):
         # scalar: write the same time into column 0 of every target
         self.logger.info("update_target_times with a fixed time %f",
                          target_times)
         self.targets.data[:, 0] = target_times
         return

     new_times = DataArray(target_times, copy=False)
     self.logger.info("update_target_times with an iterable of len %d",
                      new_times.ndata)
     if new_times.ndata != self.targets.ndata:
         raise AttributeError(
             "The number of data points does not match existing data in the supplied array"
         )
     self.targets.time = new_times
Esempio n. 10
0
    def test_partials(self):
        """Check partial_marginal_pdf of a 3D normal against pdf(dims=...) and products of marginals."""
        mvn = kernels.MultivariateNormal([0, 0, 0], [1, 2, 3])
        axis_a = np.linspace(-15, 15, 20)
        axis_b = np.linspace(-15, 15, 20)
        # stack the two mesh components along a trailing axis
        xy = DataArray(np.dstack(np.meshgrid(axis_a, axis_b)))
        first = xy.getdim(0)
        second = xy.getdim(1)

        def all_close(a, b):
            return np.all(np.abs(a - b) < 1e-14)

        # attempt to call with data of too few dims
        with self.assertRaises(AttributeError):
            mvn.partial_marginal_pdf(xy.getdim(0), dim=0)

        # dropping dim 0: should be marginal in 2nd and 3rd dims
        partial_pdf = mvn.partial_marginal_pdf(xy, dim=0)
        self.assertTrue(all_close(partial_pdf, mvn.pdf(xy, dims=[1, 2])))
        product = mvn.marginal_pdf(first, dim=1) * mvn.marginal_pdf(second, dim=2)
        self.assertTrue(all_close(partial_pdf, product))

        # dropping dim 1: should be marginal in 1st and 3rd dims
        partial_pdf = mvn.partial_marginal_pdf(xy, dim=1)
        self.assertTrue(all_close(partial_pdf, mvn.pdf(xy, dims=[0, 2])))
        # test directly
        product = mvn.marginal_pdf(first, dim=0) * mvn.marginal_pdf(second, dim=2)
        self.assertTrue(all_close(partial_pdf, product))
Esempio n. 11
0
    def test_mvn1d(self):
        """Check a 1D MultivariateNormal: dimension count, normalisation, pdf values and marginals."""
        mvn = kernels.MultivariateNormal([0], [1])
        self.assertEqual(mvn.ndim, 1)

        # the pdf should integrate to 1 (to 5 places) over [-5, 5]
        integrand = partial(quad_pdf_fun, func=mvn.pdf)
        integral = quad(integrand, -5., 5.)
        self.assertAlmostEqual(integral[0], 1.0, places=5)

        # test with absolute values against scipy's standard normal
        column = np.linspace(-1, 1, 10).reshape(10, 1)
        y = mvn.pdf(column)
        y_expct = norm.pdf(column)
        for got, want in zip(y, y_expct):
            self.assertAlmostEqual(got, want)

        # repeat with Data type
        x = DataArray(np.linspace(-1, 1, 10))
        y = mvn.pdf(x)
        for got, want in zip(y, y_expct):
            self.assertAlmostEqual(got, want)

        # with a single dimension the marginal pdf is the pdf itself
        marginal = mvn.marginal_pdf(x)
        self.assertListEqual(list(y), list(marginal))
        # CDF evaluated at the mean
        cdf_at_zero = mvn.marginal_cdf(np.array(0.))
        self.assertAlmostEqual(cdf_at_zero, 0.5)
Esempio n. 12
0
    # Walker over itn_net for the given targets; radius is passed as max_distance.
    nw = utils.NetworkWalker(itn_net,
                             targets,
                             max_distance=radius,
                             max_split=1e4)
    # Kernel built from the first source only.
    # presumably 200. is the spatial bandwidth - TODO confirm against the kernel class
    k = NetworkKernelEqualSplitLinear(sources.getone(0), 200.)
    k.set_walker(nw)
    z = k.pdf()
    # divide by the largest value (only used by the commented-out plotting below)
    zn = z / max(z)
    # plt.figure()
    # itn_net.plot_network()
    # plt.scatter(nodes[:, 0], nodes[:, 1], marker='x', s=15, c='k')
    # plt.scatter(*targets.to_cartesian().separate, s=(zn * 20) ** 2)
    # plt.scatter(*sources.getone(0).cartesian_coords, c='r', s=50)

    # add time to sources and targets
    # sources get random times; every target gets time 1
    times = DataArray(np.random.rand(num_pts))
    sources_st = times.adddim(sources, type=NetworkSpaceTimeData)
    targets_st = DataArray(np.ones(targets.ndata)).adddim(
        targets, type=NetworkSpaceTimeData)

    # Space-time kernel built from the first source's time and space components.
    # presumably [0.5, 200.] are the time and space bandwidths - TODO confirm
    kst = NetworkTemporalKernelEqualSplit(
        [sources_st.time.getone(0),
         sources_st.space.getone(0)], [0.5, 200.])
    kst.set_walker(nw)
    zst = kst.pdf(target_times=targets_st.time)
    # NOTE(review): this divides by the max of the spatial-only result z, not of zst -
    # confirm whether zst.max() was intended
    zstn = zst / z.max()

    # plt.figure()
    # itn_net.plot_network()
    # plt.scatter(nodes[:, 0], nodes[:, 1], marker='x', s=15, c='k')
    # plt.scatter(*targets.to_cartesian().separate, s=(zstn * 20) ** 2)
Esempio n. 13
0
def create_network_with_crime_counts(
        start_date=datetime.date(2011, 3, 1), domain_name='South'):
    """
    Count crimes snapped onto each street-network edge in two time windows and save a shapefile.

    Crimes of a fixed set of types are fetched for the 365 days following start_date,
    snapped onto the pickled OSM street network for the region (maximum snapping radius 50),
    and counted per edge into the attributes 'crimes_6mo' and 'crimes_12mo'. The network is
    then saved in 'shp' format under '<region>_network_crime_counts'.

    :param start_date: First date of the crime query window.
    :param domain_name: Key into the Chicago side polygons and into consts.FILE_FRIENDLY_REGIONS.
    """
    # load network, count crimes in 6mo and 12mo window, output shapefile
    domains = chicago.get_chicago_side_polys()
    domain = domains[domain_name]
    region = consts.FILE_FRIENDLY_REGIONS[domain_name]

    end_date = start_date + datetime.timedelta(days=365)
    crime_types = (
        'THEFT',
        'BURGLARY',
        'HOMICIDE',
        'BATTERY',
        'ARSON',
        'MOTOR VEHICLE THEFT',
        'ASSAULT',
        'ROBBERY',
    )

    # window label -> predicate on the time column
    # presumably times are in days relative to start_date - TODO confirm against get_crimes_by_type
    time_window_filters = {
        '6mo': lambda t: t <= 183,
        '12mo': lambda t: t <= 365,
    }
    # edge attribute name for each window, derived from the label so the two cannot drift apart
    count_attrs = {
        name: 'crimes_%s' % name for name in time_window_filters
    }

    # get crime data (only the data array is used here)
    data, _t0, _cid = chicago.get_crimes_by_type(crime_type=crime_types,
                                                 start_date=start_date,
                                                 end_date=end_date,
                                                 domain=domain)

    # get network
    osm_file = os.path.join(DATA_DIR, 'osm_chicago',
                            '%s_clipped.net' % region)
    net = osm.OSMStreetNet.from_pickle(osm_file)

    # snap crime data to network with maximum distance cutoff
    netdata, failed = NetworkData.from_cartesian(net,
                                                 data[:, 1:],
                                                 radius=50,
                                                 return_failure_idx=True)
    # get non-failed times so that the time column lines up with the snapped points
    idx = sorted(set(range(data.shape[0])) - set(failed))
    netdata = DataArray(data[idx, 0]).adddim(netdata,
                                             type=NetworkSpaceTimeData)

    # 0/1 indicator per crime for each time window
    crime_times = netdata.toarray(0)
    filters = {
        name: in_window(crime_times).astype(int)
        for name, in_window in time_window_filters.items()
    }

    # add count attributes to all edges, so edges with no crimes report 0
    for e in net.edges():
        for attr in count_attrs.values():
            e.attrs[attr] = 0

    # run over snapped crimes, accumulating onto the edge each one was snapped to
    for i, t in enumerate(netdata.space.toarray()):
        for name, attr in count_attrs.items():
            t.edge.attrs[attr] += filters[name][i]

    net.save(region + '_network_crime_counts', fmt='shp')