Ejemplo n.º 1
0
    def set_sample_points(self,
                          n_sample_per_grid,
                          respect_boundary=True,
                          *args,
                          **kwargs):
        """ Generate n_sample_per_grid sample points per grid unit.

        Sets self.sample_points to a DataArray of (x, y) points, laid out
        column-major so all points for sample unit 0 come first, then unit 1,
        etc. Also sets self.n_sample_point_per_unit.

        :param n_sample_per_grid: number of random points per sample unit
        :param respect_boundary: if True, points falling outside self.poly in
            partially-covered grid squares are replaced by repeats of that
            square's centroid
        :param args, kwargs: accepted for interface compatibility; unused here
        """
        # Choose the point implementation matching the polygon type in use
        if HAS_GEODJANGO and isinstance(self.poly, geos.GEOSGeometry):
            point_class = geos.Point
        else:
            point_class = Point

        if self.side_length is None:
            # grid was supplied as an array
            # slow version: need to iterate over the polygons
            xres = np.empty((n_sample_per_grid, self.n_sample_units),
                            dtype=float)
            yres = np.empty((n_sample_per_grid, self.n_sample_units),
                            dtype=float)
            for i, p in enumerate(self.grid_polys):
                xres[:, i], yres[:, i] = random_points_within_poly(
                    p, n_sample_per_grid)

        else:
            # fast version: uniform sampling inside axis-aligned squares of
            # known side length, one column per sample unit
            xmins = np.array([x[0] for x in self.sample_units])
            ymins = np.array([x[1] for x in self.sample_units])
            xres = np.random.rand(n_sample_per_grid, self.n_sample_units
                                  ) * self.side_length + xmins
            yres = np.random.rand(n_sample_per_grid, self.n_sample_units
                                  ) * self.side_length + ymins

        if respect_boundary:
            # loop over grid squares that are incomplete
            # (idiom: boolean negation instead of `== False` comparison)
            for i in np.where(~np.asarray(self.full_grid_square, dtype=bool))[0]:
                inside_idx = np.array([
                    point_class(x, y).within(self.poly)
                    for x, y in zip(xres[:, i], yres[:, i])
                ])
                # pad empty parts with repeats of the centroid location
                num_empty = n_sample_per_grid - np.count_nonzero(inside_idx)
                if num_empty:
                    cx, cy = self.centroids[i]
                    xres[:, i] = np.concatenate(
                        (xres[inside_idx, i], cx * np.ones(num_empty)))
                    yres[:, i] = np.concatenate(
                        (yres[inside_idx, i], cy * np.ones(num_empty)))

        # column-major flatten: points grouped by sample unit
        xres = xres.flatten(order='F')
        yres = yres.flatten(order='F')
        self.sample_points = DataArray.from_args(xres, yres)

        self.n_sample_point_per_unit = np.ones(
            self.n_sample_units) * n_sample_per_grid
Ejemplo n.º 2
0
 def prediction_array(self, t):
     """Build a space-time data array at a single time t.

     Prepends a constant time column (value t, one entry per sample point)
     to the ROC sample points, returning an instance of self.data_class.
     """
     num_points = self.roc.sample_points.ndata
     time_col = t * np.ones(num_points)
     times = DataArray.from_args(time_col)
     return times.adddim(self.roc.sample_points, type=self.data_class)
Ejemplo n.º 3
0
    def test_network_roc(self):
        """Exercise network-based validation/ROC with three hotspot kernels."""
        itn_net = load_test_network()

        # lay down a few events on the network
        edge_idx = [20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 100]
        # midway along each chosen edge
        net_point_array = NetworkData(
            [itn_net.edges()[k].centroid for k in edge_idx])

        # append to times to get full dataset
        event_times = np.arange(net_point_array.ndata) / float(len(edge_idx))
        st_net_array = DataArray.from_args(event_times).adddim(
            net_point_array, type=NetworkSpaceTimeData)

        # test 1: Bowers kernel (decrease with time and distance)
        stk = hotspot.STNetworkBowers(a=10., b=1.)
        vb = validation.NetworkValidationBase(st_net_array, stk)
        vb.set_sample_units(None)  # the argument makes no difference here
        vb.set_t_cutoff(0.5)
        res = vb.run(time_step=0.1)

        # on each step, the segment corresponding to the most recent event should have the highest prediction rank
        for i in range(5):
            expected = np.array(edge_idx[5:(i + 6)])[::-1]
            observed = res['prediction_rank'][i][:(i + 1)]
            self.assertTrue(np.all(observed == expected))

        # test 2: binary mask kernel in space, decr in time
        # exponential decrease with dt, zero when dd > 10
        radius = 50.
        stk = hotspot.STNetworkFixedRadius(radius, 1.0)
        vb = validation.NetworkValidationBase(st_net_array, stk)
        vb.set_sample_units(None)  # the argument makes no difference here
        vb.set_t_cutoff(0.5)
        res = vb.run(time_step=0.1, n_iter=1)
        pvals = vb.roc.prediction_values

        # check network distance from centroid to sources and verify non zero values
        sources = vb.training.toarray(1)
        pvals_expctd_nonzero = np.array([
            j for j, e in enumerate(vb.roc.sample_units)
            if np.any([(e.centroid - src).length <= radius for src in sources])
        ])
        self.assertTrue(
            np.all(pvals.nonzero()[0] == np.array(pvals_expctd_nonzero)))

        # test 3: repeat but with a mean version of the ROC

        vb = validation.NetworkValidationMean(st_net_array, stk)
        vb.set_sample_units(None, 10)  # points are spaced ~10m apart
        vb.set_t_cutoff(0.5)
        res = vb.run(time_step=0.1, n_iter=1)  # just one run this time
        pvals2 = vb.roc.prediction_values

        # check network distance from centroid to sources and verify non zero values
        sources = vb.training.toarray(1)
        cum_counts = vb.roc.n_sample_point_per_unit.cumsum()
        pvals_expctd_nonzero2 = []
        for j, pt in enumerate(vb.sample_points.toarray(0)):
            if not np.any([(pt - src).length <= radius for src in sources]):
                continue
            # find sample unit from sample point
            this_sample_unit = bisect.bisect_right(cum_counts, j)
            if this_sample_unit not in pvals_expctd_nonzero2:
                pvals_expctd_nonzero2.append(this_sample_unit)

        self.assertTrue(
            np.all(pvals2.nonzero()[0] == np.array(pvals_expctd_nonzero2)))