コード例 #1
0
ファイル: spatial.py プロジェクト: gaberosser/crime-fighter
def spatial_linkages(data_source,
                     max_d,
                     data_target=None,
                     chunksize=2**18,
                     remove_coincident_pairs=False):
    """
    Compute the indices of datapoints that lie within distance max_d of one another. Distances are euclidean.

    All candidate index pairs are enumerated first and then evaluated in slices of at most ``chunksize`` pairs, so
    that only one slice of distances is held in memory at a time.

    :param data_source: numpy or CartesianData array of source data.
    :param max_d: Maximum distance between points (inclusive: pairs with distance == max_d are kept).
    :param data_target: optional numpy or CartesianData array. If supplied, the linkage indices are between
    data_source and data_target, otherwise the two are set equal and the candidate pairs are those returned by
    pairwise_differences_indices.
    :param chunksize: The maximum number of candidate pairs evaluated per iteration.
    :param remove_coincident_pairs: If True, remove links between pairs of points with distance == 0
    :return: tuple (idx_array_source, idx_array_target, dist_between) of numpy arrays, one entry per retained link.
    """
    data_source = CartesianData(data_source)
    ndata_source = data_source.ndata
    if data_target is not None:
        data_target = CartesianData(data_target)
        ndata_target = data_target.ndata
        n_pairs = ndata_source * ndata_target
        idx_i, idx_j = np.meshgrid(range(ndata_source),
                                   range(ndata_target),
                                   copy=False)
    else:
        # self-linkage
        data_target = data_source
        # floor division: the pair count must stay an integer under true division as well
        n_pairs = ndata_source * (ndata_source - 1) // 2
        idx_i, idx_j = pairwise_differences_indices(ndata_source)

    # range() requires a non-zero integer step; with no candidate pairs the loop below simply does not run
    chunksize = max(1, int(min(chunksize, n_pairs)))

    link_i = []
    link_j = []
    link_d = []

    for k in range(0, idx_i.size, chunksize):
        i = idx_i.flat[k:(k + chunksize)]
        j = idx_j.flat[k:(k + chunksize)]
        dd = (data_target.getrows(j).distance(
            data_source.getrows(i))).toarray()
        mask = (dd <= max_d)
        if remove_coincident_pairs:
            mask[dd == 0] = False
        link_i.extend(i[mask])
        link_j.extend(j[mask])
        link_d.extend(dd[mask])

    return np.array(link_i), np.array(link_j), np.array(link_d)
コード例 #2
0
ファイル: ripley.py プロジェクト: gaberosser/crime-fighter
class RipleyK(object):
    """
    Edge-corrected K function for a 2D point pattern observed inside a polygonal domain.

    Typical use: instantiate, call process() once to compute the pairwise links and their edge correction
    factors, then call compute_k / compute_l / compute_lhat. run_permutation repeats the computation on
    randomly drawn point sets of the same size.
    """

    # not referenced by the methods defined below
    kde_class = kde_models.VariableBandwidthNnKdeSeparable
    # passed through to the edge correction routine as n_quad
    n_quad = 32

    def __init__(self, data, max_d, domain, clip_data=True):
        """
        :param data: 2D spatial data; anything accepted by CartesianData
        :param max_d: Maximum link distance; passed to prepare_data and used as the upper bound on u in
        run_permutation
        :param domain: Polygon-like object. Must work with geometry.Point(...).within(domain) and expose
        .area and .exterior
        :param clip_data: If True (default), remove data that lie outside of the domain. This is recommended as they
        will result in a division by zero later on
        """
        self.data = CartesianData(data)
        if clip_data:
            to_keep = [
                i for i, xy in enumerate(self.data.data)
                if geometry.Point(xy).within(domain)
            ]
            self.data = self.data.getrows(to_keep)
        assert self.data.nd == 2, "Input data must be 2D (i.e. purely spatial)"
        # count the points actually retained: clipping may have dropped some of the input
        self.n = self.data.ndata
        self.max_d = max_d
        self.domain = domain
        self.S = self.domain.area
        # populated by process()
        self.ii = self.jj = self.dd = self.dtheta = None
        self.near_exterior = None
        self.edge_correction = None
        self.intensity = float(self.n) / self.S

    def process(self):
        """
        Prepare the link data and compute the edge corrections. Call once after instantiation; compute_k and
        friends rely on the attributes set here.
        """
        self.ii, self.jj, self.dd, self.near_exterior = prepare_data(
            self.data, self.domain, self.max_d)
        self.edge_correction = self.compute_edge_correction(
            self.data, self.ii, self.dd, self.domain, self.n_quad)

    @staticmethod
    def compute_edge_correction(data, source_idx, dd, domain, n_quad, *args,
                                **kwargs):
        """
        Compute one edge correction factor per link. Links whose source point is at least dd away from the domain
        exterior keep a factor of 1; the remainder are evaluated in a multiprocessing pool.
        :param data: Full array or DataArray of spatial data
        :param source_idx: Array of source indices into data, one per link
        :param dd: Array of distances, i.e. the circle's radius, one per link
        :param domain: Polygon-like object exposing .exterior
        :param n_quad: Forwarded to isotropic_edge_correction_wrapper
        :return: Float array of the same shape as dd
        """
        data = CartesianData(data, copy=False)

        # compute distance to edge and therefore which points/distances need edge corrections
        d_to_ext = np.array([
            geometry.Point(data[i]).distance(domain.exterior)
            for i in range(data.ndata)
        ])
        near_exterior = np.where(d_to_ext[source_idx] < dd)[0]

        # output array of same size as dd; forced to float so that the factors are never truncated if dd is integer
        ec = np.ones_like(dd, dtype=float)

        # can switch the method here
        mappable_func = partial(isotropic_edge_correction_wrapper,
                                n_quad=n_quad,
                                domain=domain,
                                method='area')

        # single-argument parenthesised print behaves identically as a Python 2 statement and a Python 3 call
        print("Computing edge correction terms...")
        tic = time()
        with closing(mp.Pool()) as pool:
            # NOTE(review): the huge timeout on get() is presumably there so that the wait stays interruptible
            res = pool.map_async(mappable_func,
                                 ((data[source_idx[i]], dd[i])
                                  for i in near_exterior)).get(1e100)
        print("Completed in %f seconds" % (time() - tic))
        ec[near_exterior] = np.array(res)
        return ec

    def compute_k(self, u, dd=None, edge_corr=None, *args, **kwargs):
        """
        Evaluate the edge-corrected K function.
        :param u: Distance threshold, scalar or iterable of thresholds
        :param dd: Optional array of link distances; defaults to self.dd (set by process())
        :param edge_corr: Optional array of edge correction factors aligned with dd; defaults to
        self.edge_correction (set by process())
        :return: Array with one value per threshold: the sum of 1 / edge_corr over links with dd <= threshold,
        divided by the number of points and by the intensity
        """
        if not hasattr(u, '__iter__'):
            u = [u]
        dd = dd if dd is not None else self.dd
        edge_corr = edge_corr if edge_corr is not None else self.edge_correction
        res = []
        for t in u:
            ind = (dd <= t)
            # 1.0 keeps this a true division whatever the dtype of edge_corr
            w = 1.0 / edge_corr[ind]
            res.append(w.sum() / float(self.n) / self.intensity)
        return np.array(res)

    def compute_l(self, u):
        """
        Compute the difference between K and the CSR model, pi * u^2
        :param u: Distance threshold, scalar or iterable of thresholds
        :return: Array with one value per threshold
        """
        k = self.compute_k(u)
        # asarray so that plain lists / tuples of thresholds can be squared elementwise
        csr = np.pi * np.asarray(u, dtype=float) ** 2
        return k - csr

    def compute_lhat(self, u):
        """
        Lhat is defined as (K / pi) ^ 0.5
        :param u: Distance threshold, scalar or iterable of thresholds
        :return: Array with one value per threshold
        """
        k = self.compute_k(u)
        return np.sqrt(k / np.pi)

    def run_permutation(self, u, niter=20):
        """
        Compute K for niter independent sets of self.n random points drawn within the domain.

        This is best-effort: if an iteration raises, or the run is interrupted from the keyboard, the iterations
        completed so far are returned rather than discarded.
        :param u: Distance threshold(s); none may exceed max_d
        :param niter: Number of random point sets to simulate
        :return: Array with one row of K values per completed iteration
        :raises AttributeError: if any value of u is greater than max_d
        """
        if np.any(np.asarray(u) > self.max_d):
            raise AttributeError('No values of u may be > max_d')
        k = []
        try:
            for _ in range(niter):
                data = CartesianData.from_args(
                    *spatial.random_points_within_poly(self.domain, self.n))
                ii, jj, dd, _near_exterior = prepare_data(
                    data, self.domain, self.max_d)
                ec = self.compute_edge_correction(data, ii, dd, self.domain,
                                                  self.n_quad)
                k.append(self.compute_k(u, dd=dd, edge_corr=ec))
        except Exception as exc:
            # report the failure and fall through to return the partial result
            print(repr(exc))
        except KeyboardInterrupt:
            # user stopped a long run: keep what has been computed so far
            pass
        return np.array(k)