Python KDTree Examples, pysal.common.KDTree Python Examples

Example #1

0

Show file

File: Distance.py Project: jkang1643/GIS

    def __init__(self,
                 data,
                 k=2,
                 p=2,
                 ids=None,
                 radius=None,
                 distance_metric='euclidean'):
        if isKDTree(data):
            self.kdtree = data
            self.data = data.data
        else:
            self.data = data
            self.kdtree = KDTree(data,
                                 radius=radius,
                                 distance_metric=distance_metric)
        self.k = k
        self.p = p
        this_nnq = self.kdtree.query(self.data, k=k + 1, p=p)

        to_weight = this_nnq[1]
        if ids is None:
            ids = list(range(to_weight.shape[0]))

        neighbors = {}
        for i, row in enumerate(to_weight):
            row = row.tolist()
            row.remove(i)
            row = [ids[j] for j in row]
            focal = ids[i]
            neighbors[focal] = row
        W.__init__(self, neighbors, id_order=ids)

Example #2

0

Show file

    def __init__(self, data, bandwidth=None, fixed=True, k=2,
                 function='triangular', eps=1.0000001, ids=None,
                 diagonal=False):
        if issubclass(type(data), scipy.spatial.KDTree):
            self.kdt = data
            self.data = self.kdt.data
            data = self.data
        else:
            self.data = data
            self.kdt = KDTree(self.data)
        self.k = k + 1
        self.function = function.lower()
        self.fixed = fixed
        self.eps = eps
        if bandwidth:
            try:
                bandwidth = np.array(bandwidth)
                bandwidth.shape = (len(bandwidth), 1)
            except:
                bandwidth = np.ones((len(data), 1), 'float') * bandwidth
            self.bandwidth = bandwidth
        else:
            self._set_bw()

        self._eval_kernel()
        neighbors, weights = self._k_to_W(ids)
        if diagonal:
            for i in neighbors:
                weights[i][neighbors[i].index(i)] = 1.0
        W.__init__(self, neighbors, weights, ids)

Example #3

0

Show file

File: Distance.py Project: shepherdmeng/pysal

    def __init__(self,
                 data,
                 threshold,
                 p=2,
                 alpha=-1.0,
                 binary=True,
                 ids=None):
        """Casting to floats is a work around for a bug in scipy.spatial.
        See detail in pysal issue #126.

        """
        if isKDTree(data):
            self.kd = data
            self.data = self.kd.data
        else:
            try:
                data = np.asarray(data)
                if data.dtype.kind != 'f':
                    data = data.astype(float)
                self.data = data
                self.kd = KDTree(self.data)
            except:
                raise ValueError("Could not make array from data")

        self.p = p
        self.threshold = threshold
        self.binary = binary
        self.alpha = alpha
        self._band()
        neighbors, weights = self._distance_to_W(ids)
        W.__init__(self, neighbors, weights, ids)

Example #4

0

Show file

File: Distance.py Project: lanselin/pysal

    def __init__(self, data, bandwidth=None, fixed=True, k=2,
                 function='triangular', eps=1.0000001, ids=None,
                 diagonal=False):
        if isKDTree(data):
            self.kdt = data
            self.data = self.kdt.data
            data = self.data
        else:
            self.data = data
            self.kdt = KDTree(self.data)
        self.k = k + 1
        self.function = function.lower()
        self.fixed = fixed
        self.eps = eps
        if bandwidth:
            try:
                bandwidth = np.array(bandwidth)
                bandwidth.shape = (len(bandwidth), 1)
            except:
                bandwidth = np.ones((len(data), 1), 'float') * bandwidth
            self.bandwidth = bandwidth
        else:
            self._set_bw()

        self._eval_kernel()
        neighbors, weights = self._k_to_W(ids)
        if diagonal:
            for i in neighbors:
                weights[i][neighbors[i].index(i)] = 1.0
        W.__init__(self, neighbors, weights, ids)

Example #5

0

Show file

File: Distance.py Project: lanselin/pysal

    def __init__(self, data, threshold, p=2, alpha=-1.0, binary=True, ids=None,
            build_sp=True, silent=False):
        """Casting to floats is a work around for a bug in scipy.spatial.
        See detail in pysal issue #126.

        """
        self.p = p
        self.threshold = threshold
        self.binary = binary
        self.alpha = alpha
        self.build_sp = build_sp
        self.silent = silent
        
        if isKDTree(data):
            self.kd = data
            self.data = self.kd.data
        else:
            if self.build_sp:
                try:
                    data = np.asarray(data)
                    if data.dtype.kind != 'f':
                        data = data.astype(float)
                    self.data = data
                    self.kd = KDTree(self.data)
                except:
                    raise ValueError("Could not make array from data")        
            else:
                self.data = data
                self.kd = None       
        self._band()
        neighbors, weights = self._distance_to_W(ids)
        W.__init__(self, neighbors, weights, ids, silent_island_warning=self.silent)

Example #6

0

Show file

File: Distance.py Project: umutturk/pysal

    def __init__(self, data, threshold, p=2, alpha=-1.0, binary=True, ids=None):
        """Casting to floats is a work around for a bug in scipy.spatial.
        See detail in pysal issue #126.

        """
        if issubclass(type(data), scipy.spatial.KDTree):
            self.kd = data
            self.data = self.kd.data
        else:
            try:
                data = np.asarray(data)
                if data.dtype.kind != 'f':
                    data = data.astype(float)
                self.data = data
                self.kd = KDTree(self.data)
            except:
                raise ValueError("Could not make array from data")

        self.p = p
        self.threshold = threshold
        self.binary = binary
        self.alpha = alpha
        self._band()
        neighbors, weights = self._distance_to_W(ids)
        W.__init__(self, neighbors, weights, ids)

Example #7

0

Show file

 def kdtree(self):
     if 'kd' in self._propData:
         return self._propData['kd']
     else:
         pts = self.points
         if pts is not None:
             if self.distMethod == 0:  # not Euclidean Distance
                 kd = KDTree(pts)
             elif self.distMethod == 1:  # 'Arc Distance (miles)'
                 kd = KDTree(pts,
                             distance_metric="Arc",
                             radius=pysal.cg.RADIUS_EARTH_MILES)
             elif self.distMethod == 2:  # 'Arc Distance (kilometers)'
                 kd = KDTree(pts,
                             distance_metric="Arc",
                             radius=pysal.cg.RADIUS_EARTH_KM)
             self._propData['kd'] = kd
             return kd
     return None

Example #8

0

Show file

File: Distance.py Project: lanselin/pysal

 def __init__(self, data, k=2, p=2, ids=None, radius=None, distance_metric='euclidean'):
     if isKDTree(data):
         self.kdtree = data
         self.data = data.data
     else:
         self.data = data
         self.kdtree = KDTree(data, radius=radius, distance_metric=distance_metric)
     self.k = k 
     self.p = p
     this_nnq = self.kdtree.query(self.data, k=k+1, p=p)
     
     to_weight = this_nnq[1]
     if ids is None:
         ids = list(range(to_weight.shape[0]))
     
     neighbors = {}
     for i,row in enumerate(to_weight):
         row = row.tolist()
         row.remove(i)
         row = [ids[j] for j in row]
         focal = ids[i]
         neighbors[focal] = row
     W.__init__(self, neighbors, id_order=ids)

Example #9

0

Show file

File: Distance.py Project: shepherdmeng/pysal

def knnW(data, k=2, p=2, ids=None):
    """
    Creates nearest neighbor weights matrix based on k nearest
    neighbors.

    Parameters
    ----------

    kdtree      : object
                  PySAL KDTree or ArcKDTree where KDtree.data is array (n,k)
                  n observations on k characteristics used to measure
                  distances between the n objects
    k           : int
                  number of nearest neighbors
    p           : float
                  Minkowski p-norm distance metric parameter:
                  1<=p<=infinity
                  2: Euclidean distance
                  1: Manhattan distance
                  Ignored if the KDTree is an ArcKDTree
    ids         : list
                  identifiers to attach to each observation

    Returns
    -------

    w         : W
                instance
                Weights object with binary weights

    Examples
    --------

    >>> points = [(10, 10), (20, 10), (40, 10), (15, 20), (30, 20), (30, 30)]
    >>> kd = pysal.cg.kdtree.KDTree(np.array(points))
    >>> wnn2 = pysal.knnW(kd, 2)
    >>> [1,3] == wnn2.neighbors[0]
    True

    ids

    >>> wnn2 = knnW(kd,2)
    >>> wnn2[0]
    {1: 1.0, 3: 1.0}
    >>> wnn2[1]
    {0: 1.0, 3: 1.0}

    now with 1 rather than 0 offset

    >>> wnn2 = knnW(kd, 2, ids=range(1,7))
    >>> wnn2[1]
    {2: 1.0, 4: 1.0}
    >>> wnn2[2]
    {1: 1.0, 4: 1.0}
    >>> 0 in wnn2.neighbors
    False

    Notes
    -----

    Ties between neighbors of equal distance are arbitrarily broken.

    See Also
    --------
    pysal.weights.W

    """
    if isKDTree(data):
        kdt = data
        data = kdt.data
    else:
        kdt = KDTree(data)
    nnq = kdt.query(data, k=k + 1, p=p)
    info = nnq[1]

    neighbors = {}
    for i, row in enumerate(info):
        row = row.tolist()
        if i in row:
            row.remove(i)
            focal = i
        if ids:
            row = [ids[j] for j in row]
            focal = ids[i]
        neighbors[focal] = row
    return pysal.weights.W(neighbors, id_order=ids)

Example #10

0

Show file

File: Distance.py Project: shepherdmeng/pysal

class Kernel(W):
    """
    Spatial weights based on kernel functions.

    Parameters
    ----------

    data        : array
                  (n,k) or KDTree where KDtree.data is array (n,k)
                  n observations on k characteristics used to measure
                  distances between the n objects
    bandwidth   : float
                  or array-like (optional)
                  the bandwidth :math:`h_i` for the kernel.
    fixed       : binary
                  If true then :math:`h_i=h \\forall i`. If false then
                  bandwidth is adaptive across observations.
    k           : int
                  the number of nearest neighbors to use for determining
                  bandwidth. For fixed bandwidth, :math:`h_i=max(dknn) \\forall i`
                  where :math:`dknn` is a vector of k-nearest neighbor
                  distances (the distance to the kth nearest neighbor for each
                  observation).  For adaptive bandwidths, :math:`h_i=dknn_i`
    diagonal    : boolean
                  If true, set diagonal weights = 1.0, if false (default),
                  diagonals weights are set to value according to kernel
                  function.
    function    : {'triangular','uniform','quadratic','quartic','gaussian'}
                  kernel function defined as follows with

                  .. math::

                      z_{i,j} = d_{i,j}/h_i

                  triangular

                  .. math::

                      K(z) = (1 - |z|) \ if |z| \le 1

                  uniform

                  .. math::

                      K(z) = 1/2 \ if |z| \le 1

                  quadratic

                  .. math::

                      K(z) = (3/4)(1-z^2) \ if |z| \le 1

                  quartic

                  .. math::

                      K(z) = (15/16)(1-z^2)^2 \ if |z| \le 1

                  gaussian

                  .. math::

                      K(z) = (2\pi)^{(-1/2)} exp(-z^2 / 2)

    eps         : float
                  adjustment to ensure knn distance range is closed on the
                  knnth observations

    Attributes
    ----------
    weights : dict
              Dictionary keyed by id with a list of weights for each neighbor

    neighbors : dict
                of lists of neighbors keyed by observation id

    bandwidth : array
                array of bandwidths

    Examples
    --------

    >>> points=[(10, 10), (20, 10), (40, 10), (15, 20), (30, 20), (30, 30)]
    >>> kw=Kernel(points)
    >>> kw.weights[0]
    [1.0, 0.500000049999995, 0.4409830615267465]
    >>> kw.neighbors[0]
    [0, 1, 3]
    >>> kw.bandwidth
    array([[ 20.000002],
           [ 20.000002],
           [ 20.000002],
           [ 20.000002],
           [ 20.000002],
           [ 20.000002]])
    >>> kw15=Kernel(points,bandwidth=15.0)
    >>> kw15[0]
    {0: 1.0, 1: 0.33333333333333337, 3: 0.2546440075000701}
    >>> kw15.neighbors[0]
    [0, 1, 3]
    >>> kw15.bandwidth
    array([[ 15.],
           [ 15.],
           [ 15.],
           [ 15.],
           [ 15.],
           [ 15.]])

    Adaptive bandwidths user specified

    >>> bw=[25.0,15.0,25.0,16.0,14.5,25.0]
    >>> kwa=Kernel(points,bandwidth=bw)
    >>> kwa.weights[0]
    [1.0, 0.6, 0.552786404500042, 0.10557280900008403]
    >>> kwa.neighbors[0]
    [0, 1, 3, 4]
    >>> kwa.bandwidth
    array([[ 25. ],
           [ 15. ],
           [ 25. ],
           [ 16. ],
           [ 14.5],
           [ 25. ]])

    Endogenous adaptive bandwidths

    >>> kwea=Kernel(points,fixed=False)
    >>> kwea.weights[0]
    [1.0, 0.10557289844279438, 9.99999900663795e-08]
    >>> kwea.neighbors[0]
    [0, 1, 3]
    >>> kwea.bandwidth
    array([[ 11.18034101],
           [ 11.18034101],
           [ 20.000002  ],
           [ 11.18034101],
           [ 14.14213704],
           [ 18.02775818]])

    Endogenous adaptive bandwidths with Gaussian kernel

    >>> kweag=Kernel(points,fixed=False,function='gaussian')
    >>> kweag.weights[0]
    [0.3989422804014327, 0.2674190291577696, 0.2419707487162134]
    >>> kweag.bandwidth
    array([[ 11.18034101],
           [ 11.18034101],
           [ 20.000002  ],
           [ 11.18034101],
           [ 14.14213704],
           [ 18.02775818]])

    Diagonals to 1.0

    >>> kq = Kernel(points,function='gaussian')
    >>> kq.weights
    {0: [0.3989422804014327, 0.35206533556593145, 0.3412334260702758], 1: [0.35206533556593145, 0.3989422804014327, 0.2419707487162134, 0.3412334260702758, 0.31069657591175387], 2: [0.2419707487162134, 0.3989422804014327, 0.31069657591175387], 3: [0.3412334260702758, 0.3412334260702758, 0.3989422804014327, 0.3011374490937829, 0.26575287272131043], 4: [0.31069657591175387, 0.31069657591175387, 0.3011374490937829, 0.3989422804014327, 0.35206533556593145], 5: [0.26575287272131043, 0.35206533556593145, 0.3989422804014327]}
    >>> kqd = Kernel(points, function='gaussian', diagonal=True)
    >>> kqd.weights
    {0: [1.0, 0.35206533556593145, 0.3412334260702758], 1: [0.35206533556593145, 1.0, 0.2419707487162134, 0.3412334260702758, 0.31069657591175387], 2: [0.2419707487162134, 1.0, 0.31069657591175387], 3: [0.3412334260702758, 0.3412334260702758, 1.0, 0.3011374490937829, 0.26575287272131043], 4: [0.31069657591175387, 0.31069657591175387, 0.3011374490937829, 1.0, 0.35206533556593145], 5: [0.26575287272131043, 0.35206533556593145, 1.0]}

    """
    def __init__(self,
                 data,
                 bandwidth=None,
                 fixed=True,
                 k=2,
                 function='triangular',
                 eps=1.0000001,
                 ids=None,
                 diagonal=False):
        if isKDTree(data):
            self.kdt = data
            self.data = self.kdt.data
            data = self.data
        else:
            self.data = data
            self.kdt = KDTree(self.data)
        self.k = k + 1
        self.function = function.lower()
        self.fixed = fixed
        self.eps = eps
        if bandwidth:
            try:
                bandwidth = np.array(bandwidth)
                bandwidth.shape = (len(bandwidth), 1)
            except:
                bandwidth = np.ones((len(data), 1), 'float') * bandwidth
            self.bandwidth = bandwidth
        else:
            self._set_bw()

        self._eval_kernel()
        neighbors, weights = self._k_to_W(ids)
        if diagonal:
            for i in neighbors:
                weights[i][neighbors[i].index(i)] = 1.0
        W.__init__(self, neighbors, weights, ids)

    def _k_to_W(self, ids=None):
        allneighbors = {}
        weights = {}
        if ids:
            ids = np.array(ids)
        else:
            ids = np.arange(len(self.data))
        for i, neighbors in enumerate(self.kernel):
            if len(self.neigh[i]) == 0:
                allneighbors[ids[i]] = []
                weights[ids[i]] = []
            else:
                allneighbors[ids[i]] = list(ids[self.neigh[i]])
                weights[ids[i]] = self.kernel[i].tolist()
        return allneighbors, weights

    def _set_bw(self):
        dmat, neigh = self.kdt.query(self.data, k=self.k)
        if self.fixed:
            # use max knn distance as bandwidth
            bandwidth = dmat.max() * self.eps
            n = len(dmat)
            self.bandwidth = np.ones((n, 1), 'float') * bandwidth
        else:
            # use local max knn distance
            self.bandwidth = dmat.max(axis=1) * self.eps
            self.bandwidth.shape = (self.bandwidth.size, 1)
            # identify knn neighbors for each point
            nnq = self.kdt.query(self.data, k=self.k)
            self.neigh = nnq[1]

    def _eval_kernel(self):
        # get points within bandwidth distance of each point
        if not hasattr(self, 'neigh'):
            kdtq = self.kdt.query_ball_point
            neighbors = [
                kdtq(self.data[i], r=bwi[0])
                for i, bwi in enumerate(self.bandwidth)
            ]
            self.neigh = neighbors
        # get distances for neighbors
        bw = self.bandwidth

        kdtq = self.kdt.query
        z = []
        for i, nids in enumerate(self.neigh):
            di, ni = kdtq(self.data[i], k=len(nids))
            if not isinstance(di, np.ndarray):
                di = np.asarray([di] * len(nids))
                ni = np.asarray([ni] * len(nids))
            zi = np.array([dict(zip(ni, di))[nid] for nid in nids]) / bw[i]
            z.append(zi)
        zs = z
        # functions follow Anselin and Rey (2010) table 5.4
        if self.function == 'triangular':
            self.kernel = [1 - zi for zi in zs]
        elif self.function == 'uniform':
            self.kernel = [np.ones(zi.shape) * 0.5 for zi in zs]
        elif self.function == 'quadratic':
            self.kernel = [(3. / 4) * (1 - zi**2) for zi in zs]
        elif self.function == 'quartic':
            self.kernel = [(15. / 16) * (1 - zi**2)**2 for zi in zs]
        elif self.function == 'gaussian':
            c = np.pi * 2
            c = c**(-0.5)
            self.kernel = [c * np.exp(-(zi**2) / 2.) for zi in zs]
        else:
            print('Unsupported kernel function', self.function)

Example #11

0

Show file

File: kerneltest.py Project: pysal/pPysal

class Kernel(W):
    def __init__(self,
                 data,
                 bandwidth=None,
                 fixed=True,
                 k=2,
                 function='triangular',
                 eps=1.0000001,
                 ids=None,
                 diagonal=False,
                 ncores=1):
        if issubclass(type(data), scipy.spatial.KDTree):
            self.kdt = data
            self.data = self.kdt.data
            data = self.data
        else:
            self.data = data
            self.kdt = KDTree(self.data)
        self.k = k + 1
        self.function = function.lower()
        self.fixed = fixed
        self.eps = eps
        self.ncores = ncores

        if bandwidth:
            try:
                bandwidth = np.array(bandwidth)
                bandwidth.shape = (len(bandwidth), 1)
            except:
                bandwidth = np.ones((len(data), 1), 'float') * bandwidth
            self.bandwidth = bandwidth
        else:
            self._set_bw()

        self._eval_kernel()
        neighbors, weights = self._k_to_W(ids)
        if diagonal:
            for i in neighbors:
                weights[i][neighbors[i].index(i)] = 1.0
        W.__init__(self, neighbors, weights, ids)

    def _k_to_W(self, ids=None):
        allneighbors = {}
        weights = {}
        if ids:
            ids = np.array(ids)
        else:
            ids = np.arange(len(self.data))
        for i, neighbors in enumerate(self.kernel):
            if len(self.neigh[i]) == 0:
                allneighbors[ids[i]] = []
                weights[ids[i]] = []
            else:
                allneighbors[ids[i]] = list(ids[self.neigh[i]])
                weights[ids[i]] = self.kernel[i].tolist()
        return allneighbors, weights

    def _set_bw(self):
        dmat, neigh = self.kdt.query(self.data, k=self.k)
        if self.fixed:
            # use max knn distance as bandwidth
            bandwidth = dmat.max() * self.eps
            n = len(dmat)
            self.bandwidth = np.ones((n, 1), 'float') * bandwidth
        else:
            # use local max knn distance
            self.bandwidth = dmat.max(axis=1) * self.eps
            self.bandwidth.shape = (self.bandwidth.size, 1)
            # identify knn neighbors for each point
            nnq = self.kdt.query(self.data, k=self.k)
            self.neigh = nnq[1]

    def _eval_kernel(self):
        t1 = time.time()
        # get points within bandwidth distance of each point
        kdtbq = self.kdt.query_ball_point
        kdtq = self.kdt.query
        bw = self.bandwidth
        if self.ncores > 1:
            pool = mp.Pool(processes=self.ncores,
                           initializer=loadkd,
                           initargs=(kdtbq, kdtq, bw))
        if not hasattr(self, 'neigh'):
            if self.ncores > 1:
                neighbors = pool.map(bqwrapper,
                                     self.data,
                                     chunksize=len(self.bandwidth) /
                                     self.ncores)
            else:
                neighbors = [
                    kdtbq(self.data[i], r=bwi[0])
                    for i, bwi in enumerate(self.bandwidth)
                ]
            self.neigh = neighbors
        t2 = time.time()
        print "Ball Point Query took {} seconds.".format(t2 - t1)
        # get distances for neighbors
        bw = self.bandwidth

        #kdtq = self.kdt.query
        z = []
        t1 = time.time()
        if self.ncores > 1:
            iterable = [(i, nids, self.data[i])
                        for i, nids in enumerate(self.neigh)]
            z = pool.map(qwrapper, iterable)
        else:
            for i, nids in enumerate(self.neigh):
                di, ni = kdtq(self.data[i], k=len(nids))
                zi = np.array([dict(zip(ni, di))[nid] for nid in nids]) / bw[i]
                z.append(zi)
        t2 = time.time()
        print "Local query took: {} seconds".format(t2 - t1)
        zs = z
        # functions follow Anselin and Rey (2010) table 5.4
        if self.function == 'triangular':
            self.kernel = [1 - zi for zi in zs]
        elif self.function == 'uniform':
            self.kernel = [np.ones(zi.shape) * 0.5 for zi in zs]
        elif self.function == 'quadratic':
            self.kernel = [(3. / 4) * (1 - zi**2) for zi in zs]
        elif self.function == 'quartic':
            self.kernel = [(15. / 16) * (1 - zi**2)**2 for zi in zs]
        elif self.function == 'gaussian':
            c = np.pi * 2
            c = c**(-0.5)
            self.kernel = [c * np.exp(-(zi**2) / 2.) for zi in zs]
        else:
            print 'Unsupported kernel function', self.function

Example #12

0

Show file

File: Distance.py Project: lanselin/pysal

class DistanceBand(W):
    """
    Spatial weights based on distance band.

    Parameters
    ----------

    data        : array
                  (n,k) or KDTree where KDtree.data is array (n,k)
                  n observations on k characteristics used to measure
                  distances between the n objects
    threshold  : float
                 distance band
    p          : float
                 Minkowski p-norm distance metric parameter:
                 1<=p<=infinity
                 2: Euclidean distance
                 1: Manhattan distance
    binary     : boolean
                 If true w_{ij}=1 if d_{i,j}<=threshold, otherwise w_{i,j}=0
                 If false wij=dij^{alpha}
    alpha      : float
                 distance decay parameter for weight (default -1.0)
                 if alpha is positive the weights will not decline with
                 distance. If binary is True, alpha is ignored

    ids         : list
                  values to use for keys of the neighbors and weights dicts
    
    build_sp    : boolean
                  True to build sparse distance matrix and false to build dense
                  distance matrix; significant speed gains may be obtained
                  dending on the sparsity of the of distance_matrix and
                  threshold that is applied
    silent      : boolean
                  By default PySAL will print a warning if the
                  dataset contains any disconnected observations or
                  islands. To silence this warning set this
                  parameter to True.

    Attributes
    ----------
    weights : dict
              of neighbor weights keyed by observation id

    neighbors : dict
                of neighbors keyed by observation id

    Examples
    --------

    >>> points=[(10, 10), (20, 10), (40, 10), (15, 20), (30, 20), (30, 30)]
    >>> wcheck = pysal.W({0: [1, 3], 1: [0, 3], 2: [], 3: [0, 1], 4: [5], 5: [4]})
    WARNING: there is one disconnected observation (no neighbors)
    Island id:  [2]
    >>> w=DistanceBand(points,threshold=11.2)
    WARNING: there is one disconnected observation (no neighbors)
    Island id:  [2]
    >>> pysal.weights.util.neighbor_equality(w, wcheck)
    True
    >>> w=DistanceBand(points,threshold=14.2)
    >>> wcheck = pysal.W({0: [1, 3], 1: [0, 3, 4], 2: [4], 3: [1, 0], 4: [5, 2, 1], 5: [4]})
    >>> pysal.weights.util.neighbor_equality(w, wcheck)
    True



    inverse distance weights

    >>> w=DistanceBand(points,threshold=11.2,binary=False)
    WARNING: there is one disconnected observation (no neighbors)
    Island id:  [2]
    >>> w.weights[0]
    [0.10000000000000001, 0.089442719099991588]
    >>> w.neighbors[0]
    [1, 3]
    >>>

    gravity weights

    >>> w=DistanceBand(points,threshold=11.2,binary=False,alpha=-2.)
    WARNING: there is one disconnected observation (no neighbors)
    Island id:  [2]
    >>> w.weights[0]
    [0.01, 0.0079999999999999984]

    Notes
    -----

    This was initially implemented running scipy 0.8.0dev (in epd 6.1).
    earlier versions of scipy (0.7.0) have a logic bug in scipy/sparse/dok.py
    so serge changed line 221 of that file on sal-dev to fix the logic bug.

    """

    def __init__(self, data, threshold, p=2, alpha=-1.0, binary=True, ids=None,
            build_sp=True, silent=False):
        """Casting to floats is a work around for a bug in scipy.spatial.
        See detail in pysal issue #126.

        """
        self.p = p
        self.threshold = threshold
        self.binary = binary
        self.alpha = alpha
        self.build_sp = build_sp
        self.silent = silent
        
        if isKDTree(data):
            self.kd = data
            self.data = self.kd.data
        else:
            if self.build_sp:
                try:
                    data = np.asarray(data)
                    if data.dtype.kind != 'f':
                        data = data.astype(float)
                    self.data = data
                    self.kd = KDTree(self.data)
                except:
                    raise ValueError("Could not make array from data")        
            else:
                self.data = data
                self.kd = None       
        self._band()
        neighbors, weights = self._distance_to_W(ids)
        W.__init__(self, neighbors, weights, ids, silent_island_warning=self.silent)

    @classmethod
    def from_shapefile(cls, filepath, threshold, idVariable=None, **kwargs):
        """
        Distance-band based weights from shapefile

        Arguments
        ---------
        shapefile   : string
                      shapefile name with shp suffix
        idVariable  : string
                      name of column in shapefile's DBF to use for ids

        Returns
        --------
        Kernel Weights Object

        See Also
        ---------
        :class: `pysal.weights.DistanceBand`
        :class: `pysal.weights.W`
        """
        points = get_points_array_from_shapefile(filepath)
        if idVariable is not None:
            ids = get_ids(filepath, idVariable)
        else:
            ids = None
        return cls.from_array(points, threshold, ids=ids, **kwargs)
    
    @classmethod
    def from_array(cls, array, threshold, **kwargs):
        """
        Construct a DistanceBand weights from an array. Supports all the same options
        as :class:`pysal.weights.DistanceBand`

        See Also
        --------
        :class:`pysal.weights.DistanceBand`
        :class:`pysal.weights.W`
        """
        return cls(array, threshold, **kwargs)
    
    @classmethod
    def from_dataframe(cls, df, threshold, geom_col='geometry', ids=None, **kwargs):
        """
        Make DistanceBand weights from a dataframe.

        Parameters
        ----------
        df      :   pandas.dataframe
                    a dataframe with a geometry column that can be used to
                    construct a W object
        geom_col :   string
                    column name of the geometry stored in df
        ids     :   string or iterable
                    if string, the column name of the indices from the dataframe
                    if iterable, a list of ids to use for the W
                    if None, df.index is used.

        See Also
        --------
        :class:`pysal.weights.DistanceBand`
        :class:`pysal.weights.W`
        """
        pts = get_points_array(df[geom_col])
        if ids is None:
            ids = df.index.tolist()
        elif isinstance(ids, str):
            ids = df[ids].tolist()
        return cls(pts, threshold, ids=ids, **kwargs)

    def _band(self):
        """Find all pairs within threshold.

        """
        if self.build_sp:    
            self.dmat = self.kd.sparse_distance_matrix(
                    self.kd, max_distance=self.threshold).tocsr()
        else:
            if str(self.kd).split('.')[-1][0:10] == 'Arc_KDTree':
            	raise TypeError('Unable to calculate dense arc distance matrix;'
            	        ' parameter "build_sp" must be set to True for arc'
            	        ' distance type weight')
            self.dmat = self._spdistance_matrix(self.data, self.data, self.threshold)


    def _distance_to_W(self, ids=None):
        if self.binary:
            self.dmat[self.dmat>0] = 1
            self.dmat.eliminate_zeros()
            tempW = WSP2W(WSP(self.dmat), silent_island_warning=self.silent)
            neighbors = tempW.neighbors
            weight_keys = tempW.weights.keys()
            weight_vals = tempW.weights.values()
            weights = dict(zip(weight_keys, map(list, weight_vals)))
            return neighbors, weights
        else:
            weighted = self.dmat.power(self.alpha)
            weighted[weighted==np.inf] = 0
            weighted.eliminate_zeros()
            tempW = WSP2W(WSP(weighted), silent_island_warning=self.silent)
            neighbors = tempW.neighbors
            weight_keys = tempW.weights.keys()
            weight_vals = tempW.weights.values()
            weights = dict(zip(weight_keys, map(list, weight_vals)))
            return neighbors, weights

    def _spdistance_matrix(self, x,y, threshold=None):
        dist = distance_matrix(x,y)
        if threshold is not None:
            zeros = dist > threshold
            dist[zeros] = 0
        return sp.csr_matrix(dist)

Example #13

0

Show file

File: kerneltest.py Project: giserh/cybergis-toolkit

class Kernel(W):
    def __init__(self, data, bandwidth=None, fixed=True, k=2,
                 function='triangular', eps=1.0000001, ids=None,
                 diagonal=False, ncores=1):
        if issubclass(type(data), scipy.spatial.KDTree):
            self.kdt = data
            self.data = self.kdt.data
            data = self.data
        else:
            self.data = data
            self.kdt = KDTree(self.data)
        self.k = k + 1
        self.function = function.lower()
        self.fixed = fixed
        self.eps = eps
        self.ncores = ncores

        if bandwidth:
            try:
                bandwidth = np.array(bandwidth)
                bandwidth.shape = (len(bandwidth), 1)
            except:
                bandwidth = np.ones((len(data), 1), 'float') * bandwidth
            self.bandwidth = bandwidth
        else:
            self._set_bw()

        self._eval_kernel()
        neighbors, weights = self._k_to_W(ids)
        if diagonal:
            for i in neighbors:
                weights[i][neighbors[i].index(i)] = 1.0
        W.__init__(self, neighbors, weights, ids)

    def _k_to_W(self, ids=None):
        allneighbors = {}
        weights = {}
        if ids:
            ids = np.array(ids)
        else:
            ids = np.arange(len(self.data))
        for i, neighbors in enumerate(self.kernel):
            if len(self.neigh[i]) == 0:
                allneighbors[ids[i]] = []
                weights[ids[i]] = []
            else:
                allneighbors[ids[i]] = list(ids[self.neigh[i]])
                weights[ids[i]] = self.kernel[i].tolist()
        return allneighbors, weights

    def _set_bw(self):
        dmat, neigh = self.kdt.query(self.data, k=self.k)
        if self.fixed:
            # use max knn distance as bandwidth
            bandwidth = dmat.max() * self.eps
            n = len(dmat)
            self.bandwidth = np.ones((n, 1), 'float') * bandwidth
        else:
            # use local max knn distance
            self.bandwidth = dmat.max(axis=1) * self.eps
            self.bandwidth.shape = (self.bandwidth.size, 1)
            # identify knn neighbors for each point
            nnq = self.kdt.query(self.data, k=self.k)
            self.neigh = nnq[1]

    def _eval_kernel(self):
        t1 = time.time()
        # get points within bandwidth distance of each point
        kdtbq = self.kdt.query_ball_point
        kdtq = self.kdt.query
        bw = self.bandwidth
        if self.ncores > 1:
            pool = mp.Pool(processes=self.ncores, initializer=loadkd, initargs=(kdtbq,kdtq,bw))
        if not hasattr(self, 'neigh'):
            if self.ncores > 1:
                neighbors = pool.map(bqwrapper,self.data, chunksize = len(self.bandwidth) / self.ncores)
            else:
                neighbors = [kdtbq(self.data[i], r=bwi[0]) for i,
                            bwi in enumerate(self.bandwidth)]
            self.neigh = neighbors
        t2 = time.time()
        print "Ball Point Query took {} seconds.".format(t2 - t1)
        # get distances for neighbors
        bw = self.bandwidth

        #kdtq = self.kdt.query
        z = []
        t1 = time.time()
        if self.ncores > 1:
            iterable = [(i,nids, self.data[i]) for i, nids in enumerate(self.neigh)]
            z = pool.map(qwrapper, iterable)
        else:
            for i, nids in enumerate(self.neigh):
                di, ni = kdtq(self.data[i], k=len(nids))
                zi = np.array([dict(zip(ni, di))[nid] for nid in nids]) / bw[i]
                z.append(zi)
        t2 = time.time()
        print "Local query took: {} seconds".format(t2 - t1)
        zs = z
        # functions follow Anselin and Rey (2010) table 5.4
        if self.function == 'triangular':
            self.kernel = [1 - zi for zi in zs]
        elif self.function == 'uniform':
            self.kernel = [np.ones(zi.shape) * 0.5 for zi in zs]
        elif self.function == 'quadratic':
            self.kernel = [(3. / 4) * (1 - zi ** 2) for zi in zs]
        elif self.function == 'quartic':
            self.kernel = [(15. / 16) * (1 - zi ** 2) ** 2 for zi in zs]
        elif self.function == 'gaussian':
            c = np.pi * 2
            c = c ** (-0.5)
            self.kernel = [c * np.exp(-(zi ** 2) / 2.) for zi in zs]
        else:
            print 'Unsupported kernel function', self.function

Example #14

0

Show file

File: Distance.py Project: lanselin/pysal

class Kernel(W):
    """
    Spatial weights based on kernel functions.

    Parameters
    ----------

    data        : array
                  (n,k) or KDTree where KDtree.data is array (n,k)
                  n observations on k characteristics used to measure
                  distances between the n objects
    bandwidth   : float
                  or array-like (optional)
                  the bandwidth :math:`h_i` for the kernel.
    fixed       : binary
                  If true then :math:`h_i=h \\forall i`. If false then
                  bandwidth is adaptive across observations.
    k           : int
                  the number of nearest neighbors to use for determining
                  bandwidth. For fixed bandwidth, :math:`h_i=max(dknn) \\forall i`
                  where :math:`dknn` is a vector of k-nearest neighbor
                  distances (the distance to the kth nearest neighbor for each
                  observation).  For adaptive bandwidths, :math:`h_i=dknn_i`
    diagonal    : boolean
                  If true, set diagonal weights = 1.0, if false (default),
                  diagonals weights are set to value according to kernel
                  function.
    function    : {'triangular','uniform','quadratic','quartic','gaussian'}
                  kernel function defined as follows with

                  .. math::

                      z_{i,j} = d_{i,j}/h_i

                  triangular

                  .. math::

                      K(z) = (1 - |z|) \ if |z| \le 1

                  uniform

                  .. math::

                      K(z) = 1/2 \ if |z| \le 1

                  quadratic

                  .. math::

                      K(z) = (3/4)(1-z^2) \ if |z| \le 1

                  quartic

                  .. math::

                      K(z) = (15/16)(1-z^2)^2 \ if |z| \le 1

                  gaussian

                  .. math::

                      K(z) = (2\pi)^{(-1/2)} exp(-z^2 / 2)

    eps         : float
                  adjustment to ensure knn distance range is closed on the
                  knnth observations

    Attributes
    ----------
    weights : dict
              Dictionary keyed by id with a list of weights for each neighbor

    neighbors : dict
                of lists of neighbors keyed by observation id

    bandwidth : array
                array of bandwidths

    Examples
    --------

    >>> points=[(10, 10), (20, 10), (40, 10), (15, 20), (30, 20), (30, 30)]
    >>> kw=Kernel(points)
    >>> kw.weights[0]
    [1.0, 0.500000049999995, 0.4409830615267465]
    >>> kw.neighbors[0]
    [0, 1, 3]
    >>> kw.bandwidth
    array([[ 20.000002],
           [ 20.000002],
           [ 20.000002],
           [ 20.000002],
           [ 20.000002],
           [ 20.000002]])
    >>> kw15=Kernel(points,bandwidth=15.0)
    >>> kw15[0]
    {0: 1.0, 1: 0.33333333333333337, 3: 0.2546440075000701}
    >>> kw15.neighbors[0]
    [0, 1, 3]
    >>> kw15.bandwidth
    array([[ 15.],
           [ 15.],
           [ 15.],
           [ 15.],
           [ 15.],
           [ 15.]])

    Adaptive bandwidths user specified

    >>> bw=[25.0,15.0,25.0,16.0,14.5,25.0]
    >>> kwa=Kernel(points,bandwidth=bw)
    >>> kwa.weights[0]
    [1.0, 0.6, 0.552786404500042, 0.10557280900008403]
    >>> kwa.neighbors[0]
    [0, 1, 3, 4]
    >>> kwa.bandwidth
    array([[ 25. ],
           [ 15. ],
           [ 25. ],
           [ 16. ],
           [ 14.5],
           [ 25. ]])

    Endogenous adaptive bandwidths

    >>> kwea=Kernel(points,fixed=False)
    >>> kwea.weights[0]
    [1.0, 0.10557289844279438, 9.99999900663795e-08]
    >>> kwea.neighbors[0]
    [0, 1, 3]
    >>> kwea.bandwidth
    array([[ 11.18034101],
           [ 11.18034101],
           [ 20.000002  ],
           [ 11.18034101],
           [ 14.14213704],
           [ 18.02775818]])

    Endogenous adaptive bandwidths with Gaussian kernel

    >>> kweag=Kernel(points,fixed=False,function='gaussian')
    >>> kweag.weights[0]
    [0.3989422804014327, 0.2674190291577696, 0.2419707487162134]
    >>> kweag.bandwidth
    array([[ 11.18034101],
           [ 11.18034101],
           [ 20.000002  ],
           [ 11.18034101],
           [ 14.14213704],
           [ 18.02775818]])

    Diagonals to 1.0

    >>> kq = Kernel(points,function='gaussian')
    >>> kq.weights
    {0: [0.3989422804014327, 0.35206533556593145, 0.3412334260702758], 1: [0.35206533556593145, 0.3989422804014327, 0.2419707487162134, 0.3412334260702758, 0.31069657591175387], 2: [0.2419707487162134, 0.3989422804014327, 0.31069657591175387], 3: [0.3412334260702758, 0.3412334260702758, 0.3989422804014327, 0.3011374490937829, 0.26575287272131043], 4: [0.31069657591175387, 0.31069657591175387, 0.3011374490937829, 0.3989422804014327, 0.35206533556593145], 5: [0.26575287272131043, 0.35206533556593145, 0.3989422804014327]}
    >>> kqd = Kernel(points, function='gaussian', diagonal=True)
    >>> kqd.weights
    {0: [1.0, 0.35206533556593145, 0.3412334260702758], 1: [0.35206533556593145, 1.0, 0.2419707487162134, 0.3412334260702758, 0.31069657591175387], 2: [0.2419707487162134, 1.0, 0.31069657591175387], 3: [0.3412334260702758, 0.3412334260702758, 1.0, 0.3011374490937829, 0.26575287272131043], 4: [0.31069657591175387, 0.31069657591175387, 0.3011374490937829, 1.0, 0.35206533556593145], 5: [0.26575287272131043, 0.35206533556593145, 1.0]}

    """
    def __init__(self, data, bandwidth=None, fixed=True, k=2,
                 function='triangular', eps=1.0000001, ids=None,
                 diagonal=False):
        if isKDTree(data):
            self.kdt = data
            self.data = self.kdt.data
            data = self.data
        else:
            self.data = data
            self.kdt = KDTree(self.data)
        self.k = k + 1
        self.function = function.lower()
        self.fixed = fixed
        self.eps = eps
        if bandwidth:
            try:
                bandwidth = np.array(bandwidth)
                bandwidth.shape = (len(bandwidth), 1)
            except:
                bandwidth = np.ones((len(data), 1), 'float') * bandwidth
            self.bandwidth = bandwidth
        else:
            self._set_bw()

        self._eval_kernel()
        neighbors, weights = self._k_to_W(ids)
        if diagonal:
            for i in neighbors:
                weights[i][neighbors[i].index(i)] = 1.0
        W.__init__(self, neighbors, weights, ids)
    
    @classmethod
    def from_shapefile(cls, filepath, idVariable=None,  **kwargs):
        """
        Kernel based weights from shapefile

        Arguments
        ---------
        shapefile   : string
                      shapefile name with shp suffix
        idVariable  : string
                      name of column in shapefile's DBF to use for ids

        Returns
        --------
        Kernel Weights Object

        See Also
        ---------
        :class:`pysal.weights.Kernel`
        :class:`pysal.weights.W`
        """
        points = get_points_array_from_shapefile(filepath)
        if idVariable is not None:
            ids = get_ids(filepath, idVariable)
        else:
            ids = None
        return cls.from_array(points, ids=ids, **kwargs)
    
    @classmethod
    def from_array(cls, array, **kwargs):
        """
        Construct a Kernel weights from an array. Supports all the same options
        as :class:`pysal.weights.Kernel`

        See Also
        --------
        :class:`pysal.weights.Kernel`
        :class:`pysal.weights.W`
        """
        return cls(array, **kwargs)

    @classmethod
    def from_dataframe(cls, df, geom_col='geometry', ids=None, **kwargs):
        """
        Make Kernel weights from a dataframe.

        Parameters
        ----------
        df      :   pandas.dataframe
                    a dataframe with a geometry column that can be used to
                    construct a W object
        geom_col :   string
                    column name of the geometry stored in df
        ids     :   string or iterable
                    if string, the column name of the indices from the dataframe
                    if iterable, a list of ids to use for the W
                    if None, df.index is used.

        See Also
        --------
        :class:`pysal.weights.Kernel`
        :class:`pysal.weights.W`
        """
        pts = get_points_array(df[geom_col])
        if ids is None:
            ids = df.index.tolist()
        elif isinstance(ids, str):
            ids = df[ids].tolist()
        return cls(pts, ids=ids, **kwargs)

    def _k_to_W(self, ids=None):
        allneighbors = {}
        weights = {}
        if ids:
            ids = np.array(ids)
        else:
            ids = np.arange(len(self.data))
        for i, neighbors in enumerate(self.kernel):
            if len(self.neigh[i]) == 0:
                allneighbors[ids[i]] = []
                weights[ids[i]] = []
            else:
                allneighbors[ids[i]] = list(ids[self.neigh[i]])
                weights[ids[i]] = self.kernel[i].tolist()
        return allneighbors, weights

    def _set_bw(self):
        dmat, neigh = self.kdt.query(self.data, k=self.k)
        if self.fixed:
            # use max knn distance as bandwidth
            bandwidth = dmat.max() * self.eps
            n = len(dmat)
            self.bandwidth = np.ones((n, 1), 'float') * bandwidth
        else:
            # use local max knn distance
            self.bandwidth = dmat.max(axis=1) * self.eps
            self.bandwidth.shape = (self.bandwidth.size, 1)
            # identify knn neighbors for each point
            nnq = self.kdt.query(self.data, k=self.k)
            self.neigh = nnq[1]

    def _eval_kernel(self):
        # get points within bandwidth distance of each point
        if not hasattr(self, 'neigh'):
            kdtq = self.kdt.query_ball_point
            neighbors = [kdtq(self.data[i], r=bwi[0]) for i,
                         bwi in enumerate(self.bandwidth)]
            self.neigh = neighbors
        # get distances for neighbors
        bw = self.bandwidth

        kdtq = self.kdt.query
        z = []
        for i, nids in enumerate(self.neigh):
            di, ni = kdtq(self.data[i], k=len(nids))
            if not isinstance(di, np.ndarray):
                di = np.asarray([di] * len(nids))
                ni = np.asarray([ni] * len(nids))
            zi = np.array([dict(zip(ni, di))[nid] for nid in nids]) / bw[i]
            z.append(zi)
        zs = z
        # functions follow Anselin and Rey (2010) table 5.4
        if self.function == 'triangular':
            self.kernel = [1 - zi for zi in zs]
        elif self.function == 'uniform':
            self.kernel = [np.ones(zi.shape) * 0.5 for zi in zs]
        elif self.function == 'quadratic':
            self.kernel = [(3. / 4) * (1 - zi ** 2) for zi in zs]
        elif self.function == 'quartic':
            self.kernel = [(15. / 16) * (1 - zi ** 2) ** 2 for zi in zs]
        elif self.function == 'gaussian':
            c = np.pi * 2
            c = c ** (-0.5)
            self.kernel = [c * np.exp(-(zi ** 2) / 2.) for zi in zs]
        else:
            print('Unsupported kernel function', self.function)

Example #15

0

Show file

File: Distance.py Project: jkang1643/GIS

class DistanceBand(W):
    """
    Spatial weights based on distance band.

    Parameters
    ----------

    data        : array
                  (n,k) or KDTree where KDtree.data is array (n,k)
                  n observations on k characteristics used to measure
                  distances between the n objects
    threshold  : float
                 distance band
    p          : float
                 Minkowski p-norm distance metric parameter:
                 1<=p<=infinity
                 2: Euclidean distance
                 1: Manhattan distance
    binary     : boolean
                 If true w_{ij}=1 if d_{i,j}<=threshold, otherwise w_{i,j}=0
                 If false wij=dij^{alpha}
    alpha      : float
                 distance decay parameter for weight (default -1.0)
                 if alpha is positive the weights will not decline with
                 distance. If binary is True, alpha is ignored

    ids         : list
                  values to use for keys of the neighbors and weights dicts
    
    build_sp    : boolean
                  True to build sparse distance matrix and false to build dense
                  distance matrix; significant speed gains may be obtained
                  dending on the sparsity of the of distance_matrix and
                  threshold that is applied
    silent      : boolean
                  By default PySAL will print a warning if the
                  dataset contains any disconnected observations or
                  islands. To silence this warning set this
                  parameter to True.

    Attributes
    ----------
    weights : dict
              of neighbor weights keyed by observation id

    neighbors : dict
                of neighbors keyed by observation id

    Examples
    --------

    >>> points=[(10, 10), (20, 10), (40, 10), (15, 20), (30, 20), (30, 30)]
    >>> wcheck = pysal.W({0: [1, 3], 1: [0, 3], 2: [], 3: [0, 1], 4: [5], 5: [4]})
    WARNING: there is one disconnected observation (no neighbors)
    Island id:  [2]
    >>> w=DistanceBand(points,threshold=11.2)
    WARNING: there is one disconnected observation (no neighbors)
    Island id:  [2]
    >>> pysal.weights.util.neighbor_equality(w, wcheck)
    True
    >>> w=DistanceBand(points,threshold=14.2)
    >>> wcheck = pysal.W({0: [1, 3], 1: [0, 3, 4], 2: [4], 3: [1, 0], 4: [5, 2, 1], 5: [4]})
    >>> pysal.weights.util.neighbor_equality(w, wcheck)
    True



    inverse distance weights

    >>> w=DistanceBand(points,threshold=11.2,binary=False)
    WARNING: there is one disconnected observation (no neighbors)
    Island id:  [2]
    >>> w.weights[0]
    [0.10000000000000001, 0.089442719099991588]
    >>> w.neighbors[0]
    [1, 3]
    >>>

    gravity weights

    >>> w=DistanceBand(points,threshold=11.2,binary=False,alpha=-2.)
    WARNING: there is one disconnected observation (no neighbors)
    Island id:  [2]
    >>> w.weights[0]
    [0.01, 0.0079999999999999984]

    Notes
    -----

    This was initially implemented running scipy 0.8.0dev (in epd 6.1).
    earlier versions of scipy (0.7.0) have a logic bug in scipy/sparse/dok.py
    so serge changed line 221 of that file on sal-dev to fix the logic bug.

    """
    def __init__(self,
                 data,
                 threshold,
                 p=2,
                 alpha=-1.0,
                 binary=True,
                 ids=None,
                 build_sp=True,
                 silent=False):
        """Casting to floats is a work around for a bug in scipy.spatial.
        See detail in pysal issue #126.

        """
        if ids is not None:
            ids = list(ids)
        self.p = p
        self.threshold = threshold
        self.binary = binary
        self.alpha = alpha
        self.build_sp = build_sp
        self.silent = silent

        if isKDTree(data):
            self.kd = data
            self.data = self.kd.data
        else:
            if self.build_sp:
                try:
                    data = np.asarray(data)
                    if data.dtype.kind != 'f':
                        data = data.astype(float)
                    self.data = data
                    self.kd = KDTree(self.data)
                except:
                    raise ValueError("Could not make array from data")
            else:
                self.data = data
                self.kd = None
        self._band()
        neighbors, weights = self._distance_to_W(ids)
        W.__init__(self,
                   neighbors,
                   weights,
                   ids,
                   silent_island_warning=self.silent)

    @classmethod
    def from_shapefile(cls, filepath, threshold, idVariable=None, **kwargs):
        """
        Distance-band based weights from shapefile

        Arguments
        ---------
        shapefile   : string
                      shapefile name with shp suffix
        idVariable  : string
                      name of column in shapefile's DBF to use for ids

        Returns
        --------
        Kernel Weights Object

        See Also
        ---------
        :class: `pysal.weights.DistanceBand`
        :class: `pysal.weights.W`
        """
        points = get_points_array_from_shapefile(filepath)
        if idVariable is not None:
            ids = get_ids(filepath, idVariable)
        else:
            ids = None
        return cls.from_array(points, threshold, ids=ids, **kwargs)

    @classmethod
    def from_array(cls, array, threshold, **kwargs):
        """
        Construct a DistanceBand weights from an array. Supports all the same options
        as :class:`pysal.weights.DistanceBand`

        See Also
        --------
        :class:`pysal.weights.DistanceBand`
        :class:`pysal.weights.W`
        """
        return cls(array, threshold, **kwargs)

    @classmethod
    def from_dataframe(cls,
                       df,
                       threshold,
                       geom_col='geometry',
                       ids=None,
                       **kwargs):
        """
        Make DistanceBand weights from a dataframe.

        Parameters
        ----------
        df      :   pandas.dataframe
                    a dataframe with a geometry column that can be used to
                    construct a W object
        geom_col :   string
                    column name of the geometry stored in df
        ids     :   string or iterable
                    if string, the column name of the indices from the dataframe
                    if iterable, a list of ids to use for the W
                    if None, df.index is used.

        See Also
        --------
        :class:`pysal.weights.DistanceBand`
        :class:`pysal.weights.W`
        """
        pts = get_points_array(df[geom_col])
        if ids is None:
            ids = df.index.tolist()
        elif isinstance(ids, str):
            ids = df[ids].tolist()
        else:
            ids = df.index.tolist()
        return cls(pts, threshold, ids=ids, **kwargs)

    def _band(self):
        """Find all pairs within threshold.

        """
        if self.build_sp:
            self.dmat = self.kd.sparse_distance_matrix(
                self.kd, max_distance=self.threshold, p=self.p).tocsr()
        else:
            if str(self.kd).split('.')[-1][0:10] == 'Arc_KDTree':
                raise TypeError(
                    'Unable to calculate dense arc distance matrix;'
                    ' parameter "build_sp" must be set to True for arc'
                    ' distance type weight')
            self.dmat = self._spdistance_matrix(self.data, self.data,
                                                self.threshold)

    def _distance_to_W(self, ids=None):
        if self.binary:
            self.dmat[self.dmat > 0] = 1
            self.dmat.eliminate_zeros()
            tempW = WSP2W(WSP(self.dmat, id_order=ids),
                          silent_island_warning=self.silent)
            neighbors = tempW.neighbors
            weight_keys = list(tempW.weights.keys())
            weight_vals = list(tempW.weights.values())
            weights = dict(list(zip(weight_keys, list(map(list,
                                                          weight_vals)))))
            return neighbors, weights
        else:
            weighted = self.dmat.power(self.alpha)
            weighted[weighted == np.inf] = 0
            weighted.eliminate_zeros()
            tempW = WSP2W(WSP(weighted, id_order=ids),
                          silent_island_warning=self.silent)
            neighbors = tempW.neighbors
            weight_keys = list(tempW.weights.keys())
            weight_vals = list(tempW.weights.values())
            weights = dict(list(zip(weight_keys, list(map(list,
                                                          weight_vals)))))
            return neighbors, weights

    def _spdistance_matrix(self, x, y, threshold=None):
        dist = distance_matrix(x, y)
        if threshold is not None:
            zeros = dist > threshold
            dist[zeros] = 0
        return sp.csr_matrix(dist)

Example #16

0

Show file

def knnW(data, k=2, p=2, ids=None, pct_unique=0.25):
    """
    Creates nearest neighbor weights matrix based on k nearest
    neighbors.

    Parameters
    ----------

    data       : array (n,k) or KDTree where KDtree.data is array (n,k)
                 n observations on k characteristics used to measure
                 distances between the n objects
    k          : int
                 number of nearest neighbors
    p          : float
                 Minkowski p-norm distance metric parameter:
                 1<=p<=infinity
                 2: Euclidean distance
                 1: Manhattan distance
    ids        : list
                 identifiers to attach to each observation
    pct_unique : float
                 threshold percentage of unique points in data. Below this
                 threshold tree is built on unique values only

    Returns
    -------

    w         : W instance
                Weights object with binary weights

    Examples
    --------

    >>> x,y=np.indices((5,5))
    >>> x.shape=(25,1)
    >>> y.shape=(25,1)
    >>> data=np.hstack([x,y])
    >>> wnn2=knnW(data,k=2)
    >>> wnn4=knnW(data,k=4)
    >>> set([1,5,6,2]) == set(wnn4.neighbors[0])
    True
    >>> set([0,6,10,1]) == set(wnn4.neighbors[5])
    True
    >>> set([1,5]) == set(wnn2.neighbors[0])
    True
    >>> set([0,6]) == set(wnn2.neighbors[5])
    True
    >>> "%.2f"%wnn2.pct_nonzero
    '0.08'
    >>> wnn4.pct_nonzero
    0.16
    >>> wnn3e=knnW(data,p=2,k=3)
    >>> set([1,5,6]) == set(wnn3e.neighbors[0])
    True
    >>> wnn3m=knnW(data,p=1,k=3)
    >>> a = set([1,5,2])
    >>> b = set([1,5,6])
    >>> c = set([1,5,10])
    >>> w0n = set(wnn3m.neighbors[0])
    >>> a==w0n or b==w0n or c==w0n
    True

    ids

    >>> wnn2 = knnW(data,2)
    >>> wnn2[0]
    {1: 1.0, 5: 1.0}
    >>> wnn2[1]
    {0: 1.0, 2: 1.0}

    now with 1 rather than 0 offset

    >>> wnn2 = knnW(data,2, ids = range(1,26))
    >>> wnn2[1]
    {2: 1.0, 6: 1.0}
    >>> wnn2[2]
    {1: 1.0, 3: 1.0}
    >>> 0 in wnn2.neighbors
    False

    Notes
    -----

    Ties between neighbors of equal distance are arbitrarily broken.

    See Also
    --------
    pysal.weights.W

    """

    if issubclass(type(data), scipy.spatial.KDTree):
        kd = data
        data = kd.data
        nnq = kd.query(data, k=k+1, p=p)
        info = nnq[1]
    elif type(data).__name__ == 'ndarray':
        # check if unique points are a small fraction of all points
        ind =  np.lexsort(data.T)
        u = data[np.concatenate(([True],np.any(data[ind[1:]]!=data[ind[:-1]],axis=1)))]
        pct_u = len(u)*1. / len(data)
        if pct_u < pct_unique:
            tree = KDTree(u)
            nnq = tree.query(data, k=k+1, p=p)
            info = nnq[1]
            uid = [np.where((data == ui).all(axis=1))[0][0] for ui in u]
            new_info = np.zeros((len(data), k + 1), 'int')
            for i, row in enumerate(info):
                new_info[i] = [uid[j] for j in row]
            info = new_info
        else:
            kd = KDTree(data)
            # calculate
            nnq = kd.query(data, k=k + 1, p=p)
            info = nnq[1]
    else:
        print 'Unsupported type'
        return None

    neighbors = {}
    for i, row in enumerate(info):
        row = row.tolist()
        if i in row:
            row.remove(i)
            focal = i
        if ids:
            row = [ ids[j] for j in row]
            focal = ids[i]
        neighbors[focal] = row
    return pysal.weights.W(neighbors,  id_order=ids)

Example #17

0

Show file

File: Distance.py Project: lanselin/pysal

class KNN(W):
    """
    Creates nearest neighbor weights matrix based on k nearest
    neighbors.

    Parameters
    ----------
    kdtree      : object
                  PySAL KDTree or ArcKDTree where KDtree.data is array (n,k)
                  n observations on k characteristics used to measure
                  distances between the n objects
    k           : int
                  number of nearest neighbors
    p           : float
                  Minkowski p-norm distance metric parameter:
                  1<=p<=infinity
                  2: Euclidean distance
                  1: Manhattan distance
                  Ignored if the KDTree is an ArcKDTree
    ids         : list
                  identifiers to attach to each observation

    Returns
    -------

    w         : W
                instance
                Weights object with binary weights

    Examples
    --------
    >>> points = [(10, 10), (20, 10), (40, 10), (15, 20), (30, 20), (30, 30)]
    >>> kd = pysal.cg.kdtree.KDTree(np.array(points))
    >>> wnn2 = pysal.KNN(kd, 2)
    >>> [1,3] == wnn2.neighbors[0]
    True

    ids

    >>> wnn2 = KNN(kd,2)
    >>> wnn2[0]
    {1: 1.0, 3: 1.0}
    >>> wnn2[1]
    {0: 1.0, 3: 1.0}

    now with 1 rather than 0 offset

    >>> wnn2 = KNN(kd, 2, ids=range(1,7))
    >>> wnn2[1]
    {2: 1.0, 4: 1.0}
    >>> wnn2[2]
    {1: 1.0, 4: 1.0}
    >>> 0 in wnn2.neighbors
    False

    Notes
    -----

    Ties between neighbors of equal distance are arbitrarily broken.

    See Also
    --------
    :class:`pysal.weights.W`
    """
    def __init__(self, data, k=2, p=2, ids=None, radius=None, distance_metric='euclidean'):
        if isKDTree(data):
            self.kdtree = data
            self.data = data.data
        else:
            self.data = data
            self.kdtree = KDTree(data, radius=radius, distance_metric=distance_metric)
        self.k = k 
        self.p = p
        this_nnq = self.kdtree.query(self.data, k=k+1, p=p)
        
        to_weight = this_nnq[1]
        if ids is None:
            ids = list(range(to_weight.shape[0]))
        
        neighbors = {}
        for i,row in enumerate(to_weight):
            row = row.tolist()
            row.remove(i)
            row = [ids[j] for j in row]
            focal = ids[i]
            neighbors[focal] = row
        W.__init__(self, neighbors, id_order=ids)
    
    @classmethod
    def from_shapefile(cls, filepath, **kwargs):
        """
        Nearest neighbor weights from a shapefile.

        Parameters
        ----------

        data       : string
                     shapefile containing attribute data.
        k          : int
                     number of nearest neighbors
        p          : float
                     Minkowski p-norm distance metric parameter:
                     1<=p<=infinity
                     2: Euclidean distance
                     1: Manhattan distance
        ids        : list
                     identifiers to attach to each observation
        radius     : float
                     If supplied arc_distances will be calculated
                     based on the given radius. p will be ignored.

        Returns
        -------

        w         : KNN
                    instance; Weights object with binary weights.

        Examples
        --------

        Polygon shapefile

        >>> wc=knnW_from_shapefile(pysal.examples.get_path("columbus.shp"))
        >>> "%.4f"%wc.pct_nonzero
        '4.0816'
        >>> set([2,1]) == set(wc.neighbors[0])
        True
        >>> wc3=pysal.knnW_from_shapefile(pysal.examples.get_path("columbus.shp"),k=3)
        >>> set(wc3.neighbors[0]) == set([2,1,3])
        True
        >>> set(wc3.neighbors[2]) == set([4,3,0])
        True

        1 offset rather than 0 offset

        >>> wc3_1=knnW_from_shapefile(pysal.examples.get_path("columbus.shp"),k=3,idVariable="POLYID")
        >>> set([4,3,2]) == set(wc3_1.neighbors[1])
        True
        >>> wc3_1.weights[2]
        [1.0, 1.0, 1.0]
        >>> set([4,1,8]) == set(wc3_1.neighbors[2])
        True


        Point shapefile

        >>> w=knnW_from_shapefile(pysal.examples.get_path("juvenile.shp"))
        >>> w.pct_nonzero
        1.1904761904761905
        >>> w1=knnW_from_shapefile(pysal.examples.get_path("juvenile.shp"),k=1)
        >>> "%.3f"%w1.pct_nonzero

        Notes
        -----

        Ties between neighbors of equal distance are arbitrarily broken.

        See Also
        --------
        :class:`pysal.weights.KNN`
        :class:`pysal.weights.W`
        """
        return cls(get_points_array_from_shapefile(filepath), **kwargs)
    
    @classmethod
    def from_array(cls, array, **kwargs):
        """
        Creates nearest neighbor weights matrix based on k nearest
        neighbors.

        Parameters
        ----------
        array       : np.ndarray
                      (n, k) array representing n observations on 
                      k characteristics used to measure distances 
                      between the n objects
        **kwargs    : keyword arguments, see Rook

        Returns
        -------
        w         : W
                    instance
                    Weights object with binary weights

        Examples
        --------
        >>> points = [(10, 10), (20, 10), (40, 10), (15, 20), (30, 20), (30, 30)]
        >>> wnn2 = pysal.KNN.from_array(points, 2)
        >>> [1,3] == wnn2.neighbors[0]
        True

        ids

        >>> wnn2 = KNN.from_array(points,2)
        >>> wnn2[0]
        {1: 1.0, 3: 1.0}
        >>> wnn2[1]
        {0: 1.0, 3: 1.0}

        now with 1 rather than 0 offset

        >>> wnn2 = KNN.from_array(points, 2, ids=range(1,7))
        >>> wnn2[1]
        {2: 1.0, 4: 1.0}
        >>> wnn2[2]
        {1: 1.0, 4: 1.0}
        >>> 0 in wnn2.neighbors
        False

        Notes
        -----

        Ties between neighbors of equal distance are arbitrarily broken.

        See Also
        --------
        :class: `pysal.weights.KNN`
        :class:`pysal.weights.W`
        """
        return cls(array, **kwargs)

    @classmethod
    def from_dataframe(cls, df, geom_col='geometry', ids=None, **kwargs):
        """
        Make KNN weights from a dataframe.

        Parameters
        ----------
        df      :   pandas.dataframe
                    a dataframe with a geometry column that can be used to
                    construct a W object
        geom_col :   string
                    column name of the geometry stored in df
        ids     :   string or iterable
                    if string, the column name of the indices from the dataframe
                    if iterable, a list of ids to use for the W
                    if None, df.index is used.

        See Also
        --------
        :class: `pysal.weights.KNN`
        :class:`pysal.weights.W`
        """
        pts = get_points_array(df[geom_col])
        if ids is None:
            ids = df.index.tolist()
        elif isinstance(ids, str):
            ids = df[ids].tolist()
        return cls(pts, ids=ids, **kwargs)

    def reweight(self, k=None, p=None, new_data=None, new_ids=None, inplace=True):
        """
        Redo K-Nearest Neighbor weights construction using given parameters

        Parameters
        ----------
        new_data    : np.ndarray
                      an array containing additional data to use in the KNN
                      weight
        new_ids     : list
                      a list aligned with new_data that provides the ids for
                      each new observation
        inplace     : bool
                      a flag denoting whether to modify the KNN object 
                      in place or to return a new KNN object
        k           : int
                      number of nearest neighbors
        p           : float
                      Minkowski p-norm distance metric parameter:
                      1<=p<=infinity
                      2: Euclidean distance
                      1: Manhattan distance
                      Ignored if the KDTree is an ArcKDTree

        Returns
        -------
        A copy of the object using the new parameterization, or None if the
        object is reweighted in place.
        """
        if (new_data is not None):
            new_data = np.asarray(new_data).reshape(-1,2)
            data = np.vstack((self.data, new_data)).reshape(-1,2)
            if new_ids is not None:
                ids = copy.deepcopy(self.id_order)
                ids.extend(list(new_ids))
            else:
                ids = list(range(data.shape[0]))
        elif (new_data is None) and (new_ids is None):
            # If not, we can use the same kdtree we have
            data = self.kdtree
            ids = self.id_order
        elif (new_data is None) and (new_ids is not None):
            Warn('Remapping ids must be done using w.remap_ids')
        if k is None:
            k = self.k
        if p is None:
            p = self.p
        if inplace:
            self._reset()
            self.__init__(data, ids=ids, k=k, p=p)
        else:
            return KNN(data, ids=ids, k=k, p=p)

Example #18

0

Show file

File: Distance.py Project: cheneason/pysal

class DistanceBand(W):
    """Spatial weights based on distance band

    Parameters
    ----------

    data        : array (n,k) or KDTree where KDtree.data is array (n,k)
                  n observations on k characteristics used to measure
                  distances between the n objects
    threshold  : float
                 distance band
    p          : float
                 Minkowski p-norm distance metric parameter:
                 1<=p<=infinity
                 2: Euclidean distance
                 1: Manhattan distance
    binary     : binary
                 If true w_{ij}=1 if d_{i,j}<=threshold, otherwise w_{i,j}=0
                 If false wij=dij^{alpha}
    alpha      : float
                 distance decay parameter for weight (default -1.0)
                 if alpha is positive the weights will not decline with
                 distance. If binary is True, alpha is ignored

    Examples
    --------

    >>> points=[(10, 10), (20, 10), (40, 10), (15, 20), (30, 20), (30, 30)]
    >>> w=DistanceBand(points,threshold=11.2)
    WARNING: there is one disconnected observation (no neighbors)
    Island id:  [2]
    >>> w.weights
    {0: [1, 1], 1: [1, 1], 2: [], 3: [1, 1], 4: [1], 5: [1]}
    >>> w.neighbors
    {0: [1, 3], 1: [0, 3], 2: [], 3: [0, 1], 4: [5], 5: [4]}
    >>> w=DistanceBand(points,threshold=14.2)
    >>> w.weights
    {0: [1, 1], 1: [1, 1, 1], 2: [1], 3: [1, 1], 4: [1, 1, 1], 5: [1]}
    >>> w.neighbors
    {0: [1, 3], 1: [0, 3, 4], 2: [4], 3: [0, 1], 4: [1, 2, 5], 5: [4]}

    inverse distance weights

    >>> w=DistanceBand(points,threshold=11.2,binary=False)
    WARNING: there is one disconnected observation (no neighbors)
    Island id:  [2]
    >>> w.weights[0]
    [0.10000000000000001, 0.089442719099991588]
    >>> w.neighbors[0]
    [1, 3]
    >>>

    gravity weights

    >>> w=DistanceBand(points,threshold=11.2,binary=False,alpha=-2.)
    WARNING: there is one disconnected observation (no neighbors)
    Island id:  [2]
    >>> w.weights[0]
    [0.01, 0.0079999999999999984]

    Notes
    -----

    this was initially implemented running scipy 0.8.0dev (in epd 6.1).
    earlier versions of scipy (0.7.0) have a logic bug in scipy/sparse/dok.py
    so serge changed line 221 of that file on sal-dev to fix the logic bug

    """
    def __init__(self, data, threshold, p=2, alpha=-1.0, binary=True, ids=None):
        """
        Casting to floats is a work around for a bug in scipy.spatial.  See detail in pysal issue #126
        """
        if issubclass(type(data), scipy.spatial.KDTree):
            self.kd = data
            self.data = self.kd.data
        else:
            try:
                data = np.asarray(data)
                if data.dtype.kind != 'f':
                    data = data.astype(float)
                self.data = data
                self.kd = KDTree(self.data)
            except:
                raise ValueError("Could not make array from data")

        self.p = p
        self.threshold = threshold
        self.binary = binary
        self.alpha = alpha
        self._band()
        neighbors, weights = self._distance_to_W(ids)
        W.__init__(self, neighbors, weights, ids)

    def _band(self):
        """
        find all pairs within threshold
        """
        kd = self.kd
        #ns=[kd.query_ball_point(point,self.threshold) for point in self.data]
        ns = kd.query_ball_tree(kd, self.threshold)
        self._nmat = ns

    def _distance_to_W(self, ids=None):
        allneighbors = {}
        weights = {}
        if ids:
            ids = np.array(ids)
        else:
            ids = np.arange(len(self._nmat))
        if self.binary:
            for i, neighbors in enumerate(self._nmat):
                ns = [ni for ni in neighbors if ni != i]
                neigh = list(ids[ns])
                if len(neigh) == 0:
                    allneighbors[ids[i]] = []
                    weights[ids[i]] = []
                else:
                    allneighbors[ids[i]] = neigh
                    weights[ids[i]] = [1] * len(ns)
        else:
            self.dmat = self.kd.sparse_distance_matrix(
                self.kd, max_distance=self.threshold)
            for i, neighbors in enumerate(self._nmat):
                ns = [ni for ni in neighbors if ni != i]
                neigh = list(ids[ns])
                if len(neigh) == 0:
                    allneighbors[ids[i]] = []
                    weights[ids[i]] = []
                else:
                    try:
                        allneighbors[ids[i]] = neigh
                        weights[ids[i]] = [self.dmat[(
                            i, j)] ** self.alpha for j in ns]
                    except ZeroDivisionError, e:
                        print(e, "Cannot compute inverse distance for elements at same location (distance=0).")
        return allneighbors, weights

Example #19

0

Show file

File: Distance.py Project: cheneason/pysal

def knnW(data, k=2, p=2, ids=None, pct_unique=0.25):
    """
    Creates nearest neighbor weights matrix based on k nearest
    neighbors.

    Parameters
    ----------

    data       : array (n,k) or KDTree where KDtree.data is array (n,k)
                 n observations on k characteristics used to measure
                 distances between the n objects
    k          : int
                 number of nearest neighbors
    p          : float
                 Minkowski p-norm distance metric parameter:
                 1<=p<=infinity
                 2: Euclidean distance
                 1: Manhattan distance
    ids        : list
                 identifiers to attach to each observation
    pct_unique : float
                 threshold percentage of unique points in data. Below this
                 threshold tree is built on unique values only

    Returns
    -------

    w         : W instance
                Weights object with binary weights

    Examples
    --------

    >>> x,y=np.indices((5,5))
    >>> x.shape=(25,1)
    >>> y.shape=(25,1)
    >>> data=np.hstack([x,y])
    >>> wnn2=knnW(data,k=2)
    >>> wnn4=knnW(data,k=4)
    >>> set([1,5,6,2]) == set(wnn4.neighbors[0])
    True
    >>> set([0,6,10,1]) == set(wnn4.neighbors[5])
    True
    >>> set([1,5]) == set(wnn2.neighbors[0])
    True
    >>> set([0,6]) == set(wnn2.neighbors[5])
    True
    >>> "%.2f"%wnn2.pct_nonzero
    '0.08'
    >>> wnn4.pct_nonzero
    0.16
    >>> wnn3e=knnW(data,p=2,k=3)
    >>> set([1,5,6]) == set(wnn3e.neighbors[0])
    True
    >>> wnn3m=knnW(data,p=1,k=3)
    >>> a = set([1,5,2])
    >>> b = set([1,5,6])
    >>> c = set([1,5,10])
    >>> w0n = set(wnn3m.neighbors[0])
    >>> a==w0n or b==w0n or c==w0n
    True

    ids

    >>> wnn2 = knnW(data,2)
    >>> wnn2[0]
    {1: 1.0, 5: 1.0}
    >>> wnn2[1]
    {0: 1.0, 2: 1.0}

    now with 1 rather than 0 offset

    >>> wnn2 = knnW(data,2, ids = range(1,26))
    >>> wnn2[1]
    {2: 1.0, 6: 1.0}
    >>> wnn2[2]
    {1: 1.0, 3: 1.0}
    >>> 0 in wnn2.neighbors
    False

    Notes
    -----

    Ties between neighbors of equal distance are arbitrarily broken.

    See Also
    --------
    pysal.weights.W

    """

    if issubclass(type(data), scipy.spatial.KDTree):
        kd = data
        data = kd.data
        nnq = kd.query(data, k=k+1, p=p)
        info = nnq[1]
    elif type(data).__name__ == 'ndarray':
        # check if unique points are a small fraction of all points
        ind =  np.lexsort(data.T)
        u = data[np.concatenate(([True],np.any(data[ind[1:]]!=data[ind[:-1]],axis=1)))]
        pct_u = len(u)*1. / len(data)
        if pct_u < pct_unique:
            tree = KDTree(u)
            nnq = tree.query(data, k=k+1, p=p)
            info = nnq[1]
            uid = [np.where((data == ui).all(axis=1))[0][0] for ui in u]
            new_info = np.zeros((len(data), k + 1), 'int')
            for i, row in enumerate(info):
                new_info[i] = [uid[j] for j in row]
            info = new_info
        else:
            kd = KDTree(data)
            # calculate
            nnq = kd.query(data, k=k + 1, p=p)
            info = nnq[1]
    else:
        print 'Unsupported type'
        return None

    neighbors = {}
    for i, row in enumerate(info):
        row = row.tolist()
        if i in row:
            row.remove(i)
            focal = i
        if ids:
            row = [ ids[j] for j in row]
            focal = ids[i]
        neighbors[focal] = row
    return pysal.weights.W(neighbors,  id_order=ids)

Example #20

0

Show file

File: Distance.py Project: umutturk/pysal

class DistanceBand(W):
    """
    Spatial weights based on distance band.

    Parameters
    ----------

    data        : array
                  (n,k) or KDTree where KDtree.data is array (n,k)
                  n observations on k characteristics used to measure
                  distances between the n objects
    threshold  : float
                 distance band
    p          : float
                 Minkowski p-norm distance metric parameter:
                 1<=p<=infinity
                 2: Euclidean distance
                 1: Manhattan distance
    binary     : boolean
                 If true w_{ij}=1 if d_{i,j}<=threshold, otherwise w_{i,j}=0
                 If false wij=dij^{alpha}
    alpha      : float
                 distance decay parameter for weight (default -1.0)
                 if alpha is positive the weights will not decline with
                 distance. If binary is True, alpha is ignored

    ids         : list
                  values to use for keys of the neighbors and weights dicts

    Examples
    --------

    >>> points=[(10, 10), (20, 10), (40, 10), (15, 20), (30, 20), (30, 30)]
    >>> w=DistanceBand(points,threshold=11.2)
    WARNING: there is one disconnected observation (no neighbors)
    Island id:  [2]
    >>> w.weights
    {0: [1, 1], 1: [1, 1], 2: [], 3: [1, 1], 4: [1], 5: [1]}
    >>> w.neighbors
    {0: [1, 3], 1: [0, 3], 2: [], 3: [1, 0], 4: [5], 5: [4]}
    >>> w=DistanceBand(points,threshold=14.2)
    >>> w.weights
    {0: [1, 1], 1: [1, 1, 1], 2: [1], 3: [1, 1], 4: [1, 1, 1], 5: [1]}
    >>> w.neighbors
    {0: [1, 3], 1: [0, 3, 4], 2: [4], 3: [1, 0], 4: [5, 1, 2], 5: [4]}

    inverse distance weights

    >>> w=DistanceBand(points,threshold=11.2,binary=False)
    WARNING: there is one disconnected observation (no neighbors)
    Island id:  [2]
    >>> w.weights[0]
    [0.10000000000000001, 0.089442719099991588]
    >>> w.neighbors[0]
    [1, 3]
    >>>

    gravity weights

    >>> w=DistanceBand(points,threshold=11.2,binary=False,alpha=-2.)
    WARNING: there is one disconnected observation (no neighbors)
    Island id:  [2]
    >>> w.weights[0]
    [0.01, 0.0079999999999999984]

    Notes
    -----

    This was initially implemented running scipy 0.8.0dev (in epd 6.1).
    earlier versions of scipy (0.7.0) have a logic bug in scipy/sparse/dok.py
    so serge changed line 221 of that file on sal-dev to fix the logic bug.

    """

    def __init__(self, data, threshold, p=2, alpha=-1.0, binary=True, ids=None):
        """Casting to floats is a work around for a bug in scipy.spatial.
        See detail in pysal issue #126.

        """
        if issubclass(type(data), scipy.spatial.KDTree):
            self.kd = data
            self.data = self.kd.data
        else:
            try:
                data = np.asarray(data)
                if data.dtype.kind != 'f':
                    data = data.astype(float)
                self.data = data
                self.kd = KDTree(self.data)
            except:
                raise ValueError("Could not make array from data")

        self.p = p
        self.threshold = threshold
        self.binary = binary
        self.alpha = alpha
        self._band()
        neighbors, weights = self._distance_to_W(ids)
        W.__init__(self, neighbors, weights, ids)

    def _band(self):
        """Find all pairs within threshold.

        """
        self.dmat = self.kd.sparse_distance_matrix(
                self.kd, max_distance=self.threshold)

    def _distance_to_W(self, ids=None):
        if ids:
            ids = np.array(ids)
        else:
            ids = np.arange(self.dmat.shape[0])
        neighbors = dict([(i,[]) for i in ids])
        weights = dict([(i,[]) for i in ids])
        if self.binary:
            for key,weight in self.dmat.items():
                i,j = key
                if j not in neighbors[i]:
                    weights[i].append(1)
                    neighbors[i].append(j)
                if i not in neighbors[j]:
                    weights[j].append(1)
                    neighbors[j].append(i)

        else:
            for key,weight in self.dmat.items():
                i,j = key
                if j not in neighbors[i]:
                    weights[i].append(weight**self.alpha)
                    neighbors[i].append(j)
                if i not in neighbors[j]:
                    weights[j].append(weight**self.alpha)
                    neighbors[j].append(i)

        return neighbors, weights

Example #21

0

Show file

File: Distance.py Project: shepherdmeng/pysal

class DistanceBand(W):
    """
    Spatial weights based on distance band.

    Parameters
    ----------

    data        : array
                  (n,k) or KDTree where KDtree.data is array (n,k)
                  n observations on k characteristics used to measure
                  distances between the n objects
    threshold  : float
                 distance band
    p          : float
                 Minkowski p-norm distance metric parameter:
                 1<=p<=infinity
                 2: Euclidean distance
                 1: Manhattan distance
    binary     : boolean
                 If true w_{ij}=1 if d_{i,j}<=threshold, otherwise w_{i,j}=0
                 If false wij=dij^{alpha}
    alpha      : float
                 distance decay parameter for weight (default -1.0)
                 if alpha is positive the weights will not decline with
                 distance. If binary is True, alpha is ignored

    ids         : list
                  values to use for keys of the neighbors and weights dicts

    Attributes
    ----------
    weights : dict
              of neighbor weights keyed by observation id

    neighbors : dict
                of neighbors keyed by observation id

    Examples
    --------

    >>> points=[(10, 10), (20, 10), (40, 10), (15, 20), (30, 20), (30, 30)]
    >>> wcheck = pysal.W({0: [1, 3], 1: [0, 3], 2: [], 3: [0, 1], 4: [5], 5: [4]})
    WARNING: there is one disconnected observation (no neighbors)
    Island id:  [2]
    >>> w=DistanceBand(points,threshold=11.2)
    WARNING: there is one disconnected observation (no neighbors)
    Island id:  [2]
    >>> pysal.weights.util.neighbor_equality(w, wcheck)
    True
    >>> w=DistanceBand(points,threshold=14.2)
    >>> wcheck = pysal.W({0: [1, 3], 1: [0, 3, 4], 2: [4], 3: [1, 0], 4: [5, 2, 1], 5: [4]})
    >>> pysal.weights.util.neighbor_equality(w, wcheck)
    True



    inverse distance weights

    >>> w=DistanceBand(points,threshold=11.2,binary=False)
    WARNING: there is one disconnected observation (no neighbors)
    Island id:  [2]
    >>> w.weights[0]
    [0.10000000000000001, 0.089442719099991588]
    >>> w.neighbors[0]
    [1, 3]
    >>>

    gravity weights

    >>> w=DistanceBand(points,threshold=11.2,binary=False,alpha=-2.)
    WARNING: there is one disconnected observation (no neighbors)
    Island id:  [2]
    >>> w.weights[0]
    [0.01, 0.0079999999999999984]

    Notes
    -----

    This was initially implemented running scipy 0.8.0dev (in epd 6.1).
    earlier versions of scipy (0.7.0) have a logic bug in scipy/sparse/dok.py
    so serge changed line 221 of that file on sal-dev to fix the logic bug.

    """
    def __init__(self,
                 data,
                 threshold,
                 p=2,
                 alpha=-1.0,
                 binary=True,
                 ids=None):
        """Casting to floats is a work around for a bug in scipy.spatial.
        See detail in pysal issue #126.

        """
        if isKDTree(data):
            self.kd = data
            self.data = self.kd.data
        else:
            try:
                data = np.asarray(data)
                if data.dtype.kind != 'f':
                    data = data.astype(float)
                self.data = data
                self.kd = KDTree(self.data)
            except:
                raise ValueError("Could not make array from data")

        self.p = p
        self.threshold = threshold
        self.binary = binary
        self.alpha = alpha
        self._band()
        neighbors, weights = self._distance_to_W(ids)
        W.__init__(self, neighbors, weights, ids)

    def _band(self):
        """Find all pairs within threshold.

        """
        self.dmat = self.kd.sparse_distance_matrix(self.kd,
                                                   max_distance=self.threshold)

    def _distance_to_W(self, ids=None):
        if ids:
            ids = np.array(ids)
        else:
            ids = np.arange(self.dmat.shape[0])
        neighbors = dict([(i, []) for i in ids])
        weights = dict([(i, []) for i in ids])
        if self.binary:
            for key, weight in self.dmat.items():
                i, j = key
                if i != j:
                    if j not in neighbors[i]:
                        weights[i].append(1)
                        neighbors[i].append(j)
                    if i not in neighbors[j]:
                        weights[j].append(1)
                        neighbors[j].append(i)

        else:
            for key, weight in self.dmat.items():
                i, j = key
                if i != j:
                    if j not in neighbors[i]:
                        weights[i].append(weight**self.alpha)
                        neighbors[i].append(j)
                    if i not in neighbors[j]:
                        weights[j].append(weight**self.alpha)
                        neighbors[j].append(i)

        return neighbors, weights

Example #22

0

Show file

File: Distance.py Project: denadai2/pysal

def knnW(data, k=2, p=2, ids=None):
    """
    Creates nearest neighbor weights matrix based on k nearest
    neighbors.

    Parameters
    ----------

    kdtree      : object
                  PySAL KDTree or ArcKDTree where KDtree.data is array (n,k)
                  n observations on k characteristics used to measure
                  distances between the n objects
    k           : int
                  number of nearest neighbors
    p           : float
                  Minkowski p-norm distance metric parameter:
                  1<=p<=infinity
                  2: Euclidean distance
                  1: Manhattan distance
                  Ignored if the KDTree is an ArcKDTree
    ids         : list
                  identifiers to attach to each observation

    Returns
    -------

    w         : W
                instance
                Weights object with binary weights

    Examples
    --------

    >>> points = [(10, 10), (20, 10), (40, 10), (15, 20), (30, 20), (30, 30)]
    >>> kd = pysal.cg.kdtree.KDTree(np.array(points))
    >>> wnn2 = pysal.knnW(kd, 2)
    >>> [1,3] == wnn2.neighbors[0]
    True

    ids

    >>> wnn2 = knnW(kd,2)
    >>> wnn2[0]
    {1: 1.0, 3: 1.0}
    >>> wnn2[1]
    {0: 1.0, 3: 1.0}

    now with 1 rather than 0 offset

    >>> wnn2 = knnW(kd, 2, ids=range(1,7))
    >>> wnn2[1]
    {2: 1.0, 4: 1.0}
    >>> wnn2[2]
    {1: 1.0, 4: 1.0}
    >>> 0 in wnn2.neighbors
    False

    Notes
    -----

    Ties between neighbors of equal distance are arbitrarily broken.

    See Also
    --------
    pysal.weights.W

    """
    if isKDTree(data):
        kdt = data
        data = kdt.data
    else:
        kdt = KDTree(data)
    nnq = kdt.query(data, k=k+1, p=p)
    info = nnq[1]

    neighbors = {}
    for i, row in enumerate(info):
        row = row.tolist()
        if i in row:
            row.remove(i)
            focal = i
        if ids:
            row = [ ids[j] for j in row]
            focal = ids[i]
        neighbors[focal] = row
    return pysal.weights.W(neighbors,  id_order=ids)

Example #23

0

Show file

File: Distance.py Project: andyreagan/pysal

class Kernel(W):
    """Spatial weights based on kernel functions

    Parameters
    ----------

    data        : array (n,k) or KDTree where KDtree.data is array (n,k)
                  n observations on k characteristics used to measure
                  distances between the n objects
    bandwidth   : float or array-like (optional)
                  the bandwidth :math:`h_i` for the kernel.
    fixed       : binary
                  If true then :math:`h_i=h \\forall i`. If false then
                  bandwidth is adaptive across observations.
    k           : int
                  the number of nearest neighbors to use for determining
                  bandwidth. For fixed bandwidth, :math:`h_i=max(dknn) \\forall i`
                  where :math:`dknn` is a vector of k-nearest neighbor
                  distances (the distance to the kth nearest neighbor for each
                  observation).  For adaptive bandwidths, :math:`h_i=dknn_i`
    diagonal    : boolean
                  If true, set diagonal weights = 1.0, if false (default),
                  diagonals weights are set to value according to kernel
                  function.
    function    : string {'triangular','uniform','quadratic','quartic','gaussian'}
                  kernel function defined as follows with

                  .. math::

                      z_{i,j} = d_{i,j}/h_i

                  triangular

                  .. math::

                      K(z) = (1 - |z|) \ if |z| \le 1

                  uniform

                  .. math::

                      K(z) = 1/2 \ if |z| \le 1

                  quadratic

                  .. math::

                      K(z) = (3/4)(1-z^2) \ if |z| \le 1

                  quartic

                  .. math::

                      K(z) = (15/16)(1-z^2)^2 \ if |z| \le 1

                  gaussian

                  .. math::

                      K(z) = (2\pi)^{(-1/2)} exp(-z^2 / 2)

    eps         : float
                  adjustment to ensure knn distance range is closed on the
                  knnth observations

    Examples
    --------

    >>> points=[(10, 10), (20, 10), (40, 10), (15, 20), (30, 20), (30, 30)]
    >>> kw=Kernel(points)
    >>> kw.weights[0]
    [1.0, 0.500000049999995, 0.4409830615267465]
    >>> kw.neighbors[0]
    [0, 1, 3]
    >>> kw.bandwidth
    array([[ 20.000002],
           [ 20.000002],
           [ 20.000002],
           [ 20.000002],
           [ 20.000002],
           [ 20.000002]])
    >>> kw15=Kernel(points,bandwidth=15.0)
    >>> kw15[0]
    {0: 1.0, 1: 0.33333333333333337, 3: 0.2546440075000701}
    >>> kw15.neighbors[0]
    [0, 1, 3]
    >>> kw15.bandwidth
    array([[ 15.],
           [ 15.],
           [ 15.],
           [ 15.],
           [ 15.],
           [ 15.]])

    Adaptive bandwidths user specified

    >>> bw=[25.0,15.0,25.0,16.0,14.5,25.0]
    >>> kwa=Kernel(points,bandwidth=bw)
    >>> kwa.weights[0]
    [1.0, 0.6, 0.552786404500042, 0.10557280900008403]
    >>> kwa.neighbors[0]
    [0, 1, 3, 4]
    >>> kwa.bandwidth
    array([[ 25. ],
           [ 15. ],
           [ 25. ],
           [ 16. ],
           [ 14.5],
           [ 25. ]])

    Endogenous adaptive bandwidths

    >>> kwea=Kernel(points,fixed=False)
    >>> kwea.weights[0]
    [1.0, 0.10557289844279438, 9.99999900663795e-08]
    >>> kwea.neighbors[0]
    [0, 1, 3]
    >>> kwea.bandwidth
    array([[ 11.18034101],
           [ 11.18034101],
           [ 20.000002  ],
           [ 11.18034101],
           [ 14.14213704],
           [ 18.02775818]])

    Endogenous adaptive bandwidths with Gaussian kernel

    >>> kweag=Kernel(points,fixed=False,function='gaussian')
    >>> kweag.weights[0]
    [0.3989422804014327, 0.2674190291577696, 0.2419707487162134]
    >>> kweag.bandwidth
    array([[ 11.18034101],
           [ 11.18034101],
           [ 20.000002  ],
           [ 11.18034101],
           [ 14.14213704],
           [ 18.02775818]])

    Diagonals to 1.0

    >>> kq = Kernel(points,function='gaussian')
    >>> kq.weights
    {0: [0.3989422804014327, 0.35206533556593145, 0.3412334260702758], 1: [0.35206533556593145, 0.3989422804014327, 0.2419707487162134, 0.3412334260702758, 0.31069657591175387], 2: [0.2419707487162134, 0.3989422804014327, 0.31069657591175387], 3: [0.3412334260702758, 0.3412334260702758, 0.3989422804014327, 0.3011374490937829, 0.26575287272131043], 4: [0.31069657591175387, 0.31069657591175387, 0.3011374490937829, 0.3989422804014327, 0.35206533556593145], 5: [0.26575287272131043, 0.35206533556593145, 0.3989422804014327]}
    >>> kqd = Kernel(points, function='gaussian', diagonal=True)
    >>> kqd.weights
    {0: [1.0, 0.35206533556593145, 0.3412334260702758], 1: [0.35206533556593145, 1.0, 0.2419707487162134, 0.3412334260702758, 0.31069657591175387], 2: [0.2419707487162134, 1.0, 0.31069657591175387], 3: [0.3412334260702758, 0.3412334260702758, 1.0, 0.3011374490937829, 0.26575287272131043], 4: [0.31069657591175387, 0.31069657591175387, 0.3011374490937829, 1.0, 0.35206533556593145], 5: [0.26575287272131043, 0.35206533556593145, 1.0]}
    """

    def __init__(
        self, data, bandwidth=None, fixed=True, k=2, function="triangular", eps=1.0000001, ids=None, diagonal=False
    ):
        if issubclass(type(data), scipy.spatial.KDTree):
            self.kdt = data
            self.data = self.kdt.data
            data = self.data
        else:
            self.data = data
            self.kdt = KDTree(self.data)
        self.k = k + 1
        self.function = function.lower()
        self.fixed = fixed
        self.eps = eps
        if bandwidth:
            try:
                bandwidth = np.array(bandwidth)
                bandwidth.shape = (len(bandwidth), 1)
            except:
                bandwidth = np.ones((len(data), 1), "float") * bandwidth
            self.bandwidth = bandwidth
        else:
            self._set_bw()

        self._eval_kernel()
        neighbors, weights = self._k_to_W(ids)
        if diagonal:
            for i in neighbors:
                weights[i][neighbors[i].index(i)] = 1.0
        W.__init__(self, neighbors, weights, ids)

    def _k_to_W(self, ids=None):
        allneighbors = {}
        weights = {}
        if ids:
            ids = np.array(ids)
        else:
            ids = np.arange(len(self.data))
        for i, neighbors in enumerate(self.kernel):
            if len(self.neigh[i]) == 0:
                allneighbors[ids[i]] = []
                weights[ids[i]] = []
            else:
                allneighbors[ids[i]] = list(ids[self.neigh[i]])
                weights[ids[i]] = self.kernel[i].tolist()
        return allneighbors, weights

    def _set_bw(self):
        dmat, neigh = self.kdt.query(self.data, k=self.k)
        if self.fixed:
            # use max knn distance as bandwidth
            bandwidth = dmat.max() * self.eps
            n = len(dmat)
            self.bandwidth = np.ones((n, 1), "float") * bandwidth
        else:
            # use local max knn distance
            self.bandwidth = dmat.max(axis=1) * self.eps
            self.bandwidth.shape = (self.bandwidth.size, 1)
            # identify knn neighbors for each point
            nnq = self.kdt.query(self.data, k=self.k)
            self.neigh = nnq[1]

    def _eval_kernel(self):
        # get points within bandwidth distance of each point
        if not hasattr(self, "neigh"):
            kdtq = self.kdt.query_ball_point
            neighbors = [kdtq(self.data[i], r=bwi[0]) for i, bwi in enumerate(self.bandwidth)]
            self.neigh = neighbors
        # get distances for neighbors
        bw = self.bandwidth

        kdtq = self.kdt.query
        z = []
        for i, nids in enumerate(self.neigh):
            di, ni = kdtq(self.data[i], k=len(nids))
            zi = np.array([dict(zip(ni, di))[nid] for nid in nids]) / bw[i]
            z.append(zi)
        zs = z
        # functions follow Anselin and Rey (2010) table 5.4
        if self.function == "triangular":
            self.kernel = [1 - zi for zi in zs]
        elif self.function == "uniform":
            self.kernel = [np.ones(zi.shape) * 0.5 for zi in zs]
        elif self.function == "quadratic":
            self.kernel = [(3.0 / 4) * (1 - zi ** 2) for zi in zs]
        elif self.function == "quartic":
            self.kernel = [(15.0 / 16) * (1 - zi ** 2) ** 2 for zi in zs]
        elif self.function == "gaussian":
            c = np.pi * 2
            c = c ** (-0.5)
            self.kernel = [c * np.exp(-(zi ** 2) / 2.0) for zi in zs]
        else:
            print "Unsupported kernel function", self.function

Example #24

0

Show file

File: Distance.py Project: jkang1643/GIS

class KNN(W):
    """
    Creates nearest neighbor weights matrix based on k nearest
    neighbors.

    Parameters
    ----------
    kdtree      : object
                  PySAL KDTree or ArcKDTree where KDtree.data is array (n,k)
                  n observations on k characteristics used to measure
                  distances between the n objects
    k           : int
                  number of nearest neighbors
    p           : float
                  Minkowski p-norm distance metric parameter:
                  1<=p<=infinity
                  2: Euclidean distance
                  1: Manhattan distance
                  Ignored if the KDTree is an ArcKDTree
    ids         : list
                  identifiers to attach to each observation

    Returns
    -------

    w         : W
                instance
                Weights object with binary weights

    Examples
    --------
    >>> points = [(10, 10), (20, 10), (40, 10), (15, 20), (30, 20), (30, 30)]
    >>> kd = pysal.cg.kdtree.KDTree(np.array(points))
    >>> wnn2 = pysal.KNN(kd, 2)
    >>> [1,3] == wnn2.neighbors[0]
    True

    ids

    >>> wnn2 = KNN(kd,2)
    >>> wnn2[0]
    {1: 1.0, 3: 1.0}
    >>> wnn2[1]
    {0: 1.0, 3: 1.0}

    now with 1 rather than 0 offset

    >>> wnn2 = KNN(kd, 2, ids=range(1,7))
    >>> wnn2[1]
    {2: 1.0, 4: 1.0}
    >>> wnn2[2]
    {1: 1.0, 4: 1.0}
    >>> 0 in wnn2.neighbors
    False

    Notes
    -----

    Ties between neighbors of equal distance are arbitrarily broken.

    See Also
    --------
    :class:`pysal.weights.W`
    """
    def __init__(self,
                 data,
                 k=2,
                 p=2,
                 ids=None,
                 radius=None,
                 distance_metric='euclidean'):
        if isKDTree(data):
            self.kdtree = data
            self.data = data.data
        else:
            self.data = data
            self.kdtree = KDTree(data,
                                 radius=radius,
                                 distance_metric=distance_metric)
        self.k = k
        self.p = p
        this_nnq = self.kdtree.query(self.data, k=k + 1, p=p)

        to_weight = this_nnq[1]
        if ids is None:
            ids = list(range(to_weight.shape[0]))

        neighbors = {}
        for i, row in enumerate(to_weight):
            row = row.tolist()
            row.remove(i)
            row = [ids[j] for j in row]
            focal = ids[i]
            neighbors[focal] = row
        W.__init__(self, neighbors, id_order=ids)

    @classmethod
    def from_shapefile(cls, filepath, **kwargs):
        """
        Nearest neighbor weights from a shapefile.

        Parameters
        ----------

        data       : string
                     shapefile containing attribute data.
        k          : int
                     number of nearest neighbors
        p          : float
                     Minkowski p-norm distance metric parameter:
                     1<=p<=infinity
                     2: Euclidean distance
                     1: Manhattan distance
        ids        : list
                     identifiers to attach to each observation
        radius     : float
                     If supplied arc_distances will be calculated
                     based on the given radius. p will be ignored.

        Returns
        -------

        w         : KNN
                    instance; Weights object with binary weights.

        Examples
        --------

        Polygon shapefile

        >>> wc=knnW_from_shapefile(pysal.examples.get_path("columbus.shp"))
        >>> "%.4f"%wc.pct_nonzero
        '4.0816'
        >>> set([2,1]) == set(wc.neighbors[0])
        True
        >>> wc3=pysal.knnW_from_shapefile(pysal.examples.get_path("columbus.shp"),k=3)
        >>> set(wc3.neighbors[0]) == set([2,1,3])
        True
        >>> set(wc3.neighbors[2]) == set([4,3,0])
        True

        1 offset rather than 0 offset

        >>> wc3_1=knnW_from_shapefile(pysal.examples.get_path("columbus.shp"),k=3,idVariable="POLYID")
        >>> set([4,3,2]) == set(wc3_1.neighbors[1])
        True
        >>> wc3_1.weights[2]
        [1.0, 1.0, 1.0]
        >>> set([4,1,8]) == set(wc3_1.neighbors[2])
        True


        Point shapefile

        >>> w=knnW_from_shapefile(pysal.examples.get_path("juvenile.shp"))
        >>> w.pct_nonzero
        1.1904761904761905
        >>> w1=knnW_from_shapefile(pysal.examples.get_path("juvenile.shp"),k=1)
        >>> "%.3f"%w1.pct_nonzero

        Notes
        -----

        Ties between neighbors of equal distance are arbitrarily broken.

        See Also
        --------
        :class:`pysal.weights.KNN`
        :class:`pysal.weights.W`
        """
        return cls(get_points_array_from_shapefile(filepath), **kwargs)

    @classmethod
    def from_array(cls, array, **kwargs):
        """
        Creates nearest neighbor weights matrix based on k nearest
        neighbors.

        Parameters
        ----------
        array       : np.ndarray
                      (n, k) array representing n observations on 
                      k characteristics used to measure distances 
                      between the n objects
        **kwargs    : keyword arguments, see Rook

        Returns
        -------
        w         : W
                    instance
                    Weights object with binary weights

        Examples
        --------
        >>> points = [(10, 10), (20, 10), (40, 10), (15, 20), (30, 20), (30, 30)]
        >>> wnn2 = pysal.KNN.from_array(points, 2)
        >>> [1,3] == wnn2.neighbors[0]
        True

        ids

        >>> wnn2 = KNN.from_array(points,2)
        >>> wnn2[0]
        {1: 1.0, 3: 1.0}
        >>> wnn2[1]
        {0: 1.0, 3: 1.0}

        now with 1 rather than 0 offset

        >>> wnn2 = KNN.from_array(points, 2, ids=range(1,7))
        >>> wnn2[1]
        {2: 1.0, 4: 1.0}
        >>> wnn2[2]
        {1: 1.0, 4: 1.0}
        >>> 0 in wnn2.neighbors
        False

        Notes
        -----

        Ties between neighbors of equal distance are arbitrarily broken.

        See Also
        --------
        :class: `pysal.weights.KNN`
        :class:`pysal.weights.W`
        """
        return cls(array, **kwargs)

    @classmethod
    def from_dataframe(cls, df, geom_col='geometry', ids=None, **kwargs):
        """
        Make KNN weights from a dataframe.

        Parameters
        ----------
        df      :   pandas.dataframe
                    a dataframe with a geometry column that can be used to
                    construct a W object
        geom_col :   string
                    column name of the geometry stored in df
        ids     :   string or iterable
                    if string, the column name of the indices from the dataframe
                    if iterable, a list of ids to use for the W
                    if None, df.index is used.

        See Also
        --------
        :class: `pysal.weights.KNN`
        :class:`pysal.weights.W`
        """
        pts = get_points_array(df[geom_col])
        if ids is None:
            ids = df.index.tolist()
        elif isinstance(ids, str):
            ids = df[ids].tolist()
        return cls(pts, ids=ids, **kwargs)

    def reweight(self,
                 k=None,
                 p=None,
                 new_data=None,
                 new_ids=None,
                 inplace=True):
        """
        Redo K-Nearest Neighbor weights construction using given parameters

        Parameters
        ----------
        new_data    : np.ndarray
                      an array containing additional data to use in the KNN
                      weight
        new_ids     : list
                      a list aligned with new_data that provides the ids for
                      each new observation
        inplace     : bool
                      a flag denoting whether to modify the KNN object 
                      in place or to return a new KNN object
        k           : int
                      number of nearest neighbors
        p           : float
                      Minkowski p-norm distance metric parameter:
                      1<=p<=infinity
                      2: Euclidean distance
                      1: Manhattan distance
                      Ignored if the KDTree is an ArcKDTree

        Returns
        -------
        A copy of the object using the new parameterization, or None if the
        object is reweighted in place.
        """
        if (new_data is not None):
            new_data = np.asarray(new_data).reshape(-1, 2)
            data = np.vstack((self.data, new_data)).reshape(-1, 2)
            if new_ids is not None:
                ids = copy.deepcopy(self.id_order)
                ids.extend(list(new_ids))
            else:
                ids = list(range(data.shape[0]))
        elif (new_data is None) and (new_ids is None):
            # If not, we can use the same kdtree we have
            data = self.kdtree
            ids = self.id_order
        elif (new_data is None) and (new_ids is not None):
            Warn('Remapping ids must be done using w.remap_ids')
        if k is None:
            k = self.k
        if p is None:
            p = self.p
        if inplace:
            self._reset()
            self.__init__(data, ids=ids, k=k, p=p)
        else:
            return KNN(data, ids=ids, k=k, p=p)