Beispiel #1
0
    def __init__(self,
                 data,
                 threshold,
                 p=2,
                 alpha=-1.0,
                 binary=True,
                 ids=None):
        """Set up the weights from point data or from a ready-made KDTree.

        Casting to floats is a work around for a bug in scipy.spatial.
        See detail in pysal issue #126.

        Parameters
        ----------
        data      : array-like or KDTree
                    An object accepted by ``isKDTree`` is used as the tree
                    directly and its ``.data`` becomes ``self.data``.
                    Anything else is converted with ``np.asarray`` (and cast
                    to float when its dtype is not floating point) before a
                    new ``KDTree`` is built on it.
        threshold : stored as ``self.threshold`` (presumably the distance
                    cut-off used by ``self._band`` -- confirm there).
        p         : stored as ``self.p``.
        alpha     : stored as ``self.alpha``.
        binary    : stored as ``self.binary``.
        ids       : forwarded to ``self._distance_to_W`` and ``W.__init__``.

        Raises
        ------
        ValueError
            If ``data`` cannot be turned into an array or the tree cannot be
            built from it.
        """
        if isKDTree(data):
            self.kd = data
            self.data = self.kd.data
        else:
            try:
                data = np.asarray(data)
                if data.dtype.kind != 'f':
                    data = data.astype(float)
                self.data = data
                self.kd = KDTree(self.data)
            except Exception:
                # Only ordinary errors are translated; KeyboardInterrupt and
                # SystemExit must propagate untouched instead of being
                # reported as a data problem.
                raise ValueError("Could not make array from data")

        self.p = p
        self.threshold = threshold
        self.binary = binary
        self.alpha = alpha
        # The attributes above are assigned first because _band() runs next.
        self._band()
        neighbors, weights = self._distance_to_W(ids)
        W.__init__(self, neighbors, weights, ids)
Beispiel #2
0
    def __init__(self,
                 data,
                 k=2,
                 p=2,
                 ids=None,
                 radius=None,
                 distance_metric='euclidean'):
        """Build k-nearest-neighbor weights from points or an existing KDTree.

        Parameters
        ----------
        data            : array-like or KDTree
                          An object accepted by ``isKDTree`` is used as the
                          tree and its ``.data`` as the points; anything else
                          is handed to ``KDTree`` together with ``radius`` and
                          ``distance_metric``.
        k               : int
                          number of neighbors kept for every observation.
        p               : passed as ``p`` to the tree query.
        ids             : sequence, optional
                          labels for the observations; defaults to
                          ``0 .. n-1``.
        radius          : forwarded to ``KDTree`` when a tree is built here.
        distance_metric : forwarded to ``KDTree`` when a tree is built here.
        """
        if isKDTree(data):
            self.kdtree = data
            self.data = data.data
        else:
            self.data = data
            self.kdtree = KDTree(data,
                                 radius=radius,
                                 distance_metric=distance_metric)
        self.k = k
        self.p = p
        # The tree is queried with its own points, so k + 1 hits are requested
        # and the focal observation is dropped from each result row below.
        this_nnq = self.kdtree.query(self.data, k=k + 1, p=p)

        to_weight = this_nnq[1]
        if ids is None:
            ids = list(range(to_weight.shape[0]))

        neighbors = {}
        for i, row in enumerate(to_weight):
            row = row.tolist()
            if i in row:
                row.remove(i)
            else:
                # With more than k coincident points the focal index can be
                # missing from its own hits; an unconditional remove() would
                # raise ValueError. Drop the farthest hit instead so that
                # exactly k neighbors remain.
                del row[-1]
            neighbors[ids[i]] = [ids[j] for j in row]
        W.__init__(self, neighbors, id_order=ids)
Beispiel #3
0
    def __init__(self, data, bandwidth=None, fixed=True, k=2,
                 function='triangular', eps=1.0000001, ids=None,
                 diagonal=False):
        """Build kernel weights from points or an existing KDTree.

        Parameters
        ----------
        data      : array-like or scipy.spatial.KDTree (or subclass)
                    A tree is used as is and its ``.data`` becomes the point
                    set; anything else is handed to ``KDTree``.
        bandwidth : scalar or sequence, optional
                    A sequence is reshaped into a column with one entry per
                    element; a scalar is expanded to a ``(len(data), 1)``
                    column. When omitted (or falsy) ``self._set_bw()`` is
                    called instead.
        fixed     : stored as ``self.fixed``.
        k         : int; ``self.k`` is set to ``k + 1``.
        function  : str; stored lower-cased as ``self.function``.
        eps       : stored as ``self.eps``.
        ids       : forwarded to ``self._k_to_W`` and ``W.__init__``.
        diagonal  : bool
                    if true, the weight every observation has on itself is
                    overwritten with 1.0.
        """
        if issubclass(type(data), scipy.spatial.KDTree):
            self.kdt = data
            self.data = self.kdt.data
            data = self.data
        else:
            self.data = data
            self.kdt = KDTree(self.data)
        self.k = k + 1
        self.function = function.lower()
        self.fixed = fixed
        self.eps = eps

        # The truth value of a multi-element ndarray is ambiguous and raises,
        # so such an array is treated as "bandwidth supplied" explicitly.
        # Every other input keeps plain truthiness.
        if isinstance(bandwidth, np.ndarray) and bandwidth.size > 1:
            bandwidth_given = True
        else:
            bandwidth_given = bool(bandwidth)

        if bandwidth_given:
            try:
                bandwidth = np.array(bandwidth)
                bandwidth.shape = (len(bandwidth), 1)
            except Exception:
                # A scalar has no len(); broadcast it to one value per
                # observation. KeyboardInterrupt/SystemExit are no longer
                # swallowed here.
                bandwidth = np.ones((len(data), 1), 'float') * bandwidth
            self.bandwidth = bandwidth
        else:
            self._set_bw()

        self._eval_kernel()
        neighbors, weights = self._k_to_W(ids)
        if diagonal:
            for i in neighbors:
                # Locate observation i inside its own neighbor list and force
                # that weight to one.
                weights[i][neighbors[i].index(i)] = 1.0
        W.__init__(self, neighbors, weights, ids)
Beispiel #4
0
 def kdtree(self):
     """Return the KDTree for ``self.points``, building and caching it once.

     The tree is stored under ``self._propData['kd']`` and reused on later
     calls. ``None`` is returned when ``self.points`` is ``None``.

     Raises
     ------
     ValueError
         If ``self.distMethod`` is not 0, 1 or 2. Previously this case
         failed with an UnboundLocalError when the tree was cached.
     """
     if 'kd' in self._propData:
         return self._propData['kd']

     pts = self.points
     if pts is None:
         return None

     if self.distMethod == 0:
         # Plain KDTree with its default metric.
         # NOTE(review): the original comment here read "not Euclidean
         # Distance" although no arc metric is requested -- confirm intent.
         kd = KDTree(pts)
     elif self.distMethod == 1:  # 'Arc Distance (miles)'
         kd = KDTree(pts,
                     distance_metric="Arc",
                     radius=pysal.cg.RADIUS_EARTH_MILES)
     elif self.distMethod == 2:  # 'Arc Distance (kilometers)'
         kd = KDTree(pts,
                     distance_metric="Arc",
                     radius=pysal.cg.RADIUS_EARTH_KM)
     else:
         raise ValueError(
             "Unsupported distMethod: %r" % (self.distMethod,))

     self._propData['kd'] = kd
     return kd
Beispiel #5
0
def knnW(data, k=2, p=2, ids=None):
    """
    Creates nearest neighbor weights matrix based on k nearest
    neighbors.

    Parameters
    ----------

    data        : object
                  PySAL KDTree or ArcKDTree where KDtree.data is array (n,k)
                  n observations on k characteristics used to measure
                  distances between the n objects. Any other input is
                  passed to KDTree to build the tree.
    k           : int
                  number of nearest neighbors
    p           : float
                  Minkowski p-norm distance metric parameter:
                  1<=p<=infinity
                  2: Euclidean distance
                  1: Manhattan distance
                  Ignored if the KDTree is an ArcKDTree
    ids         : list
                  identifiers to attach to each observation

    Returns
    -------

    w         : W
                instance
                Weights object with binary weights

    Examples
    --------

    >>> points = [(10, 10), (20, 10), (40, 10), (15, 20), (30, 20), (30, 30)]
    >>> kd = pysal.cg.kdtree.KDTree(np.array(points))
    >>> wnn2 = pysal.knnW(kd, 2)
    >>> [1,3] == wnn2.neighbors[0]
    True

    ids

    >>> wnn2 = knnW(kd,2)
    >>> wnn2[0]
    {1: 1.0, 3: 1.0}
    >>> wnn2[1]
    {0: 1.0, 3: 1.0}

    now with 1 rather than 0 offset

    >>> wnn2 = knnW(kd, 2, ids=range(1,7))
    >>> wnn2[1]
    {2: 1.0, 4: 1.0}
    >>> wnn2[2]
    {1: 1.0, 4: 1.0}
    >>> 0 in wnn2.neighbors
    False

    Notes
    -----

    Ties between neighbors of equal distance are arbitrarily broken.

    See Also
    --------
    pysal.weights.W

    """
    if isKDTree(data):
        kdt = data
        data = kdt.data
    else:
        kdt = KDTree(data)
    # k + 1 hits are requested because the tree is queried with its own
    # points and the focal observation is removed from each row below.
    nnq = kdt.query(data, k=k + 1, p=p)
    info = nnq[1]

    neighbors = {}
    for i, row in enumerate(info):
        row = row.tolist()
        if i in row:
            row.remove(i)
        else:
            # More than k coincident points can push the focal index out of
            # its own hits; drop the farthest hit so k neighbors remain.
            del row[-1]
        # The key is assigned on every iteration. It used to be set only when
        # the focal index was found, which left it stale (or unbound) and
        # overwrote the previous observation's entry when no ids were given.
        if ids:
            focal = ids[i]
            row = [ids[j] for j in row]
        else:
            focal = i
        neighbors[focal] = row
    return pysal.weights.W(neighbors, id_order=ids)
Beispiel #6
0
def knnW(data, k=2, p=2, ids=None, pct_unique=0.25):
    """
    Creates nearest neighbor weights matrix based on k nearest
    neighbors.

    Parameters
    ----------

    data       : array (n,k) or KDTree where KDtree.data is array (n,k)
                 n observations on k characteristics used to measure
                 distances between the n objects
    k          : int
                 number of nearest neighbors
    p          : float
                 Minkowski p-norm distance metric parameter:
                 1<=p<=infinity
                 2: Euclidean distance
                 1: Manhattan distance
    ids        : list
                 identifiers to attach to each observation
    pct_unique : float
                 threshold percentage of unique points in data. Below this
                 threshold tree is built on unique values only

    Returns
    -------

    w         : W instance
                Weights object with binary weights. None is returned (after
                printing 'Unsupported type') when data is neither a KDTree
                nor an ndarray.

    Examples
    --------

    >>> x,y=np.indices((5,5))
    >>> x.shape=(25,1)
    >>> y.shape=(25,1)
    >>> data=np.hstack([x,y])
    >>> wnn2=knnW(data,k=2)
    >>> wnn4=knnW(data,k=4)
    >>> set([1,5,6,2]) == set(wnn4.neighbors[0])
    True
    >>> set([0,6,10,1]) == set(wnn4.neighbors[5])
    True
    >>> set([1,5]) == set(wnn2.neighbors[0])
    True
    >>> set([0,6]) == set(wnn2.neighbors[5])
    True
    >>> "%.2f"%wnn2.pct_nonzero
    '0.08'
    >>> wnn4.pct_nonzero
    0.16
    >>> wnn3e=knnW(data,p=2,k=3)
    >>> set([1,5,6]) == set(wnn3e.neighbors[0])
    True
    >>> wnn3m=knnW(data,p=1,k=3)
    >>> a = set([1,5,2])
    >>> b = set([1,5,6])
    >>> c = set([1,5,10])
    >>> w0n = set(wnn3m.neighbors[0])
    >>> a==w0n or b==w0n or c==w0n
    True

    ids

    >>> wnn2 = knnW(data,2)
    >>> wnn2[0]
    {1: 1.0, 5: 1.0}
    >>> wnn2[1]
    {0: 1.0, 2: 1.0}

    now with 1 rather than 0 offset

    >>> wnn2 = knnW(data,2, ids = range(1,26))
    >>> wnn2[1]
    {2: 1.0, 6: 1.0}
    >>> wnn2[2]
    {1: 1.0, 3: 1.0}
    >>> 0 in wnn2.neighbors
    False

    Notes
    -----

    Ties between neighbors of equal distance are arbitrarily broken.

    See Also
    --------
    pysal.weights.W

    """

    if isinstance(data, scipy.spatial.KDTree):
        kd = data
        data = kd.data
        nnq = kd.query(data, k=k + 1, p=p)
        info = nnq[1]
    elif type(data).__name__ == 'ndarray':
        # check if unique points are a small fraction of all points
        ind = np.lexsort(data.T)
        # Adjacent rows are compared in sorted order, so the resulting mask
        # must be applied to the sorted rows as well. Applying it to the
        # unsorted array selected the wrong rows.
        ordered = data[ind]
        starts_run = np.concatenate(
            ([True], np.any(ordered[1:] != ordered[:-1], axis=1)))
        u = ordered[starts_run]
        pct_u = len(u) * 1. / len(data)
        if pct_u < pct_unique:
            tree = KDTree(u)
            # NOTE(review): when u holds fewer than k + 1 rows the query pads
            # its result with the out-of-range index len(u), which the
            # uid lookup below cannot translate -- confirm callers avoid it.
            nnq = tree.query(data, k=k + 1, p=p)
            info = nnq[1]
            # Position in data of the first occurrence of each unique row.
            uid = [np.where((data == ui).all(axis=1))[0][0] for ui in u]
            new_info = np.zeros((len(data), k + 1), 'int')
            for i, row in enumerate(info):
                new_info[i] = [uid[j] for j in row]
            info = new_info
        else:
            kd = KDTree(data)
            # calculate
            nnq = kd.query(data, k=k + 1, p=p)
            info = nnq[1]
    else:
        # Single-argument call form prints the same text on Python 2 and 3.
        print('Unsupported type')
        return None

    neighbors = {}
    for i, row in enumerate(info):
        row = row.tolist()
        if i in row:
            row.remove(i)
        else:
            # Coincident points (always the case for duplicates in the
            # unique-tree branch) leave the focal index out of its own hits;
            # drop the farthest hit so k neighbors remain.
            del row[-1]
        # The key is assigned on every iteration. It used to be set only when
        # the focal index was found, which left it stale (or unbound) and
        # overwrote the previous observation's entry when no ids were given.
        if ids:
            focal = ids[i]
            row = [ids[j] for j in row]
        else:
            focal = i
        neighbors[focal] = row
    return pysal.weights.W(neighbors, id_order=ids)