Example #1
0
def threshold_continuousW_from_shapefile(shapefile, threshold, p=2,
                                         alpha=-1, idVariable=None, radius=None):
    """
    Threshold distance based continuous weights from a shapefile.

    Parameters
    ----------

    shapefile  : string
                 shapefile name with shp suffix
    threshold  : float
                 distance band
    p          : float
                 Minkowski p-norm distance metric parameter:
                 1<=p<=infinity
                 2: Euclidean distance
                 1: Manhattan distance
    alpha      : float
                 distance decay parameter for weight (default -1.0)
                 if alpha is positive the weights will not decline with
                 distance.
    idVariable : string
                 name of a column in the shapefile's DBF to use for ids
    radius     : float
                 If supplied arc_distances will be calculated
                 based on the given radius. p will be ignored.

    Returns
    -------

    w         : W
                instance; Weights object with continuous weights.

    Examples
    --------
    >>> w = threshold_continuousW_from_shapefile(pysal.examples.get_path("columbus.shp"),0.62,idVariable="POLYID")
    >>> w.weights[1]
    [1.6702346893743334, 1.7250729841938093]

    Notes
    -----
    Supports polygon or point shapefiles. For polygon shapefiles, distance is
    based on polygon centroids. Distances are defined using coordinates in
    shapefile which are assumed to be projected and not geographical
    coordinates.

    """

    data = get_points_array_from_shapefile(shapefile)
    if radius is not None:
        data = pysal.cg.KDTree(data, distance_metric='Arc', radius=radius)
    if idVariable:
        ids = get_ids(shapefile, idVariable)
        w = DistanceBand(data, threshold=threshold, p=p, alpha=alpha, binary=False)
        w.remap_ids(ids)
    else:
        w =  threshold_continuousW_from_array(data, threshold, p=p, alpha=alpha)
    w.set_shapefile(shapefile,idVariable)
    return w
Example #2
0
def threshold_binaryW_from_shapefile(shapefile,
                                     threshold,
                                     p=2,
                                     idVariable=None,
                                     radius=None):
    """
    Threshold distance based binary weights from a shapefile.

    Parameters
    ----------

    shapefile  : string
                 shapefile name with shp suffix
    threshold  : float
                 distance band
    p          : float
                 Minkowski p-norm distance metric parameter:
                 1<=p<=infinity
                 2: Euclidean distance
                 1: Manhattan distance
    idVariable : string
                 name of a column in the shapefile's DBF to use for ids
    radius     : float
                 If supplied arc_distances will be calculated
                 based on the given radius. p will be ignored.

    Returns
    -------

    w         : W
                instance
                Weights object with binary weights

    Examples
    --------
    >>> import libpysal.api as ps
    >>> import libpysal
    >>> w = ps.threshold_binaryW_from_shapefile(libpysal.examples.get_path("columbus.shp"),0.62,idVariable="POLYID")
    >>> w.weights[1]
    [1.0, 1.0]

    Notes
    -----
    Supports polygon or point shapefiles. For polygon shapefiles, distance is
    based on polygon centroids. Distances are defined using coordinates in
    shapefile which are assumed to be projected and not geographical
    coordinates.

    """

    data = get_points_array_from_shapefile(shapefile)
    if radius is not None:
        data = cg.KDTree(data, distance_metric='Arc', radius=radius)
    if idVariable:
        ids = get_ids(shapefile, idVariable)
        w = DistanceBand(data, threshold=threshold, p=p)
        w.remap_ids(ids)
        return w
    return threshold_binaryW_from_array(data, threshold, p=p)
Example #3
0
File: user.py Project: ds2010/pysal
def threshold_binaryW_from_shapefile(shapefile, threshold, p=2, idVariable=None, radius=None):
    """
    Threshold distance based binary weights from a shapefile.

    Parameters
    ----------

    shapefile  : string
                 shapefile name with shp suffix
    threshold  : float
                 distance band
    p          : float
                 Minkowski p-norm distance metric parameter:
                 1<=p<=infinity
                 2: Euclidean distance
                 1: Manhattan distance
    idVariable : string
                 name of a column in the shapefile's DBF to use for ids
    radius     : float
                 If supplied arc_distances will be calculated
                 based on the given radius. p will be ignored.

    Returns
    -------

    w         : W
                instance
                Weights object with binary weights

    Examples
    --------
    >>> w = threshold_binaryW_from_shapefile(pysal.examples.get_path("columbus.shp"),0.62,idVariable="POLYID")
    >>> w.weights[1]
    [1, 1]

    Notes
    -----
    Supports polygon or point shapefiles. For polygon shapefiles, distance is
    based on polygon centroids. Distances are defined using coordinates in
    shapefile which are assumed to be projected and not geographical
    coordinates.

    """

    data = get_points_array_from_shapefile(shapefile)
    if radius is not None:
        data = pysal.cg.KDTree(data, distance_metric='Arc', radius=radius)
    if idVariable:
        ids = get_ids(shapefile, idVariable)
        w = DistanceBand(data, threshold=threshold, p=p)
        w.remap_ids(ids)
        return w
    return threshold_binaryW_from_array(data, threshold, p=p)
Example #4
0
def min_threshold_dist_from_shapefile(shapefile, radius=None, p=2):
    """
    Kernel weights with adaptive bandwidths.

    Parameters
    ----------

    shapefile  : string
                 shapefile name with shp suffix.
    radius     : float
                 If supplied arc_distances will be calculated
                 based on the given radius. p will be ignored.
    p          : float
                 Minkowski p-norm distance metric parameter:
                 1<=p<=infinity
                 2: Euclidean distance
                 1: Manhattan distance

    Returns
    -------
    d          : float
                 Maximum nearest neighbor distance between the n
                 observations.

    Examples
    --------
    >>> import libpysal.api as ps
    >>> import libpysal
    >>> md = ps.min_threshold_dist_from_shapefile(libpysal.examples.get_path("columbus.shp"))
    >>> md
    0.61886415807685413
    >>> ps.min_threshold_dist_from_shapefile(libpysal.examples.get_path("stl_hom.shp"), libpysal.cg.sphere.RADIUS_EARTH_MILES)
    31.846942936393717

    Notes
    -----
    Supports polygon or point shapefiles. For polygon shapefiles, distance is
    based on polygon centroids. Distances are defined using coordinates in
    shapefile which are assumed to be projected and not geographical
    coordinates.

    """
    points = get_points_array_from_shapefile(shapefile)
    if radius is not None:
        kdt = cg.kdtree.Arc_KDTree(points, radius=radius)
        nn = kdt.query(kdt.data, k=2)
        nnd = nn[0].max(axis=0)[1]
        return nnd
    return min_threshold_distance(points, p)
Example #5
0
def min_threshold_dist_from_shapefile(shapefile, radius=None, p=2):
    """
    Kernel weights with adaptive bandwidths.

    Parameters
    ----------

    shapefile  : string
                 shapefile name with shp suffix.
    radius     : float
                 If supplied arc_distances will be calculated
                 based on the given radius. p will be ignored.
    p          : float
                 Minkowski p-norm distance metric parameter:
                 1<=p<=infinity
                 2: Euclidean distance
                 1: Manhattan distance

    Returns
    -------
    d          : float
                 Maximum nearest neighbor distance between the n
                 observations.

    Examples
    --------
    >>> md = min_threshold_dist_from_shapefile(pysal.examples.get_path("columbus.shp"))
    >>> md
    0.61886415807685413
    >>> min_threshold_dist_from_shapefile(pysal.examples.get_path("stl_hom.shp"), pysal.cg.sphere.RADIUS_EARTH_MILES)
    31.846942936393717

    Notes
    -----
    Supports polygon or point shapefiles. For polygon shapefiles, distance is
    based on polygon centroids. Distances are defined using coordinates in
    shapefile which are assumed to be projected and not geographical
    coordinates.

    """
    points = get_points_array_from_shapefile(shapefile)
    if radius is not None:
        kdt = pysal.cg.kdtree.Arc_KDTree(points, radius=radius)
        nn = kdt.query(kdt.data, k=2)
        nnd = nn[0].max(axis=0)[1]
        return nnd
    return min_threshold_distance(points, p)
Example #6
0
def min_threshold_dist_from_shapefile(shapefile, radius=None, p=2):
    """
    Kernel weights with adaptive bandwidths

    Parameters
    ----------

    shapefile  : string
                 shapefile name with shp suffix
    radius     : If supplied arc_distances will be calculated
                 based on the given radius. p will be ignored.
    p          : float
                 Minkowski p-norm distance metric parameter:
                 1<=p<=infinity
                 2: Euclidean distance
                 1: Manhattan distance

    Returns
    -------
    d            : float
                   minimum nearest neighbor distance between the n observations


    Examples
    --------
    >>> md = min_threshold_dist_from_shapefile(pysal.examples.get_path("columbus.shp"))
    >>> md
    0.61886415807685413
    >>> min_threshold_dist_from_shapefile(pysal.examples.get_path("stl_hom.shp"), pysal.cg.sphere.RADIUS_EARTH_MILES)
    31.846942936393717

    Notes
    -----
    Supports polygon or point shapefiles. For polygon shapefiles, distance is
    based on polygon centroids. Distances are defined using coordinates in
    shapefile which are assumed to be projected and not geographical
    coordinates.

    """
    points = get_points_array_from_shapefile(shapefile)
    if radius is not None:
        points = pysal.cg.KDTree(points, distance_metric='Arc', radius=radius)
    return min_threshold_distance(points, p)
Example #7
0
def kernelW_from_shapefile(shapefile, k=2, function='triangular',
        idVariable=None, fixed=True, radius=None, diagonal=False):
    """
    Kernel based weights.

    Parameters
    ----------

    shapefile   : string
                  shapefile name with shp suffix
    k           : int
                  the number of nearest neighbors to use for determining
                  bandwidth. Bandwidth taken as :math:`h_i=max(dknn) \\forall i`
                  where :math:`dknn` is a vector of k-nearest neighbor
                  distances (the distance to the kth nearest neighbor for each
                  observation).
    function    : {'triangular','uniform','quadratic','epanechnikov', 'quartic','bisquare','gaussian'}

                  .. math::

                      z_{i,j} = d_{i,j}/h_i

                  triangular

                  .. math::

                      K(z) = (1 - |z|) \ if |z| \le 1

                  uniform

                  .. math::

                      K(z) = |z| \ if |z| \le 1

                  quadratic

                  .. math::

                      K(z) = (3/4)(1-z^2) \ if |z| \le 1

                  epanechnikov

                  .. math::

                      K(z) = (1-z^2) \ if |z| \le 1

                  quartic

                  .. math::

                      K(z) = (15/16)(1-z^2)^2 \ if |z| \le 1

                  bisquare

                  .. math::

                      K(z) = (1-z^2)^2 \ if |z| \le 1

                  gaussian

                  .. math::

                      K(z) = (2\pi)^{(-1/2)} exp(-z^2 / 2)
    idVariable   : string
                   name of a column in the shapefile's DBF to use for ids

    fixed        : binary
                   If true then :math:`h_i=h \\forall i`. If false then
                   bandwidth is adaptive across observations.
    radius     : float
                 If supplied arc_distances will be calculated
                 based on the given radius. p will be ignored.
    diagonal   : boolean
                 If true, set diagonal weights = 1.0, if false (default)
                 diagonal weights are set to value according to kernel
                 function

    Returns
    -------

    w            : W
                   instance of spatial weights

    Examples
    --------
    >>> kw = pysal.kernelW_from_shapefile(pysal.examples.get_path("columbus.shp"),idVariable='POLYID', function = 'gaussian')

    >>> kwd = pysal.kernelW_from_shapefile(pysal.examples.get_path("columbus.shp"),idVariable='POLYID', function = 'gaussian', diagonal = True)
    >>> set(kw.neighbors[1]) == set([4, 2, 3, 1])
    True
    >>> set(kwd.neighbors[1]) == set([4, 2, 3, 1])
    True
    >>>
    >>> set(kw.weights[1]) == set( [0.2436835517263174, 0.29090631630909874, 0.29671172124745776, 0.3989422804014327])
    True
    >>> set(kwd.weights[1]) == set( [0.2436835517263174, 0.29090631630909874, 0.29671172124745776, 1.0])
    True


    Notes
    -----
    Supports polygon or point shapefiles. For polygon shapefiles, distance is
    based on polygon centroids. Distances are defined using coordinates in
    shapefile which are assumed to be projected and not geographical
    coordinates.

    """

    points = get_points_array_from_shapefile(shapefile)
    if radius is not None:
        points = pysal.cg.KDTree(points, distance_metric='Arc', radius=radius)
    if idVariable:
        ids = get_ids(shapefile, idVariable)
        return Kernel(points, function=function, k=k, ids=ids, fixed=fixed,
                diagonal = diagonal)
    return kernelW(points, k=k, function=function, fixed=fixed,
            diagonal=diagonal)
Example #8
0
def knnW_from_shapefile(shapefile, k=2, p=2, idVariable=None, radius=None):
    """
    Nearest neighbor weights from a shapefile.

    Parameters
    ----------

    shapefile  : string
                 shapefile name with shp suffix
    k          : int
                 number of nearest neighbors
    p          : float
                 Minkowski p-norm distance metric parameter:
                 1<=p<=infinity
                 2: Euclidean distance
                 1: Manhattan distance
    idVariable : string
                 name of a column in the shapefile's DBF to use for ids
    radius     : float
                 If supplied arc_distances will be calculated
                 based on the given radius. p will be ignored.

    Returns
    -------

    w         : W
                instance; Weights object with binary weights

    Examples
    --------

    Polygon shapefile

    >>> wc=knnW_from_shapefile(pysal.examples.get_path("columbus.shp"))
    >>> "%.4f"%wc.pct_nonzero
    '4.0816'
    >>> set([2,1]) == set(wc.neighbors[0])
    True
    >>> wc3=pysal.knnW_from_shapefile(pysal.examples.get_path("columbus.shp"),k=3)
    >>> set(wc3.neighbors[0]) == set([2,1,3])
    True
    >>> set(wc3.neighbors[2]) == set([4,3,0])
    True

    1 offset rather than 0 offset

    >>> wc3_1=knnW_from_shapefile(pysal.examples.get_path("columbus.shp"),k=3,idVariable="POLYID")
    >>> set([4,3,2]) == set(wc3_1.neighbors[1])
    True
    >>> wc3_1.weights[2]
    [1.0, 1.0, 1.0]
    >>> set([4,1,8]) == set(wc3_1.neighbors[2])
    True


    Point shapefile

    >>> w=knnW_from_shapefile(pysal.examples.get_path("juvenile.shp"))
    >>> w.pct_nonzero
    1.1904761904761905
    >>> w1=knnW_from_shapefile(pysal.examples.get_path("juvenile.shp"),k=1)
    >>> "%.3f"%w1.pct_nonzero
    '0.595'
    >>>

    Notes
    -----

    Supports polygon or point shapefiles. For polygon shapefiles, distance is
    based on polygon centroids. Distances are defined using coordinates in
    shapefile which are assumed to be projected and not geographical
    coordinates.

    Ties between neighbors of equal distance are arbitrarily broken.

    See Also
    --------
    :class:`pysal.weights.W`

    """

    data = get_points_array_from_shapefile(shapefile)

    if radius is not None:
        kdtree = pysal.cg.KDTree(data, distance_metric='Arc', radius=radius)
    else:
        kdtree = pysal.cg.KDTree(data)
    if idVariable:
        ids = get_ids(shapefile, idVariable)
        return knnW(kdtree, k=k, p=p, ids=ids)
    return knnW(kdtree, k=k, p=p)
Example #9
0
def adaptive_kernelW_from_shapefile(shapefile,
                                    bandwidths=None,
                                    k=2,
                                    function='triangular',
                                    idVariable=None,
                                    radius=None):
    """
    Kernel weights with adaptive bandwidths

    Parameters
    ----------

    shapefile   : string
                  shapefile name with shp suffix
    bandwidths  : float or array-like (optional)
                  the bandwidth :math:`h_i` for the kernel.
                  if no bandwidth is specified k is used to determine the
                  adaptive bandwidth
    k           : int
                  the number of nearest neighbors to use for determining
                  bandwidth. For fixed bandwidth, :math:`h_i=max(dknn) \\forall i`
                  where :math:`dknn` is a vector of k-nearest neighbor
                  distances (the distance to the kth nearest neighbor for each
                  observation).  For adaptive bandwidths, :math:`h_i=dknn_i`
    function    : string {'triangular','uniform','quadratic','quartic','gaussian'}
                  kernel function defined as follows with

                  .. math::

                      z_{i,j} = d_{i,j}/h_i

                  triangular

                  .. math::

                      K(z) = (1 - |z|) \ if |z| \le 1

                  uniform

                  .. math::

                      K(z) = |z| \ if |z| \le 1

                  quadratic

                  .. math::

                      K(z) = (3/4)(1-z^2) \ if |z| \le 1

                  quartic

                  .. math::

                      K(z) = (15/16)(1-z^2)^2 \ if |z| \le 1

                  gaussian

                  .. math::

                      K(z) = (2\pi)^{(-1/2)} exp(-z^2 / 2)
    idVariable   : string
                   name of a column in the shapefile's DBF to use for ids
    radius     : If supplied arc_distances will be calculated
                 based on the given radius. p will be ignored.

    Returns
    -------

    w            : W
                   instance of spatial weights


    Examples
    --------
    >>> kwa = adaptive_kernelW_from_shapefile(pysal.examples.get_path("columbus.shp"))
    >>> kwa.weights[0]
    [1.0, 0.03178906767736345, 9.99999900663795e-08]
    >>> kwa.bandwidth[:3]
    array([[ 0.59871832],
           [ 0.59871832],
           [ 0.56095647]])

    Notes
    -----
    Supports polygon or point shapefiles. For polygon shapefiles, distance is
    based on polygon centroids. Distances are defined using coordinates in
    shapefile which are assumed to be projected and not geographical
    coordinates.

    """
    points = get_points_array_from_shapefile(shapefile)
    if radius is not None:
        points = pysal.cg.KDTree(points, distance_metric='Arc', radius=radius)
    if idVariable:
        ids = get_ids(shapefile, idVariable)
        return Kernel(points,
                      bandwidth=bandwidths,
                      fixed=False,
                      k=k,
                      function=function,
                      ids=ids)
    return adaptive_kernelW(points,
                            bandwidths=bandwidths,
                            k=k,
                            function=function)
Example #10
0
def kernelW_from_shapefile(shapefile,
                           k=2,
                           function='triangular',
                           idVariable=None,
                           fixed=True,
                           radius=None):
    """
    Kernel based weights

    Parameters
    ----------

    shapefile   : string
                  shapefile name with shp suffix
    k           : int
                  the number of nearest neighbors to use for determining
                  bandwidth. Bandwidth taken as :math:`h_i=max(dknn) \\forall i`
                  where :math:`dknn` is a vector of k-nearest neighbor
                  distances (the distance to the kth nearest neighbor for each
                  observation).
    function    : string {'triangular','uniform','quadratic','epanechnikov',
                  'quartic','bisquare','gaussian'}


                  .. math::

                      z_{i,j} = d_{i,j}/h_i

                  triangular

                  .. math::

                      K(z) = (1 - |z|) \ if |z| \le 1

                  uniform

                  .. math::

                      K(z) = |z| \ if |z| \le 1

                  quadratic

                  .. math::

                      K(z) = (3/4)(1-z^2) \ if |z| \le 1

                  epanechnikov

                  .. math::

                      K(z) = (1-z^2) \ if |z| \le 1

                  quartic

                  .. math::

                      K(z) = (15/16)(1-z^2)^2 \ if |z| \le 1

                  bisquare

                  .. math::

                      K(z) = (1-z^2)^2 \ if |z| \le 1

                  gaussian

                  .. math::

                      K(z) = (2\pi)^{(-1/2)} exp(-z^2 / 2)
    idVariable   : string
                   name of a column in the shapefile's DBF to use for ids

    fixed        : binary
                   If true then :math:`h_i=h \\forall i`. If false then
                   bandwidth is adaptive across observations.
    radius     : If supplied arc_distances will be calculated
                 based on the given radius. p will be ignored.


    Returns
    -------

    w            : W
                   instance of spatial weights

    Examples
    --------
    >>> kw = kernelW_from_shapefile(pysal.examples.get_path("columbus.shp"),idVariable='POLYID')
    >>> kw.weights[1]
    [0.2052478782400463, 0.007078773148450623, 1.0, 0.23051223027663237]
    >>> kw.bandwidth[:3]
    array([[ 0.75333961],
           [ 0.75333961],
           [ 0.75333961]])


    Notes
    -----
    Supports polygon or point shapefiles. For polygon shapefiles, distance is
    based on polygon centroids. Distances are defined using coordinates in
    shapefile which are assumed to be projected and not geographical
    coordinates.


    """
    points = get_points_array_from_shapefile(shapefile)
    if radius is not None:
        points = pysal.cg.KDTree(points, distance_metric='Arc', radius=radius)
    if idVariable:
        ids = get_ids(shapefile, idVariable)
        return Kernel(points, function=function, k=k, ids=ids, fixed=fixed)
    return kernelW(points, k=k, function=function, fixed=fixed)
Example #11
0
#Hardcoded paths to my data for testing
shapefile = '/home/jlaura/Downloads/julia/us_tracts_data.shp'
subset = '/home/jlaura/Downloads/julia/subset.shp'
tiny = '/home/jlaura/Downloads/julia/tiny.shp'
a = '/home/jlaura/Downloads/julia/25.shp'

#Hardcoded parameters
k = 2
function = 'triangular'
fixed = False
diagonal = False

ta = time.time()
#Get the centroids from the polys
t1 = time.time()
points = get_points_array_from_shapefile(shapefile)
t2 = time.time()
print "Generating points array took {} seconds.".format(t2 - t1)

#Pickle and unpickle a class method - avoids instancemethod error in mp
#http://stackoverflow.com/questions/1816958/cant-pickle-type-instancemethod-when-using-pythons-multiprocessing-pool-ma/7309686#7309686
def _pickle_method(method):
    func_name = method.im_func.__name__
    obj = method.im_self
    cls = method.im_class
    return _unpickle_method, (func_name, obj, cls)

def _unpickle_method(func_name, obj, cls):
    for cls in cls.mro():
        try:
            func = cls.__dict__[func_name]
Example #12
0
def adaptive_kernelW_from_shapefile(shapefile, bandwidths=None, k=2, function='triangular',
                                    idVariable=None, radius=None):
    """
    Kernel weights with adaptive bandwidths

    Parameters
    ----------

    shapefile   : string
                  shapefile name with shp suffix
    bandwidths  : float or array-like (optional)
                  the bandwidth :math:`h_i` for the kernel.
                  if no bandwidth is specified k is used to determine the
                  adaptive bandwidth
    k           : int
                  the number of nearest neighbors to use for determining
                  bandwidth. For fixed bandwidth, :math:`h_i=max(dknn) \\forall i`
                  where :math:`dknn` is a vector of k-nearest neighbor
                  distances (the distance to the kth nearest neighbor for each
                  observation).  For adaptive bandwidths, :math:`h_i=dknn_i`
    function    : string {'triangular','uniform','quadratic','quartic','gaussian'}
                  kernel function defined as follows with

                  .. math::

                      z_{i,j} = d_{i,j}/h_i

                  triangular

                  .. math::

                      K(z) = (1 - |z|) \ if |z| \le 1

                  uniform

                  .. math::

                      K(z) = |z| \ if |z| \le 1

                  quadratic

                  .. math::

                      K(z) = (3/4)(1-z^2) \ if |z| \le 1

                  quartic

                  .. math::

                      K(z) = (15/16)(1-z^2)^2 \ if |z| \le 1

                  gaussian

                  .. math::

                      K(z) = (2\pi)^{(-1/2)} exp(-z^2 / 2)
    idVariable   : string
                   name of a column in the shapefile's DBF to use for ids
    radius     : If supplied arc_distances will be calculated
                 based on the given radius. p will be ignored.

    Returns
    -------

    w            : W
                   instance of spatial weights


    Examples
    --------
    >>> kwa = adaptive_kernelW_from_shapefile(pysal.examples.get_path("columbus.shp"))
    >>> kwa.weights[0]
    [1.0, 0.03178906767736345, 9.99999900663795e-08]
    >>> kwa.bandwidth[:3]
    array([[ 0.59871832],
           [ 0.59871832],
           [ 0.56095647]])

    Notes
    -----
    Supports polygon or point shapefiles. For polygon shapefiles, distance is
    based on polygon centroids. Distances are defined using coordinates in
    shapefile which are assumed to be projected and not geographical
    coordinates.

    """
    points = get_points_array_from_shapefile(shapefile)
    if radius is not None:
        points = pysal.cg.KDTree(points, distance_metric='Arc', radius=radius)
    if idVariable:
        ids = get_ids(shapefile, idVariable)
        return Kernel(points, bandwidth=bandwidths, fixed=False, k=k, function=function, ids=ids)
    return adaptive_kernelW(points, bandwidths=bandwidths, k=k, function=function)
Example #13
0
def kernelW_from_shapefile(shapefile, k=2, function='triangular', idVariable=None, fixed=True, radius=None):
    """
    Kernel based weights

    Parameters
    ----------

    shapefile   : string
                  shapefile name with shp suffix
    k           : int
                  the number of nearest neighbors to use for determining
                  bandwidth. Bandwidth taken as :math:`h_i=max(dknn) \\forall i`
                  where :math:`dknn` is a vector of k-nearest neighbor
                  distances (the distance to the kth nearest neighbor for each
                  observation).
    function    : string {'triangular','uniform','quadratic','epanechnikov',
                  'quartic','bisquare','gaussian'}


                  .. math::

                      z_{i,j} = d_{i,j}/h_i

                  triangular

                  .. math::

                      K(z) = (1 - |z|) \ if |z| \le 1

                  uniform

                  .. math::

                      K(z) = |z| \ if |z| \le 1

                  quadratic

                  .. math::

                      K(z) = (3/4)(1-z^2) \ if |z| \le 1

                  epanechnikov

                  .. math::

                      K(z) = (1-z^2) \ if |z| \le 1

                  quartic

                  .. math::

                      K(z) = (15/16)(1-z^2)^2 \ if |z| \le 1

                  bisquare

                  .. math::

                      K(z) = (1-z^2)^2 \ if |z| \le 1

                  gaussian

                  .. math::

                      K(z) = (2\pi)^{(-1/2)} exp(-z^2 / 2)
    idVariable   : string
                   name of a column in the shapefile's DBF to use for ids

    fixed        : binary
                   If true then :math:`h_i=h \\forall i`. If false then
                   bandwidth is adaptive across observations.
    radius     : If supplied arc_distances will be calculated
                 based on the given radius. p will be ignored.


    Returns
    -------

    w            : W
                   instance of spatial weights

    Examples
    --------
    >>> kw = kernelW_from_shapefile(pysal.examples.get_path("columbus.shp"),idVariable='POLYID')
    >>> kw.weights[1]
    [0.2052478782400463, 0.007078773148450623, 1.0, 0.23051223027663237]
    >>> kw.bandwidth[:3]
    array([[ 0.75333961],
           [ 0.75333961],
           [ 0.75333961]])


    Notes
    -----
    Supports polygon or point shapefiles. For polygon shapefiles, distance is
    based on polygon centroids. Distances are defined using coordinates in
    shapefile which are assumed to be projected and not geographical
    coordinates.


    """
    points = get_points_array_from_shapefile(shapefile)
    if radius is not None:
        points = pysal.cg.KDTree(points, distance_metric='Arc', radius=radius)
    if idVariable:
        ids = get_ids(shapefile, idVariable)
        return Kernel(points, function=function, k=k, ids=ids, fixed=fixed)
    return kernelW(points, k=k, function=function, fixed=fixed)
Example #14
0
def knnW_from_shapefile(shapefile, k=2, p=2, idVariable=None, radius=None):
    """
    Nearest neighbor weights from a shapefile

    Parameters
    ----------

    shapefile  : string
                 shapefile name with shp suffix
    k          : int
                 number of nearest neighbors
    p          : float
                 Minkowski p-norm distance metric parameter:
                 1<=p<=infinity
                 2: Euclidean distance
                 1: Manhattan distance
    idVariable : string
                 name of a column in the shapefile's DBF to use for ids
    radius     : If supplied arc_distances will be calculated
                 based on the given radius. p will be ignored.

    Returns
    -------

    w         : W instance
                Weights object with binary weights


    Examples
    --------

    Polygon shapefile

    >>> wc=knnW_from_shapefile(pysal.examples.get_path("columbus.shp"))
    >>> wc.pct_nonzero
    0.040816326530612242
    >>> wc3=knnW_from_shapefile(pysal.examples.get_path("columbus.shp"),k=3,idVariable="POLYID")
    >>> wc3.weights[1]
    [1, 1, 1]
    >>> wc3.neighbors[1]
    [3, 2, 4]
    >>> wc.neighbors[0]
    [2, 1]

    Point shapefile

    >>> w=knnW_from_shapefile(pysal.examples.get_path("juvenile.shp"))
    >>> w.pct_nonzero
    0.011904761904761904
    >>> w1=knnW_from_shapefile(pysal.examples.get_path("juvenile.shp"),k=1)
    >>> w1.pct_nonzero
    0.0059523809523809521
    >>>

    Notes
    -----

    Supports polygon or point shapefiles. For polygon shapefiles, distance is
    based on polygon centroids. Distances are defined using coordinates in
    shapefile which are assumed to be projected and not geographical
    coordinates.

    Ties between neighbors of equal distance are arbitrarily broken.


    See Also
    --------
    :class:`pysal.weights.W`

    """

    data = get_points_array_from_shapefile(shapefile)
    if radius is not None:
        data = pysal.cg.KDTree(data, distance_metric='Arc', radius=radius)
    if idVariable:
        ids = get_ids(shapefile, idVariable)
        return knnW(data, k=k, p=p, ids=ids)
    return knnW(data, k=k, p=p)
Example #15
0
def adaptive_kernelW_from_shapefile(shapefile, bandwidths=None, k=2, function='triangular',
                                    idVariable=None, radius=None,
                                    diagonal = False):
    """
    Kernel weights with adaptive bandwidths.

    Parameters
    ----------

    shapefile   : string
                  shapefile name with shp suffix
    bandwidths  : float
                  or array-like (optional)
                  the bandwidth :math:`h_i` for the kernel.
                  if no bandwidth is specified k is used to determine the
                  adaptive bandwidth
    k           : int
                  the number of nearest neighbors to use for determining
                  bandwidth. For fixed bandwidth, :math:`h_i=max(dknn) \\forall i`
                  where :math:`dknn` is a vector of k-nearest neighbor
                  distances (the distance to the kth nearest neighbor for each
                  observation).  For adaptive bandwidths, :math:`h_i=dknn_i`
    function    : {'triangular','uniform','quadratic','quartic','gaussian'}
                  kernel function defined as follows with

                  .. math::

                      z_{i,j} = d_{i,j}/h_i

                  triangular

                  .. math::

                      K(z) = (1 - |z|) \ if |z| \le 1

                  uniform

                  .. math::

                      K(z) = |z| \ if |z| \le 1

                  quadratic

                  .. math::

                      K(z) = (3/4)(1-z^2) \ if |z| \le 1

                  quartic

                  .. math::

                      K(z) = (15/16)(1-z^2)^2 \ if |z| \le 1

                  gaussian

                  .. math::

                      K(z) = (2\pi)^{(-1/2)} exp(-z^2 / 2)
    idVariable   : string
                   name of a column in the shapefile's DBF to use for ids
    radius     : float
                 If supplied arc_distances will be calculated
                 based on the given radius. p will be ignored.
    diagonal   : boolean
                 If true, set diagonal weights = 1.0, if false (default)
                 diagonal weights are set to value according to kernel
                 function

    Returns
    -------

    w            : W
                   instance of spatial weights

    Examples
    --------
    >>> kwa = pysal.adaptive_kernelW_from_shapefile(pysal.examples.get_path("columbus.shp"), function='gaussian')
    >>> kwad = pysal.adaptive_kernelW_from_shapefile(pysal.examples.get_path("columbus.shp"), function='gaussian', diagonal=True)
    >>> kwa.neighbors[0]
    [0, 2, 1]
    >>> kwad.neighbors[0]
    [0, 2, 1]
    >>> kwa.weights[0]
    [0.3989422804014327, 0.24966013701844503, 0.2419707487162134]
    >>> kwad.weights[0]
    [1.0, 0.24966013701844503, 0.2419707487162134]
    >>>

    Notes
    -----
    Supports polygon or point shapefiles. For polygon shapefiles, distance is
    based on polygon centroids. Distances are defined using coordinates in
    shapefile which are assumed to be projected and not geographical
    coordinates.

    """
    points = get_points_array_from_shapefile(shapefile)
    if radius is not None:
        points = pysal.cg.KDTree(points, distance_metric='Arc', radius=radius)
    if idVariable:
        ids = get_ids(shapefile, idVariable)
        return Kernel(points, bandwidth=bandwidths, fixed=False, k=k,
                function=function, ids=ids, diagonal=diagonal)
    return adaptive_kernelW(points, bandwidths=bandwidths, k=k,
            function=function, diagonal=diagonal)
Example #16
0
#Hardcoded paths to my data for testing
shapefile = '/home/jlaura/Downloads/julia/us_tracts_data.shp'
subset = '/home/jlaura/Downloads/julia/subset.shp'
tiny = '/home/jlaura/Downloads/julia/tiny.shp'
a = '/home/jlaura/Downloads/julia/25.shp'

#Hardcoded parameters
k = 2
function = 'triangular'
fixed = False
diagonal = False

ta = time.time()
#Get the centroids from the polys
t1 = time.time()
points = get_points_array_from_shapefile(shapefile)
t2 = time.time()
print "Generating points array took {} seconds.".format(t2 - t1)


#Pickle and unpickle a class method - avoids instancemethod error in mp
#http://stackoverflow.com/questions/1816958/cant-pickle-type-instancemethod-when-using-pythons-multiprocessing-pool-ma/7309686#7309686
def _pickle_method(method):
    func_name = method.im_func.__name__
    obj = method.im_self
    cls = method.im_class
    return _unpickle_method, (func_name, obj, cls)


def _unpickle_method(func_name, obj, cls):
    for cls in cls.mro():
Example #17
0
def knnW_from_shapefile(shapefile, k=2, p=2, idVariable=None, radius=None):
    """
    Nearest neighbor weights from a shapefile

    Parameters
    ----------

    shapefile  : string
                 shapefile name with shp suffix
    k          : int
                 number of nearest neighbors
    p          : float
                 Minkowski p-norm distance metric parameter:
                 1<=p<=infinity
                 2: Euclidean distance
                 1: Manhattan distance
    idVariable : string
                 name of a column in the shapefile's DBF to use for ids
    radius     : If supplied arc_distances will be calculated
                 based on the given radius. p will be ignored.

    Returns
    -------

    w         : W instance
                Weights object with binary weights


    Examples
    --------

    Polygon shapefile

    >>> wc=knnW_from_shapefile(pysal.examples.get_path("columbus.shp"))
    >>> wc.pct_nonzero
    0.040816326530612242
    >>> wc3=knnW_from_shapefile(pysal.examples.get_path("columbus.shp"),k=3,idVariable="POLYID")
    >>> wc3.weights[1]
    [1, 1, 1]
    >>> wc3.neighbors[1]
    [3, 2, 4]
    >>> wc.neighbors[0]
    [2, 1]

    Point shapefile

    >>> w=knnW_from_shapefile(pysal.examples.get_path("juvenile.shp"))
    >>> w.pct_nonzero
    0.011904761904761904
    >>> w1=knnW_from_shapefile(pysal.examples.get_path("juvenile.shp"),k=1)
    >>> w1.pct_nonzero
    0.0059523809523809521
    >>>

    Notes
    -----

    Supports polygon or point shapefiles. For polygon shapefiles, distance is
    based on polygon centroids. Distances are defined using coordinates in
    shapefile which are assumed to be projected and not geographical
    coordinates.

    Ties between neighbors of equal distance are arbitrarily broken.


    See Also
    --------
    :class:`pysal.weights.W`

    """

    data = get_points_array_from_shapefile(shapefile)
    if radius is not None:
        data = pysal.cg.KDTree(data, distance_metric='Arc', radius=radius)
    if idVariable:
        ids = get_ids(shapefile, idVariable)
        return knnW(data, k=k, p=p, ids=ids)
    return knnW(data, k=k, p=p)