def threshold_continuousW_from_shapefile(shapefile, threshold, p=2,
                                         alpha=-1, idVariable=None,
                                         radius=None):
    """
    Build continuous distance-band weights from the points of a shapefile.

    Parameters
    ----------
    shapefile : string
        shapefile name with shp suffix
    threshold : float
        distance band
    p : float
        Minkowski p-norm distance metric parameter: 1<=p<=infinity
        2: Euclidean distance
        1: Manhattan distance
    alpha : float
        distance decay parameter for weight (default -1.0); if alpha is
        positive the weights will not decline with distance.
    idVariable : string
        name of a column in the shapefile's DBF to use for ids
    radius : float
        If supplied arc_distances will be calculated based on the given
        radius. p will be ignored.

    Returns
    -------
    w : W
        Weights object with continuous weights.

    Examples
    --------
    >>> w = threshold_continuousW_from_shapefile(pysal.examples.get_path("columbus.shp"),0.62,idVariable="POLYID")
    >>> w.weights[1]
    [1.6702346893743334, 1.7250729841938093]

    Notes
    -----
    Supports polygon or point shapefiles. For polygon shapefiles, distance is
    based on polygon centroids. Distances are defined using coordinates in
    shapefile which are assumed to be projected and not geographical
    coordinates.
    """
    coords = get_points_array_from_shapefile(shapefile)
    if radius is not None:
        # Arc distances requested: hand the builders a KD-tree instead of
        # the raw coordinate array.
        coords = pysal.cg.KDTree(coords, distance_metric='Arc', radius=radius)

    if not idVariable:
        weights = threshold_continuousW_from_array(coords, threshold, p=p,
                                                   alpha=alpha)
    else:
        # Ids are read before the weights are built, then applied to them.
        id_order = get_ids(shapefile, idVariable)
        weights = DistanceBand(coords, threshold=threshold, p=p, alpha=alpha,
                               binary=False)
        weights.remap_ids(id_order)

    # Both branches record the source shapefile on the result.
    weights.set_shapefile(shapefile, idVariable)
    return weights
def threshold_binaryW_from_shapefile(shapefile, threshold, p=2,
                                     idVariable=None, radius=None):
    """
    Build binary distance-band weights from the points of a shapefile.

    Parameters
    ----------
    shapefile : string
        shapefile name with shp suffix
    threshold : float
        distance band
    p : float
        Minkowski p-norm distance metric parameter: 1<=p<=infinity
        2: Euclidean distance
        1: Manhattan distance
    idVariable : string
        name of a column in the shapefile's DBF to use for ids
    radius : float
        If supplied arc_distances will be calculated based on the given
        radius. p will be ignored.

    Returns
    -------
    w : W
        Weights object with binary weights

    Examples
    --------
    >>> import libpysal.api as ps
    >>> import libpysal
    >>> w = ps.threshold_binaryW_from_shapefile(libpysal.examples.get_path("columbus.shp"),0.62,idVariable="POLYID")
    >>> w.weights[1]
    [1.0, 1.0]

    Notes
    -----
    Supports polygon or point shapefiles. For polygon shapefiles, distance is
    based on polygon centroids. Distances are defined using coordinates in
    shapefile which are assumed to be projected and not geographical
    coordinates.
    """
    coords = get_points_array_from_shapefile(shapefile)
    if radius is not None:
        # Arc distances requested: wrap the coordinates in a KD-tree.
        coords = cg.KDTree(coords, distance_metric='Arc', radius=radius)

    if not idVariable:
        return threshold_binaryW_from_array(coords, threshold, p=p)

    # With an id column, build the band directly and relabel it.
    id_order = get_ids(shapefile, idVariable)
    weights = DistanceBand(coords, threshold=threshold, p=p)
    weights.remap_ids(id_order)
    return weights
def threshold_binaryW_from_shapefile(shapefile, threshold, p=2,
                                     idVariable=None, radius=None):
    """
    Build binary distance-band weights from the points of a shapefile.

    Parameters
    ----------
    shapefile : string
        shapefile name with shp suffix
    threshold : float
        distance band
    p : float
        Minkowski p-norm distance metric parameter: 1<=p<=infinity
        2: Euclidean distance
        1: Manhattan distance
    idVariable : string
        name of a column in the shapefile's DBF to use for ids
    radius : float
        If supplied arc_distances will be calculated based on the given
        radius. p will be ignored.

    Returns
    -------
    w : W
        Weights object with binary weights

    Examples
    --------
    >>> w = threshold_binaryW_from_shapefile(pysal.examples.get_path("columbus.shp"),0.62,idVariable="POLYID")
    >>> w.weights[1]
    [1, 1]

    Notes
    -----
    Supports polygon or point shapefiles. For polygon shapefiles, distance is
    based on polygon centroids. Distances are defined using coordinates in
    shapefile which are assumed to be projected and not geographical
    coordinates.
    """
    coords = get_points_array_from_shapefile(shapefile)
    if radius is not None:
        # Arc distances requested: wrap the coordinates in a KD-tree.
        coords = pysal.cg.KDTree(coords, distance_metric='Arc', radius=radius)

    if not idVariable:
        return threshold_binaryW_from_array(coords, threshold, p=p)

    # With an id column, build the band directly and relabel it.
    id_order = get_ids(shapefile, idVariable)
    weights = DistanceBand(coords, threshold=threshold, p=p)
    weights.remap_ids(id_order)
    return weights
def min_threshold_dist_from_shapefile(shapefile, radius=None, p=2):
    """
    Get the maximum nearest neighbor distance for the points of a shapefile.

    Parameters
    ----------
    shapefile : string
        shapefile name with shp suffix.
    radius : float
        If supplied arc_distances will be calculated based on the given
        radius. p will be ignored.
    p : float
        Minkowski p-norm distance metric parameter: 1<=p<=infinity
        2: Euclidean distance
        1: Manhattan distance

    Returns
    -------
    d : float
        Maximum nearest neighbor distance between the n observations.

    Examples
    --------
    >>> import libpysal.api as ps
    >>> import libpysal
    >>> md = ps.min_threshold_dist_from_shapefile(libpysal.examples.get_path("columbus.shp"))
    >>> md
    0.61886415807685413
    >>> ps.min_threshold_dist_from_shapefile(libpysal.examples.get_path("stl_hom.shp"), libpysal.cg.sphere.RADIUS_EARTH_MILES)
    31.846942936393717

    Notes
    -----
    Supports polygon or point shapefiles. For polygon shapefiles, distance is
    based on polygon centroids. Distances are defined using coordinates in
    shapefile which are assumed to be projected and not geographical
    coordinates.
    """
    coords = get_points_array_from_shapefile(shapefile)
    if radius is None:
        return min_threshold_distance(coords, p)

    arc_tree = cg.kdtree.Arc_KDTree(coords, radius=radius)
    # Query the tree against its own points with k=2; presumably column 0 is
    # each point matched to itself and column 1 its nearest other point
    # (TODO confirm against Arc_KDTree.query).
    nn_distances = arc_tree.query(arc_tree.data, k=2)[0]
    return nn_distances.max(axis=0)[1]
def min_threshold_dist_from_shapefile(shapefile, radius=None, p=2):
    """
    Get the maximum nearest neighbor distance for the points of a shapefile.

    Parameters
    ----------
    shapefile : string
        shapefile name with shp suffix.
    radius : float
        If supplied arc_distances will be calculated based on the given
        radius. p will be ignored.
    p : float
        Minkowski p-norm distance metric parameter: 1<=p<=infinity
        2: Euclidean distance
        1: Manhattan distance

    Returns
    -------
    d : float
        Maximum nearest neighbor distance between the n observations.

    Examples
    --------
    >>> md = min_threshold_dist_from_shapefile(pysal.examples.get_path("columbus.shp"))
    >>> md
    0.61886415807685413
    >>> min_threshold_dist_from_shapefile(pysal.examples.get_path("stl_hom.shp"), pysal.cg.sphere.RADIUS_EARTH_MILES)
    31.846942936393717

    Notes
    -----
    Supports polygon or point shapefiles. For polygon shapefiles, distance is
    based on polygon centroids. Distances are defined using coordinates in
    shapefile which are assumed to be projected and not geographical
    coordinates.
    """
    coords = get_points_array_from_shapefile(shapefile)
    if radius is None:
        return min_threshold_distance(coords, p)

    arc_tree = pysal.cg.kdtree.Arc_KDTree(coords, radius=radius)
    # Query the tree against its own points with k=2; presumably column 0 is
    # each point matched to itself and column 1 its nearest other point
    # (TODO confirm against Arc_KDTree.query).
    nn_distances = arc_tree.query(arc_tree.data, k=2)[0]
    return nn_distances.max(axis=0)[1]
def min_threshold_dist_from_shapefile(shapefile, radius=None, p=2):
    """
    Compute the minimum threshold distance for the points of a shapefile.

    The shapefile's points are read and passed to
    ``min_threshold_distance``, wrapped in an arc-distance KD-tree first
    when ``radius`` is given.

    Parameters
    ----------
    shapefile : string
        shapefile name with shp suffix
    radius : float
        If supplied arc_distances will be calculated based on the given
        radius. p will be ignored.
    p : float
        Minkowski p-norm distance metric parameter: 1<=p<=infinity
        2: Euclidean distance
        1: Manhattan distance

    Returns
    -------
    d : float
        Value returned by ``min_threshold_distance``. The earlier
        documentation describes it as the minimum nearest neighbor distance
        between the n observations. NOTE(review): confirm whether this is
        the minimum or the maximum of the nearest neighbor distances.

    Examples
    --------
    >>> md = min_threshold_dist_from_shapefile(pysal.examples.get_path("columbus.shp"))
    >>> md
    0.61886415807685413
    >>> min_threshold_dist_from_shapefile(pysal.examples.get_path("stl_hom.shp"), pysal.cg.sphere.RADIUS_EARTH_MILES)
    31.846942936393717

    Notes
    -----
    Supports polygon or point shapefiles. For polygon shapefiles, distance is
    based on polygon centroids. Distances are defined using coordinates in
    shapefile which are assumed to be projected and not geographical
    coordinates.
    """
    centroids = get_points_array_from_shapefile(shapefile)
    if radius is None:
        source = centroids
    else:
        # Arc distances requested: pass a KD-tree instead of the raw array.
        source = pysal.cg.KDTree(centroids, distance_metric='Arc',
                                 radius=radius)
    return min_threshold_distance(source, p)
def kernelW_from_shapefile(shapefile, k=2, function='triangular',
                           idVariable=None, fixed=True, radius=None,
                           diagonal=False):
    r"""
    Kernel based weights from the points of a shapefile.

    Parameters
    ----------
    shapefile : string
        shapefile name with shp suffix
    k : int
        the number of nearest neighbors to use for determining bandwidth.
        Bandwidth taken as :math:`h_i=max(dknn) \forall i` where
        :math:`dknn` is a vector of k-nearest neighbor distances (the
        distance to the kth nearest neighbor for each observation).
    function : string
        Kernel function name; one of 'triangular', 'uniform', 'quadratic',
        'epanechnikov', 'quartic', 'bisquare' or 'gaussian'. With
        :math:`z_{i,j} = d_{i,j}/h_i` the kernels are documented as:

        triangular
            :math:`K(z) = (1 - |z|) \ if |z| \le 1`
        uniform
            :math:`K(z) = |z| \ if |z| \le 1`
        quadratic
            :math:`K(z) = (3/4)(1-z^2) \ if |z| \le 1`
        epanechnikov
            :math:`K(z) = (1-z^2) \ if |z| \le 1`
        quartic
            :math:`K(z) = (15/16)(1-z^2)^2 \ if |z| \le 1`
        bisquare
            :math:`K(z) = (1-z^2)^2 \ if |z| \le 1`
        gaussian
            :math:`K(z) = (2\pi)^{(-1/2)} exp(-z^2 / 2)`
    idVariable : string
        name of a column in the shapefile's DBF to use for ids
    fixed : binary
        If true then :math:`h_i=h \forall i`. If false then bandwidth is
        adaptive across observations.
    radius : float
        If supplied arc_distances will be calculated based on the given
        radius.
    diagonal : boolean
        If true, set diagonal weights = 1.0, if false (default) diagonal
        weights are set to value according to kernel function

    Returns
    -------
    w : W
        instance of spatial weights

    Examples
    --------
    >>> kw = pysal.kernelW_from_shapefile(pysal.examples.get_path("columbus.shp"),idVariable='POLYID', function = 'gaussian')
    >>> kwd = pysal.kernelW_from_shapefile(pysal.examples.get_path("columbus.shp"),idVariable='POLYID', function = 'gaussian', diagonal = True)
    >>> set(kw.neighbors[1]) == set([4, 2, 3, 1])
    True
    >>> set(kwd.neighbors[1]) == set([4, 2, 3, 1])
    True
    >>> set(kw.weights[1]) == set( [0.2436835517263174, 0.29090631630909874, 0.29671172124745776, 0.3989422804014327])
    True
    >>> set(kwd.weights[1]) == set( [0.2436835517263174, 0.29090631630909874, 0.29671172124745776, 1.0])
    True

    Notes
    -----
    Supports polygon or point shapefiles. For polygon shapefiles, distance is
    based on polygon centroids. Distances are defined using coordinates in
    shapefile which are assumed to be projected and not geographical
    coordinates.
    """
    # NOTE(review): the 'uniform' formula above is carried over unchanged
    # from the earlier documentation; a uniform kernel is normally constant,
    # so confirm it against the Kernel implementation.
    coords = get_points_array_from_shapefile(shapefile)
    if radius is not None:
        # Arc distances requested: wrap the coordinates in a KD-tree.
        coords = pysal.cg.KDTree(coords, distance_metric='Arc', radius=radius)

    if not idVariable:
        return kernelW(coords, k=k, function=function, fixed=fixed,
                       diagonal=diagonal)

    id_order = get_ids(shapefile, idVariable)
    return Kernel(coords, function=function, k=k, ids=id_order, fixed=fixed,
                  diagonal=diagonal)
def knnW_from_shapefile(shapefile, k=2, p=2, idVariable=None, radius=None):
    """
    Build k-nearest-neighbor weights from the points of a shapefile.

    Parameters
    ----------
    shapefile : string
        shapefile name with shp suffix
    k : int
        number of nearest neighbors
    p : float
        Minkowski p-norm distance metric parameter: 1<=p<=infinity
        2: Euclidean distance
        1: Manhattan distance
    idVariable : string
        name of a column in the shapefile's DBF to use for ids
    radius : float
        If supplied arc_distances will be calculated based on the given
        radius. p will be ignored.

    Returns
    -------
    w : W
        Weights object with binary weights

    Examples
    --------
    Polygon shapefile

    >>> wc=knnW_from_shapefile(pysal.examples.get_path("columbus.shp"))
    >>> "%.4f"%wc.pct_nonzero
    '4.0816'
    >>> set([2,1]) == set(wc.neighbors[0])
    True
    >>> wc3=pysal.knnW_from_shapefile(pysal.examples.get_path("columbus.shp"),k=3)
    >>> set(wc3.neighbors[0]) == set([2,1,3])
    True
    >>> set(wc3.neighbors[2]) == set([4,3,0])
    True

    1 offset rather than 0 offset

    >>> wc3_1=knnW_from_shapefile(pysal.examples.get_path("columbus.shp"),k=3,idVariable="POLYID")
    >>> set([4,3,2]) == set(wc3_1.neighbors[1])
    True
    >>> wc3_1.weights[2]
    [1.0, 1.0, 1.0]
    >>> set([4,1,8]) == set(wc3_1.neighbors[2])
    True

    Point shapefile

    >>> w=knnW_from_shapefile(pysal.examples.get_path("juvenile.shp"))
    >>> w.pct_nonzero
    1.1904761904761905
    >>> w1=knnW_from_shapefile(pysal.examples.get_path("juvenile.shp"),k=1)
    >>> "%.3f"%w1.pct_nonzero
    '0.595'

    Notes
    -----
    Supports polygon or point shapefiles. For polygon shapefiles, distance is
    based on polygon centroids. Distances are defined using coordinates in
    shapefile which are assumed to be projected and not geographical
    coordinates.

    Ties between neighbors of equal distance are arbitrarily broken.

    See Also
    --------
    :class:`pysal.weights.W`
    """
    coords = get_points_array_from_shapefile(shapefile)
    # A KD-tree is always built here; only its distance metric depends on
    # whether a radius was supplied.
    if radius is None:
        tree = pysal.cg.KDTree(coords)
    else:
        tree = pysal.cg.KDTree(coords, distance_metric='Arc', radius=radius)

    if not idVariable:
        return knnW(tree, k=k, p=p)

    id_order = get_ids(shapefile, idVariable)
    return knnW(tree, k=k, p=p, ids=id_order)
def adaptive_kernelW_from_shapefile(shapefile, bandwidths=None, k=2,
                                    function='triangular', idVariable=None,
                                    radius=None):
    r"""
    Kernel weights with adaptive bandwidths from the points of a shapefile.

    Parameters
    ----------
    shapefile : string
        shapefile name with shp suffix
    bandwidths : float or array-like (optional)
        the bandwidth :math:`h_i` for the kernel. if no bandwidth is
        specified k is used to determine the adaptive bandwidth
    k : int
        the number of nearest neighbors to use for determining bandwidth.
        For fixed bandwidth, :math:`h_i=max(dknn) \forall i` where
        :math:`dknn` is a vector of k-nearest neighbor distances (the
        distance to the kth nearest neighbor for each observation). For
        adaptive bandwidths, :math:`h_i=dknn_i`
    function : string
        Kernel function name; one of 'triangular', 'uniform', 'quadratic',
        'quartic' or 'gaussian'. With :math:`z_{i,j} = d_{i,j}/h_i` the
        kernels are documented as:

        triangular
            :math:`K(z) = (1 - |z|) \ if |z| \le 1`
        uniform
            :math:`K(z) = |z| \ if |z| \le 1`
        quadratic
            :math:`K(z) = (3/4)(1-z^2) \ if |z| \le 1`
        quartic
            :math:`K(z) = (15/16)(1-z^2)^2 \ if |z| \le 1`
        gaussian
            :math:`K(z) = (2\pi)^{(-1/2)} exp(-z^2 / 2)`
    idVariable : string
        name of a column in the shapefile's DBF to use for ids
    radius : float
        If supplied arc_distances will be calculated based on the given
        radius.

    Returns
    -------
    w : W
        instance of spatial weights

    Examples
    --------
    >>> kwa = adaptive_kernelW_from_shapefile(pysal.examples.get_path("columbus.shp"))
    >>> kwa.weights[0]
    [1.0, 0.03178906767736345, 9.99999900663795e-08]
    >>> kwa.bandwidth[:3]
    array([[ 0.59871832],
           [ 0.59871832],
           [ 0.56095647]])

    Notes
    -----
    Supports polygon or point shapefiles. For polygon shapefiles, distance is
    based on polygon centroids. Distances are defined using coordinates in
    shapefile which are assumed to be projected and not geographical
    coordinates.
    """
    # NOTE(review): the 'uniform' formula above is carried over unchanged
    # from the earlier documentation; a uniform kernel is normally constant,
    # so confirm it against the Kernel implementation.
    coords = get_points_array_from_shapefile(shapefile)
    if radius is not None:
        # Arc distances requested: wrap the coordinates in a KD-tree.
        coords = pysal.cg.KDTree(coords, distance_metric='Arc', radius=radius)

    if not idVariable:
        return adaptive_kernelW(coords, bandwidths=bandwidths, k=k,
                                function=function)

    id_order = get_ids(shapefile, idVariable)
    return Kernel(coords, bandwidth=bandwidths, fixed=False, k=k,
                  function=function, ids=id_order)
def kernelW_from_shapefile(shapefile, k=2, function='triangular',
                           idVariable=None, fixed=True, radius=None):
    r"""
    Kernel based weights from the points of a shapefile.

    Parameters
    ----------
    shapefile : string
        shapefile name with shp suffix
    k : int
        the number of nearest neighbors to use for determining bandwidth.
        Bandwidth taken as :math:`h_i=max(dknn) \forall i` where
        :math:`dknn` is a vector of k-nearest neighbor distances (the
        distance to the kth nearest neighbor for each observation).
    function : string
        Kernel function name; one of 'triangular', 'uniform', 'quadratic',
        'epanechnikov', 'quartic', 'bisquare' or 'gaussian'. With
        :math:`z_{i,j} = d_{i,j}/h_i` the kernels are documented as:

        triangular
            :math:`K(z) = (1 - |z|) \ if |z| \le 1`
        uniform
            :math:`K(z) = |z| \ if |z| \le 1`
        quadratic
            :math:`K(z) = (3/4)(1-z^2) \ if |z| \le 1`
        epanechnikov
            :math:`K(z) = (1-z^2) \ if |z| \le 1`
        quartic
            :math:`K(z) = (15/16)(1-z^2)^2 \ if |z| \le 1`
        bisquare
            :math:`K(z) = (1-z^2)^2 \ if |z| \le 1`
        gaussian
            :math:`K(z) = (2\pi)^{(-1/2)} exp(-z^2 / 2)`
    idVariable : string
        name of a column in the shapefile's DBF to use for ids
    fixed : binary
        If true then :math:`h_i=h \forall i`. If false then bandwidth is
        adaptive across observations.
    radius : float
        If supplied arc_distances will be calculated based on the given
        radius.

    Returns
    -------
    w : W
        instance of spatial weights

    Examples
    --------
    >>> kw = kernelW_from_shapefile(pysal.examples.get_path("columbus.shp"),idVariable='POLYID')
    >>> kw.weights[1]
    [0.2052478782400463, 0.007078773148450623, 1.0, 0.23051223027663237]
    >>> kw.bandwidth[:3]
    array([[ 0.75333961],
           [ 0.75333961],
           [ 0.75333961]])

    Notes
    -----
    Supports polygon or point shapefiles. For polygon shapefiles, distance is
    based on polygon centroids. Distances are defined using coordinates in
    shapefile which are assumed to be projected and not geographical
    coordinates.
    """
    # NOTE(review): the 'uniform' formula above is carried over unchanged
    # from the earlier documentation; a uniform kernel is normally constant,
    # so confirm it against the Kernel implementation.
    coords = get_points_array_from_shapefile(shapefile)
    if radius is not None:
        # Arc distances requested: wrap the coordinates in a KD-tree.
        coords = pysal.cg.KDTree(coords, distance_metric='Arc', radius=radius)

    if not idVariable:
        return kernelW(coords, k=k, function=function, fixed=fixed)

    id_order = get_ids(shapefile, idVariable)
    return Kernel(coords, function=function, k=k, ids=id_order, fixed=fixed)
#Hardcoded paths to my data for testing shapefile = '/home/jlaura/Downloads/julia/us_tracts_data.shp' subset = '/home/jlaura/Downloads/julia/subset.shp' tiny = '/home/jlaura/Downloads/julia/tiny.shp' a = '/home/jlaura/Downloads/julia/25.shp' #Hardcoded parameters k = 2 function = 'triangular' fixed = False diagonal = False ta = time.time() #Get the centroids from the polys t1 = time.time() points = get_points_array_from_shapefile(shapefile) t2 = time.time() print "Generating points array took {} seconds.".format(t2 - t1) #Pickle and unpickle a class method - avoids instancemethod error in mp #http://stackoverflow.com/questions/1816958/cant-pickle-type-instancemethod-when-using-pythons-multiprocessing-pool-ma/7309686#7309686 def _pickle_method(method): func_name = method.im_func.__name__ obj = method.im_self cls = method.im_class return _unpickle_method, (func_name, obj, cls) def _unpickle_method(func_name, obj, cls): for cls in cls.mro(): try: func = cls.__dict__[func_name]
def adaptive_kernelW_from_shapefile(shapefile, bandwidths=None, k=2,
                                    function='triangular', idVariable=None,
                                    radius=None):
    r"""
    Kernel weights with adaptive bandwidths from the points of a shapefile.

    Parameters
    ----------
    shapefile : string
        shapefile name with shp suffix
    bandwidths : float or array-like (optional)
        the bandwidth :math:`h_i` for the kernel. if no bandwidth is
        specified k is used to determine the adaptive bandwidth
    k : int
        the number of nearest neighbors to use for determining bandwidth.
        For fixed bandwidth, :math:`h_i=max(dknn) \forall i` where
        :math:`dknn` is a vector of k-nearest neighbor distances (the
        distance to the kth nearest neighbor for each observation). For
        adaptive bandwidths, :math:`h_i=dknn_i`
    function : string
        Kernel function name; one of 'triangular', 'uniform', 'quadratic',
        'quartic' or 'gaussian'. With :math:`z_{i,j} = d_{i,j}/h_i` the
        kernels are documented as:

        triangular
            :math:`K(z) = (1 - |z|) \ if |z| \le 1`
        uniform
            :math:`K(z) = |z| \ if |z| \le 1`
        quadratic
            :math:`K(z) = (3/4)(1-z^2) \ if |z| \le 1`
        quartic
            :math:`K(z) = (15/16)(1-z^2)^2 \ if |z| \le 1`
        gaussian
            :math:`K(z) = (2\pi)^{(-1/2)} exp(-z^2 / 2)`
    idVariable : string
        name of a column in the shapefile's DBF to use for ids
    radius : float
        If supplied arc_distances will be calculated based on the given
        radius.

    Returns
    -------
    w : W
        instance of spatial weights

    Examples
    --------
    >>> kwa = adaptive_kernelW_from_shapefile(pysal.examples.get_path("columbus.shp"))
    >>> kwa.weights[0]
    [1.0, 0.03178906767736345, 9.99999900663795e-08]
    >>> kwa.bandwidth[:3]
    array([[ 0.59871832],
           [ 0.59871832],
           [ 0.56095647]])

    Notes
    -----
    Supports polygon or point shapefiles. For polygon shapefiles, distance is
    based on polygon centroids. Distances are defined using coordinates in
    shapefile which are assumed to be projected and not geographical
    coordinates.
    """
    # NOTE(review): the 'uniform' formula above is carried over unchanged
    # from the earlier documentation; a uniform kernel is normally constant,
    # so confirm it against the Kernel implementation.
    coords = get_points_array_from_shapefile(shapefile)
    if radius is not None:
        # Arc distances requested: wrap the coordinates in a KD-tree.
        coords = pysal.cg.KDTree(coords, distance_metric='Arc', radius=radius)

    if not idVariable:
        return adaptive_kernelW(coords, bandwidths=bandwidths, k=k,
                                function=function)

    id_order = get_ids(shapefile, idVariable)
    return Kernel(coords, bandwidth=bandwidths, fixed=False, k=k,
                  function=function, ids=id_order)
def kernelW_from_shapefile(shapefile, k=2, function='triangular',
                           idVariable=None, fixed=True, radius=None):
    r"""
    Kernel based weights from the points of a shapefile.

    Parameters
    ----------
    shapefile : string
        shapefile name with shp suffix
    k : int
        the number of nearest neighbors to use for determining bandwidth.
        Bandwidth taken as :math:`h_i=max(dknn) \forall i` where
        :math:`dknn` is a vector of k-nearest neighbor distances (the
        distance to the kth nearest neighbor for each observation).
    function : string
        Kernel function name; one of 'triangular', 'uniform', 'quadratic',
        'epanechnikov', 'quartic', 'bisquare' or 'gaussian'. With
        :math:`z_{i,j} = d_{i,j}/h_i` the kernels are documented as:

        triangular
            :math:`K(z) = (1 - |z|) \ if |z| \le 1`
        uniform
            :math:`K(z) = |z| \ if |z| \le 1`
        quadratic
            :math:`K(z) = (3/4)(1-z^2) \ if |z| \le 1`
        epanechnikov
            :math:`K(z) = (1-z^2) \ if |z| \le 1`
        quartic
            :math:`K(z) = (15/16)(1-z^2)^2 \ if |z| \le 1`
        bisquare
            :math:`K(z) = (1-z^2)^2 \ if |z| \le 1`
        gaussian
            :math:`K(z) = (2\pi)^{(-1/2)} exp(-z^2 / 2)`
    idVariable : string
        name of a column in the shapefile's DBF to use for ids
    fixed : binary
        If true then :math:`h_i=h \forall i`. If false then bandwidth is
        adaptive across observations.
    radius : float
        If supplied arc_distances will be calculated based on the given
        radius.

    Returns
    -------
    w : W
        instance of spatial weights

    Examples
    --------
    >>> kw = kernelW_from_shapefile(pysal.examples.get_path("columbus.shp"),idVariable='POLYID')
    >>> kw.weights[1]
    [0.2052478782400463, 0.007078773148450623, 1.0, 0.23051223027663237]
    >>> kw.bandwidth[:3]
    array([[ 0.75333961],
           [ 0.75333961],
           [ 0.75333961]])

    Notes
    -----
    Supports polygon or point shapefiles. For polygon shapefiles, distance is
    based on polygon centroids. Distances are defined using coordinates in
    shapefile which are assumed to be projected and not geographical
    coordinates.
    """
    # NOTE(review): the 'uniform' formula above is carried over unchanged
    # from the earlier documentation; a uniform kernel is normally constant,
    # so confirm it against the Kernel implementation.
    coords = get_points_array_from_shapefile(shapefile)
    if radius is not None:
        # Arc distances requested: wrap the coordinates in a KD-tree.
        coords = pysal.cg.KDTree(coords, distance_metric='Arc', radius=radius)

    if not idVariable:
        return kernelW(coords, k=k, function=function, fixed=fixed)

    id_order = get_ids(shapefile, idVariable)
    return Kernel(coords, function=function, k=k, ids=id_order, fixed=fixed)
def knnW_from_shapefile(shapefile, k=2, p=2, idVariable=None, radius=None):
    """
    Build k-nearest-neighbor weights from the points of a shapefile.

    Parameters
    ----------
    shapefile : string
        shapefile name with shp suffix
    k : int
        number of nearest neighbors
    p : float
        Minkowski p-norm distance metric parameter: 1<=p<=infinity
        2: Euclidean distance
        1: Manhattan distance
    idVariable : string
        name of a column in the shapefile's DBF to use for ids
    radius : float
        If supplied arc_distances will be calculated based on the given
        radius. p will be ignored.

    Returns
    -------
    w : W
        Weights object with binary weights

    Examples
    --------
    Polygon shapefile

    >>> wc=knnW_from_shapefile(pysal.examples.get_path("columbus.shp"))
    >>> wc.pct_nonzero
    0.040816326530612242
    >>> wc3=knnW_from_shapefile(pysal.examples.get_path("columbus.shp"),k=3,idVariable="POLYID")
    >>> wc3.weights[1]
    [1, 1, 1]
    >>> wc3.neighbors[1]
    [3, 2, 4]
    >>> wc.neighbors[0]
    [2, 1]

    Point shapefile

    >>> w=knnW_from_shapefile(pysal.examples.get_path("juvenile.shp"))
    >>> w.pct_nonzero
    0.011904761904761904
    >>> w1=knnW_from_shapefile(pysal.examples.get_path("juvenile.shp"),k=1)
    >>> w1.pct_nonzero
    0.0059523809523809521

    Notes
    -----
    Supports polygon or point shapefiles. For polygon shapefiles, distance is
    based on polygon centroids. Distances are defined using coordinates in
    shapefile which are assumed to be projected and not geographical
    coordinates.

    Ties between neighbors of equal distance are arbitrarily broken.

    See Also
    --------
    :class:`pysal.weights.W`
    """
    coords = get_points_array_from_shapefile(shapefile)
    if radius is not None:
        # Arc distances requested: wrap the coordinates in a KD-tree.
        coords = pysal.cg.KDTree(coords, distance_metric='Arc', radius=radius)

    if not idVariable:
        return knnW(coords, k=k, p=p)

    id_order = get_ids(shapefile, idVariable)
    return knnW(coords, k=k, p=p, ids=id_order)
def adaptive_kernelW_from_shapefile(shapefile, bandwidths=None, k=2,
                                    function='triangular', idVariable=None,
                                    radius=None, diagonal=False):
    r"""
    Kernel weights with adaptive bandwidths from the points of a shapefile.

    Parameters
    ----------
    shapefile : string
        shapefile name with shp suffix
    bandwidths : float or array-like (optional)
        the bandwidth :math:`h_i` for the kernel. if no bandwidth is
        specified k is used to determine the adaptive bandwidth
    k : int
        the number of nearest neighbors to use for determining bandwidth.
        For fixed bandwidth, :math:`h_i=max(dknn) \forall i` where
        :math:`dknn` is a vector of k-nearest neighbor distances (the
        distance to the kth nearest neighbor for each observation). For
        adaptive bandwidths, :math:`h_i=dknn_i`
    function : string
        Kernel function name; one of 'triangular', 'uniform', 'quadratic',
        'quartic' or 'gaussian'. With :math:`z_{i,j} = d_{i,j}/h_i` the
        kernels are documented as:

        triangular
            :math:`K(z) = (1 - |z|) \ if |z| \le 1`
        uniform
            :math:`K(z) = |z| \ if |z| \le 1`
        quadratic
            :math:`K(z) = (3/4)(1-z^2) \ if |z| \le 1`
        quartic
            :math:`K(z) = (15/16)(1-z^2)^2 \ if |z| \le 1`
        gaussian
            :math:`K(z) = (2\pi)^{(-1/2)} exp(-z^2 / 2)`
    idVariable : string
        name of a column in the shapefile's DBF to use for ids
    radius : float
        If supplied arc_distances will be calculated based on the given
        radius.
    diagonal : boolean
        If true, set diagonal weights = 1.0, if false (default) diagonal
        weights are set to value according to kernel function

    Returns
    -------
    w : W
        instance of spatial weights

    Examples
    --------
    >>> kwa = pysal.adaptive_kernelW_from_shapefile(pysal.examples.get_path("columbus.shp"), function='gaussian')
    >>> kwad = pysal.adaptive_kernelW_from_shapefile(pysal.examples.get_path("columbus.shp"), function='gaussian', diagonal=True)
    >>> kwa.neighbors[0]
    [0, 2, 1]
    >>> kwad.neighbors[0]
    [0, 2, 1]
    >>> kwa.weights[0]
    [0.3989422804014327, 0.24966013701844503, 0.2419707487162134]
    >>> kwad.weights[0]
    [1.0, 0.24966013701844503, 0.2419707487162134]

    Notes
    -----
    Supports polygon or point shapefiles. For polygon shapefiles, distance is
    based on polygon centroids. Distances are defined using coordinates in
    shapefile which are assumed to be projected and not geographical
    coordinates.
    """
    # NOTE(review): the 'uniform' formula above is carried over unchanged
    # from the earlier documentation; a uniform kernel is normally constant,
    # so confirm it against the Kernel implementation.
    coords = get_points_array_from_shapefile(shapefile)
    if radius is not None:
        # Arc distances requested: wrap the coordinates in a KD-tree.
        coords = pysal.cg.KDTree(coords, distance_metric='Arc', radius=radius)

    if not idVariable:
        return adaptive_kernelW(coords, bandwidths=bandwidths, k=k,
                                function=function, diagonal=diagonal)

    id_order = get_ids(shapefile, idVariable)
    return Kernel(coords, bandwidth=bandwidths, fixed=False, k=k,
                  function=function, ids=id_order, diagonal=diagonal)
#Hardcoded paths to my data for testing shapefile = '/home/jlaura/Downloads/julia/us_tracts_data.shp' subset = '/home/jlaura/Downloads/julia/subset.shp' tiny = '/home/jlaura/Downloads/julia/tiny.shp' a = '/home/jlaura/Downloads/julia/25.shp' #Hardcoded parameters k = 2 function = 'triangular' fixed = False diagonal = False ta = time.time() #Get the centroids from the polys t1 = time.time() points = get_points_array_from_shapefile(shapefile) t2 = time.time() print "Generating points array took {} seconds.".format(t2 - t1) #Pickle and unpickle a class method - avoids instancemethod error in mp #http://stackoverflow.com/questions/1816958/cant-pickle-type-instancemethod-when-using-pythons-multiprocessing-pool-ma/7309686#7309686 def _pickle_method(method): func_name = method.im_func.__name__ obj = method.im_self cls = method.im_class return _unpickle_method, (func_name, obj, cls) def _unpickle_method(func_name, obj, cls): for cls in cls.mro():
def knnW_from_shapefile(shapefile, k=2, p=2, idVariable=None, radius=None):
    """
    Build k-nearest-neighbor weights from the points of a shapefile.

    Parameters
    ----------
    shapefile : string
        shapefile name with shp suffix
    k : int
        number of nearest neighbors
    p : float
        Minkowski p-norm distance metric parameter: 1<=p<=infinity
        2: Euclidean distance
        1: Manhattan distance
    idVariable : string
        name of a column in the shapefile's DBF to use for ids
    radius : float
        If supplied arc_distances will be calculated based on the given
        radius. p will be ignored.

    Returns
    -------
    w : W
        Weights object with binary weights

    Examples
    --------
    Polygon shapefile

    >>> wc=knnW_from_shapefile(pysal.examples.get_path("columbus.shp"))
    >>> wc.pct_nonzero
    0.040816326530612242
    >>> wc3=knnW_from_shapefile(pysal.examples.get_path("columbus.shp"),k=3,idVariable="POLYID")
    >>> wc3.weights[1]
    [1, 1, 1]
    >>> wc3.neighbors[1]
    [3, 2, 4]
    >>> wc.neighbors[0]
    [2, 1]

    Point shapefile

    >>> w=knnW_from_shapefile(pysal.examples.get_path("juvenile.shp"))
    >>> w.pct_nonzero
    0.011904761904761904
    >>> w1=knnW_from_shapefile(pysal.examples.get_path("juvenile.shp"),k=1)
    >>> w1.pct_nonzero
    0.0059523809523809521

    Notes
    -----
    Supports polygon or point shapefiles. For polygon shapefiles, distance is
    based on polygon centroids. Distances are defined using coordinates in
    shapefile which are assumed to be projected and not geographical
    coordinates.

    Ties between neighbors of equal distance are arbitrarily broken.

    See Also
    --------
    :class:`pysal.weights.W`
    """
    coords = get_points_array_from_shapefile(shapefile)
    if radius is not None:
        # Arc distances requested: wrap the coordinates in a KD-tree.
        coords = pysal.cg.KDTree(coords, distance_metric='Arc', radius=radius)

    if not idVariable:
        return knnW(coords, k=k, p=p)

    id_order = get_ids(shapefile, idVariable)
    return knnW(coords, k=k, p=p, ids=id_order)