def threshold_binaryW_from_shapefile(shapefile, threshold, p=2, idVariable=None, radius=None):
    """
    Build binary distance-band weights from the features of a shapefile.

    Parameters
    ----------
    shapefile  : string
                 shapefile name with shp suffix
    threshold  : float
                 distance band
    p          : float
                 Minkowski p-norm distance metric parameter:
                 1<=p<=infinity
                 2: Euclidean distance
                 1: Manhattan distance
    idVariable : string
                 name of a column in the shapefile's DBF to use for ids
    radius     : float
                 If supplied arc_distances will be calculated
                 based on the given radius. p will be ignored.

    Returns
    -------
    w          : W instance
                 Weights object with binary weights

    Examples
    --------
    >>> import libpysal.api as ps
    >>> import libpysal
    >>> w = ps.threshold_binaryW_from_shapefile(libpysal.examples.get_path("columbus.shp"),0.62,idVariable="POLYID")
    >>> w.weights[1]
    [1.0, 1.0]

    Notes
    -----
    Supports polygon or point shapefiles. For polygon shapefiles, distance is
    based on polygon centroids. Distances are defined using coordinates in
    shapefile which are assumed to be projected and not geographical
    coordinates.
    """
    points = get_points_array_from_shapefile(shapefile)
    if radius is not None:
        # Wrap the points in an arc-distance KDTree when a radius is given.
        points = cg.KDTree(points, distance_metric='Arc', radius=radius)

    if not idVariable:
        # No id column requested: delegate to the array-based builder.
        return threshold_binaryW_from_array(points, threshold, p=p)

    id_values = get_ids(shapefile, idVariable)
    band = DistanceBand(points, threshold=threshold, p=p)
    band.remap_ids(id_values)
    return band
def queen_from_shapefile(shapefile, idVariable=None, sparse=False):
    """
    Queen contiguity weights from a polygon shapefile.

    Parameters
    ----------
    shapefile   : string
                  name of polygon shapefile including suffix.
    idVariable  : string
                  name of a column in the shapefile's DBF to use for ids.
    sparse      : boolean
                  If True return WSP instance
                  If False return W instance

    Returns
    -------
    w           : W
                  instance of spatial weights

    Examples
    --------
    >>> wq=queen_from_shapefile(pysal.examples.get_path("columbus.shp"))
    >>> "%.3f"%wq.pct_nonzero
    '0.098'
    >>> wq=queen_from_shapefile(pysal.examples.get_path("columbus.shp"),"POLYID")
    >>> "%.3f"%wq.pct_nonzero
    '0.098'
    >>> wq=queen_from_shapefile(pysal.examples.get_path("columbus.shp"), sparse=True)
    >>> pct_sp = wq.sparse.nnz *1. / wq.n**2
    >>> "%.3f"%pct_sp
    '0.098'

    Notes
    -----
    Queen contiguity defines as neighbors any pair of polygons that share at
    least one vertex in their polygon definitions.

    See Also
    --------
    :class:`pysal.weights.W`
    """
    shp = pysal.open(shapefile)
    try:
        ids = get_ids(shapefile, idVariable) if idVariable else None
        w = buildContiguity(shp, criterion='queen', ids=ids)
    finally:
        # Close the handle even when the id lookup or the contiguity build
        # raises, so a failed call does not leak the open shapefile.
        shp.close()
    w.set_shapefile(shapefile, idVariable)

    if sparse:
        w = pysal.weights.WSP(w.sparse, id_order=ids)

    return w
def threshold_continuousW_from_shapefile(shapefile, threshold, p=2, alpha=-1, idVariable=None, radius=None):
    """
    Build continuous (distance-decayed) distance-band weights from a shapefile.

    Parameters
    ----------
    shapefile  : string
                 shapefile name with shp suffix
    threshold  : float
                 distance band
    p          : float
                 Minkowski p-norm distance metric parameter:
                 1<=p<=infinity
                 2: Euclidean distance
                 1: Manhattan distance
    alpha      : float
                 distance decay parameter for weight (default -1.0)
                 if alpha is positive the weights will not decline with
                 distance.
    idVariable : string
                 name of a column in the shapefile's DBF to use for ids
    radius     : float
                 If supplied arc_distances will be calculated
                 based on the given radius. p will be ignored.

    Returns
    -------
    w          : W instance
                 Weights object with continuous weights.

    Examples
    --------
    >>> w = threshold_continuousW_from_shapefile(pysal.examples.get_path("columbus.shp"),0.62,idVariable="POLYID")
    >>> w.weights[1]
    [1.6702346893743334, 1.7250729841938093]

    Notes
    -----
    Supports polygon or point shapefiles. For polygon shapefiles, distance is
    based on polygon centroids. Distances are defined using coordinates in
    shapefile which are assumed to be projected and not geographical
    coordinates.
    """
    points = get_points_array_from_shapefile(shapefile)
    if radius is not None:
        # Wrap the points in an arc-distance KDTree when a radius is given.
        points = pysal.cg.KDTree(points, distance_metric='Arc', radius=radius)

    if not idVariable:
        weights = threshold_continuousW_from_array(points, threshold, p=p, alpha=alpha)
    else:
        id_values = get_ids(shapefile, idVariable)
        weights = DistanceBand(points, threshold=threshold, p=p, alpha=alpha, binary=False)
        weights.remap_ids(id_values)

    # Both branches record the originating shapefile on the result.
    weights.set_shapefile(shapefile, idVariable)
    return weights
def queen_from_shapefile(shapefile, idVariable=None, sparse=False):
    """
    Queen contiguity weights from a polygon shapefile.

    Parameters
    ----------
    shapefile   : string
                  name of polygon shapefile including suffix.
    idVariable  : string
                  name of a column in the shapefile's DBF to use for ids.
    sparse      : boolean
                  If True return WSP instance
                  If False return W instance

    Returns
    -------
    w           : W
                  instance of spatial weights

    Examples
    --------
    >>> wq=queen_from_shapefile(pysal.examples.get_path("columbus.shp"))
    >>> "%.3f"%wq.pct_nonzero
    '9.829'
    >>> wq=queen_from_shapefile(pysal.examples.get_path("columbus.shp"),"POLYID")
    >>> "%.3f"%wq.pct_nonzero
    '9.829'
    >>> wq=queen_from_shapefile(pysal.examples.get_path("columbus.shp"), sparse=True)
    >>> pct_sp = wq.sparse.nnz *1. / wq.n**2
    >>> "%.3f"%pct_sp
    '0.098'

    Notes
    -----
    Queen contiguity defines as neighbors any pair of polygons that share at
    least one vertex in their polygon definitions.

    See Also
    --------
    :class:`pysal.weights.W`
    """
    shp = pysal.open(shapefile)
    try:
        w = buildContiguity(shp, criterion='queen')
        if idVariable:
            ids = get_ids(shapefile, idVariable)
            w.remap_ids(ids)
        else:
            ids = None
    finally:
        # Close the handle even when the contiguity build, id lookup or
        # remapping raises, so a failed call does not leak the shapefile.
        shp.close()
    w.set_shapefile(shapefile, idVariable)

    if sparse:
        w = pysal.weights.WSP(w.sparse, id_order=ids)

    return w
def rook_from_shapefile(shapefile, idVariable=None, sparse=False):
    """
    Rook contiguity weights from a polygon shapefile.

    Parameters
    ----------
    shapefile   : string
                  name of polygon shapefile including suffix.
    idVariable  : string
                  name of a column in the shapefile's DBF to use for ids.
    sparse      : boolean
                  If True return WSP instance
                  If False return W instance

    Returns
    -------
    w           : W
                  instance of spatial weights

    Examples
    --------
    >>> wr=rook_from_shapefile(pysal.examples.get_path("columbus.shp"), "POLYID")
    >>> "%.3f"%wr.pct_nonzero
    '8.330'
    >>> wr=rook_from_shapefile(pysal.examples.get_path("columbus.shp"), sparse=True)
    >>> pct_sp = wr.sparse.nnz *1. / wr.n**2
    >>> "%.3f"%pct_sp
    '0.083'

    Notes
    -----
    Rook contiguity defines as neighbors any pair of polygons that share a
    common edge in their polygon definitions.

    See Also
    --------
    :class:`pysal.weights.W`
    """
    shp = pysal.open(shapefile)
    try:
        w = buildContiguity(shp, criterion='rook')
        if idVariable:
            ids = get_ids(shapefile, idVariable)
            w.remap_ids(ids)
        else:
            ids = None
    finally:
        # Close the handle even when the contiguity build, id lookup or
        # remapping raises, so a failed call does not leak the shapefile.
        shp.close()
    w.set_shapefile(shapefile, idVariable)

    if sparse:
        w = pysal.weights.WSP(w.sparse, id_order=ids)

    return w
def rook_from_shapefile(shapefile, idVariable=None, sparse=False):
    """
    Rook contiguity weights from a polygon shapefile.

    Parameters
    ----------
    shapefile   : string
                  name of polygon shapefile including suffix.
    idVariable  : string
                  name of a column in the shapefile's DBF to use for ids.
                  When given, the weights are remapped to those ids.
    sparse      : boolean
                  If True return WSP instance
                  If False return W instance

    Returns
    -------
    w           : W
                  instance of spatial weights

    Examples
    --------
    >>> wr=rook_from_shapefile(pysal.examples.get_path("columbus.shp"), "POLYID")
    >>> "%.3f"%wr.pct_nonzero
    '8.330'
    >>> wr=rook_from_shapefile(pysal.examples.get_path("columbus.shp"), sparse=True)
    >>> pct_sp = wr.sparse.nnz *1. / wr.n**2
    >>> "%.3f"%pct_sp
    '0.083'

    Notes
    -----
    Rook contiguity defines as neighbors any pair of polygons that share a
    common edge in their polygon definitions.

    See Also
    --------
    :class:`pysal.weights.W`
    """
    shp = pysal.open(shapefile)
    try:
        w = buildContiguity(shp, criterion='rook')
        if idVariable:
            ids = get_ids(shapefile, idVariable)
            w.remap_ids(ids)
        else:
            ids = None
    finally:
        # Close the handle even when the contiguity build, id lookup or
        # remapping raises, so a failed call does not leak the shapefile.
        shp.close()
    w.set_shapefile(shapefile, idVariable)

    if sparse:
        w = pysal.weights.WSP(w.sparse, id_order=ids)

    return w
def threshold_binaryW_from_shapefile(shapefile, threshold, p=2, idVariable=None, radius=None):
    """
    Build binary distance-band weights from the features of a shapefile.

    Parameters
    ----------
    shapefile  : string
                 shapefile name with shp suffix
    threshold  : float
                 distance band
    p          : float
                 Minkowski p-norm distance metric parameter:
                 1<=p<=infinity
                 2: Euclidean distance
                 1: Manhattan distance
    idVariable : string
                 name of a column in the shapefile's DBF to use for ids
    radius     : float
                 If supplied arc_distances will be calculated
                 based on the given radius. p will be ignored.

    Returns
    -------
    w          : W instance
                 Weights object with binary weights

    Examples
    --------
    >>> w = threshold_binaryW_from_shapefile(pysal.examples.get_path("columbus.shp"),0.62,idVariable="POLYID")
    >>> w.weights[1]
    [1, 1]

    Notes
    -----
    Supports polygon or point shapefiles. For polygon shapefiles, distance is
    based on polygon centroids. Distances are defined using coordinates in
    shapefile which are assumed to be projected and not geographical
    coordinates.
    """
    points = get_points_array_from_shapefile(shapefile)
    if radius is not None:
        # Wrap the points in an arc-distance KDTree when a radius is given.
        points = pysal.cg.KDTree(points, distance_metric='Arc', radius=radius)

    if not idVariable:
        # No id column requested: delegate to the array-based builder.
        return threshold_binaryW_from_array(points, threshold, p=p)

    id_values = get_ids(shapefile, idVariable)
    band = DistanceBand(points, threshold=threshold, p=p)
    band.remap_ids(id_values)
    return band
def rook_from_shapefile(shapefile, idVariable=None, sparse=False):
    """
    Rook contiguity weights from a polygon shapefile.

    Parameters
    ----------
    shapefile   : string
                  name of polygon shapefile including suffix.
    idVariable  : string
                  name of a column in the shapefile's DBF to use for ids.
                  When given, the ids are passed to the contiguity builder.
    sparse      : boolean
                  If True return WSP instance
                  If False return W instance

    Returns
    -------
    w           : W
                  instance of spatial weights

    Examples
    --------
    >>> wr=rook_from_shapefile(pysal.examples.get_path("columbus.shp"), "POLYID")
    >>> wr.pct_nonzero
    0.083298625572678045
    >>> wr=rook_from_shapefile(pysal.examples.get_path("columbus.shp"), sparse=True)
    >>> wr.sparse.nnz *1. / wr.n**2
    0.083298625572678045

    Notes
    -----
    Rook contiguity defines as neighbors any pair of polygons that share a
    common edge in their polygon definitions.

    See Also
    --------
    :class:`pysal.weights.W`
    """
    shp = pysal.open(shapefile)
    try:
        ids = get_ids(shapefile, idVariable) if idVariable else None
        w = buildContiguity(shp, criterion='rook', ids=ids)
    finally:
        # Close the handle even when the id lookup or the contiguity build
        # raises, so a failed call does not leak the open shapefile.
        shp.close()

    if sparse:
        w = pysal.weights.WSP(w.sparse, id_order=ids)

    return w
def adaptive_kernelW_from_shapefile(shapefile, bandwidths=None, k=2, function='triangular',
                                    idVariable=None, radius=None):
    # The docstring is a raw string: the LaTeX markup below (``\le``, ``\pi``,
    # ``\ ``) contains backslash sequences that are invalid escapes in a
    # normal string literal and trigger warnings on modern Python.
    r"""
    Kernel weights with adaptive bandwidths.

    Parameters
    ----------
    shapefile  : string
                 shapefile name with shp suffix
    bandwidths : float or array-like (optional)
                 the bandwidth :math:`h_i` for the kernel.
                 if no bandwidth is specified k is used to determine the
                 adaptive bandwidth
    k          : int
                 the number of nearest neighbors to use for determining
                 bandwidth. For fixed bandwidth, :math:`h_i=max(dknn) \forall i`
                 where :math:`dknn` is a vector of k-nearest neighbor
                 distances (the distance to the kth nearest neighbor for each
                 observation).  For adaptive bandwidths, :math:`h_i=dknn_i`
    function   : string {'triangular','uniform','quadratic','quartic','gaussian'}
                 kernel function defined as follows with

                 .. math::

                     z_{i,j} = d_{i,j}/h_i

                 triangular

                 .. math::

                     K(z) = (1 - |z|) \ if |z| \le 1

                 uniform

                 .. math::

                     K(z) = |z| \ if |z| \le 1

                 quadratic

                 .. math::

                     K(z) = (3/4)(1-z^2) \ if |z| \le 1

                 quartic

                 .. math::

                     K(z) = (15/16)(1-z^2)^2 \ if |z| \le 1

                 gaussian

                 .. math::

                     K(z) = (2\pi)^{(-1/2)} exp(-z^2 / 2)

    idVariable : string
                 name of a column in the shapefile's DBF to use for ids
    radius     : float
                 If supplied arc_distances will be calculated
                 based on the given radius.

    Returns
    -------
    w          : W
                 instance of spatial weights

    Examples
    --------
    >>> kwa = adaptive_kernelW_from_shapefile(pysal.examples.get_path("columbus.shp"))
    >>> kwa.weights[0]
    [1.0, 0.03178906767736345, 9.99999900663795e-08]
    >>> kwa.bandwidth[:3]
    array([[ 0.59871832],
           [ 0.59871832],
           [ 0.56095647]])

    Notes
    -----
    Supports polygon or point shapefiles. For polygon shapefiles, distance is
    based on polygon centroids. Distances are defined using coordinates in
    shapefile which are assumed to be projected and not geographical
    coordinates.
    """
    points = get_points_array_from_shapefile(shapefile)
    if radius is not None:
        # Wrap the points in an arc-distance KDTree when a radius is given.
        points = pysal.cg.KDTree(points, distance_metric='Arc', radius=radius)
    if idVariable:
        ids = get_ids(shapefile, idVariable)
        return Kernel(points, bandwidth=bandwidths, fixed=False, k=k,
                      function=function, ids=ids)
    return adaptive_kernelW(points, bandwidths=bandwidths, k=k,
                            function=function)
def knnW_from_shapefile(shapefile, k=2, p=2, idVariable=None, radius=None):
    """
    Build k-nearest-neighbor weights from the features of a shapefile.

    Parameters
    ----------
    shapefile  : string
                 shapefile name with shp suffix
    k          : int
                 number of nearest neighbors
    p          : float
                 Minkowski p-norm distance metric parameter:
                 1<=p<=infinity
                 2: Euclidean distance
                 1: Manhattan distance
    idVariable : string
                 name of a column in the shapefile's DBF to use for ids
    radius     : float
                 If supplied arc_distances will be calculated
                 based on the given radius. p will be ignored.

    Returns
    -------
    w          : W instance
                 Weights object with binary weights

    Examples
    --------

    Polygon shapefile

    >>> wc=knnW_from_shapefile(pysal.examples.get_path("columbus.shp"))
    >>> "%.4f"%wc.pct_nonzero
    '4.0816'
    >>> set([2,1]) == set(wc.neighbors[0])
    True
    >>> wc3=pysal.knnW_from_shapefile(pysal.examples.get_path("columbus.shp"),k=3)
    >>> set(wc3.neighbors[0]) == set([2,1,3])
    True
    >>> set(wc3.neighbors[2]) == set([4,3,0])
    True

    1 offset rather than 0 offset

    >>> wc3_1=knnW_from_shapefile(pysal.examples.get_path("columbus.shp"),k=3,idVariable="POLYID")
    >>> set([4,3,2]) == set(wc3_1.neighbors[1])
    True
    >>> wc3_1.weights[2]
    [1.0, 1.0, 1.0]
    >>> set([4,1,8]) == set(wc3_1.neighbors[2])
    True

    Point shapefile

    >>> w=knnW_from_shapefile(pysal.examples.get_path("juvenile.shp"))
    >>> w.pct_nonzero
    1.1904761904761905
    >>> w1=knnW_from_shapefile(pysal.examples.get_path("juvenile.shp"),k=1)
    >>> "%.3f"%w1.pct_nonzero
    '0.595'
    >>>

    Notes
    -----
    Supports polygon or point shapefiles. For polygon shapefiles, distance is
    based on polygon centroids. Distances are defined using coordinates in
    shapefile which are assumed to be projected and not geographical
    coordinates.

    Ties between neighbors of equal distance are arbitrarily broken.

    See Also
    --------
    :class:`pysal.weights.W`
    """
    points = get_points_array_from_shapefile(shapefile)

    # A KDTree is always built; only its metric depends on `radius`.
    if radius is None:
        tree = pysal.cg.KDTree(points)
    else:
        tree = pysal.cg.KDTree(points, distance_metric='Arc', radius=radius)

    if not idVariable:
        return knnW(tree, k=k, p=p)
    return knnW(tree, k=k, p=p, ids=get_ids(shapefile, idVariable))
import spotipy
import spotipy.util as util
from spotipy.oauth2 import SpotifyClientCredentials
from secret import USER_EMAIL, CLIENT_ID, CLIENT_SECRET, PLAYLIST_ID
from pprint import pprint
from util import get_recently_added, get_ids


def main():
    """Replace the tracks of PLAYLIST_ID with the ids of recently added songs.

    Prompts for a Spotify user token, looks up the current user's id, collects
    ids via ``get_ids(get_recently_added(...))`` and replaces the playlist's
    tracks with them.
    """
    print(
        'this script will erase your in rotation and start it over with the ' +
        'latest 100 songs added.')

    auth_token = util.prompt_for_user_token(
        USER_EMAIL,
        'user-library-read playlist-modify-public',
        client_id=CLIENT_ID,
        client_secret=CLIENT_SECRET,
        redirect_uri='http://localhost/')
    client = spotipy.Spotify(auth=auth_token)

    owner_id = client.current_user()['id']
    track_ids = get_ids(get_recently_added(client, at_least_100=True))
    client.user_playlist_replace_tracks(owner_id, PLAYLIST_ID, track_ids)


if __name__ == '__main__':
    main()
def kernelW_from_shapefile(shapefile, k=2, function='triangular',
                           idVariable=None, fixed=True, radius=None):
    # The docstring is a raw string: the LaTeX markup below (``\le``, ``\pi``,
    # ``\ ``) contains backslash sequences that are invalid escapes in a
    # normal string literal and trigger warnings on modern Python.
    r"""
    Kernel based weights.

    Parameters
    ----------
    shapefile  : string
                 shapefile name with shp suffix
    k          : int
                 the number of nearest neighbors to use for determining
                 bandwidth. Bandwidth taken as :math:`h_i=max(dknn) \forall i`
                 where :math:`dknn` is a vector of k-nearest neighbor
                 distances (the distance to the kth nearest neighbor for each
                 observation).
    function   : string {'triangular','uniform','quadratic','epanechnikov',
                 'quartic','bisquare','gaussian'}

                 .. math::

                     z_{i,j} = d_{i,j}/h_i

                 triangular

                 .. math::

                     K(z) = (1 - |z|) \ if |z| \le 1

                 uniform

                 .. math::

                     K(z) = |z| \ if |z| \le 1

                 quadratic

                 .. math::

                     K(z) = (3/4)(1-z^2) \ if |z| \le 1

                 epanechnikov

                 .. math::

                     K(z) = (1-z^2) \ if |z| \le 1

                 quartic

                 .. math::

                     K(z) = (15/16)(1-z^2)^2 \ if |z| \le 1

                 bisquare

                 .. math::

                     K(z) = (1-z^2)^2 \ if |z| \le 1

                 gaussian

                 .. math::

                     K(z) = (2\pi)^{(-1/2)} exp(-z^2 / 2)

    idVariable : string
                 name of a column in the shapefile's DBF to use for ids
    fixed      : boolean
                 If true then :math:`h_i=h \forall i`. If false then
                 bandwidth is adaptive across observations.
    radius     : float
                 If supplied arc_distances will be calculated
                 based on the given radius.

    Returns
    -------
    w          : W
                 instance of spatial weights

    Examples
    --------
    >>> kw = kernelW_from_shapefile(pysal.examples.get_path("columbus.shp"),idVariable='POLYID')
    >>> kw.weights[1]
    [0.2052478782400463, 0.007078773148450623, 1.0, 0.23051223027663237]
    >>> kw.bandwidth[:3]
    array([[ 0.75333961],
           [ 0.75333961],
           [ 0.75333961]])

    Notes
    -----
    Supports polygon or point shapefiles. For polygon shapefiles, distance is
    based on polygon centroids. Distances are defined using coordinates in
    shapefile which are assumed to be projected and not geographical
    coordinates.
    """
    points = get_points_array_from_shapefile(shapefile)
    if radius is not None:
        # Wrap the points in an arc-distance KDTree when a radius is given.
        points = pysal.cg.KDTree(points, distance_metric='Arc', radius=radius)
    if idVariable:
        ids = get_ids(shapefile, idVariable)
        return Kernel(points, function=function, k=k, ids=ids, fixed=fixed)
    return kernelW(points, k=k, function=function, fixed=fixed)
def knnW_from_shapefile(shapefile, k=2, p=2, idVariable=None, radius=None):
    """
    Build k-nearest-neighbor weights from the features of a shapefile.

    Parameters
    ----------
    shapefile  : string
                 shapefile name with shp suffix
    k          : int
                 number of nearest neighbors
    p          : float
                 Minkowski p-norm distance metric parameter:
                 1<=p<=infinity
                 2: Euclidean distance
                 1: Manhattan distance
    idVariable : string
                 name of a column in the shapefile's DBF to use for ids
    radius     : float
                 If supplied arc_distances will be calculated
                 based on the given radius. p will be ignored.

    Returns
    -------
    w          : W instance
                 Weights object with binary weights

    Examples
    --------

    Polygon shapefile

    >>> wc=knnW_from_shapefile(pysal.examples.get_path("columbus.shp"))
    >>> wc.pct_nonzero
    0.040816326530612242
    >>> wc3=knnW_from_shapefile(pysal.examples.get_path("columbus.shp"),k=3,idVariable="POLYID")
    >>> wc3.weights[1]
    [1, 1, 1]
    >>> wc3.neighbors[1]
    [3, 2, 4]
    >>> wc.neighbors[0]
    [2, 1]

    Point shapefile

    >>> w=knnW_from_shapefile(pysal.examples.get_path("juvenile.shp"))
    >>> w.pct_nonzero
    0.011904761904761904
    >>> w1=knnW_from_shapefile(pysal.examples.get_path("juvenile.shp"),k=1)
    >>> w1.pct_nonzero
    0.0059523809523809521
    >>>

    Notes
    -----
    Supports polygon or point shapefiles. For polygon shapefiles, distance is
    based on polygon centroids. Distances are defined using coordinates in
    shapefile which are assumed to be projected and not geographical
    coordinates.

    Ties between neighbors of equal distance are arbitrarily broken.

    See Also
    --------
    :class:`pysal.weights.W`
    """
    points = get_points_array_from_shapefile(shapefile)
    if radius is not None:
        # Only an explicit radius switches the input to an arc-distance KDTree;
        # otherwise the raw point array is handed to knnW.
        points = pysal.cg.KDTree(points, distance_metric='Arc', radius=radius)

    if not idVariable:
        return knnW(points, k=k, p=p)
    return knnW(points, k=k, p=p, ids=get_ids(shapefile, idVariable))
assert len( recently_added) >= 100 # if not true, spotify api might be down. final_playlist = [] i0, i1 = 0, 0 matched = False while i0 < len(in_rotation) and i1 < len(recently_added): if in_rotation[i0][0] == recently_added[i1][0]: print_aligned(in_rotation[i0][1], recently_added[i1][1], log) final_playlist.append(in_rotation[i0]) matched = True i0 += 1 i1 += 1 else: if in_rotation[i0][0] not in get_ids(recently_added): print_song1( in_rotation[i0][1], '[WILL BE REMOVED (old and added manually, or removed from library)]', log) i0 += 1 else: if not matched: print_song2('[WILL BE ADDED (newly added song)]', recently_added[i1][1], log) final_playlist.append(recently_added[i1]) i1 += 1 else: print_song2('[WILL REMAIN OUT (manually removed)]', recently_added[i1][1], log) i1 += 1
def kernelW_from_shapefile(shapefile, k=2, function='triangular',
                           idVariable=None, fixed=True, radius=None,
                           diagonal=False):
    # The docstring is a raw string: the LaTeX markup below (``\le``, ``\pi``,
    # ``\ ``) contains backslash sequences that are invalid escapes in a
    # normal string literal and trigger warnings on modern Python.
    r"""
    Kernel based weights.

    Parameters
    ----------
    shapefile  : string
                 shapefile name with shp suffix
    k          : int
                 the number of nearest neighbors to use for determining
                 bandwidth. Bandwidth taken as :math:`h_i=max(dknn) \forall i`
                 where :math:`dknn` is a vector of k-nearest neighbor
                 distances (the distance to the kth nearest neighbor for each
                 observation).
    function   : {'triangular','uniform','quadratic','epanechnikov',
                 'quartic','bisquare','gaussian'}

                 .. math::

                     z_{i,j} = d_{i,j}/h_i

                 triangular

                 .. math::

                     K(z) = (1 - |z|) \ if |z| \le 1

                 uniform

                 .. math::

                     K(z) = |z| \ if |z| \le 1

                 quadratic

                 .. math::

                     K(z) = (3/4)(1-z^2) \ if |z| \le 1

                 epanechnikov

                 .. math::

                     K(z) = (1-z^2) \ if |z| \le 1

                 quartic

                 .. math::

                     K(z) = (15/16)(1-z^2)^2 \ if |z| \le 1

                 bisquare

                 .. math::

                     K(z) = (1-z^2)^2 \ if |z| \le 1

                 gaussian

                 .. math::

                     K(z) = (2\pi)^{(-1/2)} exp(-z^2 / 2)

    idVariable : string
                 name of a column in the shapefile's DBF to use for ids
    fixed      : boolean
                 If true then :math:`h_i=h \forall i`. If false then
                 bandwidth is adaptive across observations.
    radius     : float
                 If supplied arc_distances will be calculated
                 based on the given radius.
    diagonal   : boolean
                 If true, set diagonal weights = 1.0, if false (default)
                 diagonal weights are set to value according to kernel
                 function

    Returns
    -------
    w          : W
                 instance of spatial weights

    Examples
    --------
    >>> kw = pysal.kernelW_from_shapefile(pysal.examples.get_path("columbus.shp"),idVariable='POLYID', function = 'gaussian')

    >>> kwd = pysal.kernelW_from_shapefile(pysal.examples.get_path("columbus.shp"),idVariable='POLYID', function = 'gaussian', diagonal = True)
    >>> set(kw.neighbors[1]) == set([4, 2, 3, 1])
    True
    >>> set(kwd.neighbors[1]) == set([4, 2, 3, 1])
    True
    >>>
    >>> set(kw.weights[1]) == set( [0.2436835517263174, 0.29090631630909874, 0.29671172124745776, 0.3989422804014327])
    True
    >>> set(kwd.weights[1]) == set( [0.2436835517263174, 0.29090631630909874, 0.29671172124745776, 1.0])
    True

    Notes
    -----
    Supports polygon or point shapefiles. For polygon shapefiles, distance is
    based on polygon centroids. Distances are defined using coordinates in
    shapefile which are assumed to be projected and not geographical
    coordinates.
    """
    points = get_points_array_from_shapefile(shapefile)
    if radius is not None:
        # Wrap the points in an arc-distance KDTree when a radius is given.
        points = pysal.cg.KDTree(points, distance_metric='Arc', radius=radius)
    if idVariable:
        ids = get_ids(shapefile, idVariable)
        return Kernel(points, function=function, k=k, ids=ids, fixed=fixed,
                      diagonal=diagonal)
    return kernelW(points, k=k, function=function, fixed=fixed,
                   diagonal=diagonal)
# Script: load the filtered data set, fit the graph and LMF models, combine
# them into an ensemble and pickle the resulting objects for later use.
import sys
import pickle
# Extend the import path before the two imports below
# (presumably `models` and `util` live in ./modules — confirm).
sys.path.append('modules')
import models
import util

link = 'https://www.dropbox.com/s/11pujhqtcmvv00o/all_filtered.csv?dl=1'
# util.get_ids returns five values; their exact shapes are not visible here.
# NOTE(review): internal_*_ids look like name -> internal-id mappings, since
# they are used via len(), .keys() and indexing below — confirm in util.
data_st, data_item, internal_st_ids, internal_item_ids, items_select = util.get_ids(
    link)
graph_model = models.GraphWandering(data_st, data_item, len(internal_st_ids),
                                    len(internal_item_ids))
# The LMF model is seeded with the first entry of the graph model's
# neighborhood_graph as its `already_liked` argument.
log_model = models.LMF(already_liked=graph_model.neighborhood_graph[0])
log_model.fit(data_st, data_item, len(internal_st_ids), len(internal_item_ids))
ensemble_model = models.Ensemble(log_model, graph_model)
course_selector = util.CourseSelector(items_select)
course_searcher = util.CourseSearcher(list(internal_item_ids.keys()))
# Inverse of internal_item_ids (value -> key). It is built here but is not
# written to any of the pickle files below.
id_to_course = {internal_item_ids[c]: c for c in internal_item_ids}
# Persist each artifact to its own pickle file in the working directory.
with open('ensemble.pickle', 'wb') as f:
    pickle.dump(ensemble_model, f)
with open('selector.pickle', 'wb') as f:
    pickle.dump(course_selector, f)
with open('searcher.pickle', 'wb') as f:
    pickle.dump(course_searcher, f)
with open('internal_item_ids.pickle', 'wb') as f:
    pickle.dump(internal_item_ids, f)
def adaptive_kernelW_from_shapefile(shapefile, bandwidths=None, k=2, function='triangular',
                                    idVariable=None, radius=None,
                                    diagonal=False):
    # The docstring is a raw string: the LaTeX markup below (``\le``, ``\pi``,
    # ``\ ``) contains backslash sequences that are invalid escapes in a
    # normal string literal and trigger warnings on modern Python.
    r"""
    Kernel weights with adaptive bandwidths.

    Parameters
    ----------
    shapefile  : string
                 shapefile name with shp suffix
    bandwidths : float or array-like (optional)
                 the bandwidth :math:`h_i` for the kernel.
                 if no bandwidth is specified k is used to determine the
                 adaptive bandwidth
    k          : int
                 the number of nearest neighbors to use for determining
                 bandwidth. For fixed bandwidth, :math:`h_i=max(dknn) \forall i`
                 where :math:`dknn` is a vector of k-nearest neighbor
                 distances (the distance to the kth nearest neighbor for each
                 observation).  For adaptive bandwidths, :math:`h_i=dknn_i`
    function   : {'triangular','uniform','quadratic','quartic','gaussian'}
                 kernel function defined as follows with

                 .. math::

                     z_{i,j} = d_{i,j}/h_i

                 triangular

                 .. math::

                     K(z) = (1 - |z|) \ if |z| \le 1

                 uniform

                 .. math::

                     K(z) = |z| \ if |z| \le 1

                 quadratic

                 .. math::

                     K(z) = (3/4)(1-z^2) \ if |z| \le 1

                 quartic

                 .. math::

                     K(z) = (15/16)(1-z^2)^2 \ if |z| \le 1

                 gaussian

                 .. math::

                     K(z) = (2\pi)^{(-1/2)} exp(-z^2 / 2)

    idVariable : string
                 name of a column in the shapefile's DBF to use for ids
    radius     : float
                 If supplied arc_distances will be calculated
                 based on the given radius.
    diagonal   : boolean
                 If true, set diagonal weights = 1.0, if false (default)
                 diagonal weights are set to value according to kernel
                 function

    Returns
    -------
    w          : W
                 instance of spatial weights

    Examples
    --------
    >>> kwa = pysal.adaptive_kernelW_from_shapefile(pysal.examples.get_path("columbus.shp"), function='gaussian')
    >>> kwad = pysal.adaptive_kernelW_from_shapefile(pysal.examples.get_path("columbus.shp"), function='gaussian', diagonal=True)
    >>> kwa.neighbors[0]
    [0, 2, 1]
    >>> kwad.neighbors[0]
    [0, 2, 1]
    >>> kwa.weights[0]
    [0.3989422804014327, 0.24966013701844503, 0.2419707487162134]
    >>> kwad.weights[0]
    [1.0, 0.24966013701844503, 0.2419707487162134]
    >>>

    Notes
    -----
    Supports polygon or point shapefiles. For polygon shapefiles, distance is
    based on polygon centroids. Distances are defined using coordinates in
    shapefile which are assumed to be projected and not geographical
    coordinates.
    """
    points = get_points_array_from_shapefile(shapefile)
    if radius is not None:
        # Wrap the points in an arc-distance KDTree when a radius is given.
        points = pysal.cg.KDTree(points, distance_metric='Arc', radius=radius)
    if idVariable:
        ids = get_ids(shapefile, idVariable)
        return Kernel(points, bandwidth=bandwidths, fixed=False, k=k,
                      function=function, ids=ids, diagonal=diagonal)
    return adaptive_kernelW(points, bandwidths=bandwidths, k=k,
                            function=function, diagonal=diagonal)
'xml', '002') dest_path = os.path.join( 'd:/usr-profiles/chuang/Desktop/Dev/textmining/2_imf_docs/1_use_xmls', 'process_search_docs', 'data', '002') copy = False dump = True ids, meta = read_meta('staff_reports_meta.csv') #%% ## copy xml to data folder if copy: copy_files(data_path, dest_path) #%% ## keep only staff reports xmls = os.listdir(dest_path) xmls = [f for f in xmls if get_ids(f)[1] in ids] #%% ## dump xmls to pickle if dump: doc_list = list() total_length = len(xmls) print( 'converting {} xmls into paragraph lists ......'.format(total_length)) for idx, file_name in enumerate(xmls): xml_path = os.path.join(dest_path, file_name) try: series_id, file_id = get_ids(file_name) except: continue