Ejemplo n.º 1
0
def threshold_binaryW_from_shapefile(shapefile,
                                     threshold,
                                     p=2,
                                     idVariable=None,
                                     radius=None):
    """
    Threshold distance based binary weights from a shapefile.

    Parameters
    ----------

    shapefile  : string
                 shapefile name with shp suffix
    threshold  : float
                 distance band
    p          : float
                 Minkowski p-norm distance metric parameter:
                 1<=p<=infinity
                 2: Euclidean distance
                 1: Manhattan distance
    idVariable : string
                 name of a column in the shapefile's DBF to use for ids
    radius     : float
                 If supplied arc_distances will be calculated
                 based on the given radius. p will be ignored.

    Returns
    -------

    w         : W
                instance
                Weights object with binary weights

    Examples
    --------
    >>> import libpysal.api as ps
    >>> import libpysal
    >>> w = ps.threshold_binaryW_from_shapefile(libpysal.examples.get_path("columbus.shp"),0.62,idVariable="POLYID")
    >>> w.weights[1]
    [1.0, 1.0]

    Notes
    -----
    Supports polygon or point shapefiles. For polygon shapefiles, distance is
    based on polygon centroids. Distances are defined using coordinates in
    shapefile which are assumed to be projected and not geographical
    coordinates.

    """

    data = get_points_array_from_shapefile(shapefile)
    if radius is not None:
        data = cg.KDTree(data, distance_metric='Arc', radius=radius)
    if idVariable:
        ids = get_ids(shapefile, idVariable)
        w = DistanceBand(data, threshold=threshold, p=p)
        w.remap_ids(ids)
        return w
    return threshold_binaryW_from_array(data, threshold, p=p)
Ejemplo n.º 2
0
def queen_from_shapefile(shapefile, idVariable=None, sparse=False):
    """
    Queen contiguity weights from a polygon shapefile

    Parameters
    ----------

    shapefile   : string
                  name of polygon shapefile including suffix.
    idVariable  : string
                  name of a column in the shapefile's DBF to use for ids.
    sparse    : boolean
                If True return WSP instance
                If False return W instance
    Returns
    -------

    w            : W
                   instance of spatial weights

    Examples
    --------
    >>> wq=queen_from_shapefile(pysal.examples.get_path("columbus.shp"))
    >>> "%.3f"%wq.pct_nonzero
    '0.098'
    >>> wq=queen_from_shapefile(pysal.examples.get_path("columbus.shp"),"POLYID")
    >>> "%.3f"%wq.pct_nonzero
    '0.098'
    >>> wq=queen_from_shapefile(pysal.examples.get_path("columbus.shp"), sparse=True)
    >>> pct_sp = wq.sparse.nnz *1. / wq.n**2
    >>> "%.3f"%pct_sp
    '0.098'



    Notes
    -----

    Queen contiguity defines as neighbors any pair of polygons that share at
    least one vertex in their polygon definitions.

    See Also
    --------
    :class:`pysal.weights.W`

    """
    shp = pysal.open(shapefile)
    if idVariable:
        ids = get_ids(shapefile, idVariable)
    else:
        ids = None
    w = buildContiguity(shp, criterion='queen', ids=ids)
    shp.close()
    w.set_shapefile(shapefile, idVariable)

    if sparse:
        w = pysal.weights.WSP(w.sparse, id_order=ids)

    return w
Ejemplo n.º 3
0
def threshold_continuousW_from_shapefile(shapefile, threshold, p=2,
                                         alpha=-1, idVariable=None, radius=None):
    """
    Threshold distance based continuous weights from a shapefile.

    Parameters
    ----------

    shapefile  : string
                 shapefile name with shp suffix
    threshold  : float
                 distance band
    p          : float
                 Minkowski p-norm distance metric parameter:
                 1<=p<=infinity
                 2: Euclidean distance
                 1: Manhattan distance
    alpha      : float
                 distance decay parameter for weight (default -1.0)
                 if alpha is positive the weights will not decline with
                 distance.
    idVariable : string
                 name of a column in the shapefile's DBF to use for ids
    radius     : float
                 If supplied arc_distances will be calculated
                 based on the given radius. p will be ignored.

    Returns
    -------

    w         : W
                instance; Weights object with continuous weights.

    Examples
    --------
    >>> w = threshold_continuousW_from_shapefile(pysal.examples.get_path("columbus.shp"),0.62,idVariable="POLYID")
    >>> w.weights[1]
    [1.6702346893743334, 1.7250729841938093]

    Notes
    -----
    Supports polygon or point shapefiles. For polygon shapefiles, distance is
    based on polygon centroids. Distances are defined using coordinates in
    shapefile which are assumed to be projected and not geographical
    coordinates.

    """

    data = get_points_array_from_shapefile(shapefile)
    if radius is not None:
        data = pysal.cg.KDTree(data, distance_metric='Arc', radius=radius)
    if idVariable:
        ids = get_ids(shapefile, idVariable)
        w = DistanceBand(data, threshold=threshold, p=p, alpha=alpha, binary=False)
        w.remap_ids(ids)
    else:
        w =  threshold_continuousW_from_array(data, threshold, p=p, alpha=alpha)
    w.set_shapefile(shapefile,idVariable)
    return w
Ejemplo n.º 4
0
def queen_from_shapefile(shapefile, idVariable=None, sparse=False):
    """
    Queen contiguity weights from a polygon shapefile.

    Parameters
    ----------

    shapefile   : string
                  name of polygon shapefile including suffix.
    idVariable  : string
                  name of a column in the shapefile's DBF to use for ids.
    sparse    : boolean
                If True return WSP instance
                If False return W instance
    Returns
    -------

    w            : W
                   instance of spatial weights

    Examples
    --------
    >>> wq=queen_from_shapefile(pysal.examples.get_path("columbus.shp"))
    >>> "%.3f"%wq.pct_nonzero
    '9.829'
    >>> wq=queen_from_shapefile(pysal.examples.get_path("columbus.shp"),"POLYID")
    >>> "%.3f"%wq.pct_nonzero
    '9.829'
    >>> wq=queen_from_shapefile(pysal.examples.get_path("columbus.shp"), sparse=True)
    >>> pct_sp = wq.sparse.nnz *1. / wq.n**2
    >>> "%.3f"%pct_sp
    '0.098'

    Notes
    -----

    Queen contiguity defines as neighbors any pair of polygons that share at
    least one vertex in their polygon definitions.

    See Also
    --------
    :class:`pysal.weights.W`

    """
    shp = pysal.open(shapefile)
    w = buildContiguity(shp, criterion='queen')
    if idVariable:
        ids = get_ids(shapefile, idVariable)
        w.remap_ids(ids)
    else:
        ids = None
    shp.close()
    w.set_shapefile(shapefile, idVariable)

    if sparse:
        w = pysal.weights.WSP(w.sparse, id_order=ids)

    return w
Ejemplo n.º 5
0
def rook_from_shapefile(shapefile, idVariable=None, sparse=False):
    """
    Rook contiguity weights from a polygon shapefile.

    Parameters
    ----------

    shapefile : string
                name of polygon shapefile including suffix.
    idVariable: string
                name of a column in the shapefile's DBF to use for ids.
    sparse    : boolean
                If True return WSP instance
                If False return W instance

    Returns
    -------

    w          : W
                 instance of spatial weights

    Examples
    --------
    >>> wr=rook_from_shapefile(pysal.examples.get_path("columbus.shp"), "POLYID")
    >>> "%.3f"%wr.pct_nonzero
    '8.330'
    >>> wr=rook_from_shapefile(pysal.examples.get_path("columbus.shp"), sparse=True)
    >>> pct_sp = wr.sparse.nnz *1. / wr.n**2
    >>> "%.3f"%pct_sp
    '0.083'

    Notes
    -----

    Rook contiguity defines as neighbors any pair of polygons that share a
    common edge in their polygon definitions.

    See Also
    --------
    :class:`pysal.weights.W`

    """
    shp = pysal.open(shapefile)
    w = buildContiguity(shp, criterion='rook')
    if idVariable:
        ids = get_ids(shapefile, idVariable)
        w.remap_ids(ids)
    else:
        ids = None
    shp.close()
    w.set_shapefile(shapefile, idVariable)

    if sparse:
        w = pysal.weights.WSP(w.sparse, id_order=ids)

    return w
Ejemplo n.º 6
0
def rook_from_shapefile(shapefile, idVariable=None, sparse=False):
    """
    Rook contiguity weights from a polygon shapefile.

    Parameters
    ----------

    shapefile : string
                name of polygon shapefile including suffix.
    sparse    : boolean
                If True return WSP instance
                If False return W instance

    Returns
    -------

    w          : W
                 instance of spatial weights

    Examples
    --------
    >>> wr=rook_from_shapefile(pysal.examples.get_path("columbus.shp"), "POLYID")
    >>> "%.3f"%wr.pct_nonzero
    '8.330'
    >>> wr=rook_from_shapefile(pysal.examples.get_path("columbus.shp"), sparse=True)
    >>> pct_sp = wr.sparse.nnz *1. / wr.n**2
    >>> "%.3f"%pct_sp
    '0.083'

    Notes
    -----

    Rook contiguity defines as neighbors any pair of polygons that share a
    common edge in their polygon definitions.

    See Also
    --------
    :class:`pysal.weights.W`

    """
    shp = pysal.open(shapefile)
    w = buildContiguity(shp, criterion='rook')
    if idVariable:
        ids = get_ids(shapefile, idVariable)
        w.remap_ids(ids)
    else:
        ids = None
    shp.close()
    w.set_shapefile(shapefile, idVariable)

    if sparse:
        w = pysal.weights.WSP(w.sparse, id_order=ids)


    return w
Ejemplo n.º 7
0
Archivo: user.py Proyecto: ds2010/pysal
def threshold_binaryW_from_shapefile(shapefile, threshold, p=2, idVariable=None, radius=None):
    """
    Threshold distance based binary weights from a shapefile.

    Parameters
    ----------

    shapefile  : string
                 shapefile name with shp suffix
    threshold  : float
                 distance band
    p          : float
                 Minkowski p-norm distance metric parameter:
                 1<=p<=infinity
                 2: Euclidean distance
                 1: Manhattan distance
    idVariable : string
                 name of a column in the shapefile's DBF to use for ids
    radius     : float
                 If supplied arc_distances will be calculated
                 based on the given radius. p will be ignored.

    Returns
    -------

    w         : W
                instance
                Weights object with binary weights

    Examples
    --------
    >>> w = threshold_binaryW_from_shapefile(pysal.examples.get_path("columbus.shp"),0.62,idVariable="POLYID")
    >>> w.weights[1]
    [1, 1]

    Notes
    -----
    Supports polygon or point shapefiles. For polygon shapefiles, distance is
    based on polygon centroids. Distances are defined using coordinates in
    shapefile which are assumed to be projected and not geographical
    coordinates.

    """

    data = get_points_array_from_shapefile(shapefile)
    if radius is not None:
        data = pysal.cg.KDTree(data, distance_metric='Arc', radius=radius)
    if idVariable:
        ids = get_ids(shapefile, idVariable)
        w = DistanceBand(data, threshold=threshold, p=p)
        w.remap_ids(ids)
        return w
    return threshold_binaryW_from_array(data, threshold, p=p)
Ejemplo n.º 8
0
def rook_from_shapefile(shapefile, idVariable=None, sparse=False):
    """
    Rook contiguity weights from a polygon shapefile

    Parameters
    ----------

    shapefile : string
                name of polygon shapefile including suffix.
    sparse    : boolean
                If True return WSP instance
                If False return W instance

    Returns
    -------

    w          : W
                 instance of spatial weights

    Examples
    --------
    >>> wr=rook_from_shapefile(pysal.examples.get_path("columbus.shp"), "POLYID")
    >>> wr.pct_nonzero
    0.083298625572678045
    >>> wr=rook_from_shapefile(pysal.examples.get_path("columbus.shp"), sparse=True)
    >>> wr.sparse.nnz *1. / wr.n**2
    0.083298625572678045

    Notes
    -----

    Rook contiguity defines as neighbors any pair of polygons that share a
    common edge in their polygon definitions.

    See Also
    --------
    :class:`pysal.weights.W`

    """
    shp = pysal.open(shapefile)
    if idVariable:
        ids = get_ids(shapefile, idVariable)
    else:
        ids = None
    w = buildContiguity(shp, criterion='rook', ids=ids)
    shp.close()
    if sparse:
        w = pysal.weights.WSP(w.sparse, id_order=ids)
    return w
Ejemplo n.º 9
0
def adaptive_kernelW_from_shapefile(shapefile, bandwidths=None, k=2, function='triangular',
                                    idVariable=None, radius=None):
    """
    Kernel weights with adaptive bandwidths

    Parameters
    ----------

    shapefile   : string
                  shapefile name with shp suffix
    bandwidths  : float or array-like (optional)
                  the bandwidth :math:`h_i` for the kernel.
                  if no bandwidth is specified k is used to determine the
                  adaptive bandwidth
    k           : int
                  the number of nearest neighbors to use for determining
                  bandwidth. For fixed bandwidth, :math:`h_i=max(dknn) \\forall i`
                  where :math:`dknn` is a vector of k-nearest neighbor
                  distances (the distance to the kth nearest neighbor for each
                  observation).  For adaptive bandwidths, :math:`h_i=dknn_i`
    function    : string {'triangular','uniform','quadratic','quartic','gaussian'}
                  kernel function defined as follows with

                  .. math::

                      z_{i,j} = d_{i,j}/h_i

                  triangular

                  .. math::

                      K(z) = (1 - |z|) \ if |z| \le 1

                  uniform

                  .. math::

                      K(z) = |z| \ if |z| \le 1

                  quadratic

                  .. math::

                      K(z) = (3/4)(1-z^2) \ if |z| \le 1

                  quartic

                  .. math::

                      K(z) = (15/16)(1-z^2)^2 \ if |z| \le 1

                  gaussian

                  .. math::

                      K(z) = (2\pi)^{(-1/2)} exp(-z^2 / 2)
    idVariable   : string
                   name of a column in the shapefile's DBF to use for ids
    radius     : If supplied arc_distances will be calculated
                 based on the given radius. p will be ignored.

    Returns
    -------

    w            : W
                   instance of spatial weights


    Examples
    --------
    >>> kwa = adaptive_kernelW_from_shapefile(pysal.examples.get_path("columbus.shp"))
    >>> kwa.weights[0]
    [1.0, 0.03178906767736345, 9.99999900663795e-08]
    >>> kwa.bandwidth[:3]
    array([[ 0.59871832],
           [ 0.59871832],
           [ 0.56095647]])

    Notes
    -----
    Supports polygon or point shapefiles. For polygon shapefiles, distance is
    based on polygon centroids. Distances are defined using coordinates in
    shapefile which are assumed to be projected and not geographical
    coordinates.

    """
    points = get_points_array_from_shapefile(shapefile)
    if radius is not None:
        points = pysal.cg.KDTree(points, distance_metric='Arc', radius=radius)
    if idVariable:
        ids = get_ids(shapefile, idVariable)
        return Kernel(points, bandwidth=bandwidths, fixed=False, k=k, function=function, ids=ids)
    return adaptive_kernelW(points, bandwidths=bandwidths, k=k, function=function)
Ejemplo n.º 10
0
def knnW_from_shapefile(shapefile, k=2, p=2, idVariable=None, radius=None):
    """
    Nearest neighbor weights from a shapefile.

    Parameters
    ----------

    shapefile  : string
                 shapefile name with shp suffix
    k          : int
                 number of nearest neighbors
    p          : float
                 Minkowski p-norm distance metric parameter:
                 1<=p<=infinity
                 2: Euclidean distance
                 1: Manhattan distance
    idVariable : string
                 name of a column in the shapefile's DBF to use for ids
    radius     : float
                 If supplied arc_distances will be calculated
                 based on the given radius. p will be ignored.

    Returns
    -------

    w         : W
                instance; Weights object with binary weights

    Examples
    --------

    Polygon shapefile

    >>> wc=knnW_from_shapefile(pysal.examples.get_path("columbus.shp"))
    >>> "%.4f"%wc.pct_nonzero
    '4.0816'
    >>> set([2,1]) == set(wc.neighbors[0])
    True
    >>> wc3=pysal.knnW_from_shapefile(pysal.examples.get_path("columbus.shp"),k=3)
    >>> set(wc3.neighbors[0]) == set([2,1,3])
    True
    >>> set(wc3.neighbors[2]) == set([4,3,0])
    True

    1 offset rather than 0 offset

    >>> wc3_1=knnW_from_shapefile(pysal.examples.get_path("columbus.shp"),k=3,idVariable="POLYID")
    >>> set([4,3,2]) == set(wc3_1.neighbors[1])
    True
    >>> wc3_1.weights[2]
    [1.0, 1.0, 1.0]
    >>> set([4,1,8]) == set(wc3_1.neighbors[2])
    True


    Point shapefile

    >>> w=knnW_from_shapefile(pysal.examples.get_path("juvenile.shp"))
    >>> w.pct_nonzero
    1.1904761904761905
    >>> w1=knnW_from_shapefile(pysal.examples.get_path("juvenile.shp"),k=1)
    >>> "%.3f"%w1.pct_nonzero
    '0.595'
    >>>

    Notes
    -----

    Supports polygon or point shapefiles. For polygon shapefiles, distance is
    based on polygon centroids. Distances are defined using coordinates in
    shapefile which are assumed to be projected and not geographical
    coordinates.

    Ties between neighbors of equal distance are arbitrarily broken.

    See Also
    --------
    :class:`pysal.weights.W`

    """

    data = get_points_array_from_shapefile(shapefile)

    if radius is not None:
        kdtree = pysal.cg.KDTree(data, distance_metric='Arc', radius=radius)
    else:
        kdtree = pysal.cg.KDTree(data)
    if idVariable:
        ids = get_ids(shapefile, idVariable)
        return knnW(kdtree, k=k, p=p, ids=ids)
    return knnW(kdtree, k=k, p=p)
Ejemplo n.º 11
0
import spotipy
import spotipy.util as util

from spotipy.oauth2 import SpotifyClientCredentials
from secret import USER_EMAIL, CLIENT_ID, CLIENT_SECRET, PLAYLIST_ID
from pprint import pprint
from util import get_recently_added, get_ids

if __name__ == '__main__':
    print(
        'this script will erase your in rotation and start it over with the ' +
        'latest 100 songs added.')

    scope = 'user-library-read playlist-modify-public'
    token = util.prompt_for_user_token(USER_EMAIL,
                                       scope,
                                       client_id=CLIENT_ID,
                                       client_secret=CLIENT_SECRET,
                                       redirect_uri='http://localhost/')
    sp = spotipy.Spotify(auth=token)
    user_id = sp.current_user()['id']

    recently_added_ids = get_ids(get_recently_added(sp, at_least_100=True))
    sp.user_playlist_replace_tracks(user_id, PLAYLIST_ID, recently_added_ids)
Ejemplo n.º 12
0
def adaptive_kernelW_from_shapefile(shapefile,
                                    bandwidths=None,
                                    k=2,
                                    function='triangular',
                                    idVariable=None,
                                    radius=None):
    """
    Kernel weights with adaptive bandwidths

    Parameters
    ----------

    shapefile   : string
                  shapefile name with shp suffix
    bandwidths  : float or array-like (optional)
                  the bandwidth :math:`h_i` for the kernel.
                  if no bandwidth is specified k is used to determine the
                  adaptive bandwidth
    k           : int
                  the number of nearest neighbors to use for determining
                  bandwidth. For fixed bandwidth, :math:`h_i=max(dknn) \\forall i`
                  where :math:`dknn` is a vector of k-nearest neighbor
                  distances (the distance to the kth nearest neighbor for each
                  observation).  For adaptive bandwidths, :math:`h_i=dknn_i`
    function    : string {'triangular','uniform','quadratic','quartic','gaussian'}
                  kernel function defined as follows with

                  .. math::

                      z_{i,j} = d_{i,j}/h_i

                  triangular

                  .. math::

                      K(z) = (1 - |z|) \ if |z| \le 1

                  uniform

                  .. math::

                      K(z) = |z| \ if |z| \le 1

                  quadratic

                  .. math::

                      K(z) = (3/4)(1-z^2) \ if |z| \le 1

                  quartic

                  .. math::

                      K(z) = (15/16)(1-z^2)^2 \ if |z| \le 1

                  gaussian

                  .. math::

                      K(z) = (2\pi)^{(-1/2)} exp(-z^2 / 2)
    idVariable   : string
                   name of a column in the shapefile's DBF to use for ids
    radius     : If supplied arc_distances will be calculated
                 based on the given radius. p will be ignored.

    Returns
    -------

    w            : W
                   instance of spatial weights


    Examples
    --------
    >>> kwa = adaptive_kernelW_from_shapefile(pysal.examples.get_path("columbus.shp"))
    >>> kwa.weights[0]
    [1.0, 0.03178906767736345, 9.99999900663795e-08]
    >>> kwa.bandwidth[:3]
    array([[ 0.59871832],
           [ 0.59871832],
           [ 0.56095647]])

    Notes
    -----
    Supports polygon or point shapefiles. For polygon shapefiles, distance is
    based on polygon centroids. Distances are defined using coordinates in
    shapefile which are assumed to be projected and not geographical
    coordinates.

    """
    points = get_points_array_from_shapefile(shapefile)
    if radius is not None:
        points = pysal.cg.KDTree(points, distance_metric='Arc', radius=radius)
    if idVariable:
        ids = get_ids(shapefile, idVariable)
        return Kernel(points,
                      bandwidth=bandwidths,
                      fixed=False,
                      k=k,
                      function=function,
                      ids=ids)
    return adaptive_kernelW(points,
                            bandwidths=bandwidths,
                            k=k,
                            function=function)
Ejemplo n.º 13
0
def kernelW_from_shapefile(shapefile,
                           k=2,
                           function='triangular',
                           idVariable=None,
                           fixed=True,
                           radius=None):
    """
    Kernel based weights

    Parameters
    ----------

    shapefile   : string
                  shapefile name with shp suffix
    k           : int
                  the number of nearest neighbors to use for determining
                  bandwidth. Bandwidth taken as :math:`h_i=max(dknn) \\forall i`
                  where :math:`dknn` is a vector of k-nearest neighbor
                  distances (the distance to the kth nearest neighbor for each
                  observation).
    function    : string {'triangular','uniform','quadratic','epanechnikov',
                  'quartic','bisquare','gaussian'}


                  .. math::

                      z_{i,j} = d_{i,j}/h_i

                  triangular

                  .. math::

                      K(z) = (1 - |z|) \ if |z| \le 1

                  uniform

                  .. math::

                      K(z) = |z| \ if |z| \le 1

                  quadratic

                  .. math::

                      K(z) = (3/4)(1-z^2) \ if |z| \le 1

                  epanechnikov

                  .. math::

                      K(z) = (1-z^2) \ if |z| \le 1

                  quartic

                  .. math::

                      K(z) = (15/16)(1-z^2)^2 \ if |z| \le 1

                  bisquare

                  .. math::

                      K(z) = (1-z^2)^2 \ if |z| \le 1

                  gaussian

                  .. math::

                      K(z) = (2\pi)^{(-1/2)} exp(-z^2 / 2)
    idVariable   : string
                   name of a column in the shapefile's DBF to use for ids

    fixed        : binary
                   If true then :math:`h_i=h \\forall i`. If false then
                   bandwidth is adaptive across observations.
    radius     : If supplied arc_distances will be calculated
                 based on the given radius. p will be ignored.


    Returns
    -------

    w            : W
                   instance of spatial weights

    Examples
    --------
    >>> kw = kernelW_from_shapefile(pysal.examples.get_path("columbus.shp"),idVariable='POLYID')
    >>> kw.weights[1]
    [0.2052478782400463, 0.007078773148450623, 1.0, 0.23051223027663237]
    >>> kw.bandwidth[:3]
    array([[ 0.75333961],
           [ 0.75333961],
           [ 0.75333961]])


    Notes
    -----
    Supports polygon or point shapefiles. For polygon shapefiles, distance is
    based on polygon centroids. Distances are defined using coordinates in
    shapefile which are assumed to be projected and not geographical
    coordinates.


    """
    points = get_points_array_from_shapefile(shapefile)
    if radius is not None:
        points = pysal.cg.KDTree(points, distance_metric='Arc', radius=radius)
    if idVariable:
        ids = get_ids(shapefile, idVariable)
        return Kernel(points, function=function, k=k, ids=ids, fixed=fixed)
    return kernelW(points, k=k, function=function, fixed=fixed)
Ejemplo n.º 14
0
def knnW_from_shapefile(shapefile, k=2, p=2, idVariable=None, radius=None):
    """
    Nearest neighbor weights from a shapefile

    Parameters
    ----------

    shapefile  : string
                 shapefile name with shp suffix
    k          : int
                 number of nearest neighbors
    p          : float
                 Minkowski p-norm distance metric parameter:
                 1<=p<=infinity
                 2: Euclidean distance
                 1: Manhattan distance
    idVariable : string
                 name of a column in the shapefile's DBF to use for ids
    radius     : If supplied arc_distances will be calculated
                 based on the given radius. p will be ignored.

    Returns
    -------

    w         : W instance
                Weights object with binary weights


    Examples
    --------

    Polygon shapefile

    >>> wc=knnW_from_shapefile(pysal.examples.get_path("columbus.shp"))
    >>> wc.pct_nonzero
    0.040816326530612242
    >>> wc3=knnW_from_shapefile(pysal.examples.get_path("columbus.shp"),k=3,idVariable="POLYID")
    >>> wc3.weights[1]
    [1, 1, 1]
    >>> wc3.neighbors[1]
    [3, 2, 4]
    >>> wc.neighbors[0]
    [2, 1]

    Point shapefile

    >>> w=knnW_from_shapefile(pysal.examples.get_path("juvenile.shp"))
    >>> w.pct_nonzero
    0.011904761904761904
    >>> w1=knnW_from_shapefile(pysal.examples.get_path("juvenile.shp"),k=1)
    >>> w1.pct_nonzero
    0.0059523809523809521
    >>>

    Notes
    -----

    Supports polygon or point shapefiles. For polygon shapefiles, distance is
    based on polygon centroids. Distances are defined using coordinates in
    shapefile which are assumed to be projected and not geographical
    coordinates.

    Ties between neighbors of equal distance are arbitrarily broken.


    See Also
    --------
    :class:`pysal.weights.W`

    """

    data = get_points_array_from_shapefile(shapefile)
    if radius is not None:
        data = pysal.cg.KDTree(data, distance_metric='Arc', radius=radius)
    if idVariable:
        ids = get_ids(shapefile, idVariable)
        return knnW(data, k=k, p=p, ids=ids)
    return knnW(data, k=k, p=p)
Ejemplo n.º 15
0
    assert len(
        recently_added) >= 100  # if not true, spotify api might be down.

    final_playlist = []

    i0, i1 = 0, 0
    matched = False
    while i0 < len(in_rotation) and i1 < len(recently_added):
        if in_rotation[i0][0] == recently_added[i1][0]:
            print_aligned(in_rotation[i0][1], recently_added[i1][1], log)
            final_playlist.append(in_rotation[i0])
            matched = True
            i0 += 1
            i1 += 1
        else:
            if in_rotation[i0][0] not in get_ids(recently_added):
                print_song1(
                    in_rotation[i0][1],
                    '[WILL BE REMOVED (old and added manually, or removed from library)]',
                    log)
                i0 += 1
            else:
                if not matched:
                    print_song2('[WILL BE ADDED (newly added song)]',
                                recently_added[i1][1], log)
                    final_playlist.append(recently_added[i1])
                    i1 += 1
                else:
                    print_song2('[WILL REMAIN OUT (manually removed)]',
                                recently_added[i1][1], log)
                    i1 += 1
Ejemplo n.º 16
0
def knnW_from_shapefile(shapefile, k=2, p=2, idVariable=None, radius=None):
    """
    Nearest neighbor weights from a shapefile

    Parameters
    ----------

    shapefile  : string
                 shapefile name with shp suffix
    k          : int
                 number of nearest neighbors
    p          : float
                 Minkowski p-norm distance metric parameter:
                 1<=p<=infinity
                 2: Euclidean distance
                 1: Manhattan distance
    idVariable : string
                 name of a column in the shapefile's DBF to use for ids
    radius     : If supplied arc_distances will be calculated
                 based on the given radius. p will be ignored.

    Returns
    -------

    w         : W instance
                Weights object with binary weights


    Examples
    --------

    Polygon shapefile

    >>> wc=knnW_from_shapefile(pysal.examples.get_path("columbus.shp"))
    >>> wc.pct_nonzero
    0.040816326530612242
    >>> wc3=knnW_from_shapefile(pysal.examples.get_path("columbus.shp"),k=3,idVariable="POLYID")
    >>> wc3.weights[1]
    [1, 1, 1]
    >>> wc3.neighbors[1]
    [3, 2, 4]
    >>> wc.neighbors[0]
    [2, 1]

    Point shapefile

    >>> w=knnW_from_shapefile(pysal.examples.get_path("juvenile.shp"))
    >>> w.pct_nonzero
    0.011904761904761904
    >>> w1=knnW_from_shapefile(pysal.examples.get_path("juvenile.shp"),k=1)
    >>> w1.pct_nonzero
    0.0059523809523809521
    >>>

    Notes
    -----

    Supports polygon or point shapefiles. For polygon shapefiles, distance is
    based on polygon centroids. Distances are defined using coordinates in
    shapefile which are assumed to be projected and not geographical
    coordinates.

    Ties between neighbors of equal distance are arbitrarily broken.


    See Also
    --------
    :class:`pysal.weights.W`

    """

    data = get_points_array_from_shapefile(shapefile)
    if radius is not None:
        data = pysal.cg.KDTree(data, distance_metric='Arc', radius=radius)
    if idVariable:
        ids = get_ids(shapefile, idVariable)
        return knnW(data, k=k, p=p, ids=ids)
    return knnW(data, k=k, p=p)
Ejemplo n.º 17
0
def kernelW_from_shapefile(shapefile, k=2, function='triangular', idVariable=None, fixed=True, radius=None):
    """
    Kernel based weights

    Parameters
    ----------

    shapefile   : string
                  shapefile name with shp suffix
    k           : int
                  the number of nearest neighbors to use for determining
                  bandwidth. Bandwidth taken as :math:`h_i=max(dknn) \\forall i`
                  where :math:`dknn` is a vector of k-nearest neighbor
                  distances (the distance to the kth nearest neighbor for each
                  observation).
    function    : string {'triangular','uniform','quadratic','epanechnikov',
                  'quartic','bisquare','gaussian'}


                  .. math::

                      z_{i,j} = d_{i,j}/h_i

                  triangular

                  .. math::

                      K(z) = (1 - |z|) \ if |z| \le 1

                  uniform

                  .. math::

                      K(z) = |z| \ if |z| \le 1

                  quadratic

                  .. math::

                      K(z) = (3/4)(1-z^2) \ if |z| \le 1

                  epanechnikov

                  .. math::

                      K(z) = (1-z^2) \ if |z| \le 1

                  quartic

                  .. math::

                      K(z) = (15/16)(1-z^2)^2 \ if |z| \le 1

                  bisquare

                  .. math::

                      K(z) = (1-z^2)^2 \ if |z| \le 1

                  gaussian

                  .. math::

                      K(z) = (2\pi)^{(-1/2)} exp(-z^2 / 2)
    idVariable   : string
                   name of a column in the shapefile's DBF to use for ids

    fixed        : binary
                   If true then :math:`h_i=h \\forall i`. If false then
                   bandwidth is adaptive across observations.
    radius     : If supplied arc_distances will be calculated
                 based on the given radius. p will be ignored.


    Returns
    -------

    w            : W
                   instance of spatial weights

    Examples
    --------
    >>> kw = kernelW_from_shapefile(pysal.examples.get_path("columbus.shp"),idVariable='POLYID')
    >>> kw.weights[1]
    [0.2052478782400463, 0.007078773148450623, 1.0, 0.23051223027663237]
    >>> kw.bandwidth[:3]
    array([[ 0.75333961],
           [ 0.75333961],
           [ 0.75333961]])


    Notes
    -----
    Supports polygon or point shapefiles. For polygon shapefiles, distance is
    based on polygon centroids. Distances are defined using coordinates in
    shapefile which are assumed to be projected and not geographical
    coordinates.


    """
    points = get_points_array_from_shapefile(shapefile)
    if radius is not None:
        points = pysal.cg.KDTree(points, distance_metric='Arc', radius=radius)
    if idVariable:
        ids = get_ids(shapefile, idVariable)
        return Kernel(points, function=function, k=k, ids=ids, fixed=fixed)
    return kernelW(points, k=k, function=function, fixed=fixed)
Ejemplo n.º 18
0
def kernelW_from_shapefile(shapefile, k=2, function='triangular',
        idVariable=None, fixed=True, radius=None, diagonal=False):
    """
    Kernel based weights.

    Parameters
    ----------

    shapefile   : string
                  shapefile name with shp suffix
    k           : int
                  the number of nearest neighbors to use for determining
                  bandwidth. Bandwidth taken as :math:`h_i=max(dknn) \\forall i`
                  where :math:`dknn` is a vector of k-nearest neighbor
                  distances (the distance to the kth nearest neighbor for each
                  observation).
    function    : {'triangular','uniform','quadratic','epanechnikov', 'quartic','bisquare','gaussian'}

                  .. math::

                      z_{i,j} = d_{i,j}/h_i

                  triangular

                  .. math::

                      K(z) = (1 - |z|) \ if |z| \le 1

                  uniform

                  .. math::

                      K(z) = |z| \ if |z| \le 1

                  quadratic

                  .. math::

                      K(z) = (3/4)(1-z^2) \ if |z| \le 1

                  epanechnikov

                  .. math::

                      K(z) = (1-z^2) \ if |z| \le 1

                  quartic

                  .. math::

                      K(z) = (15/16)(1-z^2)^2 \ if |z| \le 1

                  bisquare

                  .. math::

                      K(z) = (1-z^2)^2 \ if |z| \le 1

                  gaussian

                  .. math::

                      K(z) = (2\pi)^{(-1/2)} exp(-z^2 / 2)
    idVariable   : string
                   name of a column in the shapefile's DBF to use for ids

    fixed        : binary
                   If true then :math:`h_i=h \\forall i`. If false then
                   bandwidth is adaptive across observations.
    radius     : float
                 If supplied arc_distances will be calculated
                 based on the given radius. p will be ignored.
    diagonal   : boolean
                 If true, set diagonal weights = 1.0, if false (default)
                 diagonal weights are set to value according to kernel
                 function

    Returns
    -------

    w            : W
                   instance of spatial weights

    Examples
    --------
    >>> kw = pysal.kernelW_from_shapefile(pysal.examples.get_path("columbus.shp"),idVariable='POLYID', function = 'gaussian')

    >>> kwd = pysal.kernelW_from_shapefile(pysal.examples.get_path("columbus.shp"),idVariable='POLYID', function = 'gaussian', diagonal = True)
    >>> set(kw.neighbors[1]) == set([4, 2, 3, 1])
    True
    >>> set(kwd.neighbors[1]) == set([4, 2, 3, 1])
    True
    >>>
    >>> set(kw.weights[1]) == set( [0.2436835517263174, 0.29090631630909874, 0.29671172124745776, 0.3989422804014327])
    True
    >>> set(kwd.weights[1]) == set( [0.2436835517263174, 0.29090631630909874, 0.29671172124745776, 1.0])
    True


    Notes
    -----
    Supports polygon or point shapefiles. For polygon shapefiles, distance is
    based on polygon centroids. Distances are defined using coordinates in
    shapefile which are assumed to be projected and not geographical
    coordinates.

    """

    points = get_points_array_from_shapefile(shapefile)
    if radius is not None:
        points = pysal.cg.KDTree(points, distance_metric='Arc', radius=radius)
    if idVariable:
        ids = get_ids(shapefile, idVariable)
        return Kernel(points, function=function, k=k, ids=ids, fixed=fixed,
                diagonal = diagonal)
    return kernelW(points, k=k, function=function, fixed=fixed,
            diagonal=diagonal)
Ejemplo n.º 19
0
import sys
import pickle

sys.path.append('modules')

import models
import util

link = 'https://www.dropbox.com/s/11pujhqtcmvv00o/all_filtered.csv?dl=1'
data_st, data_item, internal_st_ids, internal_item_ids, items_select = util.get_ids(
    link)
graph_model = models.GraphWandering(data_st, data_item, len(internal_st_ids),
                                    len(internal_item_ids))
log_model = models.LMF(already_liked=graph_model.neighborhood_graph[0])
log_model.fit(data_st, data_item, len(internal_st_ids), len(internal_item_ids))
ensemble_model = models.Ensemble(log_model, graph_model)
course_selector = util.CourseSelector(items_select)
course_searcher = util.CourseSearcher(list(internal_item_ids.keys()))
id_to_course = {internal_item_ids[c]: c for c in internal_item_ids}

with open('ensemble.pickle', 'wb') as f:
    pickle.dump(ensemble_model, f)

with open('selector.pickle', 'wb') as f:
    pickle.dump(course_selector, f)

with open('searcher.pickle', 'wb') as f:
    pickle.dump(course_searcher, f)

with open('internal_item_ids.pickle', 'wb') as f:
    pickle.dump(internal_item_ids, f)
Ejemplo n.º 20
0
def adaptive_kernelW_from_shapefile(shapefile, bandwidths=None, k=2, function='triangular',
                                    idVariable=None, radius=None,
                                    diagonal = False):
    """
    Kernel weights with adaptive bandwidths.

    Parameters
    ----------

    shapefile   : string
                  shapefile name with shp suffix
    bandwidths  : float
                  or array-like (optional)
                  the bandwidth :math:`h_i` for the kernel.
                  if no bandwidth is specified k is used to determine the
                  adaptive bandwidth
    k           : int
                  the number of nearest neighbors to use for determining
                  bandwidth. For fixed bandwidth, :math:`h_i=max(dknn) \\forall i`
                  where :math:`dknn` is a vector of k-nearest neighbor
                  distances (the distance to the kth nearest neighbor for each
                  observation).  For adaptive bandwidths, :math:`h_i=dknn_i`
    function    : {'triangular','uniform','quadratic','quartic','gaussian'}
                  kernel function defined as follows with

                  .. math::

                      z_{i,j} = d_{i,j}/h_i

                  triangular

                  .. math::

                      K(z) = (1 - |z|) \ if |z| \le 1

                  uniform

                  .. math::

                      K(z) = |z| \ if |z| \le 1

                  quadratic

                  .. math::

                      K(z) = (3/4)(1-z^2) \ if |z| \le 1

                  quartic

                  .. math::

                      K(z) = (15/16)(1-z^2)^2 \ if |z| \le 1

                  gaussian

                  .. math::

                      K(z) = (2\pi)^{(-1/2)} exp(-z^2 / 2)
    idVariable   : string
                   name of a column in the shapefile's DBF to use for ids
    radius     : float
                 If supplied arc_distances will be calculated
                 based on the given radius. p will be ignored.
    diagonal   : boolean
                 If true, set diagonal weights = 1.0, if false (default)
                 diagonal weights are set to value according to kernel
                 function

    Returns
    -------

    w            : W
                   instance of spatial weights

    Examples
    --------
    >>> kwa = pysal.adaptive_kernelW_from_shapefile(pysal.examples.get_path("columbus.shp"), function='gaussian')
    >>> kwad = pysal.adaptive_kernelW_from_shapefile(pysal.examples.get_path("columbus.shp"), function='gaussian', diagonal=True)
    >>> kwa.neighbors[0]
    [0, 2, 1]
    >>> kwad.neighbors[0]
    [0, 2, 1]
    >>> kwa.weights[0]
    [0.3989422804014327, 0.24966013701844503, 0.2419707487162134]
    >>> kwad.weights[0]
    [1.0, 0.24966013701844503, 0.2419707487162134]
    >>>

    Notes
    -----
    Supports polygon or point shapefiles. For polygon shapefiles, distance is
    based on polygon centroids. Distances are defined using coordinates in
    shapefile which are assumed to be projected and not geographical
    coordinates.

    """
    points = get_points_array_from_shapefile(shapefile)
    if radius is not None:
        points = pysal.cg.KDTree(points, distance_metric='Arc', radius=radius)
    if idVariable:
        ids = get_ids(shapefile, idVariable)
        return Kernel(points, bandwidth=bandwidths, fixed=False, k=k,
                function=function, ids=ids, diagonal=diagonal)
    return adaptive_kernelW(points, bandwidths=bandwidths, k=k,
            function=function, diagonal=diagonal)
Ejemplo n.º 21
0
    'xml', '002')
dest_path = os.path.join(
    'd:/usr-profiles/chuang/Desktop/Dev/textmining/2_imf_docs/1_use_xmls',
    'process_search_docs', 'data', '002')
copy = False
dump = True
ids, meta = read_meta('staff_reports_meta.csv')

#%%
## copy xml to data folder
if copy:
    copy_files(data_path, dest_path)
#%%
## keep only staff reports
xmls = os.listdir(dest_path)
xmls = [f for f in xmls if get_ids(f)[1] in ids]

#%%
## dump xmls to pickle

if dump:
    doc_list = list()
    total_length = len(xmls)
    print(
        'converting {} xmls into paragraph lists ......'.format(total_length))
    for idx, file_name in enumerate(xmls):
        xml_path = os.path.join(dest_path, file_name)
        try:
            series_id, file_id = get_ids(file_name)
        except:
            continue