コード例 #1
0
ファイル: registry.py プロジェクト: CartoDB/pysal
def _find_bcs():
    """
    Discover the base and user-facing classes exposed by ``pysal.spreg``.

    Every name in ``pysal.spreg`` is inspected: submodules whose package name
    starts with "pysal" are remembered, every class is recorded, and classes
    whose name starts with 'Base' are additionally recorded as base classes.
    Each remembered submodule is then scanned for further 'Base*' classes.

    Returns
    -------
    bcs : dict
          class name -> class, for classes whose name starts with 'Base'
    ucs : dict
          class name -> class, for classes found at the top level of
          ``pysal.spreg`` that either subclass one of the base classes (and
          are not registered as a base class themselves) or whose name ends
          with 'Regimes'
    """
    classes = dict()
    bcs = dict()
    ucs = dict()
    submods = dict()
    for obname in dir(pysal.spreg):
        ob = pysal.spreg.__dict__[obname]
        if isinstance(ob, ModuleType):
            # __package__ can be None (or missing) on a module; treat that as
            # "not part of pysal" instead of failing with AttributeError.
            if (getattr(ob, '__package__', None) or '').startswith("pysal"):
                submods.update({obname:ob})
        elif isinstance(ob, ClassType):
            classes.update({obname:ob})
            if ob.__name__.startswith('Base'):
                bcs.update({obname:ob})
    for mod in submods.values():
        for clname in dir(mod):
            cl = mod.__dict__[clname]
            if isinstance(cl, ClassType):
                try:
                    if cl.__name__.startswith('Base'):
                        # NOTE(review): bcs is keyed by class *name*, so this
                        # membership test on the class object is expected to
                        # always succeed and the else branch never runs.
                        # Behaviour is kept as-is; confirm the intent.
                        if cl not in bcs:
                            bcs.update({cl.__name__:cl})
                        else:
                            classes.update({cl.__name__:cl})
                except Exception:
                    # best effort: skip anything that cannot be inspected,
                    # but let KeyboardInterrupt/SystemExit propagate
                    continue
    ucs.update({k:v for k,v in diter(classes) if (
                any([issubclass(v, bc) for bc in bcs.values()])
                and (k not in bcs))
                or k.endswith('Regimes')})
    return bcs, ucs
コード例 #2
0
ファイル: registry.py プロジェクト: sathish-deevi/pysal
def _find_bcs():
    """
    Discover the base and user-facing classes exposed by ``pysal.spreg``.

    Every name in ``pysal.spreg`` is inspected: submodules whose package name
    starts with "pysal" are remembered, every class is recorded, and classes
    whose name starts with 'Base' are additionally recorded as base classes.
    Each remembered submodule is then scanned for further 'Base*' classes.

    Returns
    -------
    bcs : dict
          class name -> class, for classes whose name starts with 'Base'
    ucs : dict
          class name -> class, for classes found at the top level of
          ``pysal.spreg`` that either subclass one of the base classes (and
          are not registered as a base class themselves) or whose name ends
          with 'Regimes'
    """
    classes = dict()
    bcs = dict()
    ucs = dict()
    submods = dict()
    for obname in dir(pysal.spreg):
        ob = pysal.spreg.__dict__[obname]
        if isinstance(ob, ModuleType):
            # __package__ can be None (or missing) on a module; treat that as
            # "not part of pysal" instead of failing with AttributeError.
            if (getattr(ob, '__package__', None) or '').startswith("pysal"):
                submods.update({obname: ob})
        elif isinstance(ob, ClassType):
            classes.update({obname: ob})
            if ob.__name__.startswith('Base'):
                bcs.update({obname: ob})
    for mod in submods.values():
        for clname in dir(mod):
            cl = mod.__dict__[clname]
            if isinstance(cl, ClassType):
                try:
                    if cl.__name__.startswith('Base'):
                        # NOTE(review): bcs is keyed by class *name*, so this
                        # membership test on the class object is expected to
                        # always succeed and the else branch never runs.
                        # Behaviour is kept as-is; confirm the intent.
                        if cl not in bcs:
                            bcs.update({cl.__name__: cl})
                        else:
                            classes.update({cl.__name__: cl})
                except Exception:
                    # best effort: skip anything that cannot be inspected,
                    # but let KeyboardInterrupt/SystemExit propagate
                    continue
    ucs.update({
        k: v
        for k, v in diter(classes)
        if (any([issubclass(v, bc) for bc in bcs.values()]) and (k not in bcs))
        or k.endswith('Regimes')
    })
    return bcs, ucs
コード例 #3
0
ファイル: test_explorer.py プロジェクト: dfolch/cenpy
 def test_explain(self):
     # terse explanation: a dict whose keys and values are all non-empty
     # instances of testtype
     brief = cenpy.explorer.explain(self.av[0])
     self.assertIsInstance(brief, dict)
     for pair in diter(brief):
         for item in pair:
             self.assertIsInstance(item, testtype)
             self.assertNotEqual(len(item), 0)

     # verbose explanation: still a dict, carrying at least two entries
     detailed = cenpy.explorer.explain(self.av[0], verbose=True)
     self.assertIsInstance(detailed, dict)
     self.assertGreaterEqual(len(detailed), 2)
コード例 #4
0
    def test_explain(self):
        # terse explanation: a dict whose keys and values are all non-empty
        # instances of testtype
        brief = cenpy.explorer.explain(self.av[0])
        self.assertIsInstance(brief, dict)
        for pair in diter(brief):
            for item in pair:
                self.assertIsInstance(item, testtype)
                self.assertNotEqual(len(item), 0)

        # verbose explanation: still a dict, carrying at least two entries
        detailed = cenpy.explorer.explain(self.av[0], verbose=True)
        self.assertIsInstance(detailed, dict)
        self.assertGreaterEqual(len(detailed), 2)
コード例 #5
0
    def test_available(self):
        # terse listing: a non-empty list of testtype entries
        self.assertIsInstance(self.av, list)
        self.assertNotEqual(len(self.av), 0)
        for entry in self.av:
            self.assertIsInstance(entry, testtype)

        # verbose listing: a non-empty dict whose keys and values are all
        # non-empty instances of testtype
        self.assertIsInstance(self.avv, dict)
        self.assertNotEqual(len(self.avv), 0)
        for pair in diter(self.avv):
            for item in pair:
                self.assertIsInstance(item, testtype)
                self.assertNotEqual(len(item), 0)
コード例 #6
0
ファイル: test_explorer.py プロジェクト: dfolch/cenpy
 def test_available(self):
     # terse listing: a non-empty list of testtype entries
     self.assertIsInstance(self.av, list)
     self.assertNotEqual(len(self.av), 0)
     for entry in self.av:
         self.assertIsInstance(entry, testtype)

     # verbose listing: a non-empty dict whose keys and values are all
     # non-empty instances of testtype
     self.assertIsInstance(self.avv, dict)
     self.assertNotEqual(len(self.avv), 0)
     for pair in diter(self.avv):
         for item in pair:
             self.assertIsInstance(item, testtype)
             self.assertNotEqual(len(item), 0)
コード例 #7
0
    def __init__(self, baseurl, **kwargs):
        """
        Set up an object representing an ESRI Layer in the TIGER API.

        Each keyword argument is stored on the instance under its name
        prefixed with an underscore. When a ``_fields`` attribute is
        available, it is also exposed as the ``variables`` DataFrame.

        Parameters
        ----------
        baseurl :   str
                    the url for the Layer; the layer's ``_id`` is appended
                    to it to form ``_baseurl``.

        """
        attributes = self.__dict__
        for name, value in diter(kwargs):
            attributes["_" + name] = value
        if hasattr(self, "_fields"):
            self.variables = pd.DataFrame(self._fields)
        layer_id = str(self._id)
        self._baseurl = baseurl + "/" + layer_id
コード例 #8
0
ファイル: tiger.py プロジェクト: AnastasiaZA/cenpy
 def __init__(self, baseurl, **kwargs):
     # Store every keyword argument on the instance under an
     # underscore-prefixed name.
     attributes = self.__dict__
     for name, value in diter(kwargs):
         attributes['_' + name] = value
     # Expose the field descriptions as a DataFrame when they are available.
     if hasattr(self, '_fields'):
         self.variables = pd.DataFrame(self._fields)
     # The layer url is the base url followed by this layer's id.
     layer_id = str(self._id)
     self._baseurl = baseurl + '/' + layer_id
コード例 #9
0
ファイル: explorer.py プロジェクト: dcassid5/cenpy
from six import iteritems as diter
import pandas as pd
import os
import six

if six.PY3:
    # Python 3 has no separate `unicode` type; alias the name to `str`
    unicode = str

# directory that contains this module
fp = os.path.dirname(os.path.realpath(__file__))

# Fetched at import time. `r` is bound outside this excerpt
# (presumably the `requests` module -- confirm against the file's imports).
raw_APIs = r.get('https://api.census.gov/data.json').json()['dataset']

# Index every dataset entry by the part of its 'identifier' that follows the
# last 'id' substring, with leading slashes removed.
# NOTE(review): the filter compares each key with the *value* of
# entry['identifier'] rather than with the literal 'identifier', so it only
# drops a key that happens to equal that value -- confirm this is intended.
APIs = {
    entry['identifier'].split('id')[-1].lstrip('/'):
    {key: value
     for key, value in diter(entry) if key != entry['identifier']}
    for entry in raw_APIs
}


def available(verbose=False):
    """
    Returns available identifiers for Census Data APIs. 
    NOTE: we do not support the Economic Indicators Time Series API yet.

    Arguments
    ==========
    verbose : boolean governing whether to provide ID and title
              or just ID

    Returns
コード例 #10
0
    def query(self, raw=False, strict=False, **kwargs):
        """
        A query function to extract data out of MapServer layers.

        Keyword options have their underscores removed and are merged into a
        copy of the module-level base query. The final request url is kept on
        ``self._last_query`` and the merged options on ``self._basequery``.

        Parameters
        ----------
        where: str, required
                    sql query string.
        out_fields: list or str
                    fields to pass from the header out (default: '*')
        return_geometry: bool
                    bool describing whether to return geometry or just the
                    dataframe. (default: True)
        geometry_precision: str
                    a number of significant digits to which the output of the
                    query should be truncated (default: None)
        out_sr: int or str
                    ESRI WKID spatial reference into which to reproject
                    the geodata (default: None)
        return_ids_only: bool
                    bool stating to only return ObjectIDs. (default: False)
        return_z: bool
                     whether to return z components of shp-z, (default: False)
        return_m: bool
                     whether to return m components of shp-m, (default: False)
        strict  :   bool
                    whether to throw an error if invalid polygons are provided from the API (True)
                    or just warn that at least one polygon is invalid (default: False)
        raw : bool
              whether to return the decoded JSON response from the API
              without converting it (default: False)

        Returns
        -------
        Dataframe or GeoDataFrame containing entries from the geodatabase.
        With ``raw=True`` the decoded JSON response is returned instead; when
        geometry is switched off, a DataFrame of the feature attributes.

        Raises
        ------
        KeyError
            if the response carries no "features" entry; the message includes
            the error code, message and details reported by the API.

        Any exception raised by ``resp.raise_for_status()`` for an
        unsuccessful HTTP response is propagated unchanged.

        Notes
        -----
        Most of the time, this should be used leaning on the SQL "where"
        argument:

        cxn.query(where='GEOID LIKE "06*"')

        In most cases, you'll be querying against layers, not MapServices
        overall.
        """
        # parse args: option names lose their underscores
        kwargs = {"".join(k.split("_")): v for k, v in diter(kwargs)}

        # Geometry can be switched off through either spelling of the option
        # (returnGeometry / return_geometry, which becomes "returngeometry"
        # above) and with either a bool or a string value.
        return_geometry = True
        for k, v in diter(kwargs):
            if k.lower() == "returngeometry":
                return_geometry = str(v).lower() != "false"

        # construct query string
        self._basequery = copy.deepcopy(_basequery)
        for k, v in diter(kwargs):
            try:
                self._basequery[k] = v
            except KeyError:
                raise KeyError(
                    "Option '{k}' not recognized, check parameters".format(k=k))
        qstring = "&".join(
            ["{}={}".format(k, v) for k, v in diter(self._basequery)])
        self._last_query = self._baseurl + "/query?" + qstring
        # run query
        resp = r.get(self._last_query + "&f=json")
        resp.raise_for_status()
        datadict = resp.json()
        if raw:
            return datadict
        # Extract the features first so that an error payload is reported
        # with the API's own message on every code path below.
        try:
            features = datadict["features"]
        except KeyError:
            code, msg = datadict["error"]["code"], datadict["error"]["message"]
            # an empty or missing detail list gets a readable placeholder
            details = datadict["error"].get("details")
            if not details:
                details = "Mapserver provided no detailed error"
            raise KeyError((
                "Response from API is malformed. You may have "
                "submitted too many queries, formatted the request incorrectly, "
                "or experienced significant network connectivity issues."
                " Check to make sure that your inputs, like placenames, are spelled"
                " correctly, and that your geographies match the level at which you"
                " intend to query. The original error from the Census is:\n"
                "(API ERROR {}:{}({}))".format(code, msg, details)))
        if not return_geometry:
            return pd.DataFrame.from_records(
                [x["attributes"] for x in features])
        # convert to output format
        todf = []
        for feature in features:
            # the parser is looked up by the geometry type named in the response
            locfeat = gpsr.__dict__[datadict["geometryType"]](feature)
            record = locfeat["properties"]
            record["geometry"] = locfeat["geometry"]
            todf.append(record)
        df = pd.DataFrame(todf)
        outdf = gpsr.convert_geometries(df, strict=strict)
        outdf = GeoDataFrame(outdf)
        crs = datadict.pop("spatialReference", None)
        if crs is not None:
            # prefer the latest WKID and fall back to the plain one
            crs = crs.get("latestWkid", crs.get("wkid"))
            crs = 'epsg:{}'.format(crs)
        outdf.crs = crs
        return outdf
コード例 #11
0
ファイル: registry.py プロジェクト: sathish-deevi/pysal
                            classes.update({cl.__name__: cl})
                except:
                    pass
    ucs.update({
        k: v
        for k, v in diter(classes)
        if (any([issubclass(v, bc) for bc in bcs.values()]) and (k not in bcs))
        or k.endswith('Regimes')
    })
    return bcs, ucs


# Discover the classes once at import time and merge both groups into a
# single lookup table.
base, user = _find_bcs()
_everything = base.copy()
_everything.update(user)

# Bind every discovered class into this module's namespace under its own
# name. The class object is assigned directly through globals() rather than
# exec-ing a formatted string, because str() of a class (for example
# "<class 'pkg.mod.Name'>") is not generally valid Python source.
for k, v in diter(base):
    globals()[k] = v
for k, v in diter(user):
    globals()[k] = v

# the public API is exactly the set of discovered class names
__all__ = list()
__all__.extend(base.keys())
__all__.extend(user.keys())
#regimes = {cls for cls in user if 'regimes' in cls.__module__}

#if we go with something like a "base" and "user" submodule setup,
#it'd be as simple as flattening the subclasses out into those submodules.
#for name, cl in diter(_find_bcs()[0]):
#    exec('{n} = {cls}'.format(n=name, cls=cl))
コード例 #12
0
ファイル: tiger.py プロジェクト: dfolch/cenpy
    def query(self, **kwargs):
        """
        A query function to extract data out of MapServer layers.

        Keyword options have their underscores removed and are merged into a
        copy of the module-level base query. The final request url is kept on
        ``self._last_query`` and the merged options on ``self._basequery``.

        Parameters
        ==========
        where: str, required
                    sql query string.
        out_fields: list or str, (default: '*')
                    fields to pass from the header out
        return_geometry: bool, (default: True)
                    bool describing whether to return geometry or just the
                    dataframe
        geometry_precision: str, (default: None)
                    a number of significant digits to which the output of the
                    query should be truncated
        out_sr: int or str, (default: None)
                    ESRI WKID spatial reference into which to reproject
                    the geodata
        return_ids_only: bool, (default: False)
                    bool stating to only return ObjectIDs
        return_z: bool, (default: False)
                     whether to return z components of shp-z
        return_m: bool, (default: False)
                     whether to return m components of shp-m
        pkg: str, (default: 'pysal')
                    pass 'geopandas' (any casing) to request conversion of
                    the result into a geopandas GeoDataFrame
        gpize: bool, (default: False)
                    whether to attempt conversion of the result into a
                    geopandas GeoDataFrame

        Returns
        =======
        Dataframe or GeoDataFrame containing entries from the geodatabase

        Notes
        =====
        Most of the time, this should be used leaning on the SQL "where"
        argument:

        cxn.query(where='GEOID LIKE "06*"')

        In most cases, you'll be querying against layers, not MapServices
        overall.
        """
        #parse args
        pkg = kwargs.pop('pkg', 'pysal')
        gpize = kwargs.pop('gpize', False)
        if pkg.lower() == 'geopandas':
            gpize = True
        # option names lose their underscores
        kwargs = {''.join(k.split('_')):v for k,v in diter(kwargs)}

        #construct query string
        self._basequery = copy.deepcopy(_basequery)
        for k,v in diter(kwargs):
            try:
                self._basequery[k] = v
            except KeyError:
                raise KeyError(
                    "Option '{k}' not recognized, check parameters".format(k=k))
        qstring = '&'.join(['{}={}'.format(k,v) for k,v in diter(self._basequery)])
        self._last_query = self._baseurl + '/query?' + qstring
        #run query
        resp = r.get(self._last_query + '&f=json')
        resp.raise_for_status()
        datadict = resp.json()
        #convert to output format
        features = datadict['features']
        todf = []
        for feature in features:
            # the parser is looked up by the geometry type named in the response
            locfeat = gpsr.__dict__[datadict['geometryType']](feature)
            record = locfeat['properties']
            record['geometry'] = locfeat['geometry']
            todf.append(record)
        df = pd.DataFrame(todf)
        outdf = gpsr.convert_geometries(df)
        if gpize:
            # best effort: fall back to the plain dataframe when geopandas is
            # missing or the conversion fails, but do not swallow
            # KeyboardInterrupt/SystemExit
            try:
                from geopandas import GeoDataFrame
                outdf = GeoDataFrame(outdf)
            except Exception:
                print('Geopandas dataframe conversion failed! Continuing...')
        outdf.crs = datadict.pop('spatialReference', {})
        return outdf
コード例 #13
0
ファイル: tiger.py プロジェクト: ljwolf/cenpy
    def query(self, **kwargs):
        """
        A query function to extract data out of MapServer layers.

        Keyword options have their underscores removed and are merged into a
        copy of the module-level base query. The final request url is kept on
        ``self._last_query`` and the merged options on ``self._basequery``.

        Parameters
        ==========
        where: str, required
                    sql query string.
        out_fields: list or str, (default: '*')
                    fields to pass from the header out
        return_geometry: bool, (default: True)
                    bool describing whether to return geometry or just the
                    dataframe
        geometry_precision: str, (default: None)
                    a number of significant digits to which the output of the
                    query should be truncated
        out_sr: int or str, (default: None)
                    ESRI WKID spatial reference into which to reproject
                    the geodata
        return_ids_only: bool, (default: False)
                    bool stating to only return ObjectIDs
        return_z: bool, (default: False)
                     whether to return z components of shp-z
        return_m: bool, (default: False)
                     whether to return m components of shp-m
        pkg     :   str (default: 'geopandas')
                    what geometry type to provide in the results of the query. Uses shapely
                    shapes by default. Supports "pysal," which constructs a pandas dataframe
                    with pysal shapes in a geometry column; 'shapely', which builds a pandas
                    dataframe with shapely shapes in a geometry column, and "geopandas,"
                    which returns a geopandas GeoDataFrame.
        gpize   :   bool (default: False)
                    whether to attempt conversion of the result into a
                    geopandas GeoDataFrame; implied by pkg='geopandas'
        strict  :   bool (default: False)
                    whether to throw an error if invalid polygons are provided from the API (True)
                    or just warn that at least one polygon is invalid (False)

        Returns
        =======
        Dataframe or GeoDataFrame containing entries from the geodatabase

        Raises
        ======
        KeyError if the response carries no 'features' entry; the message
        includes the error code, message and details reported by the API.

        Notes
        =====
        Most of the time, this should be used leaning on the SQL "where"
        argument:

        cxn.query(where='GEOID LIKE "06*"')

        In most cases, you'll be querying against layers, not MapServices
        overall.
        """
        #parse args
        pkg = kwargs.pop('pkg', 'geopandas')
        gpize = kwargs.pop('gpize', False)
        strict = kwargs.pop('strict', False)
        if pkg.lower() == 'geopandas':
            # a GeoDataFrame is built from shapely shapes
            pkg = 'shapely'
            gpize = True
        # option names lose their underscores
        kwargs = {''.join(k.split('_')):v for k,v in diter(kwargs)}

        #construct query string
        self._basequery = copy.deepcopy(_basequery)
        for k,v in diter(kwargs):
            try:
                self._basequery[k] = v
            except KeyError:
                raise KeyError(
                    "Option '{k}' not recognized, check parameters".format(k=k))
        qstring = '&'.join(['{}={}'.format(k,v) for k,v in diter(self._basequery)])
        self._last_query = self._baseurl + '/query?' + qstring
        #run query
        resp = r.get(self._last_query + '&f=json')
        resp.raise_for_status()
        datadict = resp.json()
        #convert to output format
        try:
            features = datadict['features']
        except KeyError:
            code, msg = datadict['error']['code'], datadict['error']['message']
            # an empty or missing detail list gets a readable placeholder
            details = datadict['error'].get('details')
            if not details:
                details = 'Mapserver provided no detailed error'
            raise KeyError(('Response from API is malformed. You may have '
                            'submitted too many queries, or experienced '
                            'significant network connectivity issues.\n'
                            '(API ERROR {}:{}({}))'.format(code, msg, details)))
        todf = []
        for feature in features:
            # the parser is looked up by the geometry type named in the response
            locfeat = gpsr.__dict__[datadict['geometryType']](feature)
            record = locfeat['properties']
            record['geometry'] = locfeat['geometry']
            todf.append(record)
        df = pd.DataFrame(todf)
        outdf = gpsr.convert_geometries(df, pkg, strict=strict)
        if gpize:
            # best effort: fall back to the plain dataframe when geopandas is
            # missing or the conversion fails, but do not swallow
            # KeyboardInterrupt/SystemExit
            try:
                from geopandas import GeoDataFrame
                outdf = GeoDataFrame(outdf)
            except Exception:
                print('Geopandas dataframe conversion failed! Continuing...')
        crs = datadict.pop('spatialReference', None)
        if crs is not None:
            # prefer the latest WKID and fall back to the plain one
            crs = crs.get('latestWkid', crs.get('wkid'))
            crs = dict(init='epsg:{}'.format(crs))
        outdf.crs = crs
        return outdf
コード例 #14
0
ファイル: explorer.py プロジェクト: ljwolf/cenpy
import requests as r
from six import iteritems as diter
import pandas as pd
import os
import six

if six.PY3:
    # Python 3 has no separate `unicode` type; alias the name to `str`
    unicode = str

# directory that contains this module
fp  = os.path.dirname(os.path.realpath(__file__))

# fetched over the network at import time
raw_APIs = r.get('https://api.census.gov/data.json').json()['dataset']

# Index every dataset entry by the part of its 'identifier' that follows the
# last 'id' substring, with leading slashes removed.
# NOTE(review): the filter compares each key with the *value* of
# entry['identifier'] rather than with the literal 'identifier', so it only
# drops a key that happens to equal that value -- confirm this is intended.
APIs = {entry['identifier'].split('id')[-1].lstrip('/'): {key: value for key,value in diter(entry) if key != entry['identifier']} for entry in raw_APIs}

def available(verbose=True):
    """
    Returns available identifiers for Census Data APIs. 
    NOTE: we do not support the Economic Indicators Time Series API yet.

    Arguments
    ==========
    verbose : boolean governing whether to provide ID and title
              or just ID

    Returns
    ========

    identifiers (if verbose: and dataset names)

    """
コード例 #15
0
ファイル: registry.py プロジェクト: CartoDB/pysal
                        if cl not in bcs:
                            bcs.update({cl.__name__:cl})
                        else:
                            classes.update({cl.__name__:cl})
                except:
                    pass
    ucs.update({k:v for k,v in diter(classes) if (
                any([issubclass(v, bc) for bc in bcs.values()])
                and (k not in bcs))
                or k.endswith('Regimes')})
    return bcs, ucs

# Discover the classes once at import time and merge both groups into a
# single lookup table.
base, user = _find_bcs()
_everything = base.copy()
_everything.update(user)

# Bind every discovered class into this module's namespace under its own
# name. The class object is assigned directly through globals() rather than
# exec-ing a formatted string, because str() of a class (for example
# "<class 'pkg.mod.Name'>") is not generally valid Python source.
for k,v in diter(base):
    globals()[k] = v
for k,v in diter(user):
    globals()[k] = v

# the public API is exactly the set of discovered class names
__all__ = list()
__all__.extend(base.keys())
__all__.extend(user.keys())
#regimes = {cls for cls in user if 'regimes' in cls.__module__}

#if we go with something like a "base" and "user" submodule setup,
#it'd be as simple as flattening the subclasses out into those submodules. 
#for name, cl in diter(_find_bcs()[0]):
#    exec('{n} = {cls}'.format(n=name, cls=cl))
コード例 #16
0
ファイル: tiger.py プロジェクト: AnastasiaZA/cenpy
    def query(self, **kwargs):
        """
        A query function to extract data out of MapServer layers.

        Keyword options have their underscores removed and are merged into a
        copy of the module-level base query. The final request url is kept on
        ``self._last_query`` and the merged options on ``self._basequery``.

        Parameters
        ==========
        where: str, required
                    sql query string.
        out_fields: list or str, (default: '*')
                    fields to pass from the header out
        return_geometry: bool, (default: True)
                    bool describing whether to return geometry or just the
                    dataframe
        geometry_precision: str, (default: None)
                    a number of significant digits to which the output of the
                    query should be truncated
        out_sr: int or str, (default: None)
                    ESRI WKID spatial reference into which to reproject
                    the geodata
        return_ids_only: bool, (default: False)
                    bool stating to only return ObjectIDs
        return_z: bool, (default: False)
                     whether to return z components of shp-z
        return_m: bool, (default: False)
                     whether to return m components of shp-m
        pkg     :   str (default: 'pysal')
                    what geometry type to provide in the results of the query. Uses PySAL
                    shapes by default. Supports "shapely," which constructs shapely
                    shapes instead of pysal shape, and "geopandas," which packs shapely
                    shapes into a GeoPandas dataframe.
        gpize   :   bool (default: False)
                    whether to attempt conversion of the result into a
                    geopandas GeoDataFrame; implied by pkg='geopandas'

        Returns
        =======
        Dataframe or GeoDataFrame containing entries from the geodatabase

        Raises
        ======
        KeyError if the response carries no 'features' entry; the message
        includes the error code, message and details reported by the API.

        Notes
        =====
        Most of the time, this should be used leaning on the SQL "where"
        argument:

        cxn.query(where='GEOID LIKE "06*"')

        In most cases, you'll be querying against layers, not MapServices
        overall.
        """
        #parse args
        pkg = kwargs.pop('pkg', 'pysal')
        gpize = kwargs.pop('gpize', False)
        if pkg.lower() == 'geopandas':
            # a GeoDataFrame is built from shapely shapes
            pkg = 'shapely'
            gpize = True
        # option names lose their underscores
        kwargs = {''.join(k.split('_')): v for k, v in diter(kwargs)}

        #construct query string
        self._basequery = copy.deepcopy(_basequery)
        for k, v in diter(kwargs):
            try:
                self._basequery[k] = v
            except KeyError:
                raise KeyError(
                    "Option '{k}' not recognized, check parameters".format(k=k))
        qstring = '&'.join(
            ['{}={}'.format(k, v) for k, v in diter(self._basequery)])
        self._last_query = self._baseurl + '/query?' + qstring
        #run query
        resp = r.get(self._last_query + '&f=json')
        resp.raise_for_status()
        datadict = resp.json()
        #convert to output format
        try:
            features = datadict['features']
        except KeyError:
            code, msg = datadict['error']['code'], datadict['error']['message']
            # an empty or missing detail list gets a readable placeholder
            details = datadict['error'].get('details')
            if not details:
                details = 'Mapserver provided no detailed error'
            raise KeyError(('Response from API is malformed. You may have '
                            'submitted too many queries, or experienced '
                            'significant network connectivity issues.\n'
                            '(API ERROR {}:{}({}))'.format(code, msg,
                                                           details)))
        todf = []
        for feature in features:
            # the parser is looked up by the geometry type named in the response
            locfeat = gpsr.__dict__[datadict['geometryType']](feature)
            record = locfeat['properties']
            record['geometry'] = locfeat['geometry']
            todf.append(record)
        df = pd.DataFrame(todf)
        outdf = gpsr.convert_geometries(df, pkg)
        if gpize:
            # best effort: fall back to the plain dataframe when geopandas is
            # missing or the conversion fails, but do not swallow
            # KeyboardInterrupt/SystemExit
            try:
                from geopandas import GeoDataFrame
                outdf = GeoDataFrame(outdf)
            except Exception:
                print('Geopandas dataframe conversion failed! Continuing...')
        outdf.crs = datadict.pop('spatialReference', {})
        return outdf
コード例 #17
0
ファイル: tiger.py プロジェクト: dfolch/cenpy
 def __init__(self, baseurl, **kwargs):
     # Store every keyword argument on the instance under an
     # underscore-prefixed name.
     attributes = self.__dict__
     for name, value in diter(kwargs):
         attributes['_' + name] = value
     # Expose the field descriptions as a DataFrame when they are available.
     if hasattr(self, '_fields'):
         self.variables = pd.DataFrame(self._fields)
     # The layer url is the base url followed by this layer's id.
     layer_id = str(self._id)
     self._baseurl = baseurl + '/' + layer_id
コード例 #18
0
def lag_categorical(w, y, ties='tryself'):
    """
    Spatial lag operator for categorical variables.

    Constructs the most common categories of neighboring observations, weighted
    by their weight strength.

    Parameters
    ----------

    w                   : W
                          PySAL spatial weights object
    y                   : iterable
                          iterable collection of categories (either int or
                          string) with dimensionality conforming to w (see examples)
    ties                : str
                          string describing the method to use when resolving
                          ties. By default, the option is "tryself",
                          and the category of the focal observation
                          is included with its neighbors to try
                          and break a tie. If this does not resolve the tie,
                          a winner is chosen randomly. To just use random choice to
                          break ties, pass "random" instead.
    Returns
    -------
    an (n x k) column vector containing the most common neighboring observation

    Notes
    -----
    This works on any array where the number of unique elements along the column
    axis is less than the number of elements in the array, for any dtype.
    That means the routine should work on any dtype that np.unique() can
    compare.

    Examples
    --------

    Set up a 9x9 weights matrix describing a 3x3 regular lattice. Lag one list of
    categorical variables with no ties.

    >>> import pysal
    >>> import numpy as np
    >>> w = pysal.lat2W(3, 3)
    >>> y = ['a','b','a','b','c','b','c','b','c']
    >>> y_l = pysal.weights.spatial_lag.lag_categorical(w, y)
    >>> y_l
    array(['b', 'a', 'b', 'c', 'b', 'c', 'b', 'c', 'b'], dtype='|S1')

    Explicitly reshape y into a (9x1) array and calculate lag again

    >>> yvect = np.array(y).reshape(9,1)
    >>> yvect_l = pysal.weights.spatial_lag.lag_categorical(w,yvect)
    >>> yvect_l
    array([['b'],
           ['a'],
           ['b'],
           ['c'],
           ['b'],
           ['c'],
           ['b'],
           ['c'],
           ['b']],
          dtype='|S1')

    compute the lag of a 9x2 matrix of categories

    >>> y2 = ['a', 'c', 'c', 'd', 'b', 'a', 'd', 'd', 'c']
    >>> ym = np.vstack((y,y2)).T
    >>> ym_lag = pysal.weights.spatial_lag.lag_categorical(w,ym)
    >>> ym_lag
    array([['b', 'b'],
           ['a', 'c'],
           ['b', 'c'],
           ['c', 'd'],
           ['b', 'd'],
           ['c', 'c'],
           ['c', 'd'],
           ['c', 'd'],
           ['b', 'b']],
          dtype='|S1')
    """
    # accept any array-like (list, tuple, ndarray, ...), not only lists
    y = np.asarray(y)
    orig_shape = y.shape
    if len(orig_shape) > 1 and orig_shape[1] > 1:
        # lag every column on its own, with the caller's tie-breaking rule,
        # and put the columns back side by side
        return np.vstack(
            [lag_categorical(w, col, ties=ties) for col in y.T]).T
    y = y.flatten()
    output = np.zeros_like(y)
    keys = np.unique(y)
    # integer code of each observation's category (its position in `keys`);
    # the builtin int is used because the np.int alias was removed from NumPy
    inty = np.zeros(y.shape, dtype=int)
    for i, key in enumerate(keys):
        inty[y == key] = i
    for idx, neighbors in w:
        # accumulate the neighbor weights per category
        vals = np.zeros(keys.shape)
        for neighb, weight in diter(neighbors):
            vals[inty[w.id2i[neighb]]] += weight
        outidx = _resolve_ties(idx, inty, vals, neighbors, ties, w)
        output[w.id2i[idx]] = keys[outidx]
    return output.reshape(orig_shape)
コード例 #19
0
ファイル: explorer.py プロジェクト: weikang9009/cenpy
import os
import six

if six.PY3:
    # Python 3 has no separate `unicode` type; alias the name to `str`
    unicode = str

# directory that contains this module
fp = os.path.dirname(os.path.realpath(__file__))

# The catalogue of Census APIs is fetched over the network at import time.
resp = raw_APIs = r.get("https://api.census.gov/data.json")
try:
    resp.raise_for_status()
    raw_APIs = resp.json()["dataset"]
    # Index every dataset entry by the part of its "identifier" that follows
    # the last "id" substring, with leading slashes removed.
    # NOTE(review): the filter compares each key with the *value* of
    # entry["identifier"] rather than with the literal "identifier" --
    # confirm this is intended.
    APIs = {
        entry["identifier"].split("id")[-1].lstrip("/"): {
            key: value
            for key, value in diter(entry) if key != entry["identifier"]
        }
        for entry in raw_APIs
    }
except r.HTTPError:
    raise r.HTTPError(
        "The main Census API Endpoint (https://api.census.gov/data.json) is not available."
        " Try visiting https://api.census.gov/data.json in a web browser to verify connectivity."
    )
except JSONDecodeError as err:
    # Re-raise with the offending document and position carried over:
    # building the exception from a message alone would fail with a
    # TypeError. Assumes JSONDecodeError follows the
    # json.JSONDecodeError(msg, doc, pos) signature -- confirm against the
    # import at the top of the file.
    raise JSONDecodeError(
        "The main Census API Endpoint (https://api.census.gov/data.json) returned malformed content."
        " Try visiting https://api.census.gov/data.json in a web browser to verify connectivity.",
        err.doc,
        err.pos,
    )

コード例 #20
0
ファイル: spatial_lag.py プロジェクト: CartoDB/pysal
def lag_categorical(w, y, ties='tryself'):
    """
    Spatial lag operator for categorical variables.

    Constructs the most common categories of neighboring observations, weighted
    by their weight strength.

    Parameters
    ----------

    w                   : W
                          PySAL spatial weights object
    y                   : iterable
                          iterable collection of categories (either int or
                          string) with dimensionality conforming to w (see examples)
    ties                : str
                          string describing the method to use when resolving
                          ties. By default, the option is "tryself",
                          and the category of the focal observation
                          is included with its neighbors to try
                          and break a tie. If this does not resolve the tie,
                          a winner is chosen randomly. To just use random choice to
                          break ties, pass "random" instead.
    Returns
    -------
    an (n x k) column vector containing the most common neighboring observation

    Notes
    -----
    This works on any array where the number of unique elements along the column
    axis is less than the number of elements in the array, for any dtype.
    That means the routine should work on any dtype that np.unique() can
    compare.

    Examples
    --------

    Set up a 9x9 weights matrix describing a 3x3 regular lattice. Lag one list of
    categorical variables with no ties.

    >>> import pysal
    >>> import numpy as np
    >>> w = pysal.lat2W(3, 3)
    >>> y = ['a','b','a','b','c','b','c','b','c']
    >>> y_l = pysal.weights.spatial_lag.lag_categorical(w, y)
    >>> y_l
    array(['b', 'a', 'b', 'c', 'b', 'c', 'b', 'c', 'b'], dtype='|S1')

    Explicitly reshape y into a (9x1) array and calculate lag again

    >>> yvect = np.array(y).reshape(9,1)
    >>> yvect_l = pysal.weights.spatial_lag.lag_categorical(w,yvect)
    >>> yvect_l
    array([['b'],
           ['a'],
           ['b'],
           ['c'],
           ['b'],
           ['c'],
           ['b'],
           ['c'],
           ['b']],
          dtype='|S1')

    compute the lag of a 9x2 matrix of categories

    >>> y2 = ['a', 'c', 'c', 'd', 'b', 'a', 'd', 'd', 'c']
    >>> ym = np.vstack((y,y2)).T
    >>> ym_lag = pysal.weights.spatial_lag.lag_categorical(w,ym)
    >>> ym_lag
    array([['b', 'b'],
           ['a', 'c'],
           ['b', 'c'],
           ['c', 'd'],
           ['b', 'd'],
           ['c', 'c'],
           ['c', 'd'],
           ['c', 'd'],
           ['b', 'b']],
          dtype='|S1')
    """
    # accept any array-like (list, tuple, ndarray, ...), not only lists
    y = np.asarray(y)
    orig_shape = y.shape
    if len(orig_shape) > 1 and orig_shape[1] > 1:
        # lag every column on its own, with the caller's tie-breaking rule,
        # and put the columns back side by side
        return np.vstack(
            [lag_categorical(w, col, ties=ties) for col in y.T]).T
    y = y.flatten()
    output = np.zeros_like(y)
    keys = np.unique(y)
    # integer code of each observation's category (its position in `keys`);
    # the builtin int is used because the np.int alias was removed from NumPy
    inty = np.zeros(y.shape, dtype=int)
    for i, key in enumerate(keys):
        inty[y == key] = i
    for idx, neighbors in w:
        # accumulate the neighbor weights per category
        vals = np.zeros(keys.shape)
        for neighb, weight in diter(neighbors):
            vals[inty[w.id2i[neighb]]] += weight
        outidx = _resolve_ties(idx, inty, vals, neighbors, ties, w)
        output[w.id2i[idx]] = keys[outidx]
    return output.reshape(orig_shape)