from types import ModuleType
from six import iteritems as diter
import pysal

# NOTE: Python 3 has no types.ClassType (it covered old-style classes only),
# so plain `type` stands in for it here; this alias is an assumption about
# the module's six-compatible setup.
ClassType = type


def _find_bcs():
    """Split pysal.spreg's classes into Base* classes and user-facing classes."""
    classes = dict()
    bcs = dict()
    ucs = dict()
    submods = dict()
    # collect spreg's submodules and any classes exposed at the top level
    for obname in dir(pysal.spreg):
        ob = pysal.spreg.__dict__[obname]
        if isinstance(ob, ModuleType):
            if ob.__package__.startswith("pysal"):
                submods.update({obname: ob})
        elif isinstance(ob, ClassType):
            classes.update({obname: ob})
            if ob.__name__.startswith('Base'):
                bcs.update({obname: ob})
    # walk each submodule, sorting Base* classes from the rest
    for modname, mod in diter(submods):
        for clname in dir(mod):
            cl = mod.__dict__[clname]
            if isinstance(cl, ClassType):
                try:
                    if cl.__name__.startswith('Base'):
                        # bcs is keyed by name, so test the name, not the class
                        if cl.__name__ not in bcs:
                            bcs.update({cl.__name__: cl})
                    else:
                        classes.update({cl.__name__: cl})
                except Exception:
                    pass
    # user classes: subclasses of some Base* class, plus the *Regimes variants
    ucs.update({k: v for k, v in diter(classes)
                if (any(issubclass(v, bc) for bc in bcs.values())
                    and (k not in bcs))
                or k.endswith('Regimes')})
    return bcs, ucs
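# Demo (hedged: requires a pysal 1.x install that exposes pysal.spreg; the
# exact names printed depend on the installed version):
if __name__ == '__main__':
    bcs, ucs = _find_bcs()
    print(sorted(bcs))  # Base* estimation classes, e.g. 'BaseOLS'
    print(sorted(ucs))  # user-facing subclasses, plus the *Regimes variants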
def test_explain(self):
    explaintext = cenpy.explorer.explain(self.av[0])
    self.assertIsInstance(explaintext, dict)
    for k, v in diter(explaintext):
        self.assertIsInstance(k, testtype)
        self.assertNotEqual(len(k), 0)
        self.assertIsInstance(v, testtype)
        self.assertNotEqual(len(v), 0)
    explaintextv = cenpy.explorer.explain(self.av[0], verbose=True)
    self.assertIsInstance(explaintextv, dict)
    self.assertGreaterEqual(len(explaintextv), 2)
def test_available(self):
    self.assertIsInstance(self.av, list)
    self.assertNotEqual(len(self.av), 0)
    for name in self.av:
        self.assertIsInstance(name, testtype)
    self.assertIsInstance(self.avv, dict)
    self.assertNotEqual(len(self.avv), 0)
    for k, v in diter(self.avv):
        self.assertIsInstance(k, testtype)
        self.assertNotEqual(len(k), 0)
        self.assertIsInstance(v, testtype)
        self.assertNotEqual(len(v), 0)
def __init__(self, baseurl, **kwargs):
    """
    Class representing the ESRI Layer in the TIGER API

    Parameters
    ----------
    baseurl : str
              the url for the Layer.
    """
    # stash the layer's metadata as private attributes
    self.__dict__.update({"_" + k: v for k, v in diter(kwargs)})
    if hasattr(self, "_fields"):
        self.variables = pd.DataFrame(self._fields)
    self._baseurl = baseurl + "/" + str(self._id)
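# Construction sketch (hedged): assumes the enclosing class is exposed as
# ESRILayer, and that the metadata dict mirrors the per-layer JSON an ESRI
# MapServer advertises; the URL and field entries are illustrative, not live
# values pulled from a service.
meta = {"id": 8, "name": "Census Tracts",
        "fields": [{"name": "GEOID", "type": "esriFieldTypeString"},
                   {"name": "NAME", "type": "esriFieldTypeString"}]}
layer = ESRILayer("https://tigerweb.geo.census.gov/arcgis/rest/services/"
                  "TIGERweb/tigerWMS_Current/MapServer", **meta)
print(layer._baseurl)   # the MapServer URL with '/8' appended
print(layer.variables)  # two-row DataFrame describing GEOID and NAME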
def query(self, raw=False, strict=False, **kwargs):
    """
    A query function to extract data out of MapServer layers. I've exposed
    every option here.

    Parameters
    ----------
    where : str, required
            sql query string.
    out_fields : list or str
                 fields to pass from the header out (default: '*')
    return_geometry : bool
                      whether to return geometry or just the dataframe
                      (default: True)
    geometry_precision : str
                         a number of significant digits to which the output of
                         the query should be truncated (default: None)
    out_sr : int or str
             ESRI WKID spatial reference into which to reproject the geodata
             (default: None)
    return_ids_only : bool
                      whether to return only ObjectIDs (default: False)
    return_z : bool
               whether to return z components of shp-z (default: False)
    return_m : bool
               whether to return m components of shp-m (default: False)
    strict : bool
             whether to throw an error if invalid polygons are provided from
             the API (True) or just warn that at least one polygon is invalid
             (default: False)
    raw : bool
          whether to provide the raw geometries from the API (default: False)

    Returns
    -------
    DataFrame or GeoDataFrame containing entries from the geodatabase

    Notes
    -----
    Most of the time, this should be used leaning on the SQL "where" argument:

        cxn.query(where='GEOID LIKE "06*"')

    In most cases, you'll be querying against layers, not MapServices overall.
    """
    # parse args: strip underscores, so return_geometry becomes returngeometry
    kwargs = {"".join(k.split("_")): v for k, v in diter(kwargs)}

    # construct query string
    self._basequery = copy.deepcopy(_basequery)
    for k, v in diter(kwargs):
        try:
            self._basequery[k] = v
        except KeyError:
            raise KeyError("Option '{}' not recognized, check parameters".format(k))
    qstring = "&".join(["{}={}".format(k, v) for k, v in diter(self._basequery)])
    self._last_query = self._baseurl + "/query?" + qstring

    # run query
    resp = r.get(self._last_query + "&f=json")
    resp.raise_for_status()
    datadict = resp.json()
    if raw:
        return datadict
    # honor the flag whether passed camelCase or snake_case (normalized above)
    if kwargs.get("returnGeometry", kwargs.get("returngeometry", "true")) == "false":
        return pd.DataFrame.from_records(
            [x["attributes"] for x in datadict["features"]])

    # convert to output format
    try:
        features = datadict["features"]
    except KeyError:
        code, msg = datadict["error"]["code"], datadict["error"]["message"]
        details = datadict["error"]["details"]
        if not details:  # `details is []` was always False; test emptiness instead
            details = "Mapserver provided no detailed error"
        raise KeyError(("Response from API is malformed. You may have "
                        "submitted too many queries, formatted the request "
                        "incorrectly, or experienced significant network "
                        "connectivity issues. Check to make sure that your "
                        "inputs, like placenames, are spelled correctly, and "
                        "that your geographies match the level at which you "
                        "intend to query. The original error from the Census "
                        "is:\n(API ERROR {}:{}({}))".format(code, msg, details)))
    todf = []
    for i, feature in enumerate(features):
        locfeat = gpsr.__dict__[datadict["geometryType"]](feature)
        todf.append(locfeat["properties"])
        todf[i].update({"geometry": locfeat["geometry"]})
    df = pd.DataFrame(todf)
    outdf = gpsr.convert_geometries(df, strict=strict)
    from geopandas import GeoDataFrame  # assumed importable; made explicit here
    outdf = GeoDataFrame(outdf)
    crs = datadict.pop("spatialReference", None)
    if crs is not None:
        crs = crs.get("latestWkid", crs.get("wkid"))
        crs = 'epsg:{}'.format(crs)
    outdf.crs = crs
    return outdf
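# Usage sketch (hedged: `layer` stands for an already-constructed layer object
# bound to a TIGERweb MapServer endpoint; the field names are illustrative):
tracts = layer.query(where="STATE = '06'", out_fields="GEOID,NAME")  # GeoDataFrame
attrs = layer.query(where="STATE = '06'", returnGeometry="false")    # attributes only
payload = layer.query(where="STATE = '06'", raw=True)                # raw JSON dict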
base, user = _find_bcs()
_everything = base.copy()
_everything.update(user)

# promote the discovered classes to module scope; exec'ing '{k} = {v}' would
# interpolate the class repr (which is not valid Python), so bind via globals()
for k, v in diter(base):
    globals()[k] = v
for k, v in diter(user):
    globals()[k] = v

__all__ = list()
__all__.extend(base.keys())
__all__.extend(user.keys())

#regimes = {cls for cls in user if 'regimes' in cls.__module__}
#if we go with something like a "base" and "user" submodule setup,
#it'd be as simple as flattening the subclasses out into those submodules.
#for name, cl in diter(_find_bcs()[0]):
#    exec('{n} = {cls}'.format(n=name, cls=cl))
def query(self, **kwargs):
    """
    A query function to extract data out of MapServer layers. I've exposed
    every option here.

    Parameters
    ==========
    where: str, required
           sql query string.
    out_fields: list or str (default: '*')
                fields to pass from the header out
    return_geometry: bool (default: True)
                     whether to return geometry or just the dataframe
    geometry_precision: str (default: None)
                        a number of significant digits to which the output of
                        the query should be truncated
    out_sr: int or str (default: None)
            ESRI WKID spatial reference into which to reproject the geodata
    return_ids_only: bool (default: False)
                     whether to return only ObjectIDs
    return_z: bool (default: False)
              whether to return z components of shp-z
    return_m: bool (default: False)
              whether to return m components of shp-m

    Returns
    =======
    DataFrame or GeoDataFrame containing entries from the geodatabase

    Notes
    =====
    Most of the time, this should be used leaning on the SQL "where" argument:

        cxn.query(where='GEOID LIKE "06*"')

    In most cases, you'll be querying against layers, not MapServices overall.
    """
    # parse args
    pkg = kwargs.pop('pkg', 'pysal')
    gpize = kwargs.pop('gpize', False)
    if pkg.lower() == 'geopandas':
        pkg = 'shapely'
        gpize = True
    kwargs = {''.join(k.split('_')): v for k, v in diter(kwargs)}

    # construct query string
    self._basequery = copy.deepcopy(_basequery)
    for k, v in diter(kwargs):
        try:
            self._basequery[k] = v
        except KeyError:
            raise KeyError("Option '{}' not recognized, check parameters".format(k))
    qstring = '&'.join(['{}={}'.format(k, v) for k, v in diter(self._basequery)])
    self._last_query = self._baseurl + '/query?' + qstring

    # run query
    resp = r.get(self._last_query + '&f=json')
    resp.raise_for_status()
    datadict = resp.json()

    # convert to output format
    features = datadict['features']
    todf = []
    for i, feature in enumerate(features):
        locfeat = gpsr.__dict__[datadict['geometryType']](feature)
        todf.append(locfeat['properties'])
        todf[i].update({'geometry': locfeat['geometry']})
    df = pd.DataFrame(todf)
    outdf = gpsr.convert_geometries(df)
    if gpize:
        try:
            from geopandas import GeoDataFrame
            outdf = GeoDataFrame(outdf)
        except ImportError:
            print('Geopandas dataframe conversion failed! Continuing...')
    outdf.crs = datadict.pop('spatialReference', {})
    return outdf
def query(self, **kwargs):
    """
    A query function to extract data out of MapServer layers. I've exposed
    every option here.

    Parameters
    ==========
    where: str, required
           sql query string.
    out_fields: list or str (default: '*')
                fields to pass from the header out
    return_geometry: bool (default: True)
                     whether to return geometry or just the dataframe
    geometry_precision: str (default: None)
                        a number of significant digits to which the output of
                        the query should be truncated
    out_sr: int or str (default: None)
            ESRI WKID spatial reference into which to reproject the geodata
    return_ids_only: bool (default: False)
                     whether to return only ObjectIDs
    return_z: bool (default: False)
              whether to return z components of shp-z
    return_m: bool (default: False)
              whether to return m components of shp-m
    pkg : str (default: 'geopandas')
          what geometry type to provide in the results of the query. Supports
          "pysal," which constructs a pandas dataframe with pysal shapes in a
          geometry column; "shapely," which builds a pandas dataframe with
          shapely shapes in a geometry column; and "geopandas," which returns
          a geopandas GeoDataFrame.
    strict : bool (default: False)
             whether to throw an error if invalid polygons are provided from
             the API (True) or just warn that at least one polygon is invalid
             (False)

    Returns
    =======
    DataFrame or GeoDataFrame containing entries from the geodatabase

    Notes
    =====
    Most of the time, this should be used leaning on the SQL "where" argument:

        cxn.query(where='GEOID LIKE "06*"')

    In most cases, you'll be querying against layers, not MapServices overall.
    """
    # parse args
    pkg = kwargs.pop('pkg', 'geopandas')
    gpize = kwargs.pop('gpize', False)
    strict = kwargs.pop('strict', False)
    if pkg.lower() == 'geopandas':
        pkg = 'shapely'
        gpize = True
    kwargs = {''.join(k.split('_')): v for k, v in diter(kwargs)}

    # construct query string
    self._basequery = copy.deepcopy(_basequery)
    for k, v in diter(kwargs):
        try:
            self._basequery[k] = v
        except KeyError:
            raise KeyError("Option '{}' not recognized, check parameters".format(k))
    qstring = '&'.join(['{}={}'.format(k, v) for k, v in diter(self._basequery)])
    self._last_query = self._baseurl + '/query?' + qstring

    # run query
    resp = r.get(self._last_query + '&f=json')
    resp.raise_for_status()
    datadict = resp.json()

    # convert to output format
    try:
        features = datadict['features']
    except KeyError:
        code, msg = datadict['error']['code'], datadict['error']['message']
        details = datadict['error']['details']
        if not details:  # `details is []` was always False; test emptiness instead
            details = 'Mapserver provided no detailed error'
        raise KeyError(('Response from API is malformed. You may have '
                        'submitted too many queries, or experienced '
                        'significant network connectivity issues.\n'
                        '(API ERROR {}:{}({}))'.format(code, msg, details)))
    todf = []
    for i, feature in enumerate(features):
        locfeat = gpsr.__dict__[datadict['geometryType']](feature)
        todf.append(locfeat['properties'])
        todf[i].update({'geometry': locfeat['geometry']})
    df = pd.DataFrame(todf)
    outdf = gpsr.convert_geometries(df, pkg, strict=strict)
    if gpize:
        try:
            from geopandas import GeoDataFrame
            outdf = GeoDataFrame(outdf)
        except ImportError:
            print('Geopandas dataframe conversion failed! Continuing...')
    crs = datadict.pop('spatialReference', None)
    if crs is not None:
        crs = crs.get('latestWkid', crs.get('wkid'))
        crs = dict(init='epsg:{}'.format(crs))
    outdf.crs = crs
    return outdf
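# Usage sketch for this revision's extra knobs (hedged: `layer` is an
# already-constructed layer object): pkg picks the geometry backend, and
# strict turns invalid-polygon warnings into errors.
gdf = layer.query(where="STATE = '06'")                   # GeoDataFrame (default)
pdf = layer.query(where="STATE = '06'", pkg='shapely')    # pandas + shapely geometry
checked = layer.query(where="STATE = '06'", strict=True)  # error on invalid polygons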
import requests as r
from six import iteritems as diter
import pandas as pd
import os
import six

if six.PY3:
    unicode = str

fp = os.path.dirname(os.path.realpath(__file__))

raw_APIs = r.get('https://api.census.gov/data.json').json()['dataset']
APIs = {entry['identifier'].split('id')[-1].lstrip('/'):
        {key: value for key, value in diter(entry) if key != entry['identifier']}
        for entry in raw_APIs}


def available(verbose=True):
    """
    Returns available identifiers for Census Data APIs.

    NOTE: we do not support the Economic Indicators Time Series API yet.

    Arguments
    ==========
    verbose : boolean governing whether to provide ID and title
              or just ID

    Returns
    ========
    identifiers (if verbose: and dataset names)
    """
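# Usage, consistent with the tests above (hedged sketch): available() yields a
# list of identifiers, and verbose=True is assumed to map each identifier to
# its dataset title.
import cenpy
ids = cenpy.explorer.available(verbose=False)
titles = cenpy.explorer.available(verbose=True)
print(ids[0], '->', titles[ids[0]])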
def query(self, **kwargs):
    """
    A query function to extract data out of MapServer layers. I've exposed
    every option here.

    Parameters
    ==========
    where: str, required
           sql query string.
    out_fields: list or str (default: '*')
                fields to pass from the header out
    return_geometry: bool (default: True)
                     whether to return geometry or just the dataframe
    geometry_precision: str (default: None)
                        a number of significant digits to which the output of
                        the query should be truncated
    out_sr: int or str (default: None)
            ESRI WKID spatial reference into which to reproject the geodata
    return_ids_only: bool (default: False)
                     whether to return only ObjectIDs
    return_z: bool (default: False)
              whether to return z components of shp-z
    return_m: bool (default: False)
              whether to return m components of shp-m
    pkg : str (default: 'pysal')
          what geometry type to provide in the results of the query. Uses
          PySAL shapes by default. Supports "shapely," which constructs
          shapely shapes instead of pysal shapes, and "geopandas," which
          packs shapely shapes into a GeoPandas dataframe.

    Returns
    =======
    DataFrame or GeoDataFrame containing entries from the geodatabase

    Notes
    =====
    Most of the time, this should be used leaning on the SQL "where" argument:

        cxn.query(where='GEOID LIKE "06*"')

    In most cases, you'll be querying against layers, not MapServices overall.
    """
    # parse args
    pkg = kwargs.pop('pkg', 'pysal')
    gpize = kwargs.pop('gpize', False)
    if pkg.lower() == 'geopandas':
        pkg = 'shapely'
        gpize = True
    kwargs = {''.join(k.split('_')): v for k, v in diter(kwargs)}

    # construct query string
    self._basequery = copy.deepcopy(_basequery)
    for k, v in diter(kwargs):
        try:
            self._basequery[k] = v
        except KeyError:
            raise KeyError("Option '{}' not recognized, check parameters".format(k))
    qstring = '&'.join(['{}={}'.format(k, v) for k, v in diter(self._basequery)])
    self._last_query = self._baseurl + '/query?' + qstring

    # run query
    resp = r.get(self._last_query + '&f=json')
    resp.raise_for_status()
    datadict = resp.json()

    # convert to output format
    try:
        features = datadict['features']
    except KeyError:
        code, msg = datadict['error']['code'], datadict['error']['message']
        details = datadict['error']['details']
        if not details:  # `details is []` was always False; test emptiness instead
            details = 'Mapserver provided no detailed error'
        raise KeyError(('Response from API is malformed. You may have '
                        'submitted too many queries, or experienced '
                        'significant network connectivity issues.\n'
                        '(API ERROR {}:{}({}))'.format(code, msg, details)))
    todf = []
    for i, feature in enumerate(features):
        locfeat = gpsr.__dict__[datadict['geometryType']](feature)
        todf.append(locfeat['properties'])
        todf[i].update({'geometry': locfeat['geometry']})
    df = pd.DataFrame(todf)
    outdf = gpsr.convert_geometries(df, pkg)
    if gpize:
        try:
            from geopandas import GeoDataFrame
            outdf = GeoDataFrame(outdf)
        except ImportError:
            print('Geopandas dataframe conversion failed! Continuing...')
    outdf.crs = datadict.pop('spatialReference', {})
    return outdf
import numpy as np
from six import iteritems as diter


def lag_categorical(w, y, ties='tryself'):
    """
    Spatial lag operator for categorical variables.

    Constructs the most common categories of neighboring observations,
    weighted by their weight strength.

    Parameters
    ----------
    w : W
        PySAL spatial weights object
    y : iterable
        iterable collection of categories (either int or string) with
        dimensionality conforming to w (see examples)
    ties : str
        string describing the method to use when resolving ties. By default,
        the option is "tryself", and the category of the focal observation
        is included with its neighbors to try and break a tie. If this does
        not resolve the tie, a winner is chosen randomly. To just use random
        choice to break ties, pass "random" instead.

    Returns
    -------
    an (n x k) column vector containing the most common neighboring observation

    Notes
    -----
    This works on any array where the number of unique elements along the
    column axis is less than the number of elements in the array, for any
    dtype. That means the routine should work on any dtype that np.unique()
    can compare.

    Examples
    --------
    Set up a 9x9 weights matrix describing a 3x3 regular lattice. Lag one
    list of categorical variables with no ties.

    >>> import pysal
    >>> import numpy as np
    >>> w = pysal.lat2W(3, 3)
    >>> y = ['a','b','a','b','c','b','c','b','c']
    >>> y_l = pysal.weights.spatial_lag.lag_categorical(w, y)
    >>> y_l
    array(['b', 'a', 'b', 'c', 'b', 'c', 'b', 'c', 'b'], dtype='|S1')

    Explicitly reshape y into a (9x1) array and calculate lag again

    >>> yvect = np.array(y).reshape(9,1)
    >>> yvect_l = pysal.weights.spatial_lag.lag_categorical(w,yvect)
    >>> yvect_l
    array([['b'], ['a'], ['b'], ['c'], ['b'], ['c'], ['b'], ['c'], ['b']], dtype='|S1')

    compute the lag of a 9x2 matrix of categories

    >>> y2 = ['a', 'c', 'c', 'd', 'b', 'a', 'd', 'd', 'c']
    >>> ym = np.vstack((y,y2)).T
    >>> ym_lag = pysal.weights.spatial_lag.lag_categorical(w,ym)
    >>> ym_lag
    array([['b', 'b'], ['a', 'c'], ['b', 'c'], ['c', 'd'], ['b', 'd'], ['c', 'c'], ['c', 'd'], ['c', 'd'], ['b', 'b']], dtype='|S1')

    """
    if isinstance(y, list):
        y = np.array(y)
    orig_shape = y.shape
    if len(orig_shape) > 1:
        if orig_shape[1] > 1:
            # lag each column independently, then restack
            return np.vstack([lag_categorical(w, col) for col in y.T]).T
    y = y.flatten()
    output = np.zeros_like(y)
    keys = np.unique(y)
    # encode categories as integer indices into `keys`
    # (np.int is a deprecated alias for the builtin int)
    inty = np.zeros(y.shape, dtype=int)
    for i, key in enumerate(keys):
        inty[y == key] = i
    for idx, neighbors in w:
        vals = np.zeros(keys.shape)
        # accumulate weight mass per category over the neighbor set
        for neighb, weight in diter(neighbors):
            vals[inty[w.id2i[neighb]]] += weight
        # _resolve_ties is a module-level helper (definition not shown here)
        outidx = _resolve_ties(idx, inty, vals, neighbors, ties, w)
        output[w.id2i[idx]] = keys[outidx]
    return output.reshape(orig_shape)
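# Tie-breaking sketch (hedged: pysal 1.x-style API, as in the docstring above).
# On a 2x2 rook lattice with alternating categories, each cell's two neighbors
# disagree, so every lag is a tie.
import numpy as np
import pysal
w = pysal.lat2W(2, 2)
y = np.array(['a', 'b', 'a', 'b'])
pysal.weights.spatial_lag.lag_categorical(w, y)                  # 'tryself': the focal category breaks each tie
pysal.weights.spatial_lag.lag_categorical(w, y, ties='random')   # ties broken by random choice instead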
import requests as r  # imports assumed by this revision; made explicit here
from six import iteritems as diter
from json import JSONDecodeError  # Python 3.5+; Python 2's json raises ValueError
import os
import six

if six.PY3:
    unicode = str

fp = os.path.dirname(os.path.realpath(__file__))

resp = r.get("https://api.census.gov/data.json")
try:
    resp.raise_for_status()
    raw_APIs = resp.json()["dataset"]
    APIs = {entry["identifier"].split("id")[-1].lstrip("/"):
            {key: value for key, value in diter(entry)
             if key != entry["identifier"]}
            for entry in raw_APIs}
except r.HTTPError:
    raise r.HTTPError(
        "The main Census API Endpoint (https://api.census.gov/data.json)"
        " is not available."
        " Try visiting https://api.census.gov/data.json in a web browser"
        " to verify connectivity.")
except JSONDecodeError:
    # JSONDecodeError's constructor requires (msg, doc, pos); placeholders here
    raise JSONDecodeError(
        "The main Census API Endpoint (https://api.census.gov/data.json)"
        " returned malformed content."
        " Try visiting https://api.census.gov/data.json in a web browser"
        " to verify connectivity.", doc="", pos=0)