def test_urls(self):
    """Load the ``censusgeo://CA/140`` geoframe and print its first rows.

    Only the initial load-and-print executes. A bare ``return`` follows it,
    so the URL-parsing and area assertions further down never run.
    """
    import rowgenerators as rg

    frame = rg.geoframe('censusgeo://CA/140')
    print(frame.set_index('geoid').head())

    # NOTE(review): this early return makes everything below unreachable.
    # It looks like a deliberate way of switching the assertions off --
    # confirm whether they should be re-enabled or moved to their own test.
    return

    # Census URL without an explicit year/release.
    url = rg.parse_app_url('census://CA/140/B17001')
    target = url.get_resource().get_target()
    print(target, target.year, target.release)

    self.assertEqual('census://CA/140/B17001', str(target))
    self.assertEqual(2016, target.year)
    self.assertEqual(5, target.release)

    # Census URL with an explicit year and release.
    url = rg.parse_app_url('census://2015/3/CA/140/B17001')
    target = url.get_resource().get_target()
    print(target, target.year, target.release)

    self.assertEqual('census://2015/3/CA/140/B17001', str(target))
    self.assertEqual(2015, target.year)
    self.assertEqual(3, target.release)

    # The same total area is expected from each way of getting the frame.
    frame = target.geoframe()
    self.assertEqual(43.083, frame.area.sum().round(3))

    frame = rg.geoframe('census://CA/140/B17001')
    self.assertEqual(43.083, frame.area.sum().round(3))

    frame = rg.geoframe('censusgeo://CA/140')
    self.assertEqual(43.083, frame.area.sum().round(3))
def test_shell_not_linear_ring(self):
    """Load ``censusgeo://2019/5/CA/tract`` and print how many rows it has.

    There are no assertions; the test passes as long as the load does not
    raise. NOTE(review): judging by the name this is presumably a
    regression check for a "shell is not a LinearRing" geometry error --
    confirm.
    """
    import rowgenerators as rg

    tract_frame = rg.geoframe('censusgeo://2019/5/CA/tract')
    row_count = len(tract_frame)
    print(row_count)
def test_national_geo(self):
    """Load the ``censusgeo://US/cbsa`` geoframe and print its row count.

    There are no assertions; the test passes as long as the load succeeds.
    """
    import rowgenerators

    national_frame = rowgenerators.geoframe('censusgeo://US/cbsa')
    print(len(national_frame))
def _f_get_split_blocks(st, cache, url): k = f'blocks/geo/{st}' if not cache.exists(k): df = rg.geoframe(url).to_crs(4326) df['geoid'] = df.geoid20.apply(lambda v: str(Block.parse(v).as_acs())) df = df.rename(columns={ 'aland20': 'aland', 'awater20': 'awater', 'intptlat20': 'lat', 'intptlon20': 'lon', }) df = df[['geoid', 'aland', 'awater', 'lat', 'lon', 'geometry']] df['lat'] = df.lat.astype(float) df['lon'] = df.lon.astype(float) cache.put(k, df) return k
def tracts(self):
    """Return the combined tracts frame, building and memoizing it once.

    On first use, one geoframe is loaded per value of ``stusab`` from the
    ``us_tracts_template`` reference URL, the frames are concatenated and
    reprojected to EPSG:4326, and the helper columns ``continential``,
    ``tract_id``, ``geohash`` and ``gh4`` are added. The result is stored
    on ``self._tracts`` and returned directly on later calls.
    """
    if self._tracts is not None:
        return self._tracts

    logger.info("Building tracts")

    template = self.pkg.reference('us_tracts_template').url
    per_state = [
        rg.geoframe(template.format(st=st))
        for st in tqdm(stusab.values())
    ]
    merged = pd.concat(per_state).to_crs(4326)

    # Flag (1/0) the tracts whose state FIPS code appears in self.states,
    # i.e. the continental US per the original author's note.
    known_statefp = self.states.statefp.unique()
    merged['continential'] = merged.statefp.isin(known_statefp).astype(int)

    # Running 0..N-1 identifier over the concatenated rows.
    merged['tract_id'] = merged.reset_index().index

    # Original author's note: an accurate area computation would require
    # converting each tract to its own UTM zone first; that step is not
    # performed here.

    # Geohash of each tract's (intptlat, intptlon) pair, plus its
    # four-character prefix.
    latlon = merged[['intptlat', 'intptlon']].astype(float)
    merged['geohash'] = latlon.apply(
        lambda row: gh.encode(row.intptlat, row.intptlon), axis=1)
    merged['gh4'] = merged.geohash.str.slice(0, 4)

    output_columns = [
        'geoid', 'tract_id', 'geohash', 'statefp', 'intptlat', 'intptlon',
        'geometry', 'aland', 'awater', 'gh4', 'continential',
    ]
    self._tracts = merged[output_columns]

    return self._tracts
def census_geo(state, sl='state', year=2018, release=5):
    """Load a geoframe from a ``censusgeo://`` URL built from the arguments.

    The URL has the form ``censusgeo://{year}/{release}/{state}/{sl}``.

    Args:
        state: Value placed in the state position of the URL.
        sl: Value placed in the last URL segment (presumably the summary
            level -- confirm); defaults to ``'state'``.
        year: Year segment of the URL; defaults to 2018.
        release: Release segment of the URL; defaults to 5.

    Returns:
        Whatever ``rowgenerators.geoframe`` returns for that URL.
    """
    import rowgenerators as rg

    geo_url = f'censusgeo://{year}/{release}/{state}/{sl}'
    return rg.geoframe(geo_url)