def counties(self):
    """Return a dict mapping California county names to GVid strings.

    Rows are read from the 'geofile50' dependency and kept only when their
    ``state`` field equals 6. Each kept row contributes one entry keyed by
    the county's ``medium_name``, whose value is the row's ACS geoid
    converted to a GVid and rendered as a string. A final 'California'
    entry is added for the state itself.
    """
    from geoid.acs import AcsGeoid
    from geoid.civick import GVid, State

    california_fips = 6

    d = {}
    for row in self.dep('geofile50'):
        if row.state == california_fips:
            # Parse each geoid once; both the key and the value derive from it.
            county = AcsGeoid.parse(row.geoid)
            d[county.county_name.medium_name] = str(county.convert(GVid))

    # NOTE(review): unlike the county entries, this value is a State object,
    # not a string -- confirm that callers expect the mixed value types.
    d['California'] = State(california_fips)

    return d
def sub_geoids(v):
    """Replace state abbreviations and state fips codes with state and national geoids.

    Two-character inputs are upper-cased and then tried, in order, as:
    the literal 'US' (national geoid), a value found in ``stusab`` (state
    geoid for the matching key), and an integer state number. Anything else,
    including two-character inputs that match none of these, is parsed as an
    ACS geoid string.

    Returns the string form of the resulting geoid. Errors raised by
    ``AcsGeoid.parse`` for unparseable input propagate to the caller.
    """
    from geoid.censusnames import stusab
    from geoid.acs import Us, State, AcsGeoid

    if len(v) == 2:
        v = v.upper()

        if v == 'US':
            return str(Us())

        # Invert stusab so its values (presumably state abbreviations --
        # confirm against geoid.censusnames) look up the key State() expects.
        code_by_abbrev = {abbrev: code for code, abbrev in stusab.items()}
        if v in code_by_abbrev:
            return str(State(code_by_abbrev[v]))

        # Maybe it is a state number. `v` is deliberately not rebound to the
        # int, so the fall-through below still parses the original string.
        try:
            return str(State(int(v)))
        except ValueError:
            pass

    return str(AcsGeoid.parse(v))
def shape_url(self):
    """Return the shapefile URL.

    The URL string is produced by ``tiger_url`` from this object's year,
    summary level and the state abbreviation of its geoid, then wrapped with
    ``parse_app_url``.
    """
    from geoid.acs import AcsGeoid

    year = self.year
    level = self.summary_level
    state_abbrev = AcsGeoid.parse(self.geoid).stusab

    raw_url = tiger_url(year, level, state_abbrev)

    return parse_app_url(raw_url)
def geo_url(self):
    """Return a url for the geofile for this Census file.

    NOTE(review): the URL is built with ``tiger_url`` from the year, summary
    level and state abbreviation, exactly as ``shape_url`` does -- confirm
    that this is really the intended geofile location.
    """
    from geoid.acs import AcsGeoid

    url_args = (
        self.year,
        self.summary_level,
        AcsGeoid.parse(self.geoid).stusab,
    )

    return parse_app_url(tiger_url(*url_args))
def __init__(self, ref, cache=None, working_dir=None, **kwargs):
    """Set up the table and its metadata from a census reference.

    Args:
        ref: The census URL object; asserted to be a ``CensusUrl``.
        cache: Passed through to the parent initializer.
        working_dir: Passed through to the parent initializer.
        **kwargs: Passed through to the parent initializer.

    After the parent initializer runs, ``self.ref`` supplies the year,
    release, geoid, summary level and table id used to build ``self.table``
    and ``self._meta``.
    """
    from geoid.acs import AcsGeoid
    from publicdata.census.files.metafiles import TableMeta

    super().__init__(ref, cache, working_dir, **kwargs)

    gid = AcsGeoid.parse(self.ref.geoid)

    try:
        stusab = gid.stusab
    except AttributeError:
        # "US" level geoids don't have a state parameter. Touching `gid.us`
        # re-raises AttributeError for any other geoid that lacks `stusab`,
        # so only genuine US-level geoids fall back to "US".
        gid.us
        stusab = "US"

    self.table = Table(self.ref.year, self.ref.release, stusab,
                       str(self.ref.summary_level), self.ref.tableid)

    self._meta = TableMeta(self.ref.year, self.ref.release)

    assert isinstance(ref, CensusUrl)
def parse_to_gvid(v):
    """Parse an ACS Geoid or a GVID to a GVID.

    The value is first tried as a GVid. If that raises ValueError, it is
    parsed as an ACS geoid and converted to a GVid. If that also raises
    ValueError, a new ValueError carrying both messages is raised.
    """
    from geoid.civick import GVid
    from geoid.acs import AcsGeoid

    try:
        return GVid.parse(v)
    except ValueError as gvid_error:
        # Keep the first failure's message for the combined error below.
        gvid_message = str(gvid_error)

    try:
        acs_geoid = AcsGeoid.parse(v)
        return acs_geoid.convert(GVid)
    except ValueError as acs_error:
        acs_message = str(acs_error)
        raise ValueError("Failed to parse to either ACS or GVid: {}; {}".format(gvid_message, acs_message))
def __init__(self, ref, cache=None, working_dir=None, **kwargs):
    """Set up the table, its metadata and the source URL from a census reference.

    Args:
        ref: The census URL object; asserted to be a ``CensusUrl``.
        cache: Passed through to the parent initializer.
        working_dir: Passed through to the parent initializer.
        **kwargs: Passed through to the parent initializer. Must contain a
            truthy 'source_url', which is stored on ``self._source_url``.
    """
    from geoid.acs import AcsGeoid
    from publicdata.census.files.metafiles import TableMeta

    super().__init__(ref, cache, working_dir, **kwargs)

    parsed_geoid = AcsGeoid.parse(self.ref.geoid)

    try:
        state_abbrev = parsed_geoid.stusab
    except AttributeError:
        # "US" level geoids don't have a state parameter. The bare attribute
        # access raises AttributeError again if the geoid has no `us` either.
        parsed_geoid.us
        state_abbrev = "US"

    table_args = (
        self.ref.year,
        self.ref.release,
        state_abbrev,
        str(self.ref.summary_level),
        self.ref.tableid,
    )
    self.table = Table(*table_args)

    self._meta = TableMeta(self.ref.year, self.ref.release)

    self._source_url = kwargs.get('source_url')

    assert self._source_url
    assert isinstance(ref, CensusUrl)
def __iter__(self):
    """Yield rows joining the California district list to Census school districts.

    The first item yielded is the header row: 'id' followed by the column
    names of the joined frame. Every following item is one data row,
    prefixed with its integer row index.
    """
    # NOTE(review): neither name is referenced below; the imports are kept in
    # case they are needed for their import-time side effects -- confirm.
    from ambry import get_library
    import censuslib.dataframe
    import pandas as pd

    # The district NCES codes aren't in the district file, although they are in the school file.
    schools = self.bundle.partition(table='schools').analysis.dataframe()
    schools['cd_code'] = schools.cdscode.apply(lambda cdscode: cdscode[:7])
    nces_districts = schools[schools.statustype == 'Active'][['ncesdist', 'cd_code']].drop_duplicates()

    # The actual districts file
    ca_districts = self.bundle.partition(table='districts').analysis.dataframe()[
        ['cd_code', 'county_sos', 'county_fips', 'county_gvid', 'county', 'district']]

    # Combine the codes from the school file with the district file
    cd_code_districts = ca_districts.set_index('cd_code').join(nces_districts.set_index('cd_code')).reset_index()

    ##
    ## Join the California state districts list with the Census districts list
    ##
    from geoid.acs import AcsGeoid
    from geoid.civick import GVid

    def dist_pred(row):
        return row.state == 6

    def mk_cd_code(nces):
        return '06{:05d}'.format(nces)

    # Combine the three partitions for school districts in the census, and extract the NCES code.
    # Each pair is (dependency name, geoid attribute holding that district type's code).
    census_sources = (
        ('elementary', 'sdelm'),
        ('secondary', 'sdsec'),
        ('unified', 'sduni'),
    )

    census_parts = []
    for dep_name, code_attr in census_sources:
        part = self.bundle.dep(dep_name).analysis.dataframe(dist_pred)[['geoid', 'name']].copy()
        part['nces'] = part.geoid.apply(
            lambda geoid, attr=code_attr: mk_cd_code(getattr(AcsGeoid.parse(geoid), attr)))
        census_parts.append(part)

    # Add a GVID
    census_districts = pd.concat(census_parts, axis=0)
    census_districts['gvid'] = census_districts.geoid.apply(lambda geoid: AcsGeoid.parse(geoid).convert(GVid))

    # Do the join
    districts = cd_code_districts.set_index('ncesdist').join(
        census_districts.drop_duplicates().set_index('nces')).reset_index()

    districts.columns = ['ncesdist'] + list(districts.columns)[1:]

    # These are different sizes, don't know why. The de-duplicated census file is smaller than the
    # list from California, probably because many of the districts are smaller than the reporting limits.
    # NOTE: Because join is a left join, (a) it must be joined in the order above ( cd_code_districts,
    # the larger list, is the base ) and (b) the joined 'districts' dataframe will have some missing geoids.
    # The missing geoids appear to be primarily for County offices of education and districts in
    # small counties.
    # >>> print len(census_districts), len(census_districts.drop_duplicates()), len(cd_code_districts), len(districts)
    # >>> 1976 988 1098 1098

    # Assign the filled columns back explicitly: a chained
    # `districts.col.fillna(..., inplace=True)` acts on a temporary Series and
    # leaves the frame untouched when pandas Copy-on-Write is active.
    fill_values = (('gvid', ''), ('geoid', ''), ('name', ''), ('ncesdist', 0))
    for column, fill in fill_values:
        districts[column] = districts[column].fillna(value=fill)

    df = districts.reset_index()

    yield ['id'] + list(df.columns)

    for index, row in df.iterrows():
        yield [index] + list(row)