Ejemplo n.º 1
0
 def counties(self):
     """Map county names to GVid strings for the rows in state 6.

     Iterates the rows of the ``geofile50`` dependency, keeps those whose
     ``state`` equals 6, and maps each county's medium name to the string
     form of its geoid converted to a GVid. A ``'California'`` entry
     holding ``State(6)`` is added last.

     Returns:
         dict: county medium name -> GVid string, plus the ``'California'``
         entry.
     """
     from geoid.acs import AcsGeoid
     from geoid.civick import GVid, State

     d = {}

     for row in self.dep('geofile50'):
         if row.state == 6:
             # Parse the geoid once; both the name and the GVid come from it.
             geoid = AcsGeoid.parse(row.geoid)
             d[geoid.county_name.medium_name] = str(geoid.convert(GVid))

     # NOTE(review): unlike the county values, this entry is a State object
     # rather than a string -- confirm whether callers expect str(State(6)).
     d['California'] = State(6)

     return d
Ejemplo n.º 2
0
def sub_geoids(v):
    """Replace a state abbreviation or state FIPS code with a geoid string.

    Two-character values are upper-cased and checked first: ``'US'`` yields
    the national geoid, and a value found in the inverted ``stusab`` table
    yields the matching state geoid. Otherwise the value is tried as an
    integer state number, and finally parsed as an ACS geoid.

    Returns:
        str: the string form of the resulting geoid.
    """

    from geoid.censusnames import stusab
    from geoid.acs import Us, State, AcsGeoid

    if len(v) == 2:

        v = v.upper()

        # Invert stusab so the values of the table become the lookup keys
        # (presumably state number -> abbreviation; verify against geoid).
        state_by_abbrev = dict(
            (abbrev, number) for number, abbrev in stusab.items()
        )

        if v == 'US':
            return str(Us())

        if v in state_by_abbrev:
            return str(State(state_by_abbrev[v]))

    # Maybe it is a state number. A ValueError from either the int
    # conversion or the State construction falls through to the parse below.
    try:
        v = int(v)
        state_geoid = str(State(v))
    except ValueError:
        pass
    else:
        return state_geoid

    return str(AcsGeoid.parse(v))
Ejemplo n.º 3
0
    def shape_url(self):
        """Return the shapefile URL, parsed into an application URL object.

        The URL string is built by ``tiger_url`` from this object's year,
        summary level, and the state abbreviation of its parsed geoid.
        """
        from geoid.acs import AcsGeoid

        year = self.year
        summary_level = self.summary_level

        # The state abbreviation is read off the parsed ACS geoid.
        state_abbrev = AcsGeoid.parse(self.geoid).stusab

        url_string = tiger_url(year, summary_level, state_abbrev)

        return parse_app_url(url_string)
Ejemplo n.º 4
0
    def geo_url(self):
        """Return a URL for the geofile for this Census file.

        The URL string is built by ``tiger_url`` from this object's year,
        summary level, and the state abbreviation of its parsed geoid, then
        passed through ``parse_app_url``.
        """
        from geoid.acs import AcsGeoid

        year = self.year
        summary_level = self.summary_level

        # The state abbreviation is read off the parsed ACS geoid.
        state_abbrev = AcsGeoid.parse(self.geoid).stusab

        url_string = tiger_url(year, summary_level, state_abbrev)

        return parse_app_url(url_string)
Ejemplo n.º 5
0
    def __init__(self, ref, cache=None, working_dir=None, **kwargs):
        """Set up the table and table metadata for a Census reference.

        Args:
            ref: the Census URL; asserted to be a ``CensusUrl``.
            cache: passed through to the parent initializer.
            working_dir: passed through to the parent initializer.
            **kwargs: passed through to the parent initializer.
        """
        from geoid.acs import AcsGeoid
        from publicdata.census.files.metafiles import TableMeta

        super().__init__(ref, cache, working_dir, **kwargs)

        gid = AcsGeoid.parse(self.ref.geoid)

        try:
            stusab = gid.stusab
        except AttributeError:
            # "US" level geoids don't have a state parameter. Touching
            # ``gid.us`` re-raises AttributeError for any other geoid that
            # lacks a state abbreviation, so only US-level geoids fall back.
            gid.us
            stusab = "US"

        self.table = Table(self.ref.year, self.ref.release, stusab,
                           str(self.ref.summary_level), self.ref.tableid)

        self._meta = TableMeta(self.ref.year, self.ref.release)

        assert isinstance(ref, CensusUrl)
Ejemplo n.º 6
0
def parse_to_gvid(v):
    """Parse an ACS Geoid or a GVID to a GVID.

    The value is first parsed as a GVid. If that raises ``ValueError``, it
    is parsed as an ACS geoid and converted to a GVid.

    Raises:
        ValueError: if both parses fail; the message carries the text of
            both underlying errors.
    """
    from geoid.civick import GVid
    from geoid.acs import AcsGeoid

    try:
        return GVid.parse(v)
    except ValueError as gvid_error:
        # Keep only the text: the exception name is unbound once the
        # handler exits.
        gvid_message = str(gvid_error)

    try:
        acs_geoid = AcsGeoid.parse(v)
        return acs_geoid.convert(GVid)
    except ValueError as acs_error:
        message = "Failed to parse to either ACS or GVid: {}; {}".format(
            gvid_message, str(acs_error))
        raise ValueError(message)
Ejemplo n.º 7
0
def generate_all(sumlevel, d):
    """Generate a dict that includes all of the available geoid values, with
    keys for the most common names for those values.

    Args:
        sumlevel: summary level; converted with ``int()``.
        d: mapping of geoid component names to values. It is copied, so the
            caller's mapping is not modified.

    Returns:
        dict: ``gvid``, ``geoid`` and ``geoidt`` strings built from the
        components, or an empty dict when no GVid class resolves for the
        summary level.
    """

    from geoid.civick import GVid
    from geoid.tiger import TigerGeoid
    from geoid.acs import AcsGeoid

    sumlevel = int(sumlevel)

    # Work on a copy so the renames below do not touch the caller's mapping.
    d = dict(d.items())

    # Map common name variants onto the key names passed to the geoid classes.
    renames = (
        ('cousub', 'cosub'),
        ('blkgrp', 'blockgroup'),
        ('zcta5', 'zcta'),
    )

    for variant, target in renames:
        if variant in d:
            d[target] = d.pop(variant)

    gvid_class = GVid.resolve_summary_level(sumlevel)

    if not gvid_class:
        return {}

    geoidt_class = TigerGeoid.resolve_summary_level(sumlevel)
    geoid_class = AcsGeoid.resolve_summary_level(sumlevel)

    # Construction errors propagate to the caller unchanged.
    return dict(
        gvid=str(gvid_class(**d)),
        geoid=str(geoid_class(**d)),
        geoidt=str(geoidt_class(**d))
    )
Ejemplo n.º 8
0
    def __init__(self, ref, cache=None, working_dir=None, **kwargs):
        """Set up the table, table metadata and source URL for a Census reference.

        Args:
            ref: the Census URL; asserted to be a ``CensusUrl``.
            cache: passed through to the parent initializer.
            working_dir: passed through to the parent initializer.
            **kwargs: passed through to the parent initializer; must carry a
                truthy ``source_url`` entry.
        """
        from geoid.acs import AcsGeoid
        from publicdata.census.files.metafiles import TableMeta

        super().__init__(ref, cache, working_dir, **kwargs)

        parsed_geoid = AcsGeoid.parse(self.ref.geoid)

        try:
            state_abbrev = parsed_geoid.stusab
        except AttributeError:
            # "US" level geoids don't have a state parameter. Reading ``us``
            # raises AttributeError again for any other kind of geoid.
            parsed_geoid.us
            state_abbrev = "US"

        self.table = Table(
            self.ref.year,
            self.ref.release,
            state_abbrev,
            str(self.ref.summary_level),
            self.ref.tableid,
        )

        self._meta = TableMeta(self.ref.year, self.ref.release)

        self._source_url = kwargs.get('source_url')

        assert self._source_url

        assert isinstance(ref, CensusUrl)
Ejemplo n.º 9
0
    def __iter__(self):
        """Yield a header row, then one row per California school district.

        The California districts list is combined with the NCES district
        codes taken from the active schools, then left-joined with the
        Census elementary, secondary and unified district geoids. Districts
        with no Census match keep empty ``gvid``, ``geoid`` and ``name``
        values.

        Yields:
            list: first ``['id'] + column names``, then for each row of the
            joined frame ``[index] + row values``.
        """

        # NOTE(review): get_library and censuslib.dataframe are not referenced
        # below; kept in case the imports are needed for side effects -- confirm.
        from ambry import get_library
        import censuslib.dataframe
        import pandas as pd

        # The district NCES codes aren't in the district file, although they are in the school file.
        schools = self.bundle.partition(table='schools').analysis.dataframe()
        # The district code is the first 7 characters of the school's cdscode.
        schools['cd_code'] = schools.cdscode.apply(lambda cdscode: cdscode[:7])

        nces_districts = schools[schools.statustype=='Active'][['ncesdist', 'cd_code']].drop_duplicates()

        # The actual districts file
        ca_districts = self.bundle.partition(table='districts').analysis.dataframe()[
            ['cd_code', 'county_sos','county_fips','county_gvid','county','district']]

        #assert len(nces_districts) == len(ca_districts)

        # Combine the codes from the school file with the district file
        cd_code_districts = ca_districts.set_index('cd_code').join(nces_districts.set_index('cd_code')).reset_index()

        ##
        ## Join the California state districts list with the Census districts list
        ###

        from geoid.acs import AcsGeoid
        from geoid.civick import GVid
        # Row filter handed to the Census partitions: keep state 6 only.
        dist_pred = lambda row: row.state ==6


        def mk_cd_code(nces):
            # '06' followed by the district number zero-padded to 5 digits.
            return '06{:05d}'.format(nces)

        # Combine the three partitions for school districts in the census, and extract the NCES code

        elem = self.bundle.dep('elementary').analysis.dataframe(dist_pred)[['geoid', 'name']].copy()
        elem['nces'] = elem.geoid.apply(lambda geoid: mk_cd_code(AcsGeoid.parse(geoid).sdelm) )

        second = self.bundle.dep('secondary').analysis.dataframe(dist_pred)[['geoid', 'name']].copy()
        second['nces'] = second.geoid.apply(lambda geoid: mk_cd_code(AcsGeoid.parse(geoid).sdsec) )

        unified = self.bundle.dep('unified').analysis.dataframe(dist_pred)[['geoid', 'name']].copy()
        unified['nces'] = unified.geoid.apply(lambda geoid: mk_cd_code(AcsGeoid.parse(geoid).sduni) )

        # Add a GVID
        census_districts = pd.concat([elem, second, unified], axis=0)
        census_districts['gvid'] = census_districts.geoid.apply(lambda geoid: AcsGeoid.parse(geoid).convert(GVid) )

        # Do the join
        districts = cd_code_districts.set_index('ncesdist').join(census_districts.drop_duplicates().set_index('nces')).reset_index()
        # Name the first column (the restored join index) 'ncesdist'.
        districts.columns = ['ncesdist'] + list(districts.columns)[1:]

        # These are different sizes, don't know why. The de-duplicated census file is smaller than the
        # list from California, probably because many of the districts are smaller than the reporting limits.
        # NOTE: Because join is a left join, (a) it must be joined in the order above ( cd_code_districts, the larger list,
        # is the base ) and (b) the joined 'districts' dataframe will have some missing geoids.
        # The missing geoids appear to be primarily for County offices of education and districts in small counties.
        # >>> print len(census_districts), len(census_districts.drop_duplicates()), len(cd_code_districts), len(districts)
        # >>> 1976 988 1098 1098

        # Fill the gaps left by the left join. Assign each filled column back
        # explicitly: an in-place fillna on an attribute-accessed column is a
        # chained update that pandas does not guarantee will reach the frame.
        fill_values = (('gvid', ''), ('geoid', ''), ('name', ''), ('ncesdist', 0))

        for column, fill in fill_values:
            districts[column] = districts[column].fillna(fill)

        df = districts.reset_index()

        # Header row first, then each data row prefixed with its index.
        yield ['id'] + list(df.columns)

        for index, row in df.iterrows():

            yield [index] + list(row)