def counties(self):
    """Build a lookup from county names to GVid strings for state 6.

    Rows come from the ``geofile50`` dependency; only rows whose ``state``
    equals 6 are used. Each key is the medium-length county name derived
    from the row's ACS geoid, and each value is that geoid converted to a
    GVid and rendered as a string.

    An extra ``'California'`` entry is added for the state itself.
    NOTE(review): that entry's value is a ``State`` object, not a string
    like the county entries -- confirm this is intended.
    """
    from geoid.acs import AcsGeoid
    from geoid.civick import GVid, State

    gvid_by_name = {}

    for row in self.dep('geofile50'):
        if row.state != 6:
            continue

        acs_geoid = AcsGeoid.parse(row.geoid)
        county = acs_geoid.county_name.medium_name
        gvid_by_name[county] = str(acs_geoid.convert(GVid))

    gvid_by_name['California'] = State(6)

    return gvid_by_name
Beispiel #2
0
def sub_geoids(v):
    """Replace state abbreviations and state FIPS codes with state and national geoids.

    Resolution order:

    1. A two-character string equal to ``'US'`` (case-insensitive) becomes
       the national geoid.
    2. A two-character string matching a state abbreviation in
       ``geoid.censusnames.stusab`` becomes that state's geoid.
    3. Anything ``int()`` accepts (``6``, ``'06'``) is treated as a state
       number.
    4. Everything else is parsed as an ACS geoid string.

    :param v: A state abbreviation, ``'US'``, a state number (``int`` or
        numeric string), or an ACS geoid string.
    :return: The string form of the resulting geoid.
    :raises ValueError: Presumably from ``AcsGeoid.parse`` when ``v``
        matches none of the above -- confirm against the geoid package.
    """

    from geoid.censusnames import stusab
    from geoid.acs import Us, State, AcsGeoid

    # Only strings can be abbreviations. The isinstance guard lets integer
    # state numbers reach the int() branch instead of failing on len().
    if isinstance(v, str) and len(v) == 2:

        v = v.upper()

        if v == 'US':
            return str(Us())

        # Invert the stusab mapping so it can be keyed by abbreviation.
        # Distinct loop names avoid shadowing the argument.
        code_by_abbrev = {abbrev: code for code, abbrev in stusab.items()}

        if v in code_by_abbrev:
            return str(State(code_by_abbrev[v]))

    # Maybe it is a state number. The converted value gets its own name so
    # the fallback below still sees the caller's original value.
    try:
        state_number = int(v)
        return str(State(state_number))
    except ValueError:
        pass

    return str(AcsGeoid.parse(v))
Beispiel #3
0
    def shape_url(self):
        """Return the shapefile URL.

        The URL string is built by ``tiger_url`` from this object's year,
        summary level and the state abbreviation of its geoid, then wrapped
        with ``parse_app_url``.
        """
        from geoid.acs import AcsGeoid

        year, level = self.year, self.summary_level
        state_abbrev = AcsGeoid.parse(self.geoid).stusab

        return parse_app_url(tiger_url(year, level, state_abbrev))
Beispiel #4
0
    def geo_url(self):
        """Return a url for the geofile for this Census file.

        The URL string comes from ``tiger_url``, called with this object's
        year, summary level and the state abbreviation of its geoid, and is
        then wrapped with ``parse_app_url``.

        NOTE(review): this builds its URL with ``tiger_url`` exactly as a
        shapefile URL would be built -- confirm that is the intended source
        for the geofile.
        """
        from geoid.acs import AcsGeoid

        year, level = self.year, self.summary_level
        parsed_geoid = AcsGeoid.parse(self.geoid)
        url_string = tiger_url(year, level, parsed_geoid.stusab)

        return parse_app_url(url_string)
Beispiel #5
0
    def __init__(self, ref, cache=None, working_dir=None, **kwargs):
        """Initialise the object and attach its census table and metadata.

        After the parent initialiser runs, ``self.ref`` supplies the year,
        release, summary level, table id and geoid. The geoid's state
        abbreviation selects the ``Table``; ``TableMeta`` is built from the
        year and release.

        :param ref: Reference for the census file; asserted to be a
            ``CensusUrl``.
        :param cache: Passed through to the parent initialiser.
        :param working_dir: Passed through to the parent initialiser.
        :param kwargs: Passed through to the parent initialiser.
        """
        from geoid.acs import AcsGeoid
        from publicdata.census.files.metafiles import TableMeta

        super().__init__(ref, cache, working_dir, **kwargs)

        census_ref = self.ref
        parsed_geoid = AcsGeoid.parse(census_ref.geoid)
        year, release = census_ref.year, census_ref.release

        self.table = Table(
            year,
            release,
            parsed_geoid.stusab,
            str(census_ref.summary_level),
            census_ref.tableid,
        )
        self._meta = TableMeta(year, release)

        assert isinstance(ref, CensusUrl)
Beispiel #6
0
def parse_to_gvid(v):
    """Parse an ACS Geoid or a GVID to a GVID.

    ``v`` is first tried as a GVid. If that raises ``ValueError`` it is
    parsed as an ACS geoid and converted to a GVid.

    :param v: The value to parse.
    :return: The GVid parsed or converted from ``v``.
    :raises ValueError: If both attempts fail; the message carries the text
        of both underlying errors.
    """
    from geoid.civick import GVid
    from geoid.acs import AcsGeoid

    # First attempt: the value may already be a GVid.
    try:
        return GVid.parse(v)
    except ValueError as gvid_error:
        gvid_message = str(gvid_error)

    # Second attempt: treat it as an ACS geoid and convert.
    try:
        acs_geoid = AcsGeoid.parse(v)
        return acs_geoid.convert(GVid)
    except ValueError as acs_error:
        template = "Failed to parse to either ACS or GVid: {}; {}"
        raise ValueError(template.format(gvid_message, str(acs_error)))
Beispiel #7
0
    def __init__(self, ref, cache=None, working_dir=None, **kwargs):
        """Initialise the object and attach its census table, metadata and source URL.

        After the parent initialiser runs, ``self.ref`` supplies the year,
        release, summary level, table id and geoid. The geoid's state
        abbreviation selects the ``Table``; when the geoid has no ``stusab``
        attribute, ``"US"`` is used instead.

        :param ref: Reference for the census file; asserted to be a
            ``CensusUrl``.
        :param cache: Passed through to the parent initialiser.
        :param working_dir: Passed through to the parent initialiser.
        :param kwargs: Passed through to the parent initialiser. Must
            include a truthy ``source_url``, which is stored on the
            instance.
        """
        from geoid.acs import AcsGeoid
        from publicdata.census.files.metafiles import TableMeta

        super().__init__(ref, cache, working_dir, **kwargs)

        census_ref = self.ref
        parsed_geoid = AcsGeoid.parse(census_ref.geoid)

        try:
            state_abbrev = parsed_geoid.stusab
        except AttributeError:
            # "US" level geoids don't have a state parameter. Reading `.us`
            # raises AttributeError again if this geoid lacks that attribute.
            parsed_geoid.us
            state_abbrev = "US"

        year, release = census_ref.year, census_ref.release

        self.table = Table(
            year,
            release,
            state_abbrev,
            str(census_ref.summary_level),
            census_ref.tableid,
        )
        self._meta = TableMeta(year, release)

        self._source_url = kwargs.get('source_url')
        assert self._source_url

        assert isinstance(ref, CensusUrl)
Beispiel #8
0
    def __iter__(self):
        """Yield a header row, then one row per district, joining state and Census data.

        The steps are:

        1. Take ``ncesdist`` codes from the ``schools`` partition (active
           schools only) and attach them to the ``districts`` partition via
           a ``cd_code`` built from the first 7 characters of ``cdscode``.
        2. Load the ``elementary``, ``secondary`` and ``unified``
           dependencies, filtered to rows with ``state == 6``, and derive an
           ``nces`` value and a ``gvid`` from each geoid.
        3. Left-join the state list (base) to the Census list on
           ``ncesdist`` / ``nces`` and fill missing values.

        The first yielded item is ``['id']`` plus the column names; each
        later item is the row index followed by the row's values.
        """

        from ambry import get_library
        import censuslib.dataframe
        import pandas as pd

        # The district NCES codes aren't in the district file, although they are in the school file.
        schools = self.bundle.partition(table='schools').analysis.dataframe()
        # cd_code is the first 7 characters of the school's cdscode.
        schools['cd_code'] = schools.cdscode.apply(lambda cdscode: cdscode[:7])

        # One (ncesdist, cd_code) pair per combination seen among active schools.
        nces_districts = schools[schools.statustype=='Active'][['ncesdist', 'cd_code']].drop_duplicates()

        # The actual districts file
        ca_districts = self.bundle.partition(table='districts').analysis.dataframe()[
            ['cd_code', 'county_sos','county_fips','county_gvid','county','district']]

        #assert len(nces_districts) == len(ca_districts)

        # Combine the codes from the school file with the district file
        cd_code_districts = ca_districts.set_index('cd_code').join(nces_districts.set_index('cd_code')).reset_index()

        ##
        ## Join the California state districts list with the Census districts list
        ###

        from geoid.acs import AcsGeoid
        from geoid.civick import GVid
        # Row predicate passed to dataframe(): keep only rows with state == 6.
        dist_pred = lambda row: row.state ==6


        def mk_cd_code(nces):
            """Return '06' followed by ``nces`` zero-padded to 5 digits."""
            return '06{:05d}'.format(nces)

        # Combine the three partitions for school districts in the census, and extract the NCES code

        # Each partition exposes its district code under a different geoid
        # attribute: sdelm, sdsec or sduni.
        elem = self.bundle.dep('elementary').analysis.dataframe(dist_pred)[['geoid', 'name']].copy()
        elem['nces'] = elem.geoid.apply(lambda geoid: mk_cd_code(AcsGeoid.parse(geoid).sdelm) )

        second = self.bundle.dep('secondary').analysis.dataframe(dist_pred)[['geoid', 'name']].copy()
        second['nces'] = second.geoid.apply(lambda geoid: mk_cd_code(AcsGeoid.parse(geoid).sdsec) )

        unified = self.bundle.dep('unified').analysis.dataframe(dist_pred)[['geoid', 'name']].copy()
        unified['nces'] = unified.geoid.apply(lambda geoid: mk_cd_code(AcsGeoid.parse(geoid).sduni) )

        # Add a GVID
        census_districts = pd.concat([elem, second, unified], axis=0)
        census_districts['gvid'] = census_districts.geoid.apply(lambda geoid: AcsGeoid.parse(geoid).convert(GVid) )

        # Do the join
        districts = cd_code_districts.set_index('ncesdist').join(census_districts.drop_duplicates().set_index('nces')).reset_index()
        # Force the first column's name to 'ncesdist'; the other names are kept.
        districts.columns = ['ncesdist'] + list(districts.columns)[1:]

        # These are different sizes, don't know why. The de-duplicated census file is smaller than the
        # list from California, probably because many of the districts are smaller than the reporting limits.
        # NOTE: Because join is a left join, (a) it must be joined in the order above ( cd_code_districts, the larger list,
        # is the base ) and (b) the joined 'districts' dataframe will have some missing geoids.
        # The missing geoids appear to be primarily for County offices of education and districts in small counties.
        # >>> print len(census_districts), len(census_districts.drop_duplicates()), len(cd_code_districts), len(districts)
        # >>> 1976 988 1098 1098

        # Replace values left missing by the left join.
        # NOTE(review): these calls rely on fillna(inplace=True) through
        # attribute access updating `districts` itself -- confirm this holds
        # for the pandas version in use.
        districts.gvid.fillna(value='', inplace = True)
        districts.geoid.fillna(value='', inplace = True)
        districts.name.fillna(value='', inplace = True)
        districts.ncesdist.fillna(value=0, inplace = True)

        df = districts.reset_index()

        # Header row first: 'id' followed by the dataframe's column names.
        yield ['id'] + list(df.columns)

        for index, row in df.iterrows():

            # Data row: the dataframe index followed by the row's values.
            yield [index] + list(row)