Esempio n. 1
0
    def __init__(self, year, release, stusab, summary_level, tableid):
        """Prepare a reader for one table in one state, or in every state.

        The table id is normalized (stripped, lower-cased) and looked up in
        the table metadata for ``year``/``release``. The list of states to
        read is then derived from ``stusab`` and the per-sequence state is
        initialized by ``_collect_sequences()``.

        Args:
            year, release, stusab, summary_level: Forwarded unchanged to the
                base-class initializer (with ``seq=None``).
            tableid: Table identifier; surrounding whitespace and letter
                case are ignored.

        Raises:
            SourceError: If the normalized table id is not present in the
                table metadata. When the id starts with 'b' or 'c' and the
                same id with the other letter exists, the message says so.
        """
        # HACK! This initializer will download files. It should not.

        from geoid.censusnames import stusab as state_name_map

        super().__init__(year, release, stusab, summary_level, seq=None)

        self.meta = tablemeta(year, release)
        self.tableid = tableid.strip().lower()

        try:
            self.table = self.meta.tables[self.tableid]
        except KeyError:
            # Offer the same id with the other leading letter ('b' <-> 'c')
            # as a hint, but only when that table is actually known.
            tail = self.tableid[1:]
            sibling = {'b': 'c' + tail, 'c': 'b' + tail}.get(self.tableid[:1])

            if sibling is not None and sibling in self.meta.tables:
                other_msg = f" However, table '{sibling}' exists"
            else:
                other_msg = ''

            raise SourceError(f"Table metadata does not include table '{self.tableid}' " + other_msg)

        # 'US' (any letter case) expands to every value of the state name
        # map; anything else is treated as a single state.
        if self.stusab.upper() == 'US':
            self.state_abs = list(state_name_map.values())
        else:
            self.state_abs = [self.stusab]

        # Initialized empty here, before _collect_sequences() runs.
        self.item_getters = []
        self.lr_pos = None

        self._collect_sequences()
def build_block_maps(pkg):
    """Build the CBSA and UTM block maps for every state and write them as CSV.

    The UTM grid and the CBSA geoframe (re-projected to EPSG:4326) are put
    into the package cache once, then one ``_f_block_maps`` task per state
    is run through ``run_mp``. Each task result is indexed as a pair of
    cache keys: element 0 for that state's CBSA map and element 1 for its
    UTM map. The per-state frames are concatenated and written to
    ``data/cbsa_map.csv`` and ``data/utm_map.csv`` next to the package.

    Args:
        pkg: Package object providing ``reference()`` and ``path``.
    """
    from contextlib import nullcontext

    cache = get_cache(pkg)
    states = list(stusab.values())

    grid_key = 'blocks/map/source/utm'
    cache.put(grid_key, pkg.reference('utm_grid').geoframe())

    cbsa_key = 'blocks/map/source/cbsa'
    cache.put(cbsa_key, pkg.reference('cbsa').geoframe().to_crs(4326))

    tasks = [(cache, st, grid_key, cbsa_key) for st in states]

    # appnope is optional. Only the import itself is guarded: wrapping the
    # whole run in the try block would treat an ImportError raised by the
    # workers as "appnope missing" and silently run every task a second time.
    try:
        import appnope
    except ImportError:
        scope = nullcontext()
    else:
        scope = appnope.nope_scope()

    with scope:
        r = run_mp(_f_block_maps, tasks)

    cbsa_map = pd.concat([cache.get(e[0]) for e in r])
    utm_map = pd.concat([cache.get(e[1]) for e in r])

    pkg_root = Path(pkg.path).parent
    cbsa_map.to_csv(pkg_root.joinpath('data', 'cbsa_map.csv'), index=False)
    utm_map.to_csv(pkg_root.joinpath('data', 'utm_map.csv'), index=False)
Esempio n. 3
0
    def __init__(self, year, release, stusab, summary_level, tableid):
        """Prepare a reader for a single table stored in one sequence file.

        The table id is normalized (stripped, lower-cased) and looked up in
        the table metadata for ``year``/``release``. The table's sequence
        number is taken from that metadata, and the sequence file of the
        first state is opened to find the columns that belong to the table
        and the position of the ``LOGRECNO`` column.

        Args:
            year, release, stusab, summary_level: Forwarded unchanged to the
                base-class initializer (with ``seq=None``).
            tableid: Table identifier; surrounding whitespace and letter
                case are ignored.

        Raises:
            SourceError: If the normalized table id is not present in the
                table metadata. When the id starts with 'b' or 'c' and the
                same id with the other letter exists, the message says so.
        """
        from geoid.censusnames import stusab as state_name_map

        super().__init__(year, release, stusab, summary_level, seq=None)

        self.meta = tablemeta(year, release)

        self.tableid = tableid.strip().lower()

        try:
            self.table = self.meta.tables[self.tableid]
        except KeyError:
            # Suggest the same id with the other leading letter when it exists.
            alt_c = 'c' + self.tableid[1:]
            alt_b = 'b' + self.tableid[1:]

            if (self.tableid.startswith('b') and alt_c in self.meta.tables):
                other_msg = f" However, table '{alt_c}' exists"
            elif (self.tableid.startswith('c') and alt_b in self.meta.tables):
                other_msg = f" However, table '{alt_b}' exists"
            else:
                other_msg = ''

            raise SourceError(
                f"Table metadata does not include table '{self.tableid}' " +
                other_msg)

        self.seq = int(self.table.seq)

        # 'US' (any letter case) expands to every value of the state name
        # map; anything else is treated as a single state.
        self.state_abs = list(
            state_name_map.values()) if self.stusab.upper() == 'US' else [
                self.stusab
            ]

        # First sequence file; used below for column metadata and headers.
        sequence_file = SequenceFile(self.year, self.release,
                                     self.state_abs[0], self.summary_level,
                                     self.seq)

        # Columns to extract from the sequence file. Compare against the
        # normalized id: the raw ``tableid`` argument may carry surrounding
        # whitespace, which passes the metadata lookup above but would match
        # no column here.
        self._columns = [
            c for c in sequence_file.meta
            if c.table_id and c.table_id.lower() == self.tableid
        ]

        self.lr_pos = sequence_file.file_headers.index('LOGRECNO')

        self.col_positions = [c.seq_file_col_no for c in self._columns]
        self.ig = itemgetter(*self.col_positions)

        geo = self.geo()

        # NOTE(review): itemgetter returns a bare item, not a tuple, when
        # given exactly one position; confirm the concatenations below are
        # valid for single-column tables.
        self.file_headers = geo['LOGRECNO'][0] + self.ig(
            sequence_file.file_headers)
        self.descriptions = geo['LOGRECNO'][0] + self.ig(
            sequence_file.descriptions)
def split_blocks(pkg):
    """Download the block file of every state and cache it.

    One ``_f_get_split_blocks`` task is created per state, carrying the
    state value, the package cache and the state's download URL (the
    ``block_templ`` reference URL formatted with ``st``).

    Args:
        pkg: Package object providing ``reference()``.

    Returns:
        Whatever ``run_mp`` returns for the submitted tasks.
    """
    cache = get_cache(pkg)

    tasks = []
    for st in stusab.values():
        url = pkg.reference('block_templ').url.format(st=st)
        tasks.append((st, cache, url))

    # Only 4 workers because the tasks are downloading.
    return run_mp(_f_get_split_blocks, tasks, n_cpu=4)
def join_blocks(pkg, break_starts):
    """Join census blocks and OSM points.

    One ``_f_join_blocks`` task is run for every combination of an entry of
    ``break_starts`` and a state; each task tuple is ``(break_start, state,
    cache)``.

    Args:
        pkg: Package object used to obtain the cache.
        break_starts: Iterable of values combined with every state to form
            the tasks.

    Returns:
        list: The task results that are not ``Exception`` instances.
        Exceptions returned by tasks are logged as warnings and left out.
    """
    import logging

    cache = get_cache(pkg)

    states = list(stusab.values())

    tasks = [e + (cache,) for e in product(break_starts, states)]

    keys = run_mp(_f_join_blocks, tasks)

    joins = []
    failures = []
    for e in keys:
        (failures if isinstance(e, Exception) else joins).append(e)

    # Failed tasks come back as exception objects; report them instead of
    # dropping them silently, but keep the run best-effort.
    if failures:
        log = logging.getLogger(__name__)
        log.warning("%d of %d block join tasks failed",
                    len(failures), len(tasks))
        for exc in failures:
            log.warning("Block join task failed: %r", exc)

    return joins
    def tracts(self):
        """Return the tracts frame, building and caching it on first use.

        On the first call the per-state tract geoframes are loaded from the
        ``us_tracts_template`` reference URL, concatenated and re-projected
        to EPSG:4326. The columns ``continential``, ``tract_id``,
        ``geohash`` and ``gh4`` are added and a fixed subset of columns is
        stored in ``self._tracts``. Later calls return the stored frame.
        """
        if self._tracts is not None:
            return self._tracts

        logger.info("Building tracts")

        template = self.pkg.reference('us_tracts_template').url

        per_state = []
        for st in tqdm(stusab.values()):
            per_state.append(rg.geoframe(template.format(st=st)))

        frame = pd.concat(per_state).to_crs(4326)

        # Flag (1/0) the tracts whose state FIPS code occurs in self.states;
        # the original comment calls these the tracts in the continental US.
        in_states = frame.statefp.isin(self.states.statefp.unique())
        frame['continential'] = in_states.astype(int)

        # Positional id: 0..n-1 in concatenation order.
        frame['tract_id'] = frame.reset_index().index

        # NOTE: accurate area computation would need each tract converted to
        # its UTM zone first; that step is not performed here.

        coords = frame[['intptlat', 'intptlon']].astype(float)
        frame['geohash'] = coords.apply(
            lambda row: gh.encode(row.intptlat, row.intptlon), axis=1)

        # First four characters of the geohash.
        frame['gh4'] = frame.geohash.str.slice(0, 4)

        keep = [
            'geoid', 'tract_id', 'geohash', 'statefp', 'intptlat',
            'intptlon', 'geometry', 'aland', 'awater', 'gh4',
            'continential',
        ]
        self._tracts = frame[keep]

        return self._tracts
Esempio n. 7
0
from .files.metafiles import TableMeta
from .appurl import CensusUrl
from .files.appurl import CensusFileUrl, CensusGeoUrl
from .censusreporter.url import CensusReporterUrl, CensusReporterShapeURL

def _installed_version(name):
    """Return the installed version of distribution *name*, or 'unknown'.

    The standard-library ``importlib.metadata`` is preferred so that the
    package can be imported when setuptools (``pkg_resources``) is not
    installed; ``pkg_resources`` is only used where ``importlib.metadata``
    is unavailable.
    """
    try:
        from importlib.metadata import PackageNotFoundError, version
    except ImportError:
        from pkg_resources import DistributionNotFound, get_distribution

        try:
            return get_distribution(name).version
        except DistributionNotFound:
            return 'unknown'

    try:
        return version(name)
    except PackageNotFoundError:
        return 'unknown'


# Change here if project is renamed and does not equal the package name
dist_name = 'publicdata-census'
__version__ = _installed_version(dist_name)

from geoid.censusnames import stusab
# Sorted values of the stusab map, excluding DC, PR, AK and HI.
# sorted() already returns a new list, so no extra list() wrapper is needed.
continential_states = sorted(
    set(stusab.values()) - {'DC', 'PR', 'AK', 'HI'})


def census_table(table, state, sl='state', year=2018, release=5):
    """Load a Census table through rowgenerators using a ``census://`` URL.

    The URL is built as ``census://{year}/{release}/{state}/{sl}/{table}``.

    Args:
        table: Table identifier placed last in the URL.
        state: State component of the URL.
        sl: Summary-level component of the URL (default ``'state'``).
        year: Year component of the URL (default 2018).
        release: Release component of the URL (default 5).

    Returns:
        The result of ``rowgenerators.dataframe`` for that URL.
    """
    # Imported lazily so rowgenerators is only needed when this is called.
    from rowgenerators import dataframe

    url = f'census://{year}/{release}/{state}/{sl}/{table}'
    return dataframe(url)


def census_geo(state, sl='state', year=2018, release=5):
    """Load Census geography through rowgenerators using a ``censusgeo://`` URL.

    The URL is built as ``censusgeo://{year}/{release}/{state}/{sl}``.

    Args:
        state: State component of the URL.
        sl: Summary-level component of the URL (default ``'state'``).
        year: Year component of the URL (default 2018).
        release: Release component of the URL (default 5).

    Returns:
        The result of ``rowgenerators.geoframe`` for that URL.
    """
    # Imported lazily so rowgenerators is only needed when this is called.
    from rowgenerators import geoframe

    url = f'censusgeo://{year}/{release}/{state}/{sl}'
    return geoframe(url)