Code example #1
    def gen_blocks(self, feature):
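        """
        Yield one dict of block fields per feature name attached to this
        TIGER/Line edge (looked up by TLID in self.featnames_db), after
        normalizing the street name and working around known data quirks.
        """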
        block_fields = {}
        tlid = feature.get('TLID')
        for field_key, feature_key in (('right_from_num', 'RFROMADD'),
                                       ('left_from_num', 'LFROMADD'),
                                       ('right_to_num', 'RTOADD'),
                                       ('left_to_num', 'LTOADD')):
            block_fields[field_key] = feature.get(feature_key)

        block_fields['right_zip'] = feature.get('ZIPR')
        block_fields['left_zip'] = feature.get('ZIPL')
        for side in ('right', 'left'):
            block_fields[side + '_city'] = self._get_city(feature, side[0].upper()).upper()
            block_fields[side + '_state'] = self._get_state(feature, side[0].upper()).upper()

        if tlid in self.featnames_db:
            suffix_standardizer = geocoder_parsing.STANDARDIZERS['suffix']
            suffix_matcher = geocoder_parsing.TOKEN_REGEXES['suffix']

            for featname in self.featnames_db[tlid]:
                # Prefix eg. 'STATE HWY'.
                block_fields['prefix'] = featname.get('PRETYPABRV', '').upper().strip()
                # Main part of the name, eg. 'MAIN'
                block_fields['street'] = featname['NAME'].upper().strip()
                # Prefix direction eg. 'N'.
                block_fields['predir'] = featname['PREDIRABRV'].upper().strip()
                # Suffix direction eg. 'SW'.
                block_fields['postdir'] = featname['SUFDIRABRV'].upper().strip()
                # Road type, eg. 'ST', 'AVE', 'PKWY'.
                block_fields['suffix'] = featname['SUFTYPABRV'].upper().strip()
                if not block_fields['suffix']:
                    # Bug in the data:
                    # Many streets named eg. 'Wilson Park' put the whole thing in the
                    # name and nothing in the suffix.
                    # This breaks our geocoder, because it parses 'Park' as the suffix
                    # and expects to find it in that field.
                    # So, check if the street name ends with a recognized suffix.
                    if block_fields['street'].count(' '):
                        name_parts = block_fields['street'].split()
                        raw_suffix = name_parts.pop()
                        street = ' '.join(name_parts)
                        if suffix_matcher.match(raw_suffix):
                            block_fields['suffix'] = suffix_standardizer(raw_suffix)
                            block_fields['street'] = street

                # More bugs in data: some auxiliary roads have the prefix as
                # part of the name in nonstandard format.
                if not block_fields['prefix']:
                    prefix, street = None, None
                    if block_fields['street'].startswith('INTERSTATE '):
                        prefix, street = block_fields['street'].split(' ', 1)
                    elif block_fields['street'].startswith('I-'):
                        prefix, street = block_fields['street'].split('-', 1)
                    if prefix and street:
                        logger.debug("Splitting prefix %r out of street %r" % (prefix, street))
                        block_fields['street'] = street.strip()
                        block_fields['prefix'] = prefix.strip()
                yield block_fields.copy()

                self.tlids_with_blocks.add(tlid)
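
For reference, each dict yielded by gen_blocks above carries the keys set in the method. A minimal sketch with invented placeholder values (no real TIGER record is implied):

block = {
    'right_from_num': '100', 'right_to_num': '198',
    'left_from_num': '101', 'left_to_num': '199',
    'right_zip': '02139', 'left_zip': '02139',
    'right_city': 'CAMBRIDGE', 'left_city': 'CAMBRIDGE',
    'right_state': 'MA', 'left_state': 'MA',
    'prefix': '', 'predir': '', 'street': 'MAIN',
    'suffix': 'ST', 'postdir': '',
}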
Code example #2
    def skip_feature(self, feature):
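        """
        Return True if this edge feature should be skipped: outside the
        filter bounds, MTFCC code not in VALID_MTFCC, outside the filter
        city, or lacking address-range info on both sides.
        """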
        if self.filter_bounds:
            if not feature.geom.intersects(self.filter_bounds):
                logger.debug("Skipping %s, out of bounds" % feature)
                return True

        if feature.get('MTFCC') not in VALID_MTFCC:
            logger.debug("Skipping %s, not a valid feature type" % feature.get('MTFCC'))
            return True

        if self.filter_city:
            in_city = False
            for side in ('R', 'L'):
                if self._get_city(feature, side).upper() == self.filter_city:
                    in_city = True
            if not in_city:
                logger.debug("Skipping %s, out of city" % feature)
                return True

        if not (
            ((feature.get('RFROMADD') and feature.get('RTOADD')) or
            (feature.get('LFROMADD') and feature.get('LTOADD')))):
            logger.debug("Skipping %s, not enough address info" % feature)
            return True

        return False
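
A sketch of how skip_feature might be used when looping over edge features; here 'layer' (an iterable of TIGER edge features) and 'importer' (a TigerImporter instance) are placeholder names, not from the source:

blocks = []
for feature in layer:
    if importer.skip_feature(feature):
        continue  # filtered out by bounds, MTFCC, city, or address info
    blocks.extend(importer.gen_blocks(feature))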
Code example #3
    def skip_feature(self, feature):
        if self.filter_bounds:
            if not feature.geom.intersects(self.filter_bounds):
                logger.debug("Skipping %s, out of bounds" % feature)
                return True

        if feature.get('MTFCC') not in VALID_MTFCC:
            logger.debug("Skipping %s, not a valid feature type" %
                         feature.get('MTFCC'))
            return True

        if self.filter_city:
            in_city = False
            for side in ('R', 'L'):
                if self._get_city(feature, side).upper() == self.filter_city:
                    in_city = True
            if not in_city:
                logger.debug("Skipping %s, out of city" % feature)
                return True

        if not (((feature.get('RFROMADD') and feature.get('RTOADD')) or
                 (feature.get('LFROMADD') and feature.get('LTOADD')))):
            logger.debug("Skipping %s, not enough address info" % feature)
            return True

        return False
Code example #4
    def _clean_featnames(self, featnames_dbf):
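        """
        Build a TLID -> list-of-primary-name-rows mapping from the
        featnames .dbf, skipping rows with invalid MTFCC codes or no
        FULLNAME, and logging (but discarding) alternate names.
        """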
        rel_db = self._load_rel_db(featnames_dbf, 'TLID')
        featnames_db = defaultdict(list)
        for tlid, rows in rel_db.iteritems():
            primary = None
            alternates = []
            for row in rows:
                # TLID is Tiger/Line ID, unique per edge.
                # We use TLID instead of LINEARID as the key because
                # LINEARID is only unique per 'linear feature', which is
                # an implicit union of some edges. So if we used LINEARID,
                # we'd clobber a lot of keys in the call to
                # _load_rel_db().
                # Fixes #14 ("missing blocks").
                if row['MTFCC'] not in VALID_MTFCC:
                    continue
                if not row.get('FULLNAME'):
                    self.log("skipping tlid %r, no fullname" % tlid)
                    continue
                if row['PAFLAG'] == 'P':
                    primary = row
                    featnames_db[tlid].append(row)
                else:
                    alternates.append(row)
            # If no primary name survived the filters above, there is
            # nothing to compare the alternates against; skip this TLID.
            if primary is None:
                continue
            # A lot of alternates seem to be duplicates of the primary name,
            # not useful.
            alternates = [
                row for row in alternates
                if row['NAME'].upper() != primary['NAME'].upper()
            ]

            # For now we just log alternates that were found. Ideally we could save these
            # as aliases somehow, but at the moment we don't have a good way to do that.

            for alternate in alternates:
                correct = primary['NAME'].upper()
                incorrect = alternate['NAME'].upper()
                msg = 'Found alternate name for {0} ({1}): {2}\n{3}\n{4}'
                logger.debug(
                    msg.format(correct, primary['TLID'], incorrect,
                               pprint.pformat(primary),
                               pprint.pformat(alternate)))
        return featnames_db
Code example #5
    def _get_city(self, feature, side):
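        """
        Return the city name for one side ('R' or 'L') of the feature:
        from an intersecting city-ish Location when fix_cities is set,
        otherwise from the TIGER faces/places tables. Returns '' if the
        city cannot be determined.
        """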
        city = ''
        if self.fix_cities:
            from ebpub.db.models import get_city_locations
            overlapping_cities = list(get_city_locations().filter(location__intersects=feature.geom.geos))
            if overlapping_cities:
                city = overlapping_cities[0].name
                logger.debug("overriding city to %s" % city)
        else:
            fid = feature.get('TFID' + side)
            if fid in self.faces_db:
                face = self.faces_db[fid]
                # Handle both 2010 and older census files.
                # If none of these work, we simply get no city.
                pid = face.get('PLACEFP10') or face.get('PLACEFP00') or face.get('PLACEFP')

                if pid in self.places:
                    place = self.places[pid]
                    # Handle both 2010 and earlier Census files.
                    city = place.get('NAME10') or place['NAME']
        return city
Code example #6
    def _clean_featnames(self, featnames_dbf):
        rel_db = self._load_rel_db(featnames_dbf, 'TLID')
        featnames_db = defaultdict(list)
        for tlid, rows in rel_db.iteritems():
            primary = None
            alternates = []
            for row in rows:
                # TLID is Tiger/Line ID, unique per edge.
                # We use TLID instead of LINEARID as the key because
                # LINEARID is only unique per 'linear feature', which is
                # an implicit union of some edges. So if we used LINEARID,
                # we'd clobber a lot of keys in the call to
                # _load_rel_db().
                # Fixes #14 ("missing blocks").
                if row['MTFCC'] not in VALID_MTFCC:
                    continue
                if not row.get('FULLNAME'):
                    self.log("skipping tlid %r, no fullname" % tlid)
                    continue
                if row['PAFLAG'] == 'P':
                    primary = row
                    featnames_db[tlid].append(row)
                else:
                    alternates.append(row)
            # If no primary name survived the filters above, there is
            # nothing to compare the alternates against; skip this TLID.
            if primary is None:
                continue
            # A lot of alternates seem to be duplicates of the primary name,
            # not useful.
            alternates = [row for row in alternates if row['NAME'].upper() != primary['NAME'].upper()]

            # For now we just log alternates that were found. Ideally we could save these
            # as aliases somehow, but at the moment we don't have a good way to do that.


            for alternate in alternates:
                correct = primary['NAME'].upper()
                incorrect = alternate['NAME'].upper()
                msg = 'Found alternate name for {0} ({1}): {2}\n{3}\n{4}'
                logger.debug(msg.format(correct, primary['TLID'], incorrect,
                                        pprint.pformat(primary),
                                        pprint.pformat(alternate)))
        return featnames_db
Code example #7
    def _get_city(self, feature, side):
        city = ''
        if self.fix_cities:
            from ebpub.db.models import get_city_locations
            overlapping_cities = list(get_city_locations().filter(
                location__intersects=feature.geom.geos))
            if overlapping_cities:
                city = overlapping_cities[0].name
                logger.debug("overriding city to %s" % city)
        else:
            fid = feature.get('TFID' + side)
            if fid in self.faces_db:
                face = self.faces_db[fid]
                # Handle both 2010 and older census files.
                # If none of these work, we simply get no city.
                pid = face.get('PLACEFP10') or face.get(
                    'PLACEFP00') or face.get('PLACEFP')

                if pid in self.places:
                    place = self.places[pid]
                    # Handle both 2010 and earlier Census files.
                    city = place.get('NAME10') or place['NAME']
        return city
Code example #8
def main(argv=None):
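    """
    Command-line entry point: parse options, build the optional
    geographic filters, run the TigerImporter, and log what was created.
    """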
    if argv is None:
        argv = sys.argv[1:]
    parser = optparse.OptionParser(usage='%prog edges.shp featnames.dbf faces.dbf place.shp')
    parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False)
    parser.add_option('-c', '--city', dest='city', help='A city name to filter against')
    parser.add_option('-f', '--fix-cities', action="store_true", default=False,
                      help='Whether to override "city" attribute of blocks and '
                      'streets by finding an intersecting Location of a city-ish '
                      'type. Only makes sense if you have configured '
                      'multiple_cities=True in the METRO_LIST of your settings.py, '
                      'and after you have created some appropriate Locations.')

    parser.add_option('-b', '--filter-bounds', action="store", default=1,
                      type='int',
                      help='Whether to skip blocks outside the metro extent from your '
                      'settings.py. Default 1 (true); use 0 to disable.')
    parser.add_option('-l', '--filter-location', action="append",
                      help='A location (spelled as location-type-slug:location-slug) '
                      'that will be used to filter out blocks outside its boundaries. '
                      'May be passed more than once.'
                      )

    parser.add_option('-e', '--encoding', dest='encoding',
                      help='Encoding to use when reading the shapefile',
                      default='utf8')
    (options, args) = parser.parse_args(argv)
    if len(args) != 4:
        return parser.error('must provide 4 arguments, see usage')

    if options.filter_bounds:
        from ebpub.utils.geodjango import get_default_bounds
        filter_bounds = get_default_bounds()
    else:
        filter_bounds = None

    # Optionally filter on bounds of some Locations too.
    loc_bounds = None
    for locslug in options.filter_location or []:
        typeslug, locslug = locslug.split(':', 1)
        from ebpub.db.models import Location
        location = Location.objects.get(location_type__slug=typeslug, slug=locslug)
        if loc_bounds is None:
            loc_bounds = location.location
        else:
            loc_bounds = loc_bounds.union(location.location)

    if None not in (filter_bounds, loc_bounds):
        filter_bounds = filter_bounds.intersection(loc_bounds)
    elif loc_bounds is not None:
        filter_bounds = loc_bounds

    tiger = TigerImporter(*args, verbose=options.verbose,
                          filter_city=options.city,
                          filter_bounds=filter_bounds,
                          encoding=options.encoding,
                          fix_cities=options.fix_cities)
    if options.verbose:
        import logging
        logger.setLevel(logging.DEBUG)
    num_created = tiger.save()
    logger.info("Created %d new blocks" % num_created)
    logger.debug("... from %d feature names" % len(tiger.featnames_db))
    logger.debug("feature tlids with blocks: %d" % len(tiger.tlids_with_blocks))

    import pprint
    tlids_wo_blocks = set(tiger.featnames_db.keys()).difference(tiger.tlids_with_blocks)
    logger.debug("feature tlids WITHOUT blocks: %d" % len(tlids_wo_blocks))
    all_rows = []
    for t in tlids_wo_blocks:
        all_rows.extend(tiger.featnames_db[t])
    logger.debug("Rows: %d" % len(all_rows))
    names = [(r['FULLNAME'], r['TLID']) for r in all_rows]
    names.sort()
    logger.debug("=================")
    for n, t in names:
        logger.debug("%s %s" % (n, t))
    for tlid in sorted(tlids_wo_blocks)[:10]:
        feat = tiger.featnames_db[tlid]
        logger.debug(pprint.pformat(feat))
Code example #9
def main(argv=None):
    if argv is None:
        argv = sys.argv[1:]
    parser = optparse.OptionParser(
        usage='%prog edges.shp featnames.dbf faces.dbf place.shp')
    parser.add_option('-v',
                      '--verbose',
                      action='store_true',
                      dest='verbose',
                      default=False)
    parser.add_option('-c',
                      '--city',
                      dest='city',
                      help='A city name to filter against')
    parser.add_option(
        '-f',
        '--fix-cities',
        action="store_true",
        default=False,
        help='Whether to override "city" attribute of blocks and '
        'streets by finding an intersecting Location of a city-ish '
        'type. Only makes sense if you have configured '
        'multiple_cities=True in the METRO_LIST of your settings.py, '
        'and after you have created some appropriate Locations.')

    parser.add_option(
        '-b',
        '--filter-bounds',
        action="store",
        default=1,
        type='int',
        help='Whether to skip blocks outside the metro extent from your '
        'settings.py. Default 1 (true); use 0 to disable.')
    parser.add_option(
        '-l',
        '--filter-location',
        action="append",
        help='A location (spelled as location-type-slug:location-slug) '
        'that will be used to filter out blocks outside its boundaries. '
        'May be passed more than once.')

    parser.add_option('-e',
                      '--encoding',
                      dest='encoding',
                      help='Encoding to use when reading the shapefile',
                      default='utf8')
    (options, args) = parser.parse_args(argv)
    if len(args) != 4:
        return parser.error('must provide 4 arguments, see usage')

    if options.filter_bounds:
        from ebpub.utils.geodjango import get_default_bounds
        filter_bounds = get_default_bounds()
    else:
        filter_bounds = None

    # Optionally filter on bounds of some Locations too.
    loc_bounds = None
    for locslug in options.filter_location or []:
        typeslug, locslug = locslug.split(':', 1)
        from ebpub.db.models import Location
        location = Location.objects.get(location_type__slug=typeslug,
                                        slug=locslug)
        if loc_bounds is None:
            loc_bounds = location.location
        else:
            loc_bounds = loc_bounds.union(location.location)

    if None not in (filter_bounds, loc_bounds):
        filter_bounds = filter_bounds.intersection(loc_bounds)
    elif loc_bounds is not None:
        filter_bounds = loc_bounds

    tiger = TigerImporter(*args,
                          verbose=options.verbose,
                          filter_city=options.city,
                          filter_bounds=filter_bounds,
                          encoding=options.encoding,
                          fix_cities=options.fix_cities)
    if options.verbose:
        import logging
        logger.setLevel(logging.DEBUG)
    num_created = tiger.save()
    logger.info("Created %d new blocks" % num_created)
    logger.debug("... from %d feature names" % len(tiger.featnames_db))
    logger.debug("feature tlids with blocks: %d" %
                 len(tiger.tlids_with_blocks))

    import pprint
    tlids_wo_blocks = set(tiger.featnames_db.keys()).difference(
        tiger.tlids_with_blocks)
    logger.debug("feature tlids WITHOUT blocks: %d" % len(tlids_wo_blocks))
    all_rows = []
    for t in tlids_wo_blocks:
        all_rows.extend(tiger.featnames_db[t])
    logger.debug("Rows: %d" % len(all_rows))
    names = [(r['FULLNAME'], r['TLID']) for r in all_rows]
    names.sort()
    logger.debug("=================")
    for n, t in names:
        logger.debug("%s %s" % (n, t))
    for tlid in sorted(tlids_wo_blocks)[:10]:
        feat = tiger.featnames_db[tlid]
        logger.debug(pprint.pformat(feat))
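
As a rough sketch, main() above can also be driven programmatically by passing an argv-style list; the shapefile names and the city below are placeholders, and Django settings (METRO_LIST, metro extent, etc.) are assumed to already be configured:

main(['edges.shp', 'featnames.dbf', 'faces.dbf', 'place.shp',
      '--city', 'SPRINGFIELD', '--verbose'])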
Code example #10
    def gen_blocks(self, feature):
        block_fields = {}
        tlid = feature.get('TLID')
        for field_key, feature_key in (('right_from_num', 'RFROMADD'),
                                       ('left_from_num', 'LFROMADD'),
                                       ('right_to_num', 'RTOADD'),
                                       ('left_to_num', 'LTOADD')):
            block_fields[field_key] = feature.get(feature_key)

        block_fields['right_zip'] = feature.get('ZIPR')
        block_fields['left_zip'] = feature.get('ZIPL')
        for side in ('right', 'left'):
            block_fields[side + '_city'] = self._get_city(
                feature, side[0].upper()).upper()
            block_fields[side + '_state'] = self._get_state(
                feature, side[0].upper()).upper()

        if tlid in self.featnames_db:
            suffix_standardizer = geocoder_parsing.STANDARDIZERS['suffix']
            suffix_matcher = geocoder_parsing.TOKEN_REGEXES['suffix']

            for featname in self.featnames_db[tlid]:
                # Prefix eg. 'STATE HWY'.
                block_fields['prefix'] = featname.get('PRETYPABRV',
                                                      '').upper().strip()
                # Main part of the name, eg. 'MAIN'
                block_fields['street'] = featname['NAME'].upper().strip()
                # Prefix direction eg. 'N'.
                block_fields['predir'] = featname['PREDIRABRV'].upper().strip()
                # Suffix direction eg. 'SW'.
                block_fields['postdir'] = featname['SUFDIRABRV'].upper().strip()
                # Road type, eg. 'ST', 'AVE', 'PKWY'.
                block_fields['suffix'] = featname['SUFTYPABRV'].upper().strip()
                if not block_fields['suffix']:
                    # Bug in the data:
                    # Many streets named eg. 'Wilson Park' put the whole thing in the
                    # name and nothing in the suffix.
                    # This breaks our geocoder, because it parses 'Park' as the suffix
                    # and expects to find it in that field.
                    # So, check if the street name ends with a recognized suffix.
                    if block_fields['street'].count(' '):
                        name_parts = block_fields['street'].split()
                        raw_suffix = name_parts.pop()
                        street = ' '.join(name_parts)
                        if suffix_matcher.match(raw_suffix):
                            block_fields['suffix'] = suffix_standardizer(
                                raw_suffix)
                            block_fields['street'] = street

                # More bugs in data: some auxiliary roads have the prefix as
                # part of the name in nonstandard format.
                if not block_fields['prefix']:
                    prefix, street = None, None
                    if block_fields['street'].startswith('INTERSTATE '):
                        prefix, street = block_fields['street'].split(' ', 1)
                    elif block_fields['street'].startswith('I-'):
                        prefix, street = block_fields['street'].split('-', 1)
                    if prefix and street:
                        logger.debug("Splitting prefix %r out of street %r" %
                                     (prefix, street))
                        block_fields['street'] = street.strip()
                        block_fields['prefix'] = prefix.strip()
                yield block_fields.copy()

                self.tlids_with_blocks.add(tlid)