def intersecting_blocks(block):
    """
    Find the blocks that cross the given block at a single point.

    Runs a raw self-join on the block's table and keeps the rows whose
    intersection with ``block`` has geometry type POINT.

    Rows that share both street name and suffix with ``block`` are
    filtered out -- a heuristic for dropping adjacent segments of the
    same street.

    Returns a list of ``(block, intersection)`` tuples, where
    ``intersection`` is the last selected column parsed with fromstr().
    """
    table_name = block._meta.db_table
    columns = ["b.%s" % field.name for field in block._meta.fields]
    # The intersection geometry rides along as the final column.
    columns.append("ST_Intersection(a.geom, b.geom)")
    cursor = connection.cursor()
    # Column and table names are interpolated here; the block id is
    # left as a real query parameter (the doubled %%s).
    query = """
        SELECT %s
        FROM %s a,
             %s b
        WHERE
            a.id = %%s AND
            ST_Intersects(a.geom, b.geom) AND
            GeometryType(ST_Intersection(a.geom, b.geom)) = 'POINT' AND
            NOT (b.street = a.street AND b.suffix = a.suffix)
        ORDER BY
            b.predir, b.street, b.suffix, b.left_from_num, b.right_from_num
        """ % (", ".join(columns), table_name, table_name)
    cursor.execute(query, [block.id])
    # Every column but the last maps positionally onto Block's fields.
    return [(Block(*row[:-1]), fromstr(row[-1]))
            for row in cursor.fetchall()]
Exemple #2
0
def reverse_geocode(point):
    """
    Find the block closest to ``point``.

    ``point`` may be a Point instance, an (x, y) tuple or list, or a
    WKT string (which is parsed with SRID 4326).

    Returns a ``(block, distance)`` tuple.  The distance is whatever
    ST_Distance reports for the stored geometries (presumably degrees,
    given SRID 4326 -- not verified here).

    Raises ReverseGeocodeError when no block is found within the
    search radius.
    """
    # Imported locally to sidestep a circular import.
    from ebpub.streets.models import Block

    if isinstance(point, basestring):
        from django.contrib.gis.geos import fromstr
        point = fromstr(point, srid=4326)
    elif isinstance(point, (tuple, list)):
        point = Point(tuple(point))

    # The search radius is expressed in degrees, since transforming to
    # a projected space is too slow for this purpose.
    # TODO: store projected versions of the locations alongside the
    # canonical lng/lat versions.
    #
    # The radius exists to shrink the candidate set: the distance
    # comparison itself does not use the spatial index.
    # TODO: convert this to GeoDjango syntax.  Should be possible, but
    # there are some subtleties / performance issues with the DB API.
    search_radius = 0.007
    cursor = connection.cursor()

    # The point is embedded as WKT rather than WKB: a WKB string can
    # contain '%', which confuses psycopg2, see
    # http://stackoverflow.com/questions/1734814/why-isnt-psycopg2-executing-any-of-my-sql-functions-indexerror-tuple-index-ou
    # Escaping the WKB, or passing a Binary() as a query parameter,
    # might also work, but WKT is known to be safe.
    model_fields = Block._meta.fields
    sql_args = {
        'field_list': ', '.join([f.column for f in model_fields]),
        'pt_wkt': point.wkt,
        'geom_fieldname': 'geom',
        'tablename': Block._meta.db_table,
        'min_distance': search_radius,
    }
    query = """
        SELECT %(field_list)s, ST_Distance(ST_GeomFromText('%(pt_wkt)s', 4326), %(geom_fieldname)s) AS "dist"
        FROM %(tablename)s
        WHERE id IN
            (SELECT id
             FROM %(tablename)s
             WHERE ST_DWithin(%(geom_fieldname)s, ST_GeomFromText('%(pt_wkt)s', 4326), %(min_distance)s))
        ORDER BY "dist"
        LIMIT 1;
    """ % sql_args
    cursor.execute(query)
    rows = cursor.fetchall()
    if not rows:
        raise ReverseGeocodeError('No results')
    # LIMIT 1 means there is at most one row; its final column is the
    # computed distance and the leading columns are the Block fields.
    nearest = rows[0]
    return Block(*nearest[:len(model_fields)]), nearest[-1]
Exemple #3
0
    def save(self, verbose=True):
        num_created = 0
        for feature in self.layer:
            parent_id = None
            if not self.skip_feature(feature):
                for block_fields in self.gen_blocks(feature):
                    # Usually (at least in Boston data) there is only
                    # 1 block per feature.  But sometimes there are
                    # multiple names for one street, eg.
                    # "N. Commercial Wharf" and "Commercial Wharf N.";
                    # in that case those would be yielded by gen_blocks() as
                    # two separate blocks. Is that intentional, or a bug?
                    block = Block(**block_fields)
                    block.geom = feature.geom.geos
                    (block.from_num, block.to_num) = make_block_numbers(
                        block_fields['left_from_num'],
                        block_fields['left_to_num'],
                        block_fields['right_from_num'],
                        block_fields['right_to_num'])

                    street_name, block_name = make_pretty_name(
                        block_fields['left_from_num'],
                        block_fields['left_to_num'],
                        block_fields['right_from_num'],
                        block_fields['right_to_num'],
                        block_fields['predir'],
                        block_fields['street'],
                        block_fields['suffix'],
                        block_fields['postdir']
                    )
                    block.pretty_name = block_name
                    block.street_pretty_name = street_name
                    block.street_slug = slugify(' '.join((block_fields['street'], block_fields['suffix'])))
                    block.save()
                    if parent_id is None:
                        parent_id = block.id
                    else:
                        block.parent_id = parent_id
                        block.save()
                    num_created += 1
                    if verbose:
                        print '%d\tCreated block %s for feature %d' % (num_created, block, feature.get('TLID'))
        return num_created
Exemple #4
0
 def _get_block(self):
     """
     Return a Block intersecting a small buffer around block_center.

     Returns None when block_center is None.  Raises Block.DoesNotExist
     when no block intersects the buffered area.
     """
     center = self.block_center
     if center is None:
         return None
     from ebpub.utils.mapmath import buffer_by_meters
     # An exact intersection with the bare point doesn't always get a
     # match, so search a slightly enlarged area instead.
     search_area = buffer_by_meters(center, BLOCK_FUZZY_DISTANCE_METERS)
     candidates = Block.objects.filter(geom__intersects=search_area)
     if candidates:
         # Several blocks may be this close; any of them will do.
         return candidates[0]
     raise Block.DoesNotExist(
         "No block found at lat %s, lon %s" % (center.y, center.x))
Exemple #5
0
 def test_add_by_place_id(self, mock_get_object_or_404):
     # A 'b:'-prefixed place id should leave a BlockFilter under the
     # chain's 'location' key.  The patched get_object_or_404 hands
     # back an unsaved Block built below.
     from ebpub.db.schemafilters import BlockFilter
     from ebpub.streets.models import Block
     block_kwargs = dict(
         city='city',
         street_slug='street_slug',
         pretty_name='pretty_name',
         street_pretty_name='street_pretty_name',
         street='street',
         from_num='123',
         to_num='456',
     )
     mock_get_object_or_404.return_value = Block(**block_kwargs)
     chain = FilterChain()
     chain.add_by_place_id('b:123.1')
     location_filter = chain['location']
     self.assert_(isinstance(location_filter, BlockFilter))
Exemple #6
0
def reverse_geocode(point):
    """
    Find the block nearest to ``point``.

    ``point`` must provide a ``wkb`` attribute; it is embedded in the
    query as a WKB literal with SRID 4326.

    Returns a ``(block, distance)`` tuple, where the distance is the
    ST_Distance value computed by the database.

    Raises ReverseGeocodeError when the query returns no rows.
    """
    # The search radius is expressed in degrees, since transforming to
    # a projected space is too slow for this purpose.
    # TODO: store projected versions of the locations alongside the
    # canonical lng/lat versions.
    #
    # The radius exists to shrink the candidate set: the distance
    # comparison itself does not use the spatial index.
    # TODO: convert this to GeoDjango syntax.  Should be possible, but
    # there are some subtleties / performance issues with the DB API.
    search_radius = 0.007
    model_fields = Block._meta.fields
    cursor = connection.cursor()
    # NOTE(review): the WKB literal is interpolated straight into the
    # SQL text; a '%' inside it may be misread as a placeholder by the
    # DB driver -- confirm against the driver in use.
    sql_args = {
        'field_list': ', '.join([f.column for f in model_fields]),
        'pt_wkb': Binary(point.wkb),
        'geom_fieldname': 'location',
        'tablename': Block._meta.db_table,
        'min_distance': search_radius,
    }
    query = """
        SELECT %(field_list)s, ST_Distance(ST_GeomFromWKB(E%(pt_wkb)s, 4326), %(geom_fieldname)s) AS "dist"
        FROM %(tablename)s
        WHERE id IN
            (SELECT id
             FROM %(tablename)s
             WHERE ST_DWithin(%(geom_fieldname)s, ST_GeomFromWKB(E%(pt_wkb)s, 4326), %(min_distance)s))
        ORDER BY "dist"
        LIMIT 1;
    """ % sql_args
    cursor.execute(query)
    rows = cursor.fetchall()
    if not rows:
        raise ReverseGeocodeError()
    # LIMIT 1 means there is at most one row; its final column is the
    # computed distance and the leading columns are the Block fields.
    nearest = rows[0]
    return Block(*nearest[:len(model_fields)]), nearest[-1]
Exemple #7
0
 def save(self, verbose=True):
     num_created = 0
     for feature in self.layer:
         parent_id = None
         if not self.skip_feature(feature):
             for block_fields in self.gen_blocks(feature):
                 block = Block(**block_fields)
                 block.geom = feature.geom.geos
                 street_name, block_name = make_pretty_name(
                     block_fields['left_from_num'],
                     block_fields['left_to_num'],
                     block_fields['right_from_num'],
                     block_fields['right_to_num'], block_fields['predir'],
                     block_fields['street'], block_fields['suffix'],
                     block_fields['postdir'])
                 block.pretty_name = block_name
                 block.street_pretty_name = street_name
                 block.street_slug = slugify(' '.join(
                     (block_fields['street'], block_fields['suffix'])))
                 block.save()
                 if parent_id is None:
                     parent_id = block.id
                 else:
                     block.parent_id = parent_id
                     block.save()
                 num_created += 1
                 if verbose:
                     print 'Created block %s' % block
     return num_created
    def save(self):
        """
        Create one Block per usable feature in self.layer.

        A feature is skipped when:

        * neither side has both a "from" and a "to" address number,
        * its geometry is not a LINESTRING,
        * it has no street name, or
        * its street name is a bare number with no suffix / type.

        String values are upper-cased and integer zeros become None.
        Only the primary street name is read (no alternate-name
        suffixes, and no FCC filtering), so each feature yields at most
        one block and no parent/child linking is needed.

        Returns the number of blocks created.
        """
        num_created = 0
        for feature in self.layer:
            fields = {}
            for esri_fieldname, block_fieldname in FIELD_MAP.items():
                value = feature.get(esri_fieldname)
                if isinstance(value, basestring):
                    value = value.upper()
                elif isinstance(value, int) and value == 0:
                    value = None
                fields[block_fieldname] = value
            if not ((fields['left_from_num'] and fields['left_to_num']) or
                    (fields['right_from_num'] and fields['right_to_num'])):
                continue
            # Sometimes the "from" number is greater than the "to"
            # number in the source data, so we swap them into proper
            # ordering.
            for side in ('left', 'right'):
                from_key, to_key = '%s_from_num' % side, '%s_to_num' % side
                if fields[from_key] > fields[to_key]:
                    fields[from_key], fields[to_key] = fields[to_key], fields[from_key]
            if feature.geom.geom_name != 'LINESTRING':
                continue
            name_fields = {}
            for esri_fieldname, block_fieldname in NAME_FIELD_MAP.items():
                name_fields[block_fieldname] = feature.get(esri_fieldname).upper()
            if not name_fields['street']:
                continue
            # Skip blocks with bare number street names and no suffix / type
            if not name_fields['suffix'] and re.search(r'^\d+$', name_fields['street']):
                continue
            fields.update(name_fields)

            # Ensure we have unicode.
            for key, val in fields.items():
                if isinstance(val, str):
                    fields[key] = val.decode(self.encoding)

            fields['street_pretty_name'], fields['pretty_name'] = make_pretty_name(
                fields['left_from_num'],
                fields['left_to_num'],
                fields['right_from_num'],
                fields['right_to_num'],
                fields['predir'],
                fields['street'],
                fields['suffix'],
                fields['postdir'],
            )

            print >> sys.stderr, 'Looking at block pretty name %s' % fields['street']

            fields['street_slug'] = slugify(u' '.join((fields['street'], fields['suffix'])))

            # Watch out for addresses like '247B' which can't be
            # saved as an IntegerField. But do this after making
            # pretty names.
            # Only strings can carry a letter suffix; the normalisation
            # above may have left None (from a zero) or a plain int
            # here, and calling .rstrip() on those would raise.
            for addr_key in ('left_from_num', 'left_to_num', 'right_from_num', 'right_to_num'):
                if isinstance(fields[addr_key], basestring):
                    fields[addr_key] = fields[addr_key].rstrip(string.letters)

            fields['from_num'], fields['to_num'] = make_block_numbers(
                fields['left_from_num'],
                fields['left_to_num'],
                fields['right_from_num'],
                fields['right_to_num'])

            block = Block(**fields)
            block.geom = feature.geom.geos
            self.log(u'Looking at block %s' % fields['street'])

            block.save()
            num_created += 1
            self.log('Created block %s' % block)
        return num_created
Exemple #9
0
    def save(self):
        """
        Create a Block for every usable name of every usable feature.

        A feature is skipped when its 'FCC' value does not match
        self.fcc_pat, when neither side has both a "from" and a "to"
        address number, or when its geometry is not a LINESTRING.

        Each feature is then tried under its primary name and under
        the alternate names found by appending '1'..'5' to the name
        field keys.  A name is skipped when the street is empty, or is
        a bare number with no suffix / type.  The first block saved
        for a feature becomes the parent of the later ones via
        ``parent_id``.

        Returns the number of blocks created.
        """
        alt_names_suff = (u'', u'1', u'2', u'3', u'4', u'5')
        address_keys = ('left_from_num', 'left_to_num',
                        'right_from_num', 'right_to_num')
        num_created = 0
        for feature in self.layer:
            if not self.fcc_pat.search(feature.get('FCC')):
                continue
            parent_id = None
            base_fields = {}
            for esri_fieldname, block_fieldname in FIELD_MAP.items():
                value = feature.get(esri_fieldname)
                if isinstance(value, basestring):
                    value = value.upper()
                elif isinstance(value, int) and value == 0:
                    value = None
                base_fields[block_fieldname] = value
            if not ((base_fields['left_from_num'] and base_fields['left_to_num']) or
                    (base_fields['right_from_num'] and base_fields['right_to_num'])):
                continue
            # Sometimes the "from" number is greater than the "to"
            # number in the source data, so we swap them into proper
            # ordering.
            for side in ('left', 'right'):
                from_key, to_key = '%s_from_num' % side, '%s_to_num' % side
                if base_fields[from_key] > base_fields[to_key]:
                    base_fields[from_key], base_fields[to_key] = (
                        base_fields[to_key], base_fields[from_key])
            if feature.geom.geom_name != 'LINESTRING':
                continue
            for suffix in alt_names_suff:
                name_fields = {}
                for esri_fieldname, block_fieldname in NAME_FIELD_MAP.items():
                    key = esri_fieldname + suffix
                    name_fields[block_fieldname] = feature.get(key).upper()
                if not name_fields['street']:
                    continue
                # Skip blocks with bare number street names and no suffix / type
                if not name_fields['suffix'] and re.search(
                        r'^\d+$', name_fields['street']):
                    continue

                # Work on a fresh copy for each name.  The address
                # numbers are rewritten further down, and with a shared
                # dict the stripped values from one name would leak
                # into the pretty name of the next.
                fields = dict(base_fields)
                fields.update(name_fields)

                # Ensure we have unicode.
                for key, val in fields.items():
                    if isinstance(val, str):
                        fields[key] = val.decode(self.encoding)

                fields['street_pretty_name'], fields[
                    'pretty_name'] = make_pretty_name(
                        fields['left_from_num'],
                        fields['left_to_num'],
                        fields['right_from_num'],
                        fields['right_to_num'],
                        fields['predir'],
                        fields['street'],
                        fields['suffix'],
                        fields['postdir'],
                    )

                fields['street_slug'] = slugify(u' '.join(
                    (fields['street'], fields['suffix'])))

                # Watch out for addresses like '247B' which can't be
                # saved as an IntegerField. But do this after making
                # pretty names.
                # Only strings can carry a letter suffix; the
                # normalisation above may have left None (from a zero)
                # or a plain int here, and .rstrip() on those would
                # raise.
                for addr_key in address_keys:
                    if isinstance(fields[addr_key], basestring):
                        fields[addr_key] = fields[addr_key].rstrip(
                            string.letters)

                fields['from_num'], fields['to_num'] = make_block_numbers(
                    fields['left_from_num'], fields['left_to_num'],
                    fields['right_from_num'], fields['right_to_num'])

                block = Block(**fields)
                block.geom = feature.geom.geos
                self.log(u'Looking at block %s' % fields['street'])

                block.save()
                if parent_id is None:
                    parent_id = block.id
                else:
                    # Alternate-name block: link it to the first block
                    # saved for this feature.
                    block.parent_id = parent_id
                    block.save()
                num_created += 1
                self.log('Created block %s' % block)
        return num_created
Exemple #10
0
 def _makeBlock(self):
     """Save and return a new Block whose geometry is LINESTRING."""
     new_block = Block(geom=LINESTRING)
     new_block.save()
     return new_block
Exemple #11
0
    def save(self):
        """
        Save a Block for each set of fields that gen_blocks() yields.

        Features rejected by skip_feature() are ignored.  Byte strings
        are decoded with self.encoding, pretty names and the street
        slug are derived, and string address numbers are reduced to
        integers (or None) before the block is saved.  When one feature
        yields several blocks, the first one saved is recorded as the
        parent of the others through ``parent_id``.

        Returns the number of blocks saved.
        """
        address_keys = ('left_from_num', 'left_to_num',
                        'right_from_num', 'right_to_num')
        name_keys = ('predir', 'street', 'suffix', 'postdir')
        num_created = 0
        for feature in self.layer:
            if self.skip_feature(feature):
                continue
            first_block_id = None
            for block_fields in self.gen_blocks(feature):
                # Usually (at least in Boston data) a feature yields a
                # single block.  A street with several names, e.g.
                # "N. Commercial Wharf" and "Commercial Wharf N.",
                # comes out of gen_blocks() as separate blocks; it is
                # unclear whether that is intentional or a bug.

                # Make sure every text value is unicode.
                for key, val in block_fields.items():
                    if isinstance(val, str):
                        block_fields[key] = val.decode(self.encoding)

                block_fields['geom'] = feature.geom.geos

                pretty_args = [block_fields[key]
                               for key in address_keys + name_keys]
                street_pretty, block_pretty = make_pretty_name(*pretty_args)
                block_fields['street_pretty_name'] = street_pretty
                block_fields['pretty_name'] = block_pretty

                slug_source = u' '.join(
                    (block_fields['street'], block_fields['suffix']))
                block_fields['street_slug'] = slugify(slug_source)

                # Addresses like '247B' can't be stored in an
                # IntegerField, so trailing letters are dropped -- but
                # only *after* the pretty names have been built.
                for addr_key in address_keys:
                    raw = block_fields[addr_key]
                    if not isinstance(raw, basestring):
                        continue
                    # For ranges like '19-47' just keep the lower
                    # number.  That gives misleading output, but it's
                    # probably better than discarding the block.
                    digits = raw.rstrip(string.letters).split('-')[0]
                    number = None
                    if digits:
                        try:
                            number = int(digits)
                        except ValueError:
                            self.log("Omitting weird value %r for %r" %
                                     (digits, addr_key))
                    block_fields[addr_key] = number

                from_num, to_num = make_block_numbers(
                    *[block_fields[key] for key in address_keys])
                block_fields['from_num'] = from_num
                block_fields['to_num'] = to_num

                block = Block(**block_fields)
                block.save()
                if first_block_id is not None:
                    # A later block for the same feature: link it to
                    # the first one, which needs a second save.
                    block.parent_id = first_block_id
                    block.save()
                else:
                    first_block_id = block.id
                num_created += 1
                self.log('%d\tCreated block %s for feature %d' %
                         (num_created, block, feature.get('TLID')))
        return num_created
Exemple #12
0
    def save(self):
        """
        Create or update a Block for each set of fields that
        gen_blocks() yields for the features in self.layer.

        If self.reset is true, every existing Block is deleted first.
        Features rejected by skip_feature() are ignored.

        For each set of block fields: text is decoded to unicode, the
        geometry is projected to SRID 4326, pretty names and the street
        slug are derived, address numbers are standardized, and the
        geocoding fields are run through STANDARDIZERS.  The result is
        matched against existing blocks on a set of identifying fields:

        * no match: a new Block is created;
        * exactly one match: that Block is updated in place;
        * several matches: the block is skipped with a warning.

        Blocks that fail make_block_numbers() or validation are skipped
        with a warning.  The first block saved for a feature becomes
        the parent of the later ones via ``parent_id``.

        NOTE(review): no return statement is visible in this body, and
        ``start`` / ``num_existing`` are never read in it -- confirm
        whether the tail of the method is missing.
        """
        if self.reset:
            logger.warn("Deleting all Block instances and anything that refers to them!")
            Block.objects.all().delete()
        import time
        start = time.time()
        num_created = 0
        num_existing = 0
        for feature in self.layer:
            parent_id = None
            if not self.skip_feature(feature):
                for block_fields in self.gen_blocks(feature):

                    # Usually (at least in Boston data) there is only
                    # 1 block per feature.  But sometimes there are
                    # multiple names for one street, eg.
                    # "N. Commercial Wharf" and "Commercial Wharf N.";
                    # in that case those would be yielded by gen_blocks() as
                    # two separate blocks. Is that intentional, or a bug?

                    # Ensure we have unicode.
                    for key, val in block_fields.items():
                        if isinstance(val, str):
                            block_fields[key] = val.decode(self.encoding)

                    block_fields['geom'] = geos_with_projection(feature.geom, 4326)
                    block_fields['prefix'] = make_pretty_prefix(block_fields['prefix'])

                    block_fields['street_pretty_name'], block_fields['pretty_name'] = make_pretty_name(
                        block_fields['left_from_num'],
                        block_fields['left_to_num'],
                        block_fields['right_from_num'],
                        block_fields['right_to_num'],
                        block_fields['predir'],
                        block_fields['prefix'],
                        block_fields['street'],
                        block_fields['suffix'],
                        block_fields['postdir']
                    )

                    block_fields['street_slug'] = slugify(
                        u' '.join((block_fields['prefix'],
                                   block_fields['street'],
                                   block_fields['suffix'])))

                    # Watch out for addresses like '247B' which can't be
                    # saved as an IntegerField.
                    # But do this *after* making pretty names.
                    # Also attempt to fix up addresses like '19-47',
                    # by just using the lower number.  This will give
                    # misleading output, but it's probably better than
                    # discarding blocks.
                    # NOTE(review): any non-string value (including an
                    # int) is replaced with None here -- confirm that
                    # gen_blocks() only ever yields strings for these.
                    for addr_key in ('left_from_num', 'left_to_num',
                                     'right_from_num', 'right_to_num'):
                        if isinstance(block_fields[addr_key], basestring):
                            from ebpub.geocoder.parser.parsing import number_standardizer
                            value = number_standardizer(block_fields[addr_key].strip())
                            if not value:
                                value = None
                        else:
                            value = None
                        block_fields[addr_key] = value

                    try:
                        block_fields['from_num'], block_fields['to_num'] = \
                            make_block_numbers(block_fields['left_from_num'],
                                               block_fields['left_to_num'],
                                               block_fields['right_from_num'],
                                               block_fields['right_to_num'])
                    except ValueError, e:
                        logger.warn('Skipping %s: %s' % (block_fields['pretty_name'], e))
                        continue

                    # After doing pretty names etc, standardize the fields
                    # that get used for geocoding, since the geocoder
                    # searches for the standardized version.
                    from ebpub.geocoder.parser.parsing import STANDARDIZERS
                    for key, standardizer in STANDARDIZERS.items():
                        if key in block_fields:
                            if key == 'street' and block_fields['prefix']:
                                # Special case: "US Highway 101", not "US Highway 101st".
                                continue

                            block_fields[key] = standardizer(block_fields[key])

                    # Separate out the uniquely identifying fields so
                    # we can avoid duplicate blocks.
                    # NOTE this doesn't work if you're updating from a more
                    # recent shapefile and the street has significant
                    # changes - eg. the street name has changed, or the
                    # address range has changed, or the block has split...
                    # see #257. http://developer.openblockproject.org/ticket/257
                    primary_fields = {}
                    primary_field_keys = ('street_slug',
                                          'from_num', 'to_num',
                                          'left_city', 'right_city',
                                          'left_zip', 'right_zip',
                                          'left_state', 'right_state',
                                          )
                    for key in primary_field_keys:
                        if block_fields[key] != u'':
                            # Some empty fields are fixed
                            # automatically by clean().
                            primary_fields[key] = block_fields[key]

                    existing = list(Block.objects.filter(**primary_fields))
                    if not existing:
                        # Check the old-style way we used to make street slugs
                        # prior to fixing issue #264... we need to keep this
                        # code around indefinitely in case we are reloading the
                        # blocks data and need to overwrite blocks that have
                        # the old bad slug.  Sadly this probably can't just be
                        # fixed by a migration.
                        _old_street_slug = slugify(
                            u' '.join((block_fields['street'],
                                       block_fields['suffix'])))
                        _old_primary_fields = primary_fields.copy()
                        _old_primary_fields['street_slug'] = _old_street_slug
                        existing = list(Block.objects.filter(**_old_primary_fields))
                        if not existing:
                            block = Block(**block_fields)
                            num_created += 1
                            logger.debug("CREATING %s" % unicode(block))

                    # At this point `existing` reflects whichever lookup
                    # matched (new-style slug first, then old-style).
                    if len(existing) == 1:
                        num_existing += 1
                        block = existing[0]
                        logger.debug(u"Block %s already exists" % unicode(existing[0]))
                        # Overwrite the stored block with the freshly
                        # computed values.
                        for key, val in block_fields.items():
                            setattr(block, key, val)
                    elif len(existing) > 1:
                        num_existing += len(existing)
                        logger.warn("Multiple existing blocks like %s, skipping"
                                    % existing[0])
                        continue
                    try:
                        block.full_clean()
                    except ValidationError:
                        # odd bug: sometimes we get ValidationError even when
                        # the data looks good, and then cleaning again works???
                        try:
                            block.full_clean()
                        except ValidationError, e:
                            logger.warn("validation error on %s, skipping" % str(block))
                            logger.warn(e)
                            continue
                    block.save()
                    if parent_id is None:
                        parent_id = block.id
                    else:
                        # A later block for the same feature: link it
                        # to the first one saved, then save again.
                        block.parent_id = parent_id
                        block.save()
                    # Logged for updated blocks as well as new ones.
                    logger.debug('%d\tCreated block %s for feature %d' % (num_created, block, feature.fid))
Exemple #13
0
 def _makeBlock(self):
     """Save and return a new Block whose geometry is LINESTRING."""
     new_block = Block(geom=LINESTRING)
     new_block.save()
     return new_block
Exemple #14
0
 def save(self, verbose=False):
     """
     Create a Block for every usable name of every usable feature.

     A feature is skipped when its 'FCC' value does not match
     self.fcc_pat, when neither side has both a "from" and a "to"
     address number, or when its geometry is not a LINESTRING.

     Each feature is then tried under its primary name and under the
     alternate names found by appending '1'..'5' to the name field
     keys.  A name is skipped when the street is empty, or is a bare
     number with no suffix / type.  The first block saved for a
     feature becomes the parent of the later ones via ``parent_id``.

     If ``verbose`` is true, a line is written to stderr per block.

     Returns the number of blocks created.
     """
     name_suffixes = ('', '1', '2', '3', '4', '5')
     pretty_name_keys = ('left_from_num', 'left_to_num',
                         'right_from_num', 'right_to_num',
                         'predir', 'street', 'suffix', 'postdir')
     num_created = 0
     for feature in self.layer:
         if not self.fcc_pat.search(feature.get('FCC')):
             continue
         first_block_id = None
         fields = {}
         for esri_fieldname, block_fieldname in FIELD_MAP.items():
             raw = feature.get(esri_fieldname)
             if isinstance(raw, basestring):
                 fields[block_fieldname] = raw.upper()
             elif isinstance(raw, int) and raw == 0:
                 # A zero is treated as "no value".
                 fields[block_fieldname] = None
             else:
                 fields[block_fieldname] = raw

         # At least one side of the street needs a complete range.
         has_left = fields['left_from_num'] and fields['left_to_num']
         if not has_left and not (fields['right_from_num'] and
                                  fields['right_to_num']):
             continue

         # The source data sometimes has "from" greater than "to";
         # put each side's pair into ascending order.
         for side in ('left', 'right'):
             from_key = '%s_from_num' % side
             to_key = '%s_to_num' % side
             low, high = fields[from_key], fields[to_key]
             if low > high:
                 fields[from_key], fields[to_key] = high, low

         if feature.geom.geom_name != 'LINESTRING':
             continue

         for suffix in name_suffixes:
             name_fields = dict(
                 (block_fieldname, feature.get(esri_fieldname + suffix).upper())
                 for esri_fieldname, block_fieldname in NAME_FIELD_MAP.items())
             street = name_fields['street']
             if not street:
                 continue
             # A bare number with no suffix / type is not a usable
             # street name.
             if not name_fields['suffix'] and re.search('^\d+$', street):
                 continue
             fields.update(name_fields)

             block = Block(**fields)
             block.geom = feature.geom.geos
             street_name, block_name = make_pretty_name(
                 *[fields[key] for key in pretty_name_keys])
             block.pretty_name = block_name
             block.street_pretty_name = street_name
             slug_source = ' '.join((fields['street'], fields['suffix']))
             block.street_slug = slugify(slug_source)
             block.save()

             if first_block_id is not None:
                 # Alternate-name block: link it to the first block
                 # saved for this feature, which needs a second save.
                 block.parent_id = first_block_id
                 block.save()
             else:
                 first_block_id = block.id

             num_created += 1
             if verbose:
                 print >> sys.stderr, 'Created block %s' % block
     return num_created
Exemple #15
0
    def save(self):
        import time
        start = time.time()
        num_created = 0
        for feature in self.layer:
            parent_id = None
            if not self.skip_feature(feature):
                for block_fields in self.gen_blocks(feature):

                    # Usually (at least in Boston data) there is only
                    # 1 block per feature.  But sometimes there are
                    # multiple names for one street, eg.
                    # "N. Commercial Wharf" and "Commercial Wharf N.";
                    # in that case those would be yielded by gen_blocks() as
                    # two separate blocks. Is that intentional, or a bug?

                    # Ensure we have unicode.
                    for key, val in block_fields.items():
                        if isinstance(val, str):
                            block_fields[key] = val.decode(self.encoding)


                    block_fields['geom'] = feature.geom.geos

                    block_fields['street_pretty_name'], block_fields['pretty_name'] = make_pretty_name(
                        block_fields['left_from_num'],
                        block_fields['left_to_num'],
                        block_fields['right_from_num'],
                        block_fields['right_to_num'],
                        block_fields['predir'],
                        block_fields['street'],
                        block_fields['suffix'],
                        block_fields['postdir']
                    )

                    block_fields['street_slug'] = slugify(u' '.join((block_fields['street'], block_fields['suffix'])))

                    # Watch out for addresses like '247B' which can't be
                    # saved as an IntegerField.
                    # But do this *after* making pretty names.
                    for addr_key in ('left_from_num', 'left_to_num',
                                     'right_from_num', 'right_to_num'):
                        if isinstance(block_fields[addr_key], basestring):
                            value = block_fields[addr_key].rstrip(string.letters)
                            # Also attempt to fix up addresses like
                            # '19-47', by just using the lower number.
                            # This will give misleading output, but
                            # it's probably better than discarding blocks.
                            value = value.split('-')[0]
                            if value:
                                try:
                                    value = int(value)
                                except ValueError:
                                    logger.warn("Omitting weird value %r for %r" % (value, addr_key))
                                    value = None
                            else:
                                value = None
                            block_fields[addr_key] = value

                    try:
                        block_fields['from_num'], block_fields['to_num'] = \
                            make_block_numbers(block_fields['left_from_num'],
                                               block_fields['left_to_num'],
                                               block_fields['right_from_num'],
                                               block_fields['right_to_num'])
                    except ValueError, e:
                        logger.warn('Skipping %s: %s' % (block_fields['pretty_name'], e))
                        continue

                    # Separate out the uniquely identifying fields so
                    # we can avoid duplicate blocks.
                    # NOTE this doesn't work if you're updating from a more
                    # recent shapefile and the street has significant
                    # changes - eg. the street name has changed, or the
                    # address range has changed, or the block has split...
                    primary_fields = {}
                    primary_field_keys = ('street_slug',
                                          'from_num', 'to_num',
                                          'left_city', 'right_city',
                                          'left_zip', 'right_zip',
                                          'left_state', 'right_state',
                                          )
                    for key in primary_field_keys:
                        if block_fields[key] != u'':
                            # Some empty fields are fixed
                            # automatically by clean(), so 
                            primary_fields[key] = block_fields[key]

                    existing = list(Block.objects.filter(**primary_fields))
                    if not existing:
                        block = Block(**block_fields)
                        num_created += 1
                    elif len(existing) == 1:
                        block = existing[0]
                        logger.debug(u"Block %s already exists" % unicode(existing[0]))
                        for key, val in block_fields.items():
                            setattr(block, key, val)
                    else:
                        logger.warn("Multiple existing blocks like %s, skipping"
                                    % existing[0])
                        continue
                    try:
                        block.full_clean()
                    except ValidationError:
                        # odd bug: sometimes we get ValidationError even when
                        # the data looks good, and then cleaning again works???
                        try:
                            block.full_clean()
                        except ValidationError, e:
                            logger.warn("validation error on %s, skipping" % str(block))
                            logger.warn(e)
                            continue
                    logger.debug("CREATING %s" % unicode(block))
                    block.save()
                    if parent_id is None:
                        parent_id = block.id
                    else:
                        block.parent_id = parent_id
                        block.save()
                    logger.debug('%d\tCreated block %s for feature %d' % (num_created, block, feature.get('TLID')))
Exemple #16
0
    def save(self):
        num_created = 0
        for feature in self.layer:
            parent_id = None
            if not self.skip_feature(feature):
                for block_fields in self.gen_blocks(feature):

                    # Usually (at least in Boston data) there is only
                    # 1 block per feature.  But sometimes there are
                    # multiple names for one street, eg.
                    # "N. Commercial Wharf" and "Commercial Wharf N.";
                    # in that case those would be yielded by gen_blocks() as
                    # two separate blocks. Is that intentional, or a bug?

                    # Ensure we have unicode.
                    for key, val in block_fields.items():
                        if isinstance(val, str):
                            block_fields[key] = val.decode(self.encoding)

                    block_fields['geom'] = feature.geom.geos

                    block_fields['street_pretty_name'], block_fields['pretty_name'] = make_pretty_name(
                        block_fields['left_from_num'],
                        block_fields['left_to_num'],
                        block_fields['right_from_num'],
                        block_fields['right_to_num'],
                        block_fields['predir'],
                        block_fields['street'],
                        block_fields['suffix'],
                        block_fields['postdir']
                    )

                    block_fields['street_slug'] = slugify(u' '.join((block_fields['street'], block_fields['suffix'])))

                    # Watch out for addresses like '247B' which can't be
                    # saved as an IntegerField.
                    # But do this *after* making pretty names.
                    for addr_key in ('left_from_num', 'left_to_num',
                                     'right_from_num', 'right_to_num'):
                        if isinstance(block_fields[addr_key], basestring):
                            value = block_fields[addr_key].rstrip(string.letters)
                            # Also attempt to fix up addresses like
                            # '19-47', by just using the lower number.
                            # This will give misleading output, but
                            # it's probably better than discarding blocks.
                            value = value.split('-')[0]
                            if value:
                                try:
                                    value = int(value)
                                except ValueError:
                                    logger.warn("Omitting weird value %r for %r" % (value, addr_key))
                                    value = None
                            else:
                                value = None
                            block_fields[addr_key] = value

                    try:
                        block_fields['from_num'], block_fields['to_num'] = \
                            make_block_numbers(block_fields['left_from_num'],
                                               block_fields['left_to_num'],
                                               block_fields['right_from_num'],
                                               block_fields['right_to_num'])
                    except ValueError, e:
                        logger.warn('Skipping %s: %s' % (block_fields['pretty_name'], e))
                        continue

                    block = Block(**block_fields)
                    try:
                        block.full_clean()
                    except ValidationError:
                        # odd bug: sometimes we get ValidationError even when
                        # the data looks good, and then cleaning again works???
                        try:
                            block.full_clean()
                        except ValidationError, e:
                            logger.warn("validation error on %s, skipping" % str(block))
                            logger.warn(e)
                            continue

                    block.save()
                    if parent_id is None:
                        parent_id = block.id
                    else:
                        block.parent_id = parent_id
                        block.save()
                    num_created += 1
                    logger.debug('%d\tCreated block %s for feature %d' % (num_created, block, feature.get('TLID')))
Exemple #17
0
    def save(self):
        if self.reset:
            logger.warn(
                "Deleting all Block instances and anything that refers to them!"
            )
            Block.objects.all().delete()
        import time
        start = time.time()
        num_created = 0
        num_existing = 0
        for feature in self.layer:
            parent_id = None
            if not self.skip_feature(feature):
                for block_fields in self.gen_blocks(feature):

                    # Usually (at least in Boston data) there is only
                    # 1 block per feature.  But sometimes there are
                    # multiple names for one street, eg.
                    # "N. Commercial Wharf" and "Commercial Wharf N.";
                    # in that case those would be yielded by gen_blocks() as
                    # two separate blocks. Is that intentional, or a bug?

                    # Ensure we have unicode.
                    for key, val in block_fields.items():
                        if isinstance(val, str):
                            block_fields[key] = val.decode(self.encoding)

                    block_fields['geom'] = geos_with_projection(
                        feature.geom, 4326)
                    block_fields['prefix'] = make_pretty_prefix(
                        block_fields['prefix'])

                    block_fields['street_pretty_name'], block_fields[
                        'pretty_name'] = make_pretty_name(
                            block_fields['left_from_num'],
                            block_fields['left_to_num'],
                            block_fields['right_from_num'],
                            block_fields['right_to_num'],
                            block_fields['predir'], block_fields['prefix'],
                            block_fields['street'], block_fields['suffix'],
                            block_fields['postdir'])

                    block_fields['street_slug'] = slugify(u' '.join(
                        (block_fields['prefix'], block_fields['street'],
                         block_fields['suffix'])))

                    # Watch out for addresses like '247B' which can't be
                    # saved as an IntegerField.
                    # But do this *after* making pretty names.
                    # Also attempt to fix up addresses like '19-47',
                    # by just using the lower number.  This will give
                    # misleading output, but it's probably better than
                    # discarding blocks.
                    for addr_key in ('left_from_num', 'left_to_num',
                                     'right_from_num', 'right_to_num'):
                        if isinstance(block_fields[addr_key], basestring):
                            from ebpub.geocoder.parser.parsing import number_standardizer
                            value = number_standardizer(
                                block_fields[addr_key].strip())
                            if not value:
                                value = None
                        else:
                            try:
                                value = str(int(value))
                            except (ValueError, TypeError):
                                value = None
                        block_fields[addr_key] = value

                    try:
                        block_fields['from_num'], block_fields['to_num'] = \
                            make_block_numbers(block_fields['left_from_num'],
                                               block_fields['left_to_num'],
                                               block_fields['right_from_num'],
                                               block_fields['right_to_num'])
                    except ValueError, e:
                        logger.warn('Skipping %s: %s' %
                                    (block_fields['pretty_name'], e))
                        continue

                    # After doing pretty names etc, standardize the fields
                    # that get used for geocoding, since the geocoder
                    # searches for the standardized version.
                    from ebpub.geocoder.parser.parsing import STANDARDIZERS
                    for key, standardizer in STANDARDIZERS.items():
                        if key in block_fields:
                            if key == 'street' and block_fields['prefix']:
                                # Special case: "US Highway 101", not "US Highway 101st".
                                continue

                            block_fields[key] = standardizer(block_fields[key])

                    # Separate out the uniquely identifying fields so
                    # we can avoid duplicate blocks.
                    # NOTE this doesn't work if you're updating from a more
                    # recent shapefile and the street has significant
                    # changes - eg. the street name has changed, or the
                    # address range has changed, or the block has split...
                    # see #257. http://developer.openblockproject.org/ticket/257
                    primary_fields = {}
                    primary_field_keys = (
                        'street_slug',
                        'from_num',
                        'to_num',
                        'left_city',
                        'right_city',
                        'left_zip',
                        'right_zip',
                        'left_state',
                        'right_state',
                    )
                    for key in primary_field_keys:
                        if block_fields[key] != u'':
                            # Some empty fields are fixed
                            # automatically by clean().
                            primary_fields[key] = block_fields[key]

                    existing = list(Block.objects.filter(**primary_fields))
                    if not existing:
                        # Check the old-style way we used to make street slugs
                        # prior to fixing issue #264... we need to keep this
                        # code around indefinitely in case we are reloading the
                        # blocks data and need to overwrite blocks that have
                        # the old bad slug.  Sadly this probably can't just be
                        # fixed by a migration.
                        _old_street_slug = slugify(u' '.join(
                            (block_fields['street'], block_fields['suffix'])))
                        _old_primary_fields = primary_fields.copy()
                        _old_primary_fields['street_slug'] = _old_street_slug
                        existing = list(
                            Block.objects.filter(**_old_primary_fields))
                        if not existing:
                            block = Block(**block_fields)
                            num_created += 1
                            logger.debug("CREATING %s" % unicode(block))

                    if len(existing) == 1:
                        num_existing += 1
                        block = existing[0]
                        logger.debug(u"Block %s already exists" %
                                     unicode(existing[0]))
                        for key, val in block_fields.items():
                            setattr(block, key, val)
                    elif len(existing) > 1:
                        num_existing += len(existing)
                        logger.warn(
                            "Multiple existing blocks like %s, skipping" %
                            existing[0])
                        continue
                    try:
                        block.full_clean()
                    except ValidationError:
                        # odd bug: sometimes we get ValidationError even when
                        # the data looks good, and then cleaning again works???
                        try:
                            block.full_clean()
                        except ValidationError, e:
                            logger.warn("validation error on %s, skipping" %
                                        str(block))
                            logger.warn(e)
                            continue
                    block.save()
                    if parent_id is None:
                        parent_id = block.id
                    else:
                        block.parent_id = parent_id
                        block.save()
                    logger.debug('%d\tCreated block %s for feature %d' %
                                 (num_created, block, feature.fid))