Exemple #1
0
    def collapse_zip_codes(self):
        """Merge per-feature ZIP Code geometries into ``self.zipcode_geoms``.

        The ESRI ZIP Code layer breaks ZIP Codes up along county
        boundaries, so several features can carry the same ZIP Code.
        Each feature's geometry is folded into a single entry keyed by
        the value of ``self.name_field``.

        Does nothing if ``self.zipcode_geoms`` already has entries.
        """
        if self.zipcode_geoms:
            # Already collapsed.
            return

        for feature in self.layer:
            code = feature.get(self.name_field)
            shape = geos_with_projection(feature.geom)

            if code not in self.zipcode_geoms:
                # First geometry seen for this ZIP Code: store it as-is.
                self.zipcode_geoms[code] = shape
                continue

            # A MultiPolygon being merged in has to be "unrolled" into
            # its constituent polygons first.
            parts = list(shape) if isinstance(shape, MultiPolygon) else [shape]

            current = self.zipcode_geoms[code]
            if isinstance(current, MultiPolygon):
                # Grow the stored MultiPolygon in place.
                current.extend(parts)
            else:
                # Promote the stored geometry to a MultiPolygon, then
                # append the new parts.
                combined = MultiPolygon([current])
                combined.extend(parts)
                self.zipcode_geoms[code] = combined
Exemple #2
0
    def create_location(self, name, location_type, geom, display_order=0):
        """Get or create a ``Location`` for ``name`` and populate its newsitems.

        ``location_type`` may be an integer id or an object with an ``id``
        attribute.  ``geom`` is passed through ``geos_with_projection``
        (with 4326), ``ensure_valid`` and ``flatten_geomcollection``
        before being stored.

        Returns ``None`` when ``self.should_create_location()`` rejects the
        lookup arguments; otherwise returns the ``created`` flag from
        ``get_or_create``.  An ``IntegrityError`` is re-raised unless a
        Location with the same slug exists, in which case the slug is
        suffixed with a number and the get-or-create is retried once.
        """
        source = self.source
        shape = flatten_geomcollection(
            ensure_valid(geos_with_projection(geom, 4326), name))

        if isinstance(location_type, int):
            type_id = location_type
        else:
            type_id = location_type.id

        kwargs = {
            'name': name,
            'slug': slugify(name),
            'location': shape,
            'location_type_id': type_id,
            'city': self.metro_name,
            'source': source,
            'is_public': True,
        }
        if not self.should_create_location(kwargs):
            return

        # Values that are only applied when a new row is created.
        kwargs['defaults'] = {
            'creation_date': self.now,
            'last_mod_date': self.now,
            'display_order': display_order,
            'normalized_name': normalize(name),
            'area': shape.transform(3395, True).area,
        }

        try:
            loc, created = Location.objects.get_or_create(**kwargs)
        except IntegrityError:
            # Usually this means two towns with the same slug.
            # Try to fix that.
            original_slug = kwargs['slug']
            clashes = Location.objects.filter(slug=original_slug).count()
            if not clashes:
                raise
            unique_slug = slugify('%s-%s' % (original_slug, clashes + 1))
            logger.info("Munged slug %s to %s to make it unique" % (original_slug, unique_slug))
            kwargs['slug'] = unique_slug
            loc, created = Location.objects.get_or_create(**kwargs)

        outcome = 'Created' if created else 'Already had'
        logger.info('%s %s %s' % (outcome, self.location_type.name, loc))
        logger.info('Populating newsitem locations ... ')
        populate_ni_loc(loc)
        logger.info('done.\n')

        return created
    def gen_blocks(self, feature):
        """Yield one dict of block fields built from ``feature``.

        ZIPs and address-range numbers are read straight from the feature;
        street, prefix and suffix are upper-cased and stripped; predir and
        postdir are additionally reduced to the letters N, E, S and W.
        The city is taken from the first city location intersecting the
        feature's geometry (empty string if none) and the state is always
        'NC'.
        """
        def _upper(field):
            # Upper-cased, whitespace-stripped attribute value.
            return feature.get(field).upper().strip()

        block_fields = {
            'right_zip': feature.get('RZIP'),
            'left_zip': feature.get('LZIP'),
            'right_from_num': feature.get('FROMRIGHT'),
            'right_to_num': feature.get('TORIGHT'),
            'left_from_num': feature.get('FROMLEFT'),
            'left_to_num': feature.get('TOLEFT'),
            'street': _upper('STREET'),
            'prefix': _upper('PRETYPE'),
            'suffix': _upper('TYPE'),
        }

        # For predir and postdir, OpenBlock expects either:
        #   1) N, S, E, or W;
        #   2) 2-letter combinations of these (NE, SW, etc); or
        #   3) blank/Null
        # In our current shapefiles, most of the data fits this paradigm,
        # but there are a few '0' entries present, presumably intended to
        # mean Null, so anything that is not one of those letters is dropped.
        block_fields['predir'] = re.sub('[^NESW]', '', _upper('PREDIR'))
        block_fields['postdir'] = re.sub('[^NESW]', '', _upper('SUFDIR'))

        for side in ('left', 'right'):
            from_key = '%s_from_num' % side
            to_key = '%s_to_num' % side
            # A range starting at 0 whose end is even is bumped to start at 2.
            if block_fields[from_key] == 0 and not (block_fields[to_key] % 2):
                block_fields[from_key] = 2
            # As of OpenBlock 1.2 these values must be strings, else they
            # get turned into None.
            block_fields[from_key] = str(block_fields[from_key])
            block_fields[to_key] = str(block_fields[to_key])

        cities = list(get_city_locations().filter(
            location__intersects=geos_with_projection(feature.geom, 4326)))
        if cities:
            city_name = cities[0].name.upper()
        else:
            city_name = ''
        for side in ('right', 'left'):
            block_fields['%s_city' % side] = city_name
            block_fields['%s_state' % side] = 'NC'

        yield block_fields.copy()
Exemple #4
0
    def save(self):
        """Create or update a ``Block`` for every block generated from ``self.layer``.

        If ``self.reset`` is true, all existing ``Block`` rows are deleted
        first.  Then, for each feature not rejected by ``skip_feature()``,
        every dict yielded by ``gen_blocks()`` is decoded to unicode, given
        a geometry, pretty names and a street slug, has its address numbers
        and geocoding fields standardized, and is matched against existing
        blocks by its uniquely identifying fields:

        * no match (under either the current or the old-style slug):
          a new ``Block`` is built;
        * exactly one match: that block's attributes are overwritten;
        * several matches: a warning is logged and the block is skipped.

        Blocks that fail ``make_block_numbers()`` or validation are logged
        and skipped.  The first block saved for a feature becomes the
        parent of any further blocks saved for the same feature.

        NOTE(review): ``start`` and ``num_existing`` are written but never
        read in the portion of the method visible here, and nothing is
        returned -- confirm whether the method continues beyond this point.
        """
        if self.reset:
            logger.warn("Deleting all Block instances and anything that refers to them!")
            Block.objects.all().delete()
        import time
        start = time.time()
        num_created = 0
        num_existing = 0
        for feature in self.layer:
            # Id of the first block saved for this feature, if any.
            parent_id = None
            if not self.skip_feature(feature):
                for block_fields in self.gen_blocks(feature):

                    # Usually (at least in Boston data) there is only
                    # 1 block per feature.  But sometimes there are
                    # multiple names for one street, eg.
                    # "N. Commercial Wharf" and "Commercial Wharf N.";
                    # in that case those would be yielded by gen_blocks() as
                    # two separate blocks. Is that intentional, or a bug?

                    # Ensure we have unicode: byte strings are decoded
                    # using self.encoding.
                    for key, val in block_fields.items():
                        if isinstance(val, str):
                            block_fields[key] = val.decode(self.encoding)

                    # 4326 is presumably the target SRID -- confirm against
                    # geos_with_projection().
                    block_fields['geom'] = geos_with_projection(feature.geom, 4326)
                    block_fields['prefix'] = make_pretty_prefix(block_fields['prefix'])

                    block_fields['street_pretty_name'], block_fields['pretty_name'] = make_pretty_name(
                        block_fields['left_from_num'],
                        block_fields['left_to_num'],
                        block_fields['right_from_num'],
                        block_fields['right_to_num'],
                        block_fields['predir'],
                        block_fields['prefix'],
                        block_fields['street'],
                        block_fields['suffix'],
                        block_fields['postdir']
                    )

                    # The slug is built from prefix + street + suffix.
                    block_fields['street_slug'] = slugify(
                        u' '.join((block_fields['prefix'],
                                   block_fields['street'],
                                   block_fields['suffix'])))

                    # Watch out for addresses like '247B' which can't be
                    # saved as an IntegerField.
                    # But do this *after* making pretty names.
                    # Also attempt to fix up addresses like '19-47',
                    # by just using the lower number.  This will give
                    # misleading output, but it's probably better than
                    # discarding blocks.
                    for addr_key in ('left_from_num', 'left_to_num',
                                     'right_from_num', 'right_to_num'):
                        if isinstance(block_fields[addr_key], basestring):
                            from ebpub.geocoder.parser.parsing import number_standardizer
                            value = number_standardizer(block_fields[addr_key].strip())
                            # Any falsy result is stored as None.
                            if not value:
                                value = None
                        else:
                            # Non-string values are discarded.
                            value = None
                        block_fields[addr_key] = value

                    try:
                        block_fields['from_num'], block_fields['to_num'] = \
                            make_block_numbers(block_fields['left_from_num'],
                                               block_fields['left_to_num'],
                                               block_fields['right_from_num'],
                                               block_fields['right_to_num'])
                    except ValueError, e:
                        # No usable address range: log and move on to the
                        # next generated block.
                        logger.warn('Skipping %s: %s' % (block_fields['pretty_name'], e))
                        continue

                    # After doing pretty names etc, standardize the fields
                    # that get used for geocoding, since the geocoder
                    # searches for the standardized version.
                    from ebpub.geocoder.parser.parsing import STANDARDIZERS
                    for key, standardizer in STANDARDIZERS.items():
                        if key in block_fields:
                            if key == 'street' and block_fields['prefix']:
                                # Special case: "US Highway 101", not "US Highway 101st".
                                continue

                            block_fields[key] = standardizer(block_fields[key])

                    # Separate out the uniquely identifying fields so
                    # we can avoid duplicate blocks.
                    # NOTE this doesn't work if you're updating from a more
                    # recent shapefile and the street has significant
                    # changes - eg. the street name has changed, or the
                    # address range has changed, or the block has split...
                    # see #257. http://developer.openblockproject.org/ticket/257
                    primary_fields = {}
                    primary_field_keys = ('street_slug',
                                          'from_num', 'to_num',
                                          'left_city', 'right_city',
                                          'left_zip', 'right_zip',
                                          'left_state', 'right_state',
                                          )
                    for key in primary_field_keys:
                        if block_fields[key] != u'':
                            # Some empty fields are fixed
                            # automatically by clean(), so empty strings
                            # are left out of the lookup.
                            primary_fields[key] = block_fields[key]

                    existing = list(Block.objects.filter(**primary_fields))
                    if not existing:
                        # Check the old-style way we used to make street slugs
                        # prior to fixing issue #264... we need to keep this
                        # code around indefinitely in case we are reloading the
                        # blocks data and need to overwrite blocks that have
                        # the old bad slug.  Sadly this probably can't just be
                        # fixed by a migration.
                        # (The old slug omits the prefix.)
                        _old_street_slug = slugify(
                            u' '.join((block_fields['street'],
                                       block_fields['suffix'])))
                        _old_primary_fields = primary_fields.copy()
                        _old_primary_fields['street_slug'] = _old_street_slug
                        existing = list(Block.objects.filter(**_old_primary_fields))
                        if not existing:
                            # Neither lookup matched: build a new block.
                            block = Block(**block_fields)
                            num_created += 1
                            logger.debug("CREATING %s" % unicode(block))

                    if len(existing) == 1:
                        # Exactly one match: overwrite its attributes with
                        # the freshly computed values.
                        num_existing += 1
                        block = existing[0]
                        logger.debug(u"Block %s already exists" % unicode(existing[0]))
                        for key, val in block_fields.items():
                            setattr(block, key, val)
                    elif len(existing) > 1:
                        # Ambiguous match: do not guess which one to update.
                        num_existing += len(existing)
                        logger.warn("Multiple existing blocks like %s, skipping"
                                    % existing[0])
                        continue
                    try:
                        block.full_clean()
                    except ValidationError:
                        # odd bug: sometimes we get ValidationError even when
                        # the data looks good, and then cleaning again works???
                        try:
                            block.full_clean()
                        except ValidationError, e:
                            logger.warn("validation error on %s, skipping" % str(block))
                            logger.warn(e)
                            continue
                    block.save()
                    if parent_id is None:
                        # First block saved for this feature becomes the parent.
                        parent_id = block.id
                    else:
                        # Later blocks for the same feature point at the
                        # first one; a second save stores the link.
                        block.parent_id = parent_id
                        block.save()
                    # NOTE: this message is logged for updated blocks too,
                    # not only newly created ones.
                    logger.debug('%d\tCreated block %s for feature %d' % (num_created, block, feature.fid))
Exemple #5
0
    def save(self):
        """Create or update a ``Block`` for every block generated from ``self.layer``.

        For each feature not rejected by ``skip_feature()``, every dict
        yielded by ``gen_blocks()`` is decoded to unicode, given a geometry,
        pretty names and a street slug, has its string address numbers
        reduced to integers, and is matched against existing blocks by its
        uniquely identifying fields:

        * no match: a new ``Block`` is built;
        * exactly one match: that block's attributes are overwritten;
        * several matches: a warning is logged and the block is skipped.

        Blocks that fail ``make_block_numbers()`` or validation are logged
        and skipped.  The first block saved for a feature becomes the
        parent of any further blocks saved for the same feature.

        NOTE(review): ``start`` is written but never read in the portion of
        the method visible here, and nothing is returned -- confirm whether
        the method continues beyond this point.
        """
        import time
        start = time.time()
        num_created = 0
        for feature in self.layer:
            # Id of the first block saved for this feature, if any.
            parent_id = None
            if not self.skip_feature(feature):
                for block_fields in self.gen_blocks(feature):

                    # Usually (at least in Boston data) there is only
                    # 1 block per feature.  But sometimes there are
                    # multiple names for one street, eg.
                    # "N. Commercial Wharf" and "Commercial Wharf N.";
                    # in that case those would be yielded by gen_blocks() as
                    # two separate blocks. Is that intentional, or a bug?

                    # Ensure we have unicode: byte strings are decoded
                    # using self.encoding.
                    for key, val in block_fields.items():
                        if isinstance(val, str):
                            block_fields[key] = val.decode(self.encoding)

                    # 4326 is presumably the target SRID -- confirm against
                    # geos_with_projection().
                    block_fields['geom'] = geos_with_projection(feature.geom, 4326)

                    block_fields['street_pretty_name'], block_fields['pretty_name'] = make_pretty_name(
                        block_fields['left_from_num'],
                        block_fields['left_to_num'],
                        block_fields['right_from_num'],
                        block_fields['right_to_num'],
                        block_fields['predir'],
                        block_fields['street'],
                        block_fields['suffix'],
                        block_fields['postdir']
                    )

                    # The slug is built from street + suffix.
                    block_fields['street_slug'] = slugify(u' '.join((block_fields['street'], block_fields['suffix'])))

                    # Watch out for addresses like '247B' which can't be
                    # saved as an IntegerField.
                    # But do this *after* making pretty names.
                    # Only string values are touched; anything else is
                    # left in block_fields unchanged.
                    for addr_key in ('left_from_num', 'left_to_num',
                                     'right_from_num', 'right_to_num'):
                        if isinstance(block_fields[addr_key], basestring):
                            # Drop trailing letters ('247B' -> '247').
                            value = block_fields[addr_key].rstrip(string.letters)
                            # Also attempt to fix up addresses like
                            # '19-47', by just using the lower number.
                            # This will give misleading output, but
                            # it's probably better than discarding blocks.
                            value = value.split('-')[0]
                            if value:
                                try:
                                    value = int(value)
                                except ValueError:
                                    logger.warn("Omitting weird value %r for %r" % (value, addr_key))
                                    value = None
                            else:
                                # Nothing left after stripping.
                                value = None
                            block_fields[addr_key] = value

                    try:
                        block_fields['from_num'], block_fields['to_num'] = \
                            make_block_numbers(block_fields['left_from_num'],
                                               block_fields['left_to_num'],
                                               block_fields['right_from_num'],
                                               block_fields['right_to_num'])
                    except ValueError, e:
                        # No usable address range: log and move on to the
                        # next generated block.
                        logger.warn('Skipping %s: %s' % (block_fields['pretty_name'], e))
                        continue

                    # Separate out the uniquely identifying fields so
                    # we can avoid duplicate blocks.
                    # NOTE this doesn't work if you're updating from a more
                    # recent shapefile and the street has significant
                    # changes - eg. the street name has changed, or the
                    # address range has changed, or the block has split...
                    primary_fields = {}
                    primary_field_keys = ('street_slug',
                                          'from_num', 'to_num',
                                          'left_city', 'right_city',
                                          'left_zip', 'right_zip',
                                          'left_state', 'right_state',
                                          )
                    for key in primary_field_keys:
                        if block_fields[key] != u'':
                            # Some empty fields are fixed
                            # automatically by clean(), so empty strings
                            # are left out of the lookup.
                            primary_fields[key] = block_fields[key]

                    existing = list(Block.objects.filter(**primary_fields))
                    if not existing:
                        # No match: build a new block.
                        block = Block(**block_fields)
                        num_created += 1
                    elif len(existing) == 1:
                        # Exactly one match: overwrite its attributes with
                        # the freshly computed values.
                        block = existing[0]
                        logger.debug(u"Block %s already exists" % unicode(existing[0]))
                        for key, val in block_fields.items():
                            setattr(block, key, val)
                    else:
                        # Ambiguous match: do not guess which one to update.
                        logger.warn("Multiple existing blocks like %s, skipping"
                                    % existing[0])
                        continue
                    try:
                        block.full_clean()
                    except ValidationError:
                        # odd bug: sometimes we get ValidationError even when
                        # the data looks good, and then cleaning again works???
                        try:
                            block.full_clean()
                        except ValidationError, e:
                            logger.warn("validation error on %s, skipping" % str(block))
                            logger.warn(e)
                            continue
                    # NOTE: this message is logged for updated blocks too,
                    # not only newly created ones.
                    logger.debug("CREATING %s" % unicode(block))
                    block.save()
                    if parent_id is None:
                        # First block saved for this feature becomes the parent.
                        parent_id = block.id
                    else:
                        # Later blocks for the same feature point at the
                        # first one; a second save stores the link.
                        block.parent_id = parent_id
                        block.save()
                    logger.debug('%d\tCreated block %s for feature %d' % (num_created, block, feature.get('TLID')))
Exemple #6
0
    def save(self):
        """Create or update a ``Block`` for every block generated from ``self.layer``.

        If ``self.reset`` is true, all existing ``Block`` rows are deleted
        first.  Then, for each feature not rejected by ``skip_feature()``,
        every dict yielded by ``gen_blocks()`` is decoded to unicode, given
        a geometry, pretty names and a street slug, has its address numbers
        and geocoding fields standardized, and is matched against existing
        blocks by its uniquely identifying fields:

        * no match (under either the current or the old-style slug):
          a new ``Block`` is built;
        * exactly one match: that block's attributes are overwritten;
        * several matches: a warning is logged and the block is skipped.

        Blocks that fail ``make_block_numbers()`` or validation are logged
        and skipped.  The first block saved for a feature becomes the
        parent of any further blocks saved for the same feature.

        Address numbers that are not strings are converted with
        ``str(int(...))``; values ``int()`` rejects (including ``None``)
        become ``None``.

        NOTE(review): ``start`` and ``num_existing`` are written but never
        read in the portion of the method visible here, and nothing is
        returned -- confirm whether the method continues beyond this point.
        """
        if self.reset:
            logger.warn(
                "Deleting all Block instances and anything that refers to them!"
            )
            Block.objects.all().delete()
        import time
        # Imported once here rather than on every loop iteration.
        from ebpub.geocoder.parser.parsing import STANDARDIZERS
        from ebpub.geocoder.parser.parsing import number_standardizer
        start = time.time()
        num_created = 0
        num_existing = 0
        for feature in self.layer:
            # Id of the first block saved for this feature, if any.
            parent_id = None
            if not self.skip_feature(feature):
                for block_fields in self.gen_blocks(feature):

                    # Usually (at least in Boston data) there is only
                    # 1 block per feature.  But sometimes there are
                    # multiple names for one street, eg.
                    # "N. Commercial Wharf" and "Commercial Wharf N.";
                    # in that case those would be yielded by gen_blocks() as
                    # two separate blocks. Is that intentional, or a bug?

                    # Ensure we have unicode: byte strings are decoded
                    # using self.encoding.
                    for key, val in block_fields.items():
                        if isinstance(val, str):
                            block_fields[key] = val.decode(self.encoding)

                    # 4326 is presumably the target SRID -- confirm against
                    # geos_with_projection().
                    block_fields['geom'] = geos_with_projection(
                        feature.geom, 4326)
                    block_fields['prefix'] = make_pretty_prefix(
                        block_fields['prefix'])

                    block_fields['street_pretty_name'], block_fields[
                        'pretty_name'] = make_pretty_name(
                            block_fields['left_from_num'],
                            block_fields['left_to_num'],
                            block_fields['right_from_num'],
                            block_fields['right_to_num'],
                            block_fields['predir'], block_fields['prefix'],
                            block_fields['street'], block_fields['suffix'],
                            block_fields['postdir'])

                    # The slug is built from prefix + street + suffix.
                    block_fields['street_slug'] = slugify(u' '.join(
                        (block_fields['prefix'], block_fields['street'],
                         block_fields['suffix'])))

                    # Watch out for addresses like '247B' which can't be
                    # saved as an IntegerField.
                    # But do this *after* making pretty names.
                    # Also attempt to fix up addresses like '19-47',
                    # by just using the lower number.  This will give
                    # misleading output, but it's probably better than
                    # discarding blocks.
                    for addr_key in ('left_from_num', 'left_to_num',
                                     'right_from_num', 'right_to_num'):
                        raw_value = block_fields[addr_key]
                        if isinstance(raw_value, basestring):
                            value = number_standardizer(raw_value.strip())
                            # Any falsy result is stored as None.
                            if not value:
                                value = None
                        else:
                            # Convert the field's own value.  The previous
                            # code read the loop-local ``value`` here, which
                            # is unbound on the first pass and stale (left
                            # over from the previous key) afterwards.
                            try:
                                value = str(int(raw_value))
                            except (ValueError, TypeError):
                                value = None
                        block_fields[addr_key] = value

                    try:
                        block_fields['from_num'], block_fields['to_num'] = \
                            make_block_numbers(block_fields['left_from_num'],
                                               block_fields['left_to_num'],
                                               block_fields['right_from_num'],
                                               block_fields['right_to_num'])
                    except ValueError as e:
                        # No usable address range: log and move on to the
                        # next generated block.
                        logger.warn('Skipping %s: %s' %
                                    (block_fields['pretty_name'], e))
                        continue

                    # After doing pretty names etc, standardize the fields
                    # that get used for geocoding, since the geocoder
                    # searches for the standardized version.
                    for key, standardizer in STANDARDIZERS.items():
                        if key in block_fields:
                            if key == 'street' and block_fields['prefix']:
                                # Special case: "US Highway 101", not "US Highway 101st".
                                continue

                            block_fields[key] = standardizer(block_fields[key])

                    # Separate out the uniquely identifying fields so
                    # we can avoid duplicate blocks.
                    # NOTE this doesn't work if you're updating from a more
                    # recent shapefile and the street has significant
                    # changes - eg. the street name has changed, or the
                    # address range has changed, or the block has split...
                    # see #257. http://developer.openblockproject.org/ticket/257
                    primary_fields = {}
                    primary_field_keys = (
                        'street_slug',
                        'from_num',
                        'to_num',
                        'left_city',
                        'right_city',
                        'left_zip',
                        'right_zip',
                        'left_state',
                        'right_state',
                    )
                    for key in primary_field_keys:
                        if block_fields[key] != u'':
                            # Some empty fields are fixed
                            # automatically by clean(), so empty strings
                            # are left out of the lookup.
                            primary_fields[key] = block_fields[key]

                    existing = list(Block.objects.filter(**primary_fields))
                    if not existing:
                        # Check the old-style way we used to make street slugs
                        # prior to fixing issue #264... we need to keep this
                        # code around indefinitely in case we are reloading the
                        # blocks data and need to overwrite blocks that have
                        # the old bad slug.  Sadly this probably can't just be
                        # fixed by a migration.
                        # (The old slug omits the prefix.)
                        _old_street_slug = slugify(u' '.join(
                            (block_fields['street'], block_fields['suffix'])))
                        _old_primary_fields = primary_fields.copy()
                        _old_primary_fields['street_slug'] = _old_street_slug
                        existing = list(
                            Block.objects.filter(**_old_primary_fields))
                        if not existing:
                            # Neither lookup matched: build a new block.
                            block = Block(**block_fields)
                            num_created += 1
                            logger.debug("CREATING %s" % unicode(block))

                    if len(existing) == 1:
                        # Exactly one match: overwrite its attributes with
                        # the freshly computed values.
                        num_existing += 1
                        block = existing[0]
                        logger.debug(u"Block %s already exists" %
                                     unicode(existing[0]))
                        for key, val in block_fields.items():
                            setattr(block, key, val)
                    elif len(existing) > 1:
                        # Ambiguous match: do not guess which one to update.
                        num_existing += len(existing)
                        logger.warn(
                            "Multiple existing blocks like %s, skipping" %
                            existing[0])
                        continue
                    try:
                        block.full_clean()
                    except ValidationError:
                        # odd bug: sometimes we get ValidationError even when
                        # the data looks good, and then cleaning again works???
                        try:
                            block.full_clean()
                        except ValidationError as e:
                            logger.warn("validation error on %s, skipping" %
                                        str(block))
                            logger.warn(e)
                            continue
                    block.save()
                    if parent_id is None:
                        # First block saved for this feature becomes the parent.
                        parent_id = block.id
                    else:
                        # Later blocks for the same feature point at the
                        # first one; a second save stores the link.
                        block.parent_id = parent_id
                        block.save()
                    # NOTE: this message is logged for updated blocks too,
                    # not only newly created ones.
                    logger.debug('%d\tCreated block %s for feature %d' %
                                 (num_created, block, feature.fid))