def collapse_zip_codes(self):
    """Merge per-county ZIP fragments into one geometry per ZIP Code.

    The ESRI ZIP Code layer breaks ZIP Codes up along county
    boundaries, so before doing anything else we collapse all the
    fragments that share a ZIP into a single (Multi)Polygon, keyed by
    ZIP in ``self.zipcode_geoms``.  Calling this again after the dict
    has been populated is a no-op.
    """
    if self.zipcode_geoms:
        # Already collapsed on an earlier call.
        return
    for feature in self.layer:
        zipcode = feature.get(self.name_field)
        geom = geos_with_projection(feature.geom)
        if zipcode not in self.zipcode_geoms:
            # First fragment seen for this ZIP: store it as-is.
            self.zipcode_geoms[zipcode] = geom
            continue
        # If the new geom is a MultiPolygon we "unroll" it into its
        # constituent polygons so they can be appended individually.
        if isinstance(geom, MultiPolygon):
            pieces = list(geom)
        else:
            pieces = [geom]
        accumulated = self.zipcode_geoms[zipcode]
        if isinstance(accumulated, MultiPolygon):
            # Already a collection: just append the new polygons.
            accumulated.extend(pieces)
        else:
            # Promote the single stored polygon to a MultiPolygon,
            # then append the new polygons to it.
            merged = MultiPolygon([accumulated])
            merged.extend(pieces)
            self.zipcode_geoms[zipcode] = merged
def create_location(self, name, location_type, geom, display_order=0):
    """Create (or look up) a Location named ``name`` covering ``geom``.

    ``location_type`` may be an object with an ``id`` attribute or the
    integer id itself.  Returns the ``created`` flag from
    ``get_or_create`` (True if new), or None when
    ``should_create_location()`` vetoes the record.  On a slug
    collision (IntegrityError), the slug is munged with a numeric
    suffix and the create is retried once.
    """
    geom = geos_with_projection(geom, 4326)
    geom = ensure_valid(geom, name)
    geom = flatten_geomcollection(geom)
    type_id = location_type if isinstance(location_type, int) else location_type.id
    lookup = {
        'name': name,
        'slug': slugify(name),
        'location': geom,
        'location_type_id': type_id,
        'city': self.metro_name,
        'source': self.source,
        'is_public': True,
    }
    if not self.should_create_location(lookup):
        return
    # Values only used when the row doesn't exist yet.
    lookup['defaults'] = {
        'creation_date': self.now,
        'last_mod_date': self.now,
        'display_order': display_order,
        'normalized_name': normalize(name),
        # transform(..., True) clones before reprojecting to World
        # Mercator, so ``geom`` itself is left in 4326.
        'area': geom.transform(3395, True).area,
    }
    try:
        loc, created = Location.objects.get_or_create(**lookup)
    except IntegrityError:
        # Usually this means two towns with the same slug.
        # Try to fix that.
        old_slug = lookup['slug']
        clashes = Location.objects.filter(slug=old_slug).count()
        if not clashes:
            raise
        new_slug = slugify('%s-%s' % (old_slug, clashes + 1))
        logger.info("Munged slug %s to %s to make it unique"
                    % (old_slug, new_slug))
        lookup['slug'] = new_slug
        loc, created = Location.objects.get_or_create(**lookup)
    logger.info('%s %s %s' % ('Created' if created else 'Already had',
                              self.location_type.name, loc))
    logger.info('Populating newsitem locations ... ')
    populate_ni_loc(loc)
    logger.info('done.\n')
    return created
def gen_blocks(self, feature):
    """Yield a single dict of Block field values for ``feature``.

    Reads the shapefile attributes (RZIP/LZIP, FROMRIGHT/TORIGHT,
    FROMLEFT/TOLEFT, STREET, PRETYPE, TYPE, PREDIR, SUFDIR), normalizes
    them, resolves the containing city by spatial intersection, and
    hard-codes the state to 'NC'.
    """
    get = feature.get
    block_fields = {
        'right_zip': get('RZIP'),
        'left_zip': get('LZIP'),
        'right_from_num': get('FROMRIGHT'),
        'right_to_num': get('TORIGHT'),
        'left_from_num': get('FROMLEFT'),
        'left_to_num': get('TOLEFT'),
        'street': get('STREET').upper().strip(),
        'prefix': get('PRETYPE').upper().strip(),
        'suffix': get('TYPE').upper().strip(),
    }
    # For predir and postdir, OpenBlock expects either:
    #  1) N, S, E, or W;
    #  2) 2-letter combinations of these (NE, SW, etc); or
    #  3) blank/Null
    # In our current shapefiles, most of the data fits this paradigm,
    # but there are a few '0' entries present, presumably intended to
    # mean Null, so we strip everything that isn't one of those letters.
    block_fields['predir'] = re.sub('[^NESW]', '',
                                    get('PREDIR').upper().strip())
    block_fields['postdir'] = re.sub('[^NESW]', '',
                                     get('SUFDIR').upper().strip())
    for side in ('left', 'right'):
        from_key = '%s_from_num' % side
        to_key = '%s_to_num' % side
        # A from-number of 0 alongside an even to-number gets bumped to
        # 2 — presumably 0 stands in for "missing" here; TODO confirm.
        if block_fields[from_key] == 0 and not (block_fields[to_key] % 2):
            block_fields[from_key] = 2
        # As of OpenBlock 1.2 these values must be strings, else they
        # get turned into None.
        block_fields[from_key] = str(block_fields[from_key])
        block_fields[to_key] = str(block_fields[to_key])
    geom = geos_with_projection(feature.geom, 4326)
    cities = list(get_city_locations().filter(location__intersects=geom))
    if cities:
        city_name = cities[0].name.upper()
    else:
        city_name = ''
    for side in ('right', 'left'):
        block_fields['%s_city' % side] = city_name
        block_fields['%s_state' % side] = 'NC'
    yield block_fields.copy()
def save(self):
    """Create or update a Block for every feature in ``self.layer``.

    When ``self.reset`` is set, wipes all existing Block rows first.
    For each feature that survives ``skip_feature()``, each field-dict
    yielded by ``gen_blocks()`` is normalized (unicode, geometry,
    pretty names, address numbers, geocoder standardization), matched
    against existing Blocks by a tuple of uniquely-identifying fields
    (with a fallback to the pre-#264 street-slug scheme), and then
    created or updated.  Blocks after the first for a feature get the
    first block's id as ``parent_id``.
    """
    if self.reset:
        logger.warn("Deleting all Block instances and anything that refers to them!")
        Block.objects.all().delete()
    import time
    # NOTE(review): ``start`` and ``num_existing`` are tracked but
    # never reported within this method — possibly leftovers from a
    # removed summary log.
    start = time.time()
    num_created = 0
    num_existing = 0
    for feature in self.layer:
        parent_id = None
        if not self.skip_feature(feature):
            for block_fields in self.gen_blocks(feature):
                # Usually (at least in Boston data) there is only
                # 1 block per feature.  But sometimes there are
                # multiple names for one street, eg.
                # "N. Commercial Wharf" and "Commercial Wharf N.";
                # in that case those would be yielded by gen_blocks() as
                # two separate blocks. Is that intentional, or a bug?

                # Ensure we have unicode.
                for key, val in block_fields.items():
                    if isinstance(val, str):
                        block_fields[key] = val.decode(self.encoding)

                block_fields['geom'] = geos_with_projection(feature.geom, 4326)
                block_fields['prefix'] = make_pretty_prefix(block_fields['prefix'])
                block_fields['street_pretty_name'], block_fields['pretty_name'] = make_pretty_name(
                    block_fields['left_from_num'],
                    block_fields['left_to_num'],
                    block_fields['right_from_num'],
                    block_fields['right_to_num'],
                    block_fields['predir'],
                    block_fields['prefix'],
                    block_fields['street'],
                    block_fields['suffix'],
                    block_fields['postdir']
                )
                block_fields['street_slug'] = slugify(
                    u' '.join((block_fields['prefix'],
                               block_fields['street'],
                               block_fields['suffix'])))

                # Watch out for addresses like '247B' which can't be
                # saved as an IntegerField.
                # But do this *after* making pretty names.
                # Also attempt to fix up addresses like '19-47',
                # by just using the lower number. This will give
                # misleading output, but it's probably better than
                # discarding blocks.
                for addr_key in ('left_from_num', 'left_to_num',
                                 'right_from_num', 'right_to_num'):
                    if isinstance(block_fields[addr_key], basestring):
                        from ebpub.geocoder.parser.parsing import number_standardizer
                        value = number_standardizer(block_fields[addr_key].strip())
                        if not value:
                            value = None
                    else:
                        # Non-string values are nulled out here.
                        value = None
                    block_fields[addr_key] = value
                try:
                    block_fields['from_num'], block_fields['to_num'] = \
                        make_block_numbers(block_fields['left_from_num'],
                                           block_fields['left_to_num'],
                                           block_fields['right_from_num'],
                                           block_fields['right_to_num'])
                except ValueError, e:
                    logger.warn('Skipping %s: %s' % (block_fields['pretty_name'], e))
                    continue

                # After doing pretty names etc, standardize the fields
                # that get used for geocoding, since the geocoder
                # searches for the standardized version.
                from ebpub.geocoder.parser.parsing import STANDARDIZERS
                for key, standardizer in STANDARDIZERS.items():
                    if key in block_fields:
                        if key == 'street' and block_fields['prefix']:
                            # Special case: "US Highway 101", not "US Highway 101st".
                            continue
                        block_fields[key] = standardizer(block_fields[key])

                # Separate out the uniquely identifying fields so
                # we can avoid duplicate blocks.
                # NOTE this doesn't work if you're updating from a more
                # recent shapefile and the street has significant
                # changes - eg. the street name has changed, or the
                # address range has changed, or the block has split...
                # see #257. http://developer.openblockproject.org/ticket/257
                primary_fields = {}
                primary_field_keys = ('street_slug', 'from_num', 'to_num',
                                      'left_city', 'right_city',
                                      'left_zip', 'right_zip',
                                      'left_state', 'right_state',
                                      )
                for key in primary_field_keys:
                    if block_fields[key] != u'':
                        # Some empty fields are fixed
                        # automatically by clean().
                        primary_fields[key] = block_fields[key]
                existing = list(Block.objects.filter(**primary_fields))
                if not existing:
                    # Check the old-style way we used to make street slugs
                    # prior to fixing issue #264... we need to keep this
                    # code around indefinitely in case we are reloading the
                    # blocks data and need to overwrite blocks that have
                    # the old bad slug. Sadly this probably can't just be
                    # fixed by a migration.
                    _old_street_slug = slugify(
                        u' '.join((block_fields['street'],
                                   block_fields['suffix'])))
                    _old_primary_fields = primary_fields.copy()
                    _old_primary_fields['street_slug'] = _old_street_slug
                    existing = list(Block.objects.filter(**_old_primary_fields))
                if not existing:
                    block = Block(**block_fields)
                    num_created += 1
                    logger.debug("CREATING %s" % unicode(block))
                # Deliberately ``if``, not ``elif``: the branch above may
                # have refilled ``existing`` via the old-slug lookup —
                # but if it's still empty, len(existing) == 1 is False
                # and the freshly-built ``block`` is kept.
                if len(existing) == 1:
                    num_existing += 1
                    block = existing[0]
                    logger.debug(u"Block %s already exists" % unicode(existing[0]))
                    # Overwrite every field on the existing row with the
                    # freshly imported values.
                    for key, val in block_fields.items():
                        setattr(block, key, val)
                elif len(existing) > 1:
                    num_existing += len(existing)
                    logger.warn("Multiple existing blocks like %s, skipping" % existing[0])
                    continue
                try:
                    block.full_clean()
                except ValidationError:
                    # odd bug: sometimes we get ValidationError even when
                    # the data looks good, and then cleaning again works???
                    try:
                        block.full_clean()
                    except ValidationError, e:
                        logger.warn("validation error on %s, skipping" % str(block))
                        logger.warn(e)
                        continue
                block.save()
                if parent_id is None:
                    # First block saved for this feature becomes the parent.
                    parent_id = block.id
                else:
                    block.parent_id = parent_id
                    block.save()
                logger.debug('%d\tCreated block %s for feature %d'
                             % (num_created, block, feature.fid))
def save(self):
    """Create or update a Block for every feature in ``self.layer``.

    TIGER-flavoured importer loop: normalizes each field-dict yielded
    by ``gen_blocks()`` (unicode, geometry, pretty names, address
    numbers), deduplicates against existing Blocks on a tuple of
    uniquely-identifying fields, then creates or updates the row.
    Blocks after the first for a feature get the first block's id as
    ``parent_id``.
    """
    import time
    # NOTE(review): ``start`` is assigned but never used within this
    # method — possibly a leftover from a removed timing log.
    start = time.time()
    num_created = 0
    for feature in self.layer:
        parent_id = None
        if not self.skip_feature(feature):
            for block_fields in self.gen_blocks(feature):
                # Usually (at least in Boston data) there is only
                # 1 block per feature.  But sometimes there are
                # multiple names for one street, eg.
                # "N. Commercial Wharf" and "Commercial Wharf N.";
                # in that case those would be yielded by gen_blocks() as
                # two separate blocks. Is that intentional, or a bug?

                # Ensure we have unicode.
                for key, val in block_fields.items():
                    if isinstance(val, str):
                        block_fields[key] = val.decode(self.encoding)

                block_fields['geom'] = geos_with_projection(feature.geom, 4326)
                block_fields['street_pretty_name'], block_fields['pretty_name'] = make_pretty_name(
                    block_fields['left_from_num'],
                    block_fields['left_to_num'],
                    block_fields['right_from_num'],
                    block_fields['right_to_num'],
                    block_fields['predir'],
                    block_fields['street'],
                    block_fields['suffix'],
                    block_fields['postdir']
                )
                block_fields['street_slug'] = slugify(u' '.join((block_fields['street'], block_fields['suffix'])))

                # Watch out for addresses like '247B' which can't be
                # saved as an IntegerField.
                # But do this *after* making pretty names.
                for addr_key in ('left_from_num', 'left_to_num', 'right_from_num', 'right_to_num'):
                    if isinstance(block_fields[addr_key], basestring):
                        value = block_fields[addr_key].rstrip(string.letters)
                        # Also attempt to fix up addresses like
                        # '19-47', by just using the lower number.
                        # This will give misleading output, but
                        # it's probably better than discarding blocks.
                        value = value.split('-')[0]
                        if value:
                            try:
                                value = int(value)
                            except ValueError:
                                logger.warn("Omitting weird value %r for %r" % (value, addr_key))
                                value = None
                        else:
                            value = None
                        # Non-string values (e.g. already numeric) are
                        # left untouched — only strings are rewritten.
                        block_fields[addr_key] = value
                try:
                    block_fields['from_num'], block_fields['to_num'] = \
                        make_block_numbers(block_fields['left_from_num'],
                                           block_fields['left_to_num'],
                                           block_fields['right_from_num'],
                                           block_fields['right_to_num'])
                except ValueError, e:
                    logger.warn('Skipping %s: %s' % (block_fields['pretty_name'], e))
                    continue

                # Separate out the uniquely identifying fields so
                # we can avoid duplicate blocks.
                # NOTE this doesn't work if you're updating from a more
                # recent shapefile and the street has significant
                # changes - eg. the street name has changed, or the
                # address range has changed, or the block has split...
                primary_fields = {}
                primary_field_keys = ('street_slug', 'from_num', 'to_num',
                                      'left_city', 'right_city',
                                      'left_zip', 'right_zip',
                                      'left_state', 'right_state',
                                      )
                for key in primary_field_keys:
                    if block_fields[key] != u'':
                        # Some empty fields are fixed
                        # automatically by clean(), so they are
                        # omitted from the uniqueness lookup.
                        primary_fields[key] = block_fields[key]
                existing = list(Block.objects.filter(**primary_fields))
                if not existing:
                    block = Block(**block_fields)
                    num_created += 1
                elif len(existing) == 1:
                    block = existing[0]
                    logger.debug(u"Block %s already exists" % unicode(existing[0]))
                    # Overwrite every field on the existing row with the
                    # freshly imported values.
                    for key, val in block_fields.items():
                        setattr(block, key, val)
                else:
                    logger.warn("Multiple existing blocks like %s, skipping" % existing[0])
                    continue
                try:
                    block.full_clean()
                except ValidationError:
                    # odd bug: sometimes we get ValidationError even when
                    # the data looks good, and then cleaning again works???
                    try:
                        block.full_clean()
                    except ValidationError, e:
                        logger.warn("validation error on %s, skipping" % str(block))
                        logger.warn(e)
                        continue
                logger.debug("CREATING %s" % unicode(block))
                block.save()
                if parent_id is None:
                    # First block saved for this feature becomes the parent.
                    parent_id = block.id
                else:
                    block.parent_id = parent_id
                    block.save()
                logger.debug('%d\tCreated block %s for feature %d' % (num_created, block, feature.get('TLID')))
def save(self):
    """Create or update a Block for every feature in ``self.layer``.

    When ``self.reset`` is set, wipes all existing Block rows first.
    Normalizes each field-dict yielded by ``gen_blocks()`` (unicode,
    geometry, pretty names, address numbers, geocoder
    standardization), deduplicates against existing Blocks on a tuple
    of uniquely-identifying fields (with a fallback to the pre-#264
    street-slug scheme), then creates or updates the row.  Blocks
    after the first for a feature get the first block's id as
    ``parent_id``.
    """
    if self.reset:
        logger.warn(
            "Deleting all Block instances and anything that refers to them!"
        )
        Block.objects.all().delete()
    import time
    # NOTE(review): ``start`` and ``num_existing`` are tracked but
    # never reported within this method — possibly leftovers from a
    # removed summary log.
    start = time.time()
    num_created = 0
    num_existing = 0
    for feature in self.layer:
        parent_id = None
        if not self.skip_feature(feature):
            for block_fields in self.gen_blocks(feature):
                # Usually (at least in Boston data) there is only
                # 1 block per feature.  But sometimes there are
                # multiple names for one street, eg.
                # "N. Commercial Wharf" and "Commercial Wharf N.";
                # in that case those would be yielded by gen_blocks() as
                # two separate blocks. Is that intentional, or a bug?

                # Ensure we have unicode.
                for key, val in block_fields.items():
                    if isinstance(val, str):
                        block_fields[key] = val.decode(self.encoding)

                block_fields['geom'] = geos_with_projection(
                    feature.geom, 4326)
                block_fields['prefix'] = make_pretty_prefix(
                    block_fields['prefix'])
                block_fields['street_pretty_name'], block_fields[
                    'pretty_name'] = make_pretty_name(
                        block_fields['left_from_num'],
                        block_fields['left_to_num'],
                        block_fields['right_from_num'],
                        block_fields['right_to_num'],
                        block_fields['predir'], block_fields['prefix'],
                        block_fields['street'], block_fields['suffix'],
                        block_fields['postdir'])
                block_fields['street_slug'] = slugify(u' '.join(
                    (block_fields['prefix'], block_fields['street'],
                     block_fields['suffix'])))

                # Watch out for addresses like '247B' which can't be
                # saved as an IntegerField.
                # But do this *after* making pretty names.
                # Also attempt to fix up addresses like '19-47',
                # by just using the lower number. This will give
                # misleading output, but it's probably better than
                # discarding blocks.
                for addr_key in ('left_from_num', 'left_to_num',
                                 'right_from_num', 'right_to_num'):
                    if isinstance(block_fields[addr_key], basestring):
                        from ebpub.geocoder.parser.parsing import number_standardizer
                        value = number_standardizer(
                            block_fields[addr_key].strip())
                        if not value:
                            value = None
                        else:
                            try:
                                # Normalize to the string form of the int.
                                value = str(int(value))
                            except (ValueError, TypeError):
                                value = None
                        # NOTE(review): non-string values are left
                        # unmodified in this variant — confirm that's
                        # intended.
                        block_fields[addr_key] = value
                try:
                    block_fields['from_num'], block_fields['to_num'] = \
                        make_block_numbers(block_fields['left_from_num'],
                                           block_fields['left_to_num'],
                                           block_fields['right_from_num'],
                                           block_fields['right_to_num'])
                except ValueError, e:
                    logger.warn('Skipping %s: %s' %
                                (block_fields['pretty_name'], e))
                    continue

                # After doing pretty names etc, standardize the fields
                # that get used for geocoding, since the geocoder
                # searches for the standardized version.
                from ebpub.geocoder.parser.parsing import STANDARDIZERS
                for key, standardizer in STANDARDIZERS.items():
                    if key in block_fields:
                        if key == 'street' and block_fields['prefix']:
                            # Special case: "US Highway 101", not "US Highway 101st".
                            continue
                        block_fields[key] = standardizer(block_fields[key])

                # Separate out the uniquely identifying fields so
                # we can avoid duplicate blocks.
                # NOTE this doesn't work if you're updating from a more
                # recent shapefile and the street has significant
                # changes - eg. the street name has changed, or the
                # address range has changed, or the block has split...
                # see #257. http://developer.openblockproject.org/ticket/257
                primary_fields = {}
                primary_field_keys = (
                    'street_slug',
                    'from_num',
                    'to_num',
                    'left_city',
                    'right_city',
                    'left_zip',
                    'right_zip',
                    'left_state',
                    'right_state',
                )
                for key in primary_field_keys:
                    if block_fields[key] != u'':
                        # Some empty fields are fixed
                        # automatically by clean().
                        primary_fields[key] = block_fields[key]
                existing = list(Block.objects.filter(**primary_fields))
                if not existing:
                    # Check the old-style way we used to make street slugs
                    # prior to fixing issue #264... we need to keep this
                    # code around indefinitely in case we are reloading the
                    # blocks data and need to overwrite blocks that have
                    # the old bad slug. Sadly this probably can't just be
                    # fixed by a migration.
                    _old_street_slug = slugify(u' '.join(
                        (block_fields['street'], block_fields['suffix'])))
                    _old_primary_fields = primary_fields.copy()
                    _old_primary_fields['street_slug'] = _old_street_slug
                    existing = list(
                        Block.objects.filter(**_old_primary_fields))
                if not existing:
                    block = Block(**block_fields)
                    num_created += 1
                    logger.debug("CREATING %s" % unicode(block))
                # Deliberately ``if``, not ``elif``: the branch above may
                # have refilled ``existing`` via the old-slug lookup —
                # but if it's still empty, len(existing) == 1 is False
                # and the freshly-built ``block`` is kept.
                if len(existing) == 1:
                    num_existing += 1
                    block = existing[0]
                    logger.debug(u"Block %s already exists" %
                                 unicode(existing[0]))
                    # Overwrite every field on the existing row with the
                    # freshly imported values.
                    for key, val in block_fields.items():
                        setattr(block, key, val)
                elif len(existing) > 1:
                    num_existing += len(existing)
                    logger.warn(
                        "Multiple existing blocks like %s, skipping" %
                        existing[0])
                    continue
                try:
                    block.full_clean()
                except ValidationError:
                    # odd bug: sometimes we get ValidationError even when
                    # the data looks good, and then cleaning again works???
                    try:
                        block.full_clean()
                    except ValidationError, e:
                        logger.warn("validation error on %s, skipping" %
                                    str(block))
                        logger.warn(e)
                        continue
                block.save()
                if parent_id is None:
                    # First block saved for this feature becomes the parent.
                    parent_id = block.id
                else:
                    block.parent_id = parent_id
                    block.save()
                logger.debug('%d\tCreated block %s for feature %d' %
                             (num_created, block, feature.fid))