def save(self, name_field):
    """
    Get or create a Location for each feature in ``self.layer``.

    ``name_field`` is the feature attribute that holds the location name.
    Features rejected by ``self.should_create_location()`` are skipped.
    The remaining locations are processed in name order, and that
    position is used as the ``display_order`` default.

    Returns the number of Locations that were newly created.
    """
    be_loud = self.verbose
    data_source = self.source

    candidates = []
    for feat in self.layer:
        loc_name = feat.get(name_field)
        # Transform to SRID 4326, then clean up the geometry.
        shape = feat.geom.transform(4326, True).geos
        shape = ensure_valid(shape, loc_name)
        shape = flatten_geomcollection(shape)
        candidate = {
            'name': loc_name,
            'slug': slugify(loc_name),
            'location_type': self.get_location_type(feat),
            'location': shape,
            'city': self.metro_name,
            'source': data_source,
            'is_public': True,
        }
        if self.should_create_location(candidate):
            candidates.append(candidate)

    candidates.sort(key=lambda c: c['name'])

    created_count = 0
    for order, candidate in enumerate(candidates):
        lookup = dict(candidate)
        # These values are only applied when a new row is created.
        lookup['defaults'] = {
            'creation_date': self.now,
            'last_mod_date': self.now,
            'display_order': order,
            'normalized_name': normalize(candidate['name']),
            'area': candidate['location'].transform(3395, True).area,
        }
        try:
            loc, created = Location.objects.get_or_create(**lookup)
        except IntegrityError:
            # Usually this means two towns with the same slug.
            # Try to fix that.
            taken_slug = lookup['slug']
            clashes = Location.objects.filter(slug=taken_slug).count()
            if not clashes:
                raise
            new_slug = slugify('%s-%s' % (taken_slug, clashes + 1))
            if be_loud:
                print >> sys.stderr, "Munged slug %s to %s to make it unique" % (taken_slug, new_slug)
            lookup['slug'] = new_slug
            loc, created = Location.objects.get_or_create(**lookup)

        if created:
            created_count += 1
        if be_loud:
            status = 'Created' if created else 'Already had'
            print >> sys.stderr, '%s %s %s' % (status, self.location_type.name, loc)
            sys.stderr.write('Populating newsitem locations ... ')
        populate_ni_loc(loc)
        if be_loud:
            sys.stderr.write('done.\n')
    return created_count
def create_location(self, name, location_type, geom, display_order=0):
    """
    Get or create a single Location called ``name``.

    ``location_type`` is either an int id or an object with an ``id``
    attribute.  ``geom`` is unwrapped via ``.geos`` when it has that
    attribute, and is brought to SRID 4326 before saving.

    Returns None when ``self.should_create_location()`` rejects the
    location; otherwise returns the ``created`` flag from
    ``get_or_create`` (True if a new row was made).
    """
    data_source = self.source

    # Normalize the geometry: unwrap, force SRID 4326, then clean up.
    if hasattr(geom, 'geos'):
        geom = geom.geos
    if geom.srid is None:
        geom.srid = 4326
    elif geom.srid != 4326:
        geom = geom.transform(4326, True)
    geom = ensure_valid(geom, name)
    geom = flatten_geomcollection(geom)

    type_id = location_type if isinstance(location_type, int) else location_type.id

    lookup = {
        'name': name,
        'slug': slugify(name),
        'location': geom,
        'location_type_id': type_id,
        'city': self.metro_name,
        'source': data_source,
        'is_public': True,
    }
    if not self.should_create_location(lookup):
        return

    # These values are only applied when a new row is created.
    lookup['defaults'] = {
        'creation_date': self.now,
        'last_mod_date': self.now,
        'display_order': display_order,
        'normalized_name': normalize(name),
        'area': geom.transform(3395, True).area,
    }
    try:
        loc, created = Location.objects.get_or_create(**lookup)
    except IntegrityError:
        # Usually this means two towns with the same slug.
        # Try to fix that.
        taken_slug = lookup['slug']
        clashes = Location.objects.filter(slug=taken_slug).count()
        if not clashes:
            raise
        new_slug = slugify('%s-%s' % (taken_slug, clashes + 1))
        logger.info("Munged slug %s to %s to make it unique" % (taken_slug, new_slug))
        lookup['slug'] = new_slug
        loc, created = Location.objects.get_or_create(**lookup)

    status = 'Created' if created else 'Already had'
    # NOTE(review): this logs self.location_type, not the ``location_type``
    # argument -- confirm that is intended.
    logger.info('%s %s %s' % (status, self.location_type.name, loc))
    logger.info('Populating newsitem locations ... ')
    populate_ni_loc(loc)
    logger.info('done.\n')
    return created
def slug_from_blocks(block_a, block_b):
    """
    Build an "<street a>-and-<street b>" slug for two blocks.

    Progressively shorter street names are tried until the slug fits in
    64 characters; if even the shortest form is too long, it is cut off
    at 64 characters.
    """
    max_length = 64
    # Ordered from most to least descriptive.
    name_builders = (
        # Full name, including directionals.
        make_dir_street_name,
        # If it's too long for the slug field, drop the directionals
        lambda b: make_street_pretty_name(b.prefix, b.street, b.suffix),
        # If it's still too long, drop the suffixes
        lambda b: make_street_pretty_name(b.prefix, b.street, u''),
        # If it's *still* too long, drop the prefixes too
        lambda b: b.street,
    )
    for build_name in name_builders:
        slug = u"%s-and-%s" % (slugify(build_name(block_a)),
                               slugify(build_name(block_b)))
        if len(slug) <= max_length:
            break
    return slug[:max_length]
def save(self, verbose=True):
    """
    Create and save a Block for every set of fields produced by
    ``self.gen_blocks()`` for each feature that is not skipped.

    When a feature yields several blocks, the first saved block becomes
    the parent of the others (via ``parent_id``).

    Returns the number of blocks created.
    """
    created_total = 0
    for feature in self.layer:
        if self.skip_feature(feature):
            continue
        first_block_id = None
        for fields in self.gen_blocks(feature):
            block = Block(**fields)
            block.geom = feature.geom.geos
            street_pretty, block_pretty = make_pretty_name(
                fields['left_from_num'],
                fields['left_to_num'],
                fields['right_from_num'],
                fields['right_to_num'],
                fields['predir'],
                fields['street'],
                fields['suffix'],
                fields['postdir'])
            block.pretty_name = block_pretty
            block.street_pretty_name = street_pretty
            street_and_suffix = (fields['street'], fields['suffix'])
            block.street_slug = slugify(' '.join(street_and_suffix))
            # Save first so the block has an id to use as a parent.
            block.save()
            if first_block_id is None:
                first_block_id = block.id
            else:
                block.parent_id = first_block_id
                block.save()
            created_total += 1
            if verbose:
                print('Created block %s' % block)
    return created_total
def import_blocks(blocks_layer):
    """
    Get or create a Block for each side ('R', then 'L') of every feature
    in ``blocks_layer``.

    A side is skipped unless it has a from-number, a to-number, a ZIP and
    a street name.  Nothing is returned.
    """
    for i, feature in enumerate(blocks_layer):
        for side in ('R', 'L'):
            from_num = feature['%s_ADD_FROM' % side]
            to_num = feature['%s_ADD_TO' % side]
            zip_code = feature['%s_ZIP' % side]
            street_name = feature['ST_NAME']
            if not (from_num and to_num and zip_code and street_name):
                continue
            # Make sure the range runs from the lower to the higher number.
            if from_num > to_num:
                from_num, to_num = to_num, from_num
            street_pretty_name, block_pretty_name = make_pretty_name(
                from_num, to_num, feature['PRE_DIR'], street_name,
                feature['ST_TYPE'], feature['SUF_DIR'])
            block_kwargs = {
                'pretty_name': block_pretty_name,
                'predir': feature['PRE_DIR'] or '',
                'street': street_name,
                'street_slug': slugify(
                    '%s %s' % (street_name, (feature['ST_TYPE'] or ''))),
                'street_pretty_name': street_pretty_name,
                'suffix': feature['ST_TYPE'] or '',
                'postdir': feature['SUF_DIR'] or '',
                'from_num': from_num,
                'to_num': to_num,
                'zip': zip_code,
                'city': 'FAKE',  # we don't know it yet
                'state': 'FL',
                'location': 'SRID=4326;%s' % str(feature.geometry),
            }
            block, created = Block.objects.get_or_create(**block_kwargs)
            status = 'Created' if created else 'Already had'
            logger.debug('%s block %r' % (status, block))
            # NOTE(review): ``i`` is the feature index, not a count of
            # blocks actually created.
            if i % 100 == 0:
                logger.info('Created %s blocks' % i)
def augment_cities(self):
    """
    Add county subdivisions as extra Locations of ``self.city_type``.

    Each newly imported township has the area already covered by a
    pre-existing ("proper") city removed from its shape, and is renamed
    to "<Name> area" when a city with the same name already exists.

    Returns the value returned by ``LocationImporter.save()``.
    """
    # Snapshot the "proper" cities before importing anything, and merge
    # their shapes into a single geometry.
    proper_cities = Location.objects.filter(location_type=self.city_type)
    covered = GEOSGeometry('MULTIPOLYGON EMPTY')
    for proper in proper_cities:
        covered = covered.union(proper.location)
    proper_pks = [proper.pk for proper in proper_cities]

    cousub = self.datafiles['cousub']
    shapefile = '%s/%s.shp' % (self.zip_dir, cousub['file_name'])
    layer = DataSource(shapefile)[0]
    importer = LocationImporter(
        layer,
        self.city_type,
        source=cousub.get('source', 'Unknown'),
        filter_bounds=False,
        verbose=True)
    created_count = importer.save(cousub['name_field'])

    # Everything of the city type that was not there before is a township.
    townships = Location.objects.filter(
        location_type=self.city_type).exclude(pk__in=proper_pks)
    proper_names = [
        city_name.lower()
        for city_name in Location.objects.filter(
            location_type=self.city_type,
            pk__in=proper_pks).values_list('name', flat=True)
    ]

    for township in townships:
        name_taken = township.name.lower() in proper_names
        pretty = capwords(township.name)
        # If a same-named city already exists, then rename the township
        # to "Cityname area."
        township.name = ('%s area' % pretty) if name_taken else pretty
        township.slug = slugify(township.name)
        township.location = township.location.difference(covered)
        township.save()
    return created_count
def import_blocks(blocks_layer):
    """
    Get or create a Block for each side ('R', then 'L') of every feature
    in ``blocks_layer``.

    A side is skipped unless it has a from-number, a to-number, a ZIP and
    a street name.  Nothing is returned.
    """
    for i, feature in enumerate(blocks_layer):
        for side in ('R', 'L'):
            from_num = feature['%s_ADD_FROM' % side]
            to_num = feature['%s_ADD_TO' % side]
            zip_code = feature['%s_ZIP' % side]
            street_name = feature['ST_NAME']
            if not (from_num and to_num and zip_code and street_name):
                continue
            # Make sure the range runs from the lower to the higher number.
            if from_num > to_num:
                from_num, to_num = to_num, from_num
            street_pretty_name, block_pretty_name = make_pretty_name(
                from_num, to_num, feature['PRE_DIR'], street_name,
                feature['ST_TYPE'], feature['SUF_DIR'])
            block_kwargs = {
                'pretty_name': block_pretty_name,
                'predir': feature['PRE_DIR'] or '',
                'street': street_name,
                'street_slug': slugify(
                    '%s %s' % (street_name, (feature['ST_TYPE'] or ''))),
                'street_pretty_name': street_pretty_name,
                'suffix': feature['ST_TYPE'] or '',
                'postdir': feature['SUF_DIR'] or '',
                'from_num': from_num,
                'to_num': to_num,
                'zip': zip_code,
                'city': 'FAKE',  # we don't know it yet
                'state': 'FL',
                'location': 'SRID=4326;%s' % str(feature.geometry),
            }
            block, created = Block.objects.get_or_create(**block_kwargs)
            status = 'Created' if created else 'Already had'
            logger.debug('%s block %r' % (status, block))
            # NOTE(review): ``i`` is the feature index, not a count of
            # blocks actually created.
            if i % 100 == 0:
                logger.info('Created %s blocks' % i)
def create_location(self, name, location_type, geom, display_order=0):
    """
    Get or create a single Location called ``name``.

    ``location_type`` is either an int id or an object with an ``id``
    attribute.  ``geom`` is passed through ``geos_with_projection(geom,
    4326)`` and then cleaned up before saving.

    Returns None when ``self.should_create_location()`` rejects the
    location; otherwise returns the ``created`` flag from
    ``get_or_create`` (True if a new row was made).
    """
    data_source = self.source

    geom = geos_with_projection(geom, 4326)
    geom = ensure_valid(geom, name)
    geom = flatten_geomcollection(geom)

    type_id = location_type if isinstance(location_type, int) else location_type.id

    lookup = {
        'name': name,
        'slug': slugify(name),
        'location': geom,
        'location_type_id': type_id,
        'city': self.metro_name,
        'source': data_source,
        'is_public': True,
    }
    if not self.should_create_location(lookup):
        return

    # These values are only applied when a new row is created.
    lookup['defaults'] = {
        'creation_date': self.now,
        'last_mod_date': self.now,
        'display_order': display_order,
        'normalized_name': normalize(name),
        'area': geom.transform(3395, True).area,
    }
    try:
        loc, created = Location.objects.get_or_create(**lookup)
    except IntegrityError:
        # Usually this means two towns with the same slug.
        # Try to fix that.
        taken_slug = lookup['slug']
        clashes = Location.objects.filter(slug=taken_slug).count()
        if not clashes:
            raise
        new_slug = slugify('%s-%s' % (taken_slug, clashes + 1))
        logger.info("Munged slug %s to %s to make it unique" % (taken_slug, new_slug))
        lookup['slug'] = new_slug
        loc, created = Location.objects.get_or_create(**lookup)

    status = 'Created' if created else 'Already had'
    # NOTE(review): this logs self.location_type, not the ``location_type``
    # argument -- confirm that is intended.
    logger.info('%s %s %s' % (status, self.location_type.name, loc))
    logger.info('Populating newsitem locations ... ')
    populate_ni_loc(loc)
    logger.info('done.\n')
    return created
def geotag_page(page_id, source, schema, url, data_tuples, article_headline, article_date):
    """
    Given a Page ID and a list of (location, wkt, excerpt, block) tuples
    representing the addresses in the page, creates a NewsItem for each
    address. Returns a list of all created NewsItems.

    ``article_date`` may be a ``datetime.date`` or a 'YYYY-MM-DD' string.

    If ``data_tuples`` is empty, nothing is validated or created and an
    empty list is returned.  Otherwise raises ValueError when ``source``,
    ``url``, ``article_headline`` or ``article_date`` is missing.
    """
    if not data_tuples:
        # Nothing to geotag. Return an empty list (not None) so callers
        # always get the documented list type.
        return []

    required = (
        (source, 'Provide a source'),
        (url, 'Provide a URL'),
        (article_headline, 'Provide an article headline'),
        (article_date, 'Provide an article date'),
    )
    for value, message in required:
        if not value:
            raise ValueError(message)

    if not isinstance(article_date, datetime.date):
        article_date = datetime.date(
            *time.strptime(article_date, '%Y-%m-%d')[:3])

    # If this schema has a "source" SchemaField, then get or create it.
    try:
        sf = SchemaField.objects.get(schema__id=schema.id, name='source')
    except SchemaField.DoesNotExist:
        source_lookup = None
    else:
        try:
            source_lookup = Lookup.objects.get(schema_field__id=sf.id, code=source)
        except Lookup.DoesNotExist:
            source_lookup = Lookup.objects.create(
                schema_field_id=sf.id,
                name=source,
                code=source,
                slug=slugify(source)[:32],
                description='')

    ni_list = []
    for location, wkt, excerpt, block in data_tuples:
        # Flatten the excerpt onto one line.
        description = excerpt = excerpt.replace('\n', ' ')
        if source_lookup is not None:
            # u'\u2014' is an em dash.
            description = u'%s \u2014 %s' % (source_lookup.name, description)
        ni = NewsItem.objects.create(
            schema=schema,
            title=article_headline,
            description=description,
            url=url,
            pub_date=article_date,
            item_date=article_date,
            location=wkt,
            location_name=location,
            block=block,
        )
        atts = {'page_id': page_id, 'excerpt': excerpt}
        if source_lookup is not None:
            atts['source'] = source_lookup.id
        ni.attributes = atts
        ni_list.append(ni)
    return ni_list
def geotag_page(page_id, source, schema, url, data_tuples, article_headline, article_date):
    """
    Given a Page ID and a list of (location, wkt, excerpt, block) tuples
    representing the addresses in the page, creates a NewsItem for each
    address. Returns a list of all created NewsItems.

    ``article_date`` may be a ``datetime.date`` or a 'YYYY-MM-DD' string.

    If ``data_tuples`` is empty, nothing is validated or created and an
    empty list is returned.  Otherwise raises ValueError when ``source``,
    ``url``, ``article_headline`` or ``article_date`` is missing.
    """
    if not data_tuples:
        # Nothing to geotag. Return an empty list (not None) so callers
        # always get the documented list type.
        return []

    required = (
        (source, 'Provide a source'),
        (url, 'Provide a URL'),
        (article_headline, 'Provide an article headline'),
        (article_date, 'Provide an article date'),
    )
    for value, message in required:
        if not value:
            raise ValueError(message)

    if not isinstance(article_date, datetime.date):
        article_date = datetime.date(
            *time.strptime(article_date, '%Y-%m-%d')[:3])

    # If this schema has a "source" SchemaField, then get or create it.
    try:
        sf = SchemaField.objects.get(schema__id=schema.id, name='source')
    except SchemaField.DoesNotExist:
        source_lookup = None
    else:
        try:
            source_lookup = Lookup.objects.get(schema_field__id=sf.id, code=source)
        except Lookup.DoesNotExist:
            source_lookup = Lookup.objects.create(
                schema_field_id=sf.id,
                name=source,
                code=source,
                slug=slugify(source)[:32],
                description='')

    ni_list = []
    for location, wkt, excerpt, block in data_tuples:
        # Flatten the excerpt onto one line.
        description = excerpt = excerpt.replace('\n', ' ')
        if source_lookup is not None:
            # u'\u2014' is an em dash.
            description = u'%s \u2014 %s' % (source_lookup.name, description)
        ni = NewsItem.objects.create(
            schema=schema,
            title=article_headline,
            description=description,
            url=url,
            pub_date=article_date,
            item_date=article_date,
            location=wkt,
            location_name=location,
            block=block,
        )
        atts = {'page_id': page_id, 'excerpt': excerpt}
        if source_lookup is not None:
            atts['source'] = source_lookup.id
        ni.attributes = atts
        ni_list.append(ni)
    return ni_list
def add_location(name, wkt, loc_type, source='UNKNOWN'):
    """
    Get or create a Location of type ``loc_type`` from a WKT string.

    ``name`` is stripped and title-cased before use.  Prints whether the
    Location was created or found, and returns the Location.
    """
    geom = fromstr(wkt, srid=4326)
    name = name.strip().title()
    lookup = {
        'name': name,
        'slug': slugify(name),
        'normalized_name': normalize(name),
        'location_type': loc_type,
        'location': geom,
        'display_order': 0,
        'city': get_metro()['city_name'].upper(),
        'source': source,
    }
    loc, created = Location.objects.get_or_create(**lookup)
    status = 'Created' if created else 'Found'
    print('%s %s %s' % (status, loc_type.name, name))
    return loc
def add_location(name, wkt, loc_type, source='UNKNOWN'):
    """
    Get or create a Location of type ``loc_type`` from a WKT string.

    The geometry's centroid is stored alongside it.  Prints whether the
    Location was created or found, and returns the Location.
    """
    geom = fromstr(wkt, srid=4326)
    lookup = {
        'name': name,
        'slug': slugify(name),
        'normalized_name': normalize(name),
        'location_type': loc_type,
        'location': geom,
        'centroid': geom.centroid,
        'display_order': 0,
        'city': get_metro()['city_name'].upper(),
        'source': source,
    }
    loc, created = Location.objects.get_or_create(**lookup)
    status = 'Created' if created else 'Found'
    print('%s %s %s' % (status, loc_type.name, name))
    return loc
def get_or_create_lookup(self, schema_field, name, code=None, description='', make_text_slug=True, logger=None):
    """
    Return the Lookup for the given SchemaField and code, creating it
    (with the given name/code/description) if it doesn't exist yet.

    ``code`` defaults to ``name``.  If ``make_text_slug`` is True, the
    slug is made from the name (trimmed to 32 characters); if it's False,
    the slug is set to the new Lookup's ID.  Names longer than 255
    characters are trimmed, and the full name is kept in the description
    when no description was given.

    ``logger`` is optional; when it is None nothing is logged.
    """
    def log_info(message):
        if logger is not None:
            logger.info(message)

    def log_warn(message):
        if logger is not None:
            logger.warn(message)

    code = code or name  # code defaults to name if it wasn't provided
    try:
        return Lookup.objects.get(schema_field__id=schema_field.id, code=code)
    except Lookup.DoesNotExist:
        pass

    # No such Lookup yet: work out its slug and name, then create it.
    if make_text_slug:
        slug = slugify(name)
        if len(slug) > 32:
            trimmed_slug = slug[:32]
            log_warn("Trimming slug %r to %r in order to fit 32-char limit." % (slug, trimmed_slug))
            slug = trimmed_slug
    else:
        # To avoid integrity errors in the slug when creating the Lookup,
        # use a temporary dummy slug that's guaranteed not to be in use.
        # We'll change it back immediately afterward.
        slug = '__3029j3f029jf029jf029__'

    if len(name) > 255:
        full_name = name
        name = full_name[:250] + '...'
        # Save the full name in the description.
        if not description:
            description = full_name
        log_warn("Trimming name %r to %r in order to fit 255-char limit." % (full_name, name))

    lookup = Lookup(schema_field_id=schema_field.id, name=name, code=code,
                    slug=slug, description=description)
    lookup.save()
    if not make_text_slug:
        # Set the slug to the ID.
        lookup.slug = lookup.id
        lookup.save()
    log_info('Created %s %r' % (schema_field.name, name))
    return lookup
def save(self, name_field='name', source='UNKNOWN', verbose=True):
    """
    Get or create a Location for each feature in ``self.layer`` accepted
    by ``self.should_create_location()``.

    ``name_field`` is the feature attribute holding the location name and
    ``source`` is stored on each Location.  Locations are processed in
    name order; that position becomes ``display_order`` on creation.

    Only identifying fields are used to look up an existing Location.
    Derived values (normalized name, centroid, area, display order,
    dates) are passed as ``defaults``, so they are applied on creation
    but do not stop an already-imported Location from being found.

    Returns the number of Locations newly created.
    """
    locs = []
    for feature in self.layer:
        if not self.should_create_location(feature):
            continue
        name = feature.get(name_field)
        geom = feature.geom.transform(4326, True).geos
        if not geom.valid:
            # Try to repair the geometry with a zero-width buffer.
            geom = geom.buffer(0.0)
            if not geom.valid:
                print >> sys.stderr, 'Warning: invalid geometry: %s' % name
        lookup = dict(
            name=name,
            slug=slugify(name),
            location_type=self.get_location_type(feature),
            location=geom,
            city=self.metro_name,
            source=source,
            is_public=True,
        )
        derived = dict(
            normalized_name=normalize(name),
            centroid=geom.centroid,
            area=geom.transform(3395, True).area,
        )
        locs.append((lookup, derived))

    num_created = 0
    ordered = sorted(locs, key=lambda pair: pair[0]['name'])
    for i, (lookup, derived) in enumerate(ordered):
        # display_order must live in ``defaults``: putting it in the lookup
        # would make every Location saved with a non-zero order unfindable
        # on the next import.
        defaults = dict(
            derived,
            creation_date=self.now,
            last_mod_date=self.now,
            display_order=i,
        )
        loc, created = Location.objects.get_or_create(defaults=defaults, **lookup)
        if created:
            num_created += 1
        if verbose:
            status = 'Created' if created else 'Already had'
            print >> sys.stderr, '%s %s %s' % (status, self.location_type.name, loc)
            sys.stderr.write('Populating newsitem locations ... ')
        populate_ni_loc(loc)
        if verbose:
            sys.stderr.write('done.\n')
    return num_created
def import_county(self):
    """
    Replace all County Locations with the single county whose 'GEOID10'
    attribute equals ``self.county``.

    The 'counties' LocationType is created if it doesn't exist, and any
    existing Locations of that type are deleted first.

    Returns the created Location, or None if no feature matched.
    """
    county_type_data = dict(
        name='County',
        plural_name='Counties',
        slug='counties',
        is_browsable=True,
        is_significant=False,
        scope=self.metro_name,
    )
    try:
        county_type = LocationType.objects.get(slug=county_type_data['slug'])
    except LocationType.DoesNotExist:
        county_type = LocationType.objects.create(**county_type_data)

    # Start from a clean slate for this location type.
    Location.objects.filter(location_type=county_type).delete()

    county_file = self.datafiles['county']
    shapefile = '%s/%s.shp' % (self.zip_dir, county_file['file_name'])
    county_layer = DataSource(shapefile)[0]
    now = datetime.datetime.now()

    for feature in county_layer:
        if feature.get('GEOID10') == self.county:
            name = feature.get(county_file['name_field'])
            geom = feature.geom.transform(4326, True).geos
            geom = ensure_valid(geom, name)
            geom = flatten_geomcollection(geom)
            # Only the first matching feature is imported.
            return Location.objects.create(
                name=name,
                slug=slugify(name),
                location_type=county_type,
                location=geom,
                city=self.metro_name,
                is_public=True,
                creation_date=now,
                last_mod_date=now,
                display_order=0,
                normalized_name=normalize(name),
                area=geom.transform(3395, True).area,
            )
    return None
def slug_from_blocks(block_a, block_b):
    """
    Build an "<street a>-and-<street b>" slug for two blocks.

    Progressively shorter street names are tried until the slug fits in
    64 characters.  If even the bare street names are too long, the slug
    is cut off at 64 characters, so the result never exceeds that limit.
    """
    max_length = 64
    slug = u"%s-and-%s" % (slugify(make_dir_street_name(block_a)),
                           slugify(make_dir_street_name(block_b)))
    # If it's too long for the slug field, drop the directionals
    if len(slug) > max_length:
        slug = u"%s-and-%s" % (
            slugify(make_street_pretty_name(block_a.street, block_a.suffix)),
            slugify(make_street_pretty_name(block_b.street, block_b.suffix)))
    # If it's still too long, drop the suffixes
    if len(slug) > max_length:
        slug = u"%s-and-%s" % (slugify(block_a.street), slugify(block_b.street))
    # Last resort: two long street names can still overflow the field, so
    # hard-truncate rather than hand back a slug that cannot be stored.
    return slug[:max_length]
def save(self, verbose=True):
    """
    Create and save a Block for every set of fields produced by
    ``self.gen_blocks()`` for each feature that is not skipped.

    When a feature yields several blocks, the first saved block becomes
    the parent of the others (via ``parent_id``).

    Returns the number of blocks created.
    """
    number_keys = ('left_from_num', 'left_to_num',
                   'right_from_num', 'right_to_num')
    name_keys = ('predir', 'street', 'suffix', 'postdir')

    created_total = 0
    for feature in self.layer:
        if self.skip_feature(feature):
            continue
        first_block_id = None
        # Usually (at least in Boston data) there is only
        # 1 block per feature. But sometimes there are
        # multiple names for one street, eg.
        # "N. Commercial Wharf" and "Commercial Wharf N.";
        # in that case those would be yielded by gen_blocks() as
        # two separate blocks. Is that intentional, or a bug?
        for fields in self.gen_blocks(feature):
            block = Block(**fields)
            block.geom = feature.geom.geos
            numbers = [fields[key] for key in number_keys]
            (block.from_num, block.to_num) = make_block_numbers(*numbers)
            name_parts = [fields[key] for key in name_keys]
            street_pretty, block_pretty = make_pretty_name(
                *(numbers + name_parts))
            block.pretty_name = block_pretty
            block.street_pretty_name = street_pretty
            block.street_slug = slugify(
                ' '.join((fields['street'], fields['suffix'])))
            # Save first so the block has an id to use as a parent.
            block.save()
            if first_block_id is None:
                first_block_id = block.id
            else:
                block.parent_id = first_block_id
                block.save()
            created_total += 1
            if verbose:
                print('%d\tCreated block %s for feature %d' % (created_total, block, feature.get('TLID')))
    return created_total
def handle(self, **options):
    """
    Rebuild all City Locations from the downloaded shapefile.

    Existing Locations of the 'cities' type are deleted first.  Features
    that share a (cleaned) name are merged into a single Location via
    ``make_multi()``.  Prints the resulting number of Locations.
    """
    shapefile = self.download_file()
    now = datetime.datetime.now()
    metro_name = get_metro()['metro_name'].upper()

    # get or create City location type
    city_type_fields = dict(
        name='City',
        plural_name='Cities',
        slug='cities',
        is_browsable=True,
        is_significant=True,
        scope=metro_name,
    )
    try:
        city_type = LocationType.objects.get(slug=city_type_fields['slug'])
    except LocationType.DoesNotExist:
        city_type = LocationType.objects.create(**city_type_fields)

    # start with a fresh list of cities
    Location.objects.filter(location_type=city_type).delete()

    # build list of cities, collecting every geometry that shares a name
    cities = {}
    layer = DataSource(shapefile)[0]
    for feature in layer:
        name = self.clean_name(feature['Name'])
        # convert to 4326
        geom = feature.geom.transform(4326, True).geos
        try:
            city = cities[name]
        except KeyError:
            city = cities[name] = {
                'name': name,
                'slug': slugify(name),
                'location_type': city_type,
                'city': metro_name,
                'source': 'Columbus County GIS data',
                'is_public': True,
                'creation_date': now,
                'last_mod_date': now,
                'display_order': 0,
                'normalized_name': normalize(name),
                'location': [],
            }
        city['location'].append(geom)

    # create city locations
    for city in cities.itervalues():
        city['location'] = make_multi(city['location'])
        Location.objects.create(**city)

    print('Imported %d locations' % city_type.location_set.count())
def save(self, name_field='name', source='UNKNOWN', verbose=True):
    """
    Get or create a Location for each feature in ``self.layer`` accepted
    by ``self.should_create_location()``.

    ``name_field`` is the feature attribute holding the location name and
    ``source`` is stored on each Location.  Locations are processed in
    name order; that position becomes ``display_order`` on creation.

    Only identifying fields are used to look up an existing Location.
    Derived values (normalized name, centroid, area, display order,
    dates) are passed as ``defaults``, so they are applied on creation
    but do not stop an already-imported Location from being found.

    Returns the number of Locations newly created.
    """
    locs = []
    for feature in self.layer:
        if not self.should_create_location(feature):
            continue
        name = feature.get(name_field)
        geom = feature.geom.transform(4326, True).geos
        if not geom.valid:
            # Try to repair the geometry with a zero-width buffer.
            geom = geom.buffer(0.0)
            if not geom.valid:
                print >> sys.stderr, 'Warning: invalid geometry: %s' % name
        lookup = dict(
            name=name,
            slug=slugify(name),
            location_type=self.get_location_type(feature),
            location=geom,
            city=self.metro_name,
            source=source,
            is_public=True,
        )
        derived = dict(
            normalized_name=normalize(name),
            centroid=geom.centroid,
            area=geom.transform(3395, True).area,
        )
        locs.append((lookup, derived))

    num_created = 0
    ordered = sorted(locs, key=lambda pair: pair[0]['name'])
    for i, (lookup, derived) in enumerate(ordered):
        # display_order must live in ``defaults``: putting it in the lookup
        # would make every Location saved with a non-zero order unfindable
        # on the next import.
        defaults = dict(
            derived,
            creation_date=self.now,
            last_mod_date=self.now,
            display_order=i,
        )
        loc, created = Location.objects.get_or_create(defaults=defaults, **lookup)
        if created:
            num_created += 1
        if verbose:
            status = 'Created' if created else 'Already had'
            print >> sys.stderr, '%s %s %s' % (status, self.location_type.name, loc)
            sys.stderr.write('Populating newsitem locations ... ')
        populate_ni_loc(loc)
        if verbose:
            sys.stderr.write('done.\n')
    return num_created
def save(self, name_field="name", source="UNKNOWN", verbose=True):
    """
    Get or create a neighborhood Location (of ``self.location_type``) for
    each feature in ``self.layer``.

    ``name_field`` is the feature attribute holding the neighborhood name
    and ``source`` is stored on each Location.  Neighborhoods are
    processed in name order; that position becomes ``display_order`` on
    creation.

    Only identifying fields are used to look up an existing Location.
    Derived values (normalized name, centroid, area, display order,
    dates) are passed as ``defaults``, so they are applied on creation
    but do not stop an already-imported Location from being found.

    Returns the number of Locations newly created.
    """
    hoods = []
    for feature in self.layer:
        name = feature.get(name_field)
        geom = feature.geom.transform(4326, True).geos
        if not geom.valid:
            # Try to repair the geometry with a zero-width buffer.
            geom = geom.buffer(0.0)
            if not geom.valid:
                print >>sys.stderr, "Warning: invalid geometry: %s" % name
        lookup = dict(
            name=name,
            slug=slugify(name),
            location_type=self.location_type,
            location=geom,
            city=self.metro_name,
            source=source,
            is_public=True,
        )
        derived = dict(
            normalized_name=normalize(name),
            centroid=geom.centroid,
            area=geom.transform(3395, True).area,
        )
        hoods.append((lookup, derived))

    num_created = 0
    ordered = sorted(hoods, key=lambda pair: pair[0]["name"])
    for i, (lookup, derived) in enumerate(ordered):
        # display_order must live in ``defaults``: putting it in the lookup
        # would make every Location saved with a non-zero order unfindable
        # on the next import.
        defaults = dict(
            derived,
            creation_date=self.now,
            last_mod_date=self.now,
            display_order=i,
        )
        hood, created = Location.objects.get_or_create(defaults=defaults, **lookup)
        if created:
            num_created += 1
        if verbose:
            status = "Created" if created else "Already had"
            print >>sys.stderr, "%s neighborhood %s" % (status, hood)
            sys.stderr.write("Populating newsitem locations ... ")
        populate_ni_loc(hood)
        if verbose:
            sys.stderr.write("done.\n")
    return num_created
def save(self):
    """
    Create or update a Block for every set of fields produced by
    ``self.gen_blocks()`` for each feature in ``self.layer`` that is not
    skipped by ``self.skip_feature()``.

    If ``self.reset`` is true, all existing Blocks are deleted first.

    For each candidate block the fields are decoded to unicode, pretty
    names and the street slug are built, address numbers are coerced to
    numeric strings (or None), and geocoder-relevant fields are
    standardized.  An existing Block matching the identifying fields is
    updated in place; if several match, the candidate is skipped.
    Candidates that fail number parsing or validation are logged and
    skipped.

    No return statement is visible in this block.
    """
    if self.reset:
        logger.warn(
            "Deleting all Block instances and anything that refers to them!"
        )
        Block.objects.all().delete()
    import time
    # NOTE(review): ``start`` is not read anywhere in the visible code.
    start = time.time()
    num_created = 0
    num_existing = 0
    for feature in self.layer:
        # The first block saved for a feature becomes the parent of any
        # further blocks from the same feature.
        parent_id = None
        if not self.skip_feature(feature):
            for block_fields in self.gen_blocks(feature):
                # Usually (at least in Boston data) there is only
                # 1 block per feature. But sometimes there are
                # multiple names for one street, eg.
                # "N. Commercial Wharf" and "Commercial Wharf N.";
                # in that case those would be yielded by gen_blocks() as
                # two separate blocks. Is that intentional, or a bug?

                # Ensure we have unicode.
                for key, val in block_fields.items():
                    if isinstance(val, str):
                        block_fields[key] = val.decode(self.encoding)

                block_fields['geom'] = geos_with_projection(
                    feature.geom, 4326)
                block_fields['prefix'] = make_pretty_prefix(
                    block_fields['prefix'])
                block_fields['street_pretty_name'], block_fields[
                    'pretty_name'] = make_pretty_name(
                        block_fields['left_from_num'],
                        block_fields['left_to_num'],
                        block_fields['right_from_num'],
                        block_fields['right_to_num'],
                        block_fields['predir'],
                        block_fields['prefix'],
                        block_fields['street'],
                        block_fields['suffix'],
                        block_fields['postdir'])
                block_fields['street_slug'] = slugify(u' '.join(
                    (block_fields['prefix'], block_fields['street'],
                     block_fields['suffix'])))

                # Watch out for addresses like '247B' which can't be
                # saved as an IntegerField.
                # But do this *after* making pretty names.
                # Also attempt to fix up addresses like '19-47',
                # by just using the lower number. This will give
                # misleading output, but it's probably better than
                # discarding blocks.
                for addr_key in ('left_from_num', 'left_to_num',
                                 'right_from_num', 'right_to_num'):
                    if isinstance(block_fields[addr_key], basestring):
                        from ebpub.geocoder.parser.parsing import number_standardizer
                        value = number_standardizer(
                            block_fields[addr_key].strip())
                        if not value:
                            value = None
                        else:
                            # Keep only values that parse as an integer.
                            try:
                                value = str(int(value))
                            except (ValueError, TypeError):
                                value = None
                        block_fields[addr_key] = value

                try:
                    block_fields['from_num'], block_fields['to_num'] = \
                        make_block_numbers(block_fields['left_from_num'],
                                           block_fields['left_to_num'],
                                           block_fields['right_from_num'],
                                           block_fields['right_to_num'])
                except ValueError, e:
                    logger.warn('Skipping %s: %s' % (block_fields['pretty_name'], e))
                    continue

                # After doing pretty names etc, standardize the fields
                # that get used for geocoding, since the geocoder
                # searches for the standardized version.
                from ebpub.geocoder.parser.parsing import STANDARDIZERS
                for key, standardizer in STANDARDIZERS.items():
                    if key in block_fields:
                        if key == 'street' and block_fields['prefix']:
                            # Special case: "US Highway 101", not "US Highway 101st".
                            continue
                        block_fields[key] = standardizer(block_fields[key])

                # Separate out the uniquely identifying fields so
                # we can avoid duplicate blocks.
                # NOTE this doesn't work if you're updating from a more
                # recent shapefile and the street has significant
                # changes - eg. the street name has changed, or the
                # address range has changed, or the block has split...
                # see #257. http://developer.openblockproject.org/ticket/257
                primary_fields = {}
                primary_field_keys = (
                    'street_slug',
                    'from_num',
                    'to_num',
                    'left_city',
                    'right_city',
                    'left_zip',
                    'right_zip',
                    'left_state',
                    'right_state',
                )
                for key in primary_field_keys:
                    if block_fields[key] != u'':
                        # Some empty fields are fixed
                        # automatically by clean().
                        primary_fields[key] = block_fields[key]

                existing = list(Block.objects.filter(**primary_fields))
                if not existing:
                    # Check the old-style way we used to make street slugs
                    # prior to fixing issue #264... we need to keep this
                    # code around indefinitely in case we are reloading the
                    # blocks data and need to overwrite blocks that have
                    # the old bad slug. Sadly this probably can't just be
                    # fixed by a migration.
                    _old_street_slug = slugify(u' '.join(
                        (block_fields['street'], block_fields['suffix'])))
                    _old_primary_fields = primary_fields.copy()
                    _old_primary_fields['street_slug'] = _old_street_slug
                    existing = list(
                        Block.objects.filter(**_old_primary_fields))

                if not existing:
                    # Nothing matched under either slug style: new block.
                    block = Block(**block_fields)
                    num_created += 1
                    logger.debug("CREATING %s" % unicode(block))

                if len(existing) == 1:
                    # Exactly one match: overwrite its fields in place.
                    num_existing += 1
                    block = existing[0]
                    logger.debug(u"Block %s already exists" % unicode(existing[0]))
                    for key, val in block_fields.items():
                        setattr(block, key, val)
                elif len(existing) > 1:
                    # Ambiguous match: leave the existing rows untouched.
                    num_existing += len(existing)
                    logger.warn(
                        "Multiple existing blocks like %s, skipping" % existing[0])
                    continue

                try:
                    block.full_clean()
                except ValidationError:
                    # odd bug: sometimes we get ValidationError even when
                    # the data looks good, and then cleaning again works???
                    try:
                        block.full_clean()
                    except ValidationError, e:
                        logger.warn("validation error on %s, skipping" % str(block))
                        logger.warn(e)
                        continue

                # Save first so the block has an id to use as a parent.
                block.save()
                if parent_id is None:
                    parent_id = block.id
                else:
                    block.parent_id = parent_id
                    block.save()
                # NOTE(review): this message says "Created" even when an
                # existing block was updated rather than created.
                logger.debug('%d\tCreated block %s for feature %d' % (num_created, block, feature.fid))
def get_or_create_lookup(self, schema_field, name, code=None, description='', make_text_slug=True, logger=None):
    """
    Return the Lookup for the given SchemaField and code, creating it
    (with the given name/code/description) if it doesn't exist yet.

    ``code`` defaults to ``name``.  If ``make_text_slug`` is True, the
    slug is made from the name (trimmed to 32 characters); if it's False,
    the slug is set to the new Lookup's ID.  Names longer than 255
    characters are trimmed, and the full name is kept in the description
    when no description was given.

    ``logger`` is optional; when it is None nothing is logged.
    """
    def log_info(message):
        if logger is not None:
            logger.info(message)

    def log_warn(message):
        if logger is not None:
            logger.warn(message)

    code = code or name  # code defaults to name if it wasn't provided
    try:
        return Lookup.objects.get(schema_field__id=schema_field.id, code=code)
    except Lookup.DoesNotExist:
        pass

    # No such Lookup yet: work out its slug and name, then create it.
    if make_text_slug:
        slug = slugify(name)
        if len(slug) > 32:
            trimmed_slug = slug[:32]
            log_warn("Trimming slug %r to %r in order to fit 32-char limit." % (slug, trimmed_slug))
            slug = trimmed_slug
    else:
        # To avoid integrity errors in the slug when creating the Lookup,
        # use a temporary dummy slug that's guaranteed not to be in use.
        # We'll change it back immediately afterward.
        slug = '__3029j3f029jf029jf029__'

    if len(name) > 255:
        full_name = name
        name = full_name[:250] + '...'
        # Save the full name in the description.
        if not description:
            description = full_name
        log_warn("Trimming name %r to %r in order to fit 255-char limit." % (full_name, name))

    lookup = Lookup(schema_field_id=schema_field.id, name=name, code=code,
                    slug=slug, description=description)
    lookup.save()
    if not make_text_slug:
        # Set the slug to the ID.
        lookup.slug = lookup.id
        lookup.save()
    log_info('Created %s %r' % (schema_field.name, name))
    return lookup
def save(self):
    """
    Create and save a Block for every set of fields produced by
    ``self.gen_blocks()`` for each feature in ``self.layer`` that is not
    skipped by ``self.skip_feature()``.

    For each candidate block the fields are decoded to unicode, pretty
    names and the street slug are built, and the four address numbers
    are coerced to ints (or None).  Candidates that fail number parsing
    or validation are logged and skipped.

    No return statement is visible in this block.
    """
    num_created = 0
    for feature in self.layer:
        # The first block saved for a feature becomes the parent of any
        # further blocks from the same feature.
        parent_id = None
        if not self.skip_feature(feature):
            for block_fields in self.gen_blocks(feature):
                # Usually (at least in Boston data) there is only
                # 1 block per feature. But sometimes there are
                # multiple names for one street, eg.
                # "N. Commercial Wharf" and "Commercial Wharf N.";
                # in that case those would be yielded by gen_blocks() as
                # two separate blocks. Is that intentional, or a bug?

                # Ensure we have unicode.
                for key, val in block_fields.items():
                    if isinstance(val, str):
                        block_fields[key] = val.decode(self.encoding)

                block_fields['geom'] = feature.geom.geos
                block_fields['street_pretty_name'], block_fields['pretty_name'] = make_pretty_name(
                    block_fields['left_from_num'],
                    block_fields['left_to_num'],
                    block_fields['right_from_num'],
                    block_fields['right_to_num'],
                    block_fields['predir'],
                    block_fields['street'],
                    block_fields['suffix'],
                    block_fields['postdir']
                )
                block_fields['street_slug'] = slugify(u' '.join((block_fields['street'], block_fields['suffix'])))

                # Watch out for addresses like '247B' which can't be
                # saved as an IntegerField.
                # But do this *after* making pretty names.
                for addr_key in ('left_from_num', 'left_to_num', 'right_from_num', 'right_to_num'):
                    if isinstance(block_fields[addr_key], basestring):
                        # Drop trailing letters, e.g. '247B' -> '247'.
                        value = block_fields[addr_key].rstrip(string.letters)
                        # Also attempt to fix up addresses like
                        # '19-47', by just using the lower number.
                        # This will give misleading output, but
                        # it's probably better than discarding blocks.
                        value = value.split('-')[0]
                        if value:
                            try:
                                value = int(value)
                            except ValueError:
                                logger.warn("Omitting weird value %r for %r" % (value, addr_key))
                                value = None
                        else:
                            value = None
                        block_fields[addr_key] = value

                try:
                    block_fields['from_num'], block_fields['to_num'] = \
                        make_block_numbers(block_fields['left_from_num'],
                                           block_fields['left_to_num'],
                                           block_fields['right_from_num'],
                                           block_fields['right_to_num'])
                except ValueError, e:
                    logger.warn('Skipping %s: %s' % (block_fields['pretty_name'], e))
                    continue

                block = Block(**block_fields)
                try:
                    block.full_clean()
                except ValidationError:
                    # odd bug: sometimes we get ValidationError even when
                    # the data looks good, and then cleaning again works???
                    try:
                        block.full_clean()
                    except ValidationError, e:
                        logger.warn("validation error on %s, skipping" % str(block))
                        logger.warn(e)
                        continue

                # Save first so the block has an id to use as a parent.
                block.save()
                if parent_id is None:
                    parent_id = block.id
                else:
                    block.parent_id = parent_id
                    block.save()
                num_created += 1
                logger.debug('%d\tCreated block %s for feature %d' % (num_created, block, feature.get('TLID')))
def save(self):
    """Create or update ``Block`` rows from the features of ``self.layer``.

    If ``self.reset`` is true, all existing ``Block`` rows are deleted
    first.  Features rejected by ``self.skip_feature()`` are ignored.
    Every dict yielded by ``self.gen_blocks(feature)`` is decoded to
    unicode, given pretty names, a street slug and standardized address
    numbers, and then matched against existing blocks on a set of
    identifying fields:

    * no match: a new ``Block`` is created;
    * exactly one match: that block is updated in place;
    * several matches: a warning is logged and the block is skipped.

    Blocks whose address range makes ``make_block_numbers()`` raise
    ``ValueError``, or that fail validation twice in a row, are logged
    and skipped.  When one feature yields several blocks, the first one
    saved becomes the parent of the others.

    Returns a ``(num_created, num_existing)`` tuple: the number of new
    blocks saved and the number of already-existing blocks matched.
    """
    # Imported inside the function, as the original code did, but only
    # once and before the reset, so an import failure cannot happen after
    # the table has been emptied.
    from ebpub.geocoder.parser.parsing import STANDARDIZERS
    from ebpub.geocoder.parser.parsing import number_standardizer

    if self.reset:
        logger.warn("Deleting all Block instances and anything that refers to them!")
        Block.objects.all().delete()

    addr_keys = ('left_from_num', 'left_to_num',
                 'right_from_num', 'right_to_num')
    primary_field_keys = ('street_slug', 'from_num', 'to_num',
                          'left_city', 'right_city',
                          'left_zip', 'right_zip',
                          'left_state', 'right_state',
                          )
    num_created = 0
    num_existing = 0
    for feature in self.layer:
        if self.skip_feature(feature):
            continue
        parent_id = None
        for block_fields in self.gen_blocks(feature):
            # Usually (at least in Boston data) there is only
            # 1 block per feature.  But sometimes there are
            # multiple names for one street, eg.
            # "N. Commercial Wharf" and "Commercial Wharf N.";
            # in that case those would be yielded by gen_blocks() as
            # two separate blocks. Is that intentional, or a bug?

            # Ensure we have unicode.
            for key, val in block_fields.items():
                if isinstance(val, str):
                    block_fields[key] = val.decode(self.encoding)

            block_fields['geom'] = geos_with_projection(feature.geom, 4326)
            block_fields['prefix'] = make_pretty_prefix(block_fields['prefix'])
            block_fields['street_pretty_name'], block_fields['pretty_name'] = make_pretty_name(
                block_fields['left_from_num'],
                block_fields['left_to_num'],
                block_fields['right_from_num'],
                block_fields['right_to_num'],
                block_fields['predir'],
                block_fields['prefix'],
                block_fields['street'],
                block_fields['suffix'],
                block_fields['postdir']
            )
            block_fields['street_slug'] = slugify(
                u' '.join((block_fields['prefix'],
                           block_fields['street'],
                           block_fields['suffix'])))

            # Watch out for addresses like '247B' which can't be
            # saved as an IntegerField.
            # But do this *after* making pretty names.
            # Also attempt to fix up addresses like '19-47',
            # by just using the lower number.  This will give
            # misleading output, but it's probably better than
            # discarding blocks.
            for addr_key in addr_keys:
                if isinstance(block_fields[addr_key], basestring):
                    # Any falsy result is stored as None.
                    value = number_standardizer(block_fields[addr_key].strip()) or None
                else:
                    value = None
                block_fields[addr_key] = value

            try:
                block_fields['from_num'], block_fields['to_num'] = \
                    make_block_numbers(block_fields['left_from_num'],
                                       block_fields['left_to_num'],
                                       block_fields['right_from_num'],
                                       block_fields['right_to_num'])
            except ValueError as e:
                logger.warn('Skipping %s: %s' % (block_fields['pretty_name'], e))
                continue

            # After doing pretty names etc, standardize the fields
            # that get used for geocoding, since the geocoder
            # searches for the standardized version.
            for key, standardizer in STANDARDIZERS.items():
                if key in block_fields:
                    if key == 'street' and block_fields['prefix']:
                        # Special case: "US Highway 101", not "US Highway 101st".
                        continue
                    block_fields[key] = standardizer(block_fields[key])

            # Separate out the uniquely identifying fields so
            # we can avoid duplicate blocks.
            # NOTE this doesn't work if you're updating from a more
            # recent shapefile and the street has significant
            # changes - eg. the street name has changed, or the
            # address range has changed, or the block has split...
            # see #257. http://developer.openblockproject.org/ticket/257
            primary_fields = {}
            for key in primary_field_keys:
                if block_fields[key] != u'':
                    # Some empty fields are fixed
                    # automatically by clean().
                    primary_fields[key] = block_fields[key]

            existing = list(Block.objects.filter(**primary_fields))
            if not existing:
                # Check the old-style way we used to make street slugs
                # prior to fixing issue #264... we need to keep this
                # code around indefinitely in case we are reloading the
                # blocks data and need to overwrite blocks that have
                # the old bad slug. Sadly this probably can't just be
                # fixed by a migration.
                _old_street_slug = slugify(
                    u' '.join((block_fields['street'], block_fields['suffix'])))
                _old_primary_fields = primary_fields.copy()
                _old_primary_fields['street_slug'] = _old_street_slug
                existing = list(Block.objects.filter(**_old_primary_fields))

            is_new = not existing
            if is_new:
                block = Block(**block_fields)
                logger.debug("CREATING %s" % unicode(block))
            elif len(existing) == 1:
                num_existing += 1
                block = existing[0]
                logger.debug(u"Block %s already exists" % unicode(existing[0]))
                for key, val in block_fields.items():
                    setattr(block, key, val)
            else:
                num_existing += len(existing)
                logger.warn("Multiple existing blocks like %s, skipping" % existing[0])
                continue

            try:
                block.full_clean()
            except ValidationError:
                # odd bug: sometimes we get ValidationError even when
                # the data looks good, and then cleaning again works???
                try:
                    block.full_clean()
                except ValidationError as e:
                    logger.warn("validation error on %s, skipping" % str(block))
                    logger.warn(e)
                    continue
            block.save()
            if is_new:
                # Count only blocks that passed validation and were saved.
                num_created += 1
            if parent_id is None:
                # First block saved for this feature: later ones point at it.
                parent_id = block.id
            else:
                block.parent_id = parent_id
                block.save()

            logger.debug('%d\tCreated block %s for feature %d' % (num_created, block, feature.fid))
    # Both counters used to be accumulated and then discarded.
    return num_created, num_existing
def save(self):
    """Create or update ``Block`` rows from the features of ``self.layer``.

    Features rejected by ``self.skip_feature()`` are ignored.  Every dict
    yielded by ``self.gen_blocks(feature)`` is decoded to unicode, given
    pretty names, a street slug and integer address numbers, and then
    matched against existing blocks on a set of identifying fields:

    * no match: a new ``Block`` is created;
    * exactly one match: that block is updated in place;
    * several matches: a warning is logged and the block is skipped.

    Blocks whose address range makes ``make_block_numbers()`` raise
    ``ValueError``, or that fail validation twice in a row, are logged
    and skipped.  When one feature yields several blocks, the first one
    saved becomes the parent of the others.

    Returns the number of new blocks saved.
    """
    addr_keys = ('left_from_num', 'left_to_num',
                 'right_from_num', 'right_to_num')
    primary_field_keys = ('street_slug', 'from_num', 'to_num',
                          'left_city', 'right_city',
                          'left_zip', 'right_zip',
                          'left_state', 'right_state',
                          )
    num_created = 0
    for feature in self.layer:
        if self.skip_feature(feature):
            continue
        parent_id = None
        for block_fields in self.gen_blocks(feature):
            # Usually (at least in Boston data) there is only
            # 1 block per feature.  But sometimes there are
            # multiple names for one street, eg.
            # "N. Commercial Wharf" and "Commercial Wharf N.";
            # in that case those would be yielded by gen_blocks() as
            # two separate blocks. Is that intentional, or a bug?

            # Ensure we have unicode.
            for key, val in block_fields.items():
                if isinstance(val, str):
                    block_fields[key] = val.decode(self.encoding)

            block_fields['geom'] = feature.geom.geos
            block_fields['street_pretty_name'], block_fields['pretty_name'] = make_pretty_name(
                block_fields['left_from_num'],
                block_fields['left_to_num'],
                block_fields['right_from_num'],
                block_fields['right_to_num'],
                block_fields['predir'],
                block_fields['street'],
                block_fields['suffix'],
                block_fields['postdir']
            )
            block_fields['street_slug'] = slugify(
                u' '.join((block_fields['street'], block_fields['suffix'])))

            # Watch out for addresses like '247B' which can't be
            # saved as an IntegerField.
            # But do this *after* making pretty names.
            for addr_key in addr_keys:
                if not isinstance(block_fields[addr_key], basestring):
                    continue
                value = block_fields[addr_key].rstrip(string.letters)
                # Also attempt to fix up addresses like
                # '19-47', by just using the lower number.
                # This will give misleading output, but
                # it's probably better than discarding blocks.
                value = value.split('-')[0]
                if value:
                    try:
                        value = int(value)
                    except ValueError:
                        logger.warn("Omitting weird value %r for %r" % (value, addr_key))
                        value = None
                else:
                    value = None
                block_fields[addr_key] = value

            try:
                block_fields['from_num'], block_fields['to_num'] = \
                    make_block_numbers(block_fields['left_from_num'],
                                       block_fields['left_to_num'],
                                       block_fields['right_from_num'],
                                       block_fields['right_to_num'])
            except ValueError as e:
                logger.warn('Skipping %s: %s' % (block_fields['pretty_name'], e))
                continue

            # Separate out the uniquely identifying fields so
            # we can avoid duplicate blocks.
            # NOTE this doesn't work if you're updating from a more
            # recent shapefile and the street has significant
            # changes - eg. the street name has changed, or the
            # address range has changed, or the block has split...
            primary_fields = {}
            for key in primary_field_keys:
                if block_fields[key] != u'':
                    # Some empty fields are fixed automatically by
                    # clean(), so leave them out of the lookup.
                    primary_fields[key] = block_fields[key]

            existing = list(Block.objects.filter(**primary_fields))
            is_new = not existing
            if is_new:
                block = Block(**block_fields)
            elif len(existing) == 1:
                block = existing[0]
                logger.debug(u"Block %s already exists" % unicode(existing[0]))
                for key, val in block_fields.items():
                    setattr(block, key, val)
            else:
                logger.warn("Multiple existing blocks like %s, skipping" % existing[0])
                continue

            try:
                block.full_clean()
            except ValidationError:
                # odd bug: sometimes we get ValidationError even when
                # the data looks good, and then cleaning again works???
                try:
                    block.full_clean()
                except ValidationError as e:
                    logger.warn("validation error on %s, skipping" % str(block))
                    logger.warn(e)
                    continue
            logger.debug("CREATING %s" % unicode(block))
            block.save()
            if is_new:
                # Count only blocks that passed validation and were saved.
                num_created += 1
            if parent_id is None:
                # First block saved for this feature: later ones point at it.
                parent_id = block.id
            else:
                block.parent_id = parent_id
                block.save()

            logger.debug('%d\tCreated block %s for feature %d' % (num_created, block, feature.get('TLID')))
    # The count used to be accumulated and then discarded.
    return num_created
def save(self):
    """Save one ``Block`` per dict yielded by ``self.gen_blocks()``.

    Features for which ``self.skip_feature()`` is true are ignored.  Each
    yielded dict is decoded to unicode, extended with the geometry, the
    pretty names, the street slug and the combined address range, and
    saved as a ``Block``.  The first block saved for a feature becomes the
    parent of any further blocks from the same feature.

    Returns the number of blocks saved.
    """
    number_keys = ('left_from_num', 'left_to_num',
                   'right_from_num', 'right_to_num')
    created_count = 0
    for feature in self.layer:
        first_block_id = None
        if self.skip_feature(feature):
            continue
        for attrs in self.gen_blocks(feature):
            # Usually (at least in Boston data) there is only
            # 1 block per feature.  But sometimes there are
            # multiple names for one street, eg.
            # "N. Commercial Wharf" and "Commercial Wharf N.";
            # in that case those would be yielded by gen_blocks() as
            # two separate blocks. Is that intentional, or a bug?

            # Byte strings are decoded so everything below is unicode.
            for name in list(attrs):
                if isinstance(attrs[name], str):
                    attrs[name] = attrs[name].decode(self.encoding)

            attrs['geom'] = feature.geom.geos
            pretty_names = make_pretty_name(
                attrs['left_from_num'],
                attrs['left_to_num'],
                attrs['right_from_num'],
                attrs['right_to_num'],
                attrs['predir'],
                attrs['street'],
                attrs['suffix'],
                attrs['postdir'])
            attrs['street_pretty_name'], attrs['pretty_name'] = pretty_names
            street_and_suffix = (attrs['street'], attrs['suffix'])
            attrs['street_slug'] = slugify(u' '.join(street_and_suffix))

            # House numbers such as '247B' cannot go into an
            # IntegerField, so they are reduced to plain integers -- but
            # only now that the pretty names have been built from the
            # raw values.
            for number_key in number_keys:
                raw = attrs[number_key]
                if not isinstance(raw, basestring):
                    continue
                # For ranges like '19-47' only the lower number is
                # kept; misleading, but better than dropping the block.
                digits = raw.rstrip(string.letters).split('-')[0]
                if not digits:
                    attrs[number_key] = None
                    continue
                try:
                    attrs[number_key] = int(digits)
                except ValueError:
                    self.log("Omitting weird value %r for %r" % (digits, number_key))
                    attrs[number_key] = None

            block_range = make_block_numbers(
                attrs['left_from_num'],
                attrs['left_to_num'],
                attrs['right_from_num'],
                attrs['right_to_num'])
            attrs['from_num'], attrs['to_num'] = block_range

            block = Block(**attrs)
            block.save()
            if first_block_id is not None:
                # A sibling was saved earlier for this feature: link to it.
                block.parent_id = first_block_id
                block.save()
            else:
                first_block_id = block.id
            created_count += 1
            self.log('%d\tCreated block %s for feature %d' % (created_count, block, feature.get('TLID')))
    return created_count
def save(self, verbose=False):
    """Save a ``Block`` for every usable street name of every feature.

    A feature is skipped when its 'FCC' value does not match
    ``self.fcc_pat``, when neither side has both a from- and a to-number,
    or when its geometry is not a LINESTRING.  For the remaining
    features, each of the alternate name slots (field name plus '',
    '1' ... '5') that holds a usable street name produces one block; the
    first block saved for a feature becomes the parent of the others.

    If ``verbose`` is true, a line is printed to stderr per saved block.

    Returns the number of blocks saved.
    """
    name_slots = ('', '1', '2', '3', '4', '5')
    created = 0
    for feature in self.layer:
        if not self.fcc_pat.search(feature.get('FCC')):
            continue
        first_id = None

        attrs = {}
        for source_name, target_name in FIELD_MAP.items():
            raw = feature.get(source_name)
            if isinstance(raw, basestring):
                attrs[target_name] = raw.upper()
            elif isinstance(raw, int) and raw == 0:
                attrs[target_name] = None
            else:
                attrs[target_name] = raw

        left_complete = attrs['left_from_num'] and attrs['left_to_num']
        if not left_complete and not (attrs['right_from_num'] and attrs['right_to_num']):
            continue

        # The source data sometimes has "from" greater than "to"; put each
        # side's pair into ascending order.
        for side in ('left', 'right'):
            low_key = '%s_from_num' % side
            high_key = '%s_to_num' % side
            if attrs[low_key] > attrs[high_key]:
                attrs[low_key], attrs[high_key] = attrs[high_key], attrs[low_key]

        if feature.geom.geom_name != 'LINESTRING':
            continue

        for slot in name_slots:
            names = {}
            for source_name, target_name in NAME_FIELD_MAP.items():
                names[target_name] = feature.get(source_name + slot).upper()
            if not names['street']:
                continue
            # A bare number with no suffix / type is not a usable street name.
            if not names['suffix'] and re.search(r'^\d+$', names['street']):
                continue
            attrs.update(names)

            block = Block(**attrs)
            block.geom = feature.geom.geos
            street_name, block_name = make_pretty_name(
                attrs['left_from_num'],
                attrs['left_to_num'],
                attrs['right_from_num'],
                attrs['right_to_num'],
                attrs['predir'],
                attrs['street'],
                attrs['suffix'],
                attrs['postdir']
            )
            block.pretty_name = block_name
            block.street_pretty_name = street_name
            street_and_suffix = (attrs['street'], attrs['suffix'])
            block.street_slug = slugify(' '.join(street_and_suffix))
            block.save()
            if first_id is not None:
                # A sibling was saved earlier for this feature: link to it.
                block.parent_id = first_id
                block.save()
            else:
                first_id = block.id
            created += 1
            if verbose:
                print >> sys.stderr, 'Created block %s' % block
    return created
def save(self):
    """Save a ``Block`` for every usable street name of every feature.

    A feature is skipped when its 'FCC' value does not match
    ``self.fcc_pat``, when neither side has both a from- and a to-number,
    or when its geometry is not a LINESTRING.  For the remaining
    features, each alternate name slot (field name plus u'', u'1' ...
    u'5') that holds a usable street name produces one block; the first
    block saved for a feature becomes the parent of the others.

    Returns the number of blocks saved.
    """
    alt_names_suff = (u'', u'1', u'2', u'3', u'4', u'5')
    addr_keys = ('left_from_num', 'left_to_num',
                 'right_from_num', 'right_to_num')
    num_created = 0
    for feature in self.layer:
        if not self.fcc_pat.search(feature.get('FCC')):
            continue
        parent_id = None
        fields = {}
        for esri_fieldname, block_fieldname in FIELD_MAP.items():
            value = feature.get(esri_fieldname)
            if isinstance(value, basestring):
                value = value.upper()
            elif isinstance(value, int) and value == 0:
                value = None
            fields[block_fieldname] = value
        if not ((fields['left_from_num'] and fields['left_to_num']) or
                (fields['right_from_num'] and fields['right_to_num'])):
            continue
        # Sometimes the "from" number is greater than the "to"
        # number in the source data, so we swap them into proper
        # ordering
        for side in ('left', 'right'):
            from_key, to_key = '%s_from_num' % side, '%s_to_num' % side
            if fields[from_key] > fields[to_key]:
                fields[from_key], fields[to_key] = fields[to_key], fields[from_key]
        if feature.geom.geom_name != 'LINESTRING':
            continue
        for suffix in alt_names_suff:
            name_fields = {}
            for esri_fieldname, block_fieldname in NAME_FIELD_MAP.items():
                key = esri_fieldname + suffix
                name_fields[block_fieldname] = feature.get(key).upper()
            if not name_fields['street']:
                continue
            # Skip blocks with bare number street names and no suffix / type
            if not name_fields['suffix'] and re.search(r'^\d+$', name_fields['street']):
                continue

            # Work on a per-name copy: the address numbers are stripped
            # below, and the next alternate name must still build its
            # pretty name from the unstripped values.
            block_fields = dict(fields)
            block_fields.update(name_fields)

            # Ensure we have unicode.
            for key, val in block_fields.items():
                if isinstance(val, str):
                    block_fields[key] = val.decode(self.encoding)

            block_fields['street_pretty_name'], block_fields['pretty_name'] = make_pretty_name(
                block_fields['left_from_num'],
                block_fields['left_to_num'],
                block_fields['right_from_num'],
                block_fields['right_to_num'],
                block_fields['predir'],
                block_fields['street'],
                block_fields['suffix'],
                block_fields['postdir'],
            )
            block_fields['street_slug'] = slugify(
                u' '.join((block_fields['street'], block_fields['suffix'])))

            # Watch out for addresses like '247B' which can't be
            # saved as an IntegerField. But do this after making
            # pretty names.
            for addr_key in addr_keys:
                # The values may be None or ints (see the FIELD_MAP loop
                # above); only strings have trailing letters to strip.
                if isinstance(block_fields[addr_key], basestring):
                    block_fields[addr_key] = block_fields[addr_key].rstrip(string.letters)

            block_fields['from_num'], block_fields['to_num'] = make_block_numbers(
                block_fields['left_from_num'],
                block_fields['left_to_num'],
                block_fields['right_from_num'],
                block_fields['right_to_num'])

            block = Block(**block_fields)
            block.geom = feature.geom.geos
            self.log(u'Looking at block %s' % block_fields['street'])

            block.save()
            if parent_id is None:
                # First block saved for this feature: later ones point at it.
                parent_id = block.id
            else:
                block.parent_id = parent_id
                block.save()
            num_created += 1
            self.log('Created block %s' % block)
    return num_created
def save(self):
    """Save one ``Block`` per usable feature of ``self.layer``.

    A feature is skipped when neither side has both a from- and a
    to-number, when its geometry is not a LINESTRING, when it has no
    street name, or when its street name is a bare number with no suffix.

    NOTE: in this variant the 'FCC' filter (``self.fcc_pat``) and the
    loop over alternate name slots are disabled, so only the primary
    name fields are read and at most one block is saved per feature.

    Returns the number of blocks saved.
    """
    addr_keys = ('left_from_num', 'left_to_num',
                 'right_from_num', 'right_to_num')
    num_created = 0
    for feature in self.layer:
        parent_id = None
        fields = {}
        for esri_fieldname, block_fieldname in FIELD_MAP.items():
            value = feature.get(esri_fieldname)
            if isinstance(value, basestring):
                value = value.upper()
            elif isinstance(value, int) and value == 0:
                value = None
            fields[block_fieldname] = value
        if not ((fields['left_from_num'] and fields['left_to_num']) or
                (fields['right_from_num'] and fields['right_to_num'])):
            continue
        # Sometimes the "from" number is greater than the "to"
        # number in the source data, so we swap them into proper
        # ordering
        for side in ('left', 'right'):
            from_key, to_key = '%s_from_num' % side, '%s_to_num' % side
            if fields[from_key] > fields[to_key]:
                fields[from_key], fields[to_key] = fields[to_key], fields[from_key]
        if feature.geom.geom_name != 'LINESTRING':
            continue

        name_fields = {}
        for esri_fieldname, block_fieldname in NAME_FIELD_MAP.items():
            name_fields[block_fieldname] = feature.get(esri_fieldname).upper()
        if not name_fields['street']:
            continue
        # Skip blocks with bare number street names and no suffix / type
        if not name_fields['suffix'] and re.search(r'^\d+$', name_fields['street']):
            continue
        fields.update(name_fields)

        # Ensure we have unicode.
        for key, val in fields.items():
            if isinstance(val, str):
                fields[key] = val.decode(self.encoding)

        fields['street_pretty_name'], fields['pretty_name'] = make_pretty_name(
            fields['left_from_num'],
            fields['left_to_num'],
            fields['right_from_num'],
            fields['right_to_num'],
            fields['predir'],
            fields['street'],
            fields['suffix'],
            fields['postdir'],
        )
        print >> sys.stderr, 'Looking at block pretty name %s' % fields['street']
        fields['street_slug'] = slugify(u' '.join((fields['street'], fields['suffix'])))

        # Watch out for addresses like '247B' which can't be
        # saved as an IntegerField. But do this after making
        # pretty names.
        for addr_key in addr_keys:
            # The values may be None or ints (see the FIELD_MAP loop
            # above); only strings have trailing letters to strip.
            if isinstance(fields[addr_key], basestring):
                fields[addr_key] = fields[addr_key].rstrip(string.letters)

        fields['from_num'], fields['to_num'] = make_block_numbers(
            fields['left_from_num'],
            fields['left_to_num'],
            fields['right_from_num'],
            fields['right_to_num'])

        block = Block(**fields)
        block.geom = feature.geom.geos
        self.log(u'Looking at block %s' % fields['street'])

        block.save()
        if parent_id is None:
            parent_id = block.id
        else:
            block.parent_id = parent_id
            block.save()
        num_created += 1
        self.log('Created block %s' % block)
    return num_created