def importWOFGeometries(self):
    """Store the geometry of every imported WOF item in the entities table.

    For each item that carries a geometry, the feature is delta-encoded and
    inserted; any previously stored row with the same parent-id combination
    (and matching or missing name id) is deleted first, so re-imports do not
    leave stale rows behind.
    """
    cursor = self.db.cursor()
    totalTypes = len(self.items)
    parentFields = ('country', 'region', 'county', 'locality', 'neighbourhood', 'street')
    for itemType, itemsById in self.progress(self.items.items(), total=totalTypes):
        for osmId, item in itemsById.items():
            # Items without geometry cannot be represented as entities.
            if item.geometry is None:
                continue
            # Delta-encode the feature payload for compact BLOB storage.
            stream = encodingstream.DeltaEncodingStream()
            stream.encodeFeature({ 'id': osmId, 'geometry': item.geometry, 'properties': item.properties })
            # Resolve the database id of the item name; 0 when the name is unknown.
            nameItem = self.items['name'].get(item.name)
            nameId = nameItem.dbid if nameItem is not None else 0
            # The six-level administrative parent chain, NULLs for missing levels.
            parentIds = tuple(item.dbids.get(field, None) for field in parentFields)
            # Remove any stale row for the same parent combination before inserting.
            cursor.execute('DELETE FROM entities WHERE country_id IS ? AND region_id IS ? AND county_id IS ? AND locality_id IS ? AND neighbourhood_id IS ? AND street_id IS ? AND (name_id IS NULL OR name_id=?)', parentIds + (nameId,))
            cursor.execute('INSERT INTO entities(country_id, region_id, county_id, locality_id, neighbourhood_id, street_id, postcode_id, housenumbers, name_id, features, quadindex, rank) VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)', parentIds + (None, None, None, sqlite3.Binary(stream.getData()), 0, self.calculateItemRank(item)))
    cursor.close()
def convertDatabase(self):
    """Convert the intermediate geocoding database into its final schema.

    Runs inside a single transaction and performs, in order:
      1. Derives a 'type' column for every entity from its most specific
         non-NULL address component (name > housenumber > street > ... > country).
      2. Extracts (entity, name) links into a new entitynames table and
         denormalizes usage counts into names.entitycount and tokens.namecount.
      3. Rebuilds the entities table in compact form: the textual '|'-separated
         housenumber list becomes a delta-encoded sequence of name ids, with
         each distinct housenumber registered as a name (type 9) plus its tokens.
      4. Rewrites per-language nametokens rows and re-creates the final indices.
    """
    self.db.execute("BEGIN")
    cursor = self.db.cursor()
    cursor1 = self.db.cursor()
    cursor2 = self.db.cursor()

    # Set type: classify each entity by its most specific address component.
    cursor.execute("ALTER TABLE entities ADD type INTEGER NOT NULL DEFAULT 0")
    cursor1.execute("SELECT COUNT(*) FROM entities")
    rowCount = cursor1.fetchone()[0]
    cursor1.execute("SELECT rowid, housenumbers, country_id, region_id, county_id, locality_id, neighbourhood_id, street_id, NULL, name_id FROM entities")
    for row in self.progress(cursor1, total=rowCount):
        # BUGFIX: default to 0 (the column default). Previously there was no
        # fallback branch, so a row with all components NULL either raised
        # NameError (first row) or silently reused the previous row's type.
        entityType = 0
        if row[-1]:
            entityType = CLASS_TABLE['name']
        elif row[1]:
            entityType = CLASS_TABLE['housenumber']
        elif row[-3]:
            entityType = CLASS_TABLE['street']
        elif row[-4]:
            entityType = CLASS_TABLE['neighbourhood']
        elif row[-5]:
            entityType = CLASS_TABLE['locality']
        elif row[-6]:
            entityType = CLASS_TABLE['county']
        elif row[-7]:
            entityType = CLASS_TABLE['region']
        elif row[-8]:
            entityType = CLASS_TABLE['country']
        cursor.execute("UPDATE entities SET type=? WHERE rowid=?", (entityType, row[0]))

    # Store name info in separate entitynames table.
    cursor.execute("CREATE TABLE entitynames (entity_id INTEGER NOT NULL, name_id INTEGER NOT NULL)")
    for field in ['country', 'region', 'county', 'locality', 'neighbourhood', 'street', 'postcode', 'name']:
        cursor.execute("INSERT INTO entitynames(entity_id, name_id) SELECT id, %s_id FROM entities WHERE %s_id IS NOT NULL" % (field, field))

    # Add entitycount field to names (how many entities reference each name).
    cursor.execute("CREATE INDEX entitynames_entity_name_id ON entitynames(entity_id, name_id)")
    cursor.execute("CREATE INDEX entitynames_name_id ON entitynames(name_id)")
    cursor.execute("ALTER TABLE names ADD entitycount INTEGER NOT NULL DEFAULT 0")
    cursor.execute("UPDATE names SET entitycount=(SELECT COUNT(*) FROM entitynames WHERE entitynames.name_id=names.id)")

    # Add namecount field to tokens (how many names reference each token).
    cursor.execute("CREATE INDEX nametokens_token_name_id ON nametokens (token_id, name_id)")
    cursor.execute("ALTER TABLE tokens ADD namecount INTEGER NOT NULL DEFAULT 0")
    cursor.execute("UPDATE tokens SET namecount=(SELECT COUNT(*) FROM nametokens WHERE nametokens.token_id=tokens.id)")

    # Update housenumbers: rebuild entities with housenumbers stored as
    # delta-encoded name ids instead of a '|'-separated text list.
    names = {}  # housenumber text -> allocated name id
    cursor1.execute("SELECT MAX(id) FROM names")
    nameId = cursor1.fetchone()[0] or 0
    cursor.execute("ALTER TABLE entities RENAME TO old_entities")
    cursor.execute("CREATE TABLE entities (id INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT, type INTEGER NOT NULL, features BLOB NOT NULL, housenumbers BLOB NULL, quadindex INTEGER NOT NULL, rank INTEGER NOT NULL)")
    cursor.execute("INSERT INTO metadata(name, value) VALUES('rank_scale', ?)", (str(RANK_SCALE),))
    cursor1.execute("SELECT COUNT(*) FROM old_entities")
    rowCount = cursor1.fetchone()[0]
    cursor1.execute("SELECT id, type, quadindex, rank, housenumbers, features FROM old_entities")
    for row1 in self.progress(cursor1, total=rowCount):
        encodeStream = encodingstream.DeltaEncodingStream()
        if row1[4]:
            houseNums = row1[4].split("|")
            for houseNum in houseNums:
                if houseNum not in names:
                    # First occurrence: allocate a name id and register tokens.
                    nameId += 1
                    names[houseNum] = nameId
                    cursor.execute("INSERT INTO names(id, lang, name, type) VALUES(?, ?, ?, ?)", (nameId, None, houseNum, 9))
                    for token in self.tokenizeName(houseNum):
                        cursor2.execute("SELECT id FROM tokens WHERE token=?", (token,))
                        for row2 in cursor2:
                            cursor.execute("INSERT INTO nametokens(name_id, token_id) VALUES(?, ?)", (nameId, row2[0]))
                encodeStream.encodeNumber(names[houseNum])
        # Rank is scaled to an integer here; consumers divide by 'rank_scale'.
        cursor.execute("INSERT INTO entities(id, type, quadindex, rank, features, housenumbers) VALUES(?, ?, ?, ?, ?, ?)", (row1[0], row1[1], row1[2], int(row1[3] * RANK_SCALE), row1[5], sqlite3.Binary(encodeStream.getData()) if encodeStream.getData() else None))
    cursor.execute("DROP TABLE old_entities")

    # Add lang to nametokens: rebuild token links of language-tagged names
    # so the language tag is carried on each link.
    cursor.execute("ALTER TABLE nametokens ADD lang TEXT NULL")
    cursor1.execute("SELECT id, name, lang FROM names WHERE id IN (SELECT id FROM names WHERE lang IS NOT NULL)")
    rows1 = cursor1.fetchall()
    cursor.execute("DELETE FROM nametokens WHERE name_id IN (SELECT id FROM names WHERE lang IS NOT NULL)")
    for row1 in rows1:
        for token in self.tokenizeName(row1[1]):
            cursor.execute("INSERT INTO nametokens(name_id, token_id, lang) SELECT ?, id, ? FROM tokens WHERE token=?", (row1[0], row1[2], token))

    # Indices
    cursor.execute("DROP INDEX nametokens_token_id")
    cursor.execute("CREATE INDEX entities_id ON entities(id)")
    cursor.execute("CREATE INDEX entities_type ON entities(type)")
    cursor.execute("CREATE INDEX entities_quadindex ON entities(quadindex)")

    # Done
    cursor2.close()
    cursor1.close()
    cursor.close()
    self.db.commit()
def importPeliasAddress(self, peliasData, buildingsLocator):
    """Import a single Pelias address/POI record into the entities table.

    Resolves the record's administrative hierarchy via the WOF gazetteer,
    attaches street/housenumber/postcode/name info, picks the best available
    geometry (building polygon, street line, or plain point) and finally
    either merges the feature into an existing compatible entity row or
    inserts a new row.

    peliasData       -- raw Pelias document; shape defined by extractPeliasData
    buildingsLocator -- spatial index mapping a point to (streetDbid, housenumber)
                        candidates (presumably buildings; see findGeometry)
    """
    id, data = self.extractPeliasData(peliasData)
    if id is None or data is None:
        return
    # Find parent info from gazetteer
    entity = self.Entity()
    if 'center_point' in data:
        hierarchy = self.wofLocator.findHierarchy((data['center_point']['lon'], data['center_point']['lat']))
        if hierarchy:
            # Hierarchy keys look like 'country_id', 'region_id', ...;
            # [:-3] strips the '_id' suffix to obtain the parent type.
            for parentField, parentId in hierarchy[0].items():
                parentType = parentField[:-3]
                entity.dbids[parentType] = self.mapEntityParent(parentId, parentType)
        entity.geometry = { 'type': 'Point', 'coordinates': (data['center_point']['lon'], data['center_point']['lat']) }
    else:
        self.warning('No coordinates for entity: %d' % id)
        return
    # Check country: records that cannot be placed in a country are dropped.
    if entity.dbids.get('country', None) is None:
        self.warning('No country for entity: %d' % id)
        return
    # Store address info
    if 'address_parts' in data:
        if data['address_parts'].get('street', None):
            entity.dbids['street'] = self.mapEntityName(data['address_parts']['street'], 'street')
        if data['address_parts'].get('number', None):
            # A housenumber is only meaningful when street info is present.
            if entity.dbids.get('street', None) is not None:
                self.importTokens(data['address_parts']['number'], 'housenumber', None)
                entity.housenumber = data['address_parts']['number']
            else:
                self.warning('Ignoring housenumber, as street info is missing: %d' % id)
        if data['address_parts'].get('zip', None):
            # Postcodes are stored only for full addresses (with housenumber).
            if entity.housenumber is not None and self.importPostcodes:
                entity.dbids['postcode'] = self.mapEntityName(data['address_parts']['zip'], 'postcode')
    # Extract optional name and geometry. Purely numeric names are rejected
    # since they would collide with housenumber searches.
    name = data.get('name', {}).get('default', '')
    if name.isnumeric():
        self.warning("Numeric name '%s' for entity: %d" % (name, id))
        return
    extraNames = []
    for key, val in data.get('name', {}).items():
        if key != 'default' and val and not val.isnumeric():
            extraNames.append((key, val))
    if entity.dbids.get('street', None) is not None:
        # Street info is present. Check if we have a building
        if entity.housenumber is not None and id in self.buildingsGeometry:
            entity.geometry = { 'type': 'Polygon', 'coordinates': [self.unpackCoordinates(self.buildingsGeometry[id])] }
        # Import names only if the name is not 'trivial' (streetname + housenumber)
        if name != '':
            streetNames = [data.get('address_parts', {}).get('street', '')]
            if entity.housenumber is not None:
                # Both "12 Main St" and "Main St 12" spellings count as trivial.
                streetNames = ['%s %s' % (entity.housenumber, streetNames[0]), '%s %s' % (streetNames[0], entity.housenumber)]
            if name not in streetNames:
                entity.dbids['name'] = self.mapEntityName(name, 'name', extraNames)
    else:
        # Check if we have a street based on id.
        if entity.housenumber is None and id in self.streetsGeometry:
            entity.geometry = { 'type': 'LineString', 'coordinates': self.unpackCoordinates(self.streetsGeometry[id]) }
            entity.dbids['street'] = self.mapEntityName(name, 'street', extraNames)
        else:
            # Not a street, likely a POI.
            if name == '':
                self.warning('No name for entity: %d' % id)
                return
            # In case of POIs, we will try to locate the building (and thus get full address of the POI).
            if entity.geometry is not None:
                if entity.geometry['type'] == 'Point':
                    buildings = buildingsLocator.findGeometry(entity.geometry)
                    if len(buildings) > 0:
                        firstStreetDbid, firstHousenumber = buildings[0]
                        # Adopt street/housenumber only when all candidate
                        # buildings agree on them.
                        if all([firstStreetDbid == streetDbid for streetDbid, housenumber in buildings]):
                            entity.dbids['street'] = firstStreetDbid
                            if all([firstHousenumber == housenumber for streetDbid, housenumber in buildings]):
                                entity.housenumber = firstHousenumber
            entity.dbids['name'] = self.mapEntityName(name, 'name', extraNames)
    # Check entity validity
    if entity.geometry is None:
        self.warning('Failed to import geometry: %d' % id)
        return
    if self.clipBounds is not None:
        if not testClipBounds(calculateGeometryBounds(entity.geometry), self.clipBounds):
            self.warning('Geometry entity geometry outside of clip bounds: %d' % id)
            return
    # Try to merge data: look for an existing row with the same address ids
    # and compatible housenumber presence. 'IS ?' is used so NULLs compare equal.
    cursor = self.db.cursor()
    cursor.execute('SELECT id, features, housenumbers, postcode_id FROM entities WHERE country_id IS ? AND region_id IS ? AND county_id IS ? AND locality_id IS ? AND neighbourhood_id IS ? AND street_id IS ? AND name_id IS ? AND %s' % ('housenumbers IS NOT NULL' if entity.housenumber else 'housenumbers IS NULL'), (entity.dbids.get('country', None), entity.dbids.get('region', None), entity.dbids.get('county', None), entity.dbids.get('locality', None), entity.dbids.get('neighbourhood', None), entity.dbids.get('street', None), entity.dbids.get('name', None)))
    for row in cursor:
        # Only merge rows whose category set matches exactly.
        categories = self.loadCategories(row[0])
        if set(categories) != set(data.get('category', [])):
            continue
        # Append this feature to the existing delta-encoded feature blob.
        encodeStream = encodingstream.DeltaEncodingStream(row[1])
        encodeStream.encodeFeature({ 'id': id, 'geometry': entity.geometry, 'properties': {} })
        features = sqlite3.Binary(encodeStream.getData())
        # '|' is the list separator, so it is squashed out of the housenumber itself.
        housenumbers = row[2] + '|' + entity.housenumber.replace('|', ' ') if entity.housenumber else None
        cursor.execute('UPDATE entities SET features=?, housenumbers=?, postcode_id=? WHERE id=?', (features, housenumbers, row[3] or entity.dbids.get('postcode', None), row[0]))
        cursor.close()
        return
    # Merging not possible, store
    encodeStream = encodingstream.DeltaEncodingStream()
    encodeStream.encodeFeature({ 'id': id, 'geometry': entity.geometry, 'properties': {} })
    features = sqlite3.Binary(encodeStream.getData())
    housenumbers = entity.housenumber.replace('|', ' ') if entity.housenumber else None
    cursor.execute('INSERT INTO entities(country_id, region_id, county_id, locality_id, neighbourhood_id, street_id, postcode_id, name_id, housenumbers, features, quadindex, rank) VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)', (entity.dbids.get('country', None), entity.dbids.get('region', None), entity.dbids.get('county', None), entity.dbids.get('locality', None), entity.dbids.get('neighbourhood', None), entity.dbids.get('street', None), entity.dbids.get('postcode', None), entity.dbids.get('name', None), housenumbers, features, 0, self.calculateEntityRank(entity)))
    self.storeCategories(cursor.lastrowid, data.get('category', []))
    cursor.close()
def postProcessFeatures(self):
    """Post-process all stored entity features.

    Three passes:
      1. Computes a running mean of all feature bounds centers to obtain a
         global geometry origin for delta encoding.
      2. For every entity: decodes its features, groups them by normalized
         housenumber, merges/simplifies duplicate geometries, re-encodes them
         relative to the origin, computes the quadindex, and propagates the
         entity bounds back onto the originating items.
      3. Writes global metadata (bounds, origin, precision, quadindex level).
    """
    # Calculate origin for geometry
    cursor1 = self.db.cursor()
    cursor1.execute('SELECT features FROM entities')
    geomOrigin = (0, 0)
    featureCounter = 0
    for row in cursor1:
        encodeStream = encodingstream.DeltaEncodingStream(row[0])
        while not encodeStream.eof():
            # Reset delta state before each feature decode.
            encodeStream.prevCoord = [0, 0]
            encodeStream.prevNumber = 0
            feature = encodeStream.decodeFeature()
            featureCounter += 1
            # NOTE(review): bare except silently skips features whose bounds
            # cannot be computed — consider narrowing the exception type.
            try:
                bounds = calculateGeometryBounds(feature['geometry'])
            except:
                continue
            origin = ((bounds[0] + bounds[2]) * 0.5, (bounds[1] + bounds[3]) * 0.5)
            # Incremental (running) mean of all feature centers.
            geomOrigin = (geomOrigin[0] + (origin[0] - geomOrigin[0]) / featureCounter, geomOrigin[1] + (origin[1] - geomOrigin[1]) / featureCounter)
    # Create inverse mapping lists (dbid -> OSM id)
    itemOsmIds = {}
    for type, items in self.items.items():
        itemOsmIds[type] = {}
        for osmId, item in items.items():
            itemOsmIds[type][item.dbid] = osmId
    # Process geometries
    cursor1.execute('SELECT COUNT(*) FROM entities')
    rowCount = cursor1.fetchone()[0]
    cursor1.execute('SELECT id, features, housenumbers, country_id, region_id, county_id, locality_id, neighbourhood_id, street_id, postcode_id, housenumbers, name_id FROM entities ORDER BY id')
    cursor2 = self.db.cursor()
    for row in self.progress(cursor1, total=rowCount):
        entityId = row[0]
        # Read features and housenumbers into interleaved lists:
        # featureCollections[i] holds all features of housenumbers[i]
        # (or a single collection per feature when there are no housenumbers).
        featureCollections = []
        housenumbers = []
        featureCounter = 0
        encodeStream = encodingstream.DeltaEncodingStream(row[1])
        while not encodeStream.eof():
            encodeStream.prevCoord = [0, 0]
            encodeStream.prevNumber = 0
            feature = encodeStream.decodeFeature()
            featureCounter += 1
            if not validateGeometry(feature['geometry']):
                self.warning('Geometry not valid: %d' % feature['id'])
            if row[2]:
                # The n-th feature corresponds to the n-th '|'-separated housenumber.
                housenumber = self.normalizeHouseNumber(row[2].split('|')[featureCounter - 1])
                if housenumber not in housenumbers:
                    housenumbers.append(housenumber)
                    featureCollections.append([feature])
                else:
                    featureCollections[housenumbers.index(housenumber)] += [feature]
            else:
                featureCollections.append([feature])
        # Add housenumbers to regex builder
        #if housenumbers:
        #    for housenumber in housenumbers:
        #        self.housenumRegexBuilder.add(housenumber)
        # Try to simplify and merge features
        for featureCollection in featureCollections:
            if not self.importIds:
                # Without id import all features become anonymous (id 0),
                # which lets more of them merge below.
                for i in range(0, len(featureCollection)):
                    featureCollection[i]['id'] = 0
            i = 0
            while i < len(featureCollection):
                currentFeature = featureCollection[i]
                # Indices (relative to the full list) of all features sharing
                # this feature's id and properties — candidates for merging.
                featureIndices = [i + j for j, feature in enumerate(featureCollection[i:]) if feature['id'] == currentFeature['id'] and feature['properties'] == currentFeature['properties']]
                geometry = mergeGeometries([featureCollection[j]['geometry'] for j in featureIndices]) if len(featureIndices) != 1 else featureCollection[i]['geometry']
                # Housenumber geometries are kept exact; others are simplified.
                geometry = simplifyGeometry(geometry, SIMPLIFICATION_FACTOR) if not housenumbers else geometry
                featureCollection[i] = { 'id': currentFeature['id'], 'geometry': geometry, 'properties': currentFeature['properties'] }
                # Remove merged duplicates back-to-front so indices stay valid.
                for j in reversed(featureIndices[1:]):
                    featureCollection.pop(j)
                i += 1
        # Encode features relative to the global origin computed above.
        encodeStream = encodingstream.DeltaEncodingStream(None, geomOrigin)
        for featureCollection in featureCollections:
            encodeStream.encodeFeatureCollection(featureCollection)
        # Calculate quadindex of all the geometries
        geometries = []
        for featureCollection in featureCollections:
            geometries += [feature['geometry'] for feature in featureCollection]
        # NOTE(review): bare except — any failure here removes the entity.
        try:
            bounds = calculateGeometryBounds({ 'type': 'GeometryCollection', 'geometries': geometries })
            quadIndex = quadindex.calculateGeometryQuadIndex(bounds)
        except:
            cursor2.execute("DELETE FROM entities WHERE id=?", (entityId,))
            self.warning('Removing entity %d due to illegal geometry' % entityId)
            continue
        # Propagate the entity bounds back onto every referenced item
        # (columns ending in '_id' map to item types via [:-3]).
        for idx, field in [(idx, field) for idx, field in enumerate([description[0] for description in cursor1.description]) if field.endswith('_id')]:
            type = field[:-3]
            id = itemOsmIds.get(type, {}).get(row[idx], None)
            if id is not None:
                self.items[type][id].geomBoundsList = mergeBoundsLists(self.items[type][id].geomBoundsList, [bounds])
        # Update database
        self.geomBounds = mergeBounds(self.geomBounds, bounds)
        cursor2.execute('UPDATE entities SET features=?, housenumbers=?, quadindex=? WHERE id=?', (sqlite3.Binary(encodeStream.getData()), '|'.join(housenumbers) if housenumbers else None, quadIndex, entityId))
    # Store global metadata for the final database.
    if self.geomBounds is not None:
        cursor1.execute("INSERT INTO metadata(name, value) VALUES('bounds', '%.16g,%.16g,%.16g,%.16g')" % self.geomBounds)
    cursor1.execute("INSERT INTO metadata(name, value) VALUES('origin', '%.16g,%.16g')" % geomOrigin)
    cursor1.execute("INSERT INTO metadata(name, value) VALUES('encoding_precision', '%.16g')" % encodingstream.PRECISION)
    cursor1.execute("INSERT INTO metadata(name, value) VALUES('quadindex_level', '%d')" % quadindex.MAX_LEVEL)
    #cursor1.execute("INSERT INTO metadata(name, value) VALUES('housenumber_regex', '%s')" % self.housenumRegexBuilder.build())
    cursor2.close()
    cursor1.close()