class MapClusterer:
    """Cluster point rows of ``geo_table`` for display on a web map.

    Two clustering strategies are offered: ``kmeansCluster`` (kmeans per area,
    followed by a pixel-distance merge) and ``gridCluster`` (one count per
    grid cell). Both read their parameters from the JSON body of a request
    and use the session to remember which areas were already delivered.

    The class relies on module-level configuration that is defined outside
    this block (``geo_table``, ``geo_column_str``, ``BASE_K``, ``K_CAP``,
    ``DEBUG``, ``PINCOLUMN``, ``pin_qry``) and on the ``Gis`` model.
    """

    def __init__(self, zoom=1, gridSize=256, input_srid=4326, mapTileSize=256):
        """Store the map parameters and look up the SRID of the database.

        Args:
            zoom: map zoom level; coerced to ``int``.
            gridSize: edge length of a grid cell in pixels; each grid cell
                gets its own kmeans clustering.
            input_srid: SRID of the coordinates coming from javascript. The
                same SRID is used for the coordinates that are returned.
            mapTileSize: tile size in pixels, handed to ``MapTools``.
        """
        self.input_srid = int(input_srid)
        self.gridSize = int(gridSize)
        self.zoom = int(zoom)
        self.maptools = MapTools(int(mapTileSize))

        # filter operators
        self.valid_operators = ["=", "<", ">", "<=", ">=", "list", "!list"]

        # NOTE: this runs a database query on every instantiation.
        self.srid_db = self.getDatabaseSRID()

    def getDatabaseSRID(self):
        """Return the SRID of the geometry column.

        The SRID is read from the first row of ``geo_table``. If the table is
        empty, ``settings.ANYCLUSTER_COORDINATES_COLUMN_SRID`` is used, and
        4326 if that setting does not exist.
        """
        srid_qry = 'SELECT id, ST_SRID(%s) FROM "%s" LIMIT 1;' % (geo_column_str, geo_table)

        # Materialize once: len() plus indexing on the raw queryset would
        # otherwise execute the query twice.
        rows = list(Gis.objects.raw(srid_qry))
        if rows:
            return rows[0].st_srid

        return getattr(settings, "ANYCLUSTER_COORDINATES_COLUMN_SRID", 4326)

    # -------------------------------------------------------------------------
    # LOADING THE AJAX INPUT
    # -------------------------------------------------------------------------
    def loadJson(self, request):
        """Decode the JSON body of ``request`` and return it as python objects.

        anycluster receives a json object containing geojson and filters.
        If the payload has a ``"geojson"`` key, its value is also stored in
        ``request.session['geojson']``.
        """
        params = json.loads(request.body.decode(encoding="UTF-8"))

        if "geojson" in params:
            request.session["geojson"] = params["geojson"]

        return params

    # -------------------------------------------------------------------------
    # CALCULATE CELL-IDs ACCORDING TO VIEWPORT
    # -------------------------------------------------------------------------
    def _debugCorners(self, label, topright, bottomleft):
        """Print both viewport corners, prefixed with ``label``, if DEBUG is set."""
        if DEBUG:
            print(
                "%s: topright: (%s,%s) | bottomleft: (%s,%s)"
                % (label, topright.x, topright.y, bottomleft.x, bottomleft.y)
            )

    def getClusterCells(self, viewport):
        """Return the QuadKey cell IDs of the grid covering ``viewport``.

        Given the viewport, expand to the nearest grid and get all cell ids
        of this grid, according to ``MapClusterer.gridSize``.

        To calculate those cells, the coordinates are transformed as shown
        below::

            LatLng --> Meters (Mercator) --> Shifted origin --> pixel coords --> GRID

            -----------  -----------   -----------   -----------   -------------
            |         |  |         |   |         |   |         |   |00|10|20|30|
            |         |  |         |   |         |   |         |   |01|11|21|31|
            |    O    |  |    O    |   |         |   |         |   -------------
            |         |  |         |   |         |   |         |   |02|12|22|32|
            |         |  |         |   |         |   |         |   |03|13|23|33|
            -----------  -----------   O----------   O----------   -------------
            LATLNG       METERS        METERS        PIXELS        GRID
                                       (shifted                    (CELL-IDs according
                                        coordinates)                to QuadKey, depends
                                                                    on zoom level)
            O = origin

        The coordinate system with shifted origin has only coordinates with
        positive values (essential). The CELL-ID (= QuadKey ID) of the
        top-right and of the bottom-left viewport corner is determined, and
        finally all CELL-IDs spanned by those two cells are calculated.

        Args:
            viewport: dict with the keys ``"left"``, ``"top"``, ``"right"``
                and ``"bottom"``, given in ``self.input_srid``.

        Returns:
            Whatever ``MapTools.get_ClusterCells`` returns for the two corner
            cells and the current zoom.
        """
        if DEBUG:
            print("VIEWPORT(wgs84datum, 4326, longlat): %s" % viewport)

        # create points according to input srid
        topright = Point(viewport["right"], viewport["top"], srid=self.input_srid)
        bottomleft = Point(viewport["left"], viewport["bottom"], srid=self.input_srid)

        if self.input_srid != 4326:
            # NOTE(review): the return value is used here, while
            # clusterCellToBounds ignores it and relies on in-place mutation.
            # Confirm that point_ToLatLng really returns the point.
            topright = self.maptools.point_ToLatLng(topright)
            bottomleft = self.maptools.point_ToLatLng(bottomleft)

        # Polar areas with abs(latitude) bigger than 85.05112878 are clipped
        # off as google does.
        if topright.y > 85.0:
            topright.y = 85.0
        if topright.x > 179.9999:
            topright.x = 179.9999
        if bottomleft.y < -85:
            bottomleft.y = -85
        if bottomleft.x < -179.9999:
            bottomleft.x = -179.9999
        self._debugCorners("4326, longlat", topright, bottomleft)

        # project points to mercator 3857, plane coordinates
        self.maptools.point_ToMercator(topright)
        self.maptools.point_ToMercator(bottomleft)
        self._debugCorners("MERCATOR", topright, bottomleft)

        # shift origin
        self.maptools.point_MercatorToWorld(topright)
        self.maptools.point_MercatorToWorld(bottomleft)
        self._debugCorners("WORLD", topright, bottomleft)

        # calculate pixelcoords from world coords depending on zoom
        self.maptools.point_WorldToPixels(topright, self.zoom)
        self.maptools.point_WorldToPixels(bottomleft, self.zoom)
        self._debugCorners("PIXELS", topright, bottomleft)

        # get topright and bottom left cellID, e.g. (03,01)
        toprightCell = self.maptools.point_PixelToCellID(topright, self.gridSize)
        bottomleftCell = self.maptools.point_PixelToCellID(bottomleft, self.gridSize)
        if DEBUG:
            print("CELLID: toprightCell: %s | bottomleftCell: %s" % (toprightCell, bottomleftCell))

        # get all cells that need to be clustered
        return self.maptools.get_ClusterCells(toprightCell, bottomleftCell, self.zoom)

    # -------------------------------------------------------------------------
    # CACHING MECHANISM
    # -------------------------------------------------------------------------
    def compareWithCache(self, request, geometry, geometry_type, filters, deliver_cache):
        """Return the part of ``geometry`` that still has to be clustered.

        Filters, zoom level and the clustered areas are stored in the django
        session under ``"clustercache"``:
        ``{'filters': {}, 'clusterAreas': [], 'zoom': 1}``.

        * ``"viewport"``: ``geometry`` is an iterable of hashable cells. If
          zoom and filters are unchanged (and ``deliver_cache`` is false)
          only cells not yet in the cache are returned and the cache grows
          by them; otherwise all cells are returned and replace the cache.
        * ``"strict"``: ``geometry`` is returned unchanged when the zoom, the
          filters or the geometry itself differ from the cache, else ``[]``.
        * any other ``geometry_type`` returns ``[]``.

        Returns:
            A set of cells (viewport), the passed geometry (strict), or an
            empty list if nothing new has to be clustered.
        """
        clustercache = request.session.get("clustercache", {})
        last_zoom = clustercache.get("zoom", -1)
        new_cluster_geometry = []

        if geometry_type == "viewport":
            requested_cells = set(geometry)

            # the cache is only usable if zoom and filters did not change
            use_cache = bool(
                clustercache
                and not deliver_cache
                and int(self.zoom) == int(last_zoom)
                and filters == clustercache.get("filters", [])
            )

            if use_cache:
                # A cache entry written without areas, or by a "strict"
                # request (which stores a geojson dict), holds no cells.
                cached_areas = clustercache.get("clusterAreas")
                if not isinstance(cached_areas, (list, tuple)):
                    cached_areas = []
                # cells come back from a serialized session as lists
                cached_cells = {tuple(cell) for cell in cached_areas}
                new_clustercells = requested_cells - cached_cells
                cells_for_cache = cached_cells | new_clustercells
            else:
                new_clustercells = requested_cells
                cells_for_cache = requested_cells

            clustercache["clusterAreas"] = list(cells_for_cache)

            if new_clustercells:
                new_cluster_geometry = new_clustercells

        elif geometry_type == "strict":
            perform_clustering = False

            if clustercache:
                geometry_changed = geometry != clustercache.get("clusterAreas", None)
                filters_changed = filters != clustercache.get("filters", [])
                if geometry_changed or filters_changed:
                    perform_clustering = True

            # an empty cache has last_zoom == -1 and therefore lands here too
            if int(self.zoom) != int(last_zoom):
                perform_clustering = True

            if perform_clustering:
                clustercache["clusterAreas"] = geometry
                new_cluster_geometry = geometry

        clustercache["filters"] = filters
        clustercache["zoom"] = self.zoom
        request.session["clustercache"] = clustercache

        return new_cluster_geometry

    # -------------------------------------------------------------------------
    # CONVERTING QUADKEY CELLS TO POSTGIS USABLE POLYGONS
    # -------------------------------------------------------------------------
    def convertCellsToGEOS(self, cells):
        """Build an ``ST_Collect(ARRAY[...])`` SQL expression for ``cells``.

        The idea is to create one geometry collection to reduce the database
        queries to 1. This is not yet working and is not called from within
        this class.

        Example of the target syntax:
        ``ST_Collect(ARRAY[ST_GeomFromText('POLYGON((0 0, 1 0, 1 1, 0 0))', 3857)])``
        """
        polygons = ",".join(
            "ST_GeomFromText('%s', %s)" % (self.clusterCellToBounds(cell), self.srid_db)
            for cell in cells
        )
        return "ST_Collect(ARRAY[%s])" % polygons

    # -------------------------------------------------------------------------
    # CONVERTING GEOJSON TO GEOS
    # multipolygon and collections are not supported by ST_Within so they
    # need to be split into several geometries
    # -------------------------------------------------------------------------
    def _toDatabaseSrid(self, geos):
        """Transform ``geos`` in place to ``self.srid_db`` if needed and return it."""
        if geos.srid != self.srid_db:
            ct = CoordTransform(SpatialReference(geos.srid), SpatialReference(self.srid_db))
            geos.transform(ct)
        return geos

    def convertGeojsonFeatureToGEOS(self, feature):
        """Convert one GeoJSON feature into a list of GEOS geometries.

        The SRID is taken from ``feature["properties"]["srid"]`` and defaults
        to 4326. All geometries are transformed to ``self.srid_db``.

        Returns:
            A list of GEOS geometries. The list is empty if a
            non-MultiPolygon geometry cannot be parsed or is empty, so
            callers can always iterate or concatenate the result.
        """
        geos_geometries = []

        if "properties" in feature and "srid" in feature["properties"]:
            srid = feature["properties"]["srid"]
        else:
            srid = 4326

        geometry = feature["geometry"]

        if geometry["type"] == "MultiPolygon":
            # NOTE(review): only coordinates[0] is read and each of its
            # members is wrapped as the single ring of a Polygon. For a
            # standard (RFC 7946) MultiPolygon that is the first polygon's
            # rings only - confirm what shape the client actually sends.
            for polygon in geometry["coordinates"][0]:
                geom = {"type": "Polygon", "coordinates": [polygon]}
                geos = GEOSGeometry(json.dumps(geom), srid=srid)
                geos_geometries.append(self._toDatabaseSrid(geos))
            return geos_geometries

        try:
            geos = GEOSGeometry(json.dumps(geometry), srid=srid)
        except Exception:
            # Deliberate best effort: an unparsable geometry is skipped.
            # Returning [] instead of None keeps callers working that
            # concatenate or iterate the result.
            return []

        if geos:
            geos_geometries.append(self._toDatabaseSrid(geos))

        return geos_geometries

    def _geojsonToGeosList(self, geojson):
        """Return the GEOS geometries of a ``Feature`` or ``FeatureCollection``.

        Any other geojson type yields an empty list.
        """
        geojson_type = geojson["type"]

        if geojson_type == "FeatureCollection":
            geos_geometries = []
            for feature in geojson["features"]:
                geos_geometries += self.convertGeojsonFeatureToGEOS(feature) or []
            return geos_geometries

        if geojson_type == "Feature":
            return self.convertGeojsonFeatureToGEOS(geojson) or []

        return []

    def getClusterGeometries(self, request, params, clustertype):
        """Return the areas that need clustering as ``{"geos", "k"}`` dicts.

        There are two kinds of geometry, selected by
        ``params["geometry_type"]``:

        * ``"viewport"`` is a rectangle which is expanded to a zoom-level
          dependent fixed grid. Each new grid cell is returned with
          ``k = BASE_K``.
        * everything else just clusters within the given geometry; ``k`` is
          derived from the area via ``calculateK``.

        Args:
            request: request whose session holds the cluster cache.
            params: decoded payload; ``"geojson"``, ``"geometry_type"`` and
                ``"filters"`` are required, ``"cache"`` is optional.
            clustertype: not used by this method.

        Returns:
            A list of dicts with a GEOS instance under ``"geos"`` and the
            number of clusters under ``"k"``.
        """
        geojson = params["geojson"]
        geometry_type = params["geometry_type"]
        deliver_cache = bool(params.get("cache", False))

        clusterGeometries = []

        if geometry_type == "viewport":
            rings = geojson["geometry"]["coordinates"]
            first_ring = rings[0]
            if geojson["geometry"]["type"] == "Polygon":
                right_ring = first_ring
            else:
                # Multipolygon when spanning edge: the right border comes
                # from the second part
                right_ring = rings[1]

            viewport = {
                "left": first_ring[0][0],
                "top": first_ring[0][1],
                "right": right_ring[1][0],
                "bottom": first_ring[2][1],
            }

            all_cells = self.getClusterCells(viewport)
            new_cells = self.compareWithCache(
                request, all_cells, geometry_type, params["filters"], deliver_cache
            )

            for cell in new_cells:
                cell_geos = GEOSGeometry(self.clusterCellToBounds(cell), srid=self.srid_db)
                clusterGeometries.append({"geos": cell_geos, "k": BASE_K})

        else:
            # geojson or []
            new_geojson = self.compareWithCache(
                request, geojson, geometry_type, params["filters"], deliver_cache
            )

            if new_geojson:
                for geos in self._geojsonToGeosList(new_geojson):
                    clusterGeometries.append({"geos": geos, "k": self.calculateK(geos)})

        return clusterGeometries

    # -------------------------------------------------------------------------
    # K Calculation
    # -------------------------------------------------------------------------
    def calculateK(self, geos_geometry):
        """Return the number of kmeans clusters for a drawn geometry.

        This is only used for strict geometries, such as drawn polygons or
        drawn circles. It is based on ``BASE_K`` and increases the k if one
        draws a big shape. The calculation is done on square-pixel areas:
        ``BASE_K`` clusters per grid cell, clamped to ``[BASE_K, K_CAP]``
        and rounded up.
        """
        mercator_geometry = geos_geometry.transform(3857, clone=True)

        cellarea_pixels = self.gridSize * self.gridSize

        # 1m = ? pixels: tile size over the equator length used for 3857,
        # doubled with every zoom level
        pixels_per_meter = self.maptools.mapTileSize / (2 * math.pi * 6378137)
        pixels_per_meter *= 2 ** self.zoom

        area_pixels = mercator_geometry.area * pixels_per_meter ** 2

        new_k = (BASE_K / cellarea_pixels) * area_pixels
        if new_k > K_CAP:
            new_k = K_CAP
        if new_k < BASE_K:
            new_k = BASE_K

        return int(math.ceil(new_k))

    # -------------------------------------------------------------------------
    # SQL FOR FILTERING
    # Converts the filter dictionary into a raw querystring that is added to
    # the raw sql later. Used by both kmeans and grid cluster.
    # -------------------------------------------------------------------------
    def parseFilterValue(self, operator, value):
        """Render one filter value as an SQL literal.

        Strings are wrapped in single quotes, with embedded single quotes
        doubled so that a client-supplied value cannot terminate the
        literal. ``"startswith"`` and ``"contains"`` produce the regex
        pattern used with the ``~`` operator. Booleans become
        ``TRUE``/``FALSE``. Every other value is returned unchanged.
        """
        if isinstance(value, str):
            escaped = value.replace("'", "''")
            if operator == "startswith":
                return "'^%s.*' " % escaped
            if operator == "contains":
                return "'%s.*'" % escaped
            return "'%s'" % escaped

        if isinstance(value, bool):
            return "TRUE" if value else "FALSE"

        return value

    def constructFilterstring(self, filters):
        """Convert the filter mapping into SQL conditions.

        Args:
            filters: mapping of column name to a dict with ``"values"`` and
                an optional ``"operator"`` (default ``"="``). ``values`` is a
                scalar, or a list which is rendered as ``IN`` (``NOT IN``
                for ``"!="``). An operator containing ``"either"``, e.g.
                ``"either_="``, ORs the comparison over all values.

        Returns:
            A string of `` AND (...)`` clauses, one per column, or ``""``.

        Raises:
            KeyError: if the operator is unknown.
            ValueError: if ``values`` has an unsupported type.
        """
        operator_mapping = {
            "=": "=",
            "!=": "!=",
            "<": "<",
            ">": ">",
            ">=": ">=",
            "<=": "<=",
            "startswith": "~",
            "contains": "~",
        }
        scalar_types = (str, bool, numbers.Number, decimal.Decimal)

        clauses = []

        # NOTE(security): values are escaped by parseFilterValue, but column
        # names are interpolated as they are. They must not come from
        # untrusted input without validation.
        for column in filters:
            filterparams = filters[column]
            operator_pre = filterparams.get("operator", "=")
            values = filterparams["values"]

            if "either" in operator_pre:
                operator_key = operator_pre.split("_")[-1]
                operator = operator_mapping[operator_key]
                condition = " OR ".join(
                    "%s %s %s" % (column, operator, self.parseFilterValue(operator_key, value))
                    for value in values
                )

            elif isinstance(values, list):
                negate = operator_pre == "!="
                if values:
                    # rendered manually: str(tuple(values)) would produce an
                    # invalid trailing comma for a single value
                    items = ", ".join(str(self.parseFilterValue("=", value)) for value in values)
                    condition = "%s %s (%s)" % (column, "NOT IN" if negate else "IN", items)
                else:
                    # "IN ()" is a syntax error: nothing is in an empty list
                    condition = "TRUE" if negate else "FALSE"

            elif isinstance(values, scalar_types):
                operator = operator_mapping[operator_pre]
                condition = "%s %s %s" % (
                    column,
                    operator,
                    self.parseFilterValue(operator_pre, values),
                )

            else:
                raise ValueError(
                    "unsupported filter values for column %r: %r" % (column, values)
                )

            clauses.append(" AND (%s)" % condition)

        return "".join(clauses)

    # -------------------------------------------------------------------------
    # MERGING MARKERS BY DISTANCE
    # -------------------------------------------------------------------------
    def distanceCluster(self, clusters, c_distance=30):
        """Merge clusters whose centers are at most ``c_distance`` pixels apart.

        If the geometric centroids are too close to each other after the
        kmeans algorithm (e.g. overlap), they are merged to one cluster.
        Used by ``kmeansCluster`` as phase 2. Pixels are used for the
        calculation as this is constant on every zoom level.

        A cluster is merged into the first already accepted cluster within
        range: its id is appended and its count added; the position of the
        accepted cluster is kept. ``cluster.id`` of every returned cluster is
        a list.
        """
        clusters_processed = []

        for cluster in clusters:
            clustercoords = getattr(cluster, geo_column_str)

            for processed_cluster in clusters_processed:
                processed_coords = getattr(processed_cluster, geo_column_str)
                pixel_distance = self.maptools.points_calcPixelDistance(
                    clustercoords, processed_coords, self.zoom
                )

                if pixel_distance <= c_distance:
                    if not isinstance(processed_cluster.id, list):
                        processed_cluster.id = [processed_cluster.id]
                    processed_cluster.id.append(cluster.id)
                    processed_cluster.count += cluster.count
                    break
            else:
                # no accepted cluster is close enough
                if not isinstance(cluster.id, list):
                    cluster.id = [cluster.id]
                clusters_processed.append(cluster)

        return clusters_processed

    # -------------------------------------------------------------------------
    # KMEANS CLUSTERING
    # -------------------------------------------------------------------------
    def kmeansCluster(self, request, custom_filterstring=""):
        """Cluster all new areas of the request with kmeans.

        Clustering only happens if the geometry contains a new area or the
        filters changed (see ``compareWithCache``). For each area a raw
        query is performed on the database, the result is passed to phase 2
        (``distanceCluster``) and converted to markers.

        Args:
            request: request carrying the JSON payload.
            custom_filterstring: raw SQL appended to the generated filter
                conditions. It is interpolated as it is and must be trusted.

        Returns:
            A list of dicts with the keys ``"ids"``, ``"count"``,
            ``"center"`` (``{"x", "y"}`` in ``self.input_srid``) and
            ``"pinimg"`` (``None`` unless ``PINCOLUMN`` is set).
        """
        params = self.loadJson(request)

        clusterGeometries = self.getClusterGeometries(request, params, "kmeans")

        markers = []
        if not clusterGeometries:
            return markers

        filterstring = self.constructFilterstring(params["filters"]) + custom_filterstring

        for geometry_dic in clusterGeometries:
            geos_geometry = geometry_dic["geos"]
            k = geometry_dic["k"]

            kclusters_queryset = Gis.objects.raw(
                """
                SELECT kmeans AS id, count(*), ST_AsText(ST_Centroid(ST_Collect(%s))) AS %s %s
                FROM (
                    SELECT %s kmeans(ARRAY[ST_X(%s), ST_Y(%s)], %s) OVER () AS kmeans, %s
                    FROM "%s"
                    WHERE %s IS NOT NULL AND ST_Intersects(%s, ST_GeometryFromText('%s') ) %s
                ) AS ksub
                GROUP BY id
                ORDER BY kmeans;
                """
                % (
                    geo_column_str,
                    geo_column_str,
                    pin_qry[0],
                    pin_qry[1],
                    geo_column_str,
                    geo_column_str,
                    k,
                    geo_column_str,
                    geo_table,
                    geo_column_str,
                    geo_column_str,
                    geos_geometry.ewkt,
                    filterstring,
                )
            )

            # phase 2: merge clusters that would overlap on screen
            kclusters = self.distanceCluster(list(kclusters_queryset))

            for cluster in kclusters:
                point = getattr(cluster, geo_column_str)

                if point.srid != self.input_srid:
                    self.maptools.point_AnyToAny(point, point.srid, self.input_srid)

                pinimg = cluster.pinimg if PINCOLUMN else None

                markers.append(
                    {
                        "ids": cluster.id,
                        "count": cluster.count,
                        "center": {"x": point.x, "y": point.y},
                        "pinimg": pinimg,
                    }
                )

        return markers

    # -------------------------------------------------------------------------
    # GRID CLUSTERING
    # -------------------------------------------------------------------------
    def clusterCellToBounds(self, cell):
        """Return the bounds of a grid cell as a polygon string in ``self.srid_db``.

        The cell is converted cell ID -> pixel bounds -> mercator bounds ->
        latlng, and to the database SRID if that is not 4326. The string is
        built by ``MapTools.bounds_ToPolyString``.
        """
        pixelbounds = self.maptools.cellIDToTileBounds(cell, self.gridSize)
        mercatorbounds = self.maptools.bounds_PixelToMercator(pixelbounds, self.zoom)

        # convert mercatorbounds to latlngbounds
        cell_topright = Point(mercatorbounds["right"], mercatorbounds["top"], srid=3857)
        cell_bottomleft = Point(mercatorbounds["left"], mercatorbounds["bottom"], srid=3857)
        self.maptools.point_ToLatLng(cell_topright)
        self.maptools.point_ToLatLng(cell_bottomleft)

        # if it is not a latlng database, convert the polygons
        if self.srid_db != 4326:
            self.maptools.point_AnyToAny(cell_topright, 4326, self.srid_db)
            self.maptools.point_AnyToAny(cell_bottomleft, 4326, self.srid_db)

        poly = self.maptools.bounds_ToPolyString(
            {
                "top": cell_topright.y,
                "right": cell_topright.x,
                "bottom": cell_bottomleft.y,
                "left": cell_bottomleft.x,
            }
        )

        if DEBUG:
            print("%s" % poly)

        return poly

    def gridCluster(self, request):
        """Count the rows of ``geo_table`` per new grid cell of the viewport.

        The cells are written to a temporary table and joined against
        ``geo_table`` in a single query. The temporary table is dropped
        again after the query, so the method can run repeatedly on the same
        database connection.

        Returns:
            A list of dicts with the keys ``"count"``, ``"geojson"`` (the
            cell polygon) and ``"center"`` (``{"x", "y"}``), both in
            ``self.input_srid``. Cells without rows are not included.
        """
        params = self.loadJson(request)

        clusterGeometries = self.getClusterGeometries(request, params, "viewport")

        gridCells = []
        if not clusterGeometries:
            return gridCells

        filterstring = self.constructFilterstring(params["filters"])

        gridcluster_query = """
            SELECT count(*) AS count, polygon
            FROM "%s", temp_clusterareas
            WHERE %s IS NOT NULL AND ST_Intersects(%s, polygon) %s
            GROUP BY polygon
        """ % (
            geo_table,
            geo_column_str,
            geo_column_str,
            filterstring,
        )

        with connections["default"].cursor() as cursor:
            # A table left over by an earlier, failed call on this connection
            # would make CREATE fail. pg_temp restricts the drop to
            # temporary tables.
            cursor.execute("DROP TABLE IF EXISTS pg_temp.temp_clusterareas")
            cursor.execute(
                """CREATE TEMPORARY TABLE temp_clusterareas (
                    id serial,
                    polygon geometry
                )"""
            )

            for clusterGeometry in clusterGeometries:
                cursor.execute(
                    """
                    INSERT INTO temp_clusterareas (polygon)
                    ( SELECT ( ST_Dump( ST_GeometryFromText(%s) )).geom )
                    """,
                    [clusterGeometry["geos"].ewkt],
                )

            # indexing (GIST on polygon) did not increase performance

            cursor.execute(gridcluster_query)
            rows = cursor.fetchall()

            cursor.execute("DROP TABLE IF EXISTS pg_temp.temp_clusterareas")

        for count, polygon in rows:
            geos = GEOSGeometry(polygon)
            geos.transform(self.input_srid)
            centroid = geos.centroid

            gridCells.append(
                {
                    "count": count,
                    "geojson": geos.geojson,
                    "center": {"x": centroid.x, "y": centroid.y},
                }
            )

        return gridCells

    # -------------------------------------------------------------------------
    # NON-CLUSTERING FUNCTIONS
    # -------------------------------------------------------------------------
    def getKmeansClusterContent(self, request, custom_filterstring=""):
        """Return the rows contained in the kmeans clusters of a marker.

        The payload must contain the marker position (``"x"``, ``"y"``), the
        kmeans ids of the marker (``"ids"``) and ``"filters"``. The grid cell
        of the position is clustered again with ``BASE_K`` and the rows of
        the requested clusters are selected.

        Args:
            request: request carrying the JSON payload.
            custom_filterstring: raw SQL appended to the generated filter
                conditions. It is interpolated as it is and must be trusted.

        Returns:
            A raw queryset of ``Gis`` rows with an additional ``kmeans``
            column.

        Raises:
            ValueError / TypeError: if an id is not an integer.
        """
        params = self.loadJson(request)

        # The ids are interpolated into the query: coerce them to int so a
        # client cannot smuggle SQL through them.
        kmeans_string = ",".join(str(int(kmeans_id)) for kmeans_id in params["ids"])

        cluster = Point(params["x"], params["y"], srid=self.input_srid)
        cell = self.maptools.getCellIDForPoint(cluster, self.zoom, self.gridSize)
        poly = self.clusterCellToBounds(cell)

        filterstring = self.constructFilterstring(params["filters"]) + custom_filterstring

        entries_queryset = Gis.objects.raw(
            """
            SELECT * FROM (
                SELECT kmeans(ARRAY[ST_X(%s), ST_Y(%s)], %s) OVER () AS kmeans, "%s".*
                FROM "%s"
                WHERE %s IS NOT NULL AND ST_Intersects(%s, ST_GeometryFromText('%s', %s) ) %s
            ) AS ksub
            WHERE kmeans IN (%s)
            """
            % (
                geo_column_str,
                geo_column_str,
                BASE_K,
                geo_table,
                geo_table,
                geo_column_str,
                geo_column_str,
                poly,
                self.srid_db,
                filterstring,
                kmeans_string,
            )
        )

        return entries_queryset

    # -------------------------------------------------------------------------
    # CONSTRUCT A FILTERSTRING FOR GEOMETRIES
    # multipolygon and collections are not supported by ST_Within so they
    # need to be split into several geometries
    # -------------------------------------------------------------------------
    def getGeomFilterstring(self, geojson=None, request=None):
        """Convert geojson into a raw SQL condition on the geometry column.

        First, the geojson is converted to a list of GEOS geometries;
        second, the list is converted to a string of OR-ed
        ``ST_Intersects`` conditions.

        Args:
            geojson: a ``Feature`` or ``FeatureCollection``. If it is not
                given, the geojson cached in the session of ``request`` is
                used.
            request: optional request whose session holds the geojson that
                ``loadJson`` stored.

        Returns:
            The condition wrapped in parentheses, or ``""`` if there is no
            geometry to filter by.
        """
        if not geojson and request is not None:
            geojson = request.session.get("geojson", None)

        if not geojson:
            return ""

        geos_geometries = self._geojsonToGeosList(geojson)
        if not geos_geometries:
            # "()" would be a syntax error
            return ""

        conditions = " OR ".join(
            " ST_Intersects(%s, ST_GeometryFromText('%s', %s) ) "
            % (geo_column_str, geos.wkt, self.srid_db)
            for geos in geos_geometries
        )

        return "(%s)" % conditions
class MapClusterer(): def __init__(self, zoom=1, gridSize=256, input_srid=4326, mapTileSize=256): # the srid of the coordinates coming from javascript. input_srid = output_srid self.input_srid = int(input_srid) # the size of the grid in pixels. each grid cell gets its own kmeans clustering self.gridSize = int(gridSize) self.zoom = int(zoom) self.maptools = MapTools(int(mapTileSize)) # filter operators self.valid_operators = ['=', '<', '>', '<=', '>=', 'list', '!list'] self.srid_db = self.getDatabaseSRID() # read the srid of the database. def getDatabaseSRID(self): srid_qry = 'SELECT id, ST_SRID(%s) FROM "%s" LIMIT 1;' % ( geo_column_str, geo_table) srid_db_objs = Gis.objects.raw(srid_qry) if len(list(srid_db_objs)) > 0: srid_db = srid_db_objs[0].st_srid else: try: srid_db = settings.ANYCLUSTER_COORDINATES_COLUMN_SRID except: srid_db = 4326 return srid_db '''--------------------------------------------------------------------------------------------------------------------------- LOADING THE AJAX INPUT - The variables and filters coming from the ajax request are transformed into python-usables like lists and dictionaries - anycluster receives a json object containing geojson and filters ---------------------------------------------------------------------------------------------------------------------------''' def loadJson(self, request): json_str = request.body.decode(encoding='UTF-8') params = json.loads(json_str) if "geojson" in params: request.session['geojson'] = params['geojson'] return params '''--------------------------------------------------------------------------------------------------------------------------------- CALCULATE CELL-IDs ACCORDING TO VIEWPORT - given the viewport, expand to the nearest grid and get all cell ids of this grid - returns QuadKey IDS of a viewport according to MapClusterer.gridSize To calculate those cells, the coordinates are transformed as shown below: LatLng --------> Meters (Mercator) ---------> Shifted origin ---------> 
pixel coords ---------> GRID, depending on tilesize ----------- ----------- ----------- ----------- ----------- | | | | | | | | |00|10|20|30| | | | | | | | | |01|11|21|31| | O | | O | | | | | ----------- | | | | | | | | |02|12|22|32| | | | | | | | | |03|13|23|33| ----------- ----------- O----------- O----------- ----------- LATLNG METERS METERS PIXELS GRID (shifted coordinates) (CELL-IDs according to QuadKey, depends on zoom level) O = origin The coordinate system with shifted origin has only coordinates with positive values (essential). Now, get the CELL-ID (=QuadKey ID) the top-right (and bottom-left) viewport coordinate sits in. Finally calculate all CELL-IDs that are spanned by the top-right and bottom-left cell. ---------------------------------------------------------------------------------------------------------------------------------''' def getClusterCells(self, viewport): if DEBUG: print('VIEWPORT(wgs84datum, 4326, longlat): %s' % viewport) # create points according to input srid topright = Point(viewport['right'], viewport['top'], srid=self.input_srid) bottomleft = Point(viewport['left'], viewport['bottom'], srid=self.input_srid) if self.input_srid != 4326: topright = self.maptools.point_ToLatLng(topright) bottomleft = self.maptools.point_ToLatLng(bottomleft) # Polar areas with abs(latitude) bigger then 85.05112878 are clipped off as google does. 
if topright.y > 85.0: topright.y = 85.0 if topright.x > 179.9999: topright.x = 179.9999 if bottomleft.y < -85: bottomleft.y = -85 if bottomleft.x < -179.9999: bottomleft.x = -179.9999 if DEBUG: print('4326, longlat: topright: (%s,%s) | bottomleft: (%s,%s)' % (topright.x, topright.y, bottomleft.x, bottomleft.y)) # project points to mercator 3875, plane coordinates self.maptools.point_ToMercator(topright) self.maptools.point_ToMercator(bottomleft) if DEBUG: print('MERCATOR: topright: (%s,%s) | bottomleft: (%s,%s)' % (topright.x, topright.y, bottomleft.x, bottomleft.y)) # shift origin self.maptools.point_MercatorToWorld(topright) self.maptools.point_MercatorToWorld(bottomleft) if DEBUG: print('WORLD: topright: (%s,%s) | bottomleft: (%s,%s)' % (topright.x, topright.y, bottomleft.x, bottomleft.y)) # calculate pixelcoords from world coords depending on zoom self.maptools.point_WorldToPixels(topright, self.zoom) self.maptools.point_WorldToPixels(bottomleft, self.zoom) if DEBUG: print('PIXELS: topright: (%s,%s) | bottomleft: (%s,%s)' % (topright.x, topright.y, bottomleft.x, bottomleft.y)) # get topright and bottom left cellID, e.g. (03,01) toprightCell = self.maptools.point_PixelToCellID( topright, self.gridSize) bottomleftCell = self.maptools.point_PixelToCellID( bottomleft, self.gridSize) if DEBUG: print('CELLID: toprightCell: %s | bottomleftCell: %s' % (toprightCell, bottomleftCell)) # get all Cells that need to be clustered clusterCells = self.maptools.get_ClusterCells(toprightCell, bottomleftCell, self.zoom) # from ID-list create list of polygons return clusterCells '''--------------------------------------------------------------------------------------------------------------------------------- CACHING MECHANISM Filters, zoomlevel and cellIDs are stored in the django session. If the map is panned, only those cells that are new to the viewport are being clustered and returned. If the map is zoomed, the cache is cleared. 
dict: {'filters':{}, 'cellIDs':[], 'zoom':1} ---------------------------------------------------------------------------------------------------------------------------------''' def compareWithCache(self, request, geometry, geometry_type, filters, deliver_cache): clustercache = request.session.get('clustercache', {}) new_cluster_geometry = [] last_zoom = clustercache.get('zoom', -1) if geometry_type == "viewport": compare_geometry_with_cache = False # check if we need to compare with the cache or if the cache can be omitted if clustercache and not deliver_cache: if int(self.zoom) == int(last_zoom): last_filters = clustercache.get('filters', []) if filters == last_filters: compare_geometry_with_cache = True if compare_geometry_with_cache: # in this case, geometry is a set of cells cached_cells = set( [tuple(cell) for cell in clustercache['clusterAreas']]) new_clustercells = set(geometry) - cached_cells new_cells_for_cache = cached_cells.union(new_clustercells) else: new_cells_for_cache = set(geometry) new_clustercells = new_cells_for_cache clustercache['clusterAreas'] = list(new_cells_for_cache) # convert new_clustercells into a postgis geometry collection if new_clustercells: # new_cluster_geometry = self.convertCellsToGEOS(new_clustercells) new_cluster_geometry = new_clustercells elif geometry_type == "strict": perform_clustering = False if clustercache: cached_areas = clustercache.get('clusterAreas', None) if geometry != cached_areas or filters != clustercache.get( 'filters', []): perform_clustering = True if int(self.zoom) != int(last_zoom): perform_clustering = True if perform_clustering: clustercache['clusterAreas'] = geometry new_cluster_geometry = geometry clustercache['filters'] = filters clustercache['zoom'] = self.zoom request.session['clustercache'] = clustercache return new_cluster_geometry '''--------------------------------------------------------------------------------------------------------------------------------- CONVERTING QUADKEY CELLS TO 
POSTGIS USABLE POLYGONS - ST_Collect(geom) - create a geometry collection to reduce the database queries to 1 - this is not yet working ---------------------------------------------------------------------------------------------------------------------------------''' def convertCellsToGEOS(self, cells): #ST_Collect(ST_GeomFromText('POINT(1 2)'),ST_GeomFromText('POINT(-2 3)') ) query_collection = "ST_Collect(ARRAY[" for counter, cell in enumerate(cells): poly = self.clusterCellToBounds(cell) if counter > 0: query_collection += "," # ST_GeomFromText('POLYGON((0 0, 10000 0, 10000 10000, 0 10000, 0 0))',3857) query_collection += "ST_GeomFromText('%s', %s)" % (poly, self.srid_db) query_collection += "])" return query_collection '''--------------------------------------------------------------------------------------------------------------------------------- CONVERTING GEOJSON TO GEOS multipolygon and collections are not supported by ST_Within so they need to be split into several geometries ---------------------------------------------------------------------------------------------------------------------------------''' # returns a geos_list def convertGeojsonFeatureToGEOS(self, feature): geos_geometries = [] if "properties" in feature and "srid" in feature["properties"]: srid = feature["properties"]["srid"] else: srid = 4326 if feature["geometry"]["type"] == "MultiPolygon": for polygon in feature["geometry"]["coordinates"][0]: geom = {"type": "Polygon", "coordinates": [polygon]} geos = GEOSGeometry(json.dumps(geom), srid=srid) if geos.srid != self.srid_db: ct = CoordTransform(SpatialReference(geos.srid), SpatialReference(self.srid_db)) geos.transform(ct) geos_geometries.append(geos) else: try: geos = GEOSGeometry(json.dumps(feature["geometry"]), srid=srid) except: return None if geos: if geos.srid != self.srid_db: ct = CoordTransform(SpatialReference(geos.srid), SpatialReference(self.srid_db)) geos.transform(ct) geos_geometries.append(geos) return geos_geometries # 
returns GEOS instances def getClusterGeometries(self, request, params, clustertype): geojson = params['geojson'] geometry_type = params["geometry_type"] deliver_cache = bool(params.get("cache", False)) ''' There are two geometrytypes: "viewport" and "geometry". "viewport" is a rectangle which is expanded to a zoom-level dependant fixed grid. "geometry" just clusters within the given geometry ''' clusterGeometries = [] if geometry_type == "viewport": if geojson["geometry"]["type"] == "Polygon": linearString = geojson['geometry']['coordinates'][0] viewport = { 'left': linearString[0][0], 'top': linearString[0][1], 'right': linearString[1][0], 'bottom': linearString[2][1] } else: #Multipolygon when spanning edge linearString = geojson['geometry']['coordinates'][0] linearString_2 = geojson['geometry']['coordinates'][1] viewport = { 'left': linearString[0][0], 'top': linearString[0][1], 'right': linearString_2[1][0], 'bottom': linearString[2][1] } clustercells_pre = self.getClusterCells(viewport) clusterGeometries_pre = self.compareWithCache( request, clustercells_pre, geometry_type, params["filters"], deliver_cache) for cell in clusterGeometries_pre: poly = self.clusterCellToBounds(cell) cell_geos = GEOSGeometry(poly, srid=self.srid_db) clusterGeometries.append({"geos": cell_geos, "k": BASE_K}) else: # geojson or [] clusterGeometries_geojson = self.compareWithCache( request, geojson, geometry_type, params["filters"], deliver_cache) #convert the geojson into strings usable with postgis if clusterGeometries_geojson: if clusterGeometries_geojson["type"] == "FeatureCollection": geos_geometries = [] for feature in clusterGeometries_geojson["features"]: geos_geometries += self.convertGeojsonFeatureToGEOS( feature) elif clusterGeometries_geojson["type"] == "Feature": geos_geometries = self.convertGeojsonFeatureToGEOS( clusterGeometries_geojson) for geos in geos_geometries: k = self.calculateK(geos) clusterGeometries.append({"geos": geos, "k": k}) return clusterGeometries 
'''----------------------------------------------------------------------------
    K Calculation

    this is only used for strict geometries, such as drawn polygons or drawn
    circles
    based on the BASE_K in the settings (defaults to 6) it increases the k if
    one draws a big shape
----------------------------------------------------------------------------'''


# k calculation has to be done on square-pixel areas
def calculateK(self, geos_geometry):
    """Return the kmeans ``k`` for a geometry, scaled by its pixel area.

    The geometry is cloned into SRID 3857 and its area is converted to square
    pixels at ``self.zoom``. ``BASE_K`` clusters are granted per grid cell
    (``self.gridSize`` squared pixels); the result is limited to the range
    ``BASE_K`` .. ``K_CAP``.

    Args:
        geos_geometry: GEOS geometry; it is not modified.

    Returns:
        int: the k value, rounded up.
    """
    mercator_geom = geos_geometry.transform(3857, clone=True)

    cellarea_pixels = self.gridSize * self.gridSize

    # 1m = ? pixels: tile size over 2 * pi * 6378137 at zoom 0, doubled with
    # every zoom level
    pixels_per_meter = self.maptools.mapTileSize / (2 * math.pi * 6378137)
    pixels_per_meter = pixels_per_meter * (2**self.zoom)

    geom_area_pixels = mercator_geom.area * pixels_per_meter**2

    new_k = (BASE_K / cellarea_pixels) * geom_area_pixels
    # cap first, then raise to the minimum
    new_k = max(min(new_k, K_CAP), BASE_K)

    return int(math.ceil(new_k))


'''----------------------------------------------------------------------------
    SQL FOR FILTERING

    Converts the filter dictionary into a raw querystring that is added to
    the raw sql later
    Used by both kmeans and grid cluster
----------------------------------------------------------------------------'''


def parseFilterValue(self, operator, value):
    """Render one filter value as a SQL literal.

    Args:
        operator: filter operator name; "startswith" and "contains" turn a
            string value into a regular expression literal.
        value: the value to render.

    Returns:
        A quoted SQL string for ``str`` values (embedded single quotes are
        doubled so the value cannot end the literal), ``"TRUE"``/``"FALSE"``
        for booleans, and the value unchanged for every other type.
    """
    if isinstance(value, str):
        escaped = value.replace("'", "''")
        if operator == "startswith":
            return "'^%s.*' " % escaped
        if operator == "contains":
            return "'%s.*'" % escaped
        return "'%s'" % escaped

    if isinstance(value, bool):
        return "TRUE" if value else "FALSE"

    # numbers, decimals and anything else are passed through untouched
    return value


def constructFilterstring(self, filters):
    """Build the ``AND (...)`` SQL fragments for a filter dictionary.

    Each entry maps a column name to ``{"values": ..., "operator": ...}``;
    the operator defaults to "=". Three shapes are handled:

    * ``either_<op>``: the values are OR-ed with ``<op>``.
    * a list of values: ``IN (...)``, or ``NOT IN (...)`` for "!=".
    * a single value: compared with the mapped operator.

    Args:
        filters: mapping of column name to filter parameters; may be empty
            or ``None``.

    Returns:
        str: one `` AND (<condition>)`` fragment per column, concatenated.

    Raises:
        KeyError: if an operator has no SQL mapping or "values" is missing.
    """
    # NOTE(review): column names are interpolated as given; if they can come
    # from the client they should be checked against a whitelist upstream.
    operator_mapping = {
        "=": "=",
        "!=": "!=",
        "<": "<",
        ">": ">",
        ">=": ">=",
        "<=": "<=",
        "startswith": "~",
        "contains": "~",
    }

    if not filters:
        return ''

    fragments = []

    for column, filterparams in filters.items():
        operator_pre = filterparams.get("operator", "=")
        values = filterparams["values"]

        if "either" in operator_pre:
            sub_operator = operator_pre.split('_')[-1]
            operator = operator_mapping[sub_operator]
            # an OR over no values can never match
            condition = " OR ".join(
                "%s %s %s" % (column, operator,
                              self.parseFilterValue(sub_operator, value))
                for value in values) or "FALSE"

        elif isinstance(values, (list, tuple)):
            negate = operator_pre == "!="
            if values:
                # Render every element separately: the repr of a Python
                # tuple is not valid SQL for one-element lists.
                sql_values = ", ".join(
                    str(self.parseFilterValue("=", value))
                    for value in values)
                condition = "%s %s (%s)" % (
                    column, "NOT IN" if negate else "IN", sql_values)
            else:
                # "IN ()" is a syntax error; an empty list matches nothing
                condition = "TRUE" if negate else "FALSE"

        else:
            operator = operator_mapping[operator_pre]
            condition = "%s %s %s" % (
                column, operator,
                self.parseFilterValue(operator_pre, values))

        fragments.append(" AND (%s)" % condition)

    return "".join(fragments)


# MERGING MARKERS BY DISTANCE
# - if the geometric centroids are too close to each other after the kmeans
#   algorithm (e.g.
# overlap), they are merged to one cluster
# - used by kmeansCluster as phase 2
# - uses pixels for calculation as this is constant on every zoom level
# - transforms cluster.id into a list
def distanceCluster(self, clusters, c_distance=30):
    """Merge clusters that lie within ``c_distance`` pixels of each other.

    Clusters are visited in order. A cluster closer than or equal to
    ``c_distance`` pixels (at ``self.zoom``) to an already kept cluster is
    folded into the first such cluster: its id is appended and its count
    added. Otherwise it is kept itself. The ``id`` of every kept cluster is
    turned into a list.

    Args:
        clusters: iterable of cluster objects with ``id``, ``count`` and the
            geometry attribute named by ``geo_column_str``.
        c_distance: merge threshold in pixels.

    Returns:
        list: the kept clusters, mutated in place.
    """
    kept_clusters = []

    for candidate in clusters:
        candidate_coords = getattr(candidate, geo_column_str)

        for kept in kept_clusters:
            kept_coords = getattr(kept, geo_column_str)
            distance = self.maptools.points_calcPixelDistance(
                candidate_coords, kept_coords, self.zoom)

            if distance <= c_distance:
                if not isinstance(kept.id, list):
                    kept.id = [kept.id]
                kept.id.append(candidate.id)
                kept.count += candidate.count
                break
        else:
            # no kept cluster is near enough: keep the candidate itself
            if not isinstance(candidate.id, list):
                candidate.id = [candidate.id]
            kept_clusters.append(candidate)

    return kept_clusters


# KMEANS CLUSTERING
# - cluster only if 1. the geometry contains a new area or 2.
# the filters changed
# - perform a raw query on the database, pass the result to phase 2
#   (distanceCluster) and return the result
def kmeansCluster(self, request, custom_filterstring=""):
    """Cluster the points inside the requested geometries with kmeans.

    The request body is parsed by ``loadJson`` and the geometries to cluster
    come from ``getClusterGeometries``. Each geometry is clustered by a raw
    ``kmeans`` window query and the clusters are then merged by
    ``distanceCluster``.

    Args:
        request: request whose JSON body carries the geojson and ``filters``.
        custom_filterstring: raw SQL appended after the generated filters.

    Returns:
        list[dict]: one marker per cluster with the keys ``ids``, ``count``,
        ``center`` (``x``/``y``) and ``pinimg`` (``None`` unless
        ``PINCOLUMN`` is set).
    """
    params = self.loadJson(request)

    clusterGeometries = self.getClusterGeometries(request, params, "kmeans")

    markers = []
    if not clusterGeometries:
        return markers

    filterstring = self.constructFilterstring(params["filters"])
    filterstring += custom_filterstring

    for geometry_dic in clusterGeometries:
        query = '''
            SELECT kmeans AS id, count(*),
                   ST_AsText(ST_Centroid(ST_Collect(%(geo)s))) AS %(geo)s
                   %(pin_outer)s
            FROM (
                SELECT %(pin_inner)s
                       kmeans(ARRAY[ST_X(%(geo)s), ST_Y(%(geo)s)], %(k)s)
                           OVER () AS kmeans,
                       %(geo)s
                FROM "%(table)s"
                WHERE %(geo)s IS NOT NULL
                  AND ST_Intersects(%(geo)s, ST_GeometryFromText('%(ewkt)s') )
                  %(filters)s
            ) AS ksub
            GROUP BY id
            ORDER BY kmeans;
        ''' % {
            "geo": geo_column_str,
            "pin_outer": pin_qry[0],
            "pin_inner": pin_qry[1],
            "k": geometry_dic["k"],
            "table": geo_table,
            "ewkt": geometry_dic["geos"].ewkt,
            "filters": filterstring,
        }

        kclusters = self.distanceCluster(list(Gis.objects.raw(query)))

        for cluster in kclusters:
            point = getattr(cluster, geo_column_str)

            if point.srid != self.input_srid:
                self.maptools.point_AnyToAny(point, point.srid,
                                             self.input_srid)

            markers.append({
                'ids': cluster.id,
                'count': cluster.count,
                'center': {
                    'x': point.x,
                    'y': point.y
                },
                'pinimg': cluster.pinimg if PINCOLUMN else None
            })

    return markers


'''----------------------------------------------------------------------------
    GRID CLUSTERING
----------------------------------------------------------------------------'''


def clusterCellToBounds(self, cell):
    """Return the polygon string of a grid cell in the database SRID.

    The cell id is resolved to pixel bounds and then to mercator bounds at
    ``self.zoom``. The two corner points are converted to lat/lng and, when
    ``self.srid_db`` is not 4326, on to the database SRID.

    Args:
        cell: cell id as accepted by ``self.maptools.cellIDToTileBounds``.

    Returns:
        The value of ``self.maptools.bounds_ToPolyString`` for the cell
        bounds (presumably a WKT polygon; see MapTools).
    """
    pixelbounds = self.maptools.cellIDToTileBounds(cell, self.gridSize)
    mercatorbounds = self.maptools.bounds_PixelToMercator(
        pixelbounds, self.zoom)

    # convert mercatorbounds to latlngbounds
    cell_topright = Point(mercatorbounds['right'],
                          mercatorbounds['top'],
                          srid=3857)
    cell_bottomleft = Point(mercatorbounds['left'],
                            mercatorbounds['bottom'],
                            srid=3857)
    # the corner points are converted in place; return values are not used
    self.maptools.point_ToLatLng(cell_topright)
    self.maptools.point_ToLatLng(cell_bottomleft)

    # if it is not a latlng database, convert the polygons
    if self.srid_db != 4326:
        self.maptools.point_AnyToAny(cell_topright, 4326, self.srid_db)
        self.maptools.point_AnyToAny(cell_bottomleft, 4326, self.srid_db)

    poly = self.maptools.bounds_ToPolyString({
        'top': cell_topright.y,
        'right': cell_topright.x,
        'bottom': cell_bottomleft.y,
        'left': cell_bottomleft.x
    })

    if DEBUG:
        print('%s' % poly)

    return poly


def gridCluster(self, request):
    """Count the points per grid cell of the requested viewport.

    The cell polygons are written to the temporary table
    ``temp_clusterareas`` and joined against the geo table in one query.

    Args:
        request: request whose JSON body carries the geojson and ``filters``.

    Returns:
        list[dict]: one entry per non-empty cell with ``count``, ``geojson``
        (cell polygon in ``self.input_srid``) and ``center`` (``x``/``y`` of
        the polygon centroid).
    """
    params = self.loadJson(request)

    clusterGeometries = self.getClusterGeometries(request, params,
                                                  "viewport")

    gridCells = []
    if not clusterGeometries:
        return gridCells

    filterstring = self.constructFilterstring(params["filters"])

    with connections['default'].cursor() as cursor:
        # A leftover table from an earlier call on the same connection would
        # make CREATE fail. pg_temp restricts the drop to temporary tables.
        cursor.execute('DROP TABLE IF EXISTS pg_temp.temp_clusterareas')
        cursor.execute('''CREATE TEMPORARY TABLE temp_clusterareas (
              id serial,
              polygon geometry
           )''')

        for clusterGeometry in clusterGeometries:
            cursor.execute(
                '''
                INSERT INTO temp_clusterareas (polygon)
                ( SELECT (ST_Dump(ST_GeometryFromText(%s))).geom )
                ''', [clusterGeometry["geos"].ewkt])

        # indexing did not increase performance
        # cursor.execute('''CREATE INDEX temp_gix ON temp_clusterareas USING GIST (polygon);''')

        # use the configured geometry column, as the kmeans queries do
        gridcluster_queryset = '''
            SELECT count(*) AS count, polygon
            FROM "%s", temp_clusterareas
            WHERE %s IS NOT NULL AND ST_Intersects(%s, polygon) %s
            GROUP BY polygon
        ''' % (geo_table, geo_column_str, geo_column_str, filterstring)

        cursor.execute(gridcluster_queryset)
        gridCells_pre = cursor.fetchall()

        cursor.execute('DROP TABLE IF EXISTS pg_temp.temp_clusterareas')

    for count, polygon in gridCells_pre:
        geos = GEOSGeometry(polygon)
        geos.transform(self.input_srid)
        centroid = geos.centroid

        gridCells.append({
            "count": count,
            "geojson": geos.geojson,
            "center": {
                "x": centroid.x,
                "y": centroid.y
            }
        })

    return gridCells


'''----------------------------------------------------------------------------
    NON-CLUSTERING FUNCTIONS
----------------------------------------------------------------------------'''


# return all IDs of the pins contained by a cluster
def getKmeansClusterContent(self, request, custom_filterstring=""):
    """Return the entries that belong to the given kmeans clusters.

    The cell containing the point ``x``/``y`` is clustered again with
    ``BASE_K`` and the rows whose kmeans id is listed in ``ids`` are
    selected.

    Args:
        request: request whose JSON body carries ``x``, ``y``, ``ids`` and
            ``filters``.
        custom_filterstring: raw SQL appended after the generated filters.

    Returns:
        The raw queryset of matching entries.

    Raises:
        ValueError: if an entry of ``ids`` is not an integer value.
    """
    params = self.loadJson(request)

    x = params["x"]
    y = params["y"]
    filters = params["filters"]

    # The ids come from the client and end up in raw SQL: force integers.
    # "NULL" keeps the IN list valid (and matching nothing) for no ids.
    kmeans_string = ",".join(str(int(k)) for k in params["ids"]) or "NULL"

    cluster = Point(x, y, srid=self.input_srid)

    cell = self.maptools.getCellIDForPoint(cluster, self.zoom, self.gridSize)

    poly = self.clusterCellToBounds(cell)

    filterstring = self.constructFilterstring(filters)
    filterstring += custom_filterstring

    entries_queryset = Gis.objects.raw('''
        SELECT * FROM (
            SELECT kmeans(ARRAY[ST_X(%(geo)s), ST_Y(%(geo)s)], %(k)s)
                       OVER () AS kmeans,
                   "%(table)s".*
            FROM "%(table)s"
            WHERE %(geo)s IS NOT NULL
              AND ST_Intersects(%(geo)s, ST_GeometryFromText('%(poly)s', %(srid)s) )
              %(filters)s
        ) AS ksub
        WHERE kmeans IN (%(ids)s)
    ''' % {
        "geo": geo_column_str,
        "k": BASE_K,
        "table": geo_table,
        "poly": poly,
        "srid": self.srid_db,
        "filters": filterstring,
        "ids": kmeans_string,
    })

    return entries_queryset


'''----------------------------------------------------------------------------
    CONSTRUCT A FILTERSTRING FOR GEOMETRIES

    multipolygon and collections are not supported by ST_Within so they need
    to be split into several geometries
    this function converts geometries into a string usable as a raw sql query
    if no geojson is given, it is taken from the session of the request

    first, the geojson is converted to a list of GEOS
    second, the list is converted to a string
----------------------------------------------------------------------------'''


def getGeomFilterstring(self, geojson=None, request=None):
    """Return a SQL condition restricting rows to the given geojson.

    Args:
        geojson: GeoJSON Feature or FeatureCollection as a dict.
        request: optional request; when ``geojson`` is empty the geojson is
            read from ``request.session["geojson"]``.

    Returns:
        str: ``"( ST_Intersects(...) OR ST_Intersects(...) )"`` with one
        term per geometry, or ``""`` when there is no geometry to filter by.
    """
    geomfilterstring = ""

    if not geojson and request is not None:
        geojson = request.session.get('geojson', None)

    if not geojson:
        return geomfilterstring

    if geojson["type"] == "FeatureCollection":
        features = geojson["features"]
    elif geojson["type"] == "Feature":
        features = [geojson]
    else:
        features = []

    geos_geometries = []
    for feature in features:
        # a None result (unparsable feature) counts as "no geometries"
        geos_geometries += self.convertGeojsonFeatureToGEOS(feature) or []

    if not geos_geometries:
        # "()" would not be valid SQL
        return geomfilterstring

    conditions = [
        " ST_Intersects(%s, ST_GeometryFromText('%s', %s) ) " % (
            geo_column_str, geos.wkt, self.srid_db)
        for geos in geos_geometries
    ]

    return "(" + " OR ".join(conditions) + ")"