def setup_rtree(self, ndim, clusters=None):
    """Return the spatial index for this object, creating it on first use.

    :param ndim: number of continuous dimensions of the cluster boxes.
        When it is 0 a stub index is used whose ``intersection`` returns
        every cluster position.
    :param clusters: optional clusters used to bulk-load a new index.
    :returns: the (possibly newly created) index stored in ``self._rtree``.
    """
    # Compare against None instead of relying on truthiness: an index type
    # that defines __len__ is falsy while empty and would be rebuilt
    # (and its configuration lost) on every call.
    if self._rtree is not None:
        return self._rtree

    self._ndim = ndim
    if not ndim:
        class k(object):
            """Stub index for clusters without continuous dimensions."""

            def __init__(self, graph):
                self.graph = graph

            def insert(self, *args, **kwargs):
                pass

            def delete(self, *args, **kwargs):
                pass

            def intersection(self, *args, **kwargs):
                # `range` exists on both Python 2 and 3 (`xrange` does not).
                return range(len(self.graph.clusters))

        self._rtree = k(self)
        return self._rtree

    p = RProp()
    # The index is always built with at least two dimensions.
    p.dimension = max(2, ndim)
    p.dat_extension = 'data'
    p.idx_extension = 'index'

    if clusters:
        # Stream (id, box, obj) triples so the index is bulk-loaded.
        gen_func = ((i, self.bbox_rtree(c, enlarge=0.005), None)
                    for i, c in enumerate(clusters))
        self._rtree = RTree(gen_func, properties=p)
    else:
        self._rtree = RTree(properties=p)
    return self._rtree
def get_sindex(gdf):
    """Return an R-Tree spatial index for ``gdf``, building one if needed.

    Lookup order: an index previously cached on ``gdf`` as
    ``_rtree_sindex``; the object's own ``sindex`` when it offers
    ``nearest`` and is not a ``PyGEOSSTRTreeIndex``; otherwise a manually
    populated index when rtree is available and ``gdf`` has at least
    ``rtree_threshold`` rows. Returns ``None`` when no index is available.
    """
    try:
        return gdf._rtree_sindex
    except AttributeError:
        pass

    native = None
    if (isinstance(gdf, geopandas.GeoDataFrame)
            and hasattr(gdf.geometry, 'sindex')):
        native = gdf.geometry.sindex
    elif isinstance(gdf, geopandas.GeoSeries) and hasattr(gdf, 'sindex'):
        native = gdf.sindex

    if native is not None:
        if (hasattr(native, "nearest")
                and native.__class__.__name__ != "PyGEOSSTRTreeIndex"):
            # probably rtree.index.Index
            return native
        # probably PyGEOSSTRTreeIndex, but 'nearest' with 'num_results'
        # is required, so fall through to the manual build

    if not (rtree and len(gdf) >= rtree_threshold):
        return None

    # Manually populate a 2D spatial index for speed (slow, but reliable)
    built = Index()
    for position, bounds_row in enumerate(gdf.bounds.itertuples()):
        # drop the leading index label that itertuples() yields
        built.add(position, bounds_row[1:])
    # cache the index for later
    setattr(gdf, '_rtree_sindex', built)
    return built
def read_airspace(self, airspace):
    """Parse an OpenAir file and index the zones it defines.

    Records that fail to parse are logged and skipped. Zones that use
    ground-relative limits are skipped (with a warning) when
    ``self.agl_validable`` is false, and zones without bounds are skipped
    silently.

    :param airspace: path of the OpenAir file to read.
    :returns: an index holding each retained zone as the stored object.
    """
    zones = Index()
    filename_for = os.path.basename
    with open(airspace, 'r') as f:
        reader = openair.Reader(f)
        for record, error in reader:
            if error:
                logging.warning(
                    f'line {error.lineno} of {os.path.basename(airspace)} - {error}'
                )
                continue
            try:
                zone = Airspace(record)
                if not self.agl_validable and (zone.ground_floor or zone.ground_ceiling):
                    logging.warning(
                        f'{zone.name} will not be checked because ground altitude of flight could not be retrieved.'
                    )
                    continue
                if zone.bounds:
                    zones.insert(id(zone), zone.bounds, obj=zone)
            except KeyError:
                logging.warning(
                    f'line {reader.reader.lineno} of {os.path.basename(airspace)} - error in previous record'
                )
    return zones
def create_spatial_index(shape_dict):
    """Build a spatial index over the bounding boxes of ``shape_dict``.

    :param shape_dict: mapping of block id -> shape exposing ``bounds``.
    :returns: an index whose entries are numbered by iteration order and
        carry the block id as the stored object.
    """
    # Write progress to stderr directly: the `print >> stream` statement is
    # Python 2 only. The text emitted is the same as before.
    sys.stderr.write('Making spatial index... ')

    spatial_index = Index()
    # `.items()` works on both Python 2 and 3 (`iteritems` does not).
    for index, (blockid, shape) in enumerate(shape_dict.items()):
        spatial_index.insert(index, shape.bounds, obj=blockid)

    sys.stderr.write('done.\n')
    return spatial_index
class GeneticLabeler(BaseGeneticLabeler):
    """Genetic labeler that scores individuals with a spatial index."""

    def __init__(self, points, bounding_box):
        BaseGeneticLabeler.__init__(self, points, bounding_box)
        self.build_index()

    def build_index(self):
        """Collect every placeable item and index it by its box.

        ``self.items`` holds, in order, all label candidates, all points
        and the border segments of the bounding box; each item's ``index``
        attribute is its position in that list and its id in ``self.idx``.
        """
        candidates = [lc for p in self.points for lc in p.label_candidates]
        self.items = (list(candidates)
                      + list(self.points)
                      + list(self.bounding_box.border_config))

        self.idx = Index()
        for position, entry in enumerate(self.items):
            entry.index = position
            self.idx.insert(position, entry.box)

    def evaluate_fitness(self, individual):
        """Return the fitness of ``individual`` as a one-element tuple.

        ``individual[n]`` is the candidate position chosen for labeled
        point ``n``. All choices are selected first, then the penalties of
        the chosen candidates are summed; fitness is the negated total.
        """
        # Select every chosen candidate before scoring any of them.
        for point_no, choice in enumerate(individual):
            self.labeled_points[point_no].label_candidates[choice].select()

        total = 0
        for point_no, choice in enumerate(individual):
            chosen = self.labeled_points[point_no].label_candidates[choice]
            total += evaluate_label(chosen, self.items, self.idx,
                                    selected_only=True)
        return (-total,)
def __init__(self, resolution, basename=None, overwrite=False):
    # type: (float, str, bool) -> None
    """Store the resolution and create the backing interleaved index.

    Without ``basename`` the index is created without a storage name;
    otherwise ``basename`` is passed to the index together with a
    ``Property`` carrying the ``overwrite`` flag.
    """
    self._res = resolution
    self._cnt = 0

    if basename is not None:
        props = Property(overwrite=overwrite)
        self._index = Index(basename, interleaved=True, properties=props)
        return

    self._index = Index(interleaved=True)
def _construct_index(self):
    """!
    Build the triangle index outside of the constructor, so that a GUI can
    override this step. Every element of ``self.ikle`` becomes a polygon
    stored in ``self.triangles`` and an index entry carrying its vertex
    triple.
    """
    self.index = Index()
    for i, j, k in self.ikle:
        vertex_ids = (i, j, k)
        triangle = Polygon([self.points[n] for n in vertex_ids])
        self.triangles[vertex_ids] = triangle
        # the entry id is the first vertex number; lookups rely on `obj`
        self.index.insert(i, triangle.bounds, obj=vertex_ids)
def _construct_index(self, iter_pbar):
    """!
    Build the triangle index outside of the constructor, so that a GUI can
    override this step.
    @param iter_pbar: iterable progress bar, called as
        ``iter_pbar(elements, unit='elements')``
    """
    self.index = Index()
    elements = iter_pbar(self.ikle, unit='elements')
    for i, j, k in elements:
        vertex_ids = (i, j, k)
        triangle = Polygon([self.points[n] for n in vertex_ids])
        self.triangles[vertex_ids] = triangle
        # the entry id is the first vertex number; lookups rely on `obj`
        self.index.insert(i, triangle.bounds, obj=vertex_ids)
def compute_indicatormatrix(orig, dest, orig_proj='latlong',
                            dest_proj='latlong'):
    """
    Compute the indicatormatrix

    The indicatormatrix I[i,j] is a sparse representation of the ratio
    of the area in orig[j] lying in dest[i], where orig and dest are
    collections of polygons, i.e.

    A value of I[i,j] = 1 indicates that the shape orig[j] is fully
    contained in shape dest[i].

    ``dest`` is reprojected from ``dest_proj`` to ``orig_proj`` before the
    areas are computed, so both collections end up in the same crs.

    Parameters
    ---------
    orig : Collection of shapely polygons
    dest : Collection of shapely polygons
    orig_proj : projection of ``orig`` (passed to ``reproject_shapes``)
    dest_proj : projection of ``dest`` (passed to ``reproject_shapes``)

    Returns
    -------
    I : sp.sparse.lil_matrix
      Indicatormatrix
    """
    dest = reproject_shapes(dest, dest_proj, orig_proj)
    # The builtin `float` replaces the `np.float` alias, which no longer
    # exists in current NumPy releases.
    indicator = sp.sparse.lil_matrix((len(dest), len(orig)), dtype=float)

    # Only the import is guarded, so an ImportError raised while computing
    # intersections is no longer mistaken for a missing rtree package.
    try:
        from rtree.index import Index
    except ImportError:
        logger.warning(
            "Rtree is not available. Falling back to slower algorithm.")

        dest_prepped = list(map(prep, dest))
        for i, j in product(range(len(dest)), range(len(orig))):
            if dest_prepped[i].intersects(orig[j]):
                area = dest[i].intersection(orig[j]).area
                indicator[i, j] = area / orig[j].area
    else:
        idx = Index()
        for j, o in enumerate(orig):
            idx.insert(j, o.bounds)

        for i, d in enumerate(dest):
            # only shapes whose bounding boxes overlap can contribute
            for j in idx.intersection(d.bounds):
                o = orig[j]
                area = d.intersection(o).area
                indicator[i, j] = area / o.area

    return indicator
def build_index(self):
    """Collect every placeable item and index it by its box.

    ``self.items`` holds, in order, all label candidates, all points and
    the border segments of the bounding box; each item's ``index``
    attribute is its position in that list and its id in ``self.idx``.
    """
    candidates = [lc for p in self.points for lc in p.label_candidates]
    self.items = (list(candidates)
                  + list(self.points)
                  + list(self.bounding_box.border_config))

    self.idx = Index()
    for position, entry in enumerate(self.items):
        entry.index = position
        self.idx.insert(position, entry.box)
class StreetIndex(object):
    """Spatial lookup of streets read from a line-delimited GeoJSON file.

    Two indexes are kept:

    * ``idx`` holds the start and end point of every segment of every
      street. Start points are stored under the encoded segment id and end
      points under its negation.
    * ``bb_idx`` holds one bounding box per street, keyed by street id.
    """

    # Segment ids are ``segment_number * _SEG_STRIDE + street_id``, so
    # street ids are assumed to be smaller than the stride.
    _SEG_STRIDE = 1000000

    def __init__(self, streets_file):
        """Read ``streets_file`` (one JSON feature per line) into the indexes."""
        self.idx = Index()
        self.bb_idx = Index()
        # One pass fills both indexes; the file is streamed, not slurped.
        with open(streets_file) as f:
            for line in f:
                street = json.loads(line)
                street_id = int(street['properties']['id'])
                street_shape = asShape(street['geometry'])
                for i, segment in enumerate(street_shape.geoms):
                    seg_id = self.encode_seg_id(i, street_id)
                    self.idx.insert(seg_id, segment.coords[0])
                    self.idx.insert(-seg_id, segment.coords[-1])
                self.bb_idx.insert(street_id, list(street_shape.bounds))

    def encode_seg_id(self, i, street_id):
        """Combine segment number ``i`` and ``street_id`` into one integer."""
        return i * self._SEG_STRIDE + int(street_id)

    def decode_seg_id(self, seg_id):
        """Return the street id encoded in ``seg_id`` (sign is ignored).

        Exact inverse of :meth:`encode_seg_id` with respect to the street
        id: the segment number occupies the multiples of the stride, so
        the street id is the remainder.
        """
        return abs(seg_id) % self._SEG_STRIDE

    def find_nearest_street(self, shape):
        """Return, as a string, the id of the street nearest to ``shape``.

        Polygons are represented by their centroid, other geometries by
        their first coordinate.
        """
        shape = asShape(shape['geometry'])
        if shape.geom_type in ('Polygon', 'MultiPolygon'):
            xs, ys = shape.centroid.coords.xy
        else:
            xs, ys = shape.coords.xy
        ref_point = (float(xs[0]), float(ys[0]))
        street_id = list(self.bb_idx.nearest(ref_point))[0]
        return str(street_id)

    def find_connected_street(self, street):
        """Return the ids of the streets touching either end of ``street``.

        The street itself is excluded from the result.
        """
        street_id = int(street['properties']['id'])
        street_shape = asShape(street['geometry'])
        street_start = street_shape.geoms[0].coords[0]
        street_end = street_shape.geoms[-1].coords[-1]

        seg_ids = list(self.idx.intersection(street_start))
        seg_ids += list(self.idx.intersection(street_end))
        street_ids = set(map(self.decode_seg_id, seg_ids))
        street_ids.discard(street_id)
        return street_ids
def __init__(self, stype, bbox=None, **kwargs):
    """Create the wrapped index selected by ``stype``.

    ``custom_index`` and ``filename`` are taken from ``kwargs``. Raises
    ``ValueError`` when no index can be created for the given arguments.
    """
    custom = kwargs.pop('custom_index', None)
    self._filename = kwargs.pop('filename', None)
    self._bbox = bbox
    kind = stype.lower()
    self._stype = kind
    self._df = None

    if custom and kind == 'custom':
        self._index = custom
        return
    if kind == 'quadtree' and bbox:
        self._index = QIndex(bbox=bbox)
        return
    if RIndex and kind == 'rtree':
        self._index = RIndex(self._filename)
        return
    raise ValueError("Could not create the spatial index.")
def __init__(self, input_header, construct_index=False, iter_pbar=lambda x, unit: x):
    """!
    @param input_header <slf.Serafin.SerafinHeader>: input Serafin header
    @param construct_index <bool>: perform the index construction
    @param iter_pbar: iterable progress bar
    """
    nb_nodes = input_header.nb_nodes_2d
    self.x = input_header.x[:nb_nodes]
    self.y = input_header.y[:nb_nodes]
    self.ikle = input_header.ikle_2d - 1  # back to 0-based indexing
    self.triangles = {}
    self.nb_points = self.x.shape[0]
    self.nb_triangles = self.ikle.shape[0]
    self.points = np.stack([self.x, self.y], axis=1)

    if construct_index:
        self._construct_index(iter_pbar)
    else:
        # leave an empty index in place until construction is requested
        self.index = Index()
class Mesh2D:
    """!
    The general representation of mesh in Serafin 2D.
    The basis for interpolation, volume calculations etc.
    """

    def __init__(self, input_header, construct_index=False,
                 iter_pbar=lambda x, unit=None: x):
        """!
        @param input_header <slf.Serafin.SerafinHeader>: input Serafin header
        @param construct_index <bool>: perform the index construction
        @param iter_pbar: iterable progress bar, called as
            ``iter_pbar(iterable, unit=...)``. The default returns the
            iterable unchanged and accepts (and ignores) ``unit``, which
            the previous one-argument default did not.
        """
        nb_nodes = input_header.nb_nodes_2d
        self.x, self.y = input_header.x[:nb_nodes], input_header.y[:nb_nodes]
        self.ikle = input_header.ikle_2d - 1  # back to 0-based indexing
        self.triangles = {}
        self.nb_points = self.x.shape[0]
        self.nb_triangles = self.ikle.shape[0]
        self.points = np.stack([self.x, self.y], axis=1)

        if not construct_index:
            self.index = Index()
        else:
            self._construct_index(iter_pbar)

    def _construct_index(self, iter_pbar):
        """!
        Separate the index construction from the constructor, allowing a
        GUI override
        @param iter_pbar: iterable progress bar
        """
        self.index = Index()
        for i, j, k in iter_pbar(self.ikle, unit='elements'):
            t = Polygon([self.points[i], self.points[j], self.points[k]])
            self.triangles[i, j, k] = t
            # the vertex triple is stored as the object so that queries
            # can return it directly
            self.index.insert(i, t.bounds, obj=(i, j, k))

    def get_intersecting_elements(self, bounding_box):
        """!
        @brief Return the triangles in the mesh intersecting the bounding box
        @param bounding_box <tuple>: (left, bottom, right, top) of a 2d geometrical object
        @return <[tuple]>: The list of triangles (i,j,k) intersecting the bounding box
        """
        return list(self.index.intersection(bounding_box, objects='raw'))
def create_rtree(self, clusters):
    """Build a spatial index over the bounding boxes of ``clusters``.

    The dimensionality is taken from the first cluster's box, plus one
    padding dimension spanning 0..1 that is appended to every box.
    When the boxes have no dimensions a stub is returned whose
    ``intersection`` yields every cluster position.

    :param clusters: non-empty sequence of objects whose ``bbox`` is a
        ``(lower_tuple, upper_tuple)`` pair.
    :returns: the populated index (or the stub described above).
    """
    if not len(clusters[0].bbox[0]):
        class k(object):
            """Stub index that matches every cluster."""

            def intersection(self, foo):
                # `range` exists on both Python 2 and 3 (`xrange` does not).
                return range(len(clusters))
        return k()

    ndim = len(clusters[0].bbox[0]) + 1
    p = RProp()
    p.dimension = ndim
    p.dat_extension = 'data'
    p.idx_extension = 'index'

    rtree = RTree(properties=p)
    for idx, c in enumerate(clusters):
        # pad each corner with the extra 0..1 dimension
        rtree.insert(idx, c.bbox[0] + (0,) + c.bbox[1] + (1,))
    return rtree
def test_tpr(self):
    """Check TPR-tree query results against a brute-force intersection."""
    # TODO : this freezes forever on some windows cloud builds
    if os.name == 'nt':
        return

    # Live objects by id, used for the brute force comparison
    live = {}
    tpr_tree = Index(properties=Property(type=RT_TPRTree))

    for operation, t_now, object_ in data_generator():
        if operation == "INSERT":
            tpr_tree.insert(object_.id, object_.get_coordinates())
            live[object_.id] = object_
            continue

        if operation == "DELETE":
            tpr_tree.delete(object_.id, object_.get_coordinates(t_now))
            del live[object_.id]
            continue

        if operation != "QUERY":
            continue

        from_tree = set(tpr_tree.intersection(object_.get_coordinates()))

        # Brute intersect
        from_brute = set()
        for candidate in live.values():
            x_low, y_low = candidate.getXY(object_.start_time)
            x_high, y_high = candidate.getXY(object_.end_time)
            line = (x_low, y_low, x_high, y_high)
            rect = (object_.x, object_.y, object_.dx, object_.dy)
            if intersects(*(line + rect)):
                from_brute.add(candidate.id)

        # Tree should match brute force approach
        assert from_tree == from_brute
def demo_delete():
    """Index country polygons, delete one country, and report lost points.

    A random point is generated inside every polygon. After the polygons
    of one country are deleted from the index, every point that no longer
    has a hit for its own country is printed.
    """
    seed = 1  # Seed for random points
    country_id_to_remove = 170  # United States of America
    points_per_polygon = 1

    countries = get_countries()
    properties = Property()
    # properties.writethrough = True
    # properties.leaf_capacity = 1000
    # properties.fill_factor = 0.5
    index = Index(properties=properties)

    def has_country_hit(country_name, point):
        # True when the index returns an entry of `country_name` at `point`
        hits = list(index.intersection(point.bounds, objects=True))
        return any(hit.object == country_name for hit in hits)

    country_uuids_to_remove = []  # Polygons' ids to remove from the index
    points = []

    # Inserts countries data to the index
    for i, (country_name, geometry) in enumerate(countries):
        for polygon in get_polygons(geometry):
            temp_uuid = uuid.uuid1().int
            index.insert(temp_uuid, polygon.bounds, country_name)

            if i == country_id_to_remove:
                # Saves index ids of the polygon to be removed later
                country_uuids_to_remove.append(temp_uuid)

            # Generates random points in every polygon and saves them
            random_points = gen_random_point(points_per_polygon, polygon, seed)
            points.append((country_name, random_points))

    # Checks every generated point has matches
    for (country_name, country_points) in points:
        for point in country_points:
            assert has_country_hit(country_name, point)

    # Remove geometry
    geometry = countries[country_id_to_remove][1]
    for i, polygon in enumerate(get_polygons(geometry)):
        index.delete(country_uuids_to_remove[i], polygon.bounds)

    # Checks (again) if every generated point has matches,
    # saving any point without matches
    points_missing = []
    for (country_name, country_points) in points:
        for point in country_points:
            if not has_country_hit(country_name, point):
                points_missing.append(str(point) + " - " + country_name)

    # Print missing points
    for point in points_missing:
        print(point)
def get_sindex(gdf):
    """Return a spatial index for ``gdf``, building one when necessary.

    Uses the geometry column's own ``sindex`` when ``gdf`` has a ``sindex``
    attribute. Otherwise a 2D index is populated manually, provided rtree
    is available and ``gdf`` has at least ``rtree_threshold`` rows; if not,
    ``None`` is returned.

    Particularly useful for geopandas<0.2.0
    """
    assert isinstance(gdf, geopandas.GeoDataFrame)

    if hasattr(gdf, 'sindex'):
        return gdf.geometry.sindex

    if not (rtree and len(gdf) >= rtree_threshold):
        return None

    # Manually populate a 2D spatial index for speed (slow, but reliable)
    built = Index()
    for position, (segnum, bounds_row) in enumerate(gdf.bounds.iterrows()):
        built.add(position, tuple(bounds_row))
    return built
def create_rtree(self, clusters):
    """Build a spatial index over the bounding boxes of ``clusters``.

    The dimensionality is taken from the first cluster's box, plus one
    padding dimension spanning 0..1 that is appended to every box.
    When the boxes have no dimensions a stub is returned whose
    ``intersection`` yields every cluster position.

    :param clusters: non-empty sequence of objects whose ``bbox`` is a
        ``(lower_tuple, upper_tuple)`` pair.
    :returns: the populated index (or the stub described above).
    """
    if not len(clusters[0].bbox[0]):
        class k(object):
            """Stub index that matches every cluster."""

            def intersection(self, foo):
                # `range` exists on both Python 2 and 3 (`xrange` does not).
                return range(len(clusters))
        return k()

    ndim = len(clusters[0].bbox[0]) + 1
    p = RProp()
    p.dimension = ndim
    p.dat_extension = 'data'
    p.idx_extension = 'index'

    rtree = RTree(properties=p)
    for idx, c in enumerate(clusters):
        # pad each corner with the extra 0..1 dimension
        rtree.insert(idx, c.bbox[0] + (0, ) + c.bbox[1] + (1, ))
    return rtree
def __init__(self, streets_file):
    """Read ``streets_file`` (one JSON feature per line) into two indexes.

    ``self.idx`` receives the first and last coordinate of every segment
    of every street (under the encoded segment id and its negation);
    ``self.bb_idx`` receives one bounding box per street.
    """
    # First pass: segment end points.
    self.idx = Index()
    with open(streets_file) as f:
        for raw in f.readlines():
            feature = json.loads(raw)
            street_id = feature['properties']['id']
            geometry = asShape(feature['geometry'])
            for i in range(len(geometry.geoms)):
                segment = geometry.geoms[i]
                seg_id = self.encode_seg_id(i, street_id)
                self.idx.insert(seg_id, segment.coords[0])
                self.idx.insert(-seg_id, segment.coords[-1])

    # Second pass: whole-street bounding boxes.
    self.bb_idx = Index()
    with open(streets_file) as f:
        for raw in f.readlines():
            feature = json.loads(raw)
            numeric_id = int(feature['properties']['id'])
            geometry = asShape(feature['geometry'])
            self.bb_idx.insert(numeric_id, list(geometry.bounds))
def construct_rtree(self, clusters):
    """Build a spatial index over the bounding boxes of ``clusters``.

    The index always has at least two dimensions. Clusters with a single
    continuous dimension get a padding dimension spanning 0..1 so that
    their boxes supply as many coordinates as the index expects.
    When the boxes have no dimensions a stub is returned whose
    ``intersection`` yields every cluster position.

    :param clusters: non-empty sequence of objects exposing ``bbox`` (a
        ``(lower, upper)`` pair of sequences) and ``centroid``.
    :returns: the populated index (or the stub described above).
    """
    if not len(clusters[0].bbox[0]):
        class k(object):
            """Stub index that matches every cluster."""

            def intersection(self, foo):
                # `range` exists on both Python 2 and 3 (`xrange` does not).
                return range(len(clusters))
        return k()

    # Decide about padding from the dimensionality of the data, not from
    # the index dimension: the latter is clamped to >= 2, which made the
    # former `ndim == 1` padding branch unreachable.
    data_dim = len(clusters[0].centroid)
    ndim = max(2, data_dim)
    p = RProp()
    p.dimension = ndim
    p.dat_extension = 'data'
    p.idx_extension = 'index'

    rtree = RTree(properties=p)
    for idx, c in enumerate(clusters):
        # copy to lists so tuple and list corners are handled alike
        lower, upper = list(c.bbox[0]), list(c.bbox[1])
        if data_dim == 1:
            lower.append(0)
            upper.append(1)
        rtree.insert(idx, lower + upper)
    return rtree
def snap_to_edge_position(gdf, points, k=3, rtree=None):
    """
    Snap given points in the plane to edges in GeoDataFrame of edges.

    Parameters
    ----------
    gdf : GeoDataframe
        The edges of spatial network as a Geodataframe.
    points : array of floats, shape (M, 2)
        The cartesian coordinates of the points to be snapped.
    k : integer, optional
        Number of nearest edges to consider.
    rtree : spatial index, optional
        Index over the edge bounds, keyed by row position in ``gdf``.
        Built from ``gdf`` when not given; pass one to reuse it across
        calls.

    Returns
    -------
    nearest_edges : list, length M
        Index labels of the nearest edges in the GeoDataframe.
    refdistances : list of floats, length M
        Linear referencing distances of points along nearest edge.
    """
    X, Y = points.T
    geom = gdf["geometry"]

    # If not passed, build the r-tree spatial index by position for
    # subsequent iloc. Identity test: `== None` would invoke the index's
    # own equality logic.
    if rtree is None:
        rtree = RTreeIndex()
        for pos, bounds in enumerate(geom.bounds.values):
            rtree.insert(pos, bounds)

    # use r-tree to find possible nearest neighbors, one point at a time,
    # then minimize euclidean distance from point to the possible matches
    nearest_edges = []
    refdistances = []
    for xy in zip(X, Y):
        p = Point(xy)
        candidate_positions = list(rtree.nearest(xy, num_results=k))
        dists = geom.iloc[candidate_positions].distance(p)
        nearest_label = dists.idxmin()  # computed once, used twice
        nearest_edges.append(nearest_label)
        refdistances.append(geom[nearest_label].project(p))

    return nearest_edges, refdistances
def get_rtree(geometries, fp):
    """Open the on-disk R-tree for ``geometries``, creating it if absent.

    :param geometries: sequence of objects exposing ``bounds``; entry ids
        are their positions in the sequence.
    :param fp: path object (must provide ``as_posix()``) naming the index
        files without extension.
    :returns: dict with the opened index under ``'rtree'`` and the input
        under ``'geometries'``.
    """
    base = fp.as_posix()
    index_file = base + '.idx'

    if not os.path.exists(index_file):
        # Populate R-tree index with bounds of geometries
        print('Populate {} tree'.format(base))
        builder = Index(base)
        for position, geometry in enumerate(geometries):
            builder.insert(position, geometry.bounds)
        builder.close()

    return {'rtree': Index(base), 'geometries': geometries}
def build_rtree(self):
    '''
    Construct an R-tree for the domain. This may reduce the
    computational complexity of the methods `intersection_count`,
    `contains`, `orient_simplices`, and `snap`.

    Does nothing (besides a debug message) when an R-tree already exists.
    '''
    if self.rtree is not None:
        # do nothing because the R-tree already exists
        logger.debug('R-tree already exists')
        return

    # one bounding box per simplex: per-axis minima followed by maxima
    simplex_vertices = self.vertices[self.simplices]
    lower = simplex_vertices.min(axis=1)
    upper = simplex_vertices.max(axis=1)
    boxes = np.hstack((lower, upper))

    props = Property()
    props.dimension = self.dim
    self.rtree = Index(properties=props)
    for simplex_no, box in enumerate(boxes):
        self.rtree.add(simplex_no, box)
def build_cache(self):
    """Precompute the penalty data of every label candidate.

    For each candidate this sets ``penalty`` (position penalty plus the
    overlap with points and, counted once, a bounding-box border) and
    ``label_penalties`` (overlap with each candidate of another point,
    addressed by that candidate's ``index``).
    """
    label_candidates = [lc for p in self.points for lc in p.label_candidates]
    items = (list(label_candidates)
             + list(self.points)
             + list(self.bounding_box.border_config))

    idx = Index()
    for position, entry in enumerate(items):
        entry.index = position
        idx.insert(position, entry.box)

    candidate_count = len(label_candidates)
    for lc in label_candidates:
        lc.penalty = POSITION_WEIGHT * lc.position
        lc.label_penalties = [0] * candidate_count

        border_counted = False
        for item_id in idx.intersection(lc.box):
            item = items[item_id]
            if item == lc or item == lc.point:
                continue

            if isinstance(item, Label):
                # candidates of the same point never penalise each other
                if not (lc.point == item.point):
                    lc.label_penalties[item.index] = item.overlap(lc)
                continue

            if isinstance(item, BoundingBoxBorder):
                # only the first intersecting border segment counts
                if border_counted:
                    continue
                border_counted = True

            lc.penalty += item.overlap(lc)
class PolyStore(object):
    """Bounding-box index over shapefile shapes with point lookup."""

    def __init__(self):
        self.index = Index()
        # Defined up front so lookups before loading return None instead
        # of raising AttributeError.
        self.shapes = []
        self.records = []

    def load_from_shapefile(self, sf):
        """Index the bounding box of every shape of ``sf``.

        ``sf`` must provide ``shapes()`` and ``records()``. Multi-part
        shapes are reported on stdout.
        """
        self.shapes = sf.shapes()
        self.records = sf.records()
        for index, shape in enumerate(self.shapes):
            if len(shape.parts) > 1:
                # single formatted argument: prints the same text on
                # Python 2 and Python 3
                print('%s %s' % (self.records[index], len(shape.parts)))
            self.index.insert(index, shape.bbox)

    def get_shape_at_point(self, point):
        """Return the record code of the shape containing ``point``.

        :param point: ``(x, y)`` pair. It is unpacked in the body because
            tuple parameters in the signature are Python 2 only.
        :returns: field 4 of the first matching record; ``'0'`` when the
            containing part is listed in ``SHAPE_LAND`` for that code;
            ``None`` when no shape contains the point.
        """
        x, y = point
        candidates = self.index.intersection((x, y, x, y))
        for candidate in candidates:
            shape = self.shapes[candidate]
            code = self.records[candidate][4]
            for i, part in enumerate(shape_to_parts_list(shape)):
                if Polygon(part).contains(Point(x, y)):
                    if i in SHAPE_LAND.get(code, []):
                        return '0'
                    return code
        return None
def local_search(points, bounding_box, iterations):
    """Greedily re-place labels to minimise their overlap penalty.

    For ``iterations`` rounds, every labeled point is given the candidate
    with the lowest penalty (position penalty plus overlap with all
    intersecting items that do not belong to the same point). Labels are
    changed in place through ``candidate.select()``; nothing is returned.

    :param points: all points; those with truthy ``text`` are labeled.
    :param bounding_box: object whose ``border_config`` lists its borders.
    :param iterations: number of improvement rounds.
    """
    labeled_points = [p for p in points if p.text]

    items = []
    items.extend([p.label for p in labeled_points])
    items.extend(points)
    items.extend(bounding_box.border_config)

    idx = Index()
    for i, item in enumerate(items):
        item.index = i
        idx.insert(item.index, item.box)

    for _ in range(iterations):
        for lp in labeled_points:
            best_candidate = None
            min_penalty = None
            for lc1 in lp.label_candidates:
                penalty = POSITION_WEIGHT * lc1.position

                # Check overlap with other labels and points
                for item_id in idx.intersection(lc1.box):
                    item = items[item_id]
                    if hasattr(item, "point") and lc1.point == item.point:
                        continue
                    penalty += item.overlap(lc1)

                if min_penalty is None or penalty < min_penalty:
                    min_penalty = penalty
                    best_candidate = lc1

            if best_candidate is None:
                # no candidates: keep the current label and its index entry
                continue

            # Remove the old label from the index
            idx.delete(lp.label.index, lp.label.box)

            # Select the new label
            # NOTE(review): assumes select() makes `lp.label` refer to the
            # chosen candidate - confirm against its implementation.
            best_candidate.select()

            # Add the new label to the index and item list. Its `index`
            # must be the id it is stored under, otherwise the delete in
            # the next round targets a stale (or missing) id and old boxes
            # pile up in the index.
            new_id = len(items)
            lp.label.index = new_id
            idx.insert(new_id, lp.label.box)
            items.append(lp.label)
def evaluate_labels(labels, points, bounding_box):
    """Return the overlap penalty of every label.

    All labels, points and bounding-box borders are indexed by their boxes
    (each item's ``index`` attribute is set to its position), then every
    label is scored with ``evaluate_label``. The elapsed time is printed.

    :returns: list of penalties, one per entry of ``labels``.
    """
    items = []
    items.extend(labels)
    items.extend(points)
    items.extend(bounding_box.border_config)

    # perf_counter() replaces time.clock(), which no longer exists on
    # current Python 3 releases.
    started = time.perf_counter()

    idx = Index()
    for i, item in enumerate(items):
        item.index = i
        idx.insert(i, item.box)

    # Update penalties for overlap with other objects
    penalties = [evaluate_label(l, items, idx) for l in labels]

    finished = time.perf_counter()
    print(f"Total time: {finished - started}")
    return penalties
def flush(self):
    """
    Persist the wrapped index.

    When the index object has its own ``flush`` it is called. Otherwise,
    for an R-Tree index with a filename, the index is closed and reopened
    from that file.

    **The close/reopen path applies only to the R-Tree implementation of
    the spatial index.**

    :returns: Boolean - False when neither case applied, True otherwise
    """
    if hasattr(self._index, 'flush'):
        self._index.flush()
        return True

    if self._stype == 'rtree' and self._filename:
        self._index.close()
        self._index = RIndex(self._filename)
        return True

    return False
def make_index(shapes):
    """Creates an index for fast and efficient spatial queries.

    Args:
      shapes: shapely shapes whose bounding boxes are bulk-inserted into
        the spatial index; entry ids are the positions in `shapes`.

    Returns:
      The spatial index created from the shape's bounding boxes.
    """
    # Todo: benchmark these for our use-cases
    settings = Property()
    settings.dimension = 2
    settings.leaf_capacity = 1000
    settings.fill_factor = 0.9

    # stream (id, bounds, obj) triples so the index is bulk-loaded
    entries = ((position, shape.bounds, None)
               for position, shape in enumerate(shapes))
    return Index(entries, properties=settings)
def __init__(self, context: DyCleeContext):
    """Initialise empty cluster sets, counters and the optional R*-tree."""
    self.context = context

    # Microcluster collections
    self.dense_µclusters: Set[MicroCluster] = Set()
    self.semidense_µclusters: Set[MicroCluster] = Set()
    self.outlier_µclusters: Set[MicroCluster] = Set()
    self.long_term_memory: Set[MicroCluster] = Set()
    self.eliminated: Set[MicroCluster] = Set()

    # Counters
    self.next_µcluster_index: int = 0
    self.next_class_label: int = 0
    self.n_steps: int = 0
    self.last_partitioning_step: int = 0
    self.last_density_step: int = 0

    if not self.context.maintain_rtree:
        self.rtree = None
        self.µcluster_map = None
        return

    tree_properties = RTreeProperty(dimension=self.context.n_features)
    self.rtree = RTreeIndex(properties=tree_properties)
    # This mapping is used to retrieve microcluster objects from their
    # hashes stored with their locations in the R*-tree
    self.µcluster_map: Optional[dict[int, MicroCluster]] = {}
class SpatialIndex():
    """
    A spatial index is a type of extended index that allows you to index a
    spatial column. A spatial column is a table column that contains data
    of a spatial data type.

    Spatial indexes help to improve spatial query performance on a
    dataframe. Identifying a feature, selecting features, and joining data
    all have better performance when using spatial indexing.

    ====================     ==================================================
    Argument                 Description
    --------------------     --------------------------------------------------
    stype                    Required String. This sets the type of spatial
                             index being used by the user. The current types
                             of spatial indexes are: custom, rtree and
                             quadtree.
    --------------------     --------------------------------------------------
    bbox                     Optional Tuple. The extent of the spatial data
                             as: (xmin, ymin, xmax, ymax). This parameter is
                             required if a QuadTree Spatial Index is being
                             used.

                             Example:
                             bbox=(-100, -50, 100, 50)
    --------------------     --------------------------------------------------
    filename                 Optional String. The name of the spatial index
                             file. This is only supported by rtree spatial
                             indexes. For large datasets an rtree index can be
                             saved to disk and used at a later time. If this
                             is not provided the r-tree index will be
                             in-memory.
    --------------------     --------------------------------------------------
    custom_index             Optional Object. Sometimes QuadTree and Rtree
                             indexing is not enough. A custom spatial index
                             class can be given to the SpatialIndex class and
                             used through encapsulation. The custom index must
                             have two methods: `intersect`, which accepts a
                             tuple, and `insert`, which must accept an oid and
                             a bounding box. This object is required when
                             `stype` of 'custom' is specified.
    ====================     ==================================================

    """
    _stype = None
    _bbox = None
    _index = None
    _df = None
    #----------------------------------------------------------------------
    def __init__(self, stype, bbox=None, **kwargs):
        """initializer

        Raises ``ValueError`` when no index can be created for the given
        combination of arguments.
        """
        ci = kwargs.pop('custom_index', None)
        self._filename = kwargs.pop('filename', None)
        self._bbox = bbox
        self._stype = stype.lower()
        self._df = None

        if ci and self._stype == 'custom':
            self._index = ci
        elif self._stype == 'quadtree' and bbox:
            self._index = QIndex(bbox=bbox)
        elif RIndex and self._stype == 'rtree':
            self._index = RIndex(self._filename)
        else:
            raise ValueError("Could not create the spatial index.")
    #----------------------------------------------------------------------
    def intersect(self, bbox):
        """
        Returns the spatial features that intersect the bbox

        :bbox: tuple - (xmin,ymin,xmax,ymax)

        :returns: list
        """
        if self._stype == 'rtree':
            return list(self._index.intersection(bbox))
        elif self._stype == 'quadtree':
            return list(self._index.intersect(bbox=bbox))
        else:
            return list(self._index.intersect(bbox))
    #----------------------------------------------------------------------
    def insert(self, oid, bbox):
        """
        Inserts the entry into the spatial index

        :oid: unique id
        :bbox: tuple - (xmin,ymin,xmax,ymax)

        :returns: whatever the wrapped index's insert returns
        """
        if self._index is None:
            raise Exception(("Could not insert into a spatial index because "
                             "it does not exist."))
        if self._stype == 'rtree' and \
           HASRTREE and \
           isinstance(self._index, RIndex):
            r = self._index.insert(id=oid,
                                   coordinates=bbox,
                                   obj=None)
            self.flush()
            return r
        elif self._stype == 'quadtree':
            return self._index.insert(item=oid, bbox=bbox)
        elif self._stype == 'custom':
            # The custom index contract is `insert(oid, bbox)`; this branch
            # used to call `intersect`, so nothing was ever stored.
            r = self._index.insert(oid, bbox)
            self.flush()
            return r
    #----------------------------------------------------------------------
    def flush(self):
        """
        Saves the index to disk if a filename is given for an R-Tree
        Spatial Index. An index object that has its own ``flush`` method
        is flushed through it instead.

        **The save-to-disk path applies only to the R-Tree implementation
        of the spatial index.**

        :returns: Boolean
        """
        if hasattr(self._index, 'flush'):
            getattr(self._index, 'flush')()
        elif self._stype == 'rtree' and \
             self._filename:
            # closing writes the file; reopen so the index stays usable
            self._index.close()
            self._index = RIndex(self._filename)
        else:
            return False
        return True
def __init__(self):
    """Create the (initially empty) index used by this object."""
    self.index = Index()
class AdjacencyGraph(object):
    """Undirected adjacency graph over clusters, backed by a spatial index.

    ``clusters`` is a list addressed by cluster id in which removed
    clusters leave a ``None`` hole; ``id2c``/``c2id`` map ids to clusters
    and back; ``graph`` maps every cluster to the set of its neighbors.
    """

    def __init__(self, clusters, partitions_complete=True):
        self.partitions_complete = partitions_complete
        self.graph = defaultdict(set)
        self.cid = 0
        self.clusters = []
        self.id2c = dict()
        self.c2id = dict()
        self._rtree = None  # internal datastructure
        self._ndim = None

        self.bulk_init(clusters)

    def to_json(self):
        """Serialise the graph (but not the spatial index) to JSON."""
        data = {
            'clusters': [c and c.__dict__ or None for c in self.clusters],
            'id2c': [(key, c.__dict__) for key, c in self.id2c.items()],
            'c2id': [(c.__dict__, val) for c, val in self.c2id.items()],
            # dicts have no `itemsiter()`; `items()` is what was meant
            'graph': [(key.__dict__, [val.__dict__ for val in vals])
                      for key, vals in self.graph.items()],
            'cid': self.cid,
            '_ndim': self._ndim,
            '_rtreename': 'BLAH'
        }
        return json.dumps(data)

    def from_json(self, encoded):
        """Restore the state written by :meth:`to_json`.

        The spatial index is reset; it is recreated on the next index
        operation.
        """
        data = json.loads(encoded)
        self.clusters = [c and Cluster.from_dict(c) or None
                         for c in data['clusters']]
        self.id2c = dict([(key, Cluster.from_dict(val))
                          for key, val in data['id2c']])
        self.c2id = dict([(Cluster.from_dict(key), val)
                          for key, val in data['c2id']])
        # Rebuild with the container types __init__ establishes, so that
        # insert()/remove() can keep using set operations afterwards.
        self.graph = defaultdict(set)
        for key, vals in data['graph']:
            self.graph[Cluster.from_dict(key)] = set(
                Cluster.from_dict(val) for val in vals)
        self.cid = data['cid']
        self._ndim = data['_ndim']
        self._rtree = None

    def setup_rtree(self, ndim, clusters=None):
        """Return the spatial index, creating it on first use.

        With ``ndim`` of 0 a stub is used whose ``intersection`` returns
        every cluster position. ``clusters``, when given, bulk-loads a new
        index.
        """
        # `is not None` rather than truthiness: an index type defining
        # __len__ is falsy while empty and would be rebuilt on every call.
        if self._rtree is not None:
            return self._rtree

        self._ndim = ndim
        if not ndim:
            class k(object):
                """Stub index for clusters without continuous dimensions."""

                def __init__(self, graph):
                    self.graph = graph

                def insert(self, *args, **kwargs):
                    pass

                def delete(self, *args, **kwargs):
                    pass

                def intersection(self, *args, **kwargs):
                    return range(len(self.graph.clusters))

            self._rtree = k(self)
            return self._rtree

        p = RProp()
        p.dimension = max(2, ndim)
        p.dat_extension = 'data'
        p.idx_extension = 'index'

        if clusters:
            gen_func = ((i, self.bbox_rtree(c, enlarge=0.00001), None)
                        for i, c in enumerate(clusters))
            self._rtree = RTree(gen_func, properties=p)
        else:
            self._rtree = RTree(properties=p)
        return self._rtree

    def bbox_rtree(self, cluster, enlarge=0.):
        """Return the cluster's box as a flat ``lower + higher`` list.

        One-dimensional boxes are padded with a 0..1 dimension, and every
        bound is pushed outwards by ``enlarge``.
        """
        lower, higher = map(list, cluster.bbox)
        if self._ndim == 1:
            lower.append(0)
            higher.append(1)

        # Skip the arithmetic only for "no enlargement" (the old test
        # against 1. skipped a genuine enlargement by exactly 1).
        if enlarge:
            lower = [v - enlarge for v in lower]
            higher = [v + enlarge for v in higher]

        return lower + higher

    def insert_rtree(self, idx, cluster):
        """Add ``cluster`` to the spatial index under id ``idx``."""
        self.setup_rtree(len(cluster.bbox[0]))
        self._rtree.insert(idx, self.bbox_rtree(cluster))
        return cluster

    def remove_rtree(self, idx, cluster):
        """Delete the entry ``idx`` of ``cluster`` from the spatial index."""
        self.setup_rtree(len(cluster.bbox[0]))
        self._rtree.delete(idx, self.bbox_rtree(cluster))
        return cluster

    def search_rtree(self, cluster):
        """Return the live clusters whose boxes intersect ``cluster``'s."""
        self.setup_rtree(len(cluster.bbox[0]))
        bbox = self.bbox_rtree(cluster, enlarge=0.00001)
        hits = (self.clusters[idx] for idx in self._rtree.intersection(bbox))
        # drop the None holes left behind by remove()
        return [c for c in hits if c]

    def bulk_init(self, clusters):
        """Load ``clusters`` and connect every adjacent pair."""
        if clusters:
            self.setup_rtree(len(clusters[0].bbox[0]), clusters)

        self.clusters.extend(clusters)
        for cid, c in enumerate(clusters):
            self.id2c[cid] = c
            self.c2id[c] = cid

        for idx, c in enumerate(clusters):
            for n in self.search_rtree(c):
                # each unordered pair is examined once
                if self.c2id[n] <= idx:
                    continue
                if c.discretes_contains(n) and box_completely_contained(c.bbox, n.bbox):
                    continue
                if not c.adjacent(n, 0.8):
                    continue
                self.graph[c].add(n)
                self.graph[n].add(c)

    def insert(self, cluster):
        """Add ``cluster`` and link it to adjacent or overlapping clusters."""
        if cluster in self.graph:
            return

        self.graph[cluster] = set()
        # snapshot the keys; only the neighbor sets change in the loop
        for o in list(self.graph):
            if cluster == o:
                continue
            if cluster.adjacent(o, 0.8) or (volume(intersection_box(cluster.bbox, o.bbox)) > 0 and not cluster.contains(o)):
                self.graph[cluster].add(o)
                self.graph[o].add(cluster)

        cid = len(self.clusters)
        self.clusters.append(cluster)
        self.id2c[cid] = cluster
        self.c2id[cluster] = cid
        self.insert_rtree(cid, cluster)

    def remove(self, cluster):
        """Remove ``cluster`` from the graph, the id maps and the index."""
        if cluster not in self.graph:
            return

        for neigh in self.graph[cluster]:
            if not neigh == cluster:
                # discard() tolerates a missing back-edge; previously any
                # failure here dropped into the debugger via a bare except
                self.graph[neigh].discard(cluster)
        del self.graph[cluster]

        cid = self.c2id[cluster]
        self.remove_rtree(cid, cluster)
        del self.c2id[cluster]
        del self.id2c[cid]
        # keep the slot so the ids of the other clusters stay valid
        self.clusters[cid] = None

    def neighbors(self, cluster):
        """Return the neighbors of ``cluster``.

        When the partitions are not complete, every live cluster is
        returned. For a cluster that is not in the graph, the neighbors of
        the adjacent clusters it intersects are collected instead.
        """
        if not self.partitions_complete:
            return [c for c in self.clusters if c]

        if cluster in self.graph:
            return self.graph[cluster]

        ret = set()
        intersects = self.search_rtree(cluster)
        for key in filter(cluster.adjacent, intersects):
            if box_completely_contained(key.bbox, cluster.bbox):
                continue
            ret.update(self.graph[key])
        return ret
class AdjacencyVersion(object):
    """Neighbour lookup over clusters using an R-tree plus per-attribute indexes.

    Clusters are addressed by their position in ``self.clusters``; that
    position is the id stored in the R-tree and in ``id2c`` / ``c2id``.
    Removed clusters leave a ``None`` slot behind so the remaining ids stay
    valid.  ``disc_idxs`` maps each attribute of the feature mapper to a
    fitted ``NearestNeighbors`` index.
    """

    def __init__(self, feature_mapper):
        """Create an empty structure.

        ``feature_mapper`` is called as ``feature_mapper(cluster, attr)`` and
        must expose ``attrs`` (iterable of attribute names) and ``ranges``
        (per-column value range, used to scale box enlargement).
        """
        self.cid = 0
        self.disc_idxs = {}
        self.feature_mapper = feature_mapper
        self.radius = .15
        self.metric = 'hamming'

        self._rtree = None  # internal datastructure, built lazily
        self._ndim = None

        self.clusters = []
        self.id2c = dict()
        self.c2id = dict()

    def to_json(self):
        """Serialise the clusters and id tables to a JSON string.

        The R-tree and the nearest-neighbour indexes are not serialised.
        """
        data = {
            # None slots (removed clusters) are written as null.
            'clusters': [c and c.__dict__ or None for c in self.clusters],
            'id2c': [(key, c.__dict__) for key, c in self.id2c.items()],
            'c2id': [(c.__dict__, val) for c, val in self.c2id.items()],
            'cid': self.cid,
            '_ndim': self._ndim,
            '_rtreename': 'BLAH'
        }
        return json.dumps(data)

    def from_json(self, encoded):
        """Restore the state written by to_json().

        The R-tree is reset to None, so it is rebuilt by the next
        setup_rtree() call.
        """
        data = json.loads(encoded)
        self.clusters = [c and Cluster.from_dict(c) or None for c in data['clusters']]
        self.id2c = dict([(key, Cluster.from_dict(val)) for key, val in data['id2c']])
        self.c2id = dict([(Cluster.from_dict(key), val) for key, val in data['c2id']])
        self.cid = data['cid']
        self._ndim = data['_ndim']
        self._rtree = None

    def setup_rtree(self, ndim, clusters=None):
        """Return the spatial index, creating it on first use.

        ndim     -- number of continuous dimensions; when falsy, a stand-in
                    index is used whose intersection() reports every id.
        clusters -- optional clusters to load while the index is created,
                    using their position as id and a box enlarged by 0.005.
        """
        # Identity test on purpose: an index object may define __len__ and
        # would then be falsy (and needlessly rebuilt) while it is empty.
        if self._rtree is not None:
            return self._rtree

        self._ndim = ndim
        if not ndim:
            class _ScanAllIndex(object):
                """Index stand-in for clusters without continuous dimensions."""

                def __init__(self, graph):
                    self.graph = graph

                def insert(self, *args, **kwargs):
                    pass

                def delete(self, *args, **kwargs):
                    pass

                def intersection(self, *args, **kwargs):
                    # Nothing can be ruled out spatially: report every id.
                    return range(len(self.graph.clusters))

            self._rtree = _ScanAllIndex(self)
            return self._rtree

        p = RProp()
        # The index is created with at least two dimensions; bbox_rtree()
        # pads one-dimensional boxes to match.
        p.dimension = max(2, ndim)
        p.dat_extension = 'data'
        p.idx_extension = 'index'

        if clusters:
            gen_func = ((i, self.bbox_rtree(c, enlarge=0.005), None)
                        for i, c in enumerate(clusters))
            self._rtree = RTree(gen_func, properties=p)
        else:
            self._rtree = RTree(properties=p)
        return self._rtree

    def bbox_rtree(self, cluster, enlarge=0.):
        """Return `cluster`'s box as a flat list: lower bounds, then upper bounds.

        With a non-zero ``enlarge`` every side is pushed outward by
        ``enlarge * feature_mapper.ranges[col]`` for the matching column.
        """
        cols = cluster.cols
        # Copies, so the cluster's own bounds are never modified.
        lower, higher = map(list, cluster.bbox)
        if self._ndim == 1:
            # Pad with a fixed [0, 1] extent as a second dimension.
            lower.append(0)
            higher.append(1)

        if enlarge != 0:
            for idx, col in enumerate(cols):
                rng = enlarge * self.feature_mapper.ranges[col]
                lower[idx] -= rng
                higher[idx] += rng

        return lower + higher

    def insert_rtree(self, idx, cluster):
        """Add `cluster`'s box to the index under id `idx`; return the cluster."""
        self.setup_rtree(len(cluster.bbox[0]))
        self._rtree.insert(idx, self.bbox_rtree(cluster))
        return cluster

    def remove_rtree(self, idx, cluster):
        """Delete (`idx`, box of `cluster`) from the index; return the cluster."""
        self.setup_rtree(len(cluster.bbox[0]))
        self._rtree.delete(idx, self.bbox_rtree(cluster))
        return cluster

    def search_rtree(self, cluster):
        """Return the ids whose box intersects `cluster`'s box enlarged by 0.01.

        Ids, not clusters, are returned; neighbors() maps them back through
        self.clusters.
        """
        self.setup_rtree(len(cluster.bbox[0]))
        bbox = self.bbox_rtree(cluster, enlarge=0.01)
        return self._rtree.intersection(bbox)

    def bulk_init(self, clusters):
        """Load `clusters` and fit one nearest-neighbour index per attribute.

        An empty or None `clusters` leaves the structure untouched.
        """
        if not clusters:
            return

        self.setup_rtree(len(clusters[0].bbox[0]), clusters)
        # Private copy: remove() blanks slots in self.clusters and must not
        # alter the list the caller handed in.
        self.clusters = list(clusters)
        for cid, c in enumerate(clusters):
            self.id2c[cid] = c
            self.c2id[c] = cid

        for dim in self.feature_mapper.attrs:
            Xs = [self.feature_mapper(c, dim) for c in clusters]
            idx = NearestNeighbors(
                radius=self.radius,
                algorithm='ball_tree',
                metric=self.metric
            )
            self.disc_idxs[dim] = idx
            self.disc_idxs[dim].fit(np.array(Xs))

    def contains(self, cluster):
        """Return True when `cluster` is currently registered."""
        return cluster in self.c2id

    def remove(self, cluster):
        """Unregister `cluster`; return True if it was present, else False."""
        if cluster in self.c2id:
            cid = self.c2id[cluster]
            self.remove_rtree(cid, cluster)
            del self.c2id[cluster]
            del self.id2c[cid]
            # Keep the slot so the ids of the other clusters stay valid.
            self.clusters[cid] = None
            return True
        return False

    def neighbors(self, cluster):
        """Return the registered clusters near `cluster` as a list.

        A candidate must lie within ``self.radius`` in the index of every
        discrete attribute of `cluster` and must intersect its (enlarged)
        box in the spatial index.  An attribute without an index, or an
        empty candidate set, yields an empty list.
        """
        ret = None
        for name in cluster.discretes:
            if name not in self.disc_idxs:
                return []
            vect = self.feature_mapper(cluster, name)
            index = self.disc_idxs[name]
            _, idxs = index.radius_neighbors(vect, radius=self.radius)
            idxs = set(idxs[0].tolist())

            if ret is None:
                ret = idxs
            else:
                ret.intersection_update(idxs)
            # No candidate left: later attributes cannot add any back.
            if not ret:
                return []

        idxs = self.search_rtree(cluster)
        if ret is None:
            ret = set(idxs)
        else:
            ret.intersection_update(set(idxs))

        # Drop the None slots left behind by remove().
        return [c for c in (self.clusters[idx] for idx in ret) if c]
def main(input_dir, output_dir):
    """Split extracted venues into per-city files plus planet-wide dumps.

    Downloads the metro-extract city list with ``wget`` (which must be on
    the PATH), indexes every city's bounding box, then streams
    ``gen_venues(input_dir)`` and writes under ``output_dir``:

    * ``cities/<name>.geojson`` -- one JSON line per venue whose point falls
      inside that city's bounding box (files left empty are deleted);
    * ``planet.geojson`` -- one JSON line per distinct venue;
    * ``planet_addresses_only.json`` -- the raw properties, one line per
      distinct (name, street, domain);
    * ``manifest.json`` -- midpoint, file name and display name of every
      city file that was kept.

    ``output_dir/cities`` is expected to exist already.
    """
    formatter = logging.Formatter('%(asctime)s %(levelname)s [%(name)s]: %(message)s')
    handler = logging.StreamHandler(sys.stderr)
    handler.setFormatter(formatter)
    logger.addHandler(handler)
    logger.setLevel(logging.INFO)

    city_names = []
    rtree = RTreeIndex()

    cities_filename = os.path.join(tempfile.gettempdir(), 'cities.json')
    subprocess.check_call(['wget', 'https://raw.githubusercontent.com/mapzen/metroextractor-cities/master/cities.json', '-O', cities_filename])
    with open(cities_filename) as cities_file:
        all_cities = json.load(cities_file)

    # The id stored in the R-tree is the city's position in city_names.
    i = 0
    for region in all_cities['regions'].values():
        for city, data in region['cities'].items():
            bbox = data['bbox']
            rtree.insert(i, (float(bbox['left']), float(bbox['bottom']), float(bbox['right']), float(bbox['top'])))
            city_names.append(city)
            i += 1

    def city_path(name):
        return os.path.join(output_dir, 'cities', '{}.geojson'.format(name))

    files = {}
    planet = None
    planet_addresses_only = None
    try:
        for name in city_names:
            if name not in files:
                files[name] = open(city_path(name), 'w')
        planet = open(os.path.join(output_dir, 'planet.geojson'), 'w')
        planet_addresses_only = open(os.path.join(output_dir, 'planet_addresses_only.json'), 'w')

        i = 0
        # Holds both venue hashes and address hashes.
        seen = set()

        for url, canonical, venues in gen_venues(input_dir):
            domain = urlparse.urlsplit(url).netloc
            # Drop a leading "www." only.  str.strip('www.') would remove any
            # run of 'w' and '.' characters from BOTH ends of the host name.
            if domain.startswith('www.'):
                domain = domain[len('www.'):]

            for props in venues:
                lat = props.get('latitude')
                lon = props.get('longitude')
                props['canonical'] = canonical
                props['url'] = url
                street = props.get('street_address')
                name = props.get('name')
                # NOTE(review): join() raises TypeError when name or street is
                # missing (None) -- confirm gen_venues always supplies both.
                planet_hash = hashlib.md5(u'|'.join((name, street, str(lat), str(lon), domain)).encode('utf-8')).digest()
                address_hash = hashlib.md5(u'|'.join((name, street, domain)).encode('utf-8')).digest()
                props['guid'] = props.get('guid', random_guid())
                venue = venue_to_geojson(props)

                if lat is not None and lon is not None:
                    try:
                        lat = float(lat)
                        lon = float(lon)
                    except (TypeError, ValueError, OverflowError):
                        # Unusable coordinates: treat the venue as unlocated.
                        lat = None
                        lon = None

                # NOTE(review): the nesting here was reconstructed -- every
                # new venue goes to the planet file, and only located ones go
                # to city files.  Confirm that unlocated venues belong in
                # planet.geojson.
                if planet_hash not in seen:
                    venue_line = json.dumps(venue) + '\n'
                    if lat is not None and lon is not None:
                        # A point query: min and max corners are the same.
                        for c in rtree.intersection((lon, lat, lon, lat)):
                            files[city_names[c]].write(venue_line)
                    planet.write(venue_line)
                    seen.add(planet_hash)

                if address_hash not in seen:
                    planet_addresses_only.write(json.dumps(props) + '\n')
                    seen.add(address_hash)

                i += 1
                if i % 1000 == 0 and i > 0:
                    logger.info('did {}'.format(i))
    finally:
        # Flush and release every output file, also when processing fails.
        for out in list(files.values()) + [planet, planet_addresses_only]:
            if out is not None:
                out.close()

    logger.info('Creating manifest files')

    manifest_files = []
    removed = set()
    for region in all_cities['regions'].values():
        for city, data in region['cities'].items():
            if city in removed:
                continue
            path = city_path(city)
            # The files are closed by now, so the size on disk tells whether
            # anything was written.
            if os.path.getsize(path) == 0:
                os.unlink(path)
                removed.add(city)
                continue
            bbox = data['bbox']
            lat = midpoint(float(bbox['top']), float(bbox['bottom']))
            lon = midpoint(float(bbox['left']), float(bbox['right']))
            manifest_files.append({'latitude': lat, 'longitude': lon, 'file': '{}.geojson'.format(city), 'name': city.replace('_', ', ').replace('-', ' ').title()})

    manifest = {'files': manifest_files}

    with open(os.path.join(output_dir, 'manifest.json'), 'w') as manifest_file:
        json.dump(manifest, manifest_file)

    logger.info('Done!')
def create_spatial_index(shape_dict):
    """Build a spatial ``Index`` over the bounding boxes of the given shapes.

    shape_dict -- mapping of name -> shape; each shape must expose ``bounds``.

    Every entry is inserted under its position in the mapping's iteration
    order, with the name attached as the entry's ``obj`` payload.  Returns
    the populated index (empty when the mapping is empty).
    """
    spatial_index = Index()
    # items() rather than iteritems(): the latter exists on Python 2 only.
    for index, (name, shape) in enumerate(shape_dict.items()):
        spatial_index.insert(index, shape.bounds, obj=name)
    return spatial_index
def __init__(self, *args):
    """Initialise the instance by passing ``*args`` unchanged to ``RTreeIndex.__init__``.

    Only positional arguments are accepted; keyword arguments are not forwarded.
    """
    RTreeIndex.__init__(self, *args)
def __init__(self, *args):
    """Initialise the instance through ``RTreeIndex.__init__`` with ``*args``.

    Raises:
        ImportError: when ``base.HAS_SINDEX`` is falsy; the check runs
            before the base initialiser is called.
    """
    if not base.HAS_SINDEX:
        raise ImportError("SpatialIndex needs `rtree`")
    RTreeIndex.__init__(self, *args)