def overlap_summary(groupbydata, valuedata, fieldmapping=[]):
    """
    Summarizes the values of "valuedata" that overlap "groupbydata".

    Every feature of "groupbydata" whose bbox overlaps the bbox of
    "valuedata" is written to the output with its original row, followed by
    one summary value per fieldmapping entry. The summary is computed from
    the "valuedata" features whose geometry intersects the feature.

    Arguments:
        groupbydata: The dataset whose features define the groups. Its fields
            are copied to the output.
        valuedata: The dataset whose feature values are summarized.
        fieldmapping: A list of (fieldname, statistic) pairs. Valid statistics
            are "count", "sum", "max", "min", and "average". The default list
            is never modified.

    Returns:
        A new GeoTable. Features without any intersecting value features get
        an empty string for every summary field. For the numeric statistics
        only values convertible to float are considered; if none of the
        matched values are numeric, "max", "min" and "average" give None
        (and "sum" gives 0).

    Raises:
        KeyError: If a statistic name in fieldmapping is not one of the valid
            statistics (only raised once a feature has matches).
    """
    data1, data2 = groupbydata, valuedata

    def make_number(value):
        # best-effort conversion, values that are not numbers become None
        try:
            return float(value)
        except (TypeError, ValueError, OverflowError):
            return None

    aggfunctions = {
        "count": len,
        "sum": sum,
        "max": max,
        "min": min,
        "average": lambda seq: sum(seq) / float(len(seq)),
    }
    numericstats = ("sum", "max", "min", "average")
    # these statistics are undefined for an empty sequence
    # (max/min raise ValueError, average divides by zero)
    needsvalues = ("max", "min", "average")

    # create spatial index
    if not hasattr(data1, "spindex"):
        data1.create_spatial_index()
    if not hasattr(data2, "spindex"):
        data2.create_spatial_index()

    # create new
    new = GeoTable()
    new.fields = list(data1.fields)
    new.fields.extend(aggfield for aggfield, aggtype in fieldmapping)

    # for each groupby feature
    for feat in data1.quick_overlap(data2.bbox):
        geom = supershapely(geoj2shapely(feat.geometry))

        # get all value features that intersect
        matches = [otherfeat
                   for otherfeat in data2.quick_overlap(feat.bbox)
                   if geom.intersects(geoj2shapely(otherfeat.geometry))]

        # make newrow from original row
        newrow = list(feat.row)

        if matches:
            # add summary values to newrow based on fieldmapping
            for aggfield, aggtype in fieldmapping:
                values = [otherfeat[aggfield] for otherfeat in matches]
                if aggtype in numericstats:
                    # only consider number values if numeric stats
                    numbers = (make_number(value) for value in values)
                    values = [number for number in numbers if number is not None]
                if not values and aggtype in needsvalues:
                    summaryvalue = None
                else:
                    summaryvalue = aggfunctions[aggtype](values)
                newrow.append(summaryvalue)
        else:
            # otherwise, add empty values
            newrow.extend("" for _ in fieldmapping)

        # write feature to output
        new.add_feature(newrow, feat.geometry)

    return new
def within(feat, other):
    """
    Yields the features in "other" whose geometry intersects a buffer zone.

    The names "geodetic", "radius" and "geom" are not parameters, they are
    read from the surrounding scope at the time the generator is consumed.
    If "geodetic" is true the zone is built by passing the geometry of "feat"
    and "radius" to geodetic_buffer, otherwise it is "geom" buffered by
    "radius".

    Every candidate feature is expected to carry a "_shapely" attribute.
    """
    # build the search zone
    zone = geom.buffer(radius) if not geodetic else geojson2shapely(geodetic_buffer(feat.geometry, radius))
    prepared = supershapely(zone)

    # narrow down the candidates via the spatial index if there is one
    if hasattr(other, "spindex"):
        candidates = other.quick_overlap(zone.bounds)
    else:
        candidates = other

    # only keep the ones that actually intersect the zone
    for candidate in candidates:
        if prepared.intersects(candidate._shapely):
            yield candidate
def clip(data, other, clip_type, condition=None, where=None, by=None, key=None):
    """
    Pairwise clip operation between each pair of features.

    Only the "intersection" clip type is implemented so far. The "difference"
    and "union" types (and any other value) return an output dataset that has
    the fields of "data" but no features.

    Arguments:
        data: The dataset whose features are clipped. Its fields are copied
            to the output.
        other: The dataset whose features are used to clip "data".
        clip_type: "intersection", "union", or "difference".
        condition, where, by (optional): Accepted but not yet used by the
            implementation, see the design notes below.
        key (optional): A function for determining if a pair of features
            should be processed, taking feat and clipfeat as input args and
            returning True or False.

    Returns:
        A new VectorData where each feature has the row of the original
        feature in "data" and the geometry of its intersection with one
        feature in "other".
    """
    # Note: if no by, then pairwise, if by, then accept fieldmapping (and clip_type will be run cumulatively within each group)
    # ie for union/dissolve/collapse, use clip(data, other, "union", by="ID", fieldmapping=...)
    # this way can also specify contiguous dissolve, overlapping dissolve, disjoint dissolve, etc via the where param...
    # for isect and diff is different, only geom clip_type will be applied within each group, each row still intact.
    # hmmm...

    # the body previously referred to an undefined "clipper" name,
    # the clipping dataset is the "other" argument
    clipper = other

    # intersections at or below this area are considered junk
    # TODO: replace with optional snapping
    # NOTE(review): the area test also rejects every point or line result, confirm that only polygon data is expected
    minarea = 0.00000000001

    # create spatial index
    if not hasattr(data, "spindex"):
        data.create_spatial_index()
    if not hasattr(clipper, "spindex"):
        clipper.create_spatial_index()

    out = VectorData()
    out.fields = list(data.fields)

    if clip_type == "intersection":
        for feat in data.quick_overlap(clipper.bbox):
            geom = feat.get_shapely()
            supergeom = supershapely(geom)

            for clipfeat in clipper.quick_overlap(feat.bbox):
                clipgeom = clipfeat.get_shapely()

                # let the user exclude pairs
                if key and not key(feat, clipfeat):
                    continue

                # ignore pairs that dont overlap or only touch on the edge
                if not supergeom.intersects(clipgeom) or geom.touches(clipgeom):
                    continue

                intsec = geom.intersection(clipgeom)
                if not intsec.is_empty and data.type in intsec.geom_type and intsec.area > minarea:
                    out.add_feature(feat.row, intsec.__geo_interface__)

    elif clip_type in ("difference", "union"):
        # not implemented yet
        pass

    return out
def spatial_stats(groupbydata, valuedata, fieldmapping=[], keepall=True, subkey=None, key=None, **kwargs):
    """
    Summarizes the values of "valuedata" that overlap "groupbydata", and adds
    the summary statistics to the output data.

    "groupbydata" must be a vector instance, but "valuedata" can be either a
    vector or raster instance.

    Arguments:
        groupbydata: The vector dataset whose features define the groups.
            Its fields are copied to the output.
        valuedata: The dataset to summarize. Anything that is not a
            VectorData instance is treated as raster data.
        fieldmapping: For vector data, a list of ('outfieldname',
            'getvaluefunction', 'statistic name or function') tuples that
            decides which variables to summarize and how to do so, and which
            is passed on to sql.aggreg. Valid statistics are count, sum, max,
            min, and average. For raster data, each tuple is instead read as
            (outfieldname, band number, statistic name). The default list is
            never modified.
        keepall (optional): If True (default), all features in "groupbydata"
            are looped, and the ones without geometry or without matches are
            kept with None as summary values. If False, only the features
            overlapping the bbox of "valuedata" are looped, and only the ones
            with matches are kept.
        subkey (optional): Vector data only. If set, the matches of each
            group feature are split with sql.groupby(matches, subkey), and
            one output feature is added for each resulting group.
        key (optional): Vector data only. A function for determining if a
            pair of features should be processed, taking feat and clipfeat as
            input args and returning True or False.
        **kwargs: Accepted but not used.

    Returns:
        A new VectorData with the original rows and the added statistics.
    """
    from . import sql

    out = VectorData()

    # add fields
    out.fields = list(groupbydata.fields)
    out.fields.extend([name for name, valfunc, aggfunc in fieldmapping if name not in out.fields])

    def add_row(groupfeat, values, geoj):
        # original row followed by the summary values
        newrow = list(groupfeat.row)
        newrow.extend(values)
        out.add_feature(newrow, geoj)

    def add_empty(groupfeat, geoj):
        # original row followed by one None per summary field
        add_row(groupfeat, (None for _ in fieldmapping), geoj)

    # loop
    if not hasattr(groupbydata, "spindex"):
        groupbydata.create_spatial_index()
    # take advantage of spindex if not keeping all
    groupfeats = groupbydata if keepall else groupbydata.quick_overlap(valuedata.bbox)

    if isinstance(valuedata, VectorData):
        # vector in vector
        if not hasattr(valuedata, "spindex"):
            valuedata.create_spatial_index()

        # when comparing polys to polys, dont count neighbouring polygons that just touch on the edge
        # for lines and points, ok that just touches on the edge
        polys = groupbydata.type == valuedata.type == "Polygon"

        for groupfeat in groupfeats:
            if not groupfeat.geometry:
                if keepall:
                    add_empty(groupfeat, None)
                continue

            geom = groupfeat.get_shapely()
            supergeom = supershapely(geom)

            # collect the overlapping value features
            # (previously this failed whenever key was set, because the key
            # branch produced single features where pairs were expected)
            matches = []
            for valfeat in valuedata.quick_overlap(groupfeat.bbox):
                valgeom = valfeat.get_shapely()
                if key and not key(groupfeat, valfeat):
                    continue
                if not supergeom.intersects(valgeom):
                    continue
                if polys and geom.touches(valgeom):
                    continue
                matches.append(valfeat)

            # TODO: clean potential junk, maybe allow user setting of minimum area
            # (put on hold for now, maybe user should make sure of this in advance?)

            # add
            if not matches:
                if keepall:
                    add_empty(groupfeat, geom.__geo_interface__)
                continue

            if subkey:
                # one output feature per subgroup
                for group in sql.groupby(matches, subkey):
                    add_row(groupfeat, sql.aggreg(group, fieldmapping), geom.__geo_interface__)
            else:
                add_row(groupfeat, sql.aggreg(matches, fieldmapping), geom.__geo_interface__)

    else:
        # raster in vector
        # TODO: For very large files, something in here produces a crash after returning output even though memory use seems low...
        import gc
        from .. import raster

        for f in groupfeats:
            # NOTE(review): features whose crop fails are skipped even if keepall is True,
            # presumably these fall outside the raster, confirm
            try:
                cropped = raster.manager.crop(valuedata, f.bbox)
            except Exception:
                continue

            # TODO: only check overlapping tiles
            # TODO: how to calc stat on multiple overlapping tiles

            # clip the cropped raster to the feature geometry
            fdata = VectorData()
            fdata.add_feature([], f.geometry)
            clipped = raster.manager.clip(cropped, fdata)

            # free the intermediate data right away
            del fdata
            del cropped
            gc.collect()

            # add the feature with empty stats, then fill them in
            row = list(f.row) + [None for _ in fieldmapping]
            outfeat = out.add_feature(row, f.geometry)
            for statfield, bandnum, outstat in fieldmapping:
                stat = clipped.bands[bandnum].summarystats(outstat)[outstat]
                outfeat[statfield] = stat

            del clipped
            gc.collect()

    return out
def spatial_join(data, other, condition, subkey=None, keepall=False, clip=False, **kwargs):
    """
    Pairwise joining with all unique pairs that match the spatial "condition"
    and the optional "subkey" function. Returns a new spatially joined dataset.

    Note: if the other dataset has fields with the same name as the main
    dataset, those will not be joined, keeping only the ones in the main
    dataset.

    Arguments:
        data: The main VectorData dataset to be joined to.
        other: The other VectorData dataset to join to the main one.
        condition: The spatial condition required for joining a pair of
            features (case insensitive). Valid options include:
            - "distance" (along with "radius" and/or "n" args)
            - "intersects", "within", "contains", "crosses", "touches",
              "equals", "covers"
            - "disjoint"
        subkey (optional): If set, acts as an additional non-spatial
            condition. Only the pairs that pass this condition will be tested
            for the spatial condition. Specified as a function that takes a
            pair of features as its argument, and returns True if they should
            be joined.
        keepall (optional): If True, keeps all features in the main dataset
            regardless, adding None for the joined fields of the ones without
            a match. Otherwise only keeps the ones that match (default).
        clip (optional): If the user is interested in the unique spatial
            relationship of each feature, the clip argument can be used to
            clip or alter the geometry of each joined pair. The default
            behavior is for each joined pair to get the geometry of the
            original left feature. Valid values include "intersection",
            "difference", "union", or a function expecting two features and
            returning a GeoJSON dict or None, which will be performed on the
            joined geometries.
            The clip argument can also be used to ignore geometries
            altogether, especially since joins with many matching pairs and
            duplicate geometries may lead to a large memory footprint. To
            reduce the memory footprint, the clip argument can be set to a
            function that returns None, returning a non-spatial table without
            geometries.
        **kwargs: Only used by the "distance" condition:
            - radius: Join the features that intersect a buffer of this size
              around each main feature.
            - n: Join at most this many features, prioritizing the ones that
              intersect the main feature, followed by the rest sorted by
              distance.
            - geodetic: If True (default), the radius buffer is created with
              geodetic_buffer, otherwise with the buffer method of the
              feature geometry.

    Returns:
        A new VectorData with one feature for each joined pair.

    Raises:
        ValueError: If the condition or the clip name is not valid, if the
            output geometry type of a named clip cannot be determined, or if
            the "distance" condition is used without "radius" or "n".
    """
    # TODO: switch if point is other

    condition = condition.lower()

    # create spatial index
    if not hasattr(data, "spindex"):
        data.create_spatial_index()
    if not hasattr(other, "spindex"):
        other.create_spatial_index()

    out = VectorData()
    out.fields = list(data.fields)
    out.fields += (field for field in other.fields if field not in data.fields)

    # positions of the fields in other that are joined
    otheridx = [i for i, field in enumerate(other.fields) if field not in data.fields]

    # determine how to create the geometry of each joined pair
    # either nothing, or a function of two features
    clipper = clip
    if isinstance(clip, basestring):
        clipname = clip

        # ordered from lowest to highest dimension
        dims = ('Point', 'LineString', 'Polygon')
        multiobjs = {'Point': shapely.geometry.MultiPoint,
                     'LineString': shapely.geometry.MultiLineString,
                     'Polygon': shapely.geometry.MultiPolygon}
        types = (data.type, other.type)

        # determine correct output type for each operation
        if clipname == 'intersection':
            # lowest dimension
            newtyp = next((typ for typ in dims if typ in types), None)
        elif clipname == 'union':
            # highest dimension
            newtyp = next((typ for typ in reversed(dims) if typ in types), None)
        elif clipname == 'difference':
            # same as main
            newtyp = data.type
        else:
            raise ValueError("%s is not a valid clip operation" % clipname)

        newmultiobj = None
        if newtyp:
            newmultiobj = next((multiobjs[typ] for typ in dims if typ in newtyp), None)
        if newmultiobj is None:
            raise ValueError("Unable to determine the output geometry type for the %s clip operation" % clipname)

        def clipper(f1, f2):
            clipfunc = getattr(f1.get_shapely(), clipname)
            try:
                geom = clipfunc(f2._shapely)
            except shapely.errors.TopologicalError:
                warnings.warn('A clip operation failed due to invalid geometries, replacing with null-geometry')
                return None

            if not geom:
                return None

            if geom.geom_type == 'GeometryCollection':
                # only get the subgeoms corresponding to the right type
                sgeoms = [g for g in geom.geoms if g.geom_type == newtyp] # single geoms
                mgeoms = [g for g in geom.geoms if g.geom_type == 'Multi'+newtyp] # multi geoms
                flatmgeoms = [g for mg in mgeoms for g in mg.geoms] # flatten multigeoms
                return newmultiobj(sgeoms + flatmgeoms).__geo_interface__

            if newtyp in geom.geom_type:
                # normal
                return geom.__geo_interface__

            # ignore wrong types
            return None

    def prep_other():
        # cache the shapely geometry of each feature in other
        for otherfeat in other:
            if otherfeat.geometry:
                otherfeat._shapely = otherfeat.get_shapely()

    def add_unmatched(feat, geoj):
        # main row followed by None for each joined field
        newrow = list(feat.row)
        newrow += (None for i in otheridx)
        out.add_feature(newrow, geoj)

    def add_joined(feat, matches):
        # one output feature for each match, or an unmatched one if keeping all
        if matches:
            for match in matches:
                if clipper:
                    geoj = clipper(feat, match)
                else:
                    geoj = feat.geometry
                newrow = list(feat.row)
                newrow += (match.row[i] for i in otheridx)
                out.add_feature(newrow, geoj)
        elif keepall:
            add_unmatched(feat, feat.geometry)

    if condition in ("distance",):
        radius = kwargs.get("radius")
        n = kwargs.get("n")
        geodetic = kwargs.get("geodetic", True)
        if not (radius or n):
            raise ValueError("The 'distance' join condition requires a 'radius' or 'n' arg")

        prep_other()

        # match funcs
        def within(feat, geom):
            # features in other that intersect the radius buffer
            if geodetic:
                buff = geojson2shapely(geodetic_buffer(feat.geometry, radius))
            else:
                buff = geom.buffer(radius)
            superbuff = supershapely(buff)
            otherfeats = other.quick_overlap(buff.bounds) if hasattr(other, "spindex") else other
            for otherfeat in otherfeats:
                if superbuff.intersects(otherfeat._shapely):
                    yield otherfeat

        def nearest(geom, otherfeats):
            # TODO: implement optional geodetic distance
            return sorted(otherfeats, key=lambda otherfeat: geom.distance(otherfeat._shapely))

        # begin
        for feat in data:
            if not feat.geometry:
                if keepall:
                    add_unmatched(feat, None)
                continue

            geom = feat.get_shapely()
            supergeom = supershapely(geom)

            # first find overlaps
            overlaps = []
            nonoverlaps = []
            for otherfeat in other.quick_overlap(feat.bbox):
                if subkey and not subkey(feat, otherfeat):
                    continue
                if supergeom.intersects(otherfeat._shapely):
                    overlaps.append(otherfeat)
                else:
                    nonoverlaps.append(otherfeat)
                if n and len(overlaps) >= n:
                    # check if sufficient
                    break

            # otherwise proceed to nonoverlaps
            matches = overlaps
            proceed = len(matches) < n if n else True
            if proceed:
                if radius:
                    # limit to those within radius
                    # NOTE: seems faster to just use existing spindex and exclude those already added
                    nonoverlaps = (otherfeat for otherfeat in within(feat, geom) if otherfeat not in matches)
                else:
                    # add remainder of nonoverlaps
                    for otherfeat in other.quick_disjoint(feat.bbox):
                        nonoverlaps.append(otherfeat)

                # filter by key
                if subkey:
                    nonoverlaps = (otherfeat for otherfeat in nonoverlaps if subkey(feat, otherfeat))

                # then calc dist for nonoverlaps
                if n:
                    # if it gets this far it will be slow regardless of n,
                    # since all dists have to be calculated in order to sort them
                    for otherfeat in nearest(geom, list(nonoverlaps)):
                        matches.append(otherfeat)
                        if len(matches) >= n:
                            break
                else:
                    # means radius is only criteria,
                    # so join with all (within radius)
                    matches.extend(list(nonoverlaps))

            # add
            add_joined(feat, matches)

        return out

    elif condition in ("intersects", "within", "contains", "crosses", "touches", "equals", "covers"):
        prep_other()

        # features outside the bbox of other cannot match, so these only
        # have to be visited if they should be kept regardless
        feats = data if keepall else data.quick_overlap(other.bbox)

        # begin
        for feat in feats:
            if not feat.geometry:
                if keepall:
                    add_unmatched(feat, None)
                continue

            # match funcs
            geom = feat.get_shapely()
            if condition in ("intersects", "contains", "covers"):
                # NOTE(review): presumably the only conditions available on the supershapely geometry, confirm
                matchtest = getattr(supershapely(geom), condition)
            else:
                matchtest = getattr(geom, condition)

            # get spindex possibilities, filter by subkey, and test spatial
            matches = [otherfeat
                       for otherfeat in other.quick_overlap(feat.bbox)
                       if (not subkey or subkey(feat, otherfeat))
                       and matchtest(otherfeat._shapely)]

            # add
            add_joined(feat, matches)

        return out

    elif condition in ("disjoint",):
        prep_other()

        # begin
        for feat in data:
            # check empty geom
            if not feat.geometry:
                if keepall:
                    add_unmatched(feat, None)
                continue

            # first add those whose bboxes clearly dont overlap
            nonoverlaps = [otherfeat
                           for otherfeat in other.quick_disjoint(feat.bbox)
                           if not subkey or subkey(feat, otherfeat)]

            # then check those that might overlap
            geom = feat.get_shapely()
            closeones = [otherfeat
                         for otherfeat in other.quick_overlap(feat.bbox)
                         if (not subkey or subkey(feat, otherfeat))
                         and geom.disjoint(otherfeat._shapely)]

            # add
            add_joined(feat, nonoverlaps + closeones)

        return out

    else:
        raise ValueError("%s is not a valid join condition" % condition)
def overlap_summary(groupbydata, valuedata, fieldmapping=[], keepall=True, key=None, **kwargs):
    """
    Summarizes the values of "valuedata" that overlap "groupbydata", and adds
    the summary statistics to the output data.

    Arguments:
        groupbydata: The dataset whose features define the groups. Its fields
            are copied to the output.
        valuedata: The dataset whose features are summarized.
        fieldmapping: A list of ('outfieldname', 'getvaluefunction',
            'statistic name or function') tuples that decides which variables
            to summarize and how to do so, and which is passed on to
            sql.aggreg. Valid statistics are count, sum, max, min, and
            average. The default list is never modified.
        keepall (optional): If True (default), all features in "groupbydata"
            are looped, and the ones without geometry or without matches are
            kept with empty strings as summary values. If False, only the
            features overlapping the bbox of "valuedata" are looped, and only
            the ones with matches are kept.
        key (optional): A function for determining if a pair of features
            should be processed, taking feat and clipfeat as input args and
            returning True or False.
        **kwargs: Accepted but not used.

    Returns:
        A new VectorData with the original rows and the added statistics.
    """
    from . import sql

    out = VectorData()

    # add fields
    out.fields = list(groupbydata.fields)
    out.fields.extend([name for name, valfunc, aggfunc in fieldmapping])

    # loop
    if not hasattr(groupbydata, "spindex"):
        groupbydata.create_spatial_index()
    if not hasattr(valuedata, "spindex"):
        valuedata.create_spatial_index()
    # take advantage of spindex if not keeping all
    groupfeats = groupbydata if keepall else groupbydata.quick_overlap(valuedata.bbox)

    # when comparing polys to polys, dont count neighbouring polygons that just touch on the edge
    # for lines and points, ok that just touches on the edge
    polys = groupbydata.type == valuedata.type == "Polygon"

    # polygon intersections at or below this area are considered junk
    # maybe allow user setting of minimum area (put on hold for now, maybe user should make sure of this in advance?)
    minarea = 0.00000000001

    for groupfeat in groupfeats:
        newrow = list(groupfeat.row)

        # features without geometry cannot overlap anything
        if not groupfeat.geometry:
            if keepall:
                newrow.extend("" for _ in fieldmapping)
                out.add_feature(newrow, None)
            continue

        geom = groupfeat.get_shapely()
        supergeom = supershapely(geom)

        # collect the overlapping value features
        # (previously this failed whenever key was set, because the key
        # branch produced single features where pairs were expected)
        matches = []
        for valfeat in valuedata.quick_overlap(groupfeat.bbox):
            valgeom = valfeat.get_shapely()
            if key and not key(groupfeat, valfeat):
                continue
            if not supergeom.intersects(valgeom):
                continue
            if polys:
                if geom.touches(valgeom):
                    continue
                intsec = geom.intersection(valgeom)
                if intsec.is_empty or groupbydata.type not in intsec.geom_type or not intsec.area > minarea:
                    continue
            matches.append(valfeat)

        # aggregate and add
        if matches:
            newrow.extend(sql.aggreg(matches, fieldmapping))
            out.add_feature(newrow, geom.__geo_interface__)
        elif keepall:
            newrow.extend("" for _ in fieldmapping)
            out.add_feature(newrow, geom.__geo_interface__)

    return out
def overlap_summary(groupbydata, valuedata, fieldmapping=[]):
    """
    Summarizes the values of "valuedata" that overlap "groupbydata".

    Every feature of "groupbydata" whose bbox overlaps the bbox of
    "valuedata" is written to the output with its original row, followed by
    one summary value per fieldmapping entry. The summary is computed from
    the "valuedata" features whose geometry intersects the feature.

    Arguments:
        groupbydata: The dataset whose features define the groups. Its fields
            are copied to the output.
        valuedata: The dataset whose feature values are summarized.
        fieldmapping: A list of (fieldname, statistic) pairs. Valid statistics
            are "count", "sum", "max", "min", and "average". The default list
            is never modified.

    Returns:
        A new GeoTable. Features without any intersecting value features get
        an empty string for every summary field. For the numeric statistics
        only values convertible to float are considered; if none of the
        matched values are numeric, "max", "min" and "average" give None
        (and "sum" gives 0).

    Raises:
        KeyError: If a statistic name in fieldmapping is not one of the valid
            statistics (only raised once a feature has matches).
    """
    data1, data2 = groupbydata, valuedata

    def make_number(value):
        # best-effort conversion, values that are not numbers become None
        try:
            return float(value)
        except (TypeError, ValueError, OverflowError):
            return None

    aggfunctions = {
        "count": len,
        "sum": sum,
        "max": max,
        "min": min,
        "average": lambda seq: sum(seq) / float(len(seq)),
    }
    numericstats = ("sum", "max", "min", "average")
    # these statistics are undefined for an empty sequence
    # (max/min raise ValueError, average divides by zero)
    needsvalues = ("max", "min", "average")

    # create spatial index
    if not hasattr(data1, "spindex"):
        data1.create_spatial_index()
    if not hasattr(data2, "spindex"):
        data2.create_spatial_index()

    # create new
    new = GeoTable()
    new.fields = list(data1.fields)
    new.fields.extend(aggfield for aggfield, aggtype in fieldmapping)

    # for each groupby feature
    for feat in data1.quick_overlap(data2.bbox):
        geom = supershapely(geoj2shapely(feat.geometry))

        # get all value features that intersect
        matches = [otherfeat
                   for otherfeat in data2.quick_overlap(feat.bbox)
                   if geom.intersects(geoj2shapely(otherfeat.geometry))]

        # make newrow from original row
        newrow = list(feat.row)

        if matches:
            # add summary values to newrow based on fieldmapping
            for aggfield, aggtype in fieldmapping:
                values = [otherfeat[aggfield] for otherfeat in matches]
                if aggtype in numericstats:
                    # only consider number values if numeric stats
                    numbers = (make_number(value) for value in values)
                    values = [number for number in numbers if number is not None]
                if not values and aggtype in needsvalues:
                    summaryvalue = None
                else:
                    summaryvalue = aggfunctions[aggtype](values)
                newrow.append(summaryvalue)
        else:
            # otherwise, add empty values
            newrow.extend("" for _ in fieldmapping)

        # write feature to output
        new.add_feature(newrow, feat.geometry)

    return new