def log_jaccard(im_id: str, cls: int,
                true_mask: np.ndarray, mask: np.ndarray, poly_mask: np.ndarray,
                true_poly: MultiPolygon, poly: MultiPolygon,
                valid_polygons=False):
    """Log and return pixel-level and polygon-level confusion counts.

    :param im_id: image identifier, used only in the log message.
    :param cls: class index, used only in the log message.
    :param true_mask: ground-truth raster mask.
    :param mask: predicted raster mask; must be two-dimensional.
    :param poly_mask: rasterised predicted polygons, used when
        ``valid_polygons`` is false.
    :param true_poly: ground-truth geometry, used when ``valid_polygons`` is
        true.
    :param poly: predicted geometry, used when ``valid_polygons`` is true.
    :param valid_polygons: when true, the polygon counts are areas computed
        from the geometries themselves; otherwise they come from comparing
        ``poly_mask`` against ``true_mask``.
    :return: ``(pixel_jc, poly_jc)``.  In the geometry branch ``poly_jc`` is
        the tuple ``(tp, fp, fn)`` of areas.
    """
    assert len(mask.shape) == 2
    pixel_jc = utils.mask_tp_fp_fn(mask, true_mask, 0.5)

    if not valid_polygons:
        poly_jc = utils.mask_tp_fp_fn(poly_mask, true_mask, 0.5)
    else:
        def _repaired(geometry):
            # buffer(0) is the usual shapely trick for fixing invalid rings.
            if geometry.is_valid:
                return geometry
            return utils.to_multipolygon(geometry.buffer(0))

        truth = _repaired(true_poly)
        predicted = _repaired(poly)
        overlap = truth.intersection(predicted).area
        missed = truth.difference(predicted).area
        spurious = predicted.difference(truth).area
        poly_jc = (overlap, spurious, missed)

    message = '{} cls-{} pixel jaccard: {:.5f}, polygon jaccard: {:.5f}'.format(
        im_id, cls, jaccard(pixel_jc), jaccard(poly_jc))
    logger.info(message)
    return pixel_jc, poly_jc
def pixelwise_vector_f1(gt: List[Polygon], pred: List[Polygon], v: bool = True):
    """
    Compute an area-based F1 score between two sets of vector polygons.

    True positives are the area shared by ground truth and prediction; false
    positives and false negatives are the remaining predicted and ground-truth
    area respectively.

    :param gt: list of shapely Polygons, represents ground truth;
    :param pred: list of shapely Polygons, represents prediction;
    :param v: is_verbose; when true the returned log contains the TP/FN/FP
        areas
    :return: float, f1-score and string, log
    """
    truth = MultiPolygon(gt)
    predicted = MultiPolygon(pred)

    # try making polygons valid
    truth = truth.buffer(0)
    predicted = predicted.buffer(0)

    tp = truth.intersection(predicted).area
    fp = predicted.area - tp
    fn = truth.area - tp

    f1 = 0.
    if not tp == 0:
        precision = tp / (tp + fp)
        recall = tp / (tp + fn)
        f1 = 2 * (precision * recall) / (precision + recall)

    log = ''
    if v:
        log += ''.join([
            'True Positive = ', str(tp),
            ', False Negative = ', str(fn),
            ', False Positive = ', str(fp),
            '\n',
        ])
    return f1, log
def get_geom_list(features, buffer_distance):
    '''
    Create a list of shapely geometries from an input geojson["features"].

    MultiPolygon, MultiLineString and MultiPoint features are assembled part
    by part; every other geometry type is delegated to ``asShape``.

    :param features: iterable of GeoJSON feature dicts, each with a
        ``"geometry"`` mapping holding ``"type"`` and ``"coordinates"``.
    :param buffer_distance: when non-zero, every geometry is buffered by this
        distance before being stored.
    :return: list with one geometry per input feature, in input order.
    '''
    geom_list = []
    for feature_json in features:
        geometry = feature_json["geometry"]
        geometry_type = geometry["type"]
        coordinates = geometry.get("coordinates") if hasattr(geometry, "get") else None

        if geometry_type == "MultiPolygon":
            # Each part is [exterior, hole, hole, ...]: the first ring is the
            # shell and every remaining ring is a hole.  (The previous code
            # passed part[:1], i.e. the shell itself, as the hole list.)
            geom = MultiPolygon(
                [Polygon(part[0], part[1:]) for part in coordinates])
        elif geometry_type == "MultiLineString":
            geom = MultiLineString([LineString(coord) for coord in coordinates])
        elif geometry_type == "MultiPoint":
            geom = MultiPoint([Point(coord) for coord in coordinates])
        else:
            geom = asShape(geometry)

        if buffer_distance != 0:
            geom = geom.buffer(buffer_distance)
        geom_list.append(geom)

    print("Created Geometry List")
    return geom_list
def cache_mask(
    self,
    ogr_path: Path,
    layer_name: str,
    mask_name: str,
) -> None:
    """Cache given mask to disk.

    Reads every feature of ``layer_name`` from ``ogr_path``, merges them into
    one multipolygon, cleans it with a zero-width buffer and pickles the
    result to ``<self.directory>/mask/<mask_name>/mask.pkl``.  Nothing is done
    when that pickle already exists.
    """
    target_dir = self.directory / "mask" / mask_name
    pickle_path = target_dir / "mask.pkl"

    # Guard clause: an existing pickle is never overwritten.
    if pickle_path.exists():
        print("Already cached!")
        return

    target_dir.mkdir(parents=True, exist_ok=True)

    with fiona.open(ogr_path, "r", layer=layer_name) as src:
        polygons = [vector.fiona_polygon(feature) for feature in track(src)]
        mask = MultiPolygon(polygons)

    print("Buffering multipolygon pickle... ", end="")
    mask = mask.buffer(0.0)
    print("Done!")

    print("Writing pickle file... ", end="")
    payload = pickle.dumps(mask, protocol=pickle.HIGHEST_PROTOCOL)
    pickle_path.write_bytes(payload)
    print("Done!")
def buildFilledContourLayer(self, polygons, asLayers=False):
    """Build a vector layer holding one feature per filled-contour band.

    :param polygons: iterable of ``(index, level_min, level_max, polygon)``
        tuples; ``polygon`` is passed to ``MultiPolygon`` as-is.
    :param asLayers: not used by this method.
    :return: the layer created by ``self.createVectorLayer``, populated and
        with a renderer applied.
    """
    name = self.uOutputName.text()
    zField = self._zField
    # Attribute names for the lower and upper bound of each band.
    zmin=zField+'_min'
    zmax=zField+'_max'
    vl = self.createVectorLayer("MultiPolygon", name, FILLED,
                                [('index',int),
                                 (zmin,float),
                                 (zmax,float),
                                 ('label',str)
                                 ])
    pr = vl.dataProvider()
    fields = pr.fields()
    msg = list()      # labels of bands that raised while being added
    symbols=[]        # [level_min, label] pairs handed to the renderer
    ninvalid=0        # geometries still invalid after the repair attempt
    dx,dy=self._origin
    for i, level_min, level_max, polygon in polygons:
        level_min=float(level_min)
        level_max=float(level_max)
        # Human-readable label "<min> - <max><units>".
        levels = (
            self.formatLevel(level_min) + " - " +
            self.formatLevel(level_max) +
            self.uLabelUnits.text()
            )
        try:
            feat = QgsFeature(fields)
            try:
                geom=MultiPolygon(polygon)
                if not geom.is_valid:
                    # Try buffering to create a valid alternative for geometry
                    # Test area is not significantly altered
                    geom2=geom.buffer(0.0)
                    if geom2.area > 0.0 and abs(1-geom.area/geom2.area) < 0.000001:
                        geom=geom2
                    if not geom.is_valid:
                        ninvalid += 1
                qgeom=QgsGeometry.fromWkt(geom.to_wkt())
                # Shift the geometry by the stored origin offset.
                qgeom.translate(dx,dy)
                feat.setGeometry(qgeom)
            except:
                # Any failure while building the geometry silently skips this
                # band; it is NOT recorded in ``msg``.
                continue
            feat['index']=i
            feat[zmin]=level_min
            feat[zmax]=level_max
            feat['label']=levels
            pr.addFeatures( [ feat ] )
            symbols.append([level_min,levels])
        except Exception as ex:
            # NOTE(review): ``ex.message`` and ``unicode`` exist only on
            # Python 2 - confirm the target interpreter.
            self.warnUser(ex.message)
            msg.append(unicode(levels))
    if len(msg) > 0:
        self.warnUser("Levels not represented : "+", ".join(msg),"Filled Contour issue")
    if ninvalid > 0:
        self.warnUser("Matplotlib contouring routine has creating {0} invalid geometries"
                      .format(ninvalid))
    vl.updateExtents()
    vl.commitChanges()
    self.applyRenderer(vl,'polygon',zmin,symbols)
    return vl
def process_shp(session, spno, shp):
    """Load taxon range geometries from an open shapefile into the database.

    For every feature the geometry is converted to a MultiPolygon,
    reprojected to EPSG:4326 and inserted into ``taxon_range`` once per taxon
    id derived from the feature's ``TAXONID`` (a prefix plus ``.``-separated
    suffixes).  When the module-level ``insert_subdivided`` flag is set, the
    subdivided pieces are also inserted into ``taxon_range_subdiv``.

    :param session: database session exposing ``execute(sql, params)``.
    :param spno: expected value of each feature's ``SPNO`` property.
    :param shp: iterable of features that also exposes ``crs``.
    :raises Exception: any error raised while processing a feature is logged
        together with that feature's properties and re-raised.
    """
    insert_range_sql = (
        "INSERT INTO taxon_range (taxon_id, range_id, breeding_range_id, geometry) "
        "VALUES (:taxon_id, :range_id, :breeding_range_id, "
        "ST_GeomFromWKB(_BINARY :geom_wkb))")
    insert_subdiv_sql = (
        "INSERT INTO taxon_range_subdiv (taxon_id, range_id, breeding_range_id, geometry) "
        "VALUES (:taxon_id, :range_id, :breeding_range_id, "
        "ST_GeomFromWKB(_BINARY :geom_wkb))")

    for feature in shp:
        # Reset per feature so the error handler never reports a stale row or
        # fails with NameError when reading the properties itself raises.
        props = None
        try:
            # Convert property names to uppercase
            props = {key.upper(): value
                     for key, value in feature['properties'].items()}

            # NOTE(review): the three early ``return`` statements below stop
            # processing of ALL remaining features, not just this one -
            # confirm that is intended.
            if spno != props['SPNO']:
                log.error('SPNO does not match %s != %s', spno, props['SPNO'])
                return

            if props['RNGE'] in (8, 9):
                # TODO - investigate what these numbers mean
                return

            taxon_id = props['TAXONID']
            parts = _taxon_re.match(taxon_id)
            if parts is None:
                log.error("Invalid taxon id format: %s", taxon_id)
                return

            prefix = parts.group(1)
            suffix = parts.group(2) or ''

            geometry = shape(feature['geometry'])
            if isinstance(geometry, Polygon):
                geometry = MultiPolygon([geometry])
            geometry = reproject(geometry,
                                 pyproj.Proj(shp.crs),
                                 pyproj.Proj('+init=EPSG:4326'))

            for s in suffix.split("."):
                taxon = prefix + s
                taxon_exists = len(session.execute(
                    "SELECT 1 FROM taxon WHERE id = :id",
                    {'id': taxon}).fetchall()) > 0
                if not taxon_exists:
                    continue

                row = {
                    'taxon_id': taxon,
                    'range_id': props['RNGE'] or None,
                    'breeding_range_id': props['BRRNGE'] or None,
                }
                session.execute(
                    insert_range_sql,
                    dict(row, geom_wkb=shapely.wkb.dumps(geometry)))

                if insert_subdivided:
                    # buffer(0) cleans the geometry before it is subdivided.
                    for geom in subdivide_geometry(geometry.buffer(0)):
                        geom = to_multipolygon(geom)
                        if not geom.is_empty:
                            session.execute(
                                insert_subdiv_sql,
                                dict(row, geom_wkb=shapely.wkb.dumps(geom)))
        except Exception:
            log.error("Error processing row: %s", props)
            raise
def mask_to_polygons(mask, epsilon=1, min_area=1., engine='opencv',
                     buffer_amount=0.001):
    """Vectorise a raster mask into a shapely ``MultiPolygon``.

    With ``engine='opencv'`` the mask is thresholded at 0.5, contours are
    extracted with OpenCV, simplified with ``cv2.approxPolyDP`` and assembled
    into polygons with holes.  Any other ``engine`` value vectorises the
    cells where ``mask == 1`` with ``features.shapes``.  The result is always
    buffered by ``buffer_amount``.

    :param mask: image mask array.
    :param epsilon: approximation tolerance passed to ``cv2.approxPolyDP``.
    :param min_area: contours (and holes) with a smaller area are dropped
        (OpenCV engine only).
    :param engine: ``'opencv'`` or anything else for the ``features.shapes``
        path.
    :param buffer_amount: distance passed to ``buffer`` on the final
        geometry.
    :return: ``MultiPolygon``; empty when the OpenCV engine finds no contour.
    """
    # __author__ = Konstantin Lopuhin
    # https://www.kaggle.com/lopuhin/dstl-satellite-imagery-feature-detection/full-pipeline-demo-poly-pixels-ml-poly
    if engine == 'opencv':
        # first, find contours with cv2: it's much faster than shapely.
        # Thresholding with >= 0.5 (rather than == 1) works around masks
        # that were stored without binarisation.
        binary = ((mask >= 0.5) * 255).astype(np.uint8)
        # OpenCV 3 returns (image, contours, hierarchy); OpenCV 2 and 4
        # return (contours, hierarchy).  The last two items are the same in
        # every version.
        contours, hierarchy = cv2.findContours(
            binary, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_TC89_KCOS)[-2:]
        if not contours:
            return MultiPolygon()
        # create approximate contours to have reasonable submission size
        approx_contours = [
            cv2.approxPolyDP(cnt, epsilon, True) for cnt in contours
        ]
        # now messy stuff to associate parent and child contours
        cnt_children = defaultdict(list)
        child_contours = set()
        assert hierarchy.shape[0] == 1
        # http://docs.opencv.org/3.1.0/d9/d8b/tutorial_py_contours_hierarchy.html
        for idx, (_, _, _, parent_idx) in enumerate(hierarchy[0]):
            if parent_idx != -1:
                child_contours.add(idx)
                cnt_children[parent_idx].append(approx_contours[idx])
        # create actual polygons filtering by area (removes artifacts)
        all_polygons = []
        for idx, cnt in enumerate(approx_contours):
            if idx not in child_contours and cv2.contourArea(cnt) >= min_area:
                assert cnt.shape[1] == 1
                holes = [
                    c[:, 0, :] for c in cnt_children.get(idx, [])
                    if cv2.contourArea(c) >= min_area
                ]
                all_polygons.append(Polygon(shell=cnt[:, 0, :], holes=holes))
    else:
        all_polygons = [
            shapely.geometry.shape(geometry)
            for geometry, _value in features.shapes(
                mask.astype(np.int16),
                mask=(mask == 1),
                transform=rasterio.Affine(1.0, 0, 0, 0, 1.0, 0))
        ]

    # approximating polygons might have created invalid ones; the buffer is
    # applied unconditionally to fix them.
    all_polygons = MultiPolygon(all_polygons).buffer(buffer_amount)
    # Sometimes buffer() converts a simple Multipolygon to just a Polygon,
    # need to keep it a Multi throughout
    if all_polygons.geom_type == 'Polygon':
        all_polygons = MultiPolygon([all_polygons])
    return all_polygons
def mask_to_polygons(mask, img_id, epsilon=1, min_area=1., test=True):
    """
    Generate polygons from mask and rescale them to the image's grid.

    :param mask: image mask array; cells equal to 1 are foreground.
    :param img_id: key looked up in ``test_IDs_dict`` (``test`` true) or
        ``train_IDs_dict`` (``test`` false) to obtain the grid image id.
    :param epsilon: approximation tolerance passed to ``cv2.approxPolyDP``.
    :param min_area: contours (and holes) with a smaller area are dropped.
    :param test: selects which id dictionary is used.
    :return: ``MultiPolygon`` scaled by ``Xmax / width`` and
        ``Ymin / height``; an empty, unscaled ``MultiPolygon`` when no
        contour is found.
    """
    # find contours, cv2 switches the x-y coordinate of mask to y-x in contours
    # This matches the wkt data in train_wkt_v4, which is desirable for submission
    binary = ((mask == 1) * 255).astype(np.uint8)
    # OpenCV 3 returns (image, contours, hierarchy); OpenCV 2 and 4 return
    # (contours, hierarchy).  The last two items are the same everywhere.
    contours, hierarchy = cv2.findContours(
        binary, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_TC89_KCOS)[-2:]
    if not contours:
        return MultiPolygon()
    # create approximate contours
    approx_contours = [cv2.approxPolyDP(cnt, epsilon, True) for cnt in contours]

    # associate every hole (child) contour with its parent
    cnt_children = defaultdict(list)
    child_contours = set()
    assert hierarchy.shape[0] == 1
    for idx, (_, _, _, parent_idx) in enumerate(hierarchy[0]):
        if parent_idx != -1:
            child_contours.add(idx)
            cnt_children[parent_idx].append(approx_contours[idx])

    # create actual polygon filtering by area (remove artifacts)
    all_polygons = []
    for idx, cnt in enumerate(approx_contours):
        if idx not in child_contours and cv2.contourArea(cnt) >= min_area:
            assert cnt.shape[1] == 1
            holes = [c[:, 0, :] for c in cnt_children.get(idx, [])
                     if cv2.contourArea(c) >= min_area]
            all_polygons.append(Polygon(shell=cnt[:, 0, :], holes=holes))

    # approximating polygons might have created invalid ones, fix them
    all_polygons = MultiPolygon(all_polygons)
    if not all_polygons.is_valid:
        all_polygons = all_polygons.buffer(0)
        # Sometimes buffer() converts a simple Multipolygon to just a Polygon,
        # need to keep it a Multi throughout
        if all_polygons.geom_type == 'Polygon':
            all_polygons = MultiPolygon([all_polygons])

    grid_id = test_IDs_dict[img_id] if test else train_IDs_dict[img_id]
    grid_row = grid_sizes[grid_sizes.ImageId == grid_id]
    x_max = grid_row.Xmax.values[0]
    y_min = grid_row.Ymin.values[0]
    x_scaler, y_scaler = x_max / mask.shape[1], y_min / mask.shape[0]

    scaled_pred_polygons = scale(all_polygons, xfact=x_scaler, yfact=y_scaler,
                                 origin=(0., 0., 0.))
    return scaled_pred_polygons
def mask_to_polygons(self, mask, epsilon=1, min_area=2., max_area=100,
                     buffer_value=0.0001):
    """Convert a predicted mask into a shapely ``MultiPolygon``.

    :param mask: image mask array; cells equal to 1 are foreground.
    :param epsilon: approximation tolerance passed to ``cv2.approxPolyDP``.
    :param min_area: outer contours and holes with a smaller area are
        dropped.
    :param max_area: outer contours with an area of ``max_area`` or more are
        dropped; holes are dropped only when strictly larger.
    :param buffer_value: buffer distance used to repair an invalid result.
    :return: ``MultiPolygon``; empty when no contour is found.
    """
    # first, find contours with cv2: it's much faster than shapely
    binary = ((mask == 1) * 255).astype(np.uint8)
    # OpenCV 3 returns (image, contours, hierarchy); OpenCV 2 and 4 return
    # (contours, hierarchy).  The last two items are the same everywhere.
    contours, hierarchy = cv2.findContours(
        binary, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_TC89_KCOS)[-2:]
    if not contours:
        return MultiPolygon()
    # create approximate contours to have reasonable submission size
    approx_contours = [
        cv2.approxPolyDP(cnt, epsilon, True) for cnt in contours
    ]
    # now messy stuff to associate parent and child contours
    cnt_children = defaultdict(list)
    child_contours = set()
    assert hierarchy.shape[0] == 1
    # http://docs.opencv.org/3.1.0/d9/d8b/tutorial_py_contours_hierarchy.html
    for idx, (_, _, _, parent_idx) in enumerate(hierarchy[0]):
        if parent_idx != -1:
            child_contours.add(idx)
            cnt_children[parent_idx].append(approx_contours[idx])
    # create actual polygons filtering by area (removes artifacts)
    all_polygons = []
    for idx, cnt in enumerate(approx_contours):
        if idx in child_contours:
            continue
        area = cv2.contourArea(cnt)
        if min_area <= area < max_area:
            assert cnt.shape[1] == 1
            # NOTE(review): holes use an inclusive upper bound while shells
            # use an exclusive one; kept as-is - confirm which is intended.
            holes = [
                c[:, 0, :] for c in cnt_children.get(idx, [])
                if min_area <= cv2.contourArea(c) <= max_area
            ]
            all_polygons.append(Polygon(shell=cnt[:, 0, :], holes=holes))
    # approximating polygons might have created invalid ones, fix them
    all_polygons = MultiPolygon(all_polygons)
    if not all_polygons.is_valid:
        all_polygons = all_polygons.buffer(buffer_value)
        # Sometimes buffer() converts a simple Multipolygon to just a Polygon,
        # need to keep it a Multi throughout
        if all_polygons.geom_type == 'Polygon':
            all_polygons = MultiPolygon([all_polygons])
    return all_polygons
def test_get_field_write_target(self):
    """Check the ESMF-unstructured write target and its file round trip.

    Two geometries (a two-part multipolygon and its 0.1 buffer) are converted
    to a coordinate field, the write target is inspected, both are written to
    disk and the written file is read back and compared with the inputs.
    """
    wkt_large = (
        'Polygon ((-116.94238466549290933 52.12861711455555991, -82.00526805089285176 61.59075286434307372, '
        '-59.92695130138864101 31.0207758265680269, -107.72286778108455962 22.0438778075388484, '
        '-122.76523743459291893 37.08624746104720771, -116.94238466549290933 52.12861711455555991))'
    )
    wkt_small = (
        'Polygon ((-63.08099655131782413 21.31602121140134898, -42.70101185946779765 9.42769680782217279, '
        '-65.99242293586783603 9.912934538580501, -63.08099655131782413 21.31602121140134898))'
    )
    large = wkt.loads(wkt_large)
    small = wkt.loads(wkt_small)

    two_part = MultiPolygon([large, small])
    geoms = [two_part, two_part.buffer(0.1)]

    gvar = GeometryVariable(name='gc', value=geoms, dimensions='elementCount')
    field = gvar.convert_to(node_dim_name='n_node').parent
    self.assertEqual(field.grid.node_dim.name, 'n_node')

    actual = DriverESMFUnstruct._get_field_write_target_(field)
    # Building the write target must not rename dimensions on the source.
    self.assertEqual(field.grid.node_dim.name, 'n_node')
    self.assertNotEqual(id(field), id(actual))
    self.assertEqual(actual['numElementConn'].dtype, np.int32)
    self.assertEqual(actual['elementConn'].dtype, np.int32)
    self.assertNotIn(field.grid.cindex.name, actual)
    self.assertEqual(actual['nodeCoords'].dimensions[0].name, 'nodeCount')

    path = self.get_temporary_file_path('foo.nc')
    actual.write(path)

    # Optional test for loading the mesh file if ESMF is available.
    try:
        import ESMF
    except ImportError:
        pass
    else:
        _ = ESMF.Mesh(filename=path, filetype=ESMF.FileFormat.ESMFMESH)

    driver = DriverKey.NETCDF_ESMF_UNSTRUCT
    path2 = self.get_temporary_file_path('foo2.nc')
    field.write(path2, driver=driver)

    # Test the polygons are equivalent when read from the ESMF unstructured file.
    rd = ocgis.RequestDataset(path2, driver=driver)
    self.assertEqual(rd.driver.key, driver)
    efield = rd.get()
    self.assertEqual(efield.driver.key, driver)
    grid_actual = efield.grid
    self.assertEqual(efield.driver.key, driver)
    self.assertEqual(grid_actual.parent.driver.key, driver)
    self.assertEqual(grid_actual.x.ndim, 1)

    for item in grid_actual.archetype.iter_geometries():
        self.assertPolygonSimilar(item[1], geoms[item[0]])

    ngv = grid_actual.archetype.convert_to()
    self.assertIsInstance(ngv, GeometryVariable)
def mask_to_polygons(mask, epsilon=5, min_area=.1, rect_polygon=False):
    """Vectorise a mask into a vertically mirrored ``MultiPolygon``.

    Contours are found with OpenCV, simplified, mirrored along the y axis
    and assembled into polygons.  Holes are mirrored with the same transform
    as their shell (previously only the shell was mirrored, so holes ended up
    at un-mirrored positions).

    :param mask: image mask array; cells equal to 1 are foreground.
    :param epsilon: approximation tolerance passed to ``cv2.approxPolyDP``.
    :param min_area: contours (and holes) with a smaller area are dropped.
    :param rect_polygon: when true, every shell is replaced by its minimum
        area bounding rectangle (``cv2.minAreaRect``).
    :return: ``MultiPolygon``; empty when no contour is found.
    """
    # Mirror constant: y' = y_flip - y.
    # NOTE(review): this is derived from mask.shape[1], exactly as before;
    # for non-square masks confirm whether mask.shape[0] was intended.
    y_flip = mask.shape[1] - 1

    def _mirrored(contour):
        # Work on a copy so the contours shared between the shell list and
        # the hole lists are never modified in place.
        flipped = contour.copy()
        flipped[:, 0, 1] = y_flip - flipped[:, 0, 1]
        return flipped

    # first, find contours with cv2: it's much faster than shapely
    binary = ((mask == 1) * 255).astype(np.uint8)
    # OpenCV 3 returns (image, contours, hierarchy); OpenCV 2 and 4 return
    # (contours, hierarchy).  The last two items are the same everywhere.
    contours, hierarchy = cv2.findContours(
        binary, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_TC89_KCOS)[-2:]
    if not contours:
        return MultiPolygon()
    # create approximate contours to have reasonable submission size
    approx_contours = [
        cv2.approxPolyDP(cnt, epsilon, True) for cnt in contours
    ]
    # now messy stuff to associate parent and child contours
    cnt_children = defaultdict(list)
    child_contours = set()
    assert hierarchy.shape[0] == 1
    # http://docs.opencv.org/3.1.0/d9/d8b/tutorial_py_contours_hierarchy.html
    for idx, (_, _, _, parent_idx) in enumerate(hierarchy[0]):
        if parent_idx != -1:
            child_contours.add(idx)
            cnt_children[parent_idx].append(approx_contours[idx])
    # create actual polygons filtering by area (removes artifacts)
    all_polygons = []
    for idx, cnt in enumerate(approx_contours):
        if idx not in child_contours and cv2.contourArea(cnt) >= min_area:
            assert cnt.shape[1] == 1
            shell_contour = _mirrored(cnt)
            if rect_polygon:
                # rectangular polygons
                shell = cv2.boxPoints(cv2.minAreaRect(shell_contour))
            else:
                shell = shell_contour[:, 0, :]
            holes = [
                _mirrored(c)[:, 0, :] for c in cnt_children.get(idx, [])
                if cv2.contourArea(c) >= min_area
            ]
            all_polygons.append(Polygon(shell=shell, holes=holes))
    # approximating polygons might have created invalid ones, fix them
    all_polygons = MultiPolygon(all_polygons)
    if not all_polygons.is_valid:
        all_polygons = all_polygons.buffer(0)
        # Sometimes buffer() converts a simple Multipolygon to just a Polygon,
        # need to keep it a Multi throughout
        if all_polygons.geom_type == 'Polygon':
            all_polygons = MultiPolygon([all_polygons])
    return all_polygons
def get_intersections(offs, array_p, array_m, vfunc, gt, n_pixels):
    """Vectorise every non-zero value of ``array_p * array_m``.

    For each distinct non-zero value of the element-wise product, the cells
    holding that value are contoured with OpenCV and turned into one shapely
    geometry (a Polygon, or a MultiPolygon when there are several shells).

    :param offs: two-item offset added to contour x (``offs[0]``) and y
        (``offs[1]``) before conversion.
    :param array_p: first array; nothing is done when it is empty.
    :param array_m: second array; nothing is done when it is empty.
    :param vfunc: callable applied to the offset coordinates together with
        ``gt=gt`` - presumably a vectorised pixel-to-world transform; confirm
        against the caller.
    :param gt: passed through to ``vfunc`` unchanged.
    :param n_pixels: contours whose polygon area is not larger than this are
        ignored.
    :return: ``(polys, indices)`` - the geometries and the array value each
        one was built from, in matching order.
    """
    polys = []
    indices = []
    if len(array_p) > 0 and len(array_m) > 0:
        array = np.multiply(array_p, array_m)
        unique_values = np.unique(array)
        # Scratch binary image, re-created after every processed value.
        temp = np.zeros(array.shape, dtype=np.uint8)
        for u in unique_values:
            if u != 0:
                temp[array == u] = 1
                contours, hier = cv2.findContours(temp, cv2.RETR_CCOMP,
                                                  cv2.CHAIN_APPROX_TC89_L1)
                # Maps the contour index of each shell to its ring
                # coordinates ('exterior' and optionally 'interior').
                p = dict()
                for i, c in enumerate(contours):
                    # The comprehension variable ``p`` below is local to the
                    # comprehension on Python 3 and does not clobber the dict.
                    if len(c) > 2 and Polygon(p[0] for p in c).area > n_pixels:
                        # Offset the points, swap each pair to (y, x) order
                        # and hand the two coordinate rows to ``vfunc``.
                        coords = vfunc(*zip(
                            np.array([[p[0][0] + offs[0], p[0][1] + offs[1]][::-1]
                                      for p in c]).T),
                                       gt=gt)
                        coords = np.array([coords[0][0], coords[1][0]])
                        # hier[0, i, 3] is the parent index; -1 means this
                        # contour is an outer shell.
                        if hier[0, i, 3] == -1:
                            p[i] = dict()
                            p[i]['exterior'] = coords
                        else:
                            # NOTE(review): raises KeyError if the parent
                            # shell was filtered out above - confirm this
                            # cannot happen.
                            if 'interior' in p[hier[0, i, 3]]:
                                p[hier[0, i, 3]]['interior'].append(coords)
                            else:
                                p[hier[0, i, 3]]['interior'] = [coords]
                if p:
                    pp = []
                    for key in p.keys():
                        pp.append(
                            Polygon(
                                np.array(p[key]['exterior']).T, [
                                    np.array(p[key]['interior'][i]).T
                                    for i in range(len(p[key]['interior']))
                                ] if 'interior' in p[key] else []))
                    if len(pp) > 1:
                        poly = MultiPolygon(pp)
                    else:
                        poly = pp[0]
                    if not poly.is_valid:
                        # Accept the buffer(0) repair only when it keeps more
                        # than half of the original area.
                        poly_b = poly.buffer(0)
                        if not poly_b.is_empty and poly_b.area > poly.area / 2:
                            polys.append(poly_b)
                        else:
                            polys.append(poly)
                    else:
                        polys.append(poly)
                    indices.append(u)
                temp = np.zeros(array.shape, dtype=np.uint8)
    return polys, indices
def test_get_field_write_target(self):
    """Check the ESMF-unstructured write target and its file round trip.

    Two geometries (a two-part multipolygon and its 0.1 buffer) are converted
    to a coordinate field, the write target is inspected, both are written to
    disk and the written file is read back and compared with the inputs.
    """
    wkt_large = (
        'Polygon ((-116.94238466549290933 52.12861711455555991, -82.00526805089285176 61.59075286434307372, '
        '-59.92695130138864101 31.0207758265680269, -107.72286778108455962 22.0438778075388484, '
        '-122.76523743459291893 37.08624746104720771, -116.94238466549290933 52.12861711455555991))'
    )
    wkt_small = (
        'Polygon ((-63.08099655131782413 21.31602121140134898, -42.70101185946779765 9.42769680782217279, '
        '-65.99242293586783603 9.912934538580501, -63.08099655131782413 21.31602121140134898))'
    )
    large = wkt.loads(wkt_large)
    small = wkt.loads(wkt_small)

    two_part = MultiPolygon([large, small])
    geoms = [two_part, two_part.buffer(0.1)]

    gvar = GeometryVariable(name='gc', value=geoms, dimensions='elementCount')
    field = gvar.convert_to(node_dim_name='n_node').parent
    self.assertEqual(field.grid.node_dim.name, 'n_node')

    actual = DriverESMFUnstruct._get_field_write_target_(field)
    # Building the write target must not rename dimensions on the source.
    self.assertEqual(field.grid.node_dim.name, 'n_node')
    self.assertNotEqual(id(field), id(actual))
    self.assertEqual(actual['numElementConn'].dtype, np.int32)
    self.assertEqual(actual['elementConn'].dtype, np.int32)
    self.assertNotIn(field.grid.cindex.name, actual)
    self.assertEqual(actual['nodeCoords'].dimensions[0].name, 'nodeCount')

    path = self.get_temporary_file_path('foo.nc')
    actual.write(path)

    # Loading the mesh is only exercised when ESMF can be imported.
    try:
        import ESMF
    except ImportError:
        pass
    else:
        _ = ESMF.Mesh(filename=path, filetype=ESMF.FileFormat.ESMFMESH)

    driver = DriverKey.NETCDF_ESMF_UNSTRUCT
    path2 = self.get_temporary_file_path('foo2.nc')
    field.write(path2, driver=driver)

    # Test the polygons are equivalent when read from the ESMF unstructured file.
    rd = ocgis.RequestDataset(path2, driver=driver)
    self.assertEqual(rd.driver.key, driver)
    efield = rd.get()
    self.assertEqual(efield.driver.key, driver)
    grid_actual = efield.grid
    self.assertEqual(efield.driver.key, driver)
    self.assertEqual(grid_actual.parent.driver.key, driver)
    self.assertEqual(grid_actual.x.ndim, 1)

    for item in grid_actual.archetype.iter_geometries():
        self.assertPolygonSimilar(item[1], geoms[item[0]])

    ngv = grid_actual.archetype.convert_to()
    self.assertIsInstance(ngv, GeometryVariable)
def multipolygon(self) -> MultiPolygon:
    """Return the mesh outline as a ``MultiPolygon``.

    The outline is first built from the triangle edges that occur in exactly
    one triangle.  When the area of the last resulting polygon does not match
    the summed triangle area, polygons built from ``self.sorted()`` rings are
    added and the interiors are collected again.  An invalid result is
    repaired with ``buffer(0)``; a failure of that repair is logged and the
    unrepaired geometry is returned.
    """
    triangles = self._grd.elements.triangulation.triangles
    # Stack the three edges of every triangle, sort each edge's two node
    # indices so (a, b) and (b, a) compare equal, then count occurrences.
    triangle_edges, counts = numpy.unique(
        numpy.sort(
            numpy.concatenate(
                [triangles[:, :2], triangles[:, 1:], triangles[:, [0, 2]]],
                axis=0),
            axis=1,
        ),
        axis=0,
        return_counts=True,
    )
    # Edges seen exactly once are kept as the boundary edges.
    boundary_edges = triangle_edges[counts == 1]
    boundary_edge_points = self._grd.nodes.iloc[:, :2].values[
        boundary_edges]

    exterior_polygons = collect_interiors(
        list(polygonize(boundary_edge_points.tolist())))

    coords = self._grd.nodes.values
    x = coords[:, 0]
    y = coords[:, 1]
    # Sum of the absolute per-triangle areas computed from vertex
    # coordinates.
    total_triangle_area = numpy.sum(
        numpy.abs((x[triangles[:, 0]] *
                   (y[triangles[:, 1]] - y[triangles[:, 2]]) +
                   x[triangles[:, 1]] *
                   (y[triangles[:, 2]] - y[triangles[:, 0]]) +
                   x[triangles[:, 2]] *
                   (y[triangles[:, 0]] - y[triangles[:, 1]])) / 2))
    # NOTE(review): indexing [-1] raises IndexError when polygonize produced
    # nothing - confirm that cannot happen for a non-empty mesh.
    if not numpy.isclose(exterior_polygons[-1].area, total_triangle_area):
        polygon_collection = []
        coords = self._grd.coords.values
        for rings in self.sorted().values():
            # Column 0 of each ring array holds the node indices used to
            # look up coordinates.
            exterior = coords[rings['exterior'][:, 0], :]
            interiors = []
            for interior in rings['interiors']:
                interiors.append(coords[interior[:, 0], :])
            polygon_collection.append(Polygon(exterior, interiors))
        exterior_polygons.extend(polygon_collection)
        exterior_polygons = collect_interiors(exterior_polygons)

    multipolygon = MultiPolygon(exterior_polygons)
    if not multipolygon.is_valid:
        try:
            multipolygon = multipolygon.buffer(0)
        except Exception as error:
            # Best effort: keep the invalid geometry and record the failure.
            logging.exception(error)
    return multipolygon
def geometry_from_feature_collection(feature_collection):
    """Merge the Polygon features of a feature collection into one geometry.

    Only features whose geometry type is exactly ``'Polygon'`` are used; all
    other features are ignored.

    :param feature_collection: mapping with a ``'features'`` list of GeoJSON
        style feature dicts.
    :return: a ``MultiPolygon`` of the polygons, replaced by its ``buffer(0)``
        when it is not valid; ``None`` when there is no Polygon feature.
    """
    polygons = [
        asShape(feature['geometry'])
        for feature in feature_collection['features']
        if feature['geometry']['type'] == 'Polygon'
    ]
    if not polygons:
        return None

    merged = MultiPolygon(polygons)
    return merged if merged.is_valid else merged.buffer(0)
def _polygon_parts(geometry):
    """Return the polygons of ``geometry`` as a list (itself if a Polygon)."""
    if isinstance(geometry, Polygon):
        return [geometry]
    # Multi-part geometries expose their members through ``.geoms``.
    return list(geometry.geoms)


def create_land_areas(polygon_shapefile,
                      extents_wktfile,
                      buffer=land_area_buffer,
                      tolerance=land_area_tolerance,
                      min_points=0,
                      verbose=False):
    """Build buffered land areas from a shapefile and write them as WKT.

    Every polygon exterior read from ``polygon_shapefile`` is passed through
    ``buffered_polygon``; the results are merged with ``buffer(0)`` and
    written to ``extents_wktfile``.  Writing is best effort: a failure does
    not raise.

    :param polygon_shapefile: path of the ESRI shapefile to read.
    :param extents_wktfile: path of the WKT file to write.
    :param buffer: buffer argument forwarded to ``buffered_polygon``.
    :param tolerance: tolerance argument forwarded to ``buffered_polygon``.
    :param min_points: when non-zero, polygons whose exterior has fewer
        points are skipped.
    :param verbose: print progress information.
    :raises RuntimeError: when the shapefile cannot be opened.
    """
    driver = ogr.GetDriverByName('ESRI Shapefile')
    print("Loading land area definition from " + polygon_shapefile)
    datasource = driver.Open(polygon_shapefile, 0)
    if datasource is None:
        # The message previously referenced an undefined name, so this path
        # raised NameError instead of the intended RuntimeError.
        raise RuntimeError('Cannot open land areas file ' + polygon_shapefile)
    layer = datasource.GetLayer()

    npoints = 0
    nskip = 0
    areas = []
    for feature in layer:
        geometry = wkb.loads(feature.GetGeometryRef().ExportToWkb())
        for part in _polygon_parts(geometry):
            npoints1 = len(part.exterior.coords)
            if min_points and npoints1 < min_points:
                nskip += 1
                continue
            # Only the exterior ring is kept; holes are discarded.
            buffered = buffered_polygon(Polygon(part.exterior), buffer,
                                        tolerance)
            # Buffering may return several polygons, so count points over
            # all of them rather than assuming a single exterior.
            pieces = _polygon_parts(buffered)
            npoints2 = sum(len(piece.exterior.coords) for piece in pieces)
            areas.extend(pieces)
            npoints += npoints2
            if verbose:
                print("Polygon: {0} points reduced to {1} points".format(
                    npoints1, npoints2))
    if verbose:
        print("Skipped {0} polygons < {1} points".format(nskip, min_points))

    if areas:
        if verbose:
            print(
                "Forming union of areas - total of {0} points in {1} polygons".
                format(npoints, len(areas)))
        areas = MultiPolygon(areas)
        areas = areas.buffer(0)
        try:
            if verbose:
                print("Writing wkt file {0}".format(extents_wktfile))
            from shapely.wkt import dumps
            with open(extents_wktfile, "w") as laf:
                laf.write(dumps(areas))
        except Exception as error:
            # Writing stays best effort, but no longer traps
            # KeyboardInterrupt / SystemExit, and is reported when verbose.
            if verbose:
                print("Could not write wkt file {0}: {1}".format(
                    extents_wktfile, error))
def mask_to_polygons(mask, epsilon=5, min_area=1.):
    """
    Build (multi)polygons from the network's output image.

    Input:
        - mask: mask image; cells equal to 1 are foreground
        - epsilon: margin of error passed to ``cv2.approxPolyDP``
        - min_area: minimal area for a polygon (and for a hole)
    Returns:
        - all_polygons: ``MultiPolygon`` of all polygons found in mask
          (empty when there is no contour)
    """
    # __author__ = Konstantin Lopuhin
    # https://www.kaggle.com/lopuhin/dstl-satellite-imagery-feature-detection/full-pipeline-demo-poly-pixels-ml-poly

    # first, find contours with cv2: it's much faster than shapely
    binary = ((mask == 1) * 255).astype(np.uint8)
    # OpenCV 3 returns (image, contours, hierarchy); OpenCV 2 and 4 return
    # (contours, hierarchy).  The last two items are the same everywhere.
    contours, hierarchy = cv2.findContours(
        binary, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_TC89_KCOS)[-2:]
    if not contours:
        return MultiPolygon()
    # create approximate contours to have reasonable submission size
    approx_contours = [cv2.approxPolyDP(cnt, epsilon, True)
                       for cnt in contours]
    # now messy stuff to associate parent and child contours
    cnt_children = defaultdict(list)
    child_contours = set()
    assert hierarchy.shape[0] == 1
    # http://docs.opencv.org/3.1.0/d9/d8b/tutorial_py_contours_hierarchy.html
    for idx, (_, _, _, parent_idx) in enumerate(hierarchy[0]):
        if parent_idx != -1:
            child_contours.add(idx)
            cnt_children[parent_idx].append(approx_contours[idx])
    # create actual polygons filtering by area (removes artifacts)
    all_polygons = []
    for idx, cnt in enumerate(approx_contours):
        if idx not in child_contours and cv2.contourArea(cnt) >= min_area:
            assert cnt.shape[1] == 1
            holes = [c[:, 0, :] for c in cnt_children.get(idx, [])
                     if cv2.contourArea(c) >= min_area]
            all_polygons.append(Polygon(shell=cnt[:, 0, :], holes=holes))
    # approximating polygons might have created invalid ones, fix them
    all_polygons = MultiPolygon(all_polygons)
    if not all_polygons.is_valid:
        all_polygons = all_polygons.buffer(0)
        # Sometimes buffer() converts a simple Multipolygon to just a Polygon,
        # need to keep it a Multi throughout
        if all_polygons.geom_type == 'Polygon':
            all_polygons = MultiPolygon([all_polygons])
    return all_polygons
def polygonize(self, mask):
    """Create polygons from binary pixel masks and output as a MultiPolygon.

    Uses OpenCV's ``findContours`` function to extract polygons and the
    Douglas Peucker algorithm (``cv2.approxPolyDP`` with ``self.epsilon``) to
    simplify them.  Contours and holes smaller than ``self.min_area`` are
    dropped.

    :param mask: mask array; values of 0.5 and above count as foreground.
        The array is not modified (it used to be binarised in place).
    :return: ``MultiPolygon``; empty when no contour is found.
    """
    # Same foreground as the former in-place "set < 0.5 to 0, set > 0 to 1,
    # test == 1" sequence, without writing into the caller's array.
    binary = ((mask >= 0.5) * 255).astype(np.uint8)

    # first, find contours with cv2: it's much faster than shapely.
    # OpenCV 3 returns (image, contours, hierarchy); OpenCV 2 and 4 return
    # (contours, hierarchy).  The last two items are the same everywhere.
    contours, hierarchy = cv2.findContours(
        binary, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_TC89_KCOS)[-2:]
    if not contours:
        return MultiPolygon()
    # create approximate contours to have reasonable submission size
    approx_contours = [cv2.approxPolyDP(cnt, self.epsilon, True)
                       for cnt in contours]
    # now messy stuff to associate parent and child contours
    cnt_children = defaultdict(list)
    child_contours = set()
    assert hierarchy.shape[0] == 1
    # http://docs.opencv.org/3.1.0/d9/d8b/tutorial_py_contours_hierarchy.html
    for idx, (_, _, _, parent_idx) in enumerate(hierarchy[0]):
        if parent_idx != -1:
            child_contours.add(idx)
            cnt_children[parent_idx].append(approx_contours[idx])
    # create actual polygons filtering by area (removes artifacts)
    all_polygons = []
    for idx, cnt in enumerate(approx_contours):
        if idx not in child_contours and cv2.contourArea(cnt) >= self.min_area:
            assert cnt.shape[1] == 1
            holes = [c[:, 0, :] for c in cnt_children.get(idx, [])
                     if cv2.contourArea(c) >= self.min_area]
            try:
                poly = Polygon(shell=cnt[:, 0, :], holes=holes)
            except Exception:
                # Best effort: a contour that cannot form a polygon is
                # skipped, without trapping KeyboardInterrupt / SystemExit.
                continue
            all_polygons.append(poly)
    # approximating polygons might have created invalid ones, fix them
    all_polygons = MultiPolygon(all_polygons)
    if not all_polygons.is_valid:
        all_polygons = all_polygons.buffer(0)
        # Sometimes buffer() converts a simple Multipolygon to just a Polygon,
        # need to keep it a Multi throughout
        if all_polygons.geom_type == 'Polygon':
            all_polygons = MultiPolygon([all_polygons])
    return all_polygons
def mask2Polygons(mask, epsilon=2, min_area=1.):
    """
    Convert a binarised image into polygon geometry.

    :param mask: ndarray. Binarised prediction; cells equal to 1 are
        foreground.
    :param epsilon: approximation tolerance passed to ``cv2.approxPolyDP``
        (defaults to the previously hard-coded 2).
    :param min_area: contours and holes with a smaller area are dropped
        (defaults to the previously hard-coded 1).
    :return: ``MultiPolygon`` holding the polygons; empty when no contour is
        found.
    """
    # first, find contours with cv2: it's much faster than shapely.
    binary = ((mask == 1) * 255).astype(np.uint8)
    # OpenCV 3 returns (image, contours, hierarchy); OpenCV 2 and 4 return
    # (contours, hierarchy).  The last two items are the same everywhere.
    contours, hierarchy = cv2.findContours(
        binary, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_TC89_KCOS)[-2:]
    if not contours:
        return MultiPolygon()
    # create approximate contours to have reasonable submission size
    approx_contours = [cv2.approxPolyDP(cnt, epsilon, True)
                       for cnt in contours]
    # now messy stuff to associate parent and child contours
    cnt_children = defaultdict(list)
    child_contours = set()
    assert hierarchy.shape[0] == 1
    # http://docs.opencv.org/3.1.0/d9/d8b/tutorial_py_contours_hierarchy.html
    for idx, (_, _, _, parent_idx) in enumerate(hierarchy[0]):
        if parent_idx != -1:
            child_contours.add(idx)
            cnt_children[parent_idx].append(approx_contours[idx])
    # create actual polygons filtering by area (removes artifacts)
    all_polygons = []
    for idx, cnt in enumerate(approx_contours):
        if idx not in child_contours and cv2.contourArea(cnt) >= min_area:
            assert cnt.shape[1] == 1
            holes = [c[:, 0, :] for c in cnt_children.get(idx, [])
                     if cv2.contourArea(c) >= min_area]
            all_polygons.append(Polygon(shell=cnt[:, 0, :], holes=holes))
    all_polygons = MultiPolygon(all_polygons)
    if not all_polygons.is_valid:
        all_polygons = all_polygons.buffer(0)
        # buffer() may collapse the result to a single Polygon; wrap it so
        # the return type stays MultiPolygon.
        if all_polygons.geom_type == 'Polygon':
            all_polygons = MultiPolygon([all_polygons])
    return all_polygons
def mask_to_polygons(mask, epsilon=5, min_area=1.):
    """Vectorise a mask into a shapely ``MultiPolygon``.

    :param mask: image mask array; cells equal to 1 are foreground.
    :param epsilon: approximation tolerance passed to ``cv2.approxPolyDP``.
    :param min_area: contours (and holes) with a smaller area are dropped.
    :return: ``MultiPolygon``; empty when no contour is found.
    """
    # __author__ = Konstantin Lopuhin
    # https://www.kaggle.com/lopuhin/dstl-satellite-imagery-feature-detection/full-pipeline-demo-poly-pixels-ml-poly

    # first, find contours with cv2: it's much faster than shapely
    threshold_mask = ((mask == 1) * 255).astype(np.uint8)
    # OpenCV 3 returns (image, contours, hierarchy); OpenCV 2 and 4 return
    # (contours, hierarchy).  Taking the last two items works on all of them.
    contours, hierarchy = cv2.findContours(
        threshold_mask, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_TC89_KCOS)[-2:]
    if not contours:
        return MultiPolygon()
    # create approximate contours to have reasonable submission size
    approx_contours = [cv2.approxPolyDP(cnt, epsilon, True)
                       for cnt in contours]
    # now messy stuff to associate parent and child contours
    cnt_children = defaultdict(list)
    child_contours = set()
    assert hierarchy.shape[0] == 1
    # http://docs.opencv.org/3.1.0/d9/d8b/tutorial_py_contours_hierarchy.html
    for idx, (_, _, _, parent_idx) in enumerate(hierarchy[0]):
        if parent_idx != -1:
            child_contours.add(idx)
            cnt_children[parent_idx].append(approx_contours[idx])
    # create actual polygons filtering by area (removes artifacts)
    all_polygons = []
    for idx, cnt in enumerate(approx_contours):
        if idx not in child_contours and cv2.contourArea(cnt) >= min_area:
            assert cnt.shape[1] == 1
            holes = [c[:, 0, :] for c in cnt_children.get(idx, [])
                     if cv2.contourArea(c) >= min_area]
            all_polygons.append(Polygon(shell=cnt[:, 0, :], holes=holes))
    # approximating polygons might have created invalid ones, fix them
    all_polygons = MultiPolygon(all_polygons)
    if not all_polygons.is_valid:
        all_polygons = all_polygons.buffer(0)
        # Sometimes buffer() converts a simple Multipolygon to just a Polygon,
        # need to keep it a Multi throughout
        if all_polygons.geom_type == 'Polygon':
            all_polygons = MultiPolygon([all_polygons])
    return all_polygons
def load_coordinates(self):
    """Read the KML file at ``self.file_name`` and derive region outlines.

    Every ``Placemark`` in the document's folder contributes one line built
    from its ``LineString`` coordinates, plus the altitude of its last
    coordinate.  The lines and altitudes are handed to ``self.find_regions``;
    the result is stored in ``self.region_data`` and the outline of the
    buffered, merged regions in ``self.boundary``.
    """
    with open(self.file_name) as kml_file:
        folder = parser.parse(kml_file).getroot().Document.Folder

    # Need to check for multiple LineString elements
    paths = []
    altitudes = []
    for placemark in folder.Placemark:
        path = []
        altitude = None
        for token in placemark.LineString.coordinates.text.split():
            # Each token is "lon,lat,alt"; only the last altitude is kept.
            lon, lat, altitude = token.split(",")
            path.append((float(lon), float(lat)))
        paths.append(path)
        altitudes.append(float(altitude))

    lines = MultiLineString(paths)
    self.region_data, regions = self.find_regions(lines, altitudes)

    buffered_regions = MultiPolygon(regions).buffer(0.001)
    self.boundary = unary_union(buffered_regions).exterior.xy
def polygon_coor(inpolygon):
    """Merge nearby polygons of a shapefile and return outline coordinates.

    Pairs of polygons closer than 0.3 are joined by a morphological closing
    (buffer out, then in, by 0.75 with mitre joins).  The joined shapes are
    unioned, only parts with an area above the 95th percentile are kept,
    simplified with tolerance 0.05 and written next to the input as
    ``<name>_simplified.shp``.

    :param inpolygon: path of the input shapefile; its last four characters
        are treated as the extension.
    :return: comma-separated ``x,y`` values (rounded to 6 decimals) of the
        kept exteriors, listed from the last vertex to the first.
    """
    eps = 0.75  # width for dilating and eroding (buffer)
    dist = 0.3  # threshold distance

    # read the original shapefile and split multi-part geometries
    df = gpd.read_file(inpolygon)
    single_parts = list(df.explode().geometry)

    # Collect the closed pairs in a plain list: DataFrame.append was removed
    # in pandas 2.0 and copied the whole frame on every call.
    closed_shapes = []
    for first, second in itertools.combinations(single_parts, 2):
        # distance between the two polygons in the shapefile
        if first.distance(second) < dist:
            pair = MultiPolygon([first, second])
            closed_shapes.append(
                pair.buffer(eps, 1, join_style=JOIN_STYLE.mitre).buffer(
                    -eps, 1, join_style=JOIN_STYLE.mitre))

    merged = unary_union(closed_shapes)
    # unary_union returns either one geometry or a multi-part collection;
    # normalise to a list of parts before building the frame.
    merged_parts = list(getattr(merged, 'geoms', [merged]))
    res_explode = gpd.GeoDataFrame({'geometry': merged_parts})
    res_explode["area"] = res_explode['geometry'].area

    # Compute 95 percentile of the area value as the major polygon
    include_area = np.percentile(res_explode["area"].to_numpy(), 95)
    res_explode1 = res_explode[res_explode["area"] > include_area]

    # Simplify the shape
    res_explode1 = res_explode1.simplify(0.05, preserve_topology=True)

    # Collect coordinates for the vertices
    vertices = []
    for polygon in res_explode1:
        xs, ys = polygon.exterior.coords.xy
        vertices.extend(zip(xs, ys))
    # Same ordering as the former insert-at-front loop: last vertex first,
    # each vertex emitted as x then y.
    coordinates = []
    for x, y in reversed(vertices):
        coordinates.append(round(x, 6))
        coordinates.append(round(y, 6))

    # save the resulting shapefile to disk
    res_explode1.crs = df.crs
    res_explode1.to_file(
        os.path.join(os.path.dirname(inpolygon),
                     os.path.basename(inpolygon)[:-4] + "_simplified.shp"))

    return (','.join(map(str, coordinates)))
def mask_to_polygons(mask, epsilon=1, min_area=1.):
    """Vectorise the pixels equal to 1 in ``mask`` into a shapely MultiPolygon.

    :param mask: 2-D array; cells equal to ``1`` are treated as foreground.
    :param epsilon: tolerance passed to ``cv2.approxPolyDP``.
    :param min_area: contours (shells and holes) with a smaller
        ``cv2.contourArea`` are dropped.
    :return: MultiPolygon of the outer contours with their holes; empty when
        no contour is found.
    """
    # __author__ = Konstantin Lopuhin
    # https://www.kaggle.com/lopuhin/dstl-satellite-imagery-feature-detection/full-pipeline-demo-poly-pixels-ml-poly

    # first, find contours with cv2: it's much faster than shapely.
    # OpenCV 3 returns (image, contours, hierarchy) while 2.x/4.x return
    # (contours, hierarchy); the last two items are the same in both.
    contours, hierarchy = cv2.findContours(
        ((mask == 1) * 255).astype(np.uint8),
        cv2.RETR_CCOMP, cv2.CHAIN_APPROX_TC89_L1)[-2:]
    # create approximate contours to have reasonable submission size
    approx_contours = [cv2.approxPolyDP(cnt, epsilon, True)
                       for cnt in contours]
    if not approx_contours:
        return MultiPolygon()

    # now messy stuff to associate parent and child contours
    cnt_children = defaultdict(list)
    child_contours = set()
    assert hierarchy.shape[0] == 1
    # http://docs.opencv.org/3.1.0/d9/d8b/tutorial_py_contours_hierarchy.html
    for idx, (_, _, _, parent_idx) in enumerate(hierarchy[0]):
        if parent_idx != -1:
            child_contours.add(idx)
            cnt_children[parent_idx].append(approx_contours[idx])

    # create actual polygons filtering by area (removes artifacts)
    all_polygons = []
    for idx, cnt in enumerate(approx_contours):
        if idx not in child_contours and cv2.contourArea(cnt) >= min_area:
            assert cnt.shape[1] == 1
            poly = Polygon(
                shell=cnt[:, 0, :],
                holes=[c[:, 0, :] for c in cnt_children.get(idx, [])
                       if cv2.contourArea(c) >= min_area])
            all_polygons.append(poly)

    # approximating polygons might have created invalid ones, fix them
    all_polygons = MultiPolygon(all_polygons)
    if not all_polygons.is_valid:
        all_polygons = all_polygons.buffer(0)
        # buffer(0) may collapse the collection into a single Polygon;
        # keep the return type a MultiPolygon.
        if all_polygons.geom_type == 'Polygon':
            all_polygons = MultiPolygon([all_polygons])
    return all_polygons
def mask2multipolygon(mask_data, mask, trans=(1.0, 0.0, 0.0, 0.0, 1.0, 0.0), conn=4):
    """Build a shapely MultiPolygon from the shapes found in ``mask_data``.

    ``mask_data``, ``mask``, ``conn`` and ``trans`` are forwarded to
    ``shapes`` (as ``mask``, ``connectivity`` and ``transform``).  When the
    resulting MultiPolygon is invalid, a notice is printed and its
    ``buffer(0)`` is returned instead.
    """
    polygons = []
    for geometry, _raster_val in shapes(mask_data, mask=mask,
                                        connectivity=conn, transform=trans):
        polygons.append(shape(geometry))

    multi = MultiPolygon(polygons)
    if multi.is_valid:
        return multi

    print('Not a valid polygon, using it' 's buffer!')
    return multi.buffer(0)
def polygonize(mask, epsilon=1., min_area=10.):
    """Convert a binary raster mask into a shapely MultiPolygon.

    :param mask: image accepted by ``cv2.findContours``.
    :param epsilon: kept for interface compatibility.  NOTE: contour
        approximation is bypassed in this function (the raw contours are
        used), so this value currently has no effect.
    :param min_area: contours (shells and holes) with a smaller
        ``cv2.contourArea`` are dropped.
    :return: MultiPolygon of outer contours with their holes; empty when no
        contour is found.
    """
    # https://www.programcreek.com/python/example/70440/cv2.findContours
    # [-2:] copes with both the 2-tuple (OpenCV 2.x/4.x) and the 3-tuple
    # (OpenCV 3.x) return signatures.
    contours, hierarchy = cv2.findContours(
        mask, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_TC89_KCOS)[-2:]
    if not contours:
        return MultiPolygon()

    # now messy stuff to associate parent and child contours
    cnt_children = defaultdict(list)
    child_contours = set()
    assert hierarchy.shape[0] == 1
    # http://docs.opencv.org/3.1.0/d9/d8b/tutorial_py_contours_hierarchy.html
    for idx, (_, _, _, parent_idx) in enumerate(hierarchy[0]):
        if parent_idx != -1:
            child_contours.add(idx)
            cnt_children[parent_idx].append(contours[idx])

    # create actual polygons filtering by area (removes artifacts)
    all_polygons = []
    for idx, cnt in enumerate(contours):
        if idx not in child_contours and cv2.contourArea(cnt) >= min_area:
            assert cnt.shape[1] == 1
            poly = Polygon(shell=cnt[:, 0, :],
                           holes=[
                               c[:, 0, :] for c in cnt_children.get(idx, [])
                               if cv2.contourArea(c) >= min_area
                           ])
            all_polygons.append(poly)

    # the contours may describe invalid polygons, fix them
    all_polygons = MultiPolygon(all_polygons)
    if not all_polygons.is_valid:
        all_polygons = all_polygons.buffer(0)
        # Sometimes buffer() converts a simple Multipolygon to just a Polygon,
        # need to keep it a Multi throughout
        if all_polygons.geom_type == 'Polygon':
            all_polygons = MultiPolygon([all_polygons])
    return all_polygons
def mask2multipolygon(mask_data, mask, transform=IDENTITY, connectivity=4):
    """Convert from binary mask to shapely multipolygon.

    The arguments are forwarded to ``shapes``; every geometry it yields is
    turned into a shapely object and collected into one MultiPolygon.  An
    invalid result is replaced by its ``buffer(0)`` after printing a notice.
    """
    features = shapes(mask_data,
                      mask=mask,
                      connectivity=connectivity,
                      transform=transform)
    multi = MultiPolygon([shape(geometry) for geometry, _value in features])
    if not multi.is_valid:
        print('Not a valid polygon, using it' 's buffer!')
        multi = multi.buffer(0)
    return multi
def evolve_agat(first_layer, fig=None, N_apexes=-1, layer_width=0.05,
                min_area=0.001):
    """Draw an "agate": a polygon repeatedly shrunk inwards, layer by layer.

    :param first_layer: coordinate sequence of the outermost polygon.
    :param fig: matplotlib figure to draw on; a new one is created when
        ``None``.  Its first axes is reused if it already has one.
    :param N_apexes: buffer resolution passed to ``buffer``; ``-1`` means
        "use the number of vertices of ``first_layer``".
    :param layer_width: distance each layer is shrunk by.
    :param min_area: drawing stops once the remaining area is not larger
        than this value.
    """
    if N_apexes == -1:
        N_apexes = len(first_layer)
    if fig is None:
        fig = plt.figure(1, figsize=(5, 5), dpi=90)
    if len(fig.get_axes()) == 0:
        ax = fig.add_subplot(111)
        ax.set_aspect(1)
        ax.set_title('Agate')
        ax.set_facecolor('black')
    else:
        ax = fig.get_axes()[0]

    layer = MultiPolygon([Polygon(first_layer)])
    while layer.area > min_area:
        # ``.geoms`` works on every Shapely version; iterating the
        # MultiPolygon itself is no longer supported in Shapely 2.
        for polygon in layer.geoms:
            x, y = polygon.exterior.xy
            ax.plot(x, y, choice(colours, 1)[0])
            ax.fill(x, y, choice(colours, 1)[0])
        layer = layer.buffer(-layer_width, N_apexes)
        if layer.is_empty:
            # Nothing left to draw (also avoids wrapping an empty polygon
            # and looping forever when min_area is negative).
            break
        if layer.geom_type == 'Polygon':
            layer = MultiPolygon([layer])
def zonal_stats(vectors, raster, layer_num=0, band_num=1, func=None,
                nodata_value=None, categorical=False, stats=None,
                copy_properties=False, all_touched=False, transform=None):
    """Compute per-feature statistics of an in-memory raster array.

    :param vectors: vector source handed to ``get_features``.
    :param raster: 2-D array (rows, columns) holding the raster values.
    :param layer_num: layer index handed to ``get_features``.
    :param band_num: unused here; kept for interface compatibility.
    :param func: optional callable applied to the masked array of each
        feature; its result is stored under ``func.__name__``.
    :param nodata_value: cells equal to this value are masked out.
    :param categorical: when true and ``stats`` is empty, no default
        statistics are computed.
    :param stats: list (or space-delimited string) of statistic names out of
        ``count min max mean std sum median range func``.
    :param copy_properties: copy the feature ``properties`` into the result.
    :param all_touched: rasterize every cell touched by the geometry instead
        of only the cells whose centre is inside.
    :param transform: GDAL-style geotransform of ``raster`` (required).
    :return: ``(results, entity_images)`` - a list of stat dicts and a list
        of ``{'__fid__': i, 'img': masked array or None}`` dicts.
    :raises Exception: when ``transform`` is missing.
    """
    if not stats:
        # Categorical runs used to leave ``stats`` as None, which broke the
        # membership tests below.
        stats = [] if categorical else ['count', 'min', 'max', 'mean', 'std']
    elif isinstance(stats, str):
        stats = stats.split()
    if func and 'func' not in stats:
        # Build a new list so a caller-supplied list is never mutated.
        stats = list(stats) + ['func']

    # must have transform arg
    if not transform:
        raise Exception("Must provide the 'transform' kwarg")
    rgt = transform
    rsize = (raster.shape[1], raster.shape[0])
    rbounds = raster_extent_as_bounds(rgt, rsize)
    features_iter, _strategy, spatial_ref = get_features(vectors, layer_num)
    # Only the first two items (x/y offset) are read below.
    global_src_offset = (0, 0, raster.shape[0], raster.shape[1])
    global_src_array = raster
    mem_drv = ogr.GetDriverByName('Memory')
    driver = gdal.GetDriverByName('MEM')
    rasterize_options = [
        'ALL_TOUCHED=True' if all_touched else 'ALL_TOUCHED=False']

    results = []
    entity_images = []
    for i, feat in enumerate(features_iter):
        if feat['type'] == "Feature":
            geom = shape(feat['geometry'])
        else:  # it's just a geometry
            geom = shape(feat)

        # Point and MultiPoint don't play well with GDALRasterize
        # convert them into box polygons the size of a raster cell
        buff = rgt[1] / 2.0
        if geom.geom_type == "MultiPoint":
            geom = MultiPolygon(
                [box(*(pt.buffer(buff).bounds)) for pt in geom.geoms])
        elif geom.geom_type == 'Point':
            geom = box(*(geom.buffer(buff).bounds))
        ogr_geom_type = shapely_to_ogr_type(geom.geom_type)

        # "Clip" the geometry bounds to the overall raster bounding box
        # This should avoid any rasterIO errors for partially overlapping polys
        geom_bounds = list(geom.bounds)
        if geom_bounds[0] < rbounds[0]:
            geom_bounds[0] = rbounds[0]
        if geom_bounds[1] < rbounds[1]:
            geom_bounds[1] = rbounds[1]
        if geom_bounds[2] > rbounds[2]:
            geom_bounds[2] = rbounds[2]
        if geom_bounds[3] > rbounds[3]:
            geom_bounds[3] = rbounds[3]

        # calculate new geotransform of the feature subset
        src_offset = bbox_to_pixel_offsets(rgt, geom_bounds)
        new_gt = ((rgt[0] + (src_offset[0] * rgt[1])), rgt[1], 0.0,
                  (rgt[3] + (src_offset[1] * rgt[5])), 0.0, rgt[5])

        if src_offset[2] <= 0 or src_offset[3] <= 0:
            # we're off the raster completely, no overlap at all
            # so there's no need to even bother trying to calculate
            feature_stats = dict([(s, None) for s in stats])
            img = {'__fid__': i, 'img': None}
        else:
            # derive array from global source extent array
            xa = src_offset[0] - global_src_offset[0]
            ya = src_offset[1] - global_src_offset[1]
            xb = xa + src_offset[2]
            yb = ya + src_offset[3]
            src_array = global_src_array[ya:yb, xa:xb]

            # Create a temporary vector layer in memory
            mem_ds = mem_drv.CreateDataSource('out')
            mem_layer = mem_ds.CreateLayer('out', spatial_ref, ogr_geom_type)
            ogr_feature = ogr.Feature(feature_def=mem_layer.GetLayerDefn())
            ogr_geom = ogr.CreateGeometryFromWkt(geom.wkt)
            ogr_feature.SetGeometryDirectly(ogr_geom)
            mem_layer.CreateFeature(ogr_feature)

            # Rasterize it
            rvds = driver.Create('rvds', src_offset[2], src_offset[3], 1,
                                 gdal.GDT_Byte)
            rvds.SetGeoTransform(new_gt)
            gdal.RasterizeLayer(rvds, [1], mem_layer, None, None,
                                burn_values=[1], options=rasterize_options)
            rv_array = rvds.ReadAsArray()

            # Mask the source data array with our current feature
            # we take the logical_not to flip 0<->1 to get the correct mask
            # effect; we also mask out nodata values explicitly
            masked = np.ma.MaskedArray(
                src_array,
                mask=np.logical_or(src_array == nodata_value,
                                   np.logical_not(rv_array)))

            feature_stats = {}
            if 'min' in stats:
                feature_stats['min'] = float(masked.min())
            if 'max' in stats:
                feature_stats['max'] = float(masked.max())
            if 'mean' in stats:
                feature_stats['mean'] = float(masked.mean())
            if 'count' in stats:
                feature_stats['count'] = int(masked.count())
            if 'std' in stats:
                feature_stats['std'] = float(masked.std())
            # optional
            if 'func' in stats:
                feature_stats[func.__name__] = func(masked)
            if 'sum' in stats:
                feature_stats['sum'] = float(masked.sum())
            if 'median' in stats:
                feature_stats['median'] = float(
                    np.median(masked.compressed()))
            if 'range' in stats:
                # Reuse min/max when they were already requested.
                if 'min' in feature_stats:
                    rmin = feature_stats['min']
                else:
                    rmin = float(masked.min())
                if 'max' in feature_stats:
                    rmax = feature_stats['max']
                else:
                    rmax = float(masked.max())
                feature_stats['range'] = rmax - rmin
            img = {'__fid__': i, 'img': masked}

        # Use the enumerated id as __fid__
        feature_stats['__fid__'] = i
        if 'properties' in feat and copy_properties:
            for key, val in list(feat['properties'].items()):
                feature_stats[key] = val
        results.append(feature_stats)
        entity_images.append(img)
    return results, entity_images
def raster_stats(vectors, raster, layer_num=0, band_num=1, nodata_value=None,
                 exclude_ranges=None, global_src_extent=False,
                 categorical=False, stats=None, copy_properties=False):
    """Summary statistics of a GDAL raster, broken out by vector geometries.

    :param vectors: vector source handed to ``get_features`` (must be an OGR
        path when ``global_src_extent`` is true).
    :param raster: path of a GDAL raster.
    :param layer_num: vector layer index.
    :param band_num: raster band number (GDAL counts from 1).
    :param nodata_value: overrides the band's NODATA value when given.
    :param exclude_ranges: optional string of space-separated ``min,max``
        integer pairs; cells inside any (inclusive) range are masked out.
    :param global_src_extent: read the raster once for the whole layer
        extent instead of once per feature.
    :param categorical: return the per-value pixel counts for each feature.
    :param stats: list or space-delimited string of names from
        ``VALID_STATS``; ``"*"``/``"ALL"`` selects all of them.
    :param copy_properties: copy the feature ``properties`` into the result.
    :return: list with one stat dict per feature (includes ``__fid__``).
    :raises RasterStatsError: for unknown stats, an unreadable raster, or
        ``global_src_extent`` with a non-OGR vector source.
    """
    try:
        string_types = basestring  # Python 2
    except NameError:
        string_types = str  # Python 3

    if not stats:
        stats = [] if categorical else DEFAULT_STATS
    elif isinstance(stats, string_types):
        if stats in ["*", "ALL"]:
            stats = VALID_STATS
        else:
            stats = stats.split()
    for x in stats:
        if x not in VALID_STATS:
            raise RasterStatsError("Stat `%s` not valid;"
                                   " must be one of \n %r" % (x, VALID_STATS))

    # run the counter once, only if needed
    run_count = (categorical or "majority" in stats or "minority" in stats
                 or "unique" in stats)

    rds = gdal.Open(raster, GA_ReadOnly)
    if not rds:
        raise RasterStatsError("Cannot open %r as GDAL raster" % raster)
    rb = rds.GetRasterBand(band_num)
    rgt = rds.GetGeoTransform()
    rsize = (rds.RasterXSize, rds.RasterYSize)
    rbounds = raster_extent_as_bounds(rgt, rsize)

    if nodata_value is not None:
        nodata_value = float(nodata_value)
        rb.SetNoDataValue(nodata_value)
    else:
        nodata_value = rb.GetNoDataValue()

    features_iter, strategy, spatial_ref = get_features(vectors, layer_num)

    if global_src_extent:
        # create an in-memory numpy array of the source raster data
        # covering the whole extent of the vector layer
        if strategy != "ogr":
            raise RasterStatsError("global_src_extent requires OGR vector")

        # find extent of ALL features
        ds = ogr.Open(vectors)
        layer = ds.GetLayer(layer_num)
        ex = layer.GetExtent()
        # transform from OGR extent to xmin, ymin, xmax, ymax
        layer_extent = (ex[0], ex[2], ex[1], ex[3])

        global_src_offset = bbox_to_pixel_offsets(rgt, layer_extent)
        global_src_array = rb.ReadAsArray(*global_src_offset)

    mem_drv = ogr.GetDriverByName("Memory")
    driver = gdal.GetDriverByName("MEM")

    results = []
    for i, feat in enumerate(features_iter):
        if feat["type"] == "Feature":
            geom = shape(feat["geometry"])
        else:  # it's just a geometry
            geom = shape(feat)

        # Point and MultiPoint don't play well with GDALRasterize
        # convert them into box polygons the size of a raster cell
        buff = rgt[1] / 2.0
        if geom.geom_type == "MultiPoint":
            geom = MultiPolygon(
                [box(*(pt.buffer(buff).bounds)) for pt in geom.geoms])
        elif geom.geom_type == "Point":
            geom = box(*(geom.buffer(buff).bounds))

        ogr_geom_type = shapely_to_ogr_type(geom.geom_type)

        # "Clip" the geometry bounds to the overall raster bounding box
        # This should avoid any rasterIO errors for partially overlapping polys
        geom_bounds = list(geom.bounds)
        if geom_bounds[0] < rbounds[0]:
            geom_bounds[0] = rbounds[0]
        if geom_bounds[1] < rbounds[1]:
            geom_bounds[1] = rbounds[1]
        if geom_bounds[2] > rbounds[2]:
            geom_bounds[2] = rbounds[2]
        if geom_bounds[3] > rbounds[3]:
            geom_bounds[3] = rbounds[3]

        # calculate new geotransform of the feature subset
        src_offset = bbox_to_pixel_offsets(rgt, geom_bounds)
        new_gt = ((rgt[0] + (src_offset[0] * rgt[1])), rgt[1], 0.0,
                  (rgt[3] + (src_offset[1] * rgt[5])), 0.0, rgt[5])

        if src_offset[2] < 0 or src_offset[3] < 0:
            # we're off the raster completely, no overlap at all
            # so there's no need to even bother trying to calculate
            feature_stats = dict([(s, None) for s in stats])
        else:
            if not global_src_extent:
                # use feature's source extent and read directly from source
                # advantage: each feature uses the smallest raster chunk
                # disadvantage: lots of disk reads on the source raster
                src_array = rb.ReadAsArray(*src_offset)
            else:
                # derive array from global source extent array
                # advantage: reads raster data in one pass before loop
                # disadvantage: large vector extents combined with big
                # rasters need lotsa memory
                xa = src_offset[0] - global_src_offset[0]
                ya = src_offset[1] - global_src_offset[1]
                xb = xa + src_offset[2]
                yb = ya + src_offset[3]
                src_array = global_src_array[ya:yb, xa:xb]

            # Create a temporary vector layer in memory
            mem_ds = mem_drv.CreateDataSource("out")
            mem_layer = mem_ds.CreateLayer("out", spatial_ref, ogr_geom_type)
            ogr_feature = ogr.Feature(feature_def=mem_layer.GetLayerDefn())
            ogr_geom = ogr.CreateGeometryFromWkt(geom.wkt)
            ogr_feature.SetGeometryDirectly(ogr_geom)
            mem_layer.CreateFeature(ogr_feature)

            # Rasterize it
            rvds = driver.Create("rvds", src_offset[2], src_offset[3], 1,
                                 gdal.GDT_Byte)
            rvds.SetGeoTransform(new_gt)
            gdal.RasterizeLayer(rvds, [1], mem_layer, None, None,
                                burn_values=[1])
            rv_array = rvds.ReadAsArray()

            # Build the mask: everything outside the rasterized feature,
            # the nodata value, and every excluded value range.
            places_to_mask = np.logical_not(rv_array)
            if nodata_value is not None:
                places_to_mask = np.logical_or(src_array == nodata_value,
                                               places_to_mask)
            if exclude_ranges is not None:
                for value_range in exclude_ranges.split(" "):
                    bounds = value_range.split(",")
                    range_min = int(bounds[0])
                    range_max = int(bounds[1])
                    places_to_mask = np.logical_or(
                        np.logical_and(src_array >= range_min,
                                       src_array <= range_max),
                        places_to_mask)
            masked = np.ma.masked_where(places_to_mask, src_array)

            if run_count:
                pixel_count = Counter(masked.compressed())

            if categorical:
                feature_stats = dict(pixel_count)
            else:
                feature_stats = {}

            if "min" in stats:
                feature_stats["min"] = float(masked.min())
            if "max" in stats:
                feature_stats["max"] = float(masked.max())
            if "mean" in stats:
                feature_stats["mean"] = float(masked.mean())
            if "count" in stats:
                feature_stats["count"] = int(masked.count())
            # optional
            if "sum" in stats:
                feature_stats["sum"] = float(masked.sum())
            if "std" in stats:
                feature_stats["std"] = float(masked.std())
            if "median" in stats:
                feature_stats["median"] = float(
                    np.median(masked.compressed()))
            if "majority" in stats:
                try:
                    feature_stats["majority"] = \
                        pixel_count.most_common(1)[0][0]
                except IndexError:
                    feature_stats["majority"] = None
            if "minority" in stats:
                try:
                    feature_stats["minority"] = \
                        pixel_count.most_common()[-1][0]
                except IndexError:
                    feature_stats["minority"] = None
            if "unique" in stats:
                feature_stats["unique"] = len(pixel_count)
            if "range" in stats:
                try:
                    rmin = feature_stats["min"]
                except KeyError:
                    rmin = float(masked.min())
                try:
                    rmax = feature_stats["max"]
                except KeyError:
                    rmax = float(masked.max())
                feature_stats["range"] = rmax - rmin

        try:
            # Use the provided feature id as __fid__
            feature_stats["__fid__"] = feat["id"]
        except KeyError:
            # use the enumerator
            feature_stats["__fid__"] = i

        # ``in`` replaces dict.has_key, which no longer exists in Python 3.
        if "properties" in feat and copy_properties:
            for key, val in feat["properties"].items():
                feature_stats[key] = val

        results.append(feature_stats)

    return results
def _stat_or_zero(usable, guard, compute, cast=float):
    """Return ``cast(compute())``, or ``cast(0)`` when the stat is unavailable.

    The stat counts as unavailable when ``usable`` is false, when
    ``guard().any()`` is falsy, or when computing/casting the value raises.
    """
    if usable and guard().any():
        try:
            return cast(compute())
        except Exception:
            pass
    return cast(0)


def raster_stats(vectors, raster, layer_num=0, band_num=1, nodata_value=None,
                 global_src_extent=False, categorical=False, stats=None,
                 copy_properties=False):
    """Summary statistics of a GDAL raster, broken out by vector geometries.

    Unlike the strict variant, numeric statistics that cannot be computed
    (no raster data could be read, or the value is masked/zero) are reported
    as ``0`` / ``0.0`` instead of raising.

    :param vectors: vector source handed to ``get_features`` (must be an OGR
        path when ``global_src_extent`` is true).
    :param raster: path of a GDAL raster.
    :param layer_num: vector layer index.
    :param band_num: raster band number (GDAL counts from 1).
    :param nodata_value: overrides the band's NODATA value when given.
    :param global_src_extent: read the raster once for the whole layer
        extent instead of once per feature.
    :param categorical: return the per-value pixel counts for each feature.
    :param stats: list or space-delimited string of names from
        ``VALID_STATS``; ``'*'``/``'ALL'`` selects all of them.
    :param copy_properties: copy the feature ``properties`` into the result.
    :return: list with one stat dict per processed feature.  Features whose
        geometry cannot be built are skipped.
    :raises RasterStatsError: for unknown stats, an unreadable raster, or
        ``global_src_extent`` with a non-OGR vector source.
    """
    try:
        string_types = basestring  # Python 2
    except NameError:
        string_types = str  # Python 3

    if not stats:
        stats = [] if categorical else DEFAULT_STATS
    elif isinstance(stats, string_types):
        if stats in ['*', 'ALL']:
            stats = VALID_STATS
        else:
            stats = stats.split()
    for x in stats:
        if x not in VALID_STATS:
            raise RasterStatsError("Stat `%s` not valid;"
                                   " must be one of \n %r" % (x, VALID_STATS))

    # run the counter once, only if needed
    run_count = (categorical or 'majority' in stats or 'minority' in stats
                 or 'unique' in stats or 'all' in stats)

    rds = gdal.Open(raster, GA_ReadOnly)
    if not rds:
        raise RasterStatsError("Cannot open %r as GDAL raster" % raster)
    rb = rds.GetRasterBand(band_num)
    rgt = rds.GetGeoTransform()
    rsize = (rds.RasterXSize, rds.RasterYSize)
    rbounds = raster_extent_as_bounds(rgt, rsize)

    if nodata_value is not None:
        nodata_value = float(nodata_value)
        rb.SetNoDataValue(nodata_value)
    else:
        nodata_value = rb.GetNoDataValue()

    features_iter, strategy, spatial_ref = get_features(vectors, layer_num)

    if global_src_extent:
        # create an in-memory numpy array of the source raster data
        # covering the whole extent of the vector layer
        if strategy != "ogr":
            raise RasterStatsError("global_src_extent requires OGR vector")

        # find extent of ALL features
        ds = ogr.Open(vectors)
        layer = ds.GetLayer(layer_num)
        ex = layer.GetExtent()
        # transform from OGR extent to xmin, ymin, xmax, ymax
        layer_extent = (ex[0], ex[2], ex[1], ex[3])

        global_src_offset = bbox_to_pixel_offsets(rgt, layer_extent)
        global_src_array = rb.ReadAsArray(*global_src_offset)

    mem_drv = ogr.GetDriverByName('Memory')
    driver = gdal.GetDriverByName('MEM')

    results = []
    for i, feat in enumerate(features_iter):
        if feat['type'] == "Feature":
            try:
                geom = shape(feat['geometry'])
            except Exception:
                # Unusable geometry: skip the feature.  (The former bare
                # ``next`` was a no-op and reused the previous geometry.)
                continue
        else:  # it's just a geometry
            geom = shape(feat)

        # Point and MultiPoint don't play well with GDALRasterize
        # convert them into box polygons the size of a raster cell
        buff = rgt[1] / 2.0
        if geom.geom_type == "MultiPoint":
            geom = MultiPolygon(
                [box(*(pt.buffer(buff).bounds)) for pt in geom.geoms])
        elif geom.geom_type == 'Point':
            geom = box(*(geom.buffer(buff).bounds))

        ogr_geom_type = shapely_to_ogr_type(geom.geom_type)

        # "Clip" the geometry bounds to the overall raster bounding box
        # This should avoid any rasterIO errors for partially overlapping polys
        geom_bounds = list(geom.bounds)
        if geom_bounds[0] < rbounds[0]:
            geom_bounds[0] = rbounds[0]
        if geom_bounds[1] < rbounds[1]:
            geom_bounds[1] = rbounds[1]
        if geom_bounds[2] > rbounds[2]:
            geom_bounds[2] = rbounds[2]
        if geom_bounds[3] > rbounds[3]:
            geom_bounds[3] = rbounds[3]

        # calculate new geotransform of the feature subset
        src_offset = bbox_to_pixel_offsets(rgt, geom_bounds)
        new_gt = ((rgt[0] + (src_offset[0] * rgt[1])), rgt[1], 0.0,
                  (rgt[3] + (src_offset[1] * rgt[5])), 0.0, rgt[5])

        if src_offset[2] < 0 or src_offset[3] < 0:
            # we're off the raster completely, no overlap at all, so there's
            # no need to even bother trying to calculate
            feature_stats = dict([(s, None) for s in stats])
        else:
            if not global_src_extent:
                # use feature's source extent and read directly from source
                # advantage: each feature uses the smallest raster chunk
                # disadvantage: lots of disk reads on the source raster
                src_array = rb.ReadAsArray(*src_offset)
                if src_array is None:
                    # Retry with one row less when the read failed.
                    src_offset = (src_offset[0], src_offset[1],
                                  src_offset[2], src_offset[3] - 1)
                    src_array = rb.ReadAsArray(*src_offset)
            else:
                # derive array from global source extent array
                # advantage: reads raster data in one pass before loop
                # disadvantage: large vector extents combined with big
                # rasters need lot of memory
                xa = src_offset[0] - global_src_offset[0]
                ya = src_offset[1] - global_src_offset[1]
                xb = xa + src_offset[2]
                yb = ya + src_offset[3]
                src_array = global_src_array[ya:yb, xa:xb]

            # Create a temporary vector layer in memory
            mem_ds = mem_drv.CreateDataSource('out')
            mem_layer = mem_ds.CreateLayer('out', spatial_ref, ogr_geom_type)
            ogr_feature = ogr.Feature(feature_def=mem_layer.GetLayerDefn())
            ogr_geom = ogr.CreateGeometryFromWkt(geom.wkt)
            ogr_feature.SetGeometryDirectly(ogr_geom)
            mem_layer.CreateFeature(ogr_feature)

            # Rasterize it
            rvds = driver.Create('rvds', src_offset[2], src_offset[3], 1,
                                 gdal.GDT_Byte)
            rvds.SetGeoTransform(new_gt)
            gdal.RasterizeLayer(rvds, [1], mem_layer, None, None,
                                burn_values=[1])
            rv_array = rvds.ReadAsArray()

            # Mask the source data array with our current feature
            # we take the logical_not to flip 0<->1 to get the correct mask
            # effect; we also mask out nodata values explicitly.
            # WARNING: src_array may be None when the raster read failed.
            test_ok = src_array is not None
            if test_ok:
                # ``==`` compares element-wise; the former ``is`` was always
                # False, so nodata cells were never masked.
                masked = np.ma.MaskedArray(
                    src_array,
                    mask=np.logical_or(src_array == nodata_value,
                                       np.logical_not(rv_array)))
            else:
                masked = None

            if run_count:
                # An empty Counter keeps dict()/most_common() working when
                # there is no data (the former ``0`` raised).
                if test_ok:
                    pixel_count = Counter(masked.compressed())
                else:
                    pixel_count = Counter()

            if categorical:
                feature_stats = dict(pixel_count)
            else:
                feature_stats = {}

            if 'min' in stats:
                feature_stats['min'] = _stat_or_zero(
                    test_ok, lambda: masked.min(), lambda: masked.min())
            if 'max' in stats:
                feature_stats['max'] = _stat_or_zero(
                    test_ok, lambda: masked.max(), lambda: masked.max())
            if 'mean' in stats:
                feature_stats['mean'] = _stat_or_zero(
                    test_ok, lambda: masked.mean(), lambda: masked.mean())
            if 'count' in stats:
                feature_stats['count'] = _stat_or_zero(
                    test_ok, lambda: masked.count(), lambda: masked.count(),
                    cast=int)
            # optional
            if 'sum' in stats:
                feature_stats['sum'] = _stat_or_zero(
                    test_ok, lambda: masked.sum(), lambda: masked.sum())
            if 'std' in stats:
                feature_stats['std'] = _stat_or_zero(
                    test_ok, lambda: masked.std(), lambda: masked.std())
            if 'median' in stats:
                feature_stats['median'] = _stat_or_zero(
                    test_ok, lambda: masked.compressed(),
                    lambda: np.median(masked.compressed()))
            # 'all' option added by GFT on 17/03/2014
            if 'all' in stats:
                try:
                    feature_stats['all'] = pixel_count.most_common()
                except IndexError:
                    feature_stats['all'] = None
            if 'majority' in stats:
                try:
                    feature_stats['majority'] = \
                        pixel_count.most_common(1)[0][0]
                except IndexError:
                    feature_stats['majority'] = None
            if 'minority' in stats:
                try:
                    feature_stats['minority'] = \
                        pixel_count.most_common()[-1][0]
                except IndexError:
                    feature_stats['minority'] = None
            if 'unique' in stats:
                feature_stats['unique'] = len(pixel_count) if test_ok else 0
            if 'range' in stats:
                try:
                    rmin = feature_stats['min']
                except KeyError:
                    rmin = _stat_or_zero(
                        test_ok, lambda: masked.min(), lambda: masked.min())
                try:
                    rmax = feature_stats['max']
                except KeyError:
                    rmax = _stat_or_zero(
                        test_ok, lambda: masked.max(), lambda: masked.max())
                feature_stats['range'] = rmax - rmin

        try:
            # Use the provided feature id as __fid__
            feature_stats['__fid__'] = feat['id']
        except KeyError:
            # use the enumerator
            feature_stats['__fid__'] = i

        if 'properties' in feat and copy_properties:
            for key, val in feat['properties'].items():
                feature_stats[key] = val

        results.append(feature_stats)

    return results
def zonal_stats(vectors, raster, layer_num=0, band_num=1, nodata_value=None,
                global_src_extent=False, categorical=False, stats=None,
                copy_properties=False, all_touched=False, transform=None,
                add_stats=None, raster_out=False):
    """Summary statistics of a raster, broken out by vector geometries.

    Parameters
    ----------
    vectors : path to an OGR vector source or list of geo_interface or WKT str
    raster : ndarray or path to a GDAL raster source
        If ndarray is passed, the `transform` kwarg is required.
    layer_num : int, optional
        If `vectors` is a path to an OGR source, the vector layer to use
        (counting from 0).
        defaults to 0.
    band_num : int, optional
        If `raster` is a GDAL source, the band number to use (counting from 1).
        defaults to 1.
    nodata_value : float, optional
        If `raster` is a GDAL source, this value overrides any NODATA value
        specified in the file's metadata.
        If `None`, the file's metadata's NODATA value (if any) will be used.
        `ndarray`s don't support `nodata_value`.
        defaults to `None`.
    global_src_extent : bool, optional
        Pre-allocate entire raster before iterating over vector features.
        Use `True` if limited by disk IO or indexing into raster;
        requires sufficient RAM to store array in memory.
        Use `False` with fast disks and a well-indexed raster, or when
        memory-constrained.
        Ignored when `raster` is an ndarray,
        because it is already completely in memory.
        defaults to `False`.
    categorical : bool, optional
        Return the per-value pixel counts of each zone.
    stats : list of str, or space-delimited str, optional
        Which statistics to calculate for each zone.
        All possible choices are listed in `VALID_STATS`;
        `percentile_<q>` names are accepted as well.
        defaults to `DEFAULT_STATS`, a subset of these.
    copy_properties : bool, optional
        Include feature properties alongside the returned stats.
        defaults to `False`
    all_touched : bool, optional
        Whether to include every raster cell touched by a geometry, or only
        those having a center point within the polygon.
        defaults to `False`
    transform : list of float, optional
        GDAL-style geotransform coordinates when `raster` is an ndarray.
        Required when `raster` is an ndarray, otherwise ignored.
    add_stats : dict, optional
        Maps names to functions of additional statistics to compute; each
        function is called with the masked array of the feature.
    raster_out : bool, optional
        Include the masked numpy array for each feature.
        Each feature dictionary will have the following additional keys:
            clipped raster (`mini_raster`)
            Geo-transform (`mini_raster_GT`)
            No Data Value (`mini_raster_NDV`)

    Returns
    -------
    list of dicts
        Each dict represents one vector geometry.
        Its keys include `__fid__` (the geometry feature id)
        and each of the `stats` requested.

    Raises
    ------
    RasterStatsError
        For invalid stat names, a missing `transform` with an ndarray raster,
        an unreadable raster, or `global_src_extent` with a non-OGR source.
    NotImplementedError
        When `nodata_value` is given together with an ndarray raster.
    """
    if not stats:
        stats = [] if categorical else DEFAULT_STATS
    elif isinstance(stats, str):
        if stats in ['*', 'ALL']:
            stats = VALID_STATS
        else:
            stats = stats.split()
    for x in stats:
        if x.startswith("percentile_"):
            try:
                get_percentile(x)
            except ValueError:
                # The stat name is now interpolated into the message.
                raise RasterStatsError(
                    "Stat `%s` is not valid; must use"
                    " `percentile_` followed by a float >= 0 or <= 100" % x)
        elif x not in VALID_STATS:
            raise RasterStatsError(
                "Stat `%s` not valid; "
                "must be one of \n %r" % (x, VALID_STATS))

    # run the counter once, only if needed
    run_count = (categorical or 'majority' in stats or 'minority' in stats
                 or 'unique' in stats)

    if isinstance(raster, np.ndarray):
        raster_type = 'ndarray'

        # must have transform arg
        if not transform:
            raise RasterStatsError("Must provide the 'transform' kwarg when "
                                   "using ndarrays as src raster")
        rgt = transform
        rsize = (raster.shape[1], raster.shape[0])

        # global_src_extent is implicitly turned on, array is already in memory
        global_src_extent = True

        if nodata_value:
            raise NotImplementedError("ndarrays don't support 'nodata_value'")
    else:
        raster_type = 'gdal'
        rds = gdal.Open(raster, GA_ReadOnly)
        if not rds:
            raise RasterStatsError("Cannot open %r as GDAL raster" % raster)
        rb = rds.GetRasterBand(band_num)
        rgt = rds.GetGeoTransform()
        rsize = (rds.RasterXSize, rds.RasterYSize)

        if nodata_value is not None:
            nodata_value = float(nodata_value)
            rb.SetNoDataValue(nodata_value)
        else:
            nodata_value = rb.GetNoDataValue()

    features_iter, strategy, spatial_ref = get_features(vectors, layer_num)

    if global_src_extent and raster_type == 'gdal':
        # create an in-memory numpy array of the source raster data
        # covering the whole extent of the vector layer
        if strategy != "ogr":
            raise RasterStatsError("global_src_extent requires OGR vector")

        # find extent of ALL features
        ds = ogr.Open(vectors)
        layer = ds.GetLayer(layer_num)
        ex = layer.GetExtent()
        # transform from OGR extent to xmin, ymin, xmax, ymax
        layer_extent = (ex[0], ex[2], ex[1], ex[3])

        global_src_offset = bbox_to_pixel_offsets(rgt, layer_extent, rsize)
        global_src_array = rb.ReadAsArray(*global_src_offset)
    elif global_src_extent and raster_type == 'ndarray':
        # Only the first two items (x/y offset) are read below.
        global_src_offset = (0, 0, raster.shape[0], raster.shape[1])
        global_src_array = raster

    mem_drv = ogr.GetDriverByName('Memory')
    driver = gdal.GetDriverByName('MEM')
    rasterize_options = [
        'ALL_TOUCHED=True' if all_touched else 'ALL_TOUCHED=False']

    results = []
    for i, feat in enumerate(features_iter):
        if feat['type'] == "Feature":
            geom = shape(feat['geometry'])
        else:  # it's just a geometry
            geom = shape(feat)

        # Point and MultiPoint don't play well with GDALRasterize
        # convert them into box polygons the size of a raster cell
        buff = rgt[1] / 2.0
        if geom.geom_type == "MultiPoint":
            geom = MultiPolygon([box(*(pt.buffer(buff).bounds))
                                 for pt in geom.geoms])
        elif geom.geom_type == 'Point':
            geom = box(*(geom.buffer(buff).bounds))

        ogr_geom_type = shapely_to_ogr_type(geom.geom_type)

        geom_bounds = list(geom.bounds)

        # calculate new pixel coordinates of the feature subset
        src_offset = bbox_to_pixel_offsets(rgt, geom_bounds, rsize)

        new_gt = (
            (rgt[0] + (src_offset[0] * rgt[1])),
            rgt[1],
            0.0,
            (rgt[3] + (src_offset[1] * rgt[5])),
            0.0,
            rgt[5]
        )

        if src_offset[2] <= 0 or src_offset[3] <= 0:
            # we're off the raster completely, no overlap at all
            # so there's no need to even bother trying to calculate
            feature_stats = dict([(s, None) for s in stats])
        else:
            if not global_src_extent:
                # use feature's source extent and read directly from source
                # fastest option when you have fast disks and fast raster
                # advantage: each feature uses the smallest raster chunk
                # disadvantage: lots of disk reads on the source raster
                src_array = rb.ReadAsArray(*src_offset)
            else:
                # derive array from global source extent array
                # useful *only* when disk IO or raster format inefficiencies
                # are your limiting factor
                # advantage: reads raster data in one pass before loop
                # disadvantage: large vector extents combined with big rasters
                # require lotsa memory
                xa = src_offset[0] - global_src_offset[0]
                ya = src_offset[1] - global_src_offset[1]
                xb = xa + src_offset[2]
                yb = ya + src_offset[3]
                src_array = global_src_array[ya:yb, xa:xb]

            # Create a temporary vector layer in memory
            mem_ds = mem_drv.CreateDataSource('out')
            mem_layer = mem_ds.CreateLayer('out', spatial_ref, ogr_geom_type)
            ogr_feature = ogr.Feature(feature_def=mem_layer.GetLayerDefn())
            ogr_geom = ogr.CreateGeometryFromWkt(geom.wkt)
            ogr_feature.SetGeometryDirectly(ogr_geom)
            mem_layer.CreateFeature(ogr_feature)

            # Rasterize it
            rvds = driver.Create('rvds', src_offset[2], src_offset[3], 1,
                                 gdal.GDT_Byte)
            rvds.SetGeoTransform(new_gt)
            gdal.RasterizeLayer(rvds, [1], mem_layer, None, None,
                                burn_values=[1], options=rasterize_options)
            rv_array = rvds.ReadAsArray()

            # Mask the source data array with our current feature
            # we take the logical_not to flip 0<->1 for the correct mask effect
            # we also mask out nodata values explicitly
            masked = np.ma.MaskedArray(
                src_array,
                mask=np.logical_or(
                    src_array == nodata_value,
                    np.logical_not(rv_array)
                )
            )

            if run_count:
                pixel_count = Counter(masked.compressed())

            if categorical:
                feature_stats = dict(pixel_count)
            else:
                feature_stats = {}

            if 'min' in stats:
                feature_stats['min'] = float(masked.min())
            if 'max' in stats:
                feature_stats['max'] = float(masked.max())
            if 'mean' in stats:
                feature_stats['mean'] = float(masked.mean())
            if 'count' in stats:
                feature_stats['count'] = int(masked.count())
            # optional
            if 'sum' in stats:
                feature_stats['sum'] = float(masked.sum())
            if 'std' in stats:
                feature_stats['std'] = float(masked.std())
            if 'median' in stats:
                feature_stats['median'] = float(
                    np.median(masked.compressed()))
            if 'majority' in stats:
                try:
                    feature_stats['majority'] = \
                        pixel_count.most_common(1)[0][0]
                except IndexError:
                    feature_stats['majority'] = None
            if 'minority' in stats:
                try:
                    feature_stats['minority'] = \
                        pixel_count.most_common()[-1][0]
                except IndexError:
                    feature_stats['minority'] = None
            if 'unique' in stats:
                feature_stats['unique'] = len(pixel_count)
            if 'range' in stats:
                try:
                    rmin = feature_stats['min']
                except KeyError:
                    rmin = float(masked.min())
                try:
                    rmax = feature_stats['max']
                except KeyError:
                    rmax = float(masked.max())
                feature_stats['range'] = rmax - rmin

            for pctile in [s for s in stats if s.startswith('percentile_')]:
                q = get_percentile(pctile)
                pctarr = masked.compressed()
                if pctarr.size == 0:
                    feature_stats[pctile] = None
                else:
                    feature_stats[pctile] = np.percentile(pctarr, q)

            if add_stats is not None:
                for stat_name, stat_func in add_stats.items():
                    feature_stats[stat_name] = stat_func(masked)

            if raster_out:
                masked.fill_value = nodata_value
                masked.data[masked.mask] = nodata_value
                feature_stats['mini_raster'] = masked
                feature_stats['mini_raster_GT'] = new_gt
                feature_stats['mini_raster_NDV'] = nodata_value

        # Use the enumerated id as __fid__
        feature_stats['__fid__'] = i

        if 'properties' in feat and copy_properties:
            for key, val in list(feat['properties'].items()):
                feature_stats[key] = val

        results.append(feature_stats)

    return results
def _process_element(self, element):
    """Project the geometries of ``element`` into ``self.p.projection``.

    An empty element is simply re-tagged with the target projection.
    Otherwise every geometry is projected individually; degenerate
    polygons, empty geometries and geometry collections are dropped, and
    geometries that come out invalid are re-projected from a
    ``buffer(0)``-repaired copy.  A warning is issued when no geometry
    survives.  The projected data is then cast back, where possible, to
    the container type of the input (GeoDataFrame, DataFrame or ndarray).

    Returns a clone of ``element`` holding the projected data with
    ``crs`` set to the target projection.

    Raises ``ValueError`` when the target is a ``ccrs.CRS`` that is not a
    ``ccrs.Projection``.
    """
    if not bool(element):
        return element.clone(crs=self.p.projection)

    crs = element.crs
    proj = self.p.projection
    if (isinstance(crs, ccrs.PlateCarree)
            and not isinstance(proj, ccrs.PlateCarree)
            and crs.proj4_params['lon_0'] != 0):
        # Re-centre a shifted PlateCarree source before the real projection.
        element = self.instance(projection=ccrs.PlateCarree())(element)

    if isinstance(proj, ccrs.CRS) and not isinstance(proj, ccrs.Projection):
        raise ValueError('invalid transform:'
                         ' Spherical contouring is not supported - '
                         ' consider using PlateCarree/RotatedPole.')

    if isinstance(element, Polygons):
        geoms = polygons_to_geom_dicts(element, skip_invalid=False)
    else:
        geoms = path_to_geom_dicts(element, skip_invalid=False)

    projected = []
    for path in geoms:
        geom = path['geometry']

        # Ensure minimum area for polygons (precision issues cause errors)
        if isinstance(geom, Polygon) and geom.area < 1e-15:
            continue
        elif isinstance(geom, MultiPolygon):
            # ``.geoms`` works on every Shapely release; iterating the
            # multi-part geometry directly was removed in Shapely 2.0.
            polys = [g for g in geom.geoms if g.area > 1e-15]
            if not polys:
                continue
            geom = MultiPolygon(polys)
        elif (not geom or isinstance(geom, GeometryCollection)):
            continue

        proj_geom = proj.project_geometry(geom, element.crs)

        # Attempt to fix geometry without being noisy about it
        logger = logging.getLogger()
        # Captured before the ``try`` so the ``finally`` can always restore it.
        prev = logger.level
        try:
            logger.setLevel(logging.ERROR)
            if not proj_geom.is_valid:
                proj_geom = proj.project_geometry(geom.buffer(0), element.crs)
        except Exception:
            # Best effort: skip geometries that cannot be repaired, but let
            # KeyboardInterrupt/SystemExit propagate.
            continue
        finally:
            logger.setLevel(prev)

        if (proj_geom.geom_type == 'GeometryCollection'
                and len(proj_geom.geoms) == 0):
            continue
        data = dict(path, geometry=proj_geom)
        if 'holes' in data:
            data.pop('holes')
        projected.append(data)

    if len(geoms) and len(projected) == 0:
        self.warning('While projecting a %s element from a %s coordinate '
                     'reference system (crs) to a %s projection none of '
                     'the projected paths were contained within the bounds '
                     'specified by the projection. Ensure you have specified '
                     'the correct coordinate system for your data.'
                     % (type(element).__name__, type(element.crs).__name__,
                        type(self.p.projection).__name__))

    # Try casting back to original types
    if element.interface is GeoPandasInterface:
        import geopandas as gpd
        projected = gpd.GeoDataFrame(projected, columns=element.data.columns)
    elif element.interface is MultiInterface:
        x, y = element.kdims
        item = element.data[0] if element.data else None
        if item is None or (isinstance(item, dict) and 'geometry' in item):
            return element.clone(projected, crs=self.p.projection)
        projected = [geom_dict_to_array_dict(p, [x.name, y.name])
                     for p in projected]
        if any('holes' in p for p in projected):
            # Entries carrying holes are left in their dict form.
            pass
        elif pd and isinstance(item, pd.DataFrame):
            projected = [pd.DataFrame(p, columns=item.columns)
                         for p in projected]
        elif isinstance(item, np.ndarray):
            projected = [np.column_stack([p[d.name]
                                          for d in element.dimensions()])
                         for p in projected]
    return element.clone(projected, crs=self.p.projection)
def zonal_stats(vectors, raster, layer_num=0, band_num=1, nodata_value=None,
                global_src_extent=False, categorical=False, stats=None,
                copy_properties=False, all_touched=False, transform=None):
    """Compute summary statistics of ``raster`` for every feature in ``vectors``.

    ``raster`` is either a numpy array (``transform`` is then mandatory and a
    truthy ``nodata_value`` is rejected) or a source opened with
    ``gdal.Open``.  Each feature is burned into an in-memory GDAL raster and
    used as a mask over the matching window of the source data.

    Returns a list with one dict per feature.  Each dict carries the
    requested stats, ``__fid__`` (the enumeration index) and, when
    ``copy_properties`` is set, the feature's properties.  A feature with no
    overlap with the raster gets ``None`` for every requested stat.

    Raises ``RasterStatsError`` for an unknown stat name, a missing
    ``transform`` with an ndarray raster, an unopenable raster, or
    ``global_src_extent`` used with a non-OGR vector source.
    """
    # Resolve which statistics to compute.
    if stats:
        if isinstance(stats, str):
            stats = VALID_STATS if stats in ['*', 'ALL'] else stats.split()
    elif categorical:
        stats = []
    else:
        stats = DEFAULT_STATS

    for stat_name in stats:
        if stat_name not in VALID_STATS:
            raise RasterStatsError("Stat `%s` not valid;"
                                   " must be one of \n %r"
                                   % (stat_name, VALID_STATS))

    # run the counter once, only if needed
    run_count = bool(categorical or 'majority' in stats
                     or 'minority' in stats or 'unique' in stats)

    is_array = isinstance(raster, np.ndarray)
    raster_type = 'ndarray' if is_array else 'gdal'
    if is_array:
        # must have transform arg
        if not transform:
            raise RasterStatsError("Must provide the 'transform' kwarg when "
                                   "using ndarrays as src raster")
        rgt = transform
        rsize = (raster.shape[1], raster.shape[0])
        # the array is already in memory, so the global extent is implied
        global_src_extent = True
        if nodata_value:
            raise NotImplementedError("ndarrays don't support 'nodata_value'")
    else:
        rds = gdal.Open(raster, GA_ReadOnly)
        if not rds:
            raise RasterStatsError("Cannot open %r as GDAL raster" % raster)
        rb = rds.GetRasterBand(band_num)
        rgt = rds.GetGeoTransform()
        rsize = (rds.RasterXSize, rds.RasterYSize)

        if nodata_value is None:
            nodata_value = rb.GetNoDataValue()
        else:
            nodata_value = float(nodata_value)
            rb.SetNoDataValue(nodata_value)

    rbounds = raster_extent_as_bounds(rgt, rsize)
    features_iter, strategy, spatial_ref = get_features(vectors, layer_num)

    if global_src_extent:
        if is_array:
            global_src_offset = (0, 0, raster.shape[0], raster.shape[1])
            global_src_array = raster
        else:
            # read the raster once, over the extent of ALL vector features
            if strategy != "ogr":
                raise RasterStatsError(
                    "global_src_extent requires OGR vector")
            ds = ogr.Open(vectors)
            layer = ds.GetLayer(layer_num)
            ex = layer.GetExtent()
            # OGR extent order -> (xmin, ymin, xmax, ymax)
            layer_extent = (ex[0], ex[2], ex[1], ex[3])
            global_src_offset = bbox_to_pixel_offsets(rgt, layer_extent)
            global_src_array = rb.ReadAsArray(*global_src_offset)

    mem_drv = ogr.GetDriverByName('Memory')
    driver = gdal.GetDriverByName('MEM')

    # Plain reductions over the masked window, in output order.
    reducers = (
        ('min', lambda m: float(m.min())),
        ('max', lambda m: float(m.max())),
        ('mean', lambda m: float(m.mean())),
        ('count', lambda m: int(m.count())),
        # optional
        ('sum', lambda m: float(m.sum())),
        ('std', lambda m: float(m.std())),
        ('median', lambda m: float(np.median(m.compressed()))),
    )

    results = []
    for i, feat in enumerate(features_iter):
        # a bare geometry is accepted as well as a full Feature
        geom = shape(feat['geometry'] if feat['type'] == "Feature" else feat)

        # Point and MultiPoint don't play well with GDALRasterize
        # convert them into box polygons the size of a raster cell
        half_cell = rgt[1] / 2.0
        if geom.type == "MultiPoint":
            geom = MultiPolygon([box(*(pt.buffer(half_cell).bounds))
                                 for pt in geom.geoms])
        elif geom.type == 'Point':
            geom = box(*(geom.buffer(half_cell).bounds))

        ogr_geom_type = shapely_to_ogr_type(geom.type)

        # "Clip" the geometry bounds to the overall raster bounding box
        # This should avoid any rasterIO errors for partially overlapping polys
        geom_bounds = list(geom.bounds)
        geom_bounds[0] = max(geom_bounds[0], rbounds[0])
        geom_bounds[1] = max(geom_bounds[1], rbounds[1])
        geom_bounds[2] = min(geom_bounds[2], rbounds[2])
        geom_bounds[3] = min(geom_bounds[3], rbounds[3])

        # geotransform of the window that covers just this feature
        src_offset = bbox_to_pixel_offsets(rgt, geom_bounds)
        new_gt = (
            (rgt[0] + (src_offset[0] * rgt[1])),
            rgt[1],
            0.0,
            (rgt[3] + (src_offset[1] * rgt[5])),
            0.0,
            rgt[5]
        )

        if src_offset[2] <= 0 or src_offset[3] <= 0:
            # we're off the raster completely, no overlap at all
            feature_stats = dict.fromkeys(stats)
        else:
            if global_src_extent:
                # slice the window out of the array read up front
                col0 = src_offset[0] - global_src_offset[0]
                row0 = src_offset[1] - global_src_offset[1]
                col1 = col0 + src_offset[2]
                row1 = row0 + src_offset[3]
                src_array = global_src_array[row0:row1, col0:col1]
            else:
                # read only this feature's window straight from the band
                src_array = rb.ReadAsArray(*src_offset)

            # Create a temporary vector layer in memory
            mem_ds = mem_drv.CreateDataSource('out')
            mem_layer = mem_ds.CreateLayer('out', spatial_ref, ogr_geom_type)
            ogr_feature = ogr.Feature(feature_def=mem_layer.GetLayerDefn())
            ogr_geom = ogr.CreateGeometryFromWkt(geom.wkt)
            ogr_feature.SetGeometryDirectly(ogr_geom)
            mem_layer.CreateFeature(ogr_feature)

            # Rasterize it
            rvds = driver.Create('rvds', src_offset[2], src_offset[3], 1,
                                 gdal.GDT_Byte)
            rvds.SetGeoTransform(new_gt)
            touch_option = ('ALL_TOUCHED=True' if all_touched
                            else 'ALL_TOUCHED=False')
            gdal.RasterizeLayer(rvds, [1], mem_layer, None, None,
                                burn_values=[1], options=[touch_option])
            rv_array = rvds.ReadAsArray()

            # Mask everything outside the feature (logical_not flips the
            # burned 0/1 raster) as well as explicit nodata cells.
            outside = np.logical_not(rv_array)
            masked = np.ma.MaskedArray(
                src_array,
                mask=np.logical_or(src_array == nodata_value, outside)
            )

            if run_count:
                pixel_count = Counter(masked.compressed())

            feature_stats = dict(pixel_count) if categorical else {}

            for key, reduce_fn in reducers:
                if key in stats:
                    feature_stats[key] = reduce_fn(masked)

            if 'majority' in stats:
                try:
                    feature_stats['majority'] = \
                        pixel_count.most_common(1)[0][0]
                except IndexError:
                    feature_stats['majority'] = None
            if 'minority' in stats:
                try:
                    feature_stats['minority'] = \
                        pixel_count.most_common()[-1][0]
                except IndexError:
                    feature_stats['minority'] = None
            if 'unique' in stats:
                feature_stats['unique'] = len(list(pixel_count.keys()))
            if 'range' in stats:
                # reuse min/max when they were already computed
                if 'min' in feature_stats:
                    rmin = feature_stats['min']
                else:
                    rmin = float(masked.min())
                if 'max' in feature_stats:
                    rmax = feature_stats['max']
                else:
                    rmax = float(masked.max())
                feature_stats['range'] = rmax - rmin

        # Use the enumerated id as __fid__
        feature_stats['__fid__'] = i

        if 'properties' in feat and copy_properties:
            for key, val in feat['properties'].items():
                feature_stats[key] = val

        results.append(feature_stats)

    return results
def zonal_stats(vectors, raster, layer_num=0, band_num=1, nodata_value=None,
                global_src_extent=False, categorical=False, stats=None,
                copy_properties=False, all_touched=False, transform=None,
                affine=None, add_stats=None, raster_out=False,
                opt_georaster=False):
    """Summarise a raster within each vector geometry.

    Parameters
    ----------
    vectors : path to an OGR vector source or list of geo_interface or WKT str
    raster : ndarray or path to a GDAL raster source
        An ndarray requires the `transform` (or `affine`) kwarg.
    layer_num : int, optional
        Layer of the OGR source to read, counted from 0.  defaults to 0.
    band_num : int, optional
        Band of the GDAL source to read, counted from 1.  defaults to 1.
    nodata_value : float, optional
        Overrides the NODATA value stored in the raster file; when `None`
        the file's own value (if any) is used.  Not supported for
        ndarrays.  defaults to `None`.
    global_src_extent : bool, optional
        Read the raster into memory once, before looping over features,
        instead of reading one window per feature.  Always on for
        ndarrays.  defaults to `False`.
    categorical : bool, optional
        Report a count per distinct raster value.
    stats : list of str, or space-delimited str, optional
        Statistics to compute; `'*'` or `'ALL'` selects `VALID_STATS`.
        `percentile_<q>` names are accepted.  defaults to `DEFAULT_STATS`.
    copy_properties : bool, optional
        Copy each feature's properties into its result.  defaults to `False`
    all_touched : bool, optional
        Passed through to the rasterizer.  defaults to `False`
    transform : list or tuple of 6 floats or Affine object, optional
        GDAL-style geotransform or Affine; `affine` is an alias.
    add_stats : dict mapping stat name to a function of the masked array,
        optional
    raster_out : bool, optional
        Add the masked array (`mini_raster`) to each result, together with
        `mini_raster_GT` and `mini_raster_NDV`.
    opt_georaster : bool, optional
        With `raster_out`, store `mini_raster` as a `georasters.GeoRaster`.

    Returns
    -------
    list of dicts
        One dict per geometry holding `__fid__` and the requested stats.
    """
    # Work out the list of statistics to compute.
    if stats:
        if isinstance(stats, str):
            stats = VALID_STATS if stats in ['*', 'ALL'] else stats.split()
    elif categorical:
        stats = []
    else:
        stats = DEFAULT_STATS

    for stat_name in stats:
        if stat_name.startswith("percentile_"):
            # called for validation only; the value is recomputed later
            get_percentile(stat_name)
        elif stat_name not in VALID_STATS:
            raise ValueError(
                "Stat `%s` not valid; "
                "must be one of \n %r" % (stat_name, VALID_STATS))

    if opt_georaster:
        import georasters

    # run the counter once, only if needed
    run_count = bool(categorical or 'majority' in stats
                     or 'minority' in stats or 'unique' in stats)

    is_array = isinstance(raster, np.ndarray)
    raster_type = 'ndarray' if is_array else 'gdal'
    if is_array:
        # must have transform info
        if affine:
            transform = affine
        if not transform:
            raise ValueError("Must provide the 'transform' kwarg "
                             "when using ndarrays as src raster")
        try:
            rgt = transform.to_gdal()  # an Affine object
        except AttributeError:
            rgt = transform  # a GDAL geotransform

        rshape = (raster.shape[1], raster.shape[0])

        # the array is already in memory, so the global extent is implied
        global_src_extent = True

        if nodata_value:
            raise NotImplementedError("ndarrays don't support 'nodata_value'")
    else:
        with rasterio.drivers(), rasterio.open(raster, 'r') as src:
            affine = src.affine
            rgt = affine.to_gdal()
            rshape = (src.width, src.height)
            rnodata = src.nodata

        if nodata_value is None:
            nodata_value = rnodata
        else:
            # override with specified nodata
            nodata_value = float(nodata_value)

    features_iter, strategy, spatial_ref = get_features(vectors, layer_num)

    if global_src_extent:
        if is_array:
            global_src_offset = (0, 0, raster.shape[0], raster.shape[1])
            global_src_array = raster
        else:
            # create an in-memory numpy array of the source raster data
            extent = raster_extent_as_bounds(rgt, rshape)
            global_src_offset = bbox_to_pixel_offsets(rgt, extent, rshape)
            window = pixel_offsets_to_window(global_src_offset)
            with rasterio.drivers(), rasterio.open(raster, 'r') as src:
                global_src_array = src.read(
                    band_num, window=window, masked=False)

    # Plain reductions over the masked window, in output order.
    reducers = (
        ('min', lambda m: float(m.min())),
        ('max', lambda m: float(m.max())),
        ('mean', lambda m: float(m.mean())),
        ('count', lambda m: int(m.count())),
        # optional
        ('sum', lambda m: float(m.sum())),
        ('std', lambda m: float(m.std())),
        ('median', lambda m: float(np.median(m.compressed()))),
    )

    results = []
    for i, feat in enumerate(features_iter):
        # a bare geometry is accepted as well as a full Feature
        geom = shape(feat['geometry'] if feat['type'] == "Feature" else feat)

        # Point and MultiPoint don't play well with GDALRasterize
        # convert them into box polygons the size of a raster cell
        half_cell = rgt[1] / 2.0
        if geom.type == "MultiPoint":
            geom = MultiPolygon([box(*(pt.buffer(half_cell).bounds))
                                 for pt in geom.geoms])
        elif geom.type == 'Point':
            geom = box(*(geom.buffer(half_cell).bounds))

        geom_bounds = list(geom.bounds)

        # calculate new pixel coordinates of the feature subset
        src_offset = bbox_to_pixel_offsets(rgt, geom_bounds, rshape)

        new_gt = (
            (rgt[0] + (src_offset[0] * rgt[1])),
            rgt[1],
            0.0,
            (rgt[3] + (src_offset[1] * rgt[5])),
            0.0,
            rgt[5]
        )

        if src_offset[2] <= 0 or src_offset[3] <= 0:
            # we're off the raster completely, no overlap at all
            feature_stats = dict.fromkeys(stats)
        else:
            if global_src_extent:
                # subset feature array from global source extent array
                col0 = src_offset[0] - global_src_offset[0]
                row0 = src_offset[1] - global_src_offset[1]
                col1 = col0 + src_offset[2]
                row1 = row0 + src_offset[3]
                src_array = global_src_array[row0:row1, col0:col1]
            else:
                # use feature's source extent and read directly from source
                window = pixel_offsets_to_window(src_offset)
                with rasterio.drivers(), rasterio.open(raster, 'r') as src:
                    src_array = src.read(
                        band_num, window=window, masked=False)

            # create ndarray of rasterized geometry
            rv_array = rasterize_geom(geom, src_offset, new_gt, all_touched)
            assert rv_array.shape == src_array.shape

            # Mask everything outside the feature (logical_not flips the
            # burned 0/1 raster) as well as explicit nodata cells.
            outside = np.logical_not(rv_array)
            masked = np.ma.MaskedArray(
                src_array,
                mask=np.logical_or(src_array == nodata_value, outside)
            )

            if run_count:
                pixel_count = Counter(masked.compressed().tolist())

            feature_stats = dict(pixel_count) if categorical else {}

            for key, reduce_fn in reducers:
                if key in stats:
                    feature_stats[key] = reduce_fn(masked)

            if 'majority' in stats:
                try:
                    feature_stats['majority'] = float(
                        pixel_count.most_common(1)[0][0])
                except IndexError:
                    feature_stats['majority'] = None
            if 'minority' in stats:
                try:
                    feature_stats['minority'] = float(
                        pixel_count.most_common()[-1][0])
                except IndexError:
                    feature_stats['minority'] = None
            if 'unique' in stats:
                feature_stats['unique'] = len(list(pixel_count.keys()))
            if 'range' in stats:
                # reuse min/max when they were already computed
                if 'min' in feature_stats:
                    rmin = feature_stats['min']
                else:
                    rmin = float(masked.min())
                if 'max' in feature_stats:
                    rmax = feature_stats['max']
                else:
                    rmax = float(masked.max())
                feature_stats['range'] = rmax - rmin

            for pctile in [s for s in stats if s.startswith('percentile_')]:
                q = get_percentile(pctile)
                pctarr = masked.compressed()
                if pctarr.size == 0:
                    feature_stats[pctile] = None
                else:
                    feature_stats[pctile] = np.percentile(pctarr, q)

            if add_stats is not None:
                for extra_name, extra_func in add_stats.items():
                    feature_stats[extra_name] = extra_func(masked)

            if raster_out:
                masked.fill_value = nodata_value
                masked.data[masked.mask] = nodata_value
                if opt_georaster:
                    feature_stats['mini_raster'] = georasters.GeoRaster(
                        masked, new_gt, nodata_value=nodata_value,
                        projection=spatial_ref)
                else:
                    feature_stats['mini_raster'] = masked
                    feature_stats['mini_raster_GT'] = new_gt
                    feature_stats['mini_raster_NDV'] = nodata_value

        # Prefer the feature's own fid (likely from OGR data via
        # .utils.feature_to_geojson); otherwise use the enumerated id.
        feature_stats['__fid__'] = feat['fid'] if 'fid' in feat else i

        if 'properties' in feat and copy_properties:
            for key, val in feat['properties'].items():
                feature_stats[key] = val

        results.append(feature_stats)

    return results
def zonal_stats(vectors, raster, layer_num=0, band_num=1, func=None,
                nodata_value=None, categorical=False, stats=None,
                copy_properties=False, all_touched=False, transform=None):
    """Compute zonal statistics of an in-memory raster and keep each zone's image.

    ``raster`` is indexed as a 2-D array (rows, columns) and ``transform``
    is its GDAL-style geotransform.  Each feature is rasterized in memory
    with GDAL and used to mask the matching window of ``raster``.

    Parameters
    ----------
    vectors, layer_num : forwarded to ``get_features``.
    raster : 2-D array holding the raster values.
    band_num : unused by this variant.
    func : callable, optional
        Extra statistic; called with the masked array and stored under
        ``func.__name__``.
    nodata_value : value masked out of every window (``None`` masks nothing
        extra for ordinary numeric arrays).
    categorical : when true and ``stats`` is empty, no default stats are used.
    stats : list of str or space-delimited str, optional
        Defaults to count/min/max/mean/std.  The caller's list is never
        modified.
    copy_properties : copy feature properties into each result.
    all_touched : passed to ``gdal.RasterizeLayer`` as ``ALL_TOUCHED``.
    transform : 6-item GDAL geotransform, required.

    Returns
    -------
    (results, entity_images)
        ``results`` is a list of stat dicts (with ``__fid__``);
        ``entity_images`` is a parallel list of
        ``{'__fid__': i, 'img': masked_array_or_None}``.

    Raises
    ------
    Exception
        When ``transform`` is not provided.
    """
    # Always work on a private list: the original left ``stats`` as None for
    # categorical runs (crashing on ``append``/``in``) and appended 'func'
    # to the caller's own list.
    if not stats:
        stats = [] if categorical else ['count', 'min', 'max', 'mean', 'std']
    elif isinstance(stats, str):
        stats = stats.split()
    else:
        stats = list(stats)
    if func and 'func' not in stats:
        stats.append('func')

    # must have transform arg
    if not transform:
        raise Exception("Must provide the 'transform' kwarg")
    rgt = transform
    rsize = (raster.shape[1], raster.shape[0])

    rbounds = raster_extent_as_bounds(rgt, rsize)
    features_iter, strategy, spatial_ref = get_features(vectors, layer_num)

    # (xoff, yoff, xsize, ysize) of the whole array; only the offsets are
    # used below.
    global_src_offset = (0, 0, raster.shape[1], raster.shape[0])
    global_src_array = raster

    mem_drv = ogr.GetDriverByName('Memory')
    driver = gdal.GetDriverByName('MEM')

    results = []
    entity_images = []

    for i, feat in enumerate(features_iter):
        if feat['type'] == "Feature":
            geom = shape(feat['geometry'])
        else:  # it's just a geometry
            geom = shape(feat)

        # Point and MultiPoint don't play well with GDALRasterize
        # convert them into box polygons the size of a raster cell
        buff = rgt[1] / 2.0
        if geom.type == "MultiPoint":
            geom = MultiPolygon([box(*(pt.buffer(buff).bounds))
                                 for pt in geom.geoms])
        elif geom.type == 'Point':
            geom = box(*(geom.buffer(buff).bounds))

        ogr_geom_type = shapely_to_ogr_type(geom.type)

        # "Clip" the geometry bounds to the overall raster bounding box
        # This should avoid any rasterIO errors for partially overlapping polys
        geom_bounds = list(geom.bounds)
        if geom_bounds[0] < rbounds[0]:
            geom_bounds[0] = rbounds[0]
        if geom_bounds[1] < rbounds[1]:
            geom_bounds[1] = rbounds[1]
        if geom_bounds[2] > rbounds[2]:
            geom_bounds[2] = rbounds[2]
        if geom_bounds[3] > rbounds[3]:
            geom_bounds[3] = rbounds[3]

        # calculate new geotransform of the feature subset
        src_offset = bbox_to_pixel_offsets(rgt, geom_bounds)

        new_gt = (
            (rgt[0] + (src_offset[0] * rgt[1])),
            rgt[1],
            0.0,
            (rgt[3] + (src_offset[1] * rgt[5])),
            0.0,
            rgt[5]
        )

        if src_offset[2] <= 0 or src_offset[3] <= 0:
            # we're off the raster completely, no overlap at all
            # so there's no need to even bother trying to calculate
            feature_stats = dict.fromkeys(stats)
            img = {'__fid__': i, 'img': None}
        else:
            # slice this feature's window out of the in-memory raster
            xa = src_offset[0] - global_src_offset[0]
            ya = src_offset[1] - global_src_offset[1]
            xb = xa + src_offset[2]
            yb = ya + src_offset[3]
            src_array = global_src_array[ya:yb, xa:xb]

            # Create a temporary vector layer in memory
            mem_ds = mem_drv.CreateDataSource('out')
            mem_layer = mem_ds.CreateLayer('out', spatial_ref, ogr_geom_type)
            ogr_feature = ogr.Feature(feature_def=mem_layer.GetLayerDefn())
            ogr_geom = ogr.CreateGeometryFromWkt(geom.wkt)
            ogr_feature.SetGeometryDirectly(ogr_geom)
            mem_layer.CreateFeature(ogr_feature)

            # Rasterize it
            rvds = driver.Create(
                'rvds', src_offset[2], src_offset[3], 1, gdal.GDT_Byte)
            rvds.SetGeoTransform(new_gt)

            touch_option = ('ALL_TOUCHED=True' if all_touched
                            else 'ALL_TOUCHED=False')
            gdal.RasterizeLayer(
                rvds, [1], mem_layer, None, None,
                burn_values=[1], options=[touch_option])
            rv_array = rvds.ReadAsArray()

            # Mask the source data array with our current feature
            # we take the logical_not to flip 0<->1 to get the correct mask
            # effect; we also mask out nodata values explicitly
            masked = np.ma.MaskedArray(
                src_array,
                mask=np.logical_or(
                    src_array == nodata_value,
                    np.logical_not(rv_array)
                )
            )

            feature_stats = {}

            if 'min' in stats:
                feature_stats['min'] = float(masked.min())
            if 'max' in stats:
                feature_stats['max'] = float(masked.max())
            if 'mean' in stats:
                feature_stats['mean'] = float(masked.mean())
            if 'count' in stats:
                feature_stats['count'] = int(masked.count())
            if 'std' in stats:
                # computed once; the original recomputed it further down
                feature_stats['std'] = float(masked.std())
            # optional
            if func is not None and 'func' in stats:
                feature_stats[func.__name__] = func(masked)
            if 'sum' in stats:
                feature_stats['sum'] = float(masked.sum())
            if 'median' in stats:
                feature_stats['median'] = float(
                    np.median(masked.compressed()))
            if 'range' in stats:
                # reuse min/max when they were already computed
                try:
                    rmin = feature_stats['min']
                except KeyError:
                    rmin = float(masked.min())
                try:
                    rmax = feature_stats['max']
                except KeyError:
                    rmax = float(masked.max())
                feature_stats['range'] = rmax - rmin

            img = {'__fid__': i, 'img': masked}

        # Use the enumerated id as __fid__
        feature_stats['__fid__'] = i

        if 'properties' in feat and copy_properties:
            for key, val in feat['properties'].items():
                feature_stats[key] = val

        results.append(feature_stats)
        entity_images.append(img)

    return results, entity_images
def zonal_stats(vectors, raster, layer=0, band_num=1, nodata_value=None,
                global_src_extent=False, categorical=False, stats=None,
                copy_properties=False, all_touched=False, transform=None,
                affine=None, add_stats=None, raster_out=False,
                category_map=None, **kwargs):
    """Summary statistics of a raster, broken out by vector geometries.

    Attributes
    ----------
    vectors : path to an OGR vector source or list of geo_interface or WKT str
    raster : ndarray or path to a GDAL raster source
        If ndarray is passed, the `transform` kwarg is required.
    layer : int or string, optional
        If `vectors` is a path to a fiona source, the vector layer to use,
        by name or number.  defaults to 0
    band_num : int, optional
        If `raster` is a GDAL source, the band number to use (counting
        from 1).  defaults to 1.
    nodata_value : float, optional
        If `raster` is a GDAL source, this value overrides any NODATA value
        specified in the file's metadata.  If `None`, the file's metadata's
        NODATA value (if any) will be used.  `ndarray`s don't support
        `nodata_value`.  defaults to `None`.
    global_src_extent : bool, optional
        Read the raster into memory once before iterating over the vector
        features instead of reading one window per feature.  Ignored when
        `raster` is an ndarray.  defaults to `False`.
    categorical : bool, optional
    stats : list of str, or space-delimited str, optional
        Which statistics to calculate for each zone.  All possible choices
        are listed in `utils.VALID_STATS`.
        defaults to `DEFAULT_STATS`, a subset of these.
    copy_properties : bool, optional
        Include feature properties alongside the returned stats.
        defaults to `False`
    all_touched : bool, optional
        Whether to include every raster cell touched by a geometry, or only
        those having a center point within the polygon.
        defaults to `False`
    transform : list or tuple of 6 floats or Affine object, optional
        Required when `raster` is an ndarray.  `affine` is an alias.
    add_stats : dict of name -> function of the masked array, optional
    raster_out : bool, optional
        Add `mini_raster`, `mini_raster_GT` and `mini_raster_NDV` to each
        feature's result.
    category_map : dict, optional
        Maps raster values to human-readable category names.  Only applies
        when `categorical` is True.

    Returns
    -------
    list of dicts
        Each dict represents one vector geometry.  Its keys include
        `__fid__` (the geometry feature id) and each of the `stats`
        requested.
    """
    stats, run_count = check_stats(stats, categorical)

    rtype, rgt, rshape, global_src_extent, nodata_value = \
        raster_info(raster, global_src_extent, nodata_value, affine, transform)

    features_iter = read_features(vectors, layer)

    if global_src_extent and rtype == 'gdal':
        # create an in-memory numpy array of the source raster data
        extent = raster_extent_as_bounds(rgt, rshape)
        global_src_offset = bbox_to_pixel_offsets(rgt, extent, rshape)
        window = pixel_offsets_to_window(global_src_offset)
        with rasterio.drivers():
            with rasterio.open(raster, 'r') as src:
                global_src_array = src.read(
                    band_num, window=window, masked=False)
    elif global_src_extent and rtype == 'ndarray':
        global_src_offset = (0, 0, raster.shape[0], raster.shape[1])
        global_src_array = raster

    results = []

    for i, feat in enumerate(features_iter):
        geom = shape(feat['geometry'])

        # Point and MultiPoint don't play well with GDALRasterize
        # convert them into box polygons the size of a raster cell
        # TODO warning, suggest point_query instead
        buff = rgt[1] / 2.0
        if geom.type == "MultiPoint":
            geom = MultiPolygon([box(*(pt.buffer(buff).bounds))
                                 for pt in geom.geoms])
        elif geom.type == 'Point':
            geom = box(*(geom.buffer(buff).bounds))

        geom_bounds = list(geom.bounds)

        # calculate new pixel coordinates of the feature subset
        src_offset = bbox_to_pixel_offsets(rgt, geom_bounds, rshape)

        new_gt = (
            (rgt[0] + (src_offset[0] * rgt[1])),
            rgt[1],
            0.0,
            (rgt[3] + (src_offset[1] * rgt[5])),
            0.0,
            rgt[5]
        )

        if src_offset[2] <= 0 or src_offset[3] <= 0:
            # we're off the raster completely, no overlap at all
            # so there's no need to even bother trying to calculate
            feature_stats = dict.fromkeys(stats)
        else:
            if not global_src_extent:
                # use feature's source extent and read directly from source
                window = pixel_offsets_to_window(src_offset)
                with rasterio.drivers():
                    with rasterio.open(raster, 'r') as src:
                        src_array = src.read(
                            band_num, window=window, masked=False)
            else:
                # subset feature array from global source extent array
                xa = src_offset[0] - global_src_offset[0]
                ya = src_offset[1] - global_src_offset[1]
                xb = xa + src_offset[2]
                yb = ya + src_offset[3]
                src_array = global_src_array[ya:yb, xa:xb]

            # create ndarray of rasterized geometry
            rv_array = rasterize_geom(geom, src_offset, new_gt, all_touched)
            assert rv_array.shape == src_array.shape

            # Mask the source data array with our current feature
            # we take the logical_not to flip 0<->1 for the correct mask
            # effect; we also mask out nodata values explicitly
            masked = np.ma.MaskedArray(
                src_array,
                mask=np.logical_or(
                    src_array == nodata_value,
                    np.logical_not(rv_array)
                )
            )

            if masked.compressed().size == 0:
                # nothing here, fill with None and move on
                feature_stats = dict.fromkeys(stats)
                if 'count' in stats:
                    # special case, zero makes sense here
                    feature_stats['count'] = 0
            else:
                if run_count:
                    keys, counts = np.unique(masked.compressed(),
                                             return_counts=True)
                    # ``tolist`` yields native Python scalars; it replaces
                    # ``np.asscalar``, which was removed in NumPy 1.23.
                    pixel_count = dict(zip(keys.tolist(), counts.tolist()))

                if categorical:
                    feature_stats = dict(pixel_count)
                    if category_map:
                        feature_stats = remap_categories(category_map,
                                                         feature_stats)
                else:
                    feature_stats = {}

                if 'min' in stats:
                    feature_stats['min'] = float(masked.min())
                if 'max' in stats:
                    feature_stats['max'] = float(masked.max())
                if 'mean' in stats:
                    feature_stats['mean'] = float(masked.mean())
                if 'count' in stats:
                    feature_stats['count'] = int(masked.count())
                # optional
                if 'sum' in stats:
                    feature_stats['sum'] = float(masked.sum())
                if 'std' in stats:
                    feature_stats['std'] = float(masked.std())
                if 'median' in stats:
                    feature_stats['median'] = float(
                        np.median(masked.compressed()))
                if 'majority' in stats:
                    try:
                        feature_stats['majority'] = float(
                            key_assoc_val(pixel_count, max))
                    except IndexError:
                        feature_stats['majority'] = None
                if 'minority' in stats:
                    try:
                        feature_stats['minority'] = float(
                            key_assoc_val(pixel_count, min))
                    except IndexError:
                        feature_stats['minority'] = None
                if 'unique' in stats:
                    feature_stats['unique'] = len(list(pixel_count.keys()))
                if 'range' in stats:
                    # reuse min/max when they were already computed
                    try:
                        rmin = feature_stats['min']
                    except KeyError:
                        rmin = float(masked.min())
                    try:
                        rmax = feature_stats['max']
                    except KeyError:
                        rmax = float(masked.max())
                    feature_stats['range'] = rmax - rmin

                for pctile in [s for s in stats
                               if s.startswith('percentile_')]:
                    q = get_percentile(pctile)
                    pctarr = masked.compressed()
                    if pctarr.size == 0:
                        feature_stats[pctile] = None
                    else:
                        feature_stats[pctile] = np.percentile(pctarr, q)

            if 'nodata' in stats:
                # Count nodata cells inside the feature: mask by geometry
                # only, so nodata values stay visible to the counter.
                featmasked = np.ma.MaskedArray(
                    src_array, mask=np.logical_not(rv_array))
                keys, counts = np.unique(featmasked.compressed(),
                                         return_counts=True)
                pixel_count = dict(zip(keys.tolist(), counts.tolist()))
                feature_stats['nodata'] = pixel_count.get(nodata_value, 0)

            if add_stats is not None:
                for stat_name, stat_func in add_stats.items():
                    feature_stats[stat_name] = stat_func(masked)

            if raster_out:
                masked.fill_value = nodata_value
                masked.data[masked.mask] = nodata_value
                feature_stats['mini_raster'] = masked
                feature_stats['mini_raster_GT'] = new_gt
                feature_stats['mini_raster_NDV'] = nodata_value

        if 'fid' in feat:
            # Use the fid directly,
            # likely came from OGR data via .utils.feature_to_geojson
            feature_stats['__fid__'] = feat['fid']
        else:
            # Use the enumerated id
            feature_stats['__fid__'] = i

        if 'properties' in feat and copy_properties:
            for key, val in feat['properties'].items():
                feature_stats[key] = val

        results.append(feature_stats)

    return results
print(elapsed_time_fl)  # 0.2 sec for 3 particles

## Remove loop
import fiona

# Read every habitat polygon once.  The collection is opened in a ``with``
# block so the underlying file handle is released as soon as the features
# have been copied out.
polyList = []
polyProperties = []
with fiona.open('./habitat/rock_lobster_polygons_fixed.shp') as polyShp:
    for poly in polyShp:
        # NOTE(review): only the first coordinate ring is used, so interior
        # rings are dropped; this also presumes every feature is a plain
        # Polygon -- confirm against the shapefile.
        polyGeom = Polygon(poly['geometry']['coordinates'][0])
        polyList.append(polyGeom)
        polyProperties.append(poly['properties'])

# Combine all polygons into one geometry so each particle needs a single
# ``within`` test; ``buffer(0)`` is applied to clean up invalid geometry.
multiShp = MultiPolygon(polyList)
multiShp = multiShp.buffer(0)

start = time.time()
for lon, lat in zip(lons, lats):
    pt = Point(lon, lat)
    in_area = pt.within(multiShp)
    if in_area:
        print("In habitat")
    else:
        print("No habitat")

# get time taken to run
elapsed_time_fl = (time.time() - start)
print(elapsed_time_fl)  # 0.01 sec for the 3 particles
def raster_stats_multi(vectors, rasterlist, geom_attr='GeomWKT', id_attr='fid',
                       band_num=1, nodata_value=None,
                       global_src_extent=False, categorical=False, stats=None,
                       copy_properties=False, all_touched=False):
    '''
    Multi-raster version of the raster_stats (zonal_stats) function found in
    the rasterstats package.

    When running zonal stats using the rasterstats package each feature (zone)
    must first be rasterized. These are then used to mask the input raster.
    However we often need to run raster stats on many (thousands) of input
    rasters (all with identical geotransforms) for the same zones. In this
    scenario the rasterization of the zones is a major overhead.

    This version rasterizes once and then runs the overlay against all rasters
    (which must have the same resolution / extent as one another). It is a
    generator, so the stats for each raster are computed only when the calling
    code asks for them - and, equally, argument errors are only raised once
    iteration starts.

    :param vectors: iterable of mapping-like features. Each feature must hold
        a WKT geometry string under ``geom_attr``; features without it are
        reported and skipped. NOTE: the iterable is walked once to rasterize
        the zones and then once more per raster, so it must be re-iterable
        (a list, not a one-shot generator).
    :param rasterlist: sequence of raster paths for ``gdal.Open``. The first
        one defines the reference geotransform and size; any raster that
        cannot be opened or does not match the reference is reported and
        skipped.
    :param geom_attr: feature key holding the WKT geometry.
    :param id_attr: feature key holding the feature id; when absent the
        enumeration index of the feature is used instead. Also the key under
        which the id is written into each stats dict.
    :param band_num: raster band number passed to ``GetRasterBand``.
    :param nodata_value: value to mask out. When given it is converted to
        float and set on each raster band; when None each raster's own nodata
        value is used.
    :param global_src_extent: if true, read one array per raster covering the
        extent of all features and slice it per feature, instead of one read
        per feature.
    :param categorical: if true, include the per-value pixel counts in each
        feature's stats.
    :param stats: list of stat names, a whitespace separated string of names,
        or ``'*'`` / ``'ALL'`` for every valid stat. Defaults to count, min,
        max and mean (or to nothing extra when ``categorical`` is set).
    :param copy_properties: if true, copy every other key of the feature
        (except ``id_attr`` and ``geom_attr``) into its stats dict.
    :param all_touched: forwarded to GDAL's rasterizer as the ``ALL_TOUCHED``
        option.
    :return: generator yielding, per usable raster, a dict
        ``{'rastername': <path>, 'stats': [<stats dict per feature>, ...]}``.
        Features that lie off the raster are omitted from the list.
    :raises RasterStatsError: if a requested stat is not valid or the first
        raster cannot be opened.
    '''
    DEFAULT_STATS = ['count', 'min', 'max', 'mean']
    VALID_STATS = DEFAULT_STATS + \
        ['sum', 'std', 'median', 'majority', 'minority', 'unique', 'range']

    # `basestring` only exists on Python 2; fall back to `str` elsewhere.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    if not stats:
        stats = [] if categorical else DEFAULT_STATS
    elif isinstance(stats, string_types):
        if stats in ['*', 'ALL']:
            stats = VALID_STATS
        else:
            stats = stats.split()
    for x in stats:
        if x not in VALID_STATS:
            raise RasterStatsError("Stat `%s` not valid;"
                                   " must be one of \n %r" % (x, VALID_STATS))

    # run the counter once per feature, and only if something needs it
    run_count = bool(categorical or 'majority' in stats
                     or 'minority' in stats or 'unique' in stats)

    # open the first raster and use this as the reference, we will assume
    # they are all the same size / bounds etc
    initrast = rasterlist[0]
    rds = gdal.Open(initrast, gdal.GA_ReadOnly)
    if not rds:
        raise RasterStatsError("Cannot open %r as GDAL raster" % initrast)
    rgt = rds.GetGeoTransform()
    rsize = (rds.RasterXSize, rds.RasterYSize)
    rbounds = raster_extent_as_bounds(rgt, rsize)
    rds = None

    # Keep the caller's nodata separate from the per-raster value so that one
    # raster's own nodata is never forced onto the rasters that follow it.
    user_nodata = None if nodata_value is None else float(nodata_value)

    mem_drv = ogr.GetDriverByName('Memory')
    driver = gdal.GetDriverByName('MEM')

    # in order to avoid re-rasterizing the zones for every values raster the
    # rasterization is done once, up front, and the rasterized zone arrays are
    # kept in a dictionary keyed by feature id (so we need enough memory to
    # hold them all). A value of None marks a feature that is off the raster.
    zoneFeatureRasters = {}
    globL = globB = float('inf')
    globT = globR = float('-inf')

    for i, feat in enumerate(vectors):
        try:
            geomWKT = feat[geom_attr]
        except KeyError:
            print("No geom attr found in feature!")
            continue
        geom = wkt.loads(geomWKT)

        # Point and MultiPoint don't play well with GDALRasterize
        # convert them into box polygons the size of a raster cell
        buff = rgt[1] / 2.0
        if geom.type == "MultiPoint":
            geom = MultiPolygon([box(*(pt.buffer(buff).bounds))
                                 for pt in geom.geoms])
        elif geom.type == 'Point':
            geom = box(*(geom.buffer(buff).bounds))

        ogr_geom_type = shapely_to_ogr_type(geom.type)

        # "Clip" the geometry bounds to the overall raster bounding box
        # This should avoid any rasterIO errors for partially overlapping polys
        geom_bounds = list(geom.bounds)
        if geom_bounds[0] < rbounds[0]:
            geom_bounds[0] = rbounds[0]
        if geom_bounds[1] < rbounds[1]:
            geom_bounds[1] = rbounds[1]
        if geom_bounds[2] > rbounds[2]:
            geom_bounds[2] = rbounds[2]
        if geom_bounds[3] > rbounds[3]:
            geom_bounds[3] = rbounds[3]

        # Record the overall bounds of the features
        if geom_bounds[0] < globL:
            globL = geom_bounds[0]
        if geom_bounds[1] < globB:
            globB = geom_bounds[1]
        if geom_bounds[2] > globR:
            globR = geom_bounds[2]
        if geom_bounds[3] > globT:
            globT = geom_bounds[3]

        src_offset = bbox_to_pixel_offsets(rgt, geom_bounds, rsize)

        try:
            fid = feat[id_attr]
        except KeyError:
            fid = i

        # NOTE(review): a zero-sized window (== 0) is not treated as
        # off-raster here; confirm bbox_to_pixel_offsets never returns one.
        if src_offset[2] < 0 or src_offset[3] < 0:
            # we're off the raster completely, no overlap at all
            # so there's no need to even bother trying to calculate
            # (fid may be an int, hence the explicit str())
            print("Feature " + str(fid) + " is off raster extent - skipping!")
            zoneFeatureRasters[fid] = None
            continue

        # geotransform of the feature's window within the raster
        new_gt = (
            (rgt[0] + (src_offset[0] * rgt[1])),
            rgt[1],
            0.0,
            (rgt[3] + (src_offset[1] * rgt[5])),
            0.0,
            rgt[5]
        )

        # Create a temporary vector layer in memory
        mem_ds = mem_drv.CreateDataSource('out')
        mem_layer = mem_ds.CreateLayer('out', None, ogr_geom_type)
        ogr_feature = ogr.Feature(feature_def=mem_layer.GetLayerDefn())
        ogr_geom = ogr.CreateGeometryFromWkt(geom.wkt)
        ogr_feature.SetGeometryDirectly(ogr_geom)
        mem_layer.CreateFeature(ogr_feature)

        # Rasterize it: cells covered by the feature are burned with 1
        rvds = driver.Create('rvds', src_offset[2], src_offset[3], 1,
                             gdal.GDT_Byte)
        rvds.SetGeoTransform(new_gt)
        gdal.RasterizeLayer(rvds, [1], mem_layer, None, None, [1],
                            ['ALL_TOUCHED=' + str(all_touched)])
        zoneFeatureRasters[fid] = {
            "zonearray": rvds.ReadAsArray(),
            "src_offset": src_offset
        }

    if global_src_extent:
        # outside the loop: everything except actually reading the raster
        # data. The window covers the whole extent of the vector features.
        layer_extent = (globL, globB, globR, globT)
        global_src_offset = bbox_to_pixel_offsets(rgt, layer_extent, rsize)

    # now do the raster calculation aspects of the original task once for each
    # input raster but getting the zone rasters from the populated dictionary
    # rather than re-rasterizing each time
    for rast in rasterlist:
        rastresults = []
        rds = gdal.Open(rast, gdal.GA_ReadOnly)
        if not rds:
            # report and carry on with the remaining rasters
            print("\nCannot open %r as GDAL raster\n" % rast)
            continue
        rb = rds.GetRasterBand(band_num)

        # the zone arrays are only valid for rasters on the reference grid
        thisRgt = rds.GetGeoTransform()
        thisRsize = (rds.RasterXSize, rds.RasterYSize)
        thisRbounds = raster_extent_as_bounds(thisRgt, thisRsize)
        if (thisRgt != rgt or thisRsize != rsize or thisRbounds != rbounds):
            print("Raster " + rast + " has differing size or geotransform from others - skipping!")
            continue

        if global_src_extent:
            global_src_array = rb.ReadAsArray(*global_src_offset)

        if user_nodata is not None:
            nodata = user_nodata
            rb.SetNoDataValue(nodata)
        else:
            nodata = rb.GetNoDataValue()

        for i, feat in enumerate(vectors):
            try:
                fid = feat[id_attr]
            except KeyError:
                fid = i

            # None: the feature was off the raster extent so rasterizing it
            # was skipped. Missing: it had no geometry attribute at all.
            zone = zoneFeatureRasters.get(fid)
            if zone is None:
                continue
            zone_array = zone["zonearray"]
            src_offset = zone["src_offset"]

            if not global_src_extent:
                # use feature's source extent and read directly from source
                # fastest option when you have fast disks and well-indexed
                # raster
                # advantage: each feature uses the smallest raster chunk
                # disadvantage: lots of disk reads on the source raster
                src_array = rb.ReadAsArray(*src_offset)
            else:
                # derive array from global source extent array
                # useful *only* when disk IO or raster format inefficiencies
                # are your limiting factor
                # advantage: reads raster data in one pass before loop
                # disadvantage: large vector extents combined with big
                # rasters need lotsa memory
                xa = src_offset[0] - global_src_offset[0]
                ya = src_offset[1] - global_src_offset[1]
                xb = xa + src_offset[2]
                yb = ya + src_offset[3]
                src_array = global_src_array[ya:yb, xa:xb]

            # Mask the source data array with our current feature
            # we take the logical_not to flip 0<->1 to get the correct mask
            # effect; we also mask out nodata values explicitly
            masked = numpy.ma.MaskedArray(
                src_array,
                mask=numpy.logical_or(
                    src_array == nodata,
                    numpy.logical_not(zone_array)
                )
            )

            if run_count:
                pixel_count = Counter(masked.compressed())

            if categorical:
                feature_stats = dict(pixel_count)
            else:
                feature_stats = {}

            if 'min' in stats:
                feature_stats['min'] = float(masked.min())
            if 'max' in stats:
                feature_stats['max'] = float(masked.max())
            if 'mean' in stats:
                feature_stats['mean'] = float(masked.mean())
            if 'count' in stats:
                feature_stats['count'] = int(masked.count())
            # optional
            if 'sum' in stats:
                feature_stats['sum'] = float(masked.sum())
            if 'std' in stats:
                feature_stats['std'] = float(masked.std())
            if 'median' in stats:
                feature_stats['median'] = float(
                    numpy.median(masked.compressed()))
            if 'majority' in stats:
                try:
                    feature_stats['majority'] = \
                        pixel_count.most_common(1)[0][0]
                except IndexError:
                    # no unmasked pixels at all
                    feature_stats['majority'] = None
            if 'minority' in stats:
                try:
                    feature_stats['minority'] = \
                        pixel_count.most_common()[-1][0]
                except IndexError:
                    feature_stats['minority'] = None
            if 'unique' in stats:
                feature_stats['unique'] = len(pixel_count)
            if 'range' in stats:
                # reuse min / max when they were already requested
                try:
                    rmin = feature_stats['min']
                except KeyError:
                    rmin = float(masked.min())
                try:
                    rmax = feature_stats['max']
                except KeyError:
                    rmax = float(masked.max())
                feature_stats['range'] = rmax - rmin

            # the provided feature id, or the enumerator when there is none
            feature_stats[id_attr] = fid

            if copy_properties:
                for key, val in feat.items():
                    if key == id_attr or key == geom_attr:
                        continue
                    feature_stats[key] = val

            rastresults.append(feature_stats)

        yield {'rastername': rast, 'stats': rastresults}
        # drop the GDAL handles for this raster before opening the next one
        rb = None
        rds = None

    zoneFeatureRasters = None