def read_polygons_from_small_patch(in_shp, in_raster):
    '''
    Read polygons from a shapefile and split them by whether they reach the image edge.

    Each polygon is buffered by half of the raster's x resolution; a polygon whose
    buffered shape is still within the image bounding polygon counts as "not touching".
    :param in_shp: path of the polygon shapefile
    :param in_raster: path of the raster the polygons were derived from
    :return: (polygons not touching the image edge, polygons touching the image edge)
    '''
    print(datetime.now(), 'reading polygons to touch and not touch image edge group')

    all_polygons = vector_gpd.read_polygons_gpd(in_shp, b_fix_invalid_polygon=False)
    bound_box = raster_io.get_image_bound_box(in_raster)
    x_res, _y_res = raster_io.get_xres_yres_file(in_raster)
    half_pixel = x_res / 2.0
    image_extent = vector_gpd.convert_image_bound_to_shapely_polygon(bound_box)

    # buffer every polygon by half a pixel before testing it against the image extent
    buffered = [poly.buffer(half_pixel) for poly in all_polygons]
    inside_flags = [buff.within(image_extent) for buff in buffered]

    polygon_no_touch = [poly for poly, inside in zip(all_polygons, inside_flags) if inside]
    polygons_touch = [poly for poly, inside in zip(all_polygons, inside_flags) if not inside]
    return polygon_no_touch, polygons_touch
def get_tile_min_overlap(raster_file_or_files):
    '''
    Get the minimum overlap area used when matching polygons to image tiles.

    The value is the absolute product of the x and y resolution read from the
    first raster.
    :param raster_file_or_files: a raster path (str) or a list of raster paths
    :return: abs(xres * yres) of the first raster
    '''
    # reject anything that is neither a single path nor a list of paths
    if not isinstance(raster_file_or_files, (str, list)):
        raise ValueError('unsupport type for %s' % str(raster_file_or_files))

    if isinstance(raster_file_or_files, str):
        io_function.is_file_exist(raster_file_or_files)
        tiles = [raster_file_or_files]
    else:
        tiles = raster_file_or_files

    res_x, res_y = raster_io.get_xres_yres_file(tiles[0])
    return abs(res_x * res_y)
def get_one_sub_image_label(idx,center_polygon, class_int, polygons_all,class_int_all, bufferSize, img_tile_boxes,image_tile_list):
    '''
    get an sub image and the corresponding label raster

    NOTE: the original author marked this function as "not working"; it is experimental.
    It writes two test files into the current working directory ('test_6_albers.tif' and
    'masked_of_polygon_<idx+1>.tif') and returns a placeholder value.
    :param idx: the polygon index
    :param center_polygon: the polygon in training polygon
    :param class_int: the class number of this polygon
    :param polygons_all: the full set of training polygons, for generating label images
    :param class_int_all: the class number for the full set of training polygons
    :param bufferSize: the buffer area to generate sub-images
    :param img_tile_boxes: the bound boxes of all the image tiles
    :param image_tile_list: the list of image paths
    :return: the placeholder tuple (1, 1) (image and label arrays are not returned yet)
    :raises IndexError: if no image tile overlaps the polygons (after a warning is logged)
    :raises ValueError: if the label raster would exceed 10000 x 10000 pixels
    '''

    ############# This function is not working  #############

    # center_polygon corresponds to one polygon in the full set of training polygons, so it is not necessary to check
    # get adjacent polygon
    adj_polygons, adj_polygons_class = get_adjacent_polygons(center_polygon, polygons_all, class_int_all, bufferSize)

    # add the center polygons to adj_polygons
    adj_polygons.extend([center_polygon])
    adj_polygons_class.extend([class_int])
    basic.outputlogMessage('get a sub image covering %d training polygons'%len(adj_polygons))

    # find the images which the center polygon overlap (one or two images)
    img_resx, img_resy = raster_io.get_xres_yres_file(image_tile_list[0])
    img_index = get_overlap_image_index(adj_polygons, img_tile_boxes,min_overlap_area=abs(img_resx*img_resy))
    if len(img_index) < 1:
        basic.outputlogMessage('Warning, %dth polygon and the adjacent ones do not overlap any image tile, please check '
                               '(1) the shape file and raster have the same projection'
                               ' and (2) this polygon is in the extent of images'%idx)

    image_list = [image_tile_list[item] for item in img_index]

    # open the first overlapping raster to get projection and resolution;
    # the context manager guarantees the dataset is closed even if a later step fails
    with rasterio.open(image_list[0]) as src:
        resX = src.res[0]
        resY = src.res[1]

        # rasterize the shapes
        burn_shapes = [(item_shape, item_class_int) for (item_shape, item_class_int) in
                       zip(adj_polygons,adj_polygons_class)]
        burn_boxes = get_bounds_of_polygons(adj_polygons)

        # check whether the extent is too large
        burn_boxes_width = math.ceil((burn_boxes[2]- burn_boxes[0])/resX)
        burn_boxes_height = math.ceil((burn_boxes[3] - burn_boxes[1])/resY)
        if burn_boxes_width*burn_boxes_height > 10000*10000:
            raise ValueError('error, the polygons want to burn cover a very large area')

        # GDAL-style transforms have been deprecated after rasterio 1.0, so use the affine order:
        # (resX, 0, X_min, 0, -resY, Y_max)
        # https://rasterio.readthedocs.io/en/stable/topics/migrating-to-v1.html
        new_transform = (resX ,0, burn_boxes[0] , 0, -resY, burn_boxes[3])

        # out_shape is a numpy array shape, i.e. (rows, cols) = (height, width)
        out_label = rasterize(burn_shapes, out_shape=(burn_boxes_height, burn_boxes_width),
                              transform=new_transform, fill=0, all_touched=False, dtype=rasterio.uint8)
        print('new_transform', new_transform)
        print('out_label', out_label.shape)

        # test, save to disk
        kwargs = src.meta
        kwargs.update(
            dtype=rasterio.uint8,
            count=1,
            width=burn_boxes_width,
            height = burn_boxes_height,
            transform=new_transform)
        with rasterio.open('test_6_albers.tif', 'w', **kwargs) as dst:
            dst.write_band(1, out_label.astype(rasterio.uint8))

        # mask, get pixels cover by polygons, set all_touched as True
        polygons_json = [mapping(item) for item in adj_polygons]
        out_image, out_transform = mask(src, polygons_json, nodata=0, all_touched=True, crop=True)

        # test: output information
        print('out_transform', out_transform)
        print('out_image',out_image.shape)

        # test: save it to disk
        out_meta = src.meta.copy()
        out_meta.update({"driver": "GTiff",
                         "height": out_image.shape[1],
                         "width": out_image.shape[2],
                         "transform": out_transform})

    # note that, the saved image have a small offset compared to the original ones (~0.5 pixel)
    save_path = "masked_of_polygon_%d.tif"%(idx+1)
    with rasterio.open(save_path, "w", **out_meta) as dest:
        dest.write(out_image)

    # return image_array, label_array
    return 1, 1
def get_sub_image(idx,selected_polygon, image_tile_list, image_tile_bounds, save_path, dstnodata, brectangle ):
    '''
    crop a sub-image covering a selected polygon and save it; the polygon may cross several image tiles
    :param idx: index of the polygon, only used in log messages
    :param selected_polygon: the polygon to crop with
    :param image_tile_list: list of image paths
    :param image_tile_bounds: the bound boxes of the images in image_tile_list
    :param save_path: path of the output sub-image
    :param dstnodata: nodata value used for masked pixels and written to the output metadata
    :param brectangle: if True, crop with the polygon's envelope, otherwise with the polygon itself
    :return: True if a sub-image was saved, False otherwise
    '''

    def crop_from(tile_src):
        # crop one opened tile by the polygon (or by its envelope when brectangle is set)
        crop_shape = selected_polygon.envelope if brectangle else selected_polygon
        return mask(tile_src, [mapping(crop_shape)], nodata=dstnodata, all_touched=True, crop=True)

    def save_crop(tile_src, pixels, transform, out_path):
        # write the cropped array as a GeoTIFF, reusing the metadata of the source tile
        # note that, the saved image have a small offset compared to the original ones (~0.5 pixel)
        meta = tile_src.meta.copy()
        meta.update({"driver": "GTiff",
                     "height": pixels.shape[1],
                     "width": pixels.shape[2],
                     "transform": transform,
                     "nodata": dstnodata})
        with rasterio.open(out_path, "w", **meta) as dest:
            dest.write(pixels)

    img_resx, img_resy = raster_io.get_xres_yres_file(image_tile_list[0])

    # find the image tiles which the polygon overlaps (one or more)
    img_index = get_overlap_image_index([selected_polygon], image_tile_bounds,
                                        min_overlap_area=abs(img_resx*img_resy))
    if len(img_index) < 1:
        basic.outputlogMessage(
            'Warning, %dth polygon do not overlap any image tile, please check '
            '(1) the shape file and raster have the same projection'
            ' and (2) this polygon is in the extent of images' % idx)
        return False

    image_list = [image_tile_list[item] for item in img_index]

    # simple case: the polygon only overlaps one raster
    if len(image_list) == 1:
        with rasterio.open(image_list[0]) as src:
            out_image, out_transform = crop_from(src)
            save_crop(src, out_image, out_transform, save_path)
        return True

    # the polygon overlaps more than one raster: crop each tile, then mosaic the pieces
    tmp_saved_files = []
    for k_img, image_path in enumerate(image_list):
        with rasterio.open(image_path) as src:
            out_image, out_transform = crop_from(src)

            # skip crops that are entirely nodata or have (almost) constant values
            valid_loc = np.where(out_image != dstnodata)
            if valid_loc[0].size < 1 or np.std(out_image[valid_loc]) < 0.0001:
                basic.outputlogMessage('out_image is total black or white, ignore, %s: %d' % (save_path, k_img))
                continue

            path_stem, path_ext = os.path.splitext(save_path)
            tmp_saved = path_stem + '_%d' % k_img + path_ext
            save_crop(src, out_image, out_transform, tmp_saved)
            tmp_saved_files.append(tmp_saved)

    piece_count = len(tmp_saved_files)
    if piece_count < 1:
        basic.outputlogMessage('Warning, %dth polygon overlap multiple image tiles, but all are black or white, please check ' % idx)
        return False

    if piece_count == 1:
        # only one usable piece: it becomes the result, so it must not be deleted below
        io_function.move_file_to_dst(tmp_saved_files[0], save_path)
        del tmp_saved_files[0]
    else:
        # mosaic the pieces with gdal_merge.py
        mosaic_args_list = ['gdal_merge.py', '-o', save_path, '-n', str(dstnodata),
                            '-a_nodata', str(dstnodata)] + tmp_saved_files
        if basic.exec_command_args_list_one_file(mosaic_args_list, save_path) is False:
            raise IOError('error, obtain a mosaic (%s) failed'%save_path)

    # remove the tmp files
    for tmp_file in tmp_saved_files:
        io_function.delete_file_or_dir(tmp_file)

    return True
def zonal_stats_multiRasters(in_shp, raster_file_or_files, tile_min_overlap=None, nodata=None, band=1, stats=None, prefix='',
                             range=None, buffer=None, all_touched=True, process_num=1):
    '''
    zonal statistic based on vectors, along multiple rasters (image tiles).
    The results are added to in_shp as attributes named prefix + '_' + <stat name>.

    Args:
        in_shp: input vector file
        raster_file_or_files: a raster file (str) or multiple rasters (list of str)
        tile_min_overlap: passed through to zonal_stats_one_polygon
        nodata: passed through to zonal_stats_one_polygon
        band: passed through to zonal_stats_one_polygon
        stats: like [mean, std, max, min]; 'area' is derived from 'count' and the pixel size
               of the first raster. The list passed in is not modified. None means ['mean'].
        prefix: prefix of the attribute names added to in_shp
        range: interested values [min, max], None means infinity
        buffer: expand polygon with buffer (meter) before the statistic
        all_touched: passed through to zonal_stats_one_polygon
        process_num: process number for calculation

    Returns: False if in_shp contains no polygons, otherwise None

    Raises:
        ValueError: if raster_file_or_files is neither str nor list, or process_num < 1

    '''
    io_function.is_file_exist(in_shp)
    if stats is None:
        basic.outputlogMessage('warning, No input stats, set to ["mean"])')
        stats = ['mean']

    # keep what the caller asked for, and work on a separate list so that the
    # caller's list is never modified
    requested_stats = list(stats)
    calc_stats = list(requested_stats)
    if 'area' in calc_stats:
        # 'area' is not calculated per polygon; it is derived from 'count' afterwards
        calc_stats.remove('area')
        if 'count' not in calc_stats:
            calc_stats.append('count')

    if isinstance(raster_file_or_files, str):
        io_function.is_file_exist(raster_file_or_files)
        image_tiles = [raster_file_or_files]
    elif isinstance(raster_file_or_files, list):
        image_tiles = raster_file_or_files
    else:
        raise ValueError('unsupport type for %s' % str(raster_file_or_files))

    # check projection (assume we have the same projection), check them outside this function

    # get image box
    img_tile_boxes = [raster_io.get_image_bound_box(tile) for tile in image_tiles]
    img_tile_polygons = [vector_gpd.convert_image_bound_to_shapely_polygon(box) for box in img_tile_boxes]
    polygons = vector_gpd.read_polygons_gpd(in_shp)
    if len(polygons) < 1:
        basic.outputlogMessage('No polygons in %s' % in_shp)
        return False

    if buffer is not None:
        polygons = [poly.buffer(buffer) for poly in polygons]

    # process polygons one by one with the corresponding image tiles (parallel and save memory)
    # also to avoid error: daemonic processes are not allowed to have children
    if process_num == 1:
        stats_res_list = [
            zonal_stats_one_polygon(idx, polygon, image_tiles, img_tile_polygons, calc_stats, nodata=nodata,
                                    range=range, band=band, all_touched=all_touched,
                                    tile_min_overlap=tile_min_overlap)
            for idx, polygon in enumerate(polygons)
        ]
    elif process_num > 1:
        para_list = [(idx, polygon, image_tiles, img_tile_polygons, calc_stats, nodata, range, band, all_touched,
                      tile_min_overlap)
                     for idx, polygon in enumerate(polygons)]
        threadpool = Pool(process_num)
        try:
            stats_res_list = threadpool.starmap(zonal_stats_one_polygon, para_list)
        finally:
            # always release the worker processes, also when a worker raised
            threadpool.close()
            threadpool.join()
    else:
        raise ValueError('Wrong process number: %s ' % str(process_num))

    # collect the per-polygon results into one list per attribute
    add_attributes = {prefix + '_' + key: [] for key in stats_res_list[0].keys()}
    for stats_result in stats_res_list:
        for key, value in stats_result.items():
            add_attributes[prefix + '_' + key].append(value)

    if 'area' in requested_stats:
        dx, dy = raster_io.get_xres_yres_file(image_tiles[0])
        # abs(): keep the area positive even if a resolution is reported as negative,
        # in line with the abs(xres*yres) used elsewhere in this file
        add_attributes[prefix + '_' + 'area'] = [abs(count * dx * dy)
                                                 for count in add_attributes[prefix + '_' + 'count']]

        # 'count' was only needed to derive the area
        if 'count' not in requested_stats:
            del add_attributes[prefix + '_' + 'count']

    # save to shapefile
    vector_gpd.add_attributes_to_shp(in_shp, add_attributes)