def calculate_distance_medial_axis(input_shp, out_shp, process_num=4, enlarge_m=20):
    print('calculating polygon width based on medial axis')
    code_dir = os.path.expanduser('~/codes/PycharmProjects/ChangeDet_DL/thawSlumpChangeDet')
    sys.path.insert(0, code_dir)

    # After testing, we found that when polygons are very narrow and irregular,
    # cal_retreat_rate outputs wrong results, so enlarge the polygons with a buffer first.
    polygons = vector_gpd.read_polygons_gpd(input_shp)
    # for poly in polygons:
    #     if poly.geom_type == 'MultiPolygon':
    #         print(poly.geom_type, poly)

    # cal_retreat_rate only uses the exterior, so fill holes before buffering
    # polygon_large = [vector_gpd.fill_holes_in_a_polygon(item) for item in polygons]
    polygon_large = polygons

    # buffer
    polygon_large = [item.buffer(enlarge_m) for item in polygon_large]

    wkt = map_projection.get_raster_or_vector_srs_info_wkt(input_shp)
    # save_large_shp = io_function.get_name_by_adding_tail(input_shp, 'larger')
    save_pd = pd.DataFrame({'Polygon': polygon_large})
    vector_gpd.save_polygons_to_files(save_pd, 'Polygon', wkt, out_shp)

    # calculate width based on the expanding areas
    import cal_retreat_rate
    if cal_retreat_rate.cal_expand_area_distance(out_shp, proc_num=process_num, save_medial_axis=True):
        os.system('rm save_medial_axis_radius*.txt out_polygon_vertices_*.txt')
    return out_shp
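# Usage sketch (not part of the original code): one way to call the function above.
# The input path is hypothetical; everything else uses names defined in this module.
def example_calculate_width():
    input_shp = os.path.expanduser('~/Data/example/thaw_slumps.shp')  # hypothetical path
    out_shp = io_function.get_name_by_adding_tail(input_shp, 'larger')
    width_shp = calculate_distance_medial_axis(input_shp, out_shp, process_num=4, enlarge_m=20)
    print('polygon widths (via medial axis) saved to %s' % width_shp)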
def main():
    latlon_csv = os.path.expanduser('~/Data/PDO/PDO_statistics_swatchs/LatLonPDOv3.csv')
    nc_name_latlon = []
    with open(latlon_csv) as f_obj:
        reader = csv.reader(f_obj)
        for row in reader:
            if len(row) >= 4 and 'PDO' in row[0]:
                nc_name_latlon.append(row)
                print(row)
    print('total %d nc files' % len(nc_name_latlon))

    polygon_latlon = []
    nc_file_name = []
    for idx, nc_info in enumerate(nc_name_latlon):
        fname, polygon = latlon_2_polygons(nc_info)
        polygon_latlon.append(polygon)
        nc_file_name.append(fname)

    # save polygons (the original reused the variable name latlon_csv for this shapefile path)
    save_shp_path = os.path.expanduser('~/Data/PDO/PDO_statistics_swatchs/swatch_bounding_boxes.shp')
    save_polygons_attributes = {'Polygons': polygon_latlon, 'nc_file': nc_file_name}
    polygon_df = pd.DataFrame(save_polygons_attributes)
    vector_gpd.save_polygons_to_files(polygon_df, 'Polygons', {'init': 'epsg:4326'}, save_shp_path)
def remove_based_on_area(slope_bin_shp, min_area, max_area, wkt, rm_area_shp):
    polygons = vector_gpd.read_polygons_gpd(slope_bin_shp, b_fix_invalid_polygon=False)

    remain_polygons = []
    # remove polygons that are too large or too small
    remove_count = 0
    for idx, poly in enumerate(polygons):
        if poly.area > max_area or poly.area < min_area:
            remove_count += 1
            continue
        remain_polygons.append(poly)
    basic.outputlogMessage('removed %d polygons based on area, remaining %d ones saving to %s' %
                           (remove_count, len(remain_polygons), rm_area_shp))

    polygons_noMulti = [vector_gpd.MultiPolygon_to_polygons(idx, poly) for idx, poly in enumerate(remain_polygons)]
    remain_polygons = []
    for polys in polygons_noMulti:
        polys = [poly for poly in polys if poly.area > min_area]  # remove tiny polygons
        remain_polygons.extend(polys)
    print('converted MultiPolygons to polygons and removed tiny polygons, %d remain' % len(remain_polygons))

    if len(remain_polygons) < 1:
        return False

    save_pd = pd.DataFrame({'Polygon': remain_polygons})
    vector_gpd.save_polygons_to_files(save_pd, 'Polygon', wkt, rm_area_shp)
    return rm_area_shp
def save_planet_images_to_shapefile(geojson_list, save_shp_path, wkt_string, extent_polygon=None, b_group_date=False):
    '''
    get the metadata and extent of downloaded images
    :param geojson_list: geojson_list
    :param save_shp_path:
    :param extent_polygon: an extent polygon
    :param b_group_date:
    :return:
    '''
    # remove incomplete scenes
    geojson_list = [item for item in geojson_list if 'incomplete_scenes' not in item]
    if len(geojson_list) < 1:
        raise ValueError('No geojson files (excluding incomplete_scenes) in the given folder')

    if extent_polygon is not None and len(extent_polygon) > 1:
        raise ValueError('Only support one extent polygon')
    extent = extent_polygon[0]   # note: the code assumes an extent polygon is always provided

    if b_group_date is False:
        geojson_group = {'all': geojson_list}
    else:
        geojson_group = group_geojson_by_date(geojson_list)

    for key in geojson_group.keys():
        sub_geojsons = geojson_group[key]
        if len(sub_geojsons) < 1:
            continue
        sel_geojson_list, sel_polygons = get_geojson_list_overlap_a_polygon(extent, sub_geojsons)
        if len(sel_geojson_list) < 1:
            continue
        scene_table, scene_without_asset = get_meta_dict(sel_geojson_list)
        if len(scene_table['scene_id']) != len(sel_polygons):
            raise ValueError('The counts of scene IDs and polygons are different, which could be due to duplicated scenes')

        # convert to strings; ESRI Shapefile does not support datetime fields
        scene_table['acquisitionDate'] = [timeTools.datetime2str(item) for item in scene_table['acquisitionDate']]
        scene_table['downloadTime'] = [timeTools.datetime2str(item) for item in scene_table['downloadTime']]
        scene_table['Polygons'] = sel_polygons
        df = pd.DataFrame(scene_table)

        if key == "all":
            save_path = save_shp_path
        else:
            date_str = timeTools.date2str(key)
            save_path = io_function.get_name_by_adding_tail(save_shp_path, date_str)
        vector_gpd.save_polygons_to_files(df, 'Polygons', wkt_string, save_path)

    return True
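# Usage sketch (not part of the original code): writing one footprint shapefile per
# acquisition date. The geojson folder and extent shapefile below are hypothetical.
def example_save_planet_footprints():
    geojson_list = io_function.get_file_list_by_ext('.geojson', 'planet_meta', bsub_folder=True)  # hypothetical folder
    extent_polys = vector_gpd.read_polygons_gpd('extent.shp')  # hypothetical, should contain one polygon
    wkt = map_projection.get_raster_or_vector_srs_info_wkt('extent.shp')
    save_planet_images_to_shapefile(geojson_list, 'planet_footprints.shp', wkt,
                                    extent_polygon=extent_polys, b_group_date=True)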
def test_get_dem_tif_ext_polygons():
    work_dir = os.path.expanduser('~/Data/Arctic/canada_arctic/DEM/WR_dem_diff/dem_tifs')
    os.chdir(work_dir)
    tifs = io_function.get_file_list_by_ext('.tif', work_dir, bsub_folder=False)
    polygons = dem_mosaic_crop.get_dem_tif_ext_polygons(tifs)

    data = {'poly': polygons}
    pddata = pd.DataFrame(data)
    wkt_str = map_projection.get_raster_or_vector_srs_info_wkt(tifs[0])
    save_path = 'tif_extent.shp'
    vector_gpd.save_polygons_to_files(pddata, 'poly', wkt_str, save_path)
def save_selected_girds_and_ids(selected_gird_id_list, select_grid_polys, proj, save_path):
    # save to a shapefile for downloading and processing;
    # convert numpy.uint16 to int, to avoid values becoming negative when saved to shapefile
    selected_gird_id_list = [int(item) for item in selected_gird_id_list]
    save_pd = pd.DataFrame({'grid_id': selected_gird_id_list, 'Polygon': select_grid_polys})
    vector_gpd.save_polygons_to_files(save_pd, 'Polygon', proj, save_path)
    basic.outputlogMessage('saved %d grids to %s' % (len(select_grid_polys), save_path))

    # save the ids to a text file
    save_id_txt = os.path.splitext(save_path)[0] + '_grid_ids.txt'
    selected_grid_ids_str = [str(item) for item in selected_gird_id_list]
    io_function.save_list_to_txt(save_id_txt, selected_grid_ids_str)
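# Usage sketch (not part of the original code): saving a few grid ids and polygons.
# The grid polygons, source shapefile, and output path below are hypothetical stand-ins.
def example_save_selected_grids():
    from shapely.geometry import box
    grid_polys = [box(0, 0, 20000, 20000), box(20000, 0, 40000, 20000)]  # hypothetical 20-km grids
    grid_ids = [101, 102]
    proj = map_projection.get_raster_or_vector_srs_info_proj4('grid_20km.shp')  # hypothetical source
    save_selected_girds_and_ids(grid_ids, grid_polys, proj, 'selected_grids.shp')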
def main(options, args):
    img_dir = args[0]
    output = options.output
    if os.path.isdir(img_dir):
        img_pattern = options.image_pattern
        if output is None:
            output = os.path.basename(img_dir) + '_boxes.gpkg'
        img_list = io_function.get_file_list_by_pattern(img_dir, img_pattern)
        if len(img_list) < 1:
            raise ValueError('No images in %s with pattern: %s' % (img_dir, img_pattern))
    else:
        # if it's a single file
        img_list = [img_dir]
        if output is None:
            output = os.path.basename(img_dir) + '_bound.gpkg'

    print('Found %d rasters in %s' % (len(img_list), img_dir))

    # check that all rasters share the same projection
    prj4_1st = raster_io.get_projection(img_list[0], 'proj4')
    for idx in range(1, len(img_list)):
        prj4 = raster_io.get_projection(img_list[idx], 'proj4')
        if prj4_1st != prj4:
            raise ValueError('projection inconsistent between %s and %s' % (img_list[0], img_list[idx]))

    img_boxes = [raster_io.get_image_bound_box(img_path) for img_path in img_list]
    img_box_polygons = [vector_gpd.convert_image_bound_to_shapely_polygon(box) for box in img_boxes]

    # save to file
    wkt = map_projection.get_raster_or_vector_srs_info_proj4(img_list[0])
    save_pd = pd.DataFrame({'raster': img_list, 'Polygon': img_box_polygons})
    vector_gpd.save_polygons_to_files(save_pd, 'Polygon', wkt, output, format='GPKG')
    print('saved raster extents to %s' % output)
    return
def merge_shape_files(file_list, save_path):
    if os.path.isfile(save_path):
        print('%s already exists' % save_path)
        return True
    if len(file_list) < 1:
        raise IOError("no input shapefiles")
    ref_prj = get_raster_or_vector_srs_info_proj4(file_list[0])

    # read polygons as shapely objects
    attribute_names = None
    polygons_list = []
    polygon_attributes_list = []
    b_get_field_name = False

    for idx, shp_path in enumerate(file_list):
        # check projection
        prj = get_raster_or_vector_srs_info_proj4(shp_path)
        if prj != ref_prj:
            raise ValueError('Projection inconsistent: %s is different from the first one' % shp_path)
        shapefile = gpd.read_file(shp_path)
        if len(shapefile.geometry.values) < 1:
            basic.outputlogMessage('warning, %s is empty, skip' % shp_path)
            continue

        # go through each geometry
        for ri, row in shapefile.iterrows():
            if b_get_field_name is False:
                attribute_names = row.keys().to_list()
                attribute_names = attribute_names[:len(attribute_names) - 1]  # drop the geometry column (last one)
                b_get_field_name = True
            polygons_list.append(row['geometry'])
            polygon_attributes = row[:len(row) - 1].to_list()
            if len(polygon_attributes) < len(attribute_names):
                # pad missing attributes with None
                polygon_attributes.extend([None] * (len(attribute_names) - len(polygon_attributes)))
            polygon_attributes_list.append(polygon_attributes)

    # save results
    save_polygons_attributes = {}
    for idx, attribute in enumerate(attribute_names):
        values = [item[idx] for item in polygon_attributes_list]
        save_polygons_attributes[attribute] = values
    save_polygons_attributes["Polygons"] = polygons_list
    polygon_df = pd.DataFrame(save_polygons_attributes)
    return vector_gpd.save_polygons_to_files(polygon_df, 'Polygons', ref_prj, save_path)
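# Usage sketch (not part of the original code): merging several shapefiles, which must
# share one projection, into a single file. The directory and pattern are hypothetical.
def example_merge_shape_files():
    shp_list = io_function.get_file_list_by_pattern('mapping_results', '*/polygons_*.shp')  # hypothetical
    if merge_shape_files(shp_list, 'polygons_merged.shp'):
        print('merged %d shapefiles into polygons_merged.shp' % len(shp_list))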
def get_surrounding_polygons(remain_polyons, surrounding_shp, wkt, dem_diff_tif, buffer_surrounding, process_num):
    if os.path.isfile(surrounding_shp):
        # the file exists; also check whether it is complete
        surr_polys, surr_demD = vector_gpd.read_polygons_attributes_list(surrounding_shp, 'demD_mean')
        if len(surr_polys) < len(remain_polyons) or surr_demD is None or len(surr_demD) < len(remain_polyons):
            basic.outputlogMessage('%s already exists, but not complete, will be overwritten' % surrounding_shp)
        else:
            basic.outputlogMessage('%s already exists, skip' % surrounding_shp)
            return surrounding_shp

    # based on the merged polygons, calculate the relative dem_diff
    surrounding_polygons = vector_gpd.get_surrounding_polygons(remain_polyons, buffer_surrounding)
    surr_pd = pd.DataFrame({'Polygon': surrounding_polygons})
    vector_gpd.save_polygons_to_files(surr_pd, 'Polygon', wkt, surrounding_shp)
    # tile_min_overlap is assumed to be a module-level variable
    raster_statistic.zonal_stats_multiRasters(surrounding_shp, dem_diff_tif, tile_min_overlap=tile_min_overlap,
                                              stats=['mean', 'std', 'count'], prefix='demD', process_num=process_num)
    return surrounding_shp
def main():
    # grid polygons
    grid_20km = os.path.join(shp_dir, 'grid_20km.shp')
    ArcticDEM_coverage = os.path.join(shp_dir, 'tiles.shp')

    # the grid and the ArcticDEM coverage should have the same projection
    grid_prj = map_projection.get_raster_or_vector_srs_info_proj4(grid_20km)
    perma_area_prj = map_projection.get_raster_or_vector_srs_info_proj4(ArcticDEM_coverage)
    if grid_prj != perma_area_prj:
        raise ValueError('%s and %s do not have the same projection' % (grid_prj, perma_area_prj))

    grids = vector_gpd.read_polygons_gpd(grid_20km)
    DEM_areas = vector_gpd.read_polygons_gpd(ArcticDEM_coverage)
    keep_grids = []
    keep_grid_ids = []
    grid_id = 0
    for idx, grid in enumerate(grids):
        print(' processing %dth grid' % idx)
        for dem_area in DEM_areas:
            inte_res = dem_area.intersection(grid)
            if inte_res.is_empty is False:
                if inte_res.area < 100 * 100:
                    # if the overlap is too small, ignore it
                    continue
                keep_grids.append(grid)
                keep_grid_ids.append(grid_id)
                grid_id += 1
                break   # keep each grid only once, even if it overlaps several DEM areas

    # save
    save_path = os.path.join(shp_dir, 'ArcticDEM_grid_20km.shp')
    save_polygons_attributes = {'id': keep_grid_ids, "Polygons": keep_grids}
    # wkt_string = map_projection.get_raster_or_vector_srs_info_wkt(grid_20km)
    wkt_string = map_projection.get_raster_or_vector_srs_info_proj4(grid_20km)
    polygon_df = pd.DataFrame(save_polygons_attributes)
    vector_gpd.save_polygons_to_files(polygon_df, 'Polygons', wkt_string, save_path)
def post_processing_subsidence(in_shp):
    polygons = vector_gpd.read_polygons_gpd(in_shp)

    # get shape info
    # poly_shapeinfo_list = []
    save_polygons = []
    for poly in polygons:
        # get INarea, INperimete, WIDTH, HEIGHT, ratio_w_h, hole_count
        # shapeinfo = vector_gpd.calculate_polygon_shape_info(poly)   # error: 'MultiPolygon' object has no attribute 'interiors'
        # poly_shapeinfo_list.append(shapeinfo)
        # if shapeinfo['INarea'] < 40:
        if poly.area < 90:  # remove polygons with area smaller than 90 m^2
            continue
        save_polygons.append(poly)

    save_pd = pd.DataFrame({'Polygon': save_polygons})
    wkt = map_projection.get_raster_or_vector_srs_info_wkt(in_shp)
    save_shp = io_function.get_name_by_adding_tail(in_shp, 'post')
    vector_gpd.save_polygons_to_files(save_pd, 'Polygon', wkt, save_shp)
def merge_multi_headwall_shp_to_one(shp_list, save_path):
    '''
    merge multiple shapefiles of headwalls on different dates into one file
    :param shp_list: list of input shapefiles
    :param save_path:
    :return:
    '''
    # shp_list = io_function.get_file_list_by_ext('.shp', shp_dir, bsub_folder=False)
    if len(shp_list) < 1:
        print('Warning, no input shapefile, skip merging multiple shapefiles')
        return False
    if os.path.isfile(save_path):
        print('warning, %s already exists, skip' % save_path)
        return True

    # merge the shapefiles one by one, adding the year and date from the filename
    line_list = []
    id_list = []
    year_list = []
    date_list = []
    length_m_list = []  # length in meters
    for shp in shp_list:
        # these are line vectors, but we can still read them with the following function
        lines, lengths = vector_gpd.read_polygons_attributes_list(shp, 'length_m')
        curr_count = len(id_list)
        acquisition_date = timeTools.get_yeardate_yyyymmdd(os.path.basename(shp))
        year = acquisition_date.year
        for idx, (line, length) in enumerate(zip(lines, lengths)):
            id_list.append(idx + curr_count)
            line_list.append(line)
            length_m_list.append(length)
            year_list.append(year)
            date_list.append(timeTools.date2str(acquisition_date))

    save_pd = pd.DataFrame({'id': id_list, 'length_m': length_m_list, 'dem_year': year_list,
                            'dem_date': date_list, 'Line': line_list})
    ref_prj = map_projection.get_raster_or_vector_srs_info_proj4(shp_list[0])
    return vector_gpd.save_polygons_to_files(save_pd, 'Line', ref_prj, save_path)
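# Usage sketch (not part of the original code): the filenames must contain a yyyymmdd
# date for timeTools.get_yeardate_yyyymmdd to parse; the paths below are hypothetical.
def example_merge_headwall_lines():
    shp_list = ['headwall_lines_20130701.shp', 'headwall_lines_20170830.shp']  # hypothetical inputs
    merge_multi_headwall_shp_to_one(shp_list, 'headwall_lines_multiDates.shp')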
def raster2shapefile_large(in_raster, working_dir, out_shp=None, connect8=True, process_num=1):
    if out_shp is None:
        out_shp = os.path.splitext(in_raster)[0] + '.shp'
    if os.path.isfile(out_shp):
        print('%s exists, skip' % out_shp)
        return out_shp
    if os.path.isdir(working_dir) is False:
        io_function.mkdir(working_dir)
    pre_name = os.path.splitext(os.path.basename(in_raster))[0]
    wkt = map_projection.get_raster_or_vector_srs_info_proj4(in_raster)

    # height, width, band_num, date_type = raster_io.get_height_width_bandnum_dtype(in_raster)
    # print('input image: height, width, band_num, date_type', height, width, band_num, date_type)

    # split the raster into 1024x1024 patches
    out_raster_list = split_image.split_image(in_raster, working_dir, 1024, 1024, adj_overlay_x=0,
                                              adj_overlay_y=0, out_format='GTIFF', pre_name=pre_name)
    patch_count = len(out_raster_list)
    patch_shps = []
    polygons_no_touch = []
    polygons_touch_edge = []
    if process_num == 1:
        for idx, tif in enumerate(out_raster_list):
            patch_shp = polygonzie_one_small_raster(idx, tif, patch_count, in_raster, connect8=connect8)
            patch_shps.append(patch_shp)
            # read polygons, separating those that touch the patch edge
            poly_no_touch, poly_touch = read_polygons_from_small_patch(patch_shp, tif)
            polygons_no_touch.extend(poly_no_touch)
            polygons_touch_edge.extend(poly_touch)
    elif process_num > 1:
        thread_pool = Pool(process_num)  # multiple processes
        parameters_list = [(idx, tif, patch_count, True) for idx, tif in enumerate(out_raster_list)]
        results = thread_pool.starmap(polygonzie_read_one_patch, parameters_list)  # need python3
        for poly_no_touch, poly_touch in results:
            polygons_no_touch.extend(poly_no_touch)
            polygons_touch_edge.extend(poly_touch)
        thread_pool.close()
    else:
        raise ValueError('Wrong process number %s' % str(process_num))

    # save polygons touching a patch edge to file (for testing)
    save_pd = pd.DataFrame({'Polygons': polygons_touch_edge})
    save_polygon_edge = os.path.splitext(out_shp)[0] + '_polyTouchEdge.gpkg'
    vector_gpd.save_polygons_to_files(save_pd, 'Polygons', wkt, save_polygon_edge, format='GPKG')
    save_polygon_edge_2 = os.path.splitext(out_shp)[0] + '_polyTouchEdge.shp'
    vector_gpd.save_polygons_to_files(save_pd, 'Polygons', wkt, save_polygon_edge_2)
    # sys.exit(0)   # debugging leftover; it would make everything below unreachable
    print('count of polygons touching a patch edge', len(polygons_touch_edge))

    # merge all polygons that touch a patch edge
    print(timeTools.get_now_time_str(), 'start building adjacent_matrix')
    # note: build_adjacent_map_of_polygons had problems running in parallel on some CURC
    # nodes (but was fine on a laptop and uist); set process_num = 1 there if needed
    polygons_touch_edge_buff = [item.buffer(0.1) for item in polygons_touch_edge]
    adjacent_matrix = vector_gpd.build_adjacent_map_of_polygons(polygons_touch_edge_buff, process_num=process_num)
    print(timeTools.get_now_time_str(), 'finish building adjacent_matrix')
    if adjacent_matrix is False:
        return False
    merged_polygons = vector_features.merge_touched_polygons(polygons_touch_edge, adjacent_matrix)
    print(timeTools.get_now_time_str(), 'finish merging touched polygons, get %d ones' % len(merged_polygons))

    # save all polygons to file
    polygons_no_touch.extend(merged_polygons)
    id_list = [idx for idx in range(len(polygons_no_touch))]
    save_pd = pd.DataFrame({'id': id_list, 'Polygons': polygons_no_touch})
    vector_gpd.save_polygons_to_files(save_pd, 'Polygons', wkt, out_shp)
    return out_shp   # consistent with the early exit when the file already exists
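# Usage sketch (not part of the original code): polygonizing a large mask raster
# with 4 processes; the raster path and working directory are hypothetical.
def example_raster2shapefile_large():
    in_raster = os.path.expanduser('~/Data/example/slump_mask.tif')  # hypothetical path
    out_shp = raster2shapefile_large(in_raster, 'split_patches', process_num=4)
    print('polygonized raster saved to %s' % out_shp)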
def yolo_results_to_shapefile(curr_dir, img_idx, area_save_dir, nms_overlap_thr, test_id):
    img_save_dir = os.path.join(area_save_dir, 'I%d' % img_idx)
    res_yolo_json = img_save_dir + '_result.json'
    res_json_files = []
    if os.path.isfile(res_yolo_json):
        print('found %s in %s, will get shapefile from it' % (res_yolo_json, area_save_dir))
    else:
        if os.path.isdir(img_save_dir):
            res_json_files = io_function.get_file_list_by_ext('.json', img_save_dir, bsub_folder=False)
            if len(res_json_files) < 1:
                print('Warning, no YOLO results in %s, skip' % img_save_dir)
                return None
            print('found %d json files for patches in %s, will get shapefile from them' % (len(res_json_files), img_save_dir))
        else:
            print('Warning, folder: %s does not exist, skip' % img_save_dir)
            return None

    out_name = os.path.basename(area_save_dir) + '_' + test_id
    # to shapefile
    out_shp = 'I%d' % img_idx + '_' + out_name + '.shp'
    out_shp_path = os.path.join(img_save_dir, out_shp)
    if os.path.isfile(out_shp_path):
        print('%s already exists' % out_shp_path)
    else:
        class_id_list = []
        name_list = []
        box_bounds_list = []
        confidence_list = []
        source_image_list = []
        if len(res_json_files) < 1:
            # use the result in *_result.json
            yolo_res_dict_list = io_function.read_dict_from_txt_json(res_yolo_json)
            total_frame = len(yolo_res_dict_list)
            image1 = yolo_res_dict_list[0]['filename']
            for idx, res_dict in enumerate(yolo_res_dict_list):
                id_list, na_list, con_list, box_list, image1 = boxes_yoloXY_to_imageXY(idx, total_frame, res_dict, ref_image=None)
                class_id_list.extend(id_list)
                name_list.extend(na_list)
                confidence_list.extend(con_list)
                box_bounds_list.extend(box_list)
                source_image_list.extend([os.path.basename(image1)] * len(box_list))
        else:
            # use the results in I0/*.json
            image1 = io_function.read_list_from_txt(os.path.join(area_save_dir, '%d.txt' % img_idx))[0]
            total_frame = len(res_json_files)  # the patch count
            # only open the image once
            with rasterio.open(image1) as src:
                for idx, f_json in enumerate(res_json_files):
                    id_list, na_list, con_list, box_list = boxes_minXYmaxXY_to_imageXY(idx, total_frame, f_json, src)
                    class_id_list.extend(id_list)
                    name_list.extend(na_list)
                    confidence_list.extend(con_list)
                    box_bounds_list.extend(box_list)
            # all boxes come from the same source image, so extend once after the loop
            source_image_list.extend([os.path.basename(image1)] * len(box_bounds_list))

        if len(box_bounds_list) < 1:
            print('Warning, no predicted boxes in %s' % img_save_dir)
            return None

        # apply non-max suppression
        pick_index = non_max_suppression(np.array(box_bounds_list), probs=np.array(confidence_list),
                                         overlapThresh=nms_overlap_thr, b_geo=True)
        class_id_list = [class_id_list[idx] for idx in pick_index]
        name_list = [name_list[idx] for idx in pick_index]
        confidence_list = [confidence_list[idx] for idx in pick_index]
        box_bounds_list = [box_bounds_list[idx] for idx in pick_index]
        source_image_list = [source_image_list[idx] for idx in pick_index]

        # to polygons
        box_poly_list = [vector_gpd.convert_image_bound_to_shapely_polygon(item) for item in box_bounds_list]

        # save to shapefile
        detect_boxes_dict = {'class_id': class_id_list, 'name': name_list, 'source_img': source_image_list,
                             'confidence': confidence_list, "Polygon": box_poly_list}
        save_pd = pd.DataFrame(detect_boxes_dict)
        ref_prj = map_projection.get_raster_or_vector_srs_info_proj4(image1)
        vector_gpd.save_polygons_to_files(save_pd, 'Polygon', ref_prj, out_shp_path)

    return out_shp_path
def remove_merge_polygon_in_one_shp(in_shp, org_raster, attribute_name, attribute_range, min_area, max_area, process_num=1):
    # attribute_range: [min, max]; either bound can be None
    lower = attribute_range[0]
    upper = attribute_range[1]

    save_shp = io_function.get_name_by_adding_tail(in_shp, 'post')
    if os.path.isfile(save_shp):
        basic.outputlogMessage('%s exists, skip' % save_shp)
        return save_shp

    shp_pre = io_function.get_name_no_ext(in_shp)
    # read polygons and labels from the segmentation algorithm; note: some polygons may have the same label
    polygons, attr_value_list = vector_gpd.read_polygons_attributes_list(in_shp, attribute_name)
    print('Read %d polygons' % len(polygons))
    if attr_value_list is None:
        raise ValueError('%s not in %s, need to remove it and then re-create' % (attribute_name, in_shp))

    remain_polygons = []
    rm_min_area_count = 0
    rm_att_value_count = 0
    for poly, att_value in zip(polygons, attr_value_list):
        if poly.area < min_area:
            rm_min_area_count += 1
            continue
        if lower is None:
            if att_value >= upper:
                rm_att_value_count += 1
                continue
        elif upper is None:
            if att_value <= lower:
                rm_att_value_count += 1
                continue
        else:
            # out of range, remove
            if att_value < lower or att_value > upper:
                rm_att_value_count += 1
                continue
        remain_polygons.append(poly)
    print('removed %d polygons based on min_area, %d polygons based on attribute_range, %d remain' %
          (rm_min_area_count, rm_att_value_count, len(remain_polygons)))

    if len(remain_polygons) > 1:
        # we should only merge polygons with similar reduction, but polygons with mean reduction
        # outside the threshold have already been removed; merge touching polygons
        print(timeTools.get_now_time_str(), 'start building adjacent_matrix')
        machine_name = os.uname()[1]
        # note: build_adjacent_map_of_polygons had problems running in parallel on some CURC
        # nodes (but was fine on a laptop and uist); set process_num = 1 there if needed
        adjacent_matrix = vector_gpd.build_adjacent_map_of_polygons(remain_polygons, process_num=process_num)
        print(timeTools.get_now_time_str(), 'finish building adjacent_matrix')
        if adjacent_matrix is False:
            return False
        merged_polygons = vector_features.merge_touched_polygons(remain_polygons, adjacent_matrix)
        print(timeTools.get_now_time_str(), 'finish merging touched polygons, get %d ones' % len(merged_polygons))

        # remove large ones
        remain_polygons = []
        rm_max_area_count = 0
        for poly in merged_polygons:
            if poly.area > max_area:
                rm_max_area_count += 1
                continue
            remain_polygons.append(poly)
        print('removed %d polygons based on max_area, %d remain' % (rm_max_area_count, len(remain_polygons)))

    wkt = map_projection.get_raster_or_vector_srs_info_wkt(in_shp)

    polygons_noMulti = [vector_gpd.MultiPolygon_to_polygons(idx, poly) for idx, poly in enumerate(remain_polygons)]
    remain_polygons = []
    for polys in polygons_noMulti:
        polys = [poly for poly in polys if poly.area > min_area]  # remove tiny polygons before buffering
        remain_polygons.extend(polys)
    print('converted MultiPolygons to polygons, removed some small polygons, %d remain' % len(remain_polygons))

    # based on the merged polygons, calculate the mean dem diff and the relative dem_diff
    buffer_surrounding = 20  # meters
    surrounding_polygons = vector_gpd.get_surrounding_polygons(remain_polygons, buffer_surrounding)
    surrounding_shp = io_function.get_name_by_adding_tail(in_shp, 'surrounding')
    surr_pd = pd.DataFrame({'Polygon': surrounding_polygons})
    vector_gpd.save_polygons_to_files(surr_pd, 'Polygon', wkt, surrounding_shp)
    raster_statistic.zonal_stats_multiRasters(surrounding_shp, org_raster, stats=['mean', 'std', 'count'],
                                              prefix='demD', process_num=process_num)

    # calculate attributes of the remaining ones: area, dem_diff mean and std
    merged_pd = pd.DataFrame({'Polygon': remain_polygons})
    merged_shp = io_function.get_name_by_adding_tail(in_shp, 'merged')
    vector_gpd.save_polygons_to_files(merged_pd, 'Polygon', wkt, merged_shp)
    # note: the original referenced an undefined 'dem_diff_tif' here; org_raster is the DEM diff raster
    raster_statistic.zonal_stats_multiRasters(merged_shp, org_raster, stats=['mean', 'std', 'count'],
                                              prefix='demD', process_num=process_num)

    # calculate the relative dem diff
    surr_dem_diff_list = vector_gpd.read_attribute_values_list(surrounding_shp, 'demD_mean')
    merge_poly_dem_diff_list = vector_gpd.read_attribute_values_list(merged_shp, 'demD_mean')
    if len(surr_dem_diff_list) != len(merge_poly_dem_diff_list):
        raise ValueError('The lengths of surr_dem_diff_list and merge_poly_dem_diff_list are different')
    relative_dem_diff_list = [mer - sur for sur, mer in zip(surr_dem_diff_list, merge_poly_dem_diff_list)]

    merge_poly_demD_std_list = vector_gpd.read_attribute_values_list(merged_shp, 'demD_std')
    merge_poly_demD_count_list = vector_gpd.read_attribute_values_list(merged_shp, 'demD_count')

    # remove polygons whose relative dem diff is above the upper bound
    save_polygons = []
    save_demD_mean_list = []
    save_demD_std_list = []
    save_demD_count_list = []
    save_rel_diff_list = []
    save_surr_demD_list = []
    rm_rel_dem_diff_count = 0
    rm_min_area_count = 0
    for idx in range(len(remain_polygons)):
        # relative dem diff (the original compared against an undefined 'dem_diff_thread_m';
        # here we use the upper bound of attribute_range)
        if relative_dem_diff_list[idx] > upper:
            rm_rel_dem_diff_count += 1
            continue
        # converting a MultiPolygon to Polygons may create some small polygons
        if remain_polygons[idx].area < min_area:
            rm_min_area_count += 1
            continue
        save_polygons.append(remain_polygons[idx])
        save_demD_mean_list.append(merge_poly_dem_diff_list[idx])
        save_demD_std_list.append(merge_poly_demD_std_list[idx])
        save_demD_count_list.append(merge_poly_demD_count_list[idx])
        save_rel_diff_list.append(relative_dem_diff_list[idx])
        save_surr_demD_list.append(surr_dem_diff_list[idx])
    print('removed %d polygons based on rel_demD and %d based on min_area, %d remain' %
          (rm_rel_dem_diff_count, rm_min_area_count, len(save_polygons)))

    poly_ids = [item + 1 for item in range(len(save_polygons))]
    poly_areas = [poly.area for poly in save_polygons]
    save_pd = pd.DataFrame({'poly_id': poly_ids, 'poly_area': poly_areas, 'demD_mean': save_demD_mean_list,
                            'demD_std': save_demD_std_list, 'demD_count': save_demD_count_list,
                            'surr_demD': save_surr_demD_list, 'rel_demD': save_rel_diff_list,
                            'Polygon': save_polygons})
    vector_gpd.save_polygons_to_files(save_pd, 'Polygon', wkt, save_shp)

    # add the date difference if it is available
    date_diff_base = os.path.basename(org_raster).replace('DEM_diff', 'date_diff')
    date_diff_tif = os.path.join(os.path.dirname(org_raster), date_diff_base)
    if os.path.isfile(date_diff_tif):
        raster_statistic.zonal_stats_multiRasters(save_shp, date_diff_tif, stats=['mean', 'std'],
                                                  prefix='dateD', process_num=process_num)

    return save_shp
def remove_polygons_based_relative_dem_diff(remain_polyons, merged_shp, surrounding_shp, wkt, save_shp, min_area, dem_diff_thread_m):
    if os.path.isfile(save_shp):
        # the file exists; also check whether it is complete
        polys, demD_values = vector_gpd.read_polygons_attributes_list(save_shp, 'demD_mean')
        if len(polys) < 1 or demD_values is None or len(demD_values) < 1:
            basic.outputlogMessage('%s already exists, but not complete, will be overwritten' % save_shp)
        else:
            basic.outputlogMessage('%s exists, skip' % save_shp)
            return save_shp

    # calculate the relative dem diff
    surr_dem_diff_list = vector_gpd.read_attribute_values_list(surrounding_shp, 'demD_mean')
    merge_poly_dem_diff_list = vector_gpd.read_attribute_values_list(merged_shp, 'demD_mean')
    # convert to float type (changes None to nan)
    surr_dem_diff_list = np.array(surr_dem_diff_list, dtype=float)
    merge_poly_dem_diff_list = np.array(merge_poly_dem_diff_list, dtype=float)
    if len(surr_dem_diff_list) != len(merge_poly_dem_diff_list):
        raise ValueError('The lengths of surr_dem_diff_list and merge_poly_dem_diff_list are different')
    relative_dem_diff_list = [mer - sur for sur, mer in zip(surr_dem_diff_list, merge_poly_dem_diff_list)]

    merge_poly_demD_std_list = vector_gpd.read_attribute_values_list(merged_shp, 'demD_std')
    merge_poly_demD_count_list = vector_gpd.read_attribute_values_list(merged_shp, 'demD_count')

    # remove polygons with a relative dem diff above the threshold
    save_polygons = []
    save_demD_mean_list = []
    save_demD_std_list = []
    save_demD_count_list = []
    save_rel_diff_list = []
    save_surr_demD_list = []
    rm_rel_dem_diff_count = 0
    rm_min_area_count = 0
    for idx in range(len(remain_polyons)):
        # relative dem diff
        if relative_dem_diff_list[idx] > dem_diff_thread_m:
            rm_rel_dem_diff_count += 1
            continue
        # converting a MultiPolygon to Polygons (in the merge step) may create some small polygons
        if remain_polyons[idx].area < min_area:
            rm_min_area_count += 1
            continue
        save_polygons.append(remain_polyons[idx])
        save_demD_mean_list.append(merge_poly_dem_diff_list[idx])
        save_demD_std_list.append(merge_poly_demD_std_list[idx])
        save_demD_count_list.append(merge_poly_demD_count_list[idx])
        save_rel_diff_list.append(relative_dem_diff_list[idx])
        save_surr_demD_list.append(surr_dem_diff_list[idx])
    print('removed %d polygons based on rel_demD and %d based on min_area, %d remain' %
          (rm_rel_dem_diff_count, rm_min_area_count, len(save_polygons)))

    if len(save_polygons) < 1:
        print('Warning, no polygons remain after removal based on relative demD')
        return None

    poly_ids = [item + 1 for item in range(len(save_polygons))]
    poly_areas = [poly.area for poly in save_polygons]
    save_pd = pd.DataFrame({'poly_id': poly_ids, 'poly_area': poly_areas, 'demD_mean': save_demD_mean_list,
                            'demD_std': save_demD_std_list, 'demD_count': save_demD_count_list,
                            'surr_demD': save_surr_demD_list, 'rel_demD': save_rel_diff_list,
                            'Polygon': save_polygons})
    vector_gpd.save_polygons_to_files(save_pd, 'Polygon', wkt, save_shp)
    return save_shp
def main():
    # grid polygons
    grid_50km = os.path.join(shp_dir, 'PAMPA_outline_utm_50grid.shp')
    # main permafrost areas based on the permafrost map, pre-processed to remove small ones and simplify the boundaries
    main_area_simp = os.path.join(shp_dir, 'PAMPA_outline_utm.shp')

    # the grid and the main permafrost areas should have the same projection
    grid_prj = map_projection.get_raster_or_vector_srs_info_proj4(grid_50km)
    perma_area_prj = map_projection.get_raster_or_vector_srs_info_proj4(main_area_simp)
    if grid_prj != perma_area_prj:
        raise ValueError('%s and %s do not have the same projection' % (grid_prj, perma_area_prj))

    grids = vector_gpd.read_polygons_gpd(grid_50km)
    perma_areas = vector_gpd.read_polygons_gpd(main_area_simp)
    # perma_size_list = vector_gpd.read_attribute_values_list(qtp_main_perma_area_simp, 'Area_km2')
    small_perma_areas_list = []
    for idx, perma_poly in enumerate(perma_areas):
        print(' processing %dth permafrost area' % idx)
        # if the permafrost area is < 50*50 km^2, do not split it into smaller ones
        # if size < 2500:
        #     perma_poly = vector_gpd.fill_holes_in_a_polygon(perma_poly)
        #     small_perma_areas_list.append(perma_poly)
        #     continue

        # split the big permafrost area into many small ones
        for grid in grids:
            inte_res = perma_poly.intersection(grid)
            if inte_res.is_empty is False:
                inte_res_multi = vector_gpd.MultiPolygon_to_polygons(idx, inte_res)
                for tmp in inte_res_multi:
                    # remove holes if they exist
                    small_ones = vector_gpd.fill_holes_in_a_polygon(tmp)
                    # TODO: we should remove some really small polygons (< 1 km^2)
                    small_perma_areas_list.append(small_ones)

    # note: small polygons may need to be manually merged into adjacent ones in QGIS
    # save
    save_path = io_function.get_name_by_adding_tail(main_area_simp, 'small')
    save_path = os.path.join(shp_dir, os.path.basename(save_path))
    save_polygons_attributes = {}
    save_polygons_attributes["Polygons"] = small_perma_areas_list
    # wkt_string = map_projection.get_raster_or_vector_srs_info_wkt(main_area_simp)
    wkt_string = map_projection.get_raster_or_vector_srs_info_proj4(main_area_simp)
    polygon_df = pd.DataFrame(save_polygons_attributes)
    vector_gpd.save_polygons_to_files(polygon_df, 'Polygons', wkt_string, save_path)
def filter_merge_polygons(in_shp, merged_shp, wkt, min_area, max_area, dem_diff_tif, dem_diff_thread_m, process_num):
    if os.path.isfile(merged_shp):
        # the file exists; also check whether it is complete
        polys, demD_values = vector_gpd.read_polygons_attributes_list(merged_shp, 'demD_mean')
        if len(polys) < 1 or demD_values is None or len(demD_values) < 1:
            basic.outputlogMessage('%s already exists, but not complete, will be overwritten' % merged_shp)
        else:
            basic.outputlogMessage('%s exists, skip' % merged_shp)
            return merged_shp

    # read polygons and labels from the segmentation algorithm; note: some polygons may have the same label
    # polygons, demD_mean_list = vector_gpd.read_polygons_attributes_list(in_shp, 'demD_mean')
    polygons, attributes = vector_gpd.read_polygons_attributes_list(in_shp, ['demD_mean', 'DN'])
    demD_mean_list = attributes[0]
    DN_list = attributes[1]
    print('Read %d polygons' % len(polygons))
    if demD_mean_list is None:
        raise ValueError('demD_mean not in %s, need to remove it and then re-create' % in_shp)

    # replace None values (if any) with nan, then nan with 0
    demD_mean_list = np.array(demD_mean_list, dtype=float)
    demD_mean_list = np.nan_to_num(demD_mean_list)

    remain_polygons = []
    rm_min_area_count = 0
    rm_diff_thr_count = 0
    for poly, demD_mean in zip(polygons, demD_mean_list):
        if poly.area < min_area:
            rm_min_area_count += 1
            continue
        # mean value above the threshold: not subsidence
        if demD_mean > dem_diff_thread_m:
            rm_diff_thr_count += 1
            continue
        remain_polygons.append(poly)
    print('removed %d polygons based on min_area, %d polygons based on dem_diff_threshold, %d remain' %
          (rm_min_area_count, rm_diff_thr_count, len(remain_polygons)))

    if len(remain_polygons) < 1:
        return None

    # we should only merge polygons with similar reduction, but polygons with mean reduction
    # above the threshold have already been removed

    ############################################################
    ## option 1: build adjacent_matrix, then merge over the entire raster
    # (had problems running in parallel on some CURC nodes; set process_num = 1 there)
    # adjacent_matrix = vector_gpd.build_adjacent_map_of_polygons(remain_polygons, process_num=process_num)
    # if adjacent_matrix is False:
    #     return None
    # merged_polygons = vector_features.merge_touched_polygons(remain_polygons, adjacent_matrix)

    ############################################################
    ## option 2: build adjacent_matrix, then merge patch by patch (not much improvement)
    # label_id_range_txt = os.path.splitext(in_shp)[0] + '_label_IDrange.txt'
    # merged_polygons = merge_polygons_patchBYpatch(label_id_range_txt, remain_polygons, DN_list, process_num=process_num)

    ############################################################
    ## option 3 (used here): merge polygons by rasterizing the labels
    label_raster = os.path.splitext(in_shp)[0] + '_label.tif'
    merged_polygons = merge_polygon_rasterize(label_raster, remain_polygons)
    print(timeTools.get_now_time_str(), 'finish merging touched polygons, get %d ones' % len(merged_polygons))

    # remove large ones
    remain_polygons = []
    rm_max_area_count = 0
    for poly in merged_polygons:
        if poly.area > max_area:
            rm_max_area_count += 1
            continue
        remain_polygons.append(poly)
    print('removed %d polygons based on max_area, %d remain' % (rm_max_area_count, len(remain_polygons)))

    polygons_noMulti = [vector_gpd.MultiPolygon_to_polygons(idx, poly) for idx, poly in enumerate(remain_polygons)]
    remain_polygons = []
    for polys in polygons_noMulti:
        polys = [poly for poly in polys if poly.area > min_area]  # remove tiny polygons before buffering
        remain_polygons.extend(polys)
    print('converted MultiPolygons (filter_merge_polygons) to polygons and removed small ones, %d remain' % len(remain_polygons))
    if len(remain_polygons) < 1:
        return None

    # calculate attributes of the remaining ones: area, dem_diff mean and std
    merged_pd = pd.DataFrame({'Polygon': remain_polygons})
    vector_gpd.save_polygons_to_files(merged_pd, 'Polygon', wkt, merged_shp)

    # based on the merged polygons, calculate the mean dem diff
    # (tile_min_overlap is assumed to be a module-level variable)
    raster_statistic.zonal_stats_multiRasters(merged_shp, dem_diff_tif, tile_min_overlap=tile_min_overlap,
                                              stats=['mean', 'std', 'count'], prefix='demD', process_num=process_num)
    return merged_shp