def resample_crop_raster(ref_raster, input_raster, output_raster=None, resample_method='near'):
    if output_raster is None:
        output_raster = io_function.get_name_by_adding_tail(os.path.basename(input_raster), 'res_sub')

    if os.path.isfile(output_raster):
        print('Warning, %s exists' % output_raster)
        return output_raster

    # check projection
    prj4_ref = map_projection.get_raster_or_vector_srs_info_proj4(ref_raster)
    prj4_input = map_projection.get_raster_or_vector_srs_info_proj4(input_raster)
    if prj4_ref != prj4_input:
        raise ValueError('projection inconsistent: %s and %s' % (ref_raster, input_raster))

    # crop
    RSImageProcess.subset_image_baseimage(output_raster, input_raster, ref_raster,
                                          same_res=True, resample_m=resample_method)
    if os.path.isfile(output_raster):
        return output_raster
    return False
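# usage sketch for resample_crop_raster; 'ref_dem.tif' and 'input_image.tif' are
# hypothetical placeholders, not paths from the original project, and both rasters
# must already share a projection
if __name__ == '__main__':
    cropped = resample_crop_raster('ref_dem.tif', 'input_image.tif', resample_method='bilinear')
    if cropped is False:
        print('resample and crop failed')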
def main(options, args):
    extent_shp = args[0]
    img_path = args[1]
    save_dir = options.save_dir

    # check projection
    extent_prj = map_projection.get_raster_or_vector_srs_info_proj4(extent_shp)
    img_prj = map_projection.get_raster_or_vector_srs_info_proj4(img_path)
    if img_prj != extent_prj:
        raise ValueError('Projection of %s and %s is different' % (extent_shp, img_path))

    out_img = io_function.get_name_by_adding_tail(img_path, 'sub')
    out_img = os.path.join(save_dir, os.path.basename(out_img))

    extent_polys = vector_gpd.read_polygons_gpd(extent_shp)
    if len(extent_polys) != 1:
        raise ValueError('currently only support one polygon')
    for ext_poly in extent_polys:
        subset_image_by_polygon_min(img_path, out_img, ext_poly, resample_m='bilinear',
                                    o_format='GTiff', out_res=None)
def merge_shape_files(file_list, save_path):
    if os.path.isfile(save_path):
        print('%s already exists' % save_path)
        return True
    if len(file_list) < 1:
        raise IOError("no input shapefiles")

    ref_prj = map_projection.get_raster_or_vector_srs_info_proj4(file_list[0])

    # read polygons as shapely objects
    attribute_names = None
    polygons_list = []
    polygon_attributes_list = []
    b_get_field_name = False

    for idx, shp_path in enumerate(file_list):
        # check projection
        prj = map_projection.get_raster_or_vector_srs_info_proj4(shp_path)
        if prj != ref_prj:
            raise ValueError('Projection inconsistent: %s is different from the first one' % shp_path)

        shapefile = gpd.read_file(shp_path)
        if len(shapefile.geometry.values) < 1:
            basic.outputlogMessage('warning, %s is empty, skip' % shp_path)
            continue

        # go through each geometry
        for ri, row in shapefile.iterrows():
            if b_get_field_name is False:
                attribute_names = row.keys().to_list()
                attribute_names = attribute_names[:-1]  # drop the geometry column (the last one)
                b_get_field_name = True
            polygons_list.append(row['geometry'])
            polygon_attributes = row[:-1].to_list()
            if len(polygon_attributes) < len(attribute_names):
                polygon_attributes.extend([None] * (len(attribute_names) - len(polygon_attributes)))
            polygon_attributes_list.append(polygon_attributes)

    # save results
    save_polygons_attributes = {}
    for idx, attribute in enumerate(attribute_names):
        values = [item[idx] for item in polygon_attributes_list]
        save_polygons_attributes[attribute] = values

    save_polygons_attributes["Polygons"] = polygons_list
    polygon_df = pd.DataFrame(save_polygons_attributes)
    return save_polygons_to_files(polygon_df, 'Polygons', ref_prj, save_path)
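# a simpler geopandas-native sketch of the same merge, shown only for comparison;
# this is an alternative, not the project's method, and it assumes all inputs
# already share a CRS (as the function above enforces)
import geopandas as gpd
import pandas as pd

def merge_shp_simple(file_list, save_path):
    frames = [gpd.read_file(p) for p in file_list]
    # pandas.concat preserves the geometry column; take the CRS from the first file
    merged = gpd.GeoDataFrame(pd.concat(frames, ignore_index=True), crs=frames[0].crs)
    merged.to_file(save_path)
    return save_path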
def get_occurence_for_multi_observation(shp_list):
    if len(shp_list) < 1:
        return False

    # check the projection of the shapefiles, they should be the same
    new_shp_proj4 = map_projection.get_raster_or_vector_srs_info_proj4(shp_list[0])
    for idx in range(len(shp_list) - 1):
        shp_proj4 = map_projection.get_raster_or_vector_srs_info_proj4(shp_list[idx + 1])
        if shp_proj4 != new_shp_proj4:
            raise ValueError('error, projection inconsistent between %s and %s' % (new_shp_proj4, shp_proj4))

    polygons_change_analyze.cal_multi_temporal_iou_and_occurrence(shp_list, '')
def main():
    ArcticDEM_grid_20km = os.path.join(shp_dir, 'ArcticDEM_grid_20km.shp')
    # ref_raster = 'grid_20km_bin.tif'
    polygons, ids = vector_gpd.read_polygons_attributes_list(ArcticDEM_grid_20km, 'id',
                                                             b_fix_invalid_polygon=False)
    save_raster = os.path.join(shp_dir, 'ArcticDEM_grid_20km_id.tif')
    # raster_io.burn_polygons_to_a_raster(ref_raster, polygons, ids, save_raster, date_type='uint16')

    # if there is no reference raster, derive the extent and projection from the vector file
    extent = vector_gpd.get_vector_file_bounding_box(ArcticDEM_grid_20km)
    res = 20000  # 20 km
    wkt_string = map_projection.get_raster_or_vector_srs_info_proj4(ArcticDEM_grid_20km)
    nodata = 2 ** 16 - 1
    raster_io.burn_polygons_to_a_raster(None, polygons, ids, save_raster, date_type='uint16',
                                        xres=res, yres=res, extent=extent, ref_prj=wkt_string,
                                        nodata=nodata)
def get_occurence_for_multi_observation(shp_list):
    if len(shp_list) < 2:
        return False  # at least two observations are needed to calculate the occurrence

    cd_dir = os.path.expanduser('~/codes/PycharmProjects/ChangeDet_DL/thawSlumpChangeDet')
    sys.path.insert(0, cd_dir)
    import polygons_change_analyze

    # check the projection of the shapefiles, they should be the same
    new_shp_proj4 = map_projection.get_raster_or_vector_srs_info_proj4(shp_list[0])
    for idx in range(len(shp_list) - 1):
        shp_proj4 = map_projection.get_raster_or_vector_srs_info_proj4(shp_list[idx + 1])
        if shp_proj4 != new_shp_proj4:
            raise ValueError('error, projection inconsistent between %s and %s' % (new_shp_proj4, shp_proj4))

    polygons_change_analyze.cal_multi_temporal_iou_and_occurrence(shp_list, '')
def main(options, args):
    extent_shp = args[0]
    dem_index_shp = args[1]

    pre_name = os.path.splitext(os.path.basename(extent_shp))[0]
    pre_name += '_Tile' if 'Tile' in os.path.basename(dem_index_shp) else '_Strip'

    xlsx_size_path = os.path.splitext(os.path.basename(dem_index_shp))[0] + '_fileSize.xlsx'
    print('xlsx file for saving file size:', xlsx_size_path)

    # extent polygons and projection (proj4)
    extent_shp_prj = map_projection.get_raster_or_vector_srs_info_proj4(extent_shp)
    dem_shp_prj = map_projection.get_raster_or_vector_srs_info_proj4(dem_index_shp)

    if extent_shp_prj != dem_shp_prj:
        basic.outputlogMessage('%s and %s do not have the same projection, will reproject %s'
                               % (extent_shp, dem_index_shp, os.path.basename(extent_shp)))
        epsg = map_projection.get_raster_or_vector_srs_info_epsg(dem_index_shp)
        extent_polys = vector_gpd.read_shape_gpd_to_NewPrj(extent_shp, epsg)
    else:
        extent_polys = vector_gpd.read_polygons_gpd(extent_shp)

    # read 'grid_id' if the extent shp is from a grid shp file; if not, grid_id_list will be None
    grid_id_list = vector_gpd.read_attribute_values_list(extent_shp, 'grid_id')

    if len(extent_polys) < 1:
        raise ValueError('No polygons in %s' % extent_shp)
    basic.outputlogMessage('%d extent polygons in %s' % (len(extent_polys), extent_shp))

    get_file_size_dem_tarball(dem_index_shp, extent_polys, pre_name, xlsx_size_path,
                              poly_ids=grid_id_list)
def main():
    # grid polygons
    grid_20km = os.path.join(shp_dir, 'grid_20km.shp')
    ArcticDEM_coverage = os.path.join(shp_dir, 'tiles.shp')

    # grid_20km and ArcticDEM_coverage should have the same projection
    grid_prj = map_projection.get_raster_or_vector_srs_info_proj4(grid_20km)
    perma_area_prj = map_projection.get_raster_or_vector_srs_info_proj4(ArcticDEM_coverage)
    if grid_prj != perma_area_prj:
        raise ValueError('%s and %s do not have the same projection' % (grid_prj, perma_area_prj))

    grids = vector_gpd.read_polygons_gpd(grid_20km)
    DEM_areas = vector_gpd.read_polygons_gpd(ArcticDEM_coverage)

    keep_grids = []
    keep_grid_ids = []
    id = 0
    for idx, grid in enumerate(grids):
        print(' processing %dth grid' % idx)
        for dem_area in DEM_areas:
            inte_res = dem_area.intersection(grid)
            if inte_res.is_empty is False:
                if inte_res.area < 100 * 100:
                    # if the overlap is too small, ignore it
                    continue
                keep_grids.append(grid)
                keep_grid_ids.append(id)
                id += 1

    # save
    save_path = os.path.join(shp_dir, 'ArcticDEM_grid_20km.shp')
    save_polygons_attributes = {'id': keep_grid_ids, "Polygons": keep_grids}
    wkt_string = map_projection.get_raster_or_vector_srs_info_proj4(grid_20km)
    polygon_df = pd.DataFrame(save_polygons_attributes)
    vector_gpd.save_polygons_to_files(polygon_df, 'Polygons', wkt_string, save_path)
def remove_polygons_outside_extent(input_shp, extent_shp, output):
    '''
    remove polygons that are not within the extent
    :param input_shp: input shapefile
    :param extent_shp: extent shapefile
    :param output: output path
    :return:
    '''
    # check projection, must be the same
    input_proj4 = map_projection.get_raster_or_vector_srs_info_proj4(input_shp)
    extent_proj4 = map_projection.get_raster_or_vector_srs_info_proj4(extent_shp)
    if input_proj4 != extent_proj4:
        raise ValueError('error, projection inconsistent between %s and %s' % (input_shp, extent_shp))

    # -progress: only works if the input layers have the "fast feature count" capability
    # ogr2ogr -progress -clipsrc ${extent_shp} ${save_shp} ${input_shp}
    arg_list = ['ogr2ogr', '-progress', '-clipsrc', extent_shp, output, input_shp]
    return basic.exec_command_args_list_one_file(arg_list, output)
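# standalone equivalent of the ogr2ogr call above using subprocess instead of the
# project's basic.exec_command_args_list_one_file wrapper (a sketch; it assumes
# ogr2ogr is on PATH and raises on a non-zero exit code)
import subprocess

def clip_shp_by_extent(input_shp, extent_shp, output):
    cmd = ['ogr2ogr', '-progress', '-clipsrc', extent_shp, output, input_shp]
    subprocess.run(cmd, check=True)  # raises CalledProcessError on failure
    return output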
def resample_crop_raster_using_shp(ref_shp, input_raster, output_raster=None, resample_method='near',
                                   save_dir='./', out_res=10, dst_nondata=128):
    if output_raster is None:
        output_raster = io_function.get_name_by_adding_tail(os.path.basename(input_raster), 'res_sub')
        output_raster = os.path.join(save_dir, output_raster)

    # check projection
    prj4_ref = map_projection.get_raster_or_vector_srs_info_proj4(ref_shp)
    prj4_input = map_projection.get_raster_or_vector_srs_info_proj4(input_raster)
    if prj4_ref != prj4_input:
        raise ValueError('projection inconsistent: %s and %s' % (ref_shp, input_raster))

    if os.path.isfile(output_raster):
        print('Warning, %s exists' % output_raster)
        return output_raster

    # crop
    RSImageProcess.subset_image_by_shapefile(input_raster, ref_shp, save_path=output_raster,
                                             dst_nondata=dst_nondata, resample_m=resample_method,
                                             xres=out_res, yres=out_res, compress='lzw',
                                             tiled='yes', bigtiff='IF_SAFER')
    if os.path.isfile(output_raster):
        return output_raster
    return False
def main(options, args):
    ref_raster = args[0]
    img_path = args[1]
    save_dir = options.save_dir
    save_path = options.save_path

    # check projection
    extent_prj = map_projection.get_raster_or_vector_srs_info_proj4(ref_raster)
    img_prj = map_projection.get_raster_or_vector_srs_info_proj4(img_path)
    if img_prj != extent_prj:
        raise ValueError('Projection of %s and %s is different' % (ref_raster, img_path))

    if save_path is None:
        out_img = io_function.get_name_by_adding_tail(img_path, 'sub')
        out_img = os.path.join(save_dir, os.path.basename(out_img))
    else:
        out_img = save_path

    subset_image_by_ref_image(img_path, out_img, ref_raster, resample_m='near')
def get_projection_proj4(geo_file):
    '''
    get the proj4 string
    :param geo_file: a shapefile or raster file
    :return: projection string in proj4 format
    '''
    import basic_src.map_projection as map_projection
    return map_projection.get_raster_or_vector_srs_info_proj4(geo_file)
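# a standalone sketch of what the project helper likely does internally, using GDAL's
# Python bindings directly (an assumption; the original comments mention gdalsrsinfo).
# requires GDAL >= 2.x, where gdal.OpenEx handles both raster and vector datasets
from osgeo import gdal, osr

def get_proj4_with_gdal(geo_file):
    ds = gdal.OpenEx(geo_file)  # opens both raster and vector datasets
    if ds.RasterCount > 0:
        srs = osr.SpatialReference(wkt=ds.GetProjection())
    else:
        srs = ds.GetLayer(0).GetSpatialRef()
    return srs.ExportToProj4().strip()  # e.g., '+proj=longlat +datum=WGS84 +no_defs'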
def rasterize_polygons(poly_path, burn_value, attribute_name, xres, yres, save_path, datatype='Byte'):
    '''
    rasterize polygons to a raster file
    :param poly_path: path of the polygon vector file
    :param burn_value: fixed value to burn in (ignored if attribute_name is set)
    :param attribute_name: attribute whose values will be burned in
    :param xres: x resolution
    :param yres: y resolution
    :param save_path: output path
    :param datatype: output data type: 'Byte', 'UInt16', or others (saved as int32)
    :return:
    '''
    import raster_io
    import vector_gpd
    import basic_src.map_projection as map_projection

    if attribute_name is not None:
        polygons, values = vector_gpd.read_polygons_attributes_list(poly_path, attribute_name,
                                                                    b_fix_invalid_polygon=False)
        burn_value = values
    else:
        polygons = vector_gpd.read_polygons_gpd(poly_path, b_fix_invalid_polygon=False)

    if datatype == 'Byte':
        dtype = 'uint8'
    elif datatype == 'UInt16':
        dtype = 'uint16'
    else:
        dtype = 'int32'

    extent = vector_gpd.get_vector_file_bounding_box(poly_path)
    wkt_string = map_projection.get_raster_or_vector_srs_info_proj4(poly_path)
    return raster_io.burn_polygons_to_a_raster(None, polygons, burn_value, save_path, dtype,
                                               xres=xres, yres=yres, extent=extent,
                                               ref_prj=wkt_string)
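# a minimal sketch of the underlying rasterization step with rasterio, in case
# raster_io.burn_polygons_to_a_raster is not available; this is an assumption about
# what that helper does when no reference raster is given (CRS tagging omitted for brevity)
import numpy as np
import rasterio
from rasterio import features
from rasterio.transform import from_origin

def burn_polygons_simple(polygons, values, xres, yres, extent, save_path, dtype='uint8'):
    minx, miny, maxx, maxy = extent
    width = int(np.ceil((maxx - minx) / xres))
    height = int(np.ceil((maxy - miny) / yres))
    transform = from_origin(minx, maxy, xres, yres)  # upper-left origin
    # rasterize takes (geometry, value) pairs; shapely geometries work via __geo_interface__
    arr = features.rasterize(zip(polygons, values), out_shape=(height, width),
                             transform=transform, fill=0, dtype=dtype)
    with rasterio.open(save_path, 'w', driver='GTiff', height=height, width=width,
                       count=1, dtype=dtype, transform=transform) as dst:
        dst.write(arr, 1)
    return save_path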
def read_extent_shapefile_epgs4326(extent_shp):
    '''
    read extent polygons; they need to be in the lat/lon (EPSG:4326) projection
    :param extent_shp: extent shapefile
    :return:
    '''
    if extent_shp is None:
        return None

    shp_prj = map_projection.get_raster_or_vector_srs_info_proj4(extent_shp).strip()
    if shp_prj != '+proj=longlat +datum=WGS84 +no_defs':
        raise ValueError('only support the projection of longlat (EPSG:4326)')

    extent_polygons = vector_gpd.read_polygons_gpd(extent_shp)
    return extent_polygons
def main(options, args):
    img_dir = args[0]
    output = options.output
    if os.path.isdir(img_dir):
        img_pattern = options.image_pattern
        if output is None:
            output = os.path.basename(img_dir) + '_boxes.gpkg'
        img_list = io_function.get_file_list_by_pattern(img_dir, img_pattern)
        if len(img_list) < 1:
            raise ValueError('No images in %s with pattern: %s' % (img_dir, img_pattern))
    else:
        # if it's a single file
        img_list = [img_dir]
        if output is None:
            output = os.path.basename(img_dir) + '_bound.gpkg'
    print('Found %d rasters in %s' % (len(img_list), img_dir))

    # check projections
    prj4_1st = raster_io.get_projection(img_list[0], 'proj4')
    for idx in range(1, len(img_list)):
        prj4 = raster_io.get_projection(img_list[idx], 'proj4')
        if prj4_1st != prj4:
            raise ValueError('projection inconsistent between %s and %s' % (img_list[0], img_list[idx]))

    img_boxes = [raster_io.get_image_bound_box(img_path) for img_path in img_list]
    img_box_polygons = [vector_gpd.convert_image_bound_to_shapely_polygon(box) for box in img_boxes]

    # save to file
    wkt = map_projection.get_raster_or_vector_srs_info_proj4(img_list[0])
    save_pd = pd.DataFrame({'raster': img_list, 'Polygon': img_box_polygons})
    vector_gpd.save_polygons_to_files(save_pd, 'Polygon', wkt, output, format='GPKG')
    print('saved raster extents to %s' % output)
    return
def merge_multi_headwall_shp_to_one(shp_list, save_path):
    '''
    merge multiple shapefiles of headwalls on different dates into one file
    :param shp_list: list of shapefile paths
    :param save_path: output path
    :return:
    '''
    if len(shp_list) < 1:
        print('Warning, no input shapefile, skip merging multiple shapefiles')
        return False

    if os.path.isfile(save_path):
        print('warning, %s already exists, skip' % save_path)
        return True

    # merge the shapefiles one by one, and add the year and date from the filename
    line_list = []
    id_list = []
    year_list = []
    date_list = []
    length_m_list = []  # length in meters
    for shp in shp_list:
        # these are line vectors, but we can still use the following function to read them
        lines, lengths = vector_gpd.read_polygons_attributes_list(shp, 'length_m')
        curr_count = len(id_list)
        acquisition_date = timeTools.get_yeardate_yyyymmdd(os.path.basename(shp))
        year = acquisition_date.year
        for idx, (line, length) in enumerate(zip(lines, lengths)):
            id_list.append(idx + curr_count)
            line_list.append(line)
            length_m_list.append(length)
            year_list.append(year)
            date_list.append(timeTools.date2str(acquisition_date))

    save_pd = pd.DataFrame({'id': id_list, 'length_m': length_m_list, 'dem_year': year_list,
                            'dem_date': date_list, 'Line': line_list})
    ref_prj = map_projection.get_raster_or_vector_srs_info_proj4(shp_list[0])
    return vector_gpd.save_polygons_to_files(save_pd, 'Line', ref_prj, save_path)
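# usage sketch: the file names below are hypothetical placeholders; each name must
# contain a yyyymmdd date that timeTools.get_yeardate_yyyymmdd can parse
if __name__ == '__main__':
    shp_list = ['headwall_20170730.shp', 'headwall_20180612.shp']
    merge_multi_headwall_shp_to_one(shp_list, 'headwall_merged.shp')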
def reproject_planet_image(tif_path, new_prj_wkt, new_prj_proj4, save_dir='planet_images_reproj'):
    '''
    reproject an image
    :param tif_path: image path
    :param new_prj_wkt: new projection in WKT format (more accurate)
    :param new_prj_proj4: new projection in proj4 format (less accurate, but good for comparison)
    :param save_dir: output folder
    :return:
    '''
    # if multiple processes try to derive the same RGB images, it may cause problems;
    # in that case, save output to 'planet_images_reproj' + processID
    if os.path.isdir(save_dir) is False:
        io_function.mkdir(save_dir)

    # filename without extension
    output = os.path.splitext(os.path.basename(tif_path))[0]
    fin_output = os.path.join(save_dir, output + '_prj.tif')
    if os.path.isfile(fin_output):
        basic.outputlogMessage("Skip, because file %s exists in the current folder: %s"
                               % (fin_output, os.getcwd()))
        return fin_output

    tif_prj4 = map_projection.get_raster_or_vector_srs_info_proj4(tif_path).strip()
    # if they already have the same projection, return False: no need to reproject
    if tif_prj4 == new_prj_proj4:
        return False

    # reproject to the new projection
    # gdalwarp -t_srs EPSG:4326 -overwrite tmp.tif $out
    # the WKT string contains spaces, so it must be quoted; note that with -of VRT,
    # the output is a VRT file even though it carries a .tif suffix
    cmd_str = 'gdalwarp -t_srs "%s" -of VRT %s %s' % (new_prj_wkt, tif_path, fin_output)
    status, result = basic.exec_command_string(cmd_str)
    if status != 0:
        print(result)
        sys.exit(status)
    return fin_output
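# equivalent reprojection through GDAL's Python API instead of a shell call, which
# sidesteps the WKT quoting issue entirely; a sketch, not the project's method
# (gdal.Warp's dstSRS accepts WKT, proj4, or 'EPSG:xxxx' strings)
from osgeo import gdal

def reproject_with_gdal_warp(tif_path, dst_srs, out_path):
    gdal.Warp(out_path, tif_path, dstSRS=dst_srs, format='GTiff')
    return out_path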
def build_dict_of_dem_cover_grid_ids(dem_info_shp, grid_20_shp, save_dict_txt):
    # this will take time, but only needs to run once at the beginning
    if os.path.isfile(save_dict_txt):
        print('warning, %s exists, skip build_dict_of_dem_cover_grid_ids' % save_dict_txt)
        return True

    # extent polygons and projection (proj4)
    dem_shp_prj = map_projection.get_raster_or_vector_srs_info_proj4(dem_info_shp)
    if dem_shp_prj == '':
        raise ValueError('get proj4 of %s failed' % dem_info_shp)
    grid_shp_prj = map_projection.get_raster_or_vector_srs_info_proj4(grid_20_shp)
    if grid_shp_prj == '':
        raise ValueError('get proj4 of %s failed' % grid_20_shp)
    if dem_shp_prj != grid_shp_prj:
        raise ValueError('%s and %s do not have the same projection' % (dem_info_shp, grid_20_shp))

    # read DEM info
    dem_polygons, dem_names = vector_gpd.read_polygons_attributes_list(dem_info_shp, 'name',
                                                                       b_fix_invalid_polygon=False)
    # dem_name: e.g., SETSM_GE01_20090818_1050410001E0CF00_1050410001D80200_seg1_2m_v3.0 or 11_27_2_1_2m_v3.0
    dem_poly_count = len(dem_polygons)
    # check if there are duplicate dem names
    if len(dem_names) != len(set(dem_names)):
        raise ValueError('some duplicate dem names in %s' % dem_info_shp)

    # read grid polygons and ids
    all_grid_polys, all_ids = vector_gpd.read_polygons_attributes_list(grid_20_shp, 'id')

    dem_cover_grids = {}
    # this will take time.
    # serial version:
    # for idx, (dem_poly, dem_name) in enumerate(zip(dem_polygons, dem_names)):
    #     print(timeTools.get_now_time_str(), idx, dem_poly_count)
    #     index = vector_gpd.get_poly_index_within_extent(all_grid_polys, dem_poly)
    #     grid_ids = [all_ids[idx] for idx in index]
    #     dem_cover_grids[dem_name] = grid_ids

    # parallel version
    threadPool = Pool(multiprocessing.cpu_count())  # multiple processes
    parameters_list = [(all_ids, all_grid_polys, dem_poly, dem_name, idx, dem_poly_count)
                       for idx, (dem_poly, dem_name) in enumerate(zip(dem_polygons, dem_names))]
    results = threadPool.starmap(get_overlap_grids_for_one_extent, parameters_list)  # need python3
    for res in results:
        dem_name, grid_ids = res
        dem_cover_grids[dem_name] = grid_ids

    # save the dict
    io_function.save_dict_to_txt_json(save_dict_txt, dem_cover_grids)
    threadPool.close()
    return True
                                   o_res, resample_m='bilinear', o_format='GTiff',
                                   compress='lzw', tiled='yes', bigtiff='if_safer')  # tail of a call truncated in this snippet
    pass

    # crop, mosaic, and reproject if necessary
    if extent_shp_or_ids_txt.endswith('.shp'):
        pre_name = os.path.splitext(os.path.basename(extent_shp_or_ids_txt))[0]
        # extent polygons and projection (proj4)
        extent_shp_prj = map_projection.get_raster_or_vector_srs_info_proj4(extent_shp_or_ids_txt)
        grid_shp_prj = map_projection.get_raster_or_vector_srs_info_proj4(grid_20_shp)
        if extent_shp_prj != grid_shp_prj:
            basic.outputlogMessage('%s and %s do not have the same projection, will reproject %s'
                                   % (extent_shp_or_ids_txt, grid_20_shp,
                                      os.path.basename(extent_shp_or_ids_txt)))
            epsg = map_projection.get_raster_or_vector_srs_info_epsg(grid_20_shp)
            extent_polys = vector_gpd.read_shape_gpd_to_NewPrj(extent_shp_or_ids_txt, epsg)
        else:
            extent_polys = vector_gpd.read_polygons_gpd(extent_shp_or_ids_txt)

    crop_mosaic_reproject_dem_diff(grid_dem_tifs,  # call truncated in this snippet
def get_grid_20(extent_shp_or_id_txt, grid_polys, ids):
    '''
    get grid polygons and ids based on an input extent (polygon in a shapefile) or ids (txt file).
    if "file_name_base + '_grid_ids.txt'" exists, it will read the ids from that file directly.
    :param extent_shp_or_id_txt:
    :param grid_polys:
    :param ids:
    :return:
    '''
    io_function.is_file_exist(extent_shp_or_id_txt)
    if extent_shp_or_id_txt.endswith('.txt'):
        grid_ids = io_function.read_list_from_txt(extent_shp_or_id_txt)
        grid_ids = [int(item) for item in grid_ids]
    else:
        shp_corresponding_grid_ids_txt = get_corresponding_grid_ids_txt(extent_shp_or_id_txt)
        if os.path.isfile(shp_corresponding_grid_ids_txt):
            print('corresponding grid ids txt file for %s exists, read grid ids from txt'
                  % extent_shp_or_id_txt)
            grid_ids = [int(item) for item in
                        io_function.read_list_from_txt(shp_corresponding_grid_ids_txt)]
            basic.outputlogMessage('read %d grids within the extents (%s)'
                                   % (len(grid_ids), os.path.basename(extent_shp_or_id_txt)))
        else:
            # extent polygons and projection (proj4)
            extent_shp_prj = map_projection.get_raster_or_vector_srs_info_proj4(extent_shp_or_id_txt)
            if extent_shp_prj == '':
                raise ValueError('get proj4 of %s failed' % extent_shp_or_id_txt)
            grid_shp_prj = map_projection.get_raster_or_vector_srs_info_proj4(grid_20_shp)
            if grid_shp_prj == '':
                raise ValueError('get proj4 of %s failed' % grid_20_shp)

            if extent_shp_prj != grid_shp_prj:
                basic.outputlogMessage('%s and %s do not have the same projection, will reproject %s'
                                       % (extent_shp_or_id_txt, grid_20_shp,
                                          os.path.basename(extent_shp_or_id_txt)))
                epsg = map_projection.get_raster_or_vector_srs_info_epsg(grid_20_shp)
                extent_polys = vector_gpd.read_shape_gpd_to_NewPrj(extent_shp_or_id_txt, epsg)
            else:
                extent_polys = vector_gpd.read_polygons_gpd(extent_shp_or_id_txt)

            ext_poly_count = len(extent_polys)
            if ext_poly_count < 1:
                raise ValueError('No polygons in %s' % extent_shp_or_id_txt)

            grid_index = []
            # if there are many polygons, this will take time.
            for idx, ext_poly in enumerate(extent_polys):
                print(timeTools.get_now_time_str(), 'get grids for extent idx', idx,
                      'total polygons:', ext_poly_count)
                index = vector_gpd.get_poly_index_within_extent(grid_polys, ext_poly)
                grid_index.extend(index)
            grid_index = list(set(grid_index))  # remove duplicated ids
            basic.outputlogMessage('find %d grids within the extents (%s)'
                                   % (len(grid_index), os.path.basename(extent_shp_or_id_txt)))

            grid_ids = [ids[idx] for idx in grid_index]
            grid_ids_str = [str(item) for item in grid_ids]
            io_function.save_list_to_txt(shp_corresponding_grid_ids_txt, grid_ids_str)

    id_index = [ids.index(id) for id in grid_ids]
    selected_grid_polys = [grid_polys[idx] for idx in id_index]

    return selected_grid_polys, grid_ids
def main(options, args):
    # get the multi-temporal shapefile list
    para_file = options.para_file
    b_remove = parameters.get_bool_parameters_None_if_absence(
        para_file, 'b_remove_polygons_using_multitemporal_results')
    # exit if not set
    if b_remove is None or b_remove is False:
        basic.outputlogMessage('Warning, b_remove_polygons_using_multitemporal_results not set or is NO')
        return True

    shp_dir = args[0]
    file_pattern = args[1]
    polygon_shps_list = io_function.get_file_list_by_pattern(shp_dir, file_pattern)
    if len(polygon_shps_list) < 2:
        raise ValueError('Error, less than two shapefiles, cannot conduct multi-polygon analysis')

    # put polygon_shps_list in order: I0 to In
    polygon_shps_list.sort(key=lambda x: int(re.findall(r'I\d+', os.path.basename(x))[0][1:]))

    # check the projection of the shapefiles, they should be the same
    new_shp_proj4 = map_projection.get_raster_or_vector_srs_info_proj4(polygon_shps_list[0])
    for idx in range(len(polygon_shps_list) - 1):
        shp_proj4 = map_projection.get_raster_or_vector_srs_info_proj4(polygon_shps_list[idx + 1])
        if shp_proj4 != new_shp_proj4:
            raise ValueError('error, projection inconsistent between %s and %s'
                             % (new_shp_proj4, shp_proj4))

    import remove_nonActive_thawSlumps
    import polygons_change_analyze

    # polygon change analysis
    polygons_change_analyze.cal_multi_temporal_iou_and_occurrence(polygon_shps_list, para_file)

    # remove non-active polygons
    remove_nonActive_thawSlumps.remove_non_active_thaw_slumps(polygon_shps_list, para_file)

    # back up files and conduct evaluation
    for idx, shp_path in enumerate(polygon_shps_list):
        # evaluation files
        shp_rmTimeiou = io_function.get_name_by_adding_tail(shp_path, 'rmTimeiou')
        basic.outputlogMessage('(%d/%d) evaluation of %s' % (idx, len(polygon_shps_list), shp_rmTimeiou))

        # evaluation
        args_list = [os.path.join(deeplabRS, 'evaluation_result.py'), '-p', para_file, shp_rmTimeiou]
        if basic.exec_command_args_list_one_file(args_list, 'evaluation_report.txt') is False:
            return False

        I_idx_str = re.findall(r'I\d+', os.path.basename(shp_rmTimeiou))
        old_eva_report = io_function.get_file_list_by_pattern(shp_dir, I_idx_str[0] + '*eva_report*' + '.txt')
        old_eva_report = [item for item in old_eva_report if 'rmTimeiou' not in item]
        old_eva_report_name = old_eva_report[0]
        eva_report_name = io_function.get_name_by_adding_tail(old_eva_report_name, 'rmTimeiou')
        io_function.move_file_to_dst('evaluation_report.txt', eva_report_name, overwrite=True)
        # no need to back up the shapefiles

    basic.outputlogMessage('Finish removing polygons using multi-temporal mapping results')
def get_projection_proj4(geo_file):
    import basic_src.map_projection as map_projection
    return map_projection.get_raster_or_vector_srs_info_proj4(geo_file)
def yolo_results_to_shapefile(curr_dir, img_idx, area_save_dir, nms_overlap_thr, test_id):
    img_save_dir = os.path.join(area_save_dir, 'I%d' % img_idx)
    res_yolo_json = img_save_dir + '_result.json'
    res_json_files = []
    if os.path.isfile(res_yolo_json):
        print('found %s in %s, will get the shapefile from it' % (res_yolo_json, area_save_dir))
    else:
        if os.path.isdir(img_save_dir):
            res_json_files = io_function.get_file_list_by_ext('.json', img_save_dir, bsub_folder=False)
            if len(res_json_files) < 1:
                print('Warning, no YOLO results in %s, skip' % img_save_dir)
                return None
            print('found %d json files for patches in %s, will get the shapefile from them'
                  % (len(res_json_files), img_save_dir))
        else:
            print('Warning, folder: %s does not exist, skip' % img_save_dir)
            return None

    out_name = os.path.basename(area_save_dir) + '_' + test_id

    # to shapefile
    out_shp = 'I%d' % img_idx + '_' + out_name + '.shp'
    out_shp_path = os.path.join(img_save_dir, out_shp)
    if os.path.isfile(out_shp_path):
        print('%s already exists' % out_shp_path)
    else:
        class_id_list = []
        name_list = []
        box_bounds_list = []
        confidence_list = []
        source_image_list = []
        if len(res_json_files) < 1:
            # use the results in *_result.json
            yolo_res_dict_list = io_function.read_dict_from_txt_json(res_yolo_json)
            total_frame = len(yolo_res_dict_list)
            image1 = yolo_res_dict_list[0]['filename']
            for idx, res_dict in enumerate(yolo_res_dict_list):
                id_list, na_list, con_list, box_list, image1 = boxes_yoloXY_to_imageXY(
                    idx, total_frame, res_dict, ref_image=None)
                class_id_list.extend(id_list)
                name_list.extend(na_list)
                confidence_list.extend(con_list)
                box_bounds_list.extend(box_list)
                source_image_list.extend([os.path.basename(image1)] * len(box_list))
        else:
            # use the results in I0/*.json
            image1 = io_function.read_list_from_txt(os.path.join(area_save_dir, '%d.txt' % img_idx))[0]
            total_frame = len(res_json_files)  # the patch count
            # only open the image once
            with rasterio.open(image1) as src:
                for idx, f_json in enumerate(res_json_files):
                    id_list, na_list, con_list, box_list = boxes_minXYmaxXY_to_imageXY(
                        idx, total_frame, f_json, src)
                    class_id_list.extend(id_list)
                    name_list.extend(na_list)
                    confidence_list.extend(con_list)
                    box_bounds_list.extend(box_list)
                    # was len(box_bounds_list), which grows cumulatively and mismatched the other lists
                    source_image_list.extend([os.path.basename(image1)] * len(box_list))

        if len(box_bounds_list) < 1:
            print('Warning, no predicted boxes in %s' % img_save_dir)
            return None

        # apply non-max suppression
        pick_index = non_max_suppression(np.array(box_bounds_list), probs=np.array(confidence_list),
                                         overlapThresh=nms_overlap_thr, b_geo=True)
        class_id_list = [class_id_list[idx] for idx in pick_index]
        name_list = [name_list[idx] for idx in pick_index]
        confidence_list = [confidence_list[idx] for idx in pick_index]
        box_bounds_list = [box_bounds_list[idx] for idx in pick_index]
        source_image_list = [source_image_list[idx] for idx in pick_index]

        # to polygons
        box_poly_list = [vector_gpd.convert_image_bound_to_shapely_polygon(item)
                         for item in box_bounds_list]

        # save to shapefile
        detect_boxes_dict = {'class_id': class_id_list, 'name': name_list,
                             'source_img': source_image_list, 'confidence': confidence_list,
                             "Polygon": box_poly_list}
        save_pd = pd.DataFrame(detect_boxes_dict)
        ref_prj = map_projection.get_raster_or_vector_srs_info_proj4(image1)
        vector_gpd.save_polygons_to_files(save_pd, 'Polygon', ref_prj, out_shp_path)

    return out_shp_path
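# a minimal sketch of the greedy non-max-suppression idea applied above; the project's
# non_max_suppression with b_geo=True presumably also handles the geographic y-axis
# direction, which this standard pixel-space version does not
import numpy as np

def nms_simple(boxes, probs, overlap_thresh=0.5):
    # boxes: (N, 4) array of [x1, y1, x2, y2]; returns the indices of the kept boxes
    x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    area = (x2 - x1) * (y2 - y1)
    order = np.argsort(probs)  # ascending: the most confident box is last
    keep = []
    while len(order) > 0:
        i = order[-1]  # highest-confidence remaining box
        keep.append(i)
        rest = order[:-1]
        # intersection of box i with all remaining boxes
        w = np.maximum(0.0, np.minimum(x2[i], x2[rest]) - np.maximum(x1[i], x1[rest]))
        h = np.maximum(0.0, np.minimum(y2[i], y2[rest]) - np.maximum(y1[i], y1[rest]))
        overlap = (w * h) / area[rest]
        order = rest[overlap <= overlap_thresh]  # drop boxes that overlap too much with box i
    return keep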
def main():
    # grid polygons
    grid_50km = os.path.join(shp_dir, 'PAMPA_outline_utm_50grid.shp')
    # main permafrost areas based on the permafrost map; pre-processed: small ones removed, boundaries simplified
    main_area_simp = os.path.join(shp_dir, 'PAMPA_outline_utm.shp')

    # grid_50km and main_area_simp should have the same projection
    grid_prj = map_projection.get_raster_or_vector_srs_info_proj4(grid_50km)
    perma_area_prj = map_projection.get_raster_or_vector_srs_info_proj4(main_area_simp)
    if grid_prj != perma_area_prj:
        raise ValueError('%s and %s do not have the same projection' % (grid_prj, perma_area_prj))

    grids = vector_gpd.read_polygons_gpd(grid_50km)
    perma_areas = vector_gpd.read_polygons_gpd(main_area_simp)

    small_perma_areas_list = []
    for idx, perma_poly in enumerate(perma_areas):
        print(' processing %dth permafrost area' % idx)
        # if the permafrost area is < 50*50 km^2, do not split it into smaller ones:
        # if size < 2500:
        #     perma_poly = vector_gpd.fill_holes_in_a_polygon(perma_poly)
        #     small_perma_areas_list.append(perma_poly)
        #     continue

        # split the big permafrost area into many small ones
        for grid in grids:
            inte_res = perma_poly.intersection(grid)
            if inte_res.is_empty is False:
                inte_res_multi = vector_gpd.MultiPolygon_to_polygons(idx, inte_res)
                for tmp in inte_res_multi:
                    # remove holes if they exist
                    small_ones = vector_gpd.fill_holes_in_a_polygon(tmp)
                    # we should also remove some really small polygons (< 1 km^2)
                    small_perma_areas_list.append(small_ones)

    # small polygons have to be merged manually into their adjacent ones in QGIS

    # save
    save_path = io_function.get_name_by_adding_tail(main_area_simp, 'small')
    save_path = os.path.join(shp_dir, os.path.basename(save_path))
    save_polygons_attributes = {"Polygons": small_perma_areas_list}
    wkt_string = map_projection.get_raster_or_vector_srs_info_proj4(main_area_simp)
    polygon_df = pd.DataFrame(save_polygons_attributes)
    vector_gpd.save_polygons_to_files(polygon_df, 'Polygons', wkt_string, save_path)
def select_polygons_overlap_others_in_group2(polys_group1_path, polys_group2_path, save_path,
                                             buffer_size=0, process_num=1):
    '''
    select polygons in group 1 that overlap any polygon in group 2
    :param polys_group1_path:
    :param polys_group2_path:
    :param buffer_size:
    :param process_num: using multiple processes will take a lot of memory
    :return:
    '''
    if os.path.isfile(save_path):
        print('Warning, %s exists, skip' % save_path)
        return True

    # check projections
    shp1_prj = map_projection.get_raster_or_vector_srs_info_proj4(polys_group1_path)
    shp2_prj = map_projection.get_raster_or_vector_srs_info_proj4(polys_group2_path)
    if shp1_prj is False or shp2_prj is False:
        return False
    if shp1_prj != shp2_prj:
        raise ValueError('%s and %s do not have the same projection'
                         % (polys_group1_path, polys_group2_path))

    polys_group1 = vector_gpd.read_polygons_gpd(polys_group1_path, b_fix_invalid_polygon=False)
    if buffer_size > 0:
        polys_group1 = [item.buffer(buffer_size) for item in polys_group1]
    print(datetime.now(), 'read %d polygons in group 1' % len(polys_group1))
    polys_group2 = vector_gpd.read_polygons_gpd(polys_group2_path, b_fix_invalid_polygon=False)
    print(datetime.now(), 'read %d polygons in group 2' % len(polys_group2))
    count_group1 = len(polys_group1)

    # https://shapely.readthedocs.io/en/stable/manual.html#str-packed-r-tree
    tree = STRtree(polys_group2)

    select_idx = []
    if process_num == 1:
        for idx, poly_buff in enumerate(polys_group1):
            # output progress
            if idx % 1000 == 0:
                print(datetime.now(), '%d th / %d polygons' % (idx, count_group1))
            adjacent_polygons = [item for item in tree.query(poly_buff)
                                 if item.intersects(poly_buff) or item.touches(poly_buff)]
            if len(adjacent_polygons) > 0:
                select_idx.append(idx)
    elif process_num > 1:
        # the multi-process version ended in:
        # struct.error: 'i' format requires -2147483648 <= number <= 2147483647
        # which could be caused by too many polygons
        raise ValueError("has error of struct.error: 'i' format requires -2147483648 <= number <= 2147483647, "
                         "please use process_num=1")
    else:
        raise ValueError('wrong process number: %s' % str(process_num))

    basic.outputlogMessage('Selected %d polygons from %d ones' % (len(select_idx), count_group1))
    if len(select_idx) < 1:
        return None

    # save a subset of the shapefile
    return vector_gpd.save_shapefile_subset_as(select_idx, polys_group1_path, save_path)
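# a minimal, self-contained example of the STRtree query pattern used above
# (shapely 1.x semantics, where query() returns candidate geometries; in shapely >= 2.0
# it returns integer indices instead, so the comprehension would need an index lookup)
from shapely.geometry import box
from shapely.strtree import STRtree

group2 = [box(0, 0, 1, 1), box(5, 5, 6, 6)]
tree = STRtree(group2)
candidate = box(0.5, 0.5, 2, 2)
hits = [g for g in tree.query(candidate) if g.intersects(candidate) or g.touches(candidate)]
print(len(hits))  # 1: only the first box overlaps the candidate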
def main(options, args):
    time0 = time.time()
    image_dir = args[0]
    geojson_list = io_function.get_file_list_by_ext('.geojson', image_dir, bsub_folder=False)
    # remove some scenes, or maybe we should set bsub_folder=False
    # geojson_list = [item for item in geojson_list if 'incomplete_scenes' not in item]        # remove those in "incomplete_scenes"
    # geojson_list = [item for item in geojson_list if 'scenes_high_cloud_cover' not in item]  # remove those in "scenes_high_cloud_cover"
    if len(geojson_list) < 1:
        raise ValueError('There are no geojson files in %s' % image_dir)

    basic.outputlogMessage('Image Dir: %s' % image_dir)
    basic.outputlogMessage("Number of geojson files: %d" % len(geojson_list))

    # the polygons should be in a projected (Cartesian) coordinate system (e.g., UTM)
    grid_polygon_shp = args[1]
    basic.outputlogMessage('Image grid polygon shapefile: %s' % grid_polygon_shp)
    process_num = options.process_num
    basic.outputlogMessage('The number of processes for creating the mosaic is: %d' % process_num)

    # read grid polygons
    grid_polygons = vector_gpd.read_polygons_gpd(grid_polygon_shp)
    grid_ids = vector_gpd.read_attribute_values_list(grid_polygon_shp, 'id')
    if grid_ids is None:
        basic.outputlogMessage('Warning, field: id is not in %s, will create a default ID for each grid'
                               % grid_polygon_shp)
        grid_ids = [id + 1 for id in range(len(grid_polygons))]

    shp_prj = map_projection.get_raster_or_vector_srs_info_proj4(grid_polygon_shp).strip()
    grid_polygons_latlon = grid_polygons
    if shp_prj != '+proj=longlat +datum=WGS84 +no_defs':
        # read the polygons and reproject them to the EPSG:4326 projection
        grid_polygons_latlon = vector_gpd.read_shape_gpd_to_NewPrj(grid_polygon_shp, 'EPSG:4326')
    # else:
    #     raise ValueError('%s should be in a projected (Cartesian) coordinate system' % grid_polygon_shp)
    shp_prj_wkt = map_projection.get_raster_or_vector_srs_info_wkt(grid_polygon_shp)

    max_sr = options.max_sr
    min_sr = options.min_sr

    original_img_copy_dir = options.original_img_copy_dir
    b_to_rgb_8bit = options.to_rgb
    basic.outputlogMessage('Convert to 8bit RGB images: %s' % str(b_to_rgb_8bit))

    # group Planet images based on acquisition date
    b_group_date = options.group_date
    basic.outputlogMessage('Group Planet images based on acquisition date: %s' % str(b_group_date))
    if b_group_date:
        # with diff_days as 0, group images acquired on the same date
        geojson_groups = group_planet_images_date(geojson_list, diff_days=0)

        # sort by yeardate in ascending order: operator.itemgetter(0)
        geojson_groups = dict(sorted(geojson_groups.items(), key=operator.itemgetter(0)))

        save_group_txt = 'geojson_groups_input_folder.txt'
        basic.outputlogMessage('images are divided into %d groups, save to %s'
                               % (len(geojson_groups.keys()), save_group_txt))
        io_function.save_dict_to_txt_json(save_group_txt, geojson_groups)
    else:
        geojson_groups = {'all': geojson_list}

    # create a mosaic for each grid
    cloud_cover_thr = options.cloud_cover
    cloud_cover_thr = cloud_cover_thr * 100  # for Planet images, it is a percentage
    out_res = options.out_res
    cur_dir = os.getcwd()
    resampling_method = options.merged_method

    for key in geojson_groups.keys():
        geojson_list = geojson_groups[key]
        save_dir = os.path.basename(cur_dir) + '_mosaic_' + str(out_res) + '_' + key

        if process_num == 1:
            for id, polygon, poly_latlon in zip(grid_ids, grid_polygons, grid_polygons_latlon):
                create_moasic_of_each_grid_polygon(id, polygon, poly_latlon, out_res, cloud_cover_thr,
                                                   geojson_list, save_dir, new_prj_wkt=shp_prj_wkt,
                                                   new_prj_proj4=shp_prj, sr_min=min_sr, sr_max=max_sr,
                                                   to_rgb=b_to_rgb_8bit, save_org_dir=original_img_copy_dir,
                                                   resampling_method=resampling_method)
        elif process_num > 1:
            threadPool = Pool(process_num)  # multiple processes
            parameters_list = [(id, polygon, poly_latlon, out_res, cloud_cover_thr, geojson_list,
                                save_dir, shp_prj_wkt, shp_prj, min_sr, max_sr, b_to_rgb_8bit, 0,
                                original_img_copy_dir)
                               for id, polygon, poly_latlon in
                               zip(grid_ids, grid_polygons, grid_polygons_latlon)]
            results = threadPool.starmap(create_moasic_of_each_grid_polygon, parameters_list)  # need python3
            threadPool.close()
        else:
            raise ValueError('incorrect process number: %d' % process_num)

    cost_time_sec = time.time() - time0
    basic.outputlogMessage('Done, total time cost %.2f seconds (%.2f minutes or %.2f hours)'
                           % (cost_time_sec, cost_time_sec / 60, cost_time_sec / 3600))
def main():
    dem_index_shp = os.path.expanduser(
        '~/Data/Arctic/ArcticDEM/BROWSE_SERVER/indexes/ArcticDEM_Tile_Index_Rel7/ArcticDEM_Tile_Index_Rel7.shp')
    # extent_shp = os.path.expanduser('~/Data/PDO/PDO_statistics_swatchs/swatch_bounding_boxes.shp')
    extent_shp = os.path.expanduser('~/Data/PDO/extent_each_swatch/merge_all_qa_exent.shp')

    # extent polygons and projection (proj4)
    extent_shp_prj = map_projection.get_raster_or_vector_srs_info_proj4(extent_shp)
    dem_shp_prj = map_projection.get_raster_or_vector_srs_info_proj4(dem_index_shp)
    if extent_shp_prj != dem_shp_prj:
        basic.outputlogMessage('%s and %s do not have the same projection, will reproject %s'
                               % (extent_shp, dem_index_shp, os.path.basename(extent_shp)))
        epsg = map_projection.get_raster_or_vector_srs_info_epsg(dem_index_shp)
        extent_polys = vector_gpd.read_shape_gpd_to_NewPrj(extent_shp, epsg)
    else:
        extent_polys = vector_gpd.read_polygons_gpd(extent_shp)

    poly_ids = [idx for idx in range(len(extent_polys))]
    if 'boxes' in os.path.basename(extent_shp):
        nc_file_names = vector_gpd.read_attribute_values_list(extent_shp, 'nc_file')
    else:
        nc_file_names = vector_gpd.read_attribute_values_list(extent_shp, 'layer')

    # read dem polygons and tile numbers
    dem_polygons, dem_tiles = vector_gpd.read_polygons_attributes_list(dem_index_shp, 'tile',
                                                                       b_fix_invalid_polygon=False)

    for count, (idx, ext_poly) in enumerate(zip(poly_ids, extent_polys)):
        basic.outputlogMessage('get data for the %d th extent (%d/%d)' % (idx, count, len(extent_polys)))

        save_txt_path = nc_file_names[idx] + '-' + 'dem_tiles_poly_%d.txt' % idx
        if os.path.isfile(save_txt_path):
            tiles = io_function.read_list_from_txt(save_txt_path)
            basic.outputlogMessage('read %d dem tiles from %s' % (len(tiles), save_txt_path))
        else:
            # get the tile names
            dem_poly_ids = vector_gpd.get_poly_index_within_extent(dem_polygons, ext_poly)
            basic.outputlogMessage('find %d DEM within the %d th extent' % (len(dem_poly_ids), idx))
            tiles = [dem_tiles[id] for id in dem_poly_ids]

            # save to txt
            io_function.save_list_to_txt(save_txt_path, tiles)
            basic.outputlogMessage('save dem tile names to %s' % save_txt_path)

        # download and create a mosaic
        url_head = 'https://data.pgc.umn.edu/elev/dem/setsm/ArcticDEM/mosaic/v3.0/32m/'
        download_tarball_for_one_polygon(tarball_dir, dem_tif_dir, url_head, tiles)

        # create a mosaic
        create_a_mosaic(nc_file_names[idx], idx, dem_eachSwatch_dir, ext_poly, tiles)

    bak_folder = 'small_tifs'
    io_function.mkdir(bak_folder)
    # remove small and duplicated ones
    for file_name in nc_file_names:
        crop_tifs = io_function.get_file_list_by_pattern(dem_eachSwatch_dir, file_name + '*crop.tif')
        if len(crop_tifs) == 1:
            pass
        elif len(crop_tifs) > 1:
            # keep the largest one and move the small ones away
            tif_files_size = [io_function.get_file_size_bytes(item) for item in crop_tifs]
            max_size = max(tif_files_size)
            max_index = tif_files_size.index(max_size)
            del crop_tifs[max_index]
            for tmp in crop_tifs:
                io_function.movefiletodir(tmp, bak_folder)
                tmp = tmp.replace('_crop', '')
                io_function.movefiletodir(tmp, bak_folder)
        else:
            # no tif
            raise ValueError('Results for %s do not exist' % file_name)
def main(options, args):
    polygons_shp = args[0]
    save_folder = args[1]  # folder for saving downloaded images

    # check training polygons
    assert io_function.is_file_exist(polygons_shp)
    os.system('mkdir -p ' + save_folder)

    item_types = options.item_types.split(',')  # e.g., ["PSScene4Band"]  # PSScene4Band, PSOrthoTile

    start_date = datetime.strptime(options.start_date, '%Y-%m-%d')  # e.g., datetime(year=2018, month=5, day=20)
    end_date = datetime.strptime(options.end_date, '%Y-%m-%d')
    cloud_cover_thr = options.cloud_cover  # e.g., 0.01
    planet_account = options.planet_account
    process_num = options.process_num

    # set the Planet API key
    get_and_set_Planet_key(planet_account)

    shp_prj = map_projection.get_raster_or_vector_srs_info_proj4(polygons_shp).strip()
    if shp_prj != '+proj=longlat +datum=WGS84 +no_defs':
        # reproject to the EPSG:4326 projection
        basic.outputlogMessage('reproject %s to latlon' % polygons_shp)
        latlon_shp = io_function.get_name_by_adding_tail(polygons_shp, 'latlon')
        if os.path.isfile(latlon_shp) is False:
            vector_gpd.reproject_shapefile(polygons_shp, 'EPSG:4326', latlon_shp)
        polygons_shp = latlon_shp
        basic.outputlogMessage('saved new shapefile to %s for downloading images' % polygons_shp)

    # read polygons
    polygons_json = read_polygons_json(polygons_shp)

    # read the excluded scenes before downloading images
    read_excluded_scenes(save_folder)

    # read the geometry of images already in "save_folder"
    read_down_load_geometry(save_folder)

    # download images
    download_planet_images(polygons_json, start_date, end_date, cloud_cover_thr, item_types,
                           save_folder, process_num)

    # check that each downloaded scene is complete; otherwise, move the incomplete ones away
    geojson_list = io_function.get_file_list_by_ext('.geojson', save_folder, bsub_folder=False)
    incom_dir = os.path.join(save_folder, 'incomplete_scenes')
    for geojson_file in geojson_list:
        scene_id = os.path.splitext(os.path.basename(geojson_file))[0]
        scene_dir = os.path.join(save_folder, scene_id)
        files = io_function.get_file_list_by_pattern(scene_dir, scene_id + '*')
        if len(files) != len(asset_types):
            # was "if os.path.isdir(incom_dir):", which never created the missing folder
            if os.path.isdir(incom_dir) is False:
                io_function.mkdir(incom_dir)
            basic.outputlogMessage('warning, downloading of %s is not complete, move to incomplete_scenes'
                                   % scene_id)
            io_function.movefiletodir(scene_dir, incom_dir, overwrite=True)
            io_function.movefiletodir(geojson_file, incom_dir, overwrite=True)
def main(options, args):
    extent_shp = args[0]
    dem_index_shp = args[1]

    b_arcticDEM_tile = False
    global max_task_count
    max_task_count = options.max_process_num

    if 'Tile' in os.path.basename(dem_index_shp):
        save_folder = arcticDEM_tile_tarball_dir
        reg_tif_dir = arcticDEM_tile_reg_tif_dir
        b_arcticDEM_tile = True
    else:
        save_folder = tarball_dir
        reg_tif_dir = arcticDEM_reg_tif_dir

    # use the user-specified save_dir for saving the downloaded tarballs
    if options.save_dir is not None:
        save_folder = options.save_dir
    if os.path.isdir(save_folder) is False:
        io_function.mkdir(save_folder)
    save_folder = os.path.abspath(save_folder)  # change to an absolute path

    pre_name = os.path.splitext(os.path.basename(extent_shp))[0]
    pre_name += '_Tile' if 'Tile' in os.path.basename(dem_index_shp) else '_Strip'

    # extent polygons and projection (proj4)
    extent_shp_prj = map_projection.get_raster_or_vector_srs_info_proj4(extent_shp)
    dem_shp_prj = map_projection.get_raster_or_vector_srs_info_proj4(dem_index_shp)

    if extent_shp_prj != dem_shp_prj:
        basic.outputlogMessage('%s and %s do not have the same projection, will reproject %s'
                               % (extent_shp, dem_index_shp, os.path.basename(extent_shp)))
        epsg = map_projection.get_raster_or_vector_srs_info_epsg(dem_index_shp)
        extent_polys = vector_gpd.read_shape_gpd_to_NewPrj(extent_shp, epsg)
    else:
        extent_polys = vector_gpd.read_polygons_gpd(extent_shp)

    # read 'grid_id' if the extent shp is from a grid shp file; if not, grid_id_list will be None
    grid_id_list = vector_gpd.read_attribute_values_list(extent_shp, 'grid_id')

    if len(extent_polys) < 1:
        raise ValueError('No polygons in %s' % extent_shp)
    basic.outputlogMessage('read %d extent polygons in %s for downloading and processing'
                           % (len(extent_polys), extent_shp))

    download_dem_tarball(dem_index_shp, extent_polys, save_folder, pre_name, reg_tif_dir=reg_tif_dir,
                         poly_ids=grid_id_list, b_arcticDEM_tile=b_arcticDEM_tile)
def main(options, args):
    extent_shp = args[0]
    task_list = [args[item] for item in range(1, len(args))]
    if len(task_list) < 1:
        raise ValueError('There is no task: %s' % str(task_list))

    # local_grid_id_txt is in the current dir;
    # log_grid_ids_txt and log_grid_ids_txt_done are in grid_ids_txt_dir
    local_grid_id_txt, log_grid_ids_txt, log_grid_ids_txt_done = get_extent_grid_id_txt_done_files(extent_shp)
    # check if the tasks have been completed
    if os.path.isfile(log_grid_ids_txt_done):
        basic.outputlogMessage('Tasks for extent %s have been completed' % extent_shp)
        return True

    r_working_dir = '/scratch/summit/lihu9680/Arctic/dem_processing' \
        if options.remote_working_dir is None else options.remote_working_dir
    r_log_dir = '/scratch/summit/lihu9680/ArcticDEM_tmp_dir/log_dir' \
        if options.remote_log_dir is None else options.remote_log_dir
    process_node = '$curc_host' if options.process_node is None else options.process_node
    download_node = '$curc_host' if options.download_node is None else options.download_node

    max_grid_count = options.max_grids
    b_remove_tmp_folders = options.b_remove_tmp_folders
    b_dont_remove_DEM_files = options.b_dont_remove_DEM_files
    b_no_slurm = options.b_no_slurm
    b_divide_to_subsets = True

    # modify the folder name of the subsets
    global subset_shp_dir
    subset_shp_dir = subset_shp_dir + '_' + io_function.get_name_no_ext(extent_shp)
    global msg_file_pre
    msg_file_pre = io_function.get_name_no_ext(extent_shp) + '_' + msg_file_pre

    grid_ids_to_process_txt = io_function.get_name_no_ext(extent_shp) + '_' + 'grid_ids_to_process.txt'

    # build the map of dem-cover grid ids (takes time, but only needs to run once at the beginning)
    build_dict_of_dem_cover_grid_ids(dem_strip_shp, grid_20_shp, strip_dem_cover_grids_txt)
    build_dict_of_dem_cover_grid_ids(dem_tile_shp, grid_20_shp, tile_dem_cover_grids_txt)

    # get grids for processing
    # read grids and ids
    time0 = time.time()
    all_grid_polys, all_ids = vector_gpd.read_polygons_attributes_list(grid_20_shp, 'id')
    print('time cost of reading polygons and attributes', time.time() - time0)

    grid_prj = map_projection.get_raster_or_vector_srs_info_proj4(grid_20_shp)

    # get grid ids based on the input extent
    grid_polys, grid_ids = get_grid_20(extent_shp, all_grid_polys, all_ids)

    # based on the extent shape, subset grid_20_id_raster
    # (using gdalwarp to crop the mask also has a 0.5 pixel offset, so don't use it)
    # grid_20_id_raster_sub = io_function.get_name_by_adding_tail(os.path.basename(grid_20_id_raster), 'sub')
    # if RSImageProcess.subset_image_by_shapefile(grid_20_id_raster, extent_shp, save_path=grid_20_id_raster_sub) is False:
    #     return False

    # read grid_ids_2d, then mask it
    grid_ids_2d, grid_nodata = raster_io.read_raster_one_band_np(grid_20_id_raster)  # 2d array of grid ids
    # rasterize grid_polys; this will serve as a mask
    grid_ids_2d_mask = raster_io.burn_polygons_to_a_raster(grid_20_id_raster, grid_polys, 1, None)
    # save to disk for checking:
    # raster_io.save_numpy_array_to_rasterfile(grid_ids_2d_mask, 'grid_ids_2d_mask.tif', grid_20_id_raster, nodata=255)
    loc_masked_out = np.where(grid_ids_2d_mask != 1)
    # grid_ids_2d[loc_masked_out] = grid_nodata
    visit_np = np.zeros_like(grid_ids_2d, dtype=np.uint8)
    visit_np[loc_masked_out] = 1  # 1 indicates already visited
    visit_np[np.where(grid_ids_2d == grid_nodata)] = 1  # 1 indicates already visited

    subset_id = -1
    # on tesia, uist, or a vpn-connected laptop
    if machine_name == 'ubuntu' or machine_name == 'uist-int-colorado-edu' \
            or 'colorado.edu' in machine_name or 'MacBook' in machine_name:
        io_function.mkdir(subset_shp_dir)
        sync_log_files(process_node, r_log_dir, process_log_dir)
        update_complete_grid_list(grid_ids, task_list)

    while True:
        subset_id += 1
        # on tesia, uist, or a vpn-connected laptop
        if machine_name == 'ubuntu' or machine_name == 'uist-int-colorado-edu' \
                or 'colorado.edu' in machine_name or 'MacBook' in machine_name:
            # remove grids that have been completed or ignored
            ignore_ids = get_complete_ignore_grid_ids()
            num_grid_ids = save_grid_ids_need_to_process(grid_ids, ignore_ids=ignore_ids,
                                                         save_path=grid_ids_to_process_txt)
            if num_grid_ids < 1:
                make_note_all_task_done(extent_shp, process_node)

            # if the input is not a shapefile, then don't divide it into many subsets
            if extent_shp.endswith('.txt'):
                select_grid_polys, selected_grid_ids = grid_polys, grid_ids
                if len(selected_grid_ids) > 2000:
                    raise ValueError('There are too many grids to process at once')
                b_divide_to_subsets = False
                subset_id = 999999
                select_grids_shp = os.path.join(
                    subset_shp_dir,
                    io_function.get_name_no_ext(extent_shp) + '_sub%d' % subset_id + '.shp')
                save_selected_girds_and_ids(selected_grid_ids, select_grid_polys, grid_prj, select_grids_shp)
            else:
                select_grids_shp = os.path.join(
                    subset_shp_dir,
                    io_function.get_name_no_ext(extent_shp) + '_sub%d' % subset_id + '.shp')
                select_grid_polys, selected_grid_ids = get_grids_for_download_process(
                    grid_polys, grid_ids, ignore_ids, max_grid_count, grid_ids_2d, visit_np,
                    select_grids_shp, proj=grid_prj)
            if selected_grid_ids is None:
                break  # no more grids
            if len(selected_grid_ids) < 1:
                continue

            subset_info_txt = msg_file_pre + '%d.txt' % subset_id
            if os.path.isfile(subset_info_txt) is False:
                # init the file
                update_subset_info(subset_info_txt,
                                   key_list=['id', 'createTime', 'shp', 'pre_status', 'proc_status'],
                                   info_list=[subset_id, str(datetime.now()), select_grids_shp,
                                              'notYet', 'notYet'])

            # download and unpack ArcticDEM, do registration, send to CURC
            if download_process_send_arctic_dem(subset_info_txt, r_working_dir, process_node,
                                                task_list, b_send_data=b_no_slurm == False) is True:
                continue

            # copy files from the remote machine
            if b_no_slurm is False:
                copy_results_from_remote_node()

            sync_log_files(process_node, r_log_dir, process_log_dir)
            # update the complete id list
            update_complete_grid_list(grid_ids, task_list)

            # save this to disk to check the progress; if there are not too many grids (<100),
            # we can use this one to process without dividing the grids into many subsets
            num_grid_ids = save_grid_ids_need_to_process(grid_ids, save_path=grid_ids_to_process_txt)
            if num_grid_ids < 1:
                make_note_all_task_done(extent_shp, process_node)

            if b_no_slurm:
                # process ArcticDEM using local computing resources
                if produce_dem_products(task_list, b_remove_job_folder=b_remove_tmp_folders,
                                        no_slurm=b_no_slurm) is False:
                    break

            if b_divide_to_subsets is False:
                break
        elif 'login' in machine_name or 'shas' in machine_name or 'sgpu' in machine_name:
            # CURC: process ArcticDEM using the computing resources on CURC
            if produce_dem_products(task_list, b_remove_job_folder=b_remove_tmp_folders) is False:
                break
        else:
            print('unknown machine : %s ' % machine_name)
            break

        # remove DEM files that are no longer needed
        remove_no_need_dem_files(b_remove=b_dont_remove_DEM_files)

    # monitor results on the remote computer
    check_time = 200
    while check_time > 0 and b_no_slurm == False:
        # on tesia, uist, or a vpn-connected laptop
        if machine_name == 'ubuntu' or machine_name == 'uist-int-colorado-edu' \
                or 'colorado.edu' in machine_name or 'MacBook' in machine_name:
            print(datetime.now(), 'wait 10 min for results in computing nodes')
            time.sleep(600)
            # copy files from the remote machine
            copy_results_from_remote_node()
            # sync the complete id list, dem info, no-dem grids, etc.
            sync_log_files(process_node, r_log_dir, process_log_dir)
            # update the complete id list
            update_complete_grid_list(grid_ids, task_list)
            # remove DEM files that are no longer needed
            remove_no_need_dem_files(b_remove=b_dont_remove_DEM_files)
            remote_sub_txt = get_subset_info_txt_list('proc_status', ['notYet', 'working'],
                                                      remote_node=process_node,
                                                      remote_folder=r_working_dir)
            if len(remote_sub_txt) < 1 and check_time != 1:
                check_time = 1  # set to 1, then it will only check one more time
            else:
                check_time -= 1
        else:
            break