def main():
    # save
    ArcticDEM_grid_20km = os.path.join(shp_dir, 'ArcticDEM_grid_20km.shp')
    # ref_raster='grid_20km_bin.tif'
    polygons, ids = vector_gpd.read_polygons_attributes_list(ArcticDEM_grid_20km, 'id', b_fix_invalid_polygon=False)
    save_raster = os.path.join(shp_dir, 'ArcticDEM_grid_20km_id.tif')
    # raster_io.burn_polygons_to_a_raster(ref_raster,polygons,ids,save_raster,date_type='uint16')

    # if no reference raster
    extent = vector_gpd.get_vector_file_bounding_box(ArcticDEM_grid_20km)
    # print(extent)
    res = 20000  # 20 km
    wkt_string = map_projection.get_raster_or_vector_srs_info_proj4(ArcticDEM_grid_20km)
    nodata = 2 ** 16 - 1
    raster_io.burn_polygons_to_a_raster(None, polygons, ids, save_raster, date_type='uint16',
                                        xres=res, yres=res, extent=extent, ref_prj=wkt_string, nodata=nodata)
    pass
def main(options, args): extent_shp_or_ids_txt = args[0] process_num = options.process_num o_res = options.out_res if os.path.isdir(grid_matchtag_sum_dir) is False: io_function.mkdir(grid_matchtag_sum_dir) basic.setlogfile('produce_matchtag_sum_ArcticDEM_log_%s.txt' % timeTools.get_now_time_str()) # read grids and ids time0 = time.time() all_grid_polys, all_ids = vector_gpd.read_polygons_attributes_list( grid_20_shp, 'id') print('time cost of read polygons and attributes', time.time() - time0) # get grid ids based on input extent grid_base_name = os.path.splitext( os.path.basename(extent_shp_or_ids_txt))[0] grid_polys, grid_ids = get_grid_20(extent_shp_or_ids_txt, all_grid_polys, all_ids) # check dem difference existence grid_dem_tifs, grid_ids_no_sum = get_existing_matchtag_sum( grid_matchtag_sum_dir, grid_base_name, grid_ids) if len(grid_ids_no_sum) > 0: # refine grid_polys if len(grid_ids) > len(grid_ids_no_sum): id_index = [grid_ids.index(id) for id in grid_ids_no_sum] grid_polys = [grid_polys[idx] for idx in id_index] # # download ArcticDEM and applying registration # tarballs, reg_tifs = download_dem_tarball(dem_strip_shp, grid_polys, arcticDEM_tarball_dir, grid_base_name, # reg_tif_dir=arcticDEM_reg_tif_dir, poly_ids=grid_ids_no_demDiff) # # # unpack and applying registration # if len(tarballs) > 0: # basic.outputlogMessage('Processs %d dem tarballs'%len(tarballs)) # out_reg_tifs = process_dem_tarball(tarballs,'./',arcticDEM_reg_tif_dir,remove_inter_data=True, apply_registration=True) # basic.outputlogMessage('Get %d new registration dem tifs' % len(out_reg_tifs)) # reg_tifs.extend(out_reg_tifs) reg_tifs = io_function.get_file_list_by_ext('.tif', arcticDEM_reg_tif_dir, bsub_folder=False) matchtag_tifs = [tif for tif in reg_tifs if 'matchtag' in tif] # only keep matchtag # crop, sum out_dem_diffs = produce_matchtag_sum_grids(grid_polys, grid_ids_no_sum, grid_base_name, matchtag_tifs, o_res, process_num=process_num)
def get_grid_ids_extent(extent_shp):
    if 'ArcticDEM_grid_20km' in os.path.basename(extent_shp):
        print('input %s looks like a grid file, read grid polygons and ids from it directly' % extent_shp)
        grid_polys, grid_ids = vector_gpd.read_polygons_attributes_list(extent_shp, 'grid_id')
        file_name_base = os.path.splitext(os.path.basename(extent_shp))[0]
        shp_corresponding_grid_ids_txt = file_name_base + '_grid_ids.txt'
        io_function.save_list_to_txt(shp_corresponding_grid_ids_txt, [str(item) for item in grid_ids])
    else:
        # read grids and ids
        time0 = time.time()
        all_grid_polys, all_ids = vector_gpd.read_polygons_attributes_list(grid_20_shp, 'id')  # in this file, it's "id", not "grid_id"
        print('time cost of read polygons and attributes', time.time() - time0)
        grid_polys, grid_ids = get_grid_20(extent_shp, all_grid_polys, all_ids)

    return grid_ids
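# Illustrative usage sketch (not part of the original script; the path below is hypothetical).
# If the input is already a 20-km grid file (its name contains 'ArcticDEM_grid_20km'), the ids
# are read from its 'grid_id' field and also saved to a *_grid_ids.txt file; otherwise the ids
# are derived from grid_20_shp via get_grid_20().
def example_get_grid_ids_extent():
    ids = get_grid_ids_extent(os.path.expanduser('~/Data/Arctic/an_extent_for_test.shp'))
    print('got %d grid ids' % len(ids))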
def main(options, args): extent_shp_or_ids_txt = args[0] process_num = options.process_num keep_dem_percent = options.keep_dem_percent o_res = options.out_res basic.setlogfile('produce_headwall_shp_ArcticDEM_log_%s.txt' % timeTools.get_now_time_str()) if os.path.isdir(grid_dem_headwall_shp_dir) is False: io_function.mkdir(grid_dem_headwall_shp_dir) # read grids and ids time0 = time.time() all_grid_polys, all_ids = vector_gpd.read_polygons_attributes_list( grid_20_shp, 'id') print('time cost of read polygons and attributes', time.time() - time0) # get grid ids based on input extent grid_base_name = os.path.splitext( os.path.basename(extent_shp_or_ids_txt))[0] grid_polys, grid_ids = get_grid_20(extent_shp_or_ids_txt, all_grid_polys, all_ids) # check dem difference existence grid_headwall_shps, grid_id_no_headwall_shp = get_existing_grid_headwall_shp( grid_dem_headwall_shp_dir, grid_base_name, grid_ids) if len(grid_id_no_headwall_shp) > 0: # refine grid_polys if len(grid_ids) > len(grid_id_no_headwall_shp): id_index = [grid_ids.index(id) for id in grid_id_no_headwall_shp] grid_polys = [grid_polys[idx] for idx in id_index] reg_tifs = io_function.get_file_list_by_ext('.tif', arcticDEM_reg_tif_dir, bsub_folder=False) reg_tifs = [tif for tif in reg_tifs if 'matchtag' not in tif] # remove matchtag # headwall_shp_folders = extract_headwall_grids(grid_polys, grid_id_no_headwall_shp, grid_base_name, reg_tifs, b_mosaic_id, b_mosaic_date, keep_dem_percent, o_res, process_num=process_num)
def rasterize_polygons(poly_path, burn_value, attribute_name, xres, yres, save_path, datatype='Byte'):
    '''
    rasterize polygons to a new raster, burning either a constant value or an attribute value
    :param poly_path: path of the polygon vector file
    :param burn_value: a constant value to burn (ignored if attribute_name is set)
    :param attribute_name: name of the attribute whose values will be burned into the raster
    :param xres: output pixel size in the x direction
    :param yres: output pixel size in the y direction
    :param save_path: output raster path
    :param datatype: output data type: 'Byte', 'UInt16', or 'Int32'
    :return: the result of raster_io.burn_polygons_to_a_raster
    '''
    import raster_io
    import vector_gpd
    import basic_src.map_projection as map_projection

    if attribute_name is not None:
        polygons, values = vector_gpd.read_polygons_attributes_list(poly_path, attribute_name,
                                                                    b_fix_invalid_polygon=False)
        burn_value = values
    else:
        polygons = vector_gpd.read_polygons_gpd(poly_path, b_fix_invalid_polygon=False)

    if datatype == 'Byte':
        dtype = 'uint8'
    elif datatype == 'UInt16':
        dtype = 'uint16'
    else:
        dtype = 'int32'

    extent = vector_gpd.get_vector_file_bounding_box(poly_path)
    wkt_string = map_projection.get_raster_or_vector_srs_info_proj4(poly_path)
    return raster_io.burn_polygons_to_a_raster(None, polygons, burn_value, save_path, dtype,
                                               xres=xres, yres=yres, extent=extent, ref_prj=wkt_string)
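# Illustrative usage sketch (assumed, not from the original script; the paths are hypothetical):
# burn the integer 'id' field of a 20-km grid shapefile into a 20000 m resolution UInt16 raster.
# The extent and projection are taken from the shapefile itself, so no reference raster is needed.
def example_rasterize_grid_ids():
    rasterize_polygons(os.path.expanduser('~/Data/ArcticDEM_grid_20km.shp'), None, 'id',
                       20000, 20000, 'ArcticDEM_grid_20km_id.tif', datatype='UInt16')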
def merge_multi_headwall_shp_to_one(shp_list, save_path):
    '''
    merge multiple shapefiles of headwalls on different dates into one file
    :param shp_list: a list of shapefile paths (each file name should contain the acquisition date in yyyymmdd format)
    :param save_path: output path
    :return: True if the output is created (or already exists), False otherwise
    '''
    # shp_list = io_function.get_file_list_by_ext('.shp',shp_dir,bsub_folder=False)
    if len(shp_list) < 1:
        print('Warning, no input shapefile, skip merging multiple shapefiles')
        return False

    if os.path.isfile(save_path):
        print('warning, %s already exists, skip' % save_path)
        return True

    # merge shapefiles one by one, and add the year and date parsed from the filename
    line_list = []
    id_list = []
    year_list = []
    date_list = []
    length_m_list = []  # length in meters
    for shp in shp_list:
        # these are line vectors, but we can still use the following function to read them
        lines, lengths = vector_gpd.read_polygons_attributes_list(shp, 'length_m')
        curr_count = len(id_list)
        acquisition_date = timeTools.get_yeardate_yyyymmdd(os.path.basename(shp))
        year = acquisition_date.year
        for idx, (line, length) in enumerate(zip(lines, lengths)):
            id_list.append(idx + curr_count)
            line_list.append(line)
            length_m_list.append(length)
            year_list.append(year)
            date_list.append(timeTools.date2str(acquisition_date))

    save_pd = pd.DataFrame({'id': id_list, 'length_m': length_m_list, 'dem_year': year_list,
                            'dem_date': date_list, 'Line': line_list})
    ref_prj = map_projection.get_raster_or_vector_srs_info_proj4(shp_list[0])
    return vector_gpd.save_polygons_to_files(save_pd, 'Line', ref_prj, save_path)
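# Illustrative usage sketch (hypothetical file names): each input shapefile name must contain
# an acquisition date in yyyymmdd format so that timeTools.get_yeardate_yyyymmdd() can parse it,
# and each file must have a 'length_m' attribute.
def example_merge_headwall_shps():
    headwall_shps = ['headwall_lines_20130801.shp', 'headwall_lines_20170901.shp']
    merge_multi_headwall_shp_to_one(headwall_shps, 'headwall_lines_multiDates.shp')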
def get_surrounding_polygons(remain_polyons, surrounding_shp, wkt, dem_diff_tif, buffer_surrounding, process_num):
    if os.path.isfile(surrounding_shp):
        # also check the file is complete
        surr_polys, surr_demD = vector_gpd.read_polygons_attributes_list(surrounding_shp, 'demD_mean')
        if len(surr_polys) < len(remain_polyons) or surr_demD is None or len(surr_demD) < len(remain_polyons):
            basic.outputlogMessage('%s already exists, but not complete, will be overwritten' % surrounding_shp)
        else:
            basic.outputlogMessage('%s already exists, skip' % surrounding_shp)
            return surrounding_shp

    # based on the merged polygons, calculate the relative dem_diff
    surrounding_polygons = vector_gpd.get_surrounding_polygons(remain_polyons, buffer_surrounding)
    surr_pd = pd.DataFrame({'Polygon': surrounding_polygons})
    vector_gpd.save_polygons_to_files(surr_pd, 'Polygon', wkt, surrounding_shp)
    raster_statistic.zonal_stats_multiRasters(surrounding_shp, dem_diff_tif, tile_min_overlap=tile_min_overlap,
                                              stats=['mean', 'std', 'count'], prefix='demD', process_num=process_num)

    return surrounding_shp
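# Illustrative usage sketch (hypothetical paths; 20 m buffer as used elsewhere in this file):
# build buffer rings around the remaining polygons, save them to a shapefile, and attach zonal
# statistics (demD_mean/std/count) of the DEM-difference raster to the rings.
def example_get_surrounding_polygons(remain_polyons, wkt):
    return get_surrounding_polygons(remain_polyons, 'dem_diff_sub_1_surrounding.shp', wkt,
                                    'DEM_diff_sub_1.tif', 20, 4)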
def produce_corresponding_grid_ids_txt(extent_shp, local_grid_id_txt, log_grid_ids_txt):

    # if it exists in the log dir but not the current dir, then copy it
    if os.path.isfile(log_grid_ids_txt) and os.path.isfile(local_grid_id_txt) is False:
        io_function.copy_file_to_dst(log_grid_ids_txt, local_grid_id_txt, overwrite=False)
        return True

    # if not in the local dir, then generate it
    if os.path.isfile(local_grid_id_txt) is False:
        # read grids and ids
        time0 = time.time()
        all_grid_polys, all_ids = vector_gpd.read_polygons_attributes_list(grid_20_shp, 'id')
        print('time cost of read polygons and attributes', time.time() - time0)

        # this will create local_grid_id_txt
        grid_polys, grid_ids = get_grid_20(extent_shp, all_grid_polys, all_ids)

        # modify local_grid_id_txt by excluding grid_ids already in adjacent extents
        other_grid_ids = read_grid_ids_from_other_extent()
        grid_ids = [id for id in grid_ids if id not in other_grid_ids]

        # overwrite the local_grid_id_txt file
        grid_ids_str = [str(item) for item in grid_ids]
        io_function.copy_file_to_dst(local_grid_id_txt,
                                     io_function.get_name_by_adding_tail(local_grid_id_txt, 'noRMadj'))  # save a copy
        io_function.save_list_to_txt(local_grid_id_txt, grid_ids_str)

    # copy to log dir
    io_function.copy_file_to_dst(local_grid_id_txt, log_grid_ids_txt)

    return True
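# Illustrative usage sketch (hypothetical paths): elsewhere in this project the local and
# log txt paths are derived with get_extent_grid_id_txt_done_files(); here the three paths
# are simply spelled out for clarity.
def example_produce_grid_ids_txt():
    produce_corresponding_grid_ids_txt(os.path.expanduser('~/Data/Arctic/an_extent_for_test.shp'),
                                       'an_extent_for_test_grid_ids.txt',
                                       os.path.join('grid_ids_txt_dir', 'an_extent_for_test_grid_ids.txt'))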
def remove_merge_polygon_in_one_shp(in_shp, org_raster, attribute_name, attribute_range, min_area, max_area, process_num=1):
    # attribute_range: [min, max], either bound can be None
    lower = attribute_range[0]
    upper = attribute_range[1]

    save_shp = io_function.get_name_by_adding_tail(in_shp, 'post')
    if os.path.isfile(save_shp):
        basic.outputlogMessage('%s exists, skip' % save_shp)
        return save_shp

    shp_pre = io_function.get_name_no_ext(in_shp)

    # read polygons and label from segment algorithm, note: some polygons may have the same label
    polygons, attr_value_list = vector_gpd.read_polygons_attributes_list(in_shp, attribute_name)
    print('Read %d polygons' % len(polygons))
    if attr_value_list is None:
        raise ValueError('%s not in %s, need to remove it and then re-create' % (attribute_name, in_shp))

    # remove polygons based on min_area and attribute_range
    remain_polyons = []
    rm_min_area_count = 0
    rm_att_value_count = 0
    for poly, att_value in zip(polygons, attr_value_list):
        if poly.area < min_area:
            rm_min_area_count += 1
            continue
        if lower is None:
            if att_value >= upper:
                rm_att_value_count += 1
                continue
        elif upper is None:
            if att_value <= lower:
                rm_att_value_count += 1
                continue
        else:
            # out of range, remove
            if att_value < lower or att_value > upper:
                rm_att_value_count += 1
                continue
        remain_polyons.append(poly)

    print('remove %d polygons based on min_area, %d polygons based on attribute_range, remain %d ones' %
          (rm_min_area_count, rm_att_value_count, len(remain_polyons)))

    if len(remain_polyons) > 1:
        # we should only merge polygons with similar reduction, but we already removed polygons outside the attribute range
        # merge touching polygons
        print(timeTools.get_now_time_str(), 'start building adjacent_matrix')
        # adjacent_matrix = vector_features.build_adjacent_map_of_polygons(remain_polyons)
        machine_name = os.uname()[1]
        # if 'login' in machine_name or 'shas' in machine_name or 'sgpu' in machine_name:
        #     print('Warning, some problem of parallel running in build_adjacent_map_of_polygons on curc, but ok in my laptop and uist, change process_num = 1')
        #     process_num = 1
        adjacent_matrix = vector_gpd.build_adjacent_map_of_polygons(remain_polyons, process_num=process_num)
        print(timeTools.get_now_time_str(), 'finish building adjacent_matrix')
        if adjacent_matrix is False:
            return False
        merged_polygons = vector_features.merge_touched_polygons(remain_polyons, adjacent_matrix)
        print(timeTools.get_now_time_str(), 'finish merging touched polygons, get %d ones' % (len(merged_polygons)))

        # remove large ones
        remain_polyons = []
        rm_max_area_count = 0
        for poly in merged_polygons:
            if poly.area > max_area:
                rm_max_area_count += 1
                continue
            remain_polyons.append(poly)
        print('remove %d polygons based on max_area, remain %d' % (rm_max_area_count, len(remain_polyons)))

    wkt = map_projection.get_raster_or_vector_srs_info_wkt(in_shp)

    polyons_noMulti = [vector_gpd.MultiPolygon_to_polygons(idx, poly) for idx, poly in enumerate(remain_polyons)]
    remain_polyons = []
    for polys in polyons_noMulti:
        polys = [poly for poly in polys if poly.area > min_area]  # remove tiny polygons before buffering
        remain_polyons.extend(polys)
    print('convert MultiPolygon to polygons, remove some small polygons, remain %d' % (len(remain_polyons)))

    # based on the merged polygons, calculate the mean dem diff and the relative dem_diff
    buffer_surrounding = 20  # meters
    surrounding_polygons = vector_gpd.get_surrounding_polygons(remain_polyons, buffer_surrounding)
    surrounding_shp = io_function.get_name_by_adding_tail(in_shp, 'surrounding')
    surr_pd = pd.DataFrame({'Polygon': surrounding_polygons})
    vector_gpd.save_polygons_to_files(surr_pd, 'Polygon', wkt, surrounding_shp)
    raster_statistic.zonal_stats_multiRasters(surrounding_shp, org_raster, stats=['mean', 'std', 'count'],
                                              prefix='demD', process_num=process_num)

    # calculate attributes of the remaining ones: area, dem_diff: mean, std
    merged_pd = pd.DataFrame({'Polygon': remain_polyons})
    merged_shp = io_function.get_name_by_adding_tail(in_shp, 'merged')
    vector_gpd.save_polygons_to_files(merged_pd, 'Polygon', wkt, merged_shp)
    raster_statistic.zonal_stats_multiRasters(merged_shp, org_raster, stats=['mean', 'std', 'count'],
                                              prefix='demD', process_num=process_num)

    # calculate the relative dem diff
    surr_dem_diff_list = vector_gpd.read_attribute_values_list(surrounding_shp, 'demD_mean')
    merge_poly_dem_diff_list = vector_gpd.read_attribute_values_list(merged_shp, 'demD_mean')
    if len(surr_dem_diff_list) != len(merge_poly_dem_diff_list):
        raise ValueError('The number of surr_dem_diff_list and merge_poly_dem_diff_list is different')
    relative_dem_diff_list = [mer - sur for sur, mer in zip(surr_dem_diff_list, merge_poly_dem_diff_list)]

    merge_poly_demD_std_list = vector_gpd.read_attribute_values_list(merged_shp, 'demD_std')
    merge_poly_demD_count_list = vector_gpd.read_attribute_values_list(merged_shp, 'demD_count')

    # remove polygons whose relative value is above the threshold; the upper bound of
    # attribute_range is used as that threshold here (the specialized version of this
    # function used a dem_diff_thread_m argument, which is not defined in this one)
    rel_value_thr = upper
    save_polyons = []
    save_demD_mean_list = []
    save_demD_std_list = []
    save_demD_count_list = []
    save_rel_diff_list = []
    save_surr_demD_list = []
    rm_rel_dem_diff_count = 0
    rm_min_area_count = 0
    for idx in range(len(remain_polyons)):
        # relative dem diff
        if rel_value_thr is not None and relative_dem_diff_list[idx] > rel_value_thr:
            rm_rel_dem_diff_count += 1
            continue
        # when converting MultiPolygon to Polygon, some small polygons may be created
        if remain_polyons[idx].area < min_area:
            rm_min_area_count += 1
            continue

        save_polyons.append(remain_polyons[idx])
        save_demD_mean_list.append(merge_poly_dem_diff_list[idx])
        save_demD_std_list.append(merge_poly_demD_std_list[idx])
        save_demD_count_list.append(merge_poly_demD_count_list[idx])
        save_rel_diff_list.append(relative_dem_diff_list[idx])
        save_surr_demD_list.append(surr_dem_diff_list[idx])

    print('remove %d polygons based on relative rel_demD and %d based on min_area, remain %d' %
          (rm_rel_dem_diff_count, rm_min_area_count, len(save_polyons)))

    poly_ids = [item + 1 for item in range(len(save_polyons))]
    poly_areas = [poly.area for poly in save_polyons]
    save_pd = pd.DataFrame({'poly_id': poly_ids, 'poly_area': poly_areas, 'demD_mean': save_demD_mean_list,
                            'demD_std': save_demD_std_list, 'demD_count': save_demD_count_list,
                            'surr_demD': save_surr_demD_list, 'rel_demD': save_rel_diff_list,
                            'Polygon': save_polyons})
    vector_gpd.save_polygons_to_files(save_pd, 'Polygon', wkt, save_shp)

    # add date difference if it is available
    date_diff_base = os.path.basename(org_raster).replace('DEM_diff', 'date_diff')
    date_diff_tif = os.path.join(os.path.dirname(org_raster), date_diff_base)
    if os.path.isfile(date_diff_tif):
        raster_statistic.zonal_stats_multiRasters(save_shp, date_diff_tif, stats=['mean', 'std'],
                                                  prefix='dateD', process_num=process_num)

    return save_shp
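# Illustrative usage sketch (hypothetical paths and thresholds): keep polygons whose 'demD_mean'
# is below -0.5 m (values >= the upper bound of attribute_range are removed; a lower bound of
# None means unbounded), then drop polygons smaller than 90 m2 or, after merging, larger than
# 1e6 m2. The upper bound (-0.5) also serves as the relative-difference threshold in this version.
def example_remove_merge_polygons():
    return remove_merge_polygon_in_one_shp('segment_result.shp', 'DEM_diff_sub_1.tif',
                                           'demD_mean', [None, -0.5], 90, 10**6, process_num=4)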
def download_dem_tarball(dem_index_shp, extent_polys, save_folder, pre_name, reg_tif_dir=None, poly_ids=None, b_arcticDEM_tile=False): # read dem polygons and url dem_polygons, dem_urls = vector_gpd.read_polygons_attributes_list( dem_index_shp, 'fileurl', b_fix_invalid_polygon=False) basic.outputlogMessage('%d dem polygons in %s' % (len(dem_polygons), dem_index_shp)) dem_tar_ball_list = [] reg_tifs_list = [] curr_dir = os.getcwd() b_save_grid_id_noDEM = True if poly_ids is None: poly_ids = [idx for idx in range(len(extent_polys))] b_save_grid_id_noDEM = False # if poly_ids is not the global unique id, then don't save it. if os.path.isfile('no_registration_strips.txt'): no_registration_strips = io_function.read_list_from_txt( 'no_registration_strips.txt') else: no_registration_strips = [] # tarballs is being downloaded downloading_tarballs = [] for count, (idx, ext_poly) in enumerate(zip(poly_ids, extent_polys)): basic.outputlogMessage('get data for the %d th extent (%d/%d)' % (idx, count, len(extent_polys))) save_txt_path = pre_name + '_dem_urls_poly_%d.txt' % idx if os.path.isfile(save_txt_path): urls = io_function.read_list_from_txt(save_txt_path) basic.outputlogMessage('read %d dem urls from %s' % (len(urls), save_txt_path)) else: # get fileurl dem_poly_ids = vector_gpd.get_poly_index_within_extent( dem_polygons, ext_poly) basic.outputlogMessage('find %d DEM within %d th extent' % (len(dem_poly_ids), (idx))) urls = [dem_urls[id] for id in dem_poly_ids] # save to txt io_function.save_list_to_txt(save_txt_path, urls) basic.outputlogMessage('save dem urls to %s' % save_txt_path) if len(urls) > 0: # total_size_GB = get_total_size(urls) # internet access, parallel running may cause problem. The info is not important # basic.outputlogMessage('the size of files will be downloaded is %.4lf GB for the %d th extent '%(total_size_GB,(idx+1))) # time.sleep(5) # wait 5 seconds # download them using wget one by one for ii, url in enumerate(urls): tmp = urlparse(url) # in the Strip DEM, there are around 700 url are point to tif files, failed to download them # e.g. 
/mnt/pgc/data/elev/dem/setsm/ArcticDEM/geocell/v3.0/2m_temp/n59w137/SETSM_WV03_20150518_104001000B703200_104001000C715B00_seg8_2m_v3.0_dem.tif if url.startswith('/mnt') and url.endswith('.tif'): basic.outputlogMessage("error: not a valid url: %s" % url) continue filename = os.path.basename(tmp.path) save_dem_path = os.path.join(save_folder, filename) if reg_tif_dir is not None: tar_base = os.path.basename(filename)[:-7] # file_pattern = ['*dem_reg.tif', '*reg_dem.tif'] # Arctic strip and tile (mosaic) version if b_arcticDEM_tile: reg_tifs = io_function.get_file_list_by_pattern( reg_tif_dir, tar_base + '*reg_dem.tif') else: reg_tifs = io_function.get_file_list_by_pattern( reg_tif_dir, tar_base + '*dem_reg.tif') if len(reg_tifs) > 0: basic.outputlogMessage( 'warning, unpack and registrated tif for %s already exists, skip downloading' % filename) reg_tifs_list.append(reg_tifs[0]) continue if './' + tar_base in no_registration_strips: basic.outputlogMessage( 'warning, %s is in no_registration_strips list, skip downloading' % filename) continue if filename in downloading_tarballs: basic.outputlogMessage( 'warning, %s is being downloaded by other processes' % filename) continue if os.path.isfile( save_dem_path) and os.path.getsize(save_dem_path) > 1: basic.outputlogMessage( 'warning, %s already exists, skip downloading' % filename) else: # download the dem basic.outputlogMessage( 'starting downloading %d th DEM (%d in total)' % ((ii + 1), len(urls))) downloading_tarballs.append(filename) # os.chdir(save_folder) # run_a_process_download(url) # download ################################################## # download in parallel basic.check_exitcode_of_process( download_tasks ) # if there is one former job failed, then quit while True: job_count = basic.alive_process_count(download_tasks) if job_count >= max_task_count: print( machine_name, datetime.now(), 'You are running %d or more tasks in parallel, wait ' % max_task_count) time.sleep(60) # continue break # start the processing sub_process = Process( target=run_a_process_download, args=(url, save_dem_path, reg_tif_dir, max_task_count, b_unpack_after_downloading )) # start a process, don't wait sub_process.start() download_tasks.append(sub_process) basic.close_remove_completed_process(download_tasks) # os.chdir(curr_dir) dem_tar_ball_list.append(save_dem_path) else: basic.outputlogMessage( 'Warning, can not find DEMs within %d th extent' % (idx)) if b_save_grid_id_noDEM: save_id_grid_no_dem(idx) # wait until all task complete while True: job_count = basic.alive_process_count(download_tasks) if job_count > 0: print( machine_name, datetime.now(), 'wait until all task are completed, alive task account: %d ' % job_count) time.sleep(60) # else: break return dem_tar_ball_list, reg_tifs_list
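# Illustrative usage sketch (hypothetical paths): read the strip index shapefile, find the
# tarball URLs intersecting each extent polygon, and download them into save_folder; tarballs
# whose registered DEM already exists in reg_tif_dir are skipped.
def example_download_dem_tarball(extent_polys):
    tarballs, reg_tifs = download_dem_tarball('ArcticDEM_Strip_Index.shp', extent_polys,
                                              'arcticDEM_tarballs', 'alaska_north',
                                              reg_tif_dir='arcticdem_registration_tifs')
    print('%d tarballs, %d existing registered DEMs' % (len(tarballs), len(reg_tifs)))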
def filter_merge_polygons(in_shp,merged_shp,wkt, min_area,max_area,dem_diff_tif,dem_diff_thread_m,process_num): if os.path.isfile(merged_shp): # also check the file is complete polys, demD_values = vector_gpd.read_polygons_attributes_list(merged_shp,'demD_mean') if len(polys) < 1 or demD_values is None or len(demD_values) < 1: basic.outputlogMessage('%s already exists, but not complete, will be overwritten'%merged_shp) else: basic.outputlogMessage('%s exists, skip'%merged_shp) return merged_shp # read polygons and label from segment algorithm, note: some polygons may have the same label # polygons, demD_mean_list = vector_gpd.read_polygons_attributes_list(in_shp,'demD_mean') polygons, attributes = vector_gpd.read_polygons_attributes_list(in_shp,['demD_mean','DN']) demD_mean_list = attributes[0] DN_list = attributes[1] print('Read %d polygons'%len(polygons)) if demD_mean_list is None: raise ValueError('demD_mean not in %s, need to remove it and then re-create'%in_shp) # replace None (if exists) as nan demD_mean_list = np.array(demD_mean_list, dtype=float) # replace nan values as 0 demD_mean_list = np.nan_to_num(demD_mean_list) remain_polyons = [] rm_min_area_count = 0 rm_diff_thr_count = 0 for poly, demD_mean in zip(polygons, demD_mean_list): if poly.area < min_area: rm_min_area_count += 1 continue # mean value: not subsidence if demD_mean > dem_diff_thread_m: # rm_diff_thr_count += 1 continue remain_polyons.append(poly) print('remove %d polygons based on min_area, %d polygons based on dem_diff_threshold, remain %d ones'%(rm_min_area_count, rm_diff_thr_count,len(remain_polyons))) if len(remain_polyons) < 1: return None # we should only merge polygon with similar reduction, but we already remove polygons with mean reduction > threshhold # merge touch polygons # print(timeTools.get_now_time_str(), 'start building adjacent_matrix') # # adjacent_matrix = vector_features.build_adjacent_map_of_polygons(remain_polyons) # machine_name = os.uname()[1] # if 'login' in machine_name or 'shas' in machine_name or 'sgpu' in machine_name: # print('Warning, some problem of parallel running in build_adjacent_map_of_polygons on curc, ' # 'but ok in my laptop and uist, change process_num = 1') # process_num = 1 ############################################################ ## build adjacent_matrix then merge for entire raster # adjacent_matrix = vector_gpd.build_adjacent_map_of_polygons(remain_polyons, process_num=process_num) # print(timeTools.get_now_time_str(), 'finish building adjacent_matrix') # # if adjacent_matrix is False: # return None # merged_polygons = vector_features.merge_touched_polygons(remain_polyons, adjacent_matrix) ############################################################ # ## build adjacent_matrix then merge, patch by patch (not too many improvements) # label_id_range_txt = os.path.splitext(in_shp)[0] + '_label_IDrange.txt' # merged_polygons = merge_polygons_patchBYpatch(label_id_range_txt, remain_polyons, DN_list, process_num=process_num) ############################################################ ## merge polygons using rasterize label_raster = os.path.splitext(in_shp)[0] + '_label.tif' merged_polygons = merge_polygon_rasterize(label_raster, remain_polyons) print(timeTools.get_now_time_str(), 'finish merging touched polygons, get %d ones' % (len(merged_polygons))) # remove large ones remain_polyons = [] rm_max_area_count = 0 for poly in merged_polygons: if poly.area > max_area: rm_max_area_count += 1 continue remain_polyons.append(poly) print('remove %d polygons based on max_area, 
remain %d' % (rm_max_area_count, len(remain_polyons))) polyons_noMulti = [vector_gpd.MultiPolygon_to_polygons(idx, poly) for idx, poly in enumerate(remain_polyons)] remain_polyons = [] for polys in polyons_noMulti: polys = [poly for poly in polys if poly.area > min_area] # remove tiny polygon before buffer remain_polyons.extend(polys) print('convert MultiPolygon (filter_merge_polygons) to polygons and remove small ones, remain %d' % (len(remain_polyons))) if len(remain_polyons) < 1: return None # calcualte attributes of remain ones: area, dem_diff: mean, std merged_pd = pd.DataFrame({'Polygon': remain_polyons}) vector_gpd.save_polygons_to_files(merged_pd, 'Polygon', wkt, merged_shp) # based on the merged polygons, calculate the mean dem diff raster_statistic.zonal_stats_multiRasters(merged_shp, dem_diff_tif, tile_min_overlap=tile_min_overlap, stats=['mean', 'std', 'count'], prefix='demD',process_num=process_num) return merged_shp
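# Illustrative usage sketch (hypothetical paths and thresholds): filter the polygons of a
# segmentation result by 'demD_mean' (keep values at or below -0.5 m) and by area, merge the
# touching ones using the companion *_label.tif raster next to in_shp, and attach zonal
# statistics of the DEM-difference raster to the merged polygons.
def example_filter_merge_polygons(wkt):
    return filter_merge_polygons('dem_diff_segment.shp', 'dem_diff_segment_merged.shp', wkt,
                                 90, 10**6, 'DEM_diff_sub_1.tif', -0.5, 4)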
def main(options, args): extent_shp = args[0] task_list = [args[item] for item in range(1, len(args))] # task_name = args[1] if len(task_list) < 1: raise ValueError('There is no task: %s' % str(task_list)) # local_grid_id_txt is in the current dir # log_grid_ids_txt, log_grid_ids_txt_done is in grid_ids_txt_dir local_grid_id_txt, log_grid_ids_txt, log_grid_ids_txt_done = get_extent_grid_id_txt_done_files( extent_shp) # check if it has been complete if os.path.isfile(log_grid_ids_txt_done): basic.outputlogMessage('Tasks for extent %s have been completed' % extent_shp) return True r_working_dir = '/scratch/summit/lihu9680/Arctic/dem_processing' if options.remote_working_dir is None else options.remote_working_dir r_log_dir = '/scratch/summit/lihu9680/ArcticDEM_tmp_dir/log_dir' if options.remote_log_dir is None else options.remote_log_dir process_node = '$curc_host' if options.process_node is None else options.process_node download_node = '$curc_host' if options.download_node is None else options.download_node max_grid_count = options.max_grids b_remove_tmp_folders = options.b_remove_tmp_folders b_dont_remove_DEM_files = options.b_dont_remove_DEM_files b_no_slurm = options.b_no_slurm b_divide_to_subsets = True # modify the folder name of subsets global subset_shp_dir subset_shp_dir = subset_shp_dir + '_' + io_function.get_name_no_ext( extent_shp) global msg_file_pre msg_file_pre = io_function.get_name_no_ext(extent_shp) + '_' + msg_file_pre grid_ids_to_process_txt = io_function.get_name_no_ext( extent_shp) + '_' + 'grid_ids_to_process.txt' # build map dem cover grid (take time, but only need to run once at the beginning) build_dict_of_dem_cover_grid_ids(dem_strip_shp, grid_20_shp, strip_dem_cover_grids_txt) build_dict_of_dem_cover_grid_ids(dem_tile_shp, grid_20_shp, tile_dem_cover_grids_txt) # get grids for processing # read grids and ids time0 = time.time() all_grid_polys, all_ids = vector_gpd.read_polygons_attributes_list( grid_20_shp, 'id') print('time cost of read polygons and attributes', time.time() - time0) gird_prj = map_projection.get_raster_or_vector_srs_info_proj4(grid_20_shp) # get grid ids based on input extent grid_polys, grid_ids = get_grid_20(extent_shp, all_grid_polys, all_ids) # based on extent shape, subset grid_20_id_raster # # using gdalwarp to crop the mask, also have 0.5 pixel offset, so dont use it # grid_20_id_raster_sub = io_function.get_name_by_adding_tail(os.path.basename(grid_20_id_raster),'sub') # if RSImageProcess.subset_image_by_shapefile(grid_20_id_raster,extent_shp,save_path=grid_20_id_raster_sub) is False: # return False # read grid_ids_2d, then mask it grid_ids_2d, grid_nodata = raster_io.read_raster_one_band_np( grid_20_id_raster) # 2d array of gird ids # rasterize grid_polys, will served as mask. 
grid_ids_2d_mask = raster_io.burn_polygons_to_a_raster( grid_20_id_raster, grid_polys, 1, None) # raster_io.save_numpy_array_to_rasterfile(grid_ids_2d_mask,'grid_ids_2d_mask.tif',grid_20_id_raster,nodata=255) # save to disk for checking loc_masked_out = np.where(grid_ids_2d_mask != 1) # grid_ids_2d[ loc_masked_out ] = grid_nodata visit_np = np.zeros_like(grid_ids_2d, dtype=np.uint8) visit_np[loc_masked_out] = 1 # 1 indicate already visited visit_np[np.where( grid_ids_2d == grid_nodata)] = 1 # 1 indicate already visited subset_id = -1 # on tesia, uist, vpn-connected laptop if machine_name == 'ubuntu' or machine_name == 'uist-int-colorado-edu' or 'colorado.edu' in machine_name or 'MacBook' in machine_name: io_function.mkdir(subset_shp_dir) sync_log_files(process_node, r_log_dir, process_log_dir) update_complete_grid_list(grid_ids, task_list) while True: subset_id += 1 # on tesia, uist, vpn-connected laptop if machine_name == 'ubuntu' or machine_name == 'uist-int-colorado-edu' or 'colorado.edu' in machine_name or 'MacBook' in machine_name: # remove grids that has been complete or ignored ignore_ids = get_complete_ignore_grid_ids() num_grid_ids = save_grid_ids_need_to_process( grid_ids, ignore_ids=ignore_ids, save_path=grid_ids_to_process_txt) if num_grid_ids < 1: make_note_all_task_done(extent_shp, process_node) # if the input is not a shapefile, then don't divide it to many subsets if extent_shp.endswith('.txt'): select_grid_polys, selected_gird_ids = grid_polys, grid_ids if len(selected_gird_ids) > 2000: raise ValueError('There are too many grid to process once') b_divide_to_subsets = False subset_id = 999999 select_grids_shp = os.path.join( subset_shp_dir, io_function.get_name_no_ext(extent_shp) + '_sub%d' % subset_id + '.shp') save_selected_girds_and_ids(selected_gird_ids, select_grid_polys, gird_prj, select_grids_shp) else: select_grids_shp = os.path.join( subset_shp_dir, io_function.get_name_no_ext(extent_shp) + '_sub%d' % subset_id + '.shp') select_grid_polys, selected_gird_ids = get_grids_for_download_process( grid_polys, grid_ids, ignore_ids, max_grid_count, grid_ids_2d, visit_np, select_grids_shp, proj=gird_prj) if selected_gird_ids is None: break # no more grids if len(selected_gird_ids) < 1: continue subset_info_txt = msg_file_pre + '%d.txt' % subset_id if os.path.isfile(subset_info_txt) is False: # init the file update_subset_info(subset_info_txt, key_list=[ 'id', 'createTime', 'shp', 'pre_status', 'proc_status' ], info_list=[ subset_id, str(datetime.now()), select_grids_shp, 'notYet', 'notYet' ]) # download and unpack ArcticDEM, do registration, send to curc if download_process_send_arctic_dem(subset_info_txt, r_working_dir, process_node, task_list, b_send_data=b_no_slurm == False) is True: continue # copy file from remote machine if b_no_slurm is False: copy_results_from_remote_node() sync_log_files(process_node, r_log_dir, process_log_dir) # update complete id list update_complete_grid_list(grid_ids, task_list) # save this to disk, to check progress, if there are not too many grids (<100), # we can use this one to process withtou divide grids to many subsets num_grid_ids = save_grid_ids_need_to_process( grid_ids, save_path=grid_ids_to_process_txt) if num_grid_ids < 1: make_note_all_task_done(extent_shp, process_node) if b_no_slurm: # process ArcticDEM using local computing resource if produce_dem_products( task_list, b_remove_job_folder=b_remove_tmp_folders, no_slurm=b_no_slurm) is False: break if b_divide_to_subsets is False: break elif 'login' in machine_name or 'shas' in 
machine_name or 'sgpu' in machine_name: # curc # process ArcticDEM using the computing resource on CURC if produce_dem_products( task_list, b_remove_job_folder=b_remove_tmp_folders) is False: break else: print('unknown machine : %s ' % machine_name) break # remove no need dem files remove_no_need_dem_files(b_remove=b_dont_remove_DEM_files) # monitor results in remote computer check_time = 200 while check_time > 0 and b_no_slurm == False: # on tesia, uist, vpn-connected laptop if machine_name == 'ubuntu' or machine_name == 'uist-int-colorado-edu' or 'colorado.edu' in machine_name or 'MacBook' in machine_name: print(datetime.now(), 'wait 10 min for results in computing nodes') time.sleep(600) # copy file from remote machine copy_results_from_remote_node() # sync complete id list, dem info, no dem grids etcs. sync_log_files(process_node, r_log_dir, process_log_dir) # update complete id list update_complete_grid_list(grid_ids, task_list) # remove no need dem files remove_no_need_dem_files(b_remove=b_dont_remove_DEM_files) remote_sub_txt = get_subset_info_txt_list( 'proc_status', ['notYet', 'working'], remote_node=process_node, remote_folder=r_working_dir) if len(remote_sub_txt) < 1 and check_time != 1: check_time = 1 # set to 1, then will only check one more time else: check_time -= 1 else: break
def build_dict_of_dem_cover_grid_ids(dem_info_shp, grid_20_shp, save_dict_txt):
    # this will take time, but only need to run once at the beginning
    if os.path.isfile(save_dict_txt):
        print('warning, %s exists, skip build_dict_of_dem_cover_grid_ids' % save_dict_txt)
        return True

    # extent polygons and projection (proj4)
    dem_shp_prj = map_projection.get_raster_or_vector_srs_info_proj4(dem_info_shp)
    if dem_shp_prj == '':
        raise ValueError('get proj4 of %s failed' % dem_info_shp)
    grid_shp_prj = map_projection.get_raster_or_vector_srs_info_proj4(grid_20_shp)
    if grid_shp_prj == '':
        raise ValueError('get proj4 of %s failed' % grid_20_shp)

    if dem_shp_prj != grid_shp_prj:
        raise ValueError('%s and %s do not have the same projection' % (dem_info_shp, grid_20_shp))

    # read DEM info
    dem_polygons, dem_names = vector_gpd.read_polygons_attributes_list(dem_info_shp, 'name',
                                                                       b_fix_invalid_polygon=False)
    # dem_name: e.g. SETSM_GE01_20090818_1050410001E0CF00_1050410001D80200_seg1_2m_v3.0 or 11_27_2_1_2m_v3.0
    dem_poly_count = len(dem_polygons)
    # check if there are duplicate dem names
    if len(dem_names) != len(set(dem_names)):
        raise ValueError('some duplicate dem names in %s' % dem_info_shp)

    # read grid polygons and ids
    all_grid_polys, all_ids = vector_gpd.read_polygons_attributes_list(grid_20_shp, 'id')

    dem_cover_grids = {}
    # this will take time.
    # for idx, (dem_poly,dem_name) in enumerate(zip(dem_polygons, dem_names)):
    #     print(timeTools.get_now_time_str(), idx, dem_poly_count)
    #     index = vector_gpd.get_poly_index_within_extent(all_grid_polys, dem_poly)
    #     gird_ids = [ all_ids[idx] for idx in index ]
    #     # if dem_name in dem_cover_grids.keys():
    #     #     basic.outputlogMessage('\n Warning, %s already in dict \n'%dem_name)
    #     dem_cover_grids[dem_name] = gird_ids

    ### parallel version
    theadPool = Pool(multiprocessing.cpu_count())  # multi processes
    parameters_list = [(all_ids, all_grid_polys, dem_poly, dem_name, idx, dem_poly_count)
                       for idx, (dem_poly, dem_name) in enumerate(zip(dem_polygons, dem_names))]
    results = theadPool.starmap(get_overlap_grids_for_one_extent, parameters_list)  # need python3
    for res in results:
        dem_name, gird_ids = res
        dem_cover_grids[dem_name] = gird_ids

    # save to dict
    io_function.save_dict_to_txt_json(save_dict_txt, dem_cover_grids)
    theadPool.close()
    return True
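# Illustrative usage sketch (hypothetical strip index path and output file name): build, in
# parallel, a dict of {dem_name: [grid ids covered by that DEM]} for the strip index and the
# 20-km grid, then save it as a json/txt file; the call is skipped if the output already exists.
def example_build_dem_cover_grid_ids():
    build_dict_of_dem_cover_grid_ids(os.path.expanduser('~/Data/Arctic/ArcticDEM_Strip_Index.shp'),
                                     grid_20_shp, 'strip_dem_cover_grids.txt')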
def main(options, args): extent_shp = args[0] # ext_shp_prj = map_projection.get_raster_or_vector_srs_info_epsg(extent_shp) # reproject if necessary, it seems that the gdalwarp can handle different projection # if ext_shp_prj != 'EPSG:3413': # EPSG:3413 is the projection ArcticDEM used # extent_shp_reprj = io_function.get_name_by_adding_tail(extent_shp,'3413') # vector_gpd.reproject_shapefile(extent_shp,'EPSG:3413',extent_shp_reprj) # extent_shp = extent_shp_reprj tar_dir = options.ArcticDEM_dir save_dir = options.save_dir b_mosaic_id = options.create_mosaic_id b_mosaic_date = options.create_mosaic_date b_rm_inter = options.remove_inter_data keep_dem_percent = options.keep_dem_percent inter_format = options.format arcticDEM_shp = options.arcticDEM_shp o_res = options.out_res b_dem_diff = options.create_dem_diff dem_list_txt = options.dem_list_txt # create mosaic is time consuming, but it also takes a lot memory. For a region of 50 km by 50 km, it may take 10 to 50 GB memory process_num = options.process_num basic.outputlogMessage( 'The number of processes for creating the mosaic is: %d' % process_num) extent_shp_base = os.path.splitext(os.path.basename(extent_shp))[0] extent_prj = map_projection.get_raster_or_vector_srs_info_epsg(extent_shp) b_ArcticDEM_tar = False dem_tif_list = [] if tar_dir is not None and arcticDEM_shp is not None: b_ArcticDEM_tar = True else: dem_tif_list = io_function.read_list_from_txt(dem_list_txt) # check projection for dem_tif in dem_tif_list: dem_prj = map_projection.get_raster_or_vector_srs_info_epsg( dem_tif) if dem_prj != extent_prj: raise ValueError('The projection of %s is different from %s' % (dem_prj, extent_prj)) b_ArcticDEM_tiles = False if b_ArcticDEM_tar: arcdem_prj = map_projection.get_raster_or_vector_srs_info_epsg( arcticDEM_shp) # read dem polygons and url time0 = time.time() dem_polygons, dem_urls = vector_gpd.read_polygons_attributes_list( arcticDEM_shp, 'fileurl', b_fix_invalid_polygon=False) print('time cost of read polygons and attributes', time.time() - time0) basic.outputlogMessage('%d dem polygons in %s' % (len(dem_polygons), extent_shp)) # get tarball list tar_list = io_function.get_file_list_by_ext('.gz', tar_dir, bsub_folder=False) if len(tar_list) < 1: raise ValueError('No input tar.gz files in %s' % tar_dir) if is_ArcticDEM_tiles(tar_list): basic.outputlogMessage('Input is the mosaic version of ArcticDEM') b_ArcticDEM_tiles = True if extent_prj == arcdem_prj: extent_polys = vector_gpd.read_polygons_gpd(extent_shp) else: extent_polys = vector_gpd.read_shape_gpd_to_NewPrj( extent_shp, arcdem_prj) else: extent_polys = vector_gpd.read_polygons_gpd(extent_shp) if len(extent_polys) < 1: raise ValueError('No polygons in %s' % extent_shp) else: basic.outputlogMessage('%d extent polygons in %s' % (len(extent_polys), extent_shp)) extPolys_ids = vector_gpd.read_attribute_values_list(extent_shp, 'id') if extPolys_ids is None or None in extPolys_ids: basic.outputlogMessage( 'Warning, field: id is not in %s, will create default ID for each grid' % extent_shp) extPolys_ids = [id + 1 for id in range(len(extent_polys))] # print('extPolys_ids, count',extPolys_ids, len(extent_polys)) same_extent = False if b_dem_diff: # crop each one to the same extent, easy for DEM differnce. 
same_extent = True for idx, ext_poly in zip(extPolys_ids, extent_polys): basic.outputlogMessage('get data for the %d th extent (%d in total)' % (idx, len(extent_polys))) if b_ArcticDEM_tar: if b_ArcticDEM_tiles: proc_ArcticDEM_tile_one_grid_polygon(tar_dir, dem_polygons, dem_urls, o_res, save_dir, inter_format, b_rm_inter, ext_poly, idx, extent_shp_base) else: proc_ArcticDEM_strip_one_grid_polygon( tar_dir, dem_polygons, dem_urls, o_res, save_dir, inter_format, b_mosaic_id, b_mosaic_date, b_rm_inter, b_dem_diff, ext_poly, idx, keep_dem_percent, process_num, extent_shp_base, resample_method='average', same_extent=same_extent) else: proc_dem_mosaic_diff(dem_tif_list, save_dir, idx, ext_poly, b_mosaic_id, b_mosaic_date, process_num, keep_dem_percent, o_res, b_dem_diff, extent_shp_base, b_rm_inter, resample_method='average')
def main(): dem_index_shp = os.path.expanduser( '~/Data/Arctic/ArcticDEM/BROWSE_SERVER/indexes/ArcticDEM_Tile_Index_Rel7/ArcticDEM_Tile_Index_Rel7.shp' ) # extent_shp = os.path.expanduser('~/Data/PDO/PDO_statistics_swatchs/swatch_bounding_boxes.shp') extent_shp = os.path.expanduser( '~/Data/PDO/extent_each_swatch/merge_all_qa_exent.shp') # extent polygons and projection (proj4) extent_shp_prj = map_projection.get_raster_or_vector_srs_info_proj4( extent_shp) dem_shp_prj = map_projection.get_raster_or_vector_srs_info_proj4( dem_index_shp) if extent_shp_prj != dem_shp_prj: basic.outputlogMessage( '%s and %s do not have the same projection, will reproject %s' % (extent_shp, dem_index_shp, os.path.basename(extent_shp))) epsg = map_projection.get_raster_or_vector_srs_info_epsg(dem_index_shp) # print(epsg) # extent_polys = vector_gpd.read_shape_gpd_to_NewPrj(extent_shp,dem_shp_prj.strip()) extent_polys = vector_gpd.read_shape_gpd_to_NewPrj(extent_shp, epsg) else: extent_polys = vector_gpd.read_polygons_gpd(extent_shp) poly_ids = [idx for idx in range(len(extent_polys))] if 'boxes' in os.path.basename(extent_shp): nc_file_names = vector_gpd.read_attribute_values_list( extent_shp, 'nc_file') else: nc_file_names = vector_gpd.read_attribute_values_list( extent_shp, 'layer') # read dem polygons and tile number dem_polygons, dem_tiles = vector_gpd.read_polygons_attributes_list( dem_index_shp, 'tile', b_fix_invalid_polygon=False) for count, (idx, ext_poly) in enumerate(zip(poly_ids, extent_polys)): basic.outputlogMessage('get data for the %d th extent (%d/%d)' % (idx, count, len(extent_polys))) save_txt_path = nc_file_names[idx] + '-' + 'dem_tiles_poly_%d.txt' % idx if os.path.isfile(save_txt_path): tiles = io_function.read_list_from_txt(save_txt_path) basic.outputlogMessage('read %d dem tiles from %s' % (len(tiles), save_txt_path)) else: # get fileurl dem_poly_ids = vector_gpd.get_poly_index_within_extent( dem_polygons, ext_poly) basic.outputlogMessage('find %d DEM within %d th extent' % (len(dem_poly_ids), (idx))) tiles = [dem_tiles[id] for id in dem_poly_ids] # save to txt io_function.save_list_to_txt(save_txt_path, tiles) basic.outputlogMessage('save dem urls to %s' % save_txt_path) # download and create a mosaic url_head = 'https://data.pgc.umn.edu/elev/dem/setsm/ArcticDEM/mosaic/v3.0/32m/' download_tarball_for_one_polygon(tarball_dir, dem_tif_dir, url_head, tiles) # create a mosaic create_a_mosaic(nc_file_names[idx], idx, dem_eachSwatch_dir, ext_poly, tiles) bak_folder = 'small_tifs' io_function.mkdir(bak_folder) # remove small and duplicated ones for file_name in nc_file_names: crop_tifs = io_function.get_file_list_by_pattern( dem_eachSwatch_dir, file_name + '*crop.tif') if len(crop_tifs) == 1: pass elif len(crop_tifs) > 1: # keep maximum one and move small ones tif_files_size = [ io_function.get_file_size_bytes(item) for item in crop_tifs ] max_size = max(tif_files_size) max_index = tif_files_size.index(max_size) del crop_tifs[max_index] for tmp in crop_tifs: io_function.movefiletodir(tmp, bak_folder) tmp = tmp.replace('_crop', '') io_function.movefiletodir(tmp, bak_folder) else: # no tif raise ValueError('Results for %s does not exist' % file_name)
def remove_polygons_based_relative_dem_diff(remain_polyons, merged_shp, surrounding_shp, wkt, save_shp, min_area, dem_diff_thread_m):
    if os.path.isfile(save_shp):
        # also check the file is complete
        polys, demD_values = vector_gpd.read_polygons_attributes_list(save_shp, 'demD_mean')
        if len(polys) < 1 or demD_values is None or len(demD_values) < 1:
            basic.outputlogMessage('%s already exists, but not complete, will be overwritten' % save_shp)
        else:
            basic.outputlogMessage('%s exists, skip' % save_shp)
            return save_shp

    # calculate the relative dem diff
    surr_dem_diff_list = vector_gpd.read_attribute_values_list(surrounding_shp, 'demD_mean')
    merge_poly_dem_diff_list = vector_gpd.read_attribute_values_list(merged_shp, 'demD_mean')
    # convert to float type (can change None to nan)
    surr_dem_diff_list = np.array(surr_dem_diff_list, dtype=float)
    merge_poly_dem_diff_list = np.array(merge_poly_dem_diff_list, dtype=float)

    if len(surr_dem_diff_list) != len(merge_poly_dem_diff_list):
        raise ValueError('The number of surr_dem_diff_list and merge_poly_dem_diff_list is different')
    relative_dem_diff_list = [mer - sur for sur, mer in zip(surr_dem_diff_list, merge_poly_dem_diff_list)]

    merge_poly_demD_std_list = vector_gpd.read_attribute_values_list(merged_shp, 'demD_std')
    merge_poly_demD_count_list = vector_gpd.read_attribute_values_list(merged_shp, 'demD_count')

    # remove polygons based on the relative dem diff and min_area
    save_polyons = []
    save_demD_mean_list = []
    save_demD_std_list = []
    save_demD_count_list = []
    save_rel_diff_list = []
    save_surr_demD_list = []
    rm_rel_dem_diff_count = 0
    rm_min_area_count = 0
    for idx in range(len(remain_polyons)):
        # relative dem diff
        if relative_dem_diff_list[idx] > dem_diff_thread_m:
            rm_rel_dem_diff_count += 1
            continue
        # when converting MultiPolygon to Polygon (in the merge step), some small polygons may be created
        if remain_polyons[idx].area < min_area:
            rm_min_area_count += 1
            continue

        save_polyons.append(remain_polyons[idx])
        save_demD_mean_list.append(merge_poly_dem_diff_list[idx])
        save_demD_std_list.append(merge_poly_demD_std_list[idx])
        save_demD_count_list.append(merge_poly_demD_count_list[idx])
        save_rel_diff_list.append(relative_dem_diff_list[idx])
        save_surr_demD_list.append(surr_dem_diff_list[idx])

    print('remove %d polygons based on relative rel_demD and %d based on min_area, remain %d' %
          (rm_rel_dem_diff_count, rm_min_area_count, len(save_polyons)))

    if len(save_polyons) < 1:
        print('Warning, no polygons after remove based on relative demD')
        return None

    poly_ids = [item + 1 for item in range(len(save_polyons))]
    poly_areas = [poly.area for poly in save_polyons]
    save_pd = pd.DataFrame({'poly_id': poly_ids, 'poly_area': poly_areas, 'demD_mean': save_demD_mean_list,
                            'demD_std': save_demD_std_list, 'demD_count': save_demD_count_list,
                            'surr_demD': save_surr_demD_list, 'rel_demD': save_rel_diff_list,
                            'Polygon': save_polyons})
    vector_gpd.save_polygons_to_files(save_pd, 'Polygon', wkt, save_shp)
    return save_shp
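# Illustrative usage sketch (hypothetical paths and threshold): compute the relative elevation
# difference of each merged polygon against its surrounding ring (demD_mean of merged minus
# demD_mean of surrounding) and keep only polygons whose relative difference is at or below
# -0.5 m and whose area is at least min_area.
def example_remove_by_relative_dem_diff(remain_polyons, wkt):
    return remove_polygons_based_relative_dem_diff(remain_polyons, 'dem_diff_segment_merged.shp',
                                                   'dem_diff_sub_1_surrounding.shp', wkt,
                                                   'dem_diff_segment_post.shp', 90, -0.5)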
def get_file_size_dem_tarball(dem_index_shp, extent_polys, pre_name, xlsx_size_path, poly_ids=None): # read dem polygons and url dem_polygons, dem_urls = vector_gpd.read_polygons_attributes_list( dem_index_shp, 'fileurl', b_fix_invalid_polygon=False) if os.path.isfile(xlsx_size_path): size_table = pd.read_excel(xlsx_size_path) save_idx_list = size_table['index'].to_list() save_url_list = size_table['fileurl'].to_list() save_size_list = size_table['filesize'].to_list() else: save_idx_list = [item for item in range(len(dem_urls))] # index list save_url_list = dem_urls save_size_list = [float('nan')] * len(save_idx_list) basic.outputlogMessage('%d dem polygons in %s' % (len(dem_polygons), dem_index_shp)) if poly_ids is None: poly_ids = [item for item in range(len(extent_polys))] for count, (idx, ext_poly) in enumerate(zip(poly_ids, extent_polys)): basic.outputlogMessage( 'get ArcticDEM filesize for the %d th extent (%d/%d)' % (idx, count, len(extent_polys))) save_filesize_txt = pre_name + '_dem_FileSize_poly_%d.txt' % idx if os.path.isfile(save_filesize_txt): basic.outputlogMessage('%s exists, skip' % save_filesize_txt) continue # get fileurl dem_poly_idx_list = vector_gpd.get_poly_index_within_extent( dem_polygons, ext_poly) basic.outputlogMessage('find %d DEM within %d th extent' % (len(dem_poly_idx_list), (idx))) urls = [dem_urls[id] for id in dem_poly_idx_list] url_size_list = [save_size_list[id] for id in dem_poly_idx_list] if len(urls) > 0: total_count = len(urls) for ii, (url, fileS, url_idx) in enumerate( zip(urls, url_size_list, dem_poly_idx_list)): # remove url start with /mnt and end with .tif if url.startswith('/mnt') and url.endswith('.tif'): basic.outputlogMessage("error: not a valid url: %s" % url) continue if math.isnan(fileS) is False: continue url_size_GB = get_one_url_file_size(url, ii, total_count) url_size_list[ii] = url_size_GB save_size_list[url_idx] = url_size_GB url_size_list_noNone = [ item for item in url_size_list if math.isnan(item) is False ] if len(url_size_list_noNone) != len(url_size_list): basic.outputlogMessage( 'There are %d None value in url_size_list' % (len(url_size_list) - len(url_size_list_noNone))) total_size_GB = sum(url_size_list_noNone) basic.outputlogMessage( 'the size of files will be downloaded is %.4lf GB for the %d th extent ' % (total_size_GB, (idx + 1))) with open(save_filesize_txt, 'w') as f_obj: f_obj.writelines('%d DEM files, total size is %.6lf GB \n' % (len(urls), total_size_GB)) else: basic.outputlogMessage( 'Warning, can not find DEMs within %d th extent' % (idx)) # save table save_dict = { 'index': save_idx_list, 'filesize': save_size_list, 'fileurl': save_url_list } save_dict_pd = pd.DataFrame(save_dict) # set strings to url as False: Number of URLS is over Excel's limit of 65,530 URLS per worksheet # https://github.com/cxn03651/write_xlsx/issues/42 with pd.ExcelWriter(xlsx_size_path, options={'strings_to_urls': False}) as writer: save_dict_pd.to_excel(writer, sheet_name='url_file_size') return None
def main(options, args): process_num = options.process_num buffer_size = options.buffer_size # perform the selection grid by grid basic.setlogfile('select_RTS_YOLO_demDiff_headwall_%s.txt' % timeTools.get_now_time_str()) b_grid = options.b_grid if b_grid: # process the selection grid by grid extent_shp_or_ids_txt = args[0] yolo_result_dir = os.path.expanduser( '~/Data/Arctic/alaska/autoMapping/alaskaNS_yolov4_1') dem_subsidence_dir = grid_dem_diffs_segment_dir grid_headwall_dir = grid_dem_headwall_shp_dir # read grids and ids time0 = time.time() all_grid_polys, all_ids = vector_gpd.read_polygons_attributes_list( grid_20_shp, 'id') print('time cost of read polygons and attributes', time.time() - time0) # get grid ids based on input extent grid_base_name = os.path.splitext( os.path.basename(extent_shp_or_ids_txt))[0] grid_polys, grid_ids = get_grid_20(extent_shp_or_ids_txt, all_grid_polys, all_ids) # check dem difference existence grid_rts_shps, grid_id_no_rts_shp = get_existing_select_grid_rts( grid_rts_shp_dir, grid_base_name, grid_ids) if len(grid_id_no_rts_shp) > 0: # refine grid_polys if len(grid_ids) > len(grid_id_no_rts_shp): id_index = [grid_ids.index(id) for id in grid_id_no_rts_shp] grid_polys = [grid_polys[idx] for idx in id_index] # rts_shp_folders = select_rts_map_demDiff_headwall_grids( yolo_result_dir, dem_subsidence_dir, grid_headwall_dir, grid_polys, grid_id_no_rts_shp, grid_base_name, process_num=process_num) else: # processing the selection for two input shapefile yolo_box_shp = args[0] dem_subsidence_shp = args[1] print('polygon group 1:', yolo_box_shp) print('polygon group 2:', dem_subsidence_shp) if options.save_path is not None: save_path = options.save_path else: save_path = io_function.get_name_by_adding_tail( yolo_box_shp, 'select') select_polygons_overlap_others_in_group2(yolo_box_shp, dem_subsidence_shp, save_path, buffer_size=buffer_size, process_num=process_num) pass