def get_overlap_image_index(polygons, image_boxes, min_overlap_area=1):
    '''
    get the indices of the images that the polygons overlap
    :param polygons: a list of polygons
    :param image_boxes: the extents of all the images
    :param min_overlap_area: minimum area for checking the overlap; ignore an image if the overlap is too small
    :return: a list of image indices
    '''
    # find the images which the polygons overlap (one or two images)
    img_idx = []
    # for a_poly in polygons:
    #     a_poly_json = mapping(a_poly)
    #     polygon_box = rasterio.features.bounds(a_poly_json)
    polygon_box = get_bounds_of_polygons(polygons)
    for idx, img_box in enumerate(image_boxes):
        if rasterio.coords.disjoint_bounds(img_box, polygon_box) is False:
            if idx not in img_idx:
                img_idx.append(idx)

    # check the overlap; iterate over a copy because items may be removed from img_idx
    for idx in list(img_idx):
        box_poly = convert_image_bound_to_shapely_polygon(image_boxes[idx])
        poly_index = get_poly_index_within_extent(polygons, box_poly, min_overlap_area=min_overlap_area)
        # if no overlap, remove the index
        if len(poly_index) < 1:
            img_idx.remove(idx)

    return img_idx
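# A minimal usage sketch for get_overlap_image_index (illustrative, not part of the
# pipeline): `image_boxes` holds rasterio BoundingBox tuples, which is what
# rasterio.coords.disjoint_bounds expects. The file names and coordinates are hypothetical.
def _example_get_overlap_image_index():
    import rasterio
    from shapely.geometry import box

    image_boxes = []
    for tif in ['image_1.tif', 'image_2.tif']:      # hypothetical files
        with rasterio.open(tif) as src:
            image_boxes.append(src.bounds)          # rasterio.coords.BoundingBox
    # a polygon in the same CRS as the images (coordinates are made up)
    polygons = [box(500100.0, 4100100.0, 500200.0, 4100200.0)]
    idx_list = get_overlap_image_index(polygons, image_boxes, min_overlap_area=1)
    print('polygon(s) overlap image index:', idx_list)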
def produce_dem_diff_grids(grid_polys, grid_ids, pre_name, reg_tifs, b_apply_matchtag, b_mosaic_id, b_mosaic_date,
                           keep_dem_percent, o_res, process_num=4):

    dem_ext_polys = get_dem_tif_ext_polygons(reg_tifs)
    dem_diff_tifs = []
    # mosaic and crop
    if os.path.isfile(grid_dem_diff_less2dem_txt):
        grid_id_less2dem_list = [int(item) for item in io_function.read_list_from_txt(grid_dem_diff_less2dem_txt)]
    else:
        grid_id_less2dem_list = []

    for grid_id, grid_poly in zip(grid_ids, grid_polys):
        if grid_id in grid_id_less2dem_list:
            basic.outputlogMessage('skip grid %d, previous processing shows that the count of DEMs is smaller than 2' % grid_id)
            continue
        save_dir = 'grid_%d_tmp_files' % grid_id

        # check free disk space
        work_dir = './'
        free_GB = io_function.get_free_disk_space_GB(work_dir)
        total_wait_time = 0
        while free_GB < 50 and total_wait_time < 60 * 60 * 12:
            basic.outputlogMessage(' The free disk space (%.4f GB) is less than 50 GB, wait 60 seconds' % free_GB)
            time.sleep(60)
            total_wait_time += 60
            free_GB = io_function.get_free_disk_space_GB(work_dir)

        # get a subset of tifs
        dem_poly_index = vector_gpd.get_poly_index_within_extent(dem_ext_polys, grid_poly)
        if len(dem_poly_index) < 1:
            basic.outputlogMessage('warning, no dem tifs within grid %d, skip' % grid_id)
            save_id_grid_no_valid_dem(grid_id)
            continue
        dem_list_sub = [reg_tifs[index] for index in dem_poly_index]

        # filter by month (cancelled, because it removes much good data)
        # dem_list_sub = filter_dem_by_month(dem_list_sub)

        mosaic_tif_list = mosaic_crop_dem(dem_list_sub, save_dir, grid_id, grid_poly, b_mosaic_id, b_mosaic_date,
                                          process_num, keep_dem_percent, o_res, pre_name, resample_method='average',
                                          b_mask_matchtag=b_apply_matchtag, b_mask_stripDEM_outlier=b_mask_stripDEM_outlier,
                                          b_mask_surface_water=b_mask_surface_water, b_mosaic_year=b_mosaic_year)

        # dem co-registration (cancelled, the result is not good with the default settings)

        # dem differencing
        save_dem_diff = os.path.join(grid_dem_diffs_dir, pre_name + '_DEM_diff_grid%d.tif' % grid_id)
        save_date_diff = os.path.join(grid_dem_diffs_dir, pre_name + '_date_diff_grid%d.tif' % grid_id)
        if dem_diff_newest_oldest(mosaic_tif_list, save_dem_diff, save_date_diff, process_num,
                                  b_max_subsidence=b_max_subsidence, b_save_cm=True):
            dem_diff_tifs.append(save_dem_diff)
        else:
            save_id_grid_dem_less_2(grid_id)
            grid_id_less2dem_list.append(grid_id)

    return dem_diff_tifs
def produce_matchtag_sum_grids(grid_polys, grid_ids, pre_name, matchtag_tifs, o_res, process_num=4):

    dem_ext_polys = get_dem_tif_ext_polygons(matchtag_tifs)
    matchtag_sum_tifs = []
    # mosaic and crop
    for grid_id, grid_poly in zip(grid_ids, grid_polys):
        save_dir = 'grid_%d_tmp_files' % grid_id

        # check free disk space
        work_dir = './'
        free_GB = io_function.get_free_disk_space_GB(work_dir)
        total_wait_time = 0
        while free_GB < 50 and total_wait_time < 60 * 60 * 12:
            basic.outputlogMessage(' The free disk space (%.4f GB) is less than 50 GB, wait 60 seconds' % free_GB)
            time.sleep(60)
            total_wait_time += 60
            free_GB = io_function.get_free_disk_space_GB(work_dir)

        # get a subset of tifs
        dem_poly_index = vector_gpd.get_poly_index_within_extent(dem_ext_polys, grid_poly)
        if len(dem_poly_index) < 1:
            basic.outputlogMessage('warning, no dem tifs within grid %d, skip' % grid_id)
            continue
        dem_list_sub = [matchtag_tifs[index] for index in dem_poly_index]

        mosaic_tif_list = mosaic_crop_dem(dem_list_sub, save_dir, grid_id, grid_poly, False, False,
                                          process_num, 0, o_res, pre_name, resample_method='average')

        # sum matchtag
        save_matchtag_sum = os.path.join(grid_matchtag_sum_dir,
                                         pre_name + '_count%d' % len(mosaic_tif_list) + '_matchtag_sum_grid%d.tif' % grid_id)
        if sum_matchtag(mosaic_tif_list, save_matchtag_sum):
            matchtag_sum_tifs.append(save_matchtag_sum)

    return matchtag_sum_tifs
def get_tar_list_sub(tar_dir, dem_polygons, dem_urls, extent_poly):

    dem_poly_ids = vector_gpd.get_poly_index_within_extent(dem_polygons, extent_poly)
    urls = [dem_urls[id] for id in dem_poly_ids]
    new_tar_list = []
    for ii, url in enumerate(urls):
        tmp = urlparse(url)
        filename = os.path.basename(tmp.path)
        save_dem_path = os.path.join(tar_dir, filename)
        if os.path.isfile(save_dem_path):
            new_tar_list.append(save_dem_path)
        else:
            basic.outputlogMessage('Warning, %s not in %s, may need to download it first' % (filename, tar_dir))
    return new_tar_list
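# Usage sketch for get_tar_list_sub (illustrative): the index shapefile and the
# 'fileurl' field follow the convention used by download_dem_tarball below; the
# file paths here are hypothetical.
def _example_get_tar_list_sub():
    dem_polygons, dem_urls = vector_gpd.read_polygons_attributes_list(
        'ArcticDEM_Strip_Index_Rel7.shp', 'fileurl', b_fix_invalid_polygon=False)
    extent_poly = vector_gpd.read_polygons_gpd('my_extent.shp')[0]   # hypothetical extent
    tarballs = get_tar_list_sub('./tarballs', dem_polygons, dem_urls, extent_poly)
    print('%d tarballs already downloaded' % len(tarballs))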
def zonal_stats_one_polygon(idx, polygon, image_tiles, img_tile_polygons, stats, nodata=None,
                            range=None, band=1, all_touched=True):

    overlap_index = vector_gpd.get_poly_index_within_extent(img_tile_polygons, polygon)
    image_list = [image_tiles[item] for item in overlap_index]
    if len(image_list) == 1:
        out_image, out_tran, nodata = raster_io.read_raster_in_polygons_mask(image_list[0], polygon, nodata=nodata,
                                                                             all_touched=all_touched, bands=band)
    elif len(image_list) > 1:
        # if the polygon overlaps more than one raster, produce a mosaic first
        tmp_saved_files = []
        for k_img, image_path in enumerate(image_list):
            # print(image_path)
            tmp_save_path = os.path.splitext(os.path.basename(image_path))[0] + '_subset_poly%d' % idx + '.tif'
            _, _, nodata = raster_io.read_raster_in_polygons_mask(image_path, polygon, all_touched=all_touched,
                                                                  nodata=nodata, bands=band, save_path=tmp_save_path)
            tmp_saved_files.append(tmp_save_path)

        # mosaic the files in tmp_saved_files
        save_path = 'raster_for_poly%d.tif' % idx
        mosaic_args_list = ['gdal_merge.py', '-o', save_path, '-n', str(nodata), '-a_nodata', str(nodata)]
        mosaic_args_list.extend(tmp_saved_files)
        if basic.exec_command_args_list_one_file(mosaic_args_list, save_path) is False:
            raise IOError('error, obtaining a mosaic (%s) failed' % save_path)

        # read the mosaic raster
        out_image, out_nodata = raster_io.read_raster_one_band_np(save_path, band=band)

        # remove the temporary rasters
        tmp_saved_files.append(save_path)
        for item in tmp_saved_files:
            io_function.delete_file_or_dir(item)
    else:
        basic.outputlogMessage('warning, cannot find a raster for polygon %d (0-based index)' % idx)
        return None

    # do the calculation
    return array_stats(out_image, stats, nodata, range=range)
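# Usage sketch for zonal_stats_one_polygon (illustrative): tile extents are derived
# with get_dem_tif_ext_polygons, as elsewhere in this module. The file names are
# hypothetical, and the list format of `stats` is an assumption about what
# array_stats accepts.
def _example_zonal_stats_one_polygon():
    from shapely.geometry import box
    image_tiles = ['tile_1.tif', 'tile_2.tif']                 # hypothetical tiles
    img_tile_polygons = get_dem_tif_ext_polygons(image_tiles)
    a_polygon = box(500000.0, 4100000.0, 501000.0, 4101000.0)  # made-up extent
    result = zonal_stats_one_polygon(0, a_polygon, image_tiles, img_tile_polygons,
                                     ['mean', 'std', 'count'], nodata=0, band=1)
    print(result)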
def main(options, args):

    save_dir = options.save_dir
    extent_shp = options.extent_shp
    process_num = options.process_num
    o_res = options.out_res
    b_mosaic_id = options.create_mosaic_id
    b_mosaic_date = options.create_mosaic_date
    keep_dem_percent = options.keep_dem_percent

    dem_dir_or_txt = args[0]
    if os.path.isfile(dem_dir_or_txt):
        dem_list = io_function.read_list_from_txt(dem_dir_or_txt)
    else:
        dem_list = io_function.get_file_list_by_ext('.tif', dem_dir_or_txt, bsub_folder=False)
    dem_list = [tif for tif in dem_list if 'matchtag' not in tif]  # remove matchtag
    dem_count = len(dem_list)
    if dem_count < 1:
        raise ValueError('No input dem files in %s' % dem_dir_or_txt)

    resample_method = 'average'

    if extent_shp is None:
        # group DEMs with the same strip pair ID
        dem_groups = group_demTif_strip_pair_ID(dem_list)
        # mosaic them directly without considering the extent
        mosaic_dir = os.path.join(save_dir, 'dem_stripID_mosaic')
        mosaic_dem_same_stripID(dem_groups, mosaic_dir, resample_method, process_num=process_num,
                                save_source=True, o_format='GTiff')
    else:
        extent_shp_base = os.path.splitext(os.path.basename(extent_shp))[0]

        dem_prj = map_projection.get_raster_or_vector_srs_info_epsg(dem_list[0])
        extent_prj = map_projection.get_raster_or_vector_srs_info_epsg(extent_shp)
        # # check projection (time-consuming if there are many tif files)
        # for dem_tif in dem_list:
        #     prj = map_projection.get_raster_or_vector_srs_info_epsg(dem_tif)
        #     if dem_prj != prj:
        #         raise ValueError('The projections are inconsistent among dems (%s is different)' % dem_tif)

        dem_ext_polys = get_dem_tif_ext_polygons(dem_list)

        if extent_prj == dem_prj:
            extent_polys = vector_gpd.read_polygons_gpd(extent_shp)
        else:
            extent_polys = vector_gpd.read_shape_gpd_to_NewPrj(extent_shp, dem_prj)

        if len(extent_polys) < 1:
            raise ValueError('No polygons in %s' % extent_shp)
        else:
            basic.outputlogMessage('%d extent polygons in %s' % (len(extent_polys), extent_shp))

        extPolys_ids = vector_gpd.read_attribute_values_list(extent_shp, 'id')
        if extPolys_ids is None or None in extPolys_ids:
            basic.outputlogMessage('Warning, field: id is not in %s, will create a default ID for each grid' % extent_shp)
            extPolys_ids = [id + 1 for id in range(len(extent_polys))]

        for idx, ext_poly in zip(extPolys_ids, extent_polys):
            basic.outputlogMessage('mosaic and crop DEM for the %d th extent (%d in total)' % (idx, len(extent_polys)))

            # get a subset of DEMs
            dem_poly_ids = vector_gpd.get_poly_index_within_extent(dem_ext_polys, ext_poly)
            if len(dem_poly_ids) < 1:
                basic.outputlogMessage('no dem tifs within the %d th polygon' % idx)
                continue
            dem_list_sub = [dem_list[id] for id in dem_poly_ids]

            mosaic_crop_dem(dem_list_sub, save_dir, idx, ext_poly, b_mosaic_id, b_mosaic_date,
                            process_num, keep_dem_percent, o_res, extent_shp_base, resample_method='average')
def mask_dem_by_surface_water(crop_dem_list, extent_poly, extent_id, crop_tif_dir, o_res, process_num):
    # get the list of surface water mask tifs
    water_mask_tifs = io_function.get_file_list_by_ext('.tif', mask_water_dir, bsub_folder=False)
    water_mask_ext_polys = get_dem_tif_ext_polygons(water_mask_tifs)
    overlap_index = vector_gpd.get_poly_index_within_extent(water_mask_ext_polys, extent_poly)

    #### crop and mosaic the water masks
    sub_mosaic_dem_tifs = [water_mask_tifs[item] for item in overlap_index]
    water_mask_crop_tif_list = []
    for tif in sub_mosaic_dem_tifs:
        save_crop_path = os.path.join(crop_tif_dir, os.path.basename(io_function.get_name_by_adding_tail(tif, 'sub_poly_%d' % extent_id)))
        if os.path.isfile(save_crop_path):
            basic.outputlogMessage('%s exists, skip' % save_crop_path)
            water_mask_crop_tif_list.append(save_crop_path)
        else:
            crop_tif = subset_image_by_polygon_box(tif, save_crop_path, extent_poly, resample_m='near',
                                                   o_format='VRT', out_res=o_res, same_extent=True, thread_num=process_num)
            if crop_tif is False:
                raise ValueError('warning, crop %s failed' % tif)
            water_mask_crop_tif_list.append(crop_tif)
    if len(water_mask_crop_tif_list) < 1:
        basic.outputlogMessage('No water mask for grid %d' % extent_id)
        save_id_grid_no_watermask(extent_id)
        return None

    # create a mosaic (it can handle a single input file as well, but is slow)
    save_water_mask_mosaic = os.path.join(crop_tif_dir, 'global_surface_water_grid%d.tif' % extent_id)
    result = RSImageProcess.mosaic_crop_images_gdalwarp(water_mask_crop_tif_list, save_water_mask_mosaic,
                                                        resampling_method='average', o_format='GTiff',
                                                        compress='lzw', tiled='yes', bigtiff='if_safer', thread_num=process_num)
    if result is False:
        return False

    # because the resolutions of the DEM and the water mask differ, polygonize the water mask,
    # then burn the polygons into the DEM
    water_mask_shp = os.path.join(crop_tif_dir, 'global_surface_water_grid%d.shp' % extent_id)
    if os.path.isfile(water_mask_shp):
        basic.outputlogMessage('%s exists, skip polygonizing' % water_mask_shp)
    else:
        # set 0 as nodata
        if raster_io.set_nodata_to_raster_metadata(save_water_mask_mosaic, 0) is False:
            return False
        if vector_gpd.raster2shapefile(save_water_mask_mosaic, water_mask_shp, connect8=True) is None:
            return False

    # mask the strip version of the DEMs
    mask_dem_list = []
    for idx, strip_dem in enumerate(crop_dem_list):
        save_path = io_function.get_name_by_adding_tail(strip_dem, 'maskWater')
        if os.path.isfile(save_path):
            basic.outputlogMessage('%s exists, skip' % save_path)
            mask_dem_list.append(save_path)
            continue
        io_function.copy_file_to_dst(strip_dem, save_path, overwrite=True)
        nodata = raster_io.get_nodata(save_path)
        if raster_io.burn_polygon_to_raster_oneband(save_path, water_mask_shp, nodata) is False:
            continue
        mask_dem_list.append(save_path)

    return mask_dem_list
def mask_strip_dem_outlier_by_ArcticDEM_mosaic(crop_strip_dem_list, extent_poly, extent_id, crop_tif_dir, o_res, process_num):
    # get the list of ArcticDEM mosaic (tile) tifs
    arcticDEM_mosaic_reg_tifs = io_function.get_file_list_by_ext('.tif', arcticDEM_tile_reg_tif_dir, bsub_folder=False)
    mosaic_dem_ext_polys = get_dem_tif_ext_polygons(arcticDEM_mosaic_reg_tifs)
    overlap_index = vector_gpd.get_poly_index_within_extent(mosaic_dem_ext_polys, extent_poly)

    #### crop and mosaic mosaic_reg_tifs
    sub_mosaic_dem_tifs = [arcticDEM_mosaic_reg_tifs[item] for item in overlap_index]
    mosaic_crop_tif_list = []
    for tif in sub_mosaic_dem_tifs:
        save_crop_path = os.path.join(crop_tif_dir, os.path.basename(io_function.get_name_by_adding_tail(tif, 'sub_poly_%d' % extent_id)))
        if os.path.isfile(save_crop_path):
            basic.outputlogMessage('%s exists, skip cropping' % save_crop_path)
            mosaic_crop_tif_list.append(save_crop_path)
        else:
            crop_tif = subset_image_by_polygon_box(tif, save_crop_path, extent_poly, resample_m='near',
                                                   o_format='VRT', out_res=o_res, same_extent=True, thread_num=process_num)
            if crop_tif is False:
                raise ValueError('warning, crop %s failed' % tif)
            mosaic_crop_tif_list.append(crop_tif)
    if len(mosaic_crop_tif_list) < 1:
        basic.outputlogMessage('No mosaic version of ArcticDEM for grid %d, skip mask_strip_dem_outlier_by_ArcticDEM_mosaic' % extent_id)
        return False

    # create a mosaic (it can handle a single input file as well, but is slow)
    save_dem_mosaic = os.path.join(crop_tif_dir, 'ArcticDEM_tiles_grid%d.tif' % extent_id)
    result = RSImageProcess.mosaic_crop_images_gdalwarp(mosaic_crop_tif_list, save_dem_mosaic,
                                                        resampling_method='average', o_format='GTiff',
                                                        compress='lzw', tiled='yes', bigtiff='if_safer', thread_num=process_num)
    if result is False:
        return False
    height_tileDEM, width_tileDEM, count_tileDEM, dtype_tileDEM = raster_io.get_height_width_bandnum_dtype(save_dem_mosaic)
    tileDEM_data, tileDEM_nodata = raster_io.read_raster_one_band_np(save_dem_mosaic)

    # mask the strip version of the DEMs
    mask_strip_dem_list = []
    for idx, strip_dem in enumerate(crop_strip_dem_list):
        save_path = io_function.get_name_by_adding_tail(strip_dem, 'maskOutlier')
        if os.path.isfile(save_path):
            basic.outputlogMessage('%s exists, skip' % save_path)
            mask_strip_dem_list.append(save_path)
            continue
        # check band count, width, and height
        height, width, count, dtype = raster_io.get_height_width_bandnum_dtype(strip_dem)
        if height_tileDEM != height or width_tileDEM != width or count_tileDEM != count:
            raise ValueError('sizes differ between %s and %s' % (strip_dem, save_dem_mosaic))
        if count != 1:
            raise ValueError('DEM and Matchtag should only have one band')
        try:
            dem_data, nodata = raster_io.read_raster_one_band_np(strip_dem)
        except Exception:
            basic.outputlogMessage(' invalid tif file: %s' % strip_dem)
            continue
        nodata_loc = np.where(dem_data == nodata)
        diff = dem_data - tileDEM_data
        # mask as nodata where the difference from the tile DEM is greater than 50 m
        dem_data[np.abs(diff) > 50] = nodata
        dem_data[nodata_loc] = nodata  # the subtraction may alter nodata pixels, change them back
        # save to file
        raster_io.save_numpy_array_to_rasterfile(dem_data, save_path, strip_dem, compress='lzw', tiled='yes', bigtiff='if_safer')
        mask_strip_dem_list.append(save_path)

    return mask_strip_dem_list
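# The core of the outlier mask above, isolated as a small self-contained numpy
# sketch (the values are made up; 50 m is the threshold used in the function):
def _example_outlier_mask():
    import numpy as np
    nodata = -9999
    strip = np.array([[100.0, 175.0], [nodata, 102.0]])   # strip DEM with one nodata pixel
    tile = np.array([[101.0, 103.0], [100.0, 101.0]])     # ArcticDEM tile mosaic
    nodata_loc = np.where(strip == nodata)
    diff = strip - tile
    strip[np.abs(diff) > 50] = nodata   # 175 vs 103 differs by 72 m -> masked as outlier
    strip[nodata_loc] = nodata          # restore the original nodata pixels
    print(strip)                        # [[100., -9999.], [-9999., 102.]]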
def get_grid_20(extent_shp_or_id_txt, grid_polys, ids):
    '''
    get grid polygons and ids based on an input extent (polygon in a shapefile) or ids (txt file)
    if "file_name_base + '_grid_ids.txt'" exists, it will read the ids from this file directly.
    :param extent_shp_or_id_txt: an extent shapefile or a txt file of grid ids
    :param grid_polys: all grid polygons
    :param ids: the ids of all grid polygons
    :return: the selected grid polygons and their ids
    '''
    io_function.is_file_exist(extent_shp_or_id_txt)

    if extent_shp_or_id_txt.endswith('.txt'):
        grid_ids = io_function.read_list_from_txt(extent_shp_or_id_txt)
        grid_ids = [int(item) for item in grid_ids]
    else:
        shp_corresponding_grid_ids_txt = get_corresponding_grid_ids_txt(extent_shp_or_id_txt)
        if os.path.isfile(shp_corresponding_grid_ids_txt):
            print('the corresponding grid ids txt file for %s exists, read grid ids from txt' % extent_shp_or_id_txt)
            grid_ids = [int(item) for item in io_function.read_list_from_txt(shp_corresponding_grid_ids_txt)]
            basic.outputlogMessage('read %d grids within the extents (%s)'
                                   % (len(grid_ids), os.path.basename(extent_shp_or_id_txt)))
        else:
            # extent polygons and projection (proj4)
            extent_shp_prj = map_projection.get_raster_or_vector_srs_info_proj4(extent_shp_or_id_txt)
            if extent_shp_prj == '':
                raise ValueError('get proj4 of %s failed' % extent_shp_or_id_txt)
            grid_shp_prj = map_projection.get_raster_or_vector_srs_info_proj4(grid_20_shp)
            if grid_shp_prj == '':
                raise ValueError('get proj4 of %s failed' % grid_20_shp)

            if extent_shp_prj != grid_shp_prj:
                basic.outputlogMessage('%s and %s do not have the same projection, will reproject %s'
                                       % (extent_shp_or_id_txt, grid_20_shp, os.path.basename(extent_shp_or_id_txt)))
                epsg = map_projection.get_raster_or_vector_srs_info_epsg(grid_20_shp)
                # print(epsg)
                # extent_polys = vector_gpd.read_shape_gpd_to_NewPrj(extent_shp, dem_shp_prj.strip())
                extent_polys = vector_gpd.read_shape_gpd_to_NewPrj(extent_shp_or_id_txt, epsg)
            else:
                extent_polys = vector_gpd.read_polygons_gpd(extent_shp_or_id_txt)

            ext_poly_count = len(extent_polys)
            if ext_poly_count < 1:
                raise ValueError('No polygons in %s' % extent_shp_or_id_txt)

            grid_index = []
            # if there are many polygons, this will take time.
            for idx, ext_poly in enumerate(extent_polys):
                print(timeTools.get_now_time_str(), 'get grids for extent idx', idx, 'total polygons:', ext_poly_count)
                index = vector_gpd.get_poly_index_within_extent(grid_polys, ext_poly)
                grid_index.extend(index)
            grid_index = list(set(grid_index))  # remove duplicated ids
            basic.outputlogMessage('find %d grids within the extents (%s)'
                                   % (len(grid_index), os.path.basename(extent_shp_or_id_txt)))

            grid_ids = [ids[idx] for idx in grid_index]
            grid_ids_str = [str(item) for item in grid_ids]
            io_function.save_list_to_txt(shp_corresponding_grid_ids_txt, grid_ids_str)

    id_index = [ids.index(id) for id in grid_ids]
    selected_grid_polys = [grid_polys[idx] for idx in id_index]

    return selected_grid_polys, grid_ids
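# Usage sketch for get_grid_20 (illustrative): the global 20-km grid is read once
# from grid_20_shp (the module-level path used above), then subset by an extent
# shapefile or a txt file of grid ids. The extent shapefile name is hypothetical,
# and it is an assumption that the grid shapefile carries an 'id' field.
def _example_get_grid_20():
    all_grid_polys, all_ids = vector_gpd.read_polygons_attributes_list(grid_20_shp, 'id')
    grid_polys, grid_ids = get_grid_20('my_study_area.shp', all_grid_polys, all_ids)
    print('selected %d grids' % len(grid_ids))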
def extract_headwall_grids(grid_polys, grid_ids, pre_name, reg_tifs, b_mosaic_id, b_mosaic_date,
                           keep_dem_percent, o_res, process_num=1):

    proc = psutil.Process(os.getpid())
    dem_ext_polys = get_dem_tif_ext_polygons(reg_tifs)
    headwall_shp_folders = []
    # mosaic and crop
    for grid_id, grid_poly in zip(grid_ids, grid_polys):
        save_dir = 'grid_%d_tmp_files' % grid_id

        # check free disk space
        work_dir = './'
        free_GB = io_function.get_free_disk_space_GB(work_dir)
        total_wait_time = 0
        while free_GB < 50 and total_wait_time < 60 * 60 * 12:
            basic.outputlogMessage(' The free disk space (%.4f GB) is less than 50 GB, wait 60 seconds' % free_GB)
            time.sleep(60)
            total_wait_time += 60
            free_GB = io_function.get_free_disk_space_GB(work_dir)

        # get a subset of tifs
        dem_poly_index = vector_gpd.get_poly_index_within_extent(dem_ext_polys, grid_poly)
        if len(dem_poly_index) < 1:
            basic.outputlogMessage('warning, no dem tifs within grid %d, skip' % grid_id)
            save_id_grid_no_valid_dem(grid_id)
            continue
        dem_list_sub = [reg_tifs[index] for index in dem_poly_index]

        mosaic_tif_list = mosaic_crop_dem(dem_list_sub, save_dir, grid_id, grid_poly, b_mosaic_id, b_mosaic_date,
                                          process_num, keep_dem_percent, o_res, pre_name, resample_method='average',
                                          b_mask_matchtag=b_apply_matchtag, b_mask_stripDEM_outlier=b_mask_stripDEM_outlier,
                                          b_mask_surface_water=b_mask_surface_water, b_mosaic_year=b_mosaic_year)
        if len(mosaic_tif_list) < 1:
            basic.outputlogMessage('warning, failed to get a DEM mosaic for grid %d' % grid_id)
            continue

        # dem co-registration (cancelled, the result is not good with the default settings)

        # to slope
        slope_tifs = dem_list_to_slope_list(mosaic_tif_list, save_dir, grid_id, process_num=process_num)

        # extract headwall
        multi_headwall_shp_dir = os.path.join(save_dir, 'headwall_shp_sub_%d' % grid_id)
        if os.path.isdir(multi_headwall_shp_dir) is False:
            io_function.mkdir(multi_headwall_shp_dir)

        for idx, slope in enumerate(slope_tifs):
            working_dir = os.path.join(save_dir, os.path.splitext(os.path.basename(slope))[0])
            if os.path.isdir(working_dir) is False:
                io_function.mkdir(working_dir)
            # use the polygon-based medial axis
            # if extract_headwall_from_slope(idx, len(slope_tifs), slope, working_dir, multi_headwall_shp_dir,
            #                                min_slope, min_size, max_size, max_axis_width, max_box_WH, process_num) is False:
            #     basic.outputlogMessage('extract headwall from %s failed' % slope)

            # use the raster-based medial axis
            if extract_headwall_based_medial_axis_from_slope(idx, len(slope_tifs), slope, working_dir,
                                                             multi_headwall_shp_dir, min_slope, min_size, max_size,
                                                             min_length, max_length, max_hole_count, max_axis_width,
                                                             process_num) is False:
                basic.outputlogMessage('extract headwall from %s failed' % slope)

        headwall_shp_list = io_function.get_file_list_by_ext('.shp', multi_headwall_shp_dir, bsub_folder=False)
        if len(headwall_shp_list) < 1:
            basic.outputlogMessage('Warning, no headwall shapefile in %s' % multi_headwall_shp_dir)
            save_id_grid_no_headwall(grid_id)
            continue

        # merge headwalls detected on different dates
        save_headwall_folder = os.path.join(grid_dem_headwall_shp_dir, 'headwall_shps_grid%d' % grid_id)
        if os.path.isdir(save_headwall_folder) is False:
            io_function.mkdir(save_headwall_folder)

        print('before merge_multi_headwall_shp_to_one, used memory:',
              proc.memory_info()[0] / (1024 * 1024 * 1024.0), 'GB')
        save_merged_shp = os.path.join(save_headwall_folder, 'headwall_shp_multiDates_%d.shp' % grid_id)
        if merge_multi_headwall_shp_to_one(headwall_shp_list, save_merged_shp) is False:
            continue

        # have not found a good method to merge them, just copy all of them for now
        # res = os.system('cp -r %s %s' % (multi_headwall_shp_dir, save_headwall_folder))
        # if res != 0:
        #     basic.outputlogMessage('Copy %s failed' % multi_headwall_shp_dir)
        #     continue

        headwall_shp_folders.append(save_headwall_folder)

    return headwall_shp_folders
def download_dem_tarball(dem_index_shp, extent_polys, save_folder, pre_name, reg_tif_dir=None,
                         poly_ids=None, b_arcticDEM_tile=False):

    # read dem polygons and urls
    dem_polygons, dem_urls = vector_gpd.read_polygons_attributes_list(dem_index_shp, 'fileurl',
                                                                      b_fix_invalid_polygon=False)
    basic.outputlogMessage('%d dem polygons in %s' % (len(dem_polygons), dem_index_shp))

    dem_tar_ball_list = []
    reg_tifs_list = []
    curr_dir = os.getcwd()

    b_save_grid_id_noDEM = True
    if poly_ids is None:
        poly_ids = [idx for idx in range(len(extent_polys))]
        b_save_grid_id_noDEM = False  # if poly_ids are not the globally unique ids, then don't save them

    if os.path.isfile('no_registration_strips.txt'):
        no_registration_strips = io_function.read_list_from_txt('no_registration_strips.txt')
    else:
        no_registration_strips = []

    # tarballs that are being downloaded
    downloading_tarballs = []
    for count, (idx, ext_poly) in enumerate(zip(poly_ids, extent_polys)):
        basic.outputlogMessage('get data for the %d th extent (%d/%d)' % (idx, count, len(extent_polys)))

        save_txt_path = pre_name + '_dem_urls_poly_%d.txt' % idx
        if os.path.isfile(save_txt_path):
            urls = io_function.read_list_from_txt(save_txt_path)
            basic.outputlogMessage('read %d dem urls from %s' % (len(urls), save_txt_path))
        else:
            # get fileurl
            dem_poly_ids = vector_gpd.get_poly_index_within_extent(dem_polygons, ext_poly)
            basic.outputlogMessage('find %d DEMs within the %d th extent' % (len(dem_poly_ids), idx))
            urls = [dem_urls[id] for id in dem_poly_ids]
            # save to txt
            io_function.save_list_to_txt(save_txt_path, urls)
            basic.outputlogMessage('save dem urls to %s' % save_txt_path)

        if len(urls) > 0:
            # total_size_GB = get_total_size(urls)  # requires internet access; parallel running may cause problems, and the info is not important
            # basic.outputlogMessage('the size of files to be downloaded is %.4lf GB for the %d th extent' % (total_size_GB, (idx + 1)))
            # time.sleep(5)  # wait 5 seconds

            # download them using wget one by one
            for ii, url in enumerate(urls):
                tmp = urlparse(url)

                # in the Strip DEM index, there are around 700 urls that point to tif files and fail to download, e.g.
                # /mnt/pgc/data/elev/dem/setsm/ArcticDEM/geocell/v3.0/2m_temp/n59w137/SETSM_WV03_20150518_104001000B703200_104001000C715B00_seg8_2m_v3.0_dem.tif
                if url.startswith('/mnt') and url.endswith('.tif'):
                    basic.outputlogMessage("error: not a valid url: %s" % url)
                    continue

                filename = os.path.basename(tmp.path)
                save_dem_path = os.path.join(save_folder, filename)
                if reg_tif_dir is not None:
                    tar_base = os.path.basename(filename)[:-7]
                    # file_pattern = ['*dem_reg.tif', '*reg_dem.tif']  # Arctic strip and tile (mosaic) versions
                    if b_arcticDEM_tile:
                        reg_tifs = io_function.get_file_list_by_pattern(reg_tif_dir, tar_base + '*reg_dem.tif')
                    else:
                        reg_tifs = io_function.get_file_list_by_pattern(reg_tif_dir, tar_base + '*dem_reg.tif')
                    if len(reg_tifs) > 0:
                        basic.outputlogMessage('warning, the unpacked and registered tif for %s already exists, skip downloading' % filename)
                        reg_tifs_list.append(reg_tifs[0])
                        continue
                    if './' + tar_base in no_registration_strips:
                        basic.outputlogMessage('warning, %s is in the no_registration_strips list, skip downloading' % filename)
                        continue

                if filename in downloading_tarballs:
                    basic.outputlogMessage('warning, %s is being downloaded by another process' % filename)
                    continue

                if os.path.isfile(save_dem_path) and os.path.getsize(save_dem_path) > 1:
                    basic.outputlogMessage('warning, %s already exists, skip downloading' % filename)
                else:
                    # download the dem
                    basic.outputlogMessage('start downloading the %d th DEM (%d in total)' % ((ii + 1), len(urls)))
                    downloading_tarballs.append(filename)
                    # os.chdir(save_folder)
                    # run_a_process_download(url)  # download

                    ##################################################
                    # download in parallel
                    basic.check_exitcode_of_process(download_tasks)  # if a former job failed, then quit
                    while True:
                        job_count = basic.alive_process_count(download_tasks)
                        if job_count >= max_task_count:
                            print(machine_name, datetime.now(),
                                  'You are running %d or more tasks in parallel, wait' % max_task_count)
                            time.sleep(60)
                            continue
                        break
                    # start the processing
                    sub_process = Process(target=run_a_process_download,
                                          args=(url, save_dem_path, reg_tif_dir, max_task_count, b_unpack_after_downloading))
                    sub_process.start()  # start a process, don't wait
                    download_tasks.append(sub_process)
                    basic.close_remove_completed_process(download_tasks)
                    # os.chdir(curr_dir)

                dem_tar_ball_list.append(save_dem_path)
        else:
            basic.outputlogMessage('Warning, cannot find DEMs within the %d th extent' % idx)
            if b_save_grid_id_noDEM:
                save_id_grid_no_dem(idx)

    # wait until all tasks complete
    while True:
        job_count = basic.alive_process_count(download_tasks)
        if job_count > 0:
            print(machine_name, datetime.now(),
                  'wait until all tasks are completed, alive task count: %d' % job_count)
            time.sleep(60)
        else:
            break

    return dem_tar_ball_list, reg_tifs_list
def get_file_size_dem_tarball(dem_index_shp, extent_polys, pre_name, xlsx_size_path, poly_ids=None):
    # read dem polygons and urls
    dem_polygons, dem_urls = vector_gpd.read_polygons_attributes_list(dem_index_shp, 'fileurl',
                                                                      b_fix_invalid_polygon=False)

    if os.path.isfile(xlsx_size_path):
        size_table = pd.read_excel(xlsx_size_path)
        save_idx_list = size_table['index'].to_list()
        save_url_list = size_table['fileurl'].to_list()
        save_size_list = size_table['filesize'].to_list()
    else:
        save_idx_list = [item for item in range(len(dem_urls))]  # index list
        save_url_list = dem_urls
        save_size_list = [float('nan')] * len(save_idx_list)

    basic.outputlogMessage('%d dem polygons in %s' % (len(dem_polygons), dem_index_shp))

    if poly_ids is None:
        poly_ids = [item for item in range(len(extent_polys))]

    for count, (idx, ext_poly) in enumerate(zip(poly_ids, extent_polys)):
        basic.outputlogMessage('get ArcticDEM file sizes for the %d th extent (%d/%d)' % (idx, count, len(extent_polys)))

        save_filesize_txt = pre_name + '_dem_FileSize_poly_%d.txt' % idx
        if os.path.isfile(save_filesize_txt):
            basic.outputlogMessage('%s exists, skip' % save_filesize_txt)
            continue

        # get fileurl
        dem_poly_idx_list = vector_gpd.get_poly_index_within_extent(dem_polygons, ext_poly)
        basic.outputlogMessage('find %d DEMs within the %d th extent' % (len(dem_poly_idx_list), idx))
        urls = [dem_urls[id] for id in dem_poly_idx_list]
        url_size_list = [save_size_list[id] for id in dem_poly_idx_list]

        if len(urls) > 0:
            total_count = len(urls)
            for ii, (url, fileS, url_idx) in enumerate(zip(urls, url_size_list, dem_poly_idx_list)):
                # skip urls that start with /mnt and end with .tif
                if url.startswith('/mnt') and url.endswith('.tif'):
                    basic.outputlogMessage("error: not a valid url: %s" % url)
                    continue
                if math.isnan(fileS) is False:
                    continue
                url_size_GB = get_one_url_file_size(url, ii, total_count)
                url_size_list[ii] = url_size_GB
                save_size_list[url_idx] = url_size_GB

            url_size_list_noNan = [item for item in url_size_list if math.isnan(item) is False]
            if len(url_size_list_noNan) != len(url_size_list):
                basic.outputlogMessage('There are %d NaN values in url_size_list'
                                       % (len(url_size_list) - len(url_size_list_noNan)))
            total_size_GB = sum(url_size_list_noNan)
            basic.outputlogMessage('the size of files to be downloaded is %.4lf GB for the %d th extent'
                                   % (total_size_GB, (idx + 1)))
            with open(save_filesize_txt, 'w') as f_obj:
                f_obj.writelines('%d DEM files, total size is %.6lf GB \n' % (len(urls), total_size_GB))
        else:
            basic.outputlogMessage('Warning, cannot find DEMs within the %d th extent' % idx)

    # save the table
    save_dict = {'index': save_idx_list, 'filesize': save_size_list, 'fileurl': save_url_list}
    save_dict_pd = pd.DataFrame(save_dict)
    # set strings_to_urls as False: the number of URLs is over Excel's limit of 65,530 URLs per worksheet
    # https://github.com/cxn03651/write_xlsx/issues/42
    # (note: pandas >= 1.3 expects engine_kwargs={'options': {'strings_to_urls': False}} instead of options=)
    with pd.ExcelWriter(xlsx_size_path, options={'strings_to_urls': False}) as writer:
        save_dict_pd.to_excel(writer, sheet_name='url_file_size')

    return None
def get_overlap_grids_for_one_extent(all_ids, all_grid_polys, dem_poly, dem_name, idx, dem_poly_count):
    print(timeTools.get_now_time_str(), idx, dem_poly_count)
    index = vector_gpd.get_poly_index_within_extent(all_grid_polys, dem_poly)
    grid_ids = [all_ids[i] for i in index]
    return dem_name, grid_ids
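# The (idx, dem_poly_count) arguments suggest this helper is meant to be mapped over
# many DEM footprints in parallel; a minimal sketch with multiprocessing.Pool, where
# the variable names are assumptions that match the call sites above:
def _example_parallel_overlap_grids(all_ids, all_grid_polys, dem_polys, dem_names, process_num=4):
    from multiprocessing import Pool
    task_args = [(all_ids, all_grid_polys, poly, name, idx, len(dem_polys))
                 for idx, (poly, name) in enumerate(zip(dem_polys, dem_names))]
    with Pool(process_num) as pool:
        results = pool.starmap(get_overlap_grids_for_one_extent, task_args)
    # results is a list of (dem_name, grid_ids) tuples
    return dict(results)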
def main():
    dem_index_shp = os.path.expanduser('~/Data/Arctic/ArcticDEM/BROWSE_SERVER/indexes/ArcticDEM_Tile_Index_Rel7/ArcticDEM_Tile_Index_Rel7.shp')
    # extent_shp = os.path.expanduser('~/Data/PDO/PDO_statistics_swatchs/swatch_bounding_boxes.shp')
    extent_shp = os.path.expanduser('~/Data/PDO/extent_each_swatch/merge_all_qa_exent.shp')

    # extent polygons and projection (proj4)
    extent_shp_prj = map_projection.get_raster_or_vector_srs_info_proj4(extent_shp)
    dem_shp_prj = map_projection.get_raster_or_vector_srs_info_proj4(dem_index_shp)

    if extent_shp_prj != dem_shp_prj:
        basic.outputlogMessage('%s and %s do not have the same projection, will reproject %s'
                               % (extent_shp, dem_index_shp, os.path.basename(extent_shp)))
        epsg = map_projection.get_raster_or_vector_srs_info_epsg(dem_index_shp)
        # print(epsg)
        # extent_polys = vector_gpd.read_shape_gpd_to_NewPrj(extent_shp, dem_shp_prj.strip())
        extent_polys = vector_gpd.read_shape_gpd_to_NewPrj(extent_shp, epsg)
    else:
        extent_polys = vector_gpd.read_polygons_gpd(extent_shp)

    poly_ids = [idx for idx in range(len(extent_polys))]
    if 'boxes' in os.path.basename(extent_shp):
        nc_file_names = vector_gpd.read_attribute_values_list(extent_shp, 'nc_file')
    else:
        nc_file_names = vector_gpd.read_attribute_values_list(extent_shp, 'layer')

    # read dem polygons and tile numbers
    dem_polygons, dem_tiles = vector_gpd.read_polygons_attributes_list(dem_index_shp, 'tile', b_fix_invalid_polygon=False)

    for count, (idx, ext_poly) in enumerate(zip(poly_ids, extent_polys)):
        basic.outputlogMessage('get data for the %d th extent (%d/%d)' % (idx, count, len(extent_polys)))

        save_txt_path = nc_file_names[idx] + '-' + 'dem_tiles_poly_%d.txt' % idx
        if os.path.isfile(save_txt_path):
            tiles = io_function.read_list_from_txt(save_txt_path)
            basic.outputlogMessage('read %d dem tiles from %s' % (len(tiles), save_txt_path))
        else:
            # get the tile names
            dem_poly_ids = vector_gpd.get_poly_index_within_extent(dem_polygons, ext_poly)
            basic.outputlogMessage('find %d DEMs within the %d th extent' % (len(dem_poly_ids), idx))
            tiles = [dem_tiles[id] for id in dem_poly_ids]
            # save to txt
            io_function.save_list_to_txt(save_txt_path, tiles)
            basic.outputlogMessage('save dem tiles to %s' % save_txt_path)

        # download and create a mosaic
        url_head = 'https://data.pgc.umn.edu/elev/dem/setsm/ArcticDEM/mosaic/v3.0/32m/'
        download_tarball_for_one_polygon(tarball_dir, dem_tif_dir, url_head, tiles)

        # create a mosaic
        create_a_mosaic(nc_file_names[idx], idx, dem_eachSwatch_dir, ext_poly, tiles)

    bak_folder = 'small_tifs'
    io_function.mkdir(bak_folder)
    # remove small and duplicated ones
    for file_name in nc_file_names:
        crop_tifs = io_function.get_file_list_by_pattern(dem_eachSwatch_dir, file_name + '*crop.tif')
        if len(crop_tifs) == 1:
            pass
        elif len(crop_tifs) > 1:
            # keep the largest one and move the smaller ones away
            tif_files_size = [io_function.get_file_size_bytes(item) for item in crop_tifs]
            max_size = max(tif_files_size)
            max_index = tif_files_size.index(max_size)
            del crop_tifs[max_index]
            for tmp in crop_tifs:
                io_function.movefiletodir(tmp, bak_folder)
                tmp = tmp.replace('_crop', '')
                io_function.movefiletodir(tmp, bak_folder)
        else:
            # no tif
            raise ValueError('Results for %s do not exist' % file_name)