def mask_by_surface_water(map_raster, surface_water_crop):
    """Set map pixels to 0 wherever the surface-water raster equals 1 or 255.

    The output name is built by io_function.get_name_by_adding_tail from the
    base name of map_raster and the tail 'WaterMask' (i.e. it is saved in the
    current folder). If that file already exists it is returned untouched.

    :param map_raster: path of the raster to mask (one band is read)
    :param surface_water_crop: path of the surface-water raster; its array
        must have the same shape as the map array
    :return: path of the masked raster, or None if saving reports failure
    :raises ValueError: if the two arrays have different shapes
    """
    out_path = io_function.get_name_by_adding_tail(
        os.path.basename(map_raster), 'WaterMask')
    if os.path.isfile(out_path):
        print('warning, %s exists' % out_path)
        return out_path

    map_pixels, _ = raster_io.read_raster_one_band_np(map_raster)
    water_pixels, _ = raster_io.read_raster_one_band_np(surface_water_crop)
    print(map_pixels.shape)
    if map_pixels.shape != water_pixels.shape:
        both_shapes = (str(map_pixels.shape), str(water_pixels.shape))
        raise ValueError('size inconsistent: %s and %s' % both_shapes)

    # values 1 and 255 in the water raster mark pixels to drop (water or others)
    drop_pixels = (water_pixels == 1) | (water_pixels == 255)
    map_pixels[drop_pixels] = 0

    saved = raster_io.save_numpy_array_to_rasterfile(
        map_pixels, out_path, map_raster,
        compress='lzw', tiled='Yes', bigtiff='if_safer')
    if not saved:
        return None
    return out_path
def mask_by_elevation(map_raster_path, elevation_crop_path, threashold):
    """Set map pixels to 0 wherever the elevation raster exceeds a threshold.

    The output name is built by io_function.get_name_by_adding_tail from the
    base name of map_raster_path and the tail 'DEMMask' (i.e. it is saved in
    the current folder). If that file already exists it is returned untouched.

    :param map_raster_path: path of the raster to mask (one band is read)
    :param elevation_crop_path: path of the elevation raster; its array must
        have the same shape as the map array
    :param threashold: elevation above which map pixels are set to 0
    :return: path of the masked raster, or None if saving reports failure
    :raises ValueError: if the two arrays have different shapes
    """
    out_path = io_function.get_name_by_adding_tail(
        os.path.basename(map_raster_path), 'DEMMask')
    if os.path.isfile(out_path):
        print('warning, %s exists' % out_path)
        return out_path

    map_pixels, _ = raster_io.read_raster_one_band_np(map_raster_path)
    dem_pixels, _ = raster_io.read_raster_one_band_np(elevation_crop_path)
    print(map_pixels.shape)
    if map_pixels.shape != dem_pixels.shape:
        both_shapes = (str(map_pixels.shape), str(dem_pixels.shape))
        raise ValueError('size inconsistent: %s and %s' % both_shapes)

    # mask out pixels with high elevation
    too_high = dem_pixels > threashold
    map_pixels[too_high] = 0

    saved = raster_io.save_numpy_array_to_rasterfile(
        map_pixels, out_path, map_raster_path,
        compress='lzw', tiled='Yes', bigtiff='if_safer')
    if not saved:
        return None
    return out_path
def slope_tif_to_slope_shapefile(slope_tif,slope_bin_path,slope_threshold):
    """Threshold a slope raster into a binary raster and polygonize it.

    If slope_bin_path does not exist yet, it is created from slope_tif:
    pixels with slope > slope_threshold and not > 88 become 255, all others 0,
    and 0 is saved as nodata. The binary raster is then converted with
    vector_gpd.raster2shapefile (connect8=True).

    :param slope_tif: path of the slope raster (one band is read)
    :param slope_bin_path: path of the binary slope raster (created if missing)
    :param slope_threshold: slopes above this value are kept
    :return: the value returned by raster2shapefile, or False when it is None
    """
    if not os.path.isfile(slope_bin_path):
        slope_values, _ = raster_io.read_raster_one_band_np(slope_tif)

        # very large slopes (> 88) may be caused by artifacts, so they are dropped.
        # (Morphological dilation/opening/closing was tried here and left disabled.)
        keep = (slope_values > slope_threshold) & ~(slope_values > 88)
        slope_bin = np.zeros_like(slope_values, dtype=np.uint8)
        slope_bin[keep] = 255

        # nodata is set to 0
        raster_io.save_numpy_array_to_rasterfile(
            slope_bin, slope_bin_path, slope_tif,
            nodata=0, compress='lzw', tiled='yes', bigtiff='if_safer')
    else:
        print('%s exist'%slope_bin_path)

    # to shapefile
    shp_result = vector_gpd.raster2shapefile(slope_bin_path, connect8=True)
    return False if shp_result is None else shp_result
def get_new_color_table_for_raster(raster, color_table_txt, out_dir=None):
    """Write a color table restricted to the values present in a raster.

    The first two lines of color_table_txt are always copied. Every later
    line is kept only if the integer before its first comma occurs among the
    raster's non-nodata values. The result is written to
    '<raster name without extension>_color.txt', inside out_dir when given.

    :param raster: path of the raster (one band is read)
    :param color_table_txt: path of the full, comma-separated color table
    :param out_dir: optional folder for the new color table
    """
    pixels, no_data = raster_io.read_raster_one_band_np(raster)

    # remove nodata
    flat = pixels.flatten()
    print(flat.shape)
    flat = flat[flat != no_data]
    print(flat.shape)
    unique_values = np.unique(flat)
    print('unique_values:', unique_values)

    save_color_table = io_function.get_name_no_ext(
        os.path.basename(raster)) + '_color.txt'
    if out_dir is not None:
        save_color_table = os.path.join(out_dir, save_color_table)

    with open(color_table_txt, 'r') as src:
        table_lines = src.readlines()

    # copy the first two lines, then only the entries whose value is in the raster
    kept_lines = [table_lines[0], table_lines[1]]
    kept_lines += [
        entry for entry in table_lines[2:]
        if int(entry.split(',')[0]) in unique_values
    ]

    with open(save_color_table, 'w') as dst:
        dst.writelines(kept_lines)
    print('Save color table to %s' % os.path.abspath(save_color_table))
def sum_matchtag(input_tifs, save_path):
    """Sum single-band matchtag rasters pixel by pixel and save the result.

    The sum is stored as uint8. Instead of silently wrapping around when a
    pixel total exceeds 255, the total saturates at 255.

    :param input_tifs: sequence of raster paths; all must share height, width,
        band count and data type with the first one
    :param save_path: path of the output raster (georeference taken from
        input_tifs[0])
    :return: False if input_tifs is empty, True after the sum has been saved
    :raises ValueError: if the rasters are inconsistent or have more than one band
    """
    if len(input_tifs) < 1:
        return False

    # check band, width, height and data type against the first raster
    height, width, count, dtype = raster_io.get_height_width_bandnum_dtype(
        input_tifs[0])
    for other_tif in input_tifs[1:]:
        h, w, c, other_dtype = raster_io.get_height_width_bandnum_dtype(other_tif)
        if h != height or w != width or c != count or other_dtype != dtype:
            raise ValueError(
                'size or data type is different between %s and %s' %
                (input_tifs[0], other_tif))

    if count != 1:
        raise ValueError('Matchtag should only have one band')

    uint8_max = np.iinfo(np.uint8).max
    sum_data = np.zeros((height, width), dtype=np.uint8)
    for tif in input_tifs:
        data, _ = raster_io.read_raster_one_band_np(tif)
        sum_data += data
        # An unsigned addition that wrapped around leaves a result smaller
        # than the value just added; clamp those pixels to the maximum.
        sum_data[sum_data < data] = uint8_max

    # save to file
    raster_io.save_numpy_array_to_rasterfile(sum_data,
                                             save_path,
                                             input_tifs[0],
                                             compress='lzw',
                                             tiled='yes',
                                             bigtiff='if_safer')
    return True
def segment_changes_on_dem_diff(dem_diff_tif, save_dir):
    """Segment a DEM-difference raster and polygonize the labels.

    The raster is segmented with mean_shift_segmentation, the labels are saved
    to '<save_dir>/<name>_label.tif' (nodata=0) and converted to
    '<save_dir>/<name>.shp' with gdal_polygonize.py (8-connectedness, band 1).
    The process exits with status 1 if gdal_polygonize.py fails or cannot be
    started.

    :param dem_diff_tif: path of the DEM-difference raster (one band is read)
    :param save_dir: folder receiving the label raster and the shapefile
    """
    # local import: only needed for the polygonize call below
    import subprocess

    out_pre = os.path.splitext(os.path.basename(dem_diff_tif))[0]

    # read image
    one_band_img, nodata = raster_io.read_raster_one_band_np(dem_diff_tif)

    # Segmentation. The output of these algorithms is not always good and the
    # parameters need to be chosen carefully; watershed, k-means, quickshift
    # and a simple threshold were alternatives to mean shift.
    out_labels = mean_shift_segmentation(one_band_img)

    # save the label
    label_path = os.path.join(save_dir, out_pre + '_label.tif')
    raster_io.save_numpy_array_to_rasterfile(out_labels,
                                             label_path,
                                             dem_diff_tif,
                                             nodata=0)

    # convert the label to shapefile; an argument list (no shell) keeps paths
    # containing spaces or shell metacharacters intact
    out_shp = os.path.join(save_dir, out_pre + '.shp')
    polygonize_args = ['gdal_polygonize.py', '-8', label_path, '-b', '1',
                       '-f', 'ESRI Shapefile', out_shp]
    try:
        res = subprocess.call(polygonize_args)
    except OSError:
        # the program could not be started; treat it like a failed run
        res = 1
    if res != 0:
        sys.exit(1)
def segment_subsidence_on_dem_diff(dem_diff_tif, save_dir):
    """Extract candidate subsidence areas from a DEM-difference raster.

    Pixels with a difference below -2 are labelled 1, the label image is
    smoothed with a 3x3 median filter, saved to '<save_dir>/<name>_label.tif'
    (nodata=0) and converted to '<save_dir>/<name>.shp' with
    gdal_polygonize.py (8-connectedness, band 1). The shapefile is then passed
    to post_processing_subsidence. The process exits with status 1 if
    gdal_polygonize.py fails or cannot be started.

    :param dem_diff_tif: path of the DEM-difference raster (one band is read)
    :param save_dir: output folder; created if it does not exist
    """
    # local import: only needed for the polygonize call below
    import subprocess

    out_pre = os.path.splitext(os.path.basename(dem_diff_tif))[0]

    # read image
    one_band_img, nodata = raster_io.read_raster_one_band_np(dem_diff_tif)

    # Segmentation by threshold (may produce a lot of noise). The mean is not
    # subtracted because it may be affected by outliers. A threshold of -1
    # resulted in a lot of polygons, so -2 is used.
    out_labels = np.zeros_like(one_band_img, dtype=np.uint8)
    out_labels[one_band_img < -2] = 1

    # apply median filter with kernel size 3
    out_labels = cv2.medianBlur(out_labels, 3)

    # save the label
    if os.path.isdir(save_dir) is False:
        io_function.mkdir(save_dir)
    label_path = os.path.join(save_dir, out_pre + '_label.tif')
    raster_io.save_numpy_array_to_rasterfile(out_labels, label_path, dem_diff_tif, nodata=0)

    # convert the label to shapefile; an argument list (no shell) keeps paths
    # containing spaces or shell metacharacters intact
    out_shp = os.path.join(save_dir, out_pre + '.shp')
    polygonize_args = ['gdal_polygonize.py', '-8', label_path, '-b', '1',
                       '-f', 'ESRI Shapefile', out_shp]
    try:
        res = subprocess.call(polygonize_args)
    except OSError:
        # the program could not be started; treat it like a failed run
        res = 1
    if res != 0:
        sys.exit(1)

    # post-processing
    post_processing_subsidence(out_shp)
def read_date_dem_to_memory(pair_idx, pair, date_pair_list_sorted, dem_data_dict, dem_groups_date, less_memory=False, boundary=None):
    """Return the DEM arrays for the two dates of a pair, nodata as NaN.

    With less_memory left at False, arrays are cached in dem_data_dict (keyed
    by date) so each date is read from disk only once; for a large area this
    can run out of memory. Otherwise nothing is cached and both DEMs are read
    on every call, which costs more disk reads.

    :param pair_idx: not used in this function
    :param pair: the two date keys; pair[0] is the old date, pair[1] the new
    :param date_pair_list_sorted: not used in this function
    :param dem_data_dict: cache of already loaded arrays, updated in place
    :param dem_groups_date: maps a date key to a list whose first item is the
        DEM path to read
    :param less_memory: anything other than the literal False disables caching
    :param boundary: passed through to raster_io.read_raster_one_band_np
    :return: (data_old, data_new)
    """
    def _load_nan_masked(date_key):
        # read the first DEM of the date group and replace nodata with NaN
        dem, dem_nodata = raster_io.read_raster_one_band_np(
            dem_groups_date[date_key][0], boundary=boundary)
        dem[dem == dem_nodata] = np.nan
        return dem

    old_key, new_key = pair[0], pair[1]

    if less_memory is not False:
        # not enough memory: only the two needed DEMs are read, nothing is stored
        return _load_nan_masked(old_key), _load_nan_masked(new_key)

    loaded = []
    for date_key in (old_key, new_key):
        if date_key not in dem_data_dict.keys():
            dem = _load_nan_masked(date_key)
            dem_data_dict[date_key] = dem
        else:
            dem = dem_data_dict[date_key]
        loaded.append(dem)
    return loaded[0], loaded[1]
def mask_dem_by_matchtag(input_dem, mask_tif, save_path):
    """Set DEM pixels to nodata wherever the mask raster is 0, then save.

    :param input_dem: path of the single-band DEM
    :param mask_tif: path of the single-band mask (matchtag) with the same
        height, width and band count as the DEM
    :param save_path: path of the masked DEM to write
    :return: save_path
    :raises ValueError: if the sizes differ or the rasters have more than one band
    """
    # check band, width, height
    height, width, count, _ = raster_io.get_height_width_bandnum_dtype(input_dem)
    height_mask, width_mask, count_mask, _ = raster_io.get_height_width_bandnum_dtype(mask_tif)
    if (height_mask, width_mask, count_mask) != (height, width, count):
        raise ValueError('size different between %s and %s'%(input_dem, mask_tif))
    if count != 1:
        raise ValueError('DEM and Matchtag should only have one band')

    dem_data, nodata = raster_io.read_raster_one_band_np(input_dem)
    matchdata, _ = raster_io.read_raster_one_band_np(mask_tif)

    # mask as nodata
    unmatched = matchdata == 0
    dem_data[unmatched] = nodata

    # save to file
    raster_io.save_numpy_array_to_rasterfile(
        dem_data, save_path, input_dem,
        compress='lzw', tiled='yes', bigtiff='if_safer')
    return save_path
def test_to_unique_label_for_superpixels():
    """Run image_segment.to_unique_label_for_superpixels on a local label raster.

    Reads a hard-coded label image, prints its nodata value and label range,
    and saves the relabelled result next to it with the tail 'new'.
    """
    label_img = os.path.expanduser(
        '~/Data/Arctic/canada_arctic/DEM/WR_dem_diff/segment_parallel_sub/WR_dem_diff_DEM_diff_prj_8bit_sub_label.tif'
    )

    out_labels, nodata = raster_io.read_raster_one_band_np(label_img)
    print('nodata', nodata)
    label_min = np.min(out_labels)
    label_max = np.max(out_labels)
    print('min and max labels of out_labels', label_min, label_max)

    new_labels = image_segment.to_unique_label_for_superpixels(out_labels)

    save_new_label = io_function.get_name_by_adding_tail(label_img, 'new')
    raster_io.save_numpy_array_to_rasterfile(new_labels, save_new_label, label_img)
def dem_to_relative_8bit_a_patch(idx, patch, patch_count, dem_path, dst_nodata):
    """Read one patch of a DEM and stretch it to 8 bit.

    The stretch is done by raster_io.image_numpy_allBands_to_8bit_hist with
    per_min=0.02 and per_max=0.98.

    :param idx: not used in this function
    :param patch: boundary of the patch, passed to the reader as `boundary`
    :param patch_count: not used in this function
    :param dem_path: path of the DEM raster
    :param dst_nodata: nodata value for the 8-bit output
    :return: (patch, 8-bit array of the patch)
    """
    dem_window, src_nodata = raster_io.read_raster_one_band_np(dem_path, boundary=patch)
    stretch_options = dict(per_min=0.02,
                           per_max=0.98,
                           src_nodata=src_nodata,
                           dst_nodata=dst_nodata)
    return patch, raster_io.image_numpy_allBands_to_8bit_hist(dem_window, **stretch_options)
def zonal_stats_one_polygon(idx, polygon, image_tiles, img_tile_polygons, stats, nodata=None,range=None, band = 1,all_touched=True):
    """Compute statistics of the raster pixels inside one polygon.

    The polygon may overlap zero, one or several image tiles. With several
    tiles, each is cropped to the polygon into a temporary file in the current
    folder, the crops are merged with gdal_merge.py and the mosaic is read
    back. The temporary files are removed even when cropping or mosaicking
    fails.

    :param idx: index of the polygon (used in messages and temporary file names)
    :param polygon: the polygon to analyse
    :param image_tiles: list of raster paths
    :param img_tile_polygons: extent polygons of image_tiles, in the same order
    :param stats: statistics to compute, passed to array_stats
    :param nodata: nodata value; replaced by the value returned from the reader
    :param range: passed to array_stats
    :param band: band to read
    :param all_touched: passed to raster_io.read_raster_in_polygons_mask
    :return: result of array_stats, or None if no raster overlaps the polygon
    :raises IOError: if the mosaic cannot be created
    """
    overlap_index = vector_gpd.get_poly_index_within_extent(img_tile_polygons, polygon)
    image_list = [image_tiles[item] for item in overlap_index]

    if len(image_list) < 1:
        basic.outputlogMessage('warning, cannot find raster for %d (start=0) polygon'%idx)
        return None

    if len(image_list) == 1:
        out_image, _, nodata = raster_io.read_raster_in_polygons_mask(
            image_list[0], polygon, nodata=nodata, all_touched=all_touched, bands=band)
    else:
        # the polygon overlaps more than one raster, so a mosaic is needed
        tmp_saved_files = []
        save_path = 'raster_for_poly%d.tif'%idx
        try:
            for image_path in image_list:
                tmp_save_path = os.path.splitext(os.path.basename(image_path))[0] + '_subset_poly%d'%idx +'.tif'
                # register the path first so a partially written file is cleaned up too
                tmp_saved_files.append(tmp_save_path)
                _, _, nodata = raster_io.read_raster_in_polygons_mask(
                    image_path, polygon, all_touched=all_touched, nodata=nodata,
                    bands=band, save_path=tmp_save_path)

            # mosaic files in tmp_saved_files
            mosaic_args_list = ['gdal_merge.py', '-o', save_path,'-n',str(nodata),'-a_nodata',str(nodata)]
            mosaic_args_list.extend(tmp_saved_files)
            if basic.exec_command_args_list_one_file(mosaic_args_list,save_path) is False:
                raise IOError('error, obtain a mosaic (%s) failed'%save_path)

            # read the raster
            out_image, _ = raster_io.read_raster_one_band_np(save_path,band=band)
        finally:
            # remove temporary rasters, also when something above failed
            for item in tmp_saved_files + [save_path]:
                if os.path.exists(item):
                    io_function.delete_file_or_dir(item)

    # do calculation
    return array_stats(out_image, stats, nodata,range=range)
def test_projection_epsg_2163():
    """Read a patch from an image with epsg_2163 and save it again.

    The saved 'test_projection.tif' can then be inspected to see which
    projection it ends up with.
    """
    # path on tesia; on the Mac the folder was
    # ~/Data/flooding_area/Houston/Houston_SAR_GRD_FLOAT_gee/S1_Houston_prj_8bit
    folder = os.path.expanduser(
        '~/Bhaltos2/lingcaoHuang/flooding_area/Houston/Houston_SAR_GRD_FLOAT_gee/S1_Houston_prj_8bit_select'
    )
    img_path = os.path.join(
        folder,
        'S1A_IW_GRDH_1SDV_20170829T002620_20170829T002645_018131_01E74D_D734_prj_8bit.tif'
    )

    # (xoff, yoff, xsize, ysize)
    boundary = (10000, 10000, 500, 500)

    img_data, _ = raster_io.read_raster_one_band_np(img_path, boundary=boundary)
    raster_io.save_numpy_array_to_rasterfile(img_data,
                                             'test_projection.tif',
                                             img_path,
                                             boundary=boundary)
def test_if_raseter_closed():
    """Manual check of whether raster files stay open after being read.

    Reads a small window of every '.tif' in a hard-coded folder ten times,
    keeps every returned value in a list, and prints the open files reported
    by basic.get_all_processes_openfiles('python').
    """
    # to test whether the raster is closed once we are outside "with open"
    dir = os.path.expanduser('~/Data/Arctic/canada_arctic/DEM/WR_dem_diff')
    tifs = io_function.get_file_list_by_ext('.tif', dir, bsub_folder=False)
    print("%d tif in %s" % (len(tifs), dir))
    # everything that was read is kept referenced here
    data_list = []
    for idx in range(10):  # each one open 10 times
        boundary = (0, 0, 100, 100)  # (xoff,yoff ,xsize, ysize)
        for tif in tifs:
            data = raster_io.read_raster_one_band_np(tif,
                                                     band=1,
                                                     boundary=boundary)
            data_list.append(data)
    # check current files
    # open_file_list = basic.get_curr_process_openfiles()
    open_file_list = basic.get_all_processes_openfiles('python')
    print(' open file count:', len(open_file_list))
    for o_file in open_file_list:
        print(o_file)
def mask_strip_dem_outlier_by_ArcticDEM_mosaic(crop_strip_dem_list, extent_poly, extent_id, crop_tif_dir, o_res, process_num):
    """Mask strip-DEM pixels that differ from the ArcticDEM mosaic by more than 50 m.

    The mosaic tiles overlapping extent_poly are cropped to the extent, merged
    into '<crop_tif_dir>/ArcticDEM_tiles_grid<extent_id>.tif' and compared
    with every strip DEM. Pixels whose absolute difference exceeds 50 are set
    to the strip's nodata and the result is saved with the tail 'maskOutlier'.
    Existing crops and existing masked DEMs are reused. A strip DEM that
    cannot be read is logged and skipped.

    :param crop_strip_dem_list: paths of strip DEMs already cropped to the extent
    :param extent_poly: polygon of the extent
    :param extent_id: id of the extent (used in file names and messages)
    :param crop_tif_dir: folder for the cropped tiles and the mosaic
    :param o_res: output resolution used when cropping the mosaic tiles
    :param process_num: thread number passed to the crop and mosaic helpers
    :return: list of masked strip DEM paths, or False if there is no mosaic
        tile for the extent or the mosaic cannot be created
    :raises ValueError: if cropping a tile fails, or a strip DEM differs from
        the mosaic in size or has more than one band
    """
    # get list of the ArcticDEM mosaic
    arcticDEM_mosaic_reg_tifs = io_function.get_file_list_by_ext('.tif',arcticDEM_tile_reg_tif_dir,bsub_folder=False)
    mosaic_dem_ext_polys = get_dem_tif_ext_polygons(arcticDEM_mosaic_reg_tifs)

    overlap_index = vector_gpd.get_poly_index_within_extent(mosaic_dem_ext_polys,extent_poly)

    #### crop and mosaic mosaic_reg_tifs
    sub_mosaic_dem_tifs = [arcticDEM_mosaic_reg_tifs[item] for item in overlap_index]
    mosaic_crop_tif_list = []
    for tif in sub_mosaic_dem_tifs:
        save_crop_path = os.path.join(crop_tif_dir, os.path.basename(io_function.get_name_by_adding_tail(tif, 'sub_poly_%d' % extent_id)) )
        if os.path.isfile(save_crop_path):
            basic.outputlogMessage('%s exists, skip cropping' % save_crop_path)
            mosaic_crop_tif_list.append(save_crop_path)
            continue

        crop_tif = subset_image_by_polygon_box(tif, save_crop_path, extent_poly, resample_m='near',
                                               o_format='VRT', out_res=o_res,same_extent=True,thread_num=process_num)
        if crop_tif is False:
            raise ValueError('warning, crop %s failed' % tif)
        mosaic_crop_tif_list.append(crop_tif)

    if len(mosaic_crop_tif_list) < 1:
        basic.outputlogMessage('No mosaic version of ArcticDEM for %d grid, skip mask_strip_dem_outlier_by_ArcticDEM_mosaic'%extent_id)
        return False

    # create mosaic; this also handles a single input file, but is slow
    save_dem_mosaic = os.path.join(crop_tif_dir, 'ArcticDEM_tiles_grid%d.tif'%extent_id)
    result = RSImageProcess.mosaic_crop_images_gdalwarp(mosaic_crop_tif_list, save_dem_mosaic, resampling_method='average',o_format='GTiff',
                                               compress='lzw', tiled='yes', bigtiff='if_safer',thread_num=process_num)
    if result is False:
        return False

    height_tileDEM, width_tileDEM, count_tileDEM, _ = raster_io.get_height_width_bandnum_dtype(save_dem_mosaic)
    # NOTE(review): the mosaic's nodata value is not excluded from the
    # comparison below, so strip pixels over mosaic nodata are presumably
    # masked as well - confirm that this is intended.
    tileDEM_data, _ = raster_io.read_raster_one_band_np(save_dem_mosaic)

    # masking the strip version of DEMs
    mask_strip_dem_list = []
    for strip_dem in crop_strip_dem_list:
        save_path = io_function.get_name_by_adding_tail(strip_dem, 'maskOutlier')
        if os.path.isfile(save_path):
            basic.outputlogMessage('%s exist, skip'%save_path)
            mask_strip_dem_list.append(save_path)
            continue

        # check band, width, height
        height, width, count, _ = raster_io.get_height_width_bandnum_dtype(strip_dem)
        if height_tileDEM != height or width_tileDEM != width or count_tileDEM != count:
            raise ValueError('size different between %s and %s' % (strip_dem, save_dem_mosaic))
        if count != 1:
            raise ValueError('DEM and Matchtag should only have one band')

        try:
            dem_data, nodata = raster_io.read_raster_one_band_np(strip_dem)
        except Exception:
            # an unreadable strip is skipped; KeyboardInterrupt and SystemExit
            # are deliberately not caught so the run can still be stopped
            basic.outputlogMessage(' invalid tif file: %s'%strip_dem)
            continue

        nodata_loc = np.where(dem_data == nodata)

        diff = dem_data - tileDEM_data
        # mask as nodata
        dem_data[np.abs(diff) > 50 ] = nodata  # ignore greater than 50 m
        dem_data[ nodata_loc ] = nodata         # may change some nodata pixel, change them back
        # save to file
        raster_io.save_numpy_array_to_rasterfile(dem_data, save_path, strip_dem, compress='lzw', tiled='yes',
                                                 bigtiff='if_safer')
        mask_strip_dem_list.append(save_path)

    return mask_strip_dem_list
def segment_a_patch(idx, patch, patch_count, img_path, org_raster, b_save_patch_label):
    """Segment one patch of an image with quickshift.

    The patch is median filtered (kernel 3) and segmented by
    quickshift_segmentaion. When b_save_patch_label is True the labels are
    written to '<name>_patch<idx>/<name>_patch<idx>_label.tif' (an existing
    file is reused) and only the path is returned. Otherwise the labels are
    returned in memory, together with per-label statistics of org_raster when
    one is given.

    :param idx: index of the patch
    :param patch: boundary of the patch, passed to the reader as `boundary`
    :param patch_count: total number of patches (for the progress message)
    :param img_path: path of the image to segment
    :param org_raster: path of the original raster used for statistics, or None
    :param b_save_patch_label: True to save labels to disk instead of returning them
    :return: (patch, label path, None, None) when labels are saved, otherwise
        (patch, labels, nodata, attributes) where attributes is a dict
        {label: statistics} or None if org_raster is None
    """
    print('tile: %d / %d' % (idx + 1, patch_count))
    base_name = io_function.get_name_no_ext(img_path)
    patch_dir = base_name + '_patch%d' % idx
    patch_label_path = os.path.join(patch_dir, base_name + '_patch%d_label.tif' % idx)
    save_to_disk = b_save_patch_label is True

    if save_to_disk:
        if not os.path.isdir(patch_dir):
            io_function.mkdir(patch_dir)
        if os.path.isfile(patch_label_path):
            print('%s exists, skip' % patch_label_path)
            return patch, patch_label_path, None, None

    # read image
    one_band_img, nodata = raster_io.read_raster_one_band_np(img_path, boundary=patch)

    # median filter to remove some noise; kernel 3, cannot accept int32
    one_band_img = cv2.medianBlur(one_band_img, 3)

    # Segmentation. The output of these algorithms is not always good and the
    # parameters need to be chosen carefully; watershed, k-means and mean
    # shift were alternatives to quickshift.
    out_labels = quickshift_segmentaion(one_band_img,
                                        ratio=0.3,
                                        kernel_size=5,
                                        max_dist=10,
                                        sigma=1,
                                        convert2lab=False)

    if save_to_disk:
        # saving copies the nodata value, which needs to be unset later
        raster_io.save_numpy_array_to_rasterfile(out_labels,
                                                 patch_label_path,
                                                 img_path,
                                                 boundary=patch)
        return patch, patch_label_path, None, None

    if org_raster is None:
        return patch, out_labels, nodata, None

    # attributes are calculated from the original data; the labels output by
    # the segmentation are not unique for superpixels, so np.unique is used
    # rather than region properties
    org_img_b1, org_nodata = raster_io.read_raster_one_band_np(org_raster, boundary=patch)
    object_attributes = {
        label: get_stastics_from_array(org_img_b1[out_labels == label], org_nodata)
        for label in np.unique(out_labels)
    }
    return patch, out_labels, nodata, object_attributes
def main(options, args):
    """Drive the ArcticDEM processing of all grids covered by one extent.

    args[0] is the extent file and every following entry of args is a task
    name. The grids of the extent are selected subset by subset, each subset
    is downloaded/processed according to the machine this runs on, and finally
    (unless options.b_no_slurm is set) the remote results are polled.

    :param options: parsed options; the attributes read here are
        remote_working_dir, remote_log_dir, process_node, download_node,
        max_grids, b_remove_tmp_folders, b_dont_remove_DEM_files and b_no_slurm
    :param args: positional arguments: extent file followed by task names
    :return: True if the 'done' file of the extent already exists; otherwise
        the function ends without an explicit return value
    :raises ValueError: if no task is given, or if a '.txt' extent contains
        more than 2000 grids
    """
    extent_shp = args[0]
    task_list = [args[item] for item in range(1, len(args))]
    # task_name = args[1]
    if len(task_list) < 1:
        raise ValueError('There is no task: %s' % str(task_list))

    # local_grid_id_txt is in the current dir
    # log_grid_ids_txt, log_grid_ids_txt_done is in grid_ids_txt_dir
    local_grid_id_txt, log_grid_ids_txt, log_grid_ids_txt_done = get_extent_grid_id_txt_done_files(
        extent_shp)
    # check if it has been completed
    if os.path.isfile(log_grid_ids_txt_done):
        basic.outputlogMessage('Tasks for extent %s have been completed' %
                               extent_shp)
        return True

    # remote folders and nodes fall back to hard-coded defaults when not given
    r_working_dir = '/scratch/summit/lihu9680/Arctic/dem_processing' if options.remote_working_dir is None else options.remote_working_dir
    r_log_dir = '/scratch/summit/lihu9680/ArcticDEM_tmp_dir/log_dir' if options.remote_log_dir is None else options.remote_log_dir
    process_node = '$curc_host' if options.process_node is None else options.process_node
    download_node = '$curc_host' if options.download_node is None else options.download_node

    max_grid_count = options.max_grids
    b_remove_tmp_folders = options.b_remove_tmp_folders
    # NOTE(review): this flag is passed below as `b_remove=`; whether its value
    # means "remove" or "don't remove" depends on how the option is declared - confirm.
    b_dont_remove_DEM_files = options.b_dont_remove_DEM_files
    b_no_slurm = options.b_no_slurm
    b_divide_to_subsets = True

    # modify the folder name of subsets (module-level globals are rewritten here)
    global subset_shp_dir
    subset_shp_dir = subset_shp_dir + '_' + io_function.get_name_no_ext(
        extent_shp)
    global msg_file_pre
    msg_file_pre = io_function.get_name_no_ext(extent_shp) + '_' + msg_file_pre

    grid_ids_to_process_txt = io_function.get_name_no_ext(
        extent_shp) + '_' + 'grid_ids_to_process.txt'

    # build map dem cover grid (take time, but only need to run once at the beginning)
    build_dict_of_dem_cover_grid_ids(dem_strip_shp, grid_20_shp,
                                     strip_dem_cover_grids_txt)
    build_dict_of_dem_cover_grid_ids(dem_tile_shp, grid_20_shp,
                                     tile_dem_cover_grids_txt)

    # get grids for processing
    # read grids and ids
    time0 = time.time()
    all_grid_polys, all_ids = vector_gpd.read_polygons_attributes_list(
        grid_20_shp, 'id')
    print('time cost of read polygons and attributes', time.time() - time0)

    gird_prj = map_projection.get_raster_or_vector_srs_info_proj4(grid_20_shp)

    # get grid ids based on input extent
    grid_polys, grid_ids = get_grid_20(extent_shp, all_grid_polys, all_ids)

    # based on extent shape, subset grid_20_id_raster
    # # using gdalwarp to crop the mask, also have 0.5 pixel offset, so dont use it
    # grid_20_id_raster_sub = io_function.get_name_by_adding_tail(os.path.basename(grid_20_id_raster),'sub')
    # if RSImageProcess.subset_image_by_shapefile(grid_20_id_raster,extent_shp,save_path=grid_20_id_raster_sub) is False:
    #     return False

    # read grid_ids_2d, then mask it
    grid_ids_2d, grid_nodata = raster_io.read_raster_one_band_np(
        grid_20_id_raster)  # 2d array of grid ids
    # rasterize grid_polys, will serve as mask.
    grid_ids_2d_mask = raster_io.burn_polygons_to_a_raster(
        grid_20_id_raster, grid_polys, 1, None)
    # raster_io.save_numpy_array_to_rasterfile(grid_ids_2d_mask,'grid_ids_2d_mask.tif',grid_20_id_raster,nodata=255)  # save to disk for checking
    loc_masked_out = np.where(grid_ids_2d_mask != 1)
    # grid_ids_2d[ loc_masked_out ] = grid_nodata
    # pixels outside the extent and nodata pixels are marked as visited up front
    visit_np = np.zeros_like(grid_ids_2d, dtype=np.uint8)
    visit_np[loc_masked_out] = 1  # 1 indicate already visited
    visit_np[np.where(
        grid_ids_2d == grid_nodata)] = 1  # 1 indicate already visited

    subset_id = -1
    # on tesia, uist, vpn-connected laptop
    if machine_name == 'ubuntu' or machine_name == 'uist-int-colorado-edu' or 'colorado.edu' in machine_name or 'MacBook' in machine_name:
        io_function.mkdir(subset_shp_dir)
        sync_log_files(process_node, r_log_dir, process_log_dir)
        update_complete_grid_list(grid_ids, task_list)

    # one iteration per subset of grids
    while True:
        subset_id += 1
        # on tesia, uist, vpn-connected laptop
        if machine_name == 'ubuntu' or machine_name == 'uist-int-colorado-edu' or 'colorado.edu' in machine_name or 'MacBook' in machine_name:

            # remove grids that has been complete or ignored
            ignore_ids = get_complete_ignore_grid_ids()
            num_grid_ids = save_grid_ids_need_to_process(
                grid_ids,
                ignore_ids=ignore_ids,
                save_path=grid_ids_to_process_txt)
            if num_grid_ids < 1:
                make_note_all_task_done(extent_shp, process_node)

            # if the input is not a shapefile, then don't divide it to many subsets
            if extent_shp.endswith('.txt'):
                select_grid_polys, selected_gird_ids = grid_polys, grid_ids
                if len(selected_gird_ids) > 2000:
                    raise ValueError('There are too many grid to process once')
                b_divide_to_subsets = False
                # fixed subset id for the single, undivided subset
                subset_id = 999999
                select_grids_shp = os.path.join(
                    subset_shp_dir,
                    io_function.get_name_no_ext(extent_shp) +
                    '_sub%d' % subset_id + '.shp')
                save_selected_girds_and_ids(selected_gird_ids,
                                            select_grid_polys, gird_prj,
                                            select_grids_shp)
            else:
                select_grids_shp = os.path.join(
                    subset_shp_dir,
                    io_function.get_name_no_ext(extent_shp) +
                    '_sub%d' % subset_id + '.shp')
                select_grid_polys, selected_gird_ids = get_grids_for_download_process(
                    grid_polys,
                    grid_ids,
                    ignore_ids,
                    max_grid_count,
                    grid_ids_2d,
                    visit_np,
                    select_grids_shp,
                    proj=gird_prj)
            if selected_gird_ids is None:
                break  # no more grids
            if len(selected_gird_ids) < 1:
                continue

            subset_info_txt = msg_file_pre + '%d.txt' % subset_id
            if os.path.isfile(subset_info_txt) is False:
                # init the file
                update_subset_info(subset_info_txt,
                                   key_list=[
                                       'id', 'createTime', 'shp', 'pre_status',
                                       'proc_status'
                                   ],
                                   info_list=[
                                       subset_id,
                                       str(datetime.now()), select_grids_shp,
                                       'notYet', 'notYet'
                                   ])

            # download and unpack ArcticDEM, do registration, send to curc
            if download_process_send_arctic_dem(
                    subset_info_txt,
                    r_working_dir,
                    process_node,
                    task_list,
                    b_send_data=b_no_slurm == False) is True:
                continue

            # copy file from remote machine
            if b_no_slurm is False:
                copy_results_from_remote_node()

            sync_log_files(process_node, r_log_dir, process_log_dir)

            # update complete id list
            update_complete_grid_list(grid_ids, task_list)

            # save this to disk, to check progress, if there are not too many grids (<100),
            # we can use this one to process without dividing grids into many subsets
            num_grid_ids = save_grid_ids_need_to_process(
                grid_ids, save_path=grid_ids_to_process_txt)
            if num_grid_ids < 1:
                make_note_all_task_done(extent_shp, process_node)

            if b_no_slurm:
                # process ArcticDEM using local computing resource
                if produce_dem_products(
                        task_list,
                        b_remove_job_folder=b_remove_tmp_folders,
                        no_slurm=b_no_slurm) is False:
                    break

            if b_divide_to_subsets is False:
                break

        elif 'login' in machine_name or 'shas' in machine_name or 'sgpu' in machine_name:  # curc
            # process ArcticDEM using the computing resource on CURC
            if produce_dem_products(
                    task_list,
                    b_remove_job_folder=b_remove_tmp_folders) is False:
                break
        else:
            print('unknown machine : %s ' % machine_name)
            break

        # remove no need dem files
        remove_no_need_dem_files(b_remove=b_dont_remove_DEM_files)

    # monitor results in remote computer (at most 200 checks, skipped when b_no_slurm is set)
    check_time = 200
    while check_time > 0 and b_no_slurm == False:
        # on tesia, uist, vpn-connected laptop
        if machine_name == 'ubuntu' or machine_name == 'uist-int-colorado-edu' or 'colorado.edu' in machine_name or 'MacBook' in machine_name:
            print(datetime.now(), 'wait 10 min for results in computing nodes')
            time.sleep(600)
            # copy file from remote machine
            copy_results_from_remote_node()
            # sync complete id list, dem info, no dem grids etcs.
            sync_log_files(process_node, r_log_dir, process_log_dir)
            # update complete id list
            update_complete_grid_list(grid_ids, task_list)
            # remove no need dem files
            remove_no_need_dem_files(b_remove=b_dont_remove_DEM_files)
            remote_sub_txt = get_subset_info_txt_list(
                'proc_status', ['notYet', 'working'],
                remote_node=process_node,
                remote_folder=r_working_dir)
            if len(remote_sub_txt) < 1 and check_time != 1:
                check_time = 1  # set to 1, then will only check one more time
            else:
                check_time -= 1
        else:
            break