def main():
    # run in ~/Data/Arctic/canada_arctic/autoMapping/multiArea_sub_images on tesia
    ini_list = io_function.get_file_list_by_pattern('./', 'area*.ini')
    txt_list = io_function.get_file_list_by_pattern('./', 'area*.txt')
    for txt in txt_list:
        ini_s = io_function.read_list_from_txt(txt)
        ini_list.extend(ini_s)
    ini_list = [os.path.abspath(item) for item in ini_list]
    file_names = [io_function.get_name_no_ext(item) for item in ini_list]
    cur_dir = os.getcwd()

    # show
    [print(item) for item in ini_list]
    time.sleep(3)

    for name, area_ini in zip(file_names, ini_list):
        work_dir = os.path.join(cur_dir, name)
        io_function.mkdir(work_dir)
        os.chdir(work_dir)

        # copy and modify main_para.ini
        io_function.copyfiletodir(os.path.join(cur_dir, 'main_para.ini'), './', overwrite=True)
        io_function.copyfiletodir(os.path.join(cur_dir, 'exe.sh'), './', overwrite=True)

        parameters.write_Parameters_file('main_para.ini', 'training_regions', area_ini)

        # run exe.sh
        res = os.system('./exe.sh')
        if res != 0:
            print(res)
            sys.exit(1)

        os.chdir(cur_dir)

def get_new_color_table_for_raster(raster, color_table_txt, out_dir=None):
    data, no_data = raster_io.read_raster_one_band_np(raster)
    # remove nodata
    data_1d = data.flatten()
    print(data_1d.shape)
    data_1d = data_1d[data_1d != no_data]
    print(data_1d.shape)
    unique_values = np.unique(data_1d)
    print('unique_values:', unique_values)

    save_color_table = io_function.get_name_no_ext(os.path.basename(raster)) + '_color.txt'
    if out_dir is not None:
        save_color_table = os.path.join(out_dir, save_color_table)

    save_lines = []
    with open(color_table_txt, 'r') as f_obj:
        all_lines = f_obj.readlines()
        # copy first two lines
        save_lines.append(all_lines[0])
        save_lines.append(all_lines[1])
        for idx in range(2, len(all_lines)):
            value = int(all_lines[idx].split(',')[0])
            if value in unique_values:
                save_lines.append(all_lines[idx])

    with open(save_color_table, 'w') as f_obj:
        f_obj.writelines(save_lines)
    print('Save color table to %s' % os.path.abspath(save_color_table))

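# Hedged usage sketch (not part of the original scripts): the file names below are
# placeholders. get_new_color_table_for_raster keeps only the color-table entries
# whose values actually occur in the raster.
def example_get_new_color_table():
    # 'classified_map.tif' and 'full_color_table.txt' are hypothetical inputs
    get_new_color_table_for_raster('classified_map.tif', 'full_color_table.txt')
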
def main():
    hillshade_dir = os.path.join(work_dir, 'hillshade_sub_images')
    dem_slope_8bit_dir = os.path.join(work_dir, 'dem_slope_8bit_sub_images')
    dem_relative_8bit_dir = os.path.join(work_dir, 'dem_relative_8bit_sub_images')
    other_dirs = [dem_slope_8bit_dir, dem_relative_8bit_dir]
    other_dirs_tifs = [io_function.get_file_list_by_ext('.tif', o_dir, bsub_folder=True) for o_dir in other_dirs]

    json_list = io_function.get_file_list_by_ext('.json', hillshade_dir, bsub_folder=True)
    json_base_list = [os.path.basename(item) for item in json_list]

    for json_path, base_name in zip(json_list, json_base_list):
        date_str, poly_num = get_date_str_poly_num(base_name)
        for tif_list in other_dirs_tifs:
            for tif in tif_list:
                name_noext = io_function.get_name_no_ext(tif)
                if date_str in name_noext and poly_num in name_noext:
                    # modify and save the json file
                    dst_path = os.path.join(os.path.dirname(tif), name_noext + '.json')
                    # io_function.copy_file_to_dst(json_path, dst_path)
                    data_dict = io_function.read_dict_from_txt_json(json_path)
                    data_dict['imagePath'] = os.path.basename(tif)
                    data_dict['imageData'] = None
                    io_function.save_dict_to_txt_json(dst_path, data_dict)
                    print('saving %s' % dst_path)
                    break

def tifs_to_png(image_dir):
    tif_list = io_function.get_file_list_by_pattern(image_dir, '*/*.tif')
    for idx, tif in enumerate(tif_list):
        print('tif to png: %d/%d tif' % (idx + 1, len(tif_list)))
        basename = io_function.get_name_no_ext(tif)
        save_path = os.path.join(image_dir, basename + '.png')
        if os.path.isfile(save_path):
            print('%s exists, skip' % save_path)
            continue
        command_str = "gdal_translate -of PNG %s %s" % (tif, save_path)
        basic.os_system_exit_code(command_str)

def get_tifs_bounding_boxes(image_dir):
    tif_list = io_function.get_file_list_by_pattern(image_dir, '*/*.tif')
    for idx, tif in enumerate(tif_list):
        print('get bounding box: %d/%d tif' % (idx + 1, len(tif_list)))
        basename = io_function.get_name_no_ext(tif)
        save_path = os.path.join(image_dir, basename + '_bound.geojson')
        if os.path.isfile(save_path):
            print('%s exists, skip' % save_path)
            continue
        command_str = imgExt + " %s -o tmp.gpkg" % tif
        basic.os_system_exit_code(command_str)
        command_str = "ogr2ogr -f GeoJSON -t_srs EPSG:3413 %s tmp.gpkg" % save_path  # note: projection is EPSG:3413
        basic.os_system_exit_code(command_str)
        io_function.delete_file_or_dir('tmp.gpkg')

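# Hedged usage sketch: 'segment_result_tifs' is a placeholder directory containing
# */*.tif. It shows the intended order of the two helpers above: convert each tif
# to PNG, then export each tif's footprint as a GeoJSON bounding box (in EPSG:3413).
def example_prepare_pngs_and_bounds():
    image_dir = 'segment_result_tifs'   # hypothetical folder
    tifs_to_png(image_dir)
    get_tifs_bounding_boxes(image_dir)
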
def check_one_extent(extent_shp):
    print('start to check %s' % extent_shp)

    # local_grid_id_txt is in the current dir
    # log_grid_ids_txt and log_grid_ids_txt_done are in grid_ids_txt_dir
    local_grid_id_txt, log_grid_ids_txt, log_grid_ids_txt_done = get_extent_grid_id_txt_done_files(extent_shp)
    if os.path.isfile(local_grid_id_txt) is False and os.path.isfile(log_grid_ids_txt):
        io_function.copy_file_to_dst(log_grid_ids_txt, local_grid_id_txt)
    if os.path.isfile(local_grid_id_txt) is False:
        print('the _grid_ids.txt for %s does not exist, maybe it has not started yet' % extent_shp)
        return False

    # check if it has been completed
    if os.path.isfile(log_grid_ids_txt_done):
        basic.outputlogMessage('Tasks for extent %s have been completed' % extent_shp)
        return True

    grid_ids_to_process_txt = io_function.get_name_no_ext(extent_shp) + '_' + 'grid_ids_to_process.txt'

    # read from the txt file directly
    grid_ids = [int(item) for item in io_function.read_list_from_txt(local_grid_id_txt)]
    update_complete_grid_list(grid_ids, task_list)  # check complete files, to see if it's done

    # remove grids that have been completed or ignored
    ignore_ids = get_complete_ignore_grid_ids()
    num_grid_ids = save_grid_ids_need_to_process(grid_ids, ignore_ids=ignore_ids, save_path=grid_ids_to_process_txt)
    if num_grid_ids < 1:
        print(datetime.now(), ' %s is marked as completed' % extent_shp)
        make_note_all_task_done(extent_shp, curc_node)
    else:
        print(datetime.now(), ' %s has not completed, %d grids to process, total: %d' %
              (extent_shp, num_grid_ids, len(grid_ids)))

    return True

def organize_files(sub_img_dirs, save_dir):
    if os.path.isdir(save_dir) is False:  # was os.path.isfile, which never detects an existing directory
        io_function.mkdir(save_dir)

    # get all png files
    png_list = []
    for img_dir in sub_img_dirs:
        pngs = io_function.get_file_list_by_pattern(img_dir, '*.png')
        png_list.extend(pngs)

    image_name_list = []
    images_dir = os.path.join(save_dir, 'images')
    imageBound_dir = os.path.join(save_dir, 'imageBound')
    objectPolygons_dir = os.path.join(save_dir, 'objectPolygons')
    io_function.mkdir(images_dir)
    io_function.mkdir(imageBound_dir)
    io_function.mkdir(objectPolygons_dir)

    for idx, png in enumerate(png_list):
        basename = io_function.get_name_no_ext(png)
        new_name = 'img' + str(idx + 1).zfill(6) + '_' + basename
        image_name_list.append(new_name)
        io_function.copy_file_to_dst(png, os.path.join(images_dir, new_name + '.png'))

        png_xml = png + '.aux.xml'
        if os.path.isfile(png_xml):
            io_function.copy_file_to_dst(png_xml, os.path.join(images_dir, new_name + '.png.aux.xml'))

        bound_path = png.replace('.png', '_bound.geojson')
        io_function.copy_file_to_dst(bound_path, os.path.join(imageBound_dir, new_name + '_bound.geojson'))

        digit_str = re.findall(r'_\d+', basename)
        id_str = digit_str[0][1:]
        object_path = os.path.join(os.path.dirname(png), 'id_%s.geojson' % id_str)
        io_function.copy_file_to_dst(object_path, os.path.join(objectPolygons_dir, new_name + '.geojson'))

    txt_path = os.path.join(save_dir, 'imageList.txt')
    io_function.save_list_to_txt(txt_path, image_name_list)

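# Hedged usage sketch (directory names are placeholders): gather the PNGs, bounding
# boxes, and object polygons from several sub-image folders into the
# images/imageBound/objectPolygons layout produced by organize_files.
def example_organize_files():
    organize_files(['hillshade_sub_images', 'dem_slope_8bit_sub_images'], 'organized_training_data')
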
def polygonize_one_label(idx, label_path, org_raster, stats, prefix, b_remove_nodata, process_num=1):

    save_dir = os.path.dirname(label_path)
    out_pre = io_function.get_name_no_ext(label_path)
    label_shp_path = os.path.join(save_dir, out_pre + '.shp')
    if os.path.isfile(label_shp_path):
        print('%s exist, skip' % label_shp_path)
        return idx, label_shp_path

    if b_remove_nodata is True:
        # remove nodata (it was copied from the input image)
        command_str = 'gdal_edit.py -unsetnodata ' + label_path
        res = os.system(command_str)
        if res != 0:
            return None, None

    # convert the label raster to a shapefile
    command_string = 'gdal_polygonize.py -8 %s -b 1 -f "ESRI Shapefile" %s' % (label_path, label_shp_path)
    res = os.system(command_string)
    if res != 0:
        return None, None

    if org_raster is not None and stats is not None and prefix is not None:
        # get dem elevation information for each polygon
        raster_statistic.zonal_stats_multiRasters(label_shp_path, org_raster, stats=stats, prefix=prefix,
                                                  process_num=process_num)

    return idx, label_shp_path

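# Hedged usage sketch (file names are placeholders): polygonize a single label raster
# and attach zonal statistics of an original raster (e.g. a DEM difference) to each
# polygon, using the 'demD' prefix convention seen elsewhere in these scripts.
def example_polygonize_one_label():
    idx, label_shp = polygonize_one_label(0, 'patch_0_label.tif', 'dem_diff.tif',
                                          stats=['mean', 'std'], prefix='demD',
                                          b_remove_nodata=True, process_num=1)
    print(idx, label_shp)
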
def segment_a_patch(idx, patch, patch_count, img_path, org_raster, b_save_patch_label):

    print('tile: %d / %d' % (idx + 1, patch_count))
    image_name_no_ext = io_function.get_name_no_ext(img_path)
    patch_dir = image_name_no_ext + '_patch%d' % idx
    patch_label_path = os.path.join(patch_dir, image_name_no_ext + '_patch%d_label.tif' % idx)
    if b_save_patch_label is True:
        if os.path.isdir(patch_dir) is False:
            io_function.mkdir(patch_dir)
        if os.path.isfile(patch_label_path):
            print('%s exists, skip' % patch_label_path)
            return patch, patch_label_path, None, None

    # read image
    one_band_img, nodata = raster_io.read_raster_one_band_np(img_path, boundary=patch)

    # apply a median filter to the image (remove some noise)
    one_band_img = cv2.medianBlur(one_band_img, 3)  # with kernel=3, cannot accept int32

    # segmentation algorithms (the output of these algorithms is not always good, need to choose the parameters carefully)
    # out_labels = watershed_segmentation(one_band_img)
    # out_labels = k_mean_cluster_segmentation(one_band_img)
    out_labels = quickshift_segmentaion(one_band_img, ratio=0.3, kernel_size=5, max_dist=10,
                                        sigma=1, convert2lab=False)
    # out_labels = mean_shift_segmentation(one_band_img)
    # print('min and max labels of out_labels', np.min(out_labels), np.max(out_labels))

    if b_save_patch_label is True:
        # save the label
        raster_io.save_numpy_array_to_rasterfile(out_labels, patch_label_path, img_path,
                                                 boundary=patch)  # it copies nodata, need to unset it later
        return patch, patch_label_path, None, None

    # calculate the attributes based on the original data
    object_attributes = {}  # object id (label) and attributes (list)
    if org_raster is not None:
        org_img_b1, org_nodata = raster_io.read_raster_one_band_np(org_raster, boundary=patch)

        # get regions (the labels output by segmentation are not unique across superpixels)
        # regions = measure.regionprops(out_labels, intensity_image=org_img_b1)   # regions is based on out_labels, so it has the same issue
        # print('region count from sk-image measure:', len(regions))

        label_list = np.unique(out_labels)
        # get statistics for each segmented object (label)
        for label in label_list:
            in_array = org_img_b1[out_labels == label]
            object_attributes[label] = get_stastics_from_array(in_array, org_nodata)

        return patch, out_labels, nodata, object_attributes

    return patch, out_labels, nodata, None

def get_subimages_SpaceNet(input_image_dir, image_pattern, input_polygon_dir, polygon_pattern,
                           subImage_dir, subLabel_dir, process_num=1, burn_value=1, b_no_label_image=False):

    sub_images_list = io_function.get_file_list_by_pattern(input_image_dir, image_pattern)
    if len(sub_images_list) < 1:
        basic.outputlogMessage('No sub-images in: %s with pattern: %s' % (input_image_dir, image_pattern))
        return False

    sub_images_count = len(sub_images_list)
    # do we need to check the projection of each sub-image?

    if os.path.isdir(subLabel_dir) is False:
        io_function.mkdir(subLabel_dir)
    if os.path.isdir(subImage_dir) is False:
        io_function.mkdir(subImage_dir)

    label_path_list = []
    if b_no_label_image is True:
        pass
    else:
        # polygon file list
        polygon_files_list = io_function.get_file_list_by_pattern(input_polygon_dir, polygon_pattern)
        if len(polygon_files_list) < 1:
            basic.outputlogMessage('No polygon files in: %s with pattern: %s' % (input_polygon_dir, polygon_pattern))
            return False

        polygon_name_list = [os.path.basename(item) for item in polygon_files_list]

        # create label images
        for idx, tif_path in enumerate(sub_images_list):
            print('%d / %d create label raster for %s' % (idx, sub_images_count, tif_path))
            # find the corresponding polygon file
            poly_path = find_corresponding_geojson_SpaceNet(tif_path, polygon_files_list, polygon_name_list)
            if poly_path is None:
                print('Warning, cannot find the corresponding polygon file')
                continue

            save_path = os.path.join(subLabel_dir, io_function.get_name_no_ext(poly_path) + '.tif')
            if os.path.isfile(save_path):
                print('warning, %s already exists, skip' % save_path)
                label_path_list.append(save_path)
                continue
            if rasterize_polygons_to_ref_raster(tif_path, poly_path, burn_value, None, save_path,
                                                datatype='Byte', ignore_edge=True) is True:
                label_path_list.append(save_path)

    # copy sub-images, add them to the txt file
    with open('sub_images_labels_list.txt', 'a') as f_obj:
        for tif_path, label_file in zip(sub_images_list, label_path_list):
            if label_file is None:
                continue
            dst_subImg = os.path.join(subImage_dir, os.path.basename(tif_path))

            # copy sub-images
            io_function.copy_file_to_dst(tif_path, dst_subImg, overwrite=False)

            sub_image_label_str = dst_subImg + ":" + label_file + '\n'
            f_obj.writelines(sub_image_label_str)

    return True

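# Hedged usage sketch for the SpaceNet-style preparation above; the directory names
# and patterns are placeholders, not paths from the original project. It rasterizes
# each matching geojson into a label image and writes image:label pairs to
# sub_images_labels_list.txt in the current directory.
def example_get_subimages_SpaceNet():
    get_subimages_SpaceNet('SN_images', '*.tif', 'SN_geojson', '*.geojson',
                           'subImages', 'subLabels', process_num=1, burn_value=1,
                           b_no_label_image=False)
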
def remove_merge_polygon_in_one_shp(in_shp, org_raster, attribute_name, attribute_range, min_area, max_area, process_num=1):
    # attribute_range: [min, max]
    lower = attribute_range[0]
    upper = attribute_range[1]

    save_shp = io_function.get_name_by_adding_tail(in_shp, 'post')
    if os.path.isfile(save_shp):
        basic.outputlogMessage('%s exists, skip' % save_shp)
        return save_shp

    shp_pre = io_function.get_name_no_ext(in_shp)

    # read polygons and labels from the segmentation algorithm, note: some polygons may have the same label
    polygons, attr_value_list = vector_gpd.read_polygons_attributes_list(in_shp, attribute_name)
    print('Read %d polygons' % len(polygons))
    if attr_value_list is None:
        raise ValueError('%s not in %s, need to remove it and then re-create' % (attribute_name, in_shp))

    remain_polyons = []
    rm_min_area_count = 0
    rm_att_value_count = 0
    for poly, att_value in zip(polygons, attr_value_list):
        if poly.area < min_area:
            rm_min_area_count += 1
            continue
        if lower is None:
            if att_value >= upper:
                rm_att_value_count += 1
                continue
        elif upper is None:
            if att_value <= lower:
                rm_att_value_count += 1
                continue
        else:
            # out of range, remove
            if att_value < lower or att_value > upper:
                rm_att_value_count += 1
                continue
        remain_polyons.append(poly)

    print('remove %d polygons based on min_area, %d polygons based on attribute_range, remain %d ones' %
          (rm_min_area_count, rm_att_value_count, len(remain_polyons)))

    if len(remain_polyons) > 1:
        # we should only merge polygons with similar reduction, but we have already removed polygons with mean reduction > threshold
        # merge touching polygons
        print(timeTools.get_now_time_str(), 'start building adjacent_matrix')
        # adjacent_matrix = vector_features.build_adjacent_map_of_polygons(remain_polyons)
        machine_name = os.uname()[1]
        # if 'login' in machine_name or 'shas' in machine_name or 'sgpu' in machine_name:
        #     print('Warning, some problem of parallel running in build_adjacent_map_of_polygons on curc, but ok in my laptop and uist, change process_num = 1')
        #     process_num = 1
        adjacent_matrix = vector_gpd.build_adjacent_map_of_polygons(remain_polyons, process_num=process_num)
        print(timeTools.get_now_time_str(), 'finish building adjacent_matrix')

        if adjacent_matrix is False:
            return False
        merged_polygons = vector_features.merge_touched_polygons(remain_polyons, adjacent_matrix)
        print(timeTools.get_now_time_str(), 'finish merging touched polygons, get %d ones' % (len(merged_polygons)))

        # remove large ones
        remain_polyons = []
        rm_max_area_count = 0
        for poly in merged_polygons:
            if poly.area > max_area:
                rm_max_area_count += 1
                continue
            remain_polyons.append(poly)

        print('remove %d polygons based on max_area, remain %d' % (rm_max_area_count, len(remain_polyons)))

    wkt = map_projection.get_raster_or_vector_srs_info_wkt(in_shp)

    polyons_noMulti = [vector_gpd.MultiPolygon_to_polygons(idx, poly) for idx, poly in enumerate(remain_polyons)]
    remain_polyons = []
    for polys in polyons_noMulti:
        polys = [poly for poly in polys if poly.area > min_area]  # remove tiny polygons before buffer
        remain_polyons.extend(polys)
    print('convert MultiPolygon to polygons, remove some small polygons, remain %d' % (len(remain_polyons)))

    # based on the merged polygons, calculate the mean dem diff and the relative dem diff
    buffer_surrounding = 20  # meters
    surrounding_polygons = vector_gpd.get_surrounding_polygons(remain_polyons, buffer_surrounding)
    surrounding_shp = io_function.get_name_by_adding_tail(in_shp, 'surrounding')
    surr_pd = pd.DataFrame({'Polygon': surrounding_polygons})
    vector_gpd.save_polygons_to_files(surr_pd, 'Polygon', wkt, surrounding_shp)
    raster_statistic.zonal_stats_multiRasters(surrounding_shp, org_raster, stats=['mean', 'std', 'count'],
                                              prefix='demD', process_num=process_num)

    # calculate attributes of the remaining ones: area, dem_diff: mean, std
    merged_pd = pd.DataFrame({'Polygon': remain_polyons})
    merged_shp = io_function.get_name_by_adding_tail(in_shp, 'merged')
    vector_gpd.save_polygons_to_files(merged_pd, 'Polygon', wkt, merged_shp)
    # note: dem_diff_tif and dem_diff_thread_m are referenced below but not passed in;
    # they must be defined at module level for this function to run
    raster_statistic.zonal_stats_multiRasters(merged_shp, dem_diff_tif, stats=['mean', 'std', 'count'],
                                              prefix='demD', process_num=process_num)

    # calculate the relative dem diff
    surr_dem_diff_list = vector_gpd.read_attribute_values_list(surrounding_shp, 'demD_mean')
    merge_poly_dem_diff_list = vector_gpd.read_attribute_values_list(merged_shp, 'demD_mean')
    if len(surr_dem_diff_list) != len(merge_poly_dem_diff_list):
        raise ValueError('The number of surr_dem_diff_list and merge_poly_dem_diff_list is different')
    relative_dem_diff_list = [mer - sur for sur, mer in zip(surr_dem_diff_list, merge_poly_dem_diff_list)]

    merge_poly_demD_std_list = vector_gpd.read_attribute_values_list(merged_shp, 'demD_std')
    merge_poly_demD_count_list = vector_gpd.read_attribute_values_list(merged_shp, 'demD_count')

    # filter polygons by relative dem diff and area
    save_polyons = []
    save_demD_mean_list = []
    save_demD_std_list = []
    save_demD_count_list = []
    save_rel_diff_list = []
    save_surr_demD_list = []
    rm_rel_dem_diff_count = 0
    rm_min_area_count = 0
    for idx in range(len(remain_polyons)):
        # relative dem diff
        if relative_dem_diff_list[idx] > dem_diff_thread_m:
            rm_rel_dem_diff_count += 1
            continue
        # when converting a MultiPolygon to Polygons, some small polygons may be created
        if remain_polyons[idx].area < min_area:
            rm_min_area_count += 1
            continue

        save_polyons.append(remain_polyons[idx])
        save_demD_mean_list.append(merge_poly_dem_diff_list[idx])
        save_demD_std_list.append(merge_poly_demD_std_list[idx])
        save_demD_count_list.append(merge_poly_demD_count_list[idx])
        save_rel_diff_list.append(relative_dem_diff_list[idx])
        save_surr_demD_list.append(surr_dem_diff_list[idx])

    print('remove %d polygons based on relative rel_demD and %d based on min_area, remain %d' %
          (rm_rel_dem_diff_count, rm_min_area_count, len(save_polyons)))

    poly_ids = [item + 1 for item in range(len(save_polyons))]
    poly_areas = [poly.area for poly in save_polyons]

    save_pd = pd.DataFrame({'poly_id': poly_ids, 'poly_area': poly_areas, 'demD_mean': save_demD_mean_list,
                            'demD_std': save_demD_std_list, 'demD_count': save_demD_count_list,
                            'surr_demD': save_surr_demD_list, 'rel_demD': save_rel_diff_list,
                            'Polygon': save_polyons})
    vector_gpd.save_polygons_to_files(save_pd, 'Polygon', wkt, save_shp)

    # add the date difference if it is available
    date_diff_base = os.path.basename(dem_diff_tif).replace('DEM_diff', 'date_diff')
    date_diff_tif = os.path.join(os.path.dirname(dem_diff_tif), date_diff_base)
    if os.path.isfile(date_diff_tif):
        raster_statistic.zonal_stats_multiRasters(save_shp, date_diff_tif, stats=['mean', 'std'],
                                                  prefix='dateD', process_num=process_num)

    return save_shp

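# Hedged usage sketch (paths and thresholds are placeholders): keep only polygons whose
# 'demD_mean' is below -0.5 (attribute_range [None, -0.5]), merge the touching ones, and
# drop merged polygons smaller than 900 m^2 or larger than 500000 m^2. Note that the
# function also expects module-level dem_diff_tif and dem_diff_thread_m to be set.
def example_remove_merge_polygon():
    remove_merge_polygon_in_one_shp('segment_result.shp', 'dem_diff.tif', 'demD_mean',
                                    [None, -0.5], 900, 500000, process_num=4)
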
def main(options, args):

    extent_shp = args[0]
    task_list = [args[item] for item in range(1, len(args))]
    # task_name = args[1]
    if len(task_list) < 1:
        raise ValueError('There is no task: %s' % str(task_list))

    # local_grid_id_txt is in the current dir
    # log_grid_ids_txt and log_grid_ids_txt_done are in grid_ids_txt_dir
    local_grid_id_txt, log_grid_ids_txt, log_grid_ids_txt_done = get_extent_grid_id_txt_done_files(extent_shp)
    # check if it has been completed
    if os.path.isfile(log_grid_ids_txt_done):
        basic.outputlogMessage('Tasks for extent %s have been completed' % extent_shp)
        return True

    r_working_dir = '/scratch/summit/lihu9680/Arctic/dem_processing' if options.remote_working_dir is None else options.remote_working_dir
    r_log_dir = '/scratch/summit/lihu9680/ArcticDEM_tmp_dir/log_dir' if options.remote_log_dir is None else options.remote_log_dir
    process_node = '$curc_host' if options.process_node is None else options.process_node
    download_node = '$curc_host' if options.download_node is None else options.download_node

    max_grid_count = options.max_grids
    b_remove_tmp_folders = options.b_remove_tmp_folders
    b_dont_remove_DEM_files = options.b_dont_remove_DEM_files
    b_no_slurm = options.b_no_slurm
    b_divide_to_subsets = True

    # modify the folder name of subsets
    global subset_shp_dir
    subset_shp_dir = subset_shp_dir + '_' + io_function.get_name_no_ext(extent_shp)
    global msg_file_pre
    msg_file_pre = io_function.get_name_no_ext(extent_shp) + '_' + msg_file_pre

    grid_ids_to_process_txt = io_function.get_name_no_ext(extent_shp) + '_' + 'grid_ids_to_process.txt'

    # build the map of DEMs covering each grid (takes time, but only needs to run once at the beginning)
    build_dict_of_dem_cover_grid_ids(dem_strip_shp, grid_20_shp, strip_dem_cover_grids_txt)
    build_dict_of_dem_cover_grid_ids(dem_tile_shp, grid_20_shp, tile_dem_cover_grids_txt)

    # get grids for processing
    # read grids and ids
    time0 = time.time()
    all_grid_polys, all_ids = vector_gpd.read_polygons_attributes_list(grid_20_shp, 'id')
    print('time cost of read polygons and attributes', time.time() - time0)

    gird_prj = map_projection.get_raster_or_vector_srs_info_proj4(grid_20_shp)

    # get grid ids based on the input extent
    grid_polys, grid_ids = get_grid_20(extent_shp, all_grid_polys, all_ids)

    # based on the extent shape, subset grid_20_id_raster
    # using gdalwarp to crop the mask also has a 0.5 pixel offset, so don't use it
    # grid_20_id_raster_sub = io_function.get_name_by_adding_tail(os.path.basename(grid_20_id_raster),'sub')
    # if RSImageProcess.subset_image_by_shapefile(grid_20_id_raster,extent_shp,save_path=grid_20_id_raster_sub) is False:
    #     return False

    # read grid_ids_2d, then mask it
    grid_ids_2d, grid_nodata = raster_io.read_raster_one_band_np(grid_20_id_raster)  # 2d array of grid ids
    # rasterize grid_polys, will serve as the mask
    grid_ids_2d_mask = raster_io.burn_polygons_to_a_raster(grid_20_id_raster, grid_polys, 1, None)
    # raster_io.save_numpy_array_to_rasterfile(grid_ids_2d_mask,'grid_ids_2d_mask.tif',grid_20_id_raster,nodata=255)  # save to disk for checking
    loc_masked_out = np.where(grid_ids_2d_mask != 1)
    # grid_ids_2d[ loc_masked_out ] = grid_nodata
    visit_np = np.zeros_like(grid_ids_2d, dtype=np.uint8)
    visit_np[loc_masked_out] = 1  # 1 indicates already visited
    visit_np[np.where(grid_ids_2d == grid_nodata)] = 1  # 1 indicates already visited

    subset_id = -1
    # on tesia, uist, vpn-connected laptop
    if machine_name == 'ubuntu' or machine_name == 'uist-int-colorado-edu' or 'colorado.edu' in machine_name or 'MacBook' in machine_name:
        io_function.mkdir(subset_shp_dir)
        sync_log_files(process_node, r_log_dir, process_log_dir)
        update_complete_grid_list(grid_ids, task_list)

    while True:
        subset_id += 1
        # on tesia, uist, vpn-connected laptop
        if machine_name == 'ubuntu' or machine_name == 'uist-int-colorado-edu' or 'colorado.edu' in machine_name or 'MacBook' in machine_name:
            # remove grids that have been completed or ignored
            ignore_ids = get_complete_ignore_grid_ids()
            num_grid_ids = save_grid_ids_need_to_process(grid_ids, ignore_ids=ignore_ids,
                                                         save_path=grid_ids_to_process_txt)
            if num_grid_ids < 1:
                make_note_all_task_done(extent_shp, process_node)

            # if the input is not a shapefile, then don't divide it into many subsets
            if extent_shp.endswith('.txt'):
                select_grid_polys, selected_gird_ids = grid_polys, grid_ids
                if len(selected_gird_ids) > 2000:
                    raise ValueError('There are too many grids to process at once')
                b_divide_to_subsets = False
                subset_id = 999999
                select_grids_shp = os.path.join(subset_shp_dir,
                                                io_function.get_name_no_ext(extent_shp) + '_sub%d' % subset_id + '.shp')
                save_selected_girds_and_ids(selected_gird_ids, select_grid_polys, gird_prj, select_grids_shp)
            else:
                select_grids_shp = os.path.join(subset_shp_dir,
                                                io_function.get_name_no_ext(extent_shp) + '_sub%d' % subset_id + '.shp')
                select_grid_polys, selected_gird_ids = get_grids_for_download_process(grid_polys, grid_ids, ignore_ids,
                                                                                      max_grid_count, grid_ids_2d,
                                                                                      visit_np, select_grids_shp,
                                                                                      proj=gird_prj)
            if selected_gird_ids is None:
                break  # no more grids
            if len(selected_gird_ids) < 1:
                continue

            subset_info_txt = msg_file_pre + '%d.txt' % subset_id
            if os.path.isfile(subset_info_txt) is False:
                # init the file
                update_subset_info(subset_info_txt,
                                   key_list=['id', 'createTime', 'shp', 'pre_status', 'proc_status'],
                                   info_list=[subset_id, str(datetime.now()), select_grids_shp, 'notYet', 'notYet'])

            # download and unpack ArcticDEM, do registration, send to curc
            if download_process_send_arctic_dem(subset_info_txt, r_working_dir, process_node, task_list,
                                                b_send_data=b_no_slurm == False) is True:
                continue

            # copy files from the remote machine
            if b_no_slurm is False:
                copy_results_from_remote_node()
                sync_log_files(process_node, r_log_dir, process_log_dir)

                # update the complete id list
                update_complete_grid_list(grid_ids, task_list)

            # save this to disk to check progress; if there are not too many grids (<100),
            # we can use this one to process without dividing the grids into many subsets
            num_grid_ids = save_grid_ids_need_to_process(grid_ids, save_path=grid_ids_to_process_txt)
            if num_grid_ids < 1:
                make_note_all_task_done(extent_shp, process_node)

            if b_no_slurm:
                # process ArcticDEM using local computing resources
                if produce_dem_products(task_list, b_remove_job_folder=b_remove_tmp_folders,
                                        no_slurm=b_no_slurm) is False:
                    break

            if b_divide_to_subsets is False:
                break

        elif 'login' in machine_name or 'shas' in machine_name or 'sgpu' in machine_name:  # curc
            # process ArcticDEM using the computing resources on CURC
            if produce_dem_products(task_list, b_remove_job_folder=b_remove_tmp_folders) is False:
                break
        else:
            print('unknown machine : %s ' % machine_name)
            break

        # remove DEM files that are no longer needed
        remove_no_need_dem_files(b_remove=b_dont_remove_DEM_files)

    # monitor results on the remote computer
    check_time = 200
    while check_time > 0 and b_no_slurm == False:
        # on tesia, uist, vpn-connected laptop
        if machine_name == 'ubuntu' or machine_name == 'uist-int-colorado-edu' or 'colorado.edu' in machine_name or 'MacBook' in machine_name:
            print(datetime.now(), 'wait 10 min for results in computing nodes')
            time.sleep(600)
            # copy files from the remote machine
            copy_results_from_remote_node()
            # sync complete id list, dem info, no dem grids, etc.
            sync_log_files(process_node, r_log_dir, process_log_dir)

            # update the complete id list
            update_complete_grid_list(grid_ids, task_list)

            # remove DEM files that are no longer needed
            remove_no_need_dem_files(b_remove=b_dont_remove_DEM_files)

            remote_sub_txt = get_subset_info_txt_list('proc_status', ['notYet', 'working'],
                                                      remote_node=process_node, remote_folder=r_working_dir)
            if len(remote_sub_txt) < 1 and check_time != 1:
                check_time = 1  # set to 1, then it will only check one more time
            else:
                check_time -= 1
        else:
            break

def merge_subImages_from_gan(multi_gan_source_regions, multi_gan_regions, gan_working_dir, gan_dir_pre_name,
                             save_image_dir, save_label_dir):
    '''
    merge translated sub-images from GAN into the original sub_images
    :param multi_gan_regions:
    :param gan_working_dir:
    :param gan_dir_pre_name:
    :return:
    '''
    current_dir = os.getcwd()

    sub_img_label_txt_noGAN, sub_img_label_txt, area_ini_sub_images_labels_dict = original_sub_images_labels_list_before_gan()

    # # get original sub-images and labels
    # org_sub_images = []
    # org_sub_labels = []
    # with open(sub_img_label_txt_noGAN) as txt_obj:
    #     line_list = [name.strip() for name in txt_obj.readlines()]
    #     for line in line_list:
    #         sub_image, sub_label = line.split(':')
    #         org_sub_images.append(os.path.join(current_dir,sub_image))
    #         org_sub_labels.append(os.path.join(current_dir,sub_label))

    # merge new sub-images, and copy sub-labels if necessary
    new_sub_images = []
    new_sub_labels = []
    area_ini_sub_images_labels = io_function.read_dict_from_txt_json(area_ini_sub_images_labels_dict)

    # copy the original sub-images and labels before GAN
    for key in area_ini_sub_images_labels.keys():
        for line in area_ini_sub_images_labels[key]:
            sub_image, sub_label = line.split(':')
            new_sub_images.append(sub_image)
            new_sub_labels.append(sub_label)

    for area_idx, (area_ini, area_src_ini) in enumerate(zip(multi_gan_regions, multi_gan_source_regions)):

        area_name = parameters.get_string_parameters(area_ini, 'area_name')
        area_remark = parameters.get_string_parameters(area_ini, 'area_remark')
        area_time = parameters.get_string_parameters(area_ini, 'area_time')

        gan_project_save_dir = get_gan_project_save_dir(gan_working_dir, gan_dir_pre_name, area_name, area_remark,
                                                        area_time, area_src_ini)

        org_sub_images = []
        org_sub_labels = []
        for line in area_ini_sub_images_labels[os.path.basename(area_src_ini)]:
            sub_image, sub_label = line.split(':')
            org_sub_images.append(os.path.join(current_dir, sub_image))
            org_sub_labels.append(os.path.join(current_dir, sub_label))

        # the new images keep the same order as the original images
        for idx, (org_img, org_label) in enumerate(zip(org_sub_images, org_sub_labels)):
            new_img = os.path.join(gan_project_save_dir, 'subImages_translate', 'I%d.tif' % idx)
            if os.path.isfile(new_img) is False:
                basic.outputlogMessage('warning, %d th image does not exist, '
                                       'may exceed gen_max_dataset_size, skip the following images ' % idx)
                break

            # check height, width, band count, datatype
            height, width, count, dtype = raster_io.get_height_width_bandnum_dtype(new_img)
            o_height, o_width, o_count, o_dtype = raster_io.get_height_width_bandnum_dtype(org_img)
            if height != o_height or width != o_width or count != o_count or dtype != o_dtype:
                raise ValueError('inconsistency between the new GAN image and the original image: %s vs %s' %
                                 (str([height, width, count, dtype]), str([o_height, o_width, o_count, o_dtype])))

            # copy the sub-image and sub-label
            new_file_name_no_ext = io_function.get_name_no_ext(org_img) + '_' + os.path.basename(gan_project_save_dir)
            save_img_path = os.path.join(save_image_dir, new_file_name_no_ext + '_gan.tif')
            save_label_path = os.path.join(save_label_dir, new_file_name_no_ext + '_label.tif')
            io_function.copy_file_to_dst(new_img, save_img_path, overwrite=False)
            io_function.copy_file_to_dst(org_label, save_label_path, overwrite=False)

            new_sub_images.append(save_img_path)
            new_sub_labels.append(save_label_path)

    # save the new images_labels_list.txt, overwriting the original one
    with open(sub_img_label_txt, 'w') as f_obj:
        lines = [img + ':' + label + '\n' for img, label in zip(new_sub_images, new_sub_labels)]
        f_obj.writelines(lines)

    return True