def main():
    hillshade_dir = os.path.join(work_dir, 'hillshade_sub_images')
    dem_slope_8bit_dir = os.path.join(work_dir, 'dem_slope_8bit_sub_images')
    dem_relative_8bit_dir = os.path.join(work_dir, 'dem_relative_8bit_sub_images')
    other_dirs = [dem_slope_8bit_dir, dem_relative_8bit_dir]
    other_dirs_tifs = [io_function.get_file_list_by_ext('.tif', o_dir, bsub_folder=True)
                       for o_dir in other_dirs]

    json_list = io_function.get_file_list_by_ext('.json', hillshade_dir, bsub_folder=True)
    json_base_list = [os.path.basename(item) for item in json_list]

    for json_path, base_name in zip(json_list, json_base_list):
        date_str, poly_num = get_date_str_poly_num(base_name)
        for tif_list in other_dirs_tifs:
            for tif in tif_list:
                name_noext = io_function.get_name_no_ext(tif)
                if date_str in name_noext and poly_num in name_noext:
                    # modify and save the json file
                    dst_path = os.path.join(os.path.dirname(tif), name_noext + '.json')
                    # io_function.copy_file_to_dst(json_path, dst_path)
                    data_dict = io_function.read_dict_from_txt_json(json_path)
                    data_dict['imagePath'] = os.path.basename(tif)
                    data_dict['imageData'] = None
                    io_function.save_dict_to_txt_json(dst_path, data_dict)
                    print('saving %s' % dst_path)
                    break
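# A minimal, self-contained sketch of the json rewrite step above, using only
# the standard library. It assumes io_function.read_dict_from_txt_json and
# save_dict_to_txt_json are thin wrappers around json.load/json.dump (an
# assumption, not confirmed in this file).
import json

def retarget_labelme_json(src_json, dst_json, new_image_name):
    # point a labelme-style annotation at a different image and drop the
    # embedded image bytes, mirroring the loop body above
    with open(src_json) as f_obj:
        data = json.load(f_obj)
    data['imagePath'] = new_image_name
    data['imageData'] = None
    with open(dst_json, 'w') as f_obj:
        json.dump(data, f_obj, indent=2)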
def get_loss_learning_rate_list(log_dir):
    # add the tensorboard in the tf1x version
    tf1x_dir = os.path.join(os.path.dirname(os.path.dirname(tf1x_python)),
                            'lib', 'python3.7', 'site-packages')
    sys.path.insert(0, tf1x_dir)
    from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

    tf_size_guidance = {
        'compressedHistograms': 10,
        'images': 0,
        'scalars': 0,  # set as 0 to load all scalars
        'histograms': 1
    }
    events_files = io_function.get_file_list_by_pattern(log_dir, 'events*')
    if len(events_files) < 1:
        print('warning, no events file in %s' % log_dir)
        return None
    event_acc = EventAccumulator(log_dir, tf_size_guidance)
    event_acc.Reload()

    # show all tags in the log file
    # tag_dict = event_acc.Tags()
    # io_function.save_dict_to_txt_json('event_acc.txt', tag_dict)
    # "scalars": [
    #     "clone_0/Losses/clone_0//clone_loss",
    #     "total_loss_1",
    #     "learning_rate",
    #     "losses/clone_0/semantic_merged_logits/mul_1",
    #     "clone_0/Losses/regularization_loss",
    #     "global_step/sec"  (how much time each step takes)
    # ],

    loss_learnrate_dic = {}
    total_loss_1_event = event_acc.Scalars('total_loss_1')
    # each item is (wall_time, step, value)
    total_loss_list = [item[2] for item in total_loss_1_event]
    loss_learnrate_dic['total_loss'] = total_loss_list
    step_list = [item[1] for item in total_loss_1_event]
    # datetime.fromtimestamp() can convert wall_time to a datetime
    wall_time_list = [item[0] for item in total_loss_1_event]

    learning_rate_event = event_acc.Scalars('learning_rate')
    learning_rate_list = [item[2] for item in learning_rate_event]
    loss_learnrate_dic['learning_rate'] = learning_rate_list
    loss_learnrate_dic['step'] = step_list
    loss_learnrate_dic['wall_time'] = wall_time_list

    io_function.save_dict_to_txt_json(os.path.join(log_dir, 'loss_learning_rate.txt'),
                                      loss_learnrate_dic)
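# Hedged follow-up sketch: plot the curves that get_loss_learning_rate_list
# writes out. Assumes matplotlib is installed (it is not imported elsewhere in
# this file) and that loss_learning_rate.txt is the json-format dict saved
# above; everything else here is illustrative.
import json
import matplotlib.pyplot as plt

def plot_loss_learning_rate(txt_path):
    with open(txt_path) as f_obj:
        rec = json.load(f_obj)
    fig, ax_loss = plt.subplots()
    ax_loss.plot(rec['step'], rec['total_loss'], 'b-')
    ax_loss.set_xlabel('step')
    ax_loss.set_ylabel('total loss')
    ax_lr = ax_loss.twinx()  # learning rate on a second y-axis
    ax_lr.plot(rec['step'], rec['learning_rate'], 'r--')
    ax_lr.set_ylabel('learning rate')
    fig.savefig('loss_learning_rate.png', dpi=150)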
def update_subset_info(txt_path, key_list=None, info_list=None):
    # maintain the info of a subset for processing, as a dict:
    # 'id': subset_id
    # 'shp': the shapefile containing all grids in this subset
    # 'pre_status': the status of downloading and registration of ArcticDEM,
    #               values: 'notYet', 'working', 'done'
    # 'proc_status': the status of processing ArcticDEM,
    #               values: 'notYet', 'working', 'done'
    info_dict = {}
    if os.path.isfile(txt_path):
        info_dict = io_function.read_dict_from_txt_json(txt_path)
    if isinstance(key_list, str):
        key_list = [key_list]
    if isinstance(info_list, str):
        info_list = [info_list]
    for key, info in zip(key_list, info_list):
        info_dict[key] = info
    io_function.save_dict_to_txt_json(txt_path, info_dict)
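# Usage sketch for update_subset_info; the subset id, shapefile name, and txt
# path below are hypothetical placeholders:
#   update_subset_info('subset_info_1.txt',
#                      key_list=['id', 'shp', 'pre_status', 'proc_status'],
#                      info_list=[1, 'subset_grids_1.shp', 'notYet', 'notYet'])
# A later call with a subset of the keys only updates those entries:
#   update_subset_info('subset_info_1.txt', key_list='pre_status', info_list='done')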
def get_miou_list_class_all(log_dir, class_num):
    # add the tensorboard in the tf1x version
    tf1x_dir = os.path.join(os.path.dirname(os.path.dirname(tf1x_python)),
                            'lib', 'python3.7', 'site-packages')
    sys.path.insert(0, tf1x_dir)
    from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

    # Loading too much data is slow...
    # tf_size_guidance tells the EventAccumulator how much data to store in
    # memory. The DEFAULT_SIZE_GUIDANCE tries not to store too much so as to
    # avoid OOMing the client. The size_guidance should be a map from a
    # `tagType` string to an integer representing the number of items to keep
    # per tag for items of that `tagType`. If the size is 0, all events are stored.
    tf_size_guidance = {
        'compressedHistograms': 10,
        'images': 0,
        'scalars': 0,  # set as 0 to load all scalars
        'histograms': 1
    }
    miou_dic = {'step': [0]}  # step 0, as a placeholder
    events_files = io_function.get_file_list_by_pattern(log_dir, 'events*')
    if len(events_files) < 1:
        print('warning, no events file in %s' % log_dir)
        return miou_dic
    event_acc = EventAccumulator(log_dir, tf_size_guidance)
    event_acc.Reload()

    # show all tags in the log file
    tag_dict = event_acc.Tags()
    # io_function.save_dict_to_txt_json('event_acc.txt', tag_dict)
    scalar_tags = tag_dict['scalars']
    # print(scalar_tags)

    for class_id in range(class_num):
        name = 'class_%d' % class_id
        tag = 'eval/miou_1.0_' + name
        if tag in scalar_tags:
            miou_class_event = event_acc.Scalars(tag)
            # each item is (wall_time, step, value)
            miou_class_list = [item[2] for item in miou_class_event]
            # step_list = [item[1] for item in miou_class_event]
            # print(step_list)
            miou_dic[name] = miou_class_list

    tag = 'eval/miou_1.0_overall'
    if tag in scalar_tags:
        miou_class_overall = event_acc.Scalars(tag)
        miou_class_list = [item[2] for item in miou_class_overall]
        step_list = [item[1] for item in miou_class_overall]
        # datetime.fromtimestamp() can convert wall_time to a datetime
        wall_time_list = [item[0] for item in miou_class_overall]
        # print(step_list)
        miou_dic['overall'] = miou_class_list
        miou_dic['step'] = step_list
        miou_dic['wall_time'] = wall_time_list

    io_function.save_dict_to_txt_json(os.path.join(log_dir, 'miou.txt'), miou_dic)
    return miou_dic
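# Helper sketch: pick the evaluation step with the highest overall mIoU from
# the dict returned by get_miou_list_class_all. It assumes 'overall' and
# 'step' are present, i.e. the 'eval/miou_1.0_overall' tag existed in the log.
def best_miou_step(miou_dic):
    if 'overall' not in miou_dic:
        return None
    idx = max(range(len(miou_dic['overall'])), key=lambda i: miou_dic['overall'][i])
    return miou_dic['step'][idx], miou_dic['overall'][idx]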
def mosaic_crop_dem(dem_tif_list, save_dir, extent_id, extent_poly, b_mosaic_id,
                    b_mosaic_date, process_num, keep_dem_percent, o_res, pre_name,
                    resample_method='average', b_mask_matchtag=False,
                    b_mask_stripDEM_outlier=False, b_mask_surface_water=False,
                    b_mosaic_year=False):
    org_dem_tif_list = dem_tif_list.copy()

    # crop to the same extent
    crop_tif_dir = os.path.join(save_dir, 'dem_crop_sub_%d' % extent_id)
    if os.path.isdir(crop_tif_dir) is False:
        io_function.mkdir(crop_tif_dir)

    crop_tif_list = []
    for tif in dem_tif_list:
        save_crop_path = os.path.join(crop_tif_dir, os.path.basename(
            io_function.get_name_by_adding_tail(tif, 'sub_poly_%d' % extent_id)))
        if os.path.isfile(save_crop_path):
            basic.outputlogMessage('%s exists, skip cropping' % save_crop_path)
            crop_tif_list.append(save_crop_path)
        else:
            crop_tif = subset_image_by_polygon_box(tif, save_crop_path, extent_poly,
                                                   resample_m='near', o_format='VRT',
                                                   out_res=o_res, same_extent=True,
                                                   thread_num=process_num)
            if crop_tif is False:
                raise ValueError('warning, crop %s failed' % tif)
            crop_tif_list.append(crop_tif)
    dem_tif_list = crop_tif_list

    # mask the DEM by matchtag, only keeping pixels derived from a stereo match
    if b_mask_matchtag:
        mask_crop_dem_list, matchtag_crop_tif_list = mask_crop_dem_by_matchtag(
            org_dem_tif_list, crop_tif_list, extent_poly, extent_id, crop_tif_dir,
            o_res, process_num)
        dem_tif_list = mask_crop_dem_list

    # mask the outliers in the strip version of the DEM using the mosaic version of ArcticDEM
    if b_mask_stripDEM_outlier:
        mask_outlier_tifs = mask_strip_dem_outlier_by_ArcticDEM_mosaic(
            dem_tif_list, extent_poly, extent_id, crop_tif_dir, o_res, process_num)
        if mask_outlier_tifs is False:
            pass
        else:
            dem_tif_list = mask_outlier_tifs

    # mask the water surface
    if b_mask_surface_water:
        # the water mask resolution is 30 meters
        mask_water_tifs = mask_dem_by_surface_water(dem_tif_list, extent_poly,
                                                    extent_id, crop_tif_dir, 30,
                                                    process_num)
        if mask_water_tifs is False:
            raise ValueError('masking by surface water failed')
        if mask_water_tifs is None:
            basic.outputlogMessage('No water masks, skip masking')
        else:
            dem_tif_list = mask_water_tifs

    # area pixel count
    area_pixel_count = int(extent_poly.area / (o_res * o_res))
    basic.outputlogMessage('Area pixel count: %d' % area_pixel_count)

    # create mosaic (DEMs with the same strip pair ID)
    mosaic_dir = os.path.join(save_dir, 'dem_stripID_mosaic_sub_%d' % extent_id)
    if b_mosaic_id:
        dem_groups = group_demTif_strip_pair_ID(dem_tif_list)
        if os.path.isfile(os.path.join(mosaic_dir, 'dem_valid_percent.txt')):
            basic.outputlogMessage('mosaic based on stripID exists, skip mosaicking')
            with open(os.path.join(mosaic_dir, 'dem_valid_percent.txt')) as f_job:
                tif_valid_per_list = [line.strip().split() for line in f_job.readlines()]
                # check keep_dem_percent
                dem_tif_list = [os.path.join(mosaic_dir, tif) for tif, per in tif_valid_per_list
                                if float(per) >= keep_dem_percent]
        else:
            io_function.mkdir(mosaic_dir)
            # creating the mosaic as VRT ends in some wrong results, so use 'GTiff';
            # for a mosaic in VRT format, we should use "gdalbuildvrt"
            mosaic_list = mosaic_dem_same_stripID(dem_groups, mosaic_dir, resample_method,
                                                  process_num=process_num, o_format='GTiff')
            dem_tif_list = mosaic_list
            # get valid pixel percentage (due to the large memory usage in raster_io,
            # may need to set process_num to 1)
            dem_tif_list = check_dem_valid_per(dem_tif_list, mosaic_dir,
                                               process_num=process_num,
                                               move_dem_threshold=keep_dem_percent,
                                               area_pixel_num=area_pixel_count)

    if len(dem_tif_list) < 1:
        basic.outputlogMessage('No dem_stripID_mosaic with valid_percent greater than %s'
                               % str(keep_dem_percent))
        save_id_grid_no_valid_dem(extent_id)
        return []

    # merge DEMs with close acquisition dates
    mosaic_yeardate_dir = os.path.join(save_dir, 'dem_date_mosaic_sub_%d' % extent_id)
    if b_mosaic_date:
        # group DEMs whose original images were acquired on the same date
        dem_groups_date = group_demTif_yearmonthDay(dem_tif_list, diff_days=0)
        # sort by date in ascending order: operator.itemgetter(0)
        dem_groups_date = dict(sorted(dem_groups_date.items(), key=operator.itemgetter(0)))
        # save to txt (json format)
        year_date_txt = os.path.join(mosaic_dir, 'year_date_tif.txt')
        io_function.save_dict_to_txt_json(year_date_txt, dem_groups_date)

        if os.path.isfile(os.path.join(mosaic_yeardate_dir, 'dem_valid_percent.txt')):
            basic.outputlogMessage('mosaic based on acquisition date exists, skip mosaicking')
            with open(os.path.join(mosaic_yeardate_dir, 'dem_valid_percent.txt')) as f_job:
                tif_valid_per_list = [line.strip().split() for line in f_job.readlines()]
                # check keep_dem_percent
                dem_tif_list = [os.path.join(mosaic_yeardate_dir, tif)
                                for tif, per in tif_valid_per_list
                                if float(per) >= keep_dem_percent]
        else:
            io_function.mkdir(mosaic_yeardate_dir)
            # this is the output of the mosaic, saved in 'GTiff' format
            mosaic_list = mosaic_dem_date(dem_groups_date, mosaic_yeardate_dir, resample_method,
                                          process_num=process_num, save_source=True,
                                          o_format='GTiff')
            dem_tif_list = mosaic_list
            # get valid pixel percentage
            dem_tif_list = check_dem_valid_per(dem_tif_list, mosaic_yeardate_dir,
                                               process_num=process_num,
                                               move_dem_threshold=keep_dem_percent,
                                               area_pixel_num=area_pixel_count)

    # mosaic DEMs for the same year, putting the DEM closest to July 1 on top
    mosaic_year_dir = os.path.join(save_dir, 'dem_year_mosaic_sub_%d' % extent_id)
    if b_mosaic_year:
        # group DEMs whose original images were acquired in the same year
        dem_groups_year = group_demTif_same_year(dem_tif_list)
        # sort by year in ascending order: operator.itemgetter(0)
        dem_groups_year = dict(sorted(dem_groups_year.items(), key=operator.itemgetter(0)))
        # save to txt (json format)
        year_txt = os.path.join(mosaic_dir, 'year_tif.txt')
        io_function.save_dict_to_txt_json(year_txt, dem_groups_year)

        if os.path.isfile(os.path.join(mosaic_year_dir, 'dem_valid_percent.txt')):
            basic.outputlogMessage('mosaic based on acquisition year exists, skip mosaicking')
            with open(os.path.join(mosaic_year_dir, 'dem_valid_percent.txt')) as f_job:
                tif_valid_per_list = [line.strip().split() for line in f_job.readlines()]
                # check keep_dem_percent
                dem_tif_list = [os.path.join(mosaic_year_dir, tif)
                                for tif, per in tif_valid_per_list
                                if float(per) >= keep_dem_percent]
        else:
            io_function.mkdir(mosaic_year_dir)
            # this is the output of the mosaic, saved in 'GTiff' format
            mosaic_list = mosaic_dem_same_year(dem_groups_year, mosaic_year_dir, resample_method,
                                               process_num=process_num, save_source=True,
                                               o_format='GTiff')
            dem_tif_list = mosaic_list
            # get valid pixel percentage
            dem_tif_list = check_dem_valid_per(dem_tif_list, mosaic_year_dir,
                                               process_num=process_num,
                                               move_dem_threshold=keep_dem_percent,
                                               area_pixel_num=area_pixel_count)

    return dem_tif_list
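# The dem_valid_percent.txt files parsed above are assumed to contain one
# "filename percent" pair per line (matching line.strip().split() in the code).
# A minimal filter sketch of that re-read-and-threshold step:
def read_valid_percent_list(txt_path, keep_percent):
    with open(txt_path) as f_obj:
        pairs = [line.strip().split() for line in f_obj if line.strip()]
    return [name for name, per in pairs if float(per) >= keep_percent]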
def get_sub_images_multi_regions(para_file):
    print("extract sub-images and sub-labels for a given shape file (training polygons)")

    if os.path.isfile(para_file) is False:
        raise IOError('File %s not exists in current folder: %s' % (para_file, os.getcwd()))

    get_subImage_script = os.path.join(code_dir, 'datasets', 'get_subImages.py')
    SECONDS = time.time()

    # get the names of training areas
    multi_training_regions = parameters.get_string_list_parameters_None_if_absence(
        para_file, 'training_regions')
    if multi_training_regions is None or len(multi_training_regions) < 1:
        raise ValueError('No training area is set in %s' % para_file)

    # multi_training_files = parameters.get_string_parameters_None_if_absence(para_file, 'multi_training_files')

    dstnodata = parameters.get_string_parameters(para_file, 'dst_nodata')
    buffersize = parameters.get_string_parameters(para_file, 'buffer_size')
    rectangle_ext = parameters.get_string_parameters(para_file, 'b_use_rectangle')
    process_num = parameters.get_digit_parameters(para_file, 'process_num', 'int')

    b_no_label_image = parameters.get_bool_parameters_None_if_absence(para_file, 'b_no_label_image')

    if os.path.isfile('sub_images_labels_list.txt'):
        io_function.delete_file_or_dir('sub_images_labels_list.txt')

    subImage_dir = parameters.get_string_parameters_None_if_absence(para_file, 'input_train_dir')
    subLabel_dir = parameters.get_string_parameters_None_if_absence(para_file, 'input_label_dir')

    # record sub_images_labels from each area_ini
    area_ini_sub_images_labels = {}
    sub_image_label_list_before = []  # the list before getting sub-images

    # loop over each training region
    for idx, area_ini in enumerate(multi_training_regions):

        input_image_dir = parameters.get_directory_None_if_absence(area_ini, 'input_image_dir')

        # it is ok to treat a file name as a pattern and pass it to the
        # following functions to get the file list
        input_image_or_pattern = parameters.get_string_parameters(area_ini, 'input_image_or_pattern')

        b_sub_images_json = parameters.get_bool_parameters_None_if_absence(area_ini, 'b_sub_images_json')
        b_label_raster_aval = parameters.get_bool_parameters_None_if_absence(area_ini, 'b_label_raster_aval')
        b_polygons_for_entire_scene = parameters.get_bool_parameters_None_if_absence(area_ini, 'b_polygons_for_entire_scene')

        if b_sub_images_json is True:
            # copy sub-images, then convert json files to label images
            object_names = parameters.get_string_list_parameters(para_file, 'object_names')
            get_subImages_json.get_subimages_label_josn(input_image_dir, input_image_or_pattern,
                                                        subImage_dir, subLabel_dir, object_names,
                                                        b_no_label_image=b_no_label_image,
                                                        process_num=process_num)
        elif b_label_raster_aval is True:
            # copy the label raster and images directly
            copy_subImages_labels_directly(subImage_dir, subLabel_dir, area_ini)
        elif b_polygons_for_entire_scene is True:
            # get label rasters for entire scenes (not extracting sub-images) by rasterizing
            input_polygon_dir = parameters.get_string_parameters(area_ini, 'input_polygon_dir')
            input_polygon_or_pattern = parameters.get_string_parameters(area_ini, 'input_polygon_or_pattern')
            rasterize_polygons.get_subimages_SpaceNet(input_image_dir, input_image_or_pattern,
                                                      input_polygon_dir, input_polygon_or_pattern,
                                                      subImage_dir, subLabel_dir, burn_value=1)
        else:
            all_train_shp = parameters.get_file_path_parameters_None_if_absence(area_ini, 'training_polygons')
            train_shp = parameters.get_string_parameters(area_ini, 'training_polygons_sub')

            # get subImage and subLabel for one set of training polygons
            print('extract training data from image folder (%s) and polygons (%s)'
                  % (input_image_dir, train_shp))
            if b_no_label_image is True:
                get_subImage_one_shp(get_subImage_script, all_train_shp, buffersize, dstnodata,
                                     rectangle_ext, train_shp, input_image_dir,
                                     file_pattern=input_image_or_pattern, process_num=process_num)
            else:
                get_subImage_subLabel_one_shp(get_subImage_script, all_train_shp, buffersize,
                                              dstnodata, rectangle_ext, train_shp, input_image_dir,
                                              file_pattern=input_image_or_pattern,
                                              process_num=process_num)

        sub_image_label_list_after = io_function.read_list_from_txt('sub_images_labels_list.txt')
        area_ini_sub_images_labels[area_ini] = sub_image_label_list_after[len(sub_image_label_list_before):]
        # update the list
        sub_image_label_list_before = sub_image_label_list_after

    # check black sub-images, or sub-images that are mostly black (nodata)
    new_sub_image_label_list = []
    delete_sub_image_label_list = []
    subImage_dir_delete = subImage_dir + '_delete'
    subLabel_dir_delete = subLabel_dir + '_delete'
    io_function.mkdir(subImage_dir_delete)
    if b_no_label_image is None or b_no_label_image is False:
        io_function.mkdir(subLabel_dir_delete)
    b_check_sub_image_quality = parameters.get_bool_parameters_None_if_absence(para_file, 'b_check_sub_image_quality')
    if b_check_sub_image_quality is True:
        get_valid_percent_entropy.plot_valid_entropy(subImage_dir)
        with open('sub_images_labels_list.txt', 'r') as f_obj:
            lines = f_obj.readlines()
        for line in lines:
            image_path, label_path = line.strip().split(':')
            # valid_per = raster_io.get_valid_pixel_percentage(image_path)
            valid_per, entropy = raster_io.get_valid_percent_shannon_entropy(image_path)  # base=10
            if valid_per > 60 and entropy >= 0.5:
                new_sub_image_label_list.append(line)
            else:
                delete_sub_image_label_list.append(line)
                io_function.movefiletodir(image_path, subImage_dir_delete)
                if os.path.isfile(label_path):
                    io_function.movefiletodir(label_path, subLabel_dir_delete)
    else:
        with open('sub_images_labels_list.txt', 'r') as f_obj:
            new_sub_image_label_list = f_obj.readlines()

    if len(delete_sub_image_label_list) > 0:
        with open('sub_images_labels_list.txt', 'w') as f_obj:
            for line in new_sub_image_label_list:
                f_obj.writelines(line)

    for del_line in delete_sub_image_label_list:
        for idx, area_ini in enumerate(multi_training_regions):
            if del_line in area_ini_sub_images_labels[area_ini]:
                area_ini_sub_images_labels[area_ini].remove(del_line)

    io_function.save_dict_to_txt_json('area_ini_sub_images_labels.txt', area_ini_sub_images_labels)

    # check whether they have the same number of subImages and subLabels
    if b_no_label_image is None or b_no_label_image is False:
        sub_image_list = io_function.get_file_list_by_pattern(subImage_dir, '*.tif')
        sub_label_list = io_function.get_file_list_by_pattern(subLabel_dir, '*.tif')
        if len(sub_image_list) != len(sub_label_list):
            raise ValueError('the count of subImage (%d) and subLabel (%d) is different'
                             % (len(sub_image_list), len(sub_label_list)))

    # save brief information of the sub-images
    height_list = []
    width_list = []
    band_count = 0
    dtype = 'unknown'
    for line in new_sub_image_label_list:
        image_path, label_path = line.strip().split(':')
        height, width, band_count, dtype = raster_io.get_height_width_bandnum_dtype(image_path)
        height_list.append(height)
        width_list.append(width)
    if len(height_list) < 1 or len(width_list) < 1:
        raise ValueError('No sub-images')

    # save info to file; if it exists, it will be overwritten
    img_count = len(new_sub_image_label_list)
    with open('sub_images_patches_info.txt', 'w') as f_obj:
        f_obj.writelines('information of sub-images: \n')
        f_obj.writelines('number of sub-images : %d \n' % img_count)
        f_obj.writelines('band count : %d \n' % band_count)
        f_obj.writelines('data type : %s \n' % dtype)
        f_obj.writelines('maximum width and height: %d, %d \n' % (max(width_list), max(height_list)))
        f_obj.writelines('minimum width and height: %d, %d \n' % (min(width_list), min(height_list)))
        f_obj.writelines('mean width and height: %.2f, %.2f \n\n'
                         % (sum(width_list) / img_count, sum(height_list) / img_count))

    duration = time.time() - SECONDS
    os.system('echo "$(date): time cost of getting sub images and labels: %.2f seconds">>time_cost.txt' % duration)
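# Hedged sketch of the quality check used above: valid-pixel percentage and
# Shannon entropy (base 10, matching the "base=10" note) for one band of an
# image already loaded as a numpy array. It assumes 8-bit data and nodata == 0;
# the repo's raster_io.get_valid_percent_shannon_entropy may differ in detail.
import numpy as np

def valid_percent_shannon_entropy(band, nodata=0):
    valid = band[band != nodata]
    valid_per = 100.0 * valid.size / band.size
    if valid.size == 0:
        return 0.0, 0.0
    # entropy over a 256-bin histogram of the valid pixels
    hist, _ = np.histogram(valid, bins=256, range=(0, 256))
    p = hist[hist > 0] / float(valid.size)
    entropy = float(-(p * np.log10(p)).sum())
    return valid_per, entropy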
def segment_a_grey_image(img_path, save_dir, process_num, org_raster=None, b_save_patch_label=False):

    out_pre = os.path.splitext(os.path.basename(img_path))[0]
    label_path = os.path.join(save_dir, out_pre + '_label.tif')
    if os.path.isfile(label_path):
        basic.outputlogMessage('%s exists, skip segmentation' % label_path)
        return label_path

    height, width, band_num, data_type = raster_io.get_height_width_bandnum_dtype(img_path)
    print('input image: height, width, band_num, data_type', height, width, band_num, data_type)

    # if the original data is available, calculate the attributes based on that
    if org_raster is not None:
        org_height, org_width, org_band_num, org_data_type = raster_io.get_height_width_bandnum_dtype(org_raster)
        if org_height != height or org_width != width:
            raise ValueError('%s and %s do not have the same size' % (img_path, org_raster))

    save_labels = np.zeros((height, width), dtype=np.int32)

    # divide the image into many small patches, then calculate one by one, solving memory issues
    image_patches = split_image.sliding_window(width, height, 1024, 1024,
                                               adj_overlay_x=0, adj_overlay_y=0)
    patch_count = len(image_patches)

    # for idx, patch in enumerate(image_patches):
    #     out_patch, out_labels = segment_a_patch(idx, patch, patch_count, img_path)
    #     # copy to the entire image
    #     row_s = patch[1]
    #     row_e = patch[1] + patch[3]
    #     col_s = patch[0]
    #     col_e = patch[0] + patch[2]
    #     save_labels[row_s:row_e, col_s:col_e] = out_labels

    threadPool = Pool(process_num)
    parameters_list = [(idx, patch, patch_count, img_path, org_raster, b_save_patch_label)
                       for idx, patch in enumerate(image_patches)]
    results = threadPool.starmap(segment_a_patch, parameters_list)
    threadPool.close()

    patch_label_path_list = []
    patch_label_id_range = []
    object_attributes = {}  # object id (label) and attributes (list)
    for res in results:
        patch, out_labels, nodata, attributes = res
        if isinstance(out_labels, str) and os.path.isfile(out_labels):
            # it's a label file
            patch_label_path_list.append(out_labels)
        else:
            # copy to the entire image
            row_s = patch[1]
            row_e = patch[1] + patch[3]
            col_s = patch[0]
            col_e = patch[0] + patch[2]
            current_max = np.max(save_labels)  # the largest label assigned so far
            print('current_max', current_max)
            patch_label_id_range.append(current_max)
            # offset the patch labels so they do not collide with earlier patches
            save_labels[row_s:row_e, col_s:col_e] = out_labels + current_max + 1
            if attributes is not None:
                update_label_attr = {}
                for key in attributes:
                    update_label_attr[key + current_max] = attributes[key]
                # add to the attributes
                object_attributes.update(update_label_attr)

    # # apply a median filter (to remove some noise)?
    # # we should not use a median filter, because these are labels, not images
    # label_blurs = cv2.medianBlur(np.float32(save_labels), 3)  # with kernel=3, cannot accept int32
    # # print(label_blurs, label_blurs.dtype)
    # save_labels = label_blurs.astype(np.int32)

    # return a list of labels saved in the current working folder
    if b_save_patch_label:
        return patch_label_path_list

    if os.path.isdir(save_dir) is False:
        io_function.mkdir(save_dir)

    # save attributes (if not empty)
    if object_attributes:
        attribute_path = os.path.join(save_dir, out_pre + '_attributes.txt')
        io_function.save_dict_to_txt_json(attribute_path, object_attributes)

    # save the label
    raster_io.save_numpy_array_to_rasterfile(save_labels, label_path, img_path)  # do not set nodata

    # save id ranges to txt
    label_id_range_txt = os.path.splitext(label_path)[0] + '_IDrange.txt'
    patch_label_id_range = [str(item) for item in patch_label_id_range]
    io_function.save_list_to_txt(label_id_range_txt, patch_label_id_range)

    return label_path
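# Minimal sketch of the patch grid that split_image.sliding_window is assumed
# to produce here: (xoff, yoff, xsize, ysize) tuples covering the image, which
# matches how patch[0]..patch[3] are indexed above. The real function also
# supports overlap via adj_overlay_x/adj_overlay_y; this sketch ignores that.
def sliding_window_sketch(width, height, w_size, h_size):
    patches = []
    for y in range(0, height, h_size):
        for x in range(0, width, w_size):
            # clamp the last column/row of patches to the image boundary
            patches.append((x, y, min(w_size, width - x), min(h_size, height - y)))
    return patches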
def dem_diff_newest_oldest(dem_tif_list, out_dem_diff, out_date_diff, process_num,
                           b_max_subsidence=False, b_save_cm=False):
    '''
    get the DEM difference; for each pixel: newest valid value - oldest valid value
    :param dem_tif_list:
    :param out_dem_diff:
    :param out_date_diff:
    :return:
    '''
    if len(dem_tif_list) < 2:
        basic.outputlogMessage('error, the count of DEM is smaller than 2')
        return False

    # group DEMs whose original images were acquired on the same date
    dem_groups_date = group_demTif_yearmonthDay(dem_tif_list, diff_days=0)
    # sort by date in ascending order: operator.itemgetter(0)
    dem_groups_date = dict(sorted(dem_groups_date.items(), key=operator.itemgetter(0)))
    txt_save_path = os.path.splitext(out_date_diff)[0] + '.txt'

    # change the keys to integer numbers after sorting, then save to a txt file
    dem_groups_date_sort_idx = {}
    for idx, key in enumerate(dem_groups_date.keys()):
        dem_groups_date_sort_idx[idx] = dem_groups_date[key]
    io_function.save_dict_to_txt_json(txt_save_path, dem_groups_date_sort_idx)

    date_list = list(dem_groups_date.keys())
    dem_tif_list = [dem_groups_date[key][0] for key in dem_groups_date.keys()]  # each date only has one tif
    tif_obj_list = [raster_io.open_raster_read(tif) for tif in dem_tif_list]

    height, width, _ = raster_io.get_width_heigth_bandnum(tif_obj_list[0])

    # check that they all have the same width and height
    for tif, obj in zip(dem_tif_list[1:], tif_obj_list[1:]):
        h, w, _ = raster_io.get_width_heigth_bandnum(obj)
        if h != height or w != width:
            raise ValueError('the height and width of %s is different from others' % tif)

    # divide the image into many small patches, then calculate one by one, solving memory issues
    image_patches = split_image.sliding_window(width, height, 1024, 1024,
                                               adj_overlay_x=0, adj_overlay_y=0)
    patch_count = len(image_patches)
    tif_obj_list = None

    # read all dates and build date pairs
    date_pair_list = list(combinations(date_list, 2))
    date_diff_list = [(item[1] - item[0]).days for item in date_pair_list]
    # sort based on day difference (from max to min)
    date_pair_list_sorted = [x for _, x in sorted(zip(date_diff_list, date_pair_list),
                                                  reverse=True)]  # descending

    # get the difference
    date_diff_np = np.zeros((height, width), dtype=np.uint16)
    old_date_index = np.zeros((height, width), dtype=np.uint8)
    new_date_index = np.zeros((height, width), dtype=np.uint8)
    dem_diff_np = np.empty((height, width), dtype=np.float32)
    dem_diff_np[:] = np.nan

    if process_num == 1:
        for idx, patch in enumerate(image_patches):
            _, patch_dem_diff, patch_date_diff, patch_old_date_idx, patch_new_date_idx = \
                dem_diff_newest_oldest_a_patch(idx, patch, patch_count,
                                               date_pair_list_sorted, dem_groups_date)
            # copy to the entire image
            row_s = patch[1]
            row_e = patch[1] + patch[3]
            col_s = patch[0]
            col_e = patch[0] + patch[2]
            dem_diff_np[row_s:row_e, col_s:col_e] = patch_dem_diff
            date_diff_np[row_s:row_e, col_s:col_e] = patch_date_diff
            old_date_index[row_s:row_e, col_s:col_e] = patch_old_date_idx
            new_date_index[row_s:row_e, col_s:col_e] = patch_new_date_idx
    else:
        threadPool = Pool(process_num)
        parameters_list = [(idx, patch, patch_count, date_pair_list_sorted, dem_groups_date)
                           for idx, patch in enumerate(image_patches)]
        if b_max_subsidence is False:
            results = threadPool.starmap(dem_diff_newest_oldest_a_patch, parameters_list)
        else:
            results = threadPool.starmap(dem_diff_new_old_min_neg_diff_patch, parameters_list)
        for res in results:
            patch, patch_dem_diff, patch_date_diff, patch_old_date_idx, patch_new_date_idx = res
            # copy to the entire image
            row_s = patch[1]
            row_e = patch[1] + patch[3]
            col_s = patch[0]
            col_e = patch[0] + patch[2]
            dem_diff_np[row_s:row_e, col_s:col_e] = patch_dem_diff
            date_diff_np[row_s:row_e, col_s:col_e] = patch_date_diff
            old_date_index[row_s:row_e, col_s:col_e] = patch_old_date_idx
            new_date_index[row_s:row_e, col_s:col_e] = patch_new_date_idx
        threadPool.close()

    # save date diff to tif (16 bit)
    raster_io.save_numpy_array_to_rasterfile(date_diff_np, out_date_diff, dem_tif_list[0],
                                             nodata=0, compress='lzw', tiled='yes',
                                             bigtiff='if_safer')
    # save old and new date indices to tif (8 bit)
    out_old_date_idx = io_function.get_name_by_adding_tail(out_date_diff, 'oldIndex')
    out_new_date_idx = io_function.get_name_by_adding_tail(out_date_diff, 'newIndex')
    raster_io.save_numpy_array_to_rasterfile(old_date_index, out_old_date_idx, dem_tif_list[0],
                                             nodata=255, compress='lzw', tiled='yes',
                                             bigtiff='if_safer')
    raster_io.save_numpy_array_to_rasterfile(new_date_index, out_new_date_idx, dem_tif_list[0],
                                             nodata=255, compress='lzw', tiled='yes',
                                             bigtiff='if_safer')

    # # stretch the DEM difference, save to 8 bit
    # dem_diff_np_8bit = raster_io.image_numpy_to_8bit(dem_diff_np, 10, -10, dst_nodata=0)
    # out_dem_diff_8bit = io_function.get_name_by_adding_tail(out_dem_diff, '8bit')
    # raster_io.save_numpy_array_to_rasterfile(dem_diff_np_8bit, out_dem_diff_8bit, dem_tif_list[0], nodata=0)

    # if possible, save to 16 bit to reduce disk usage
    # dem_diff_np[0:5, 0] = -500
    # dem_diff_np[0, 0:5] = 500
    # print(np.nanmin(dem_diff_np))
    # print(np.nanmax(dem_diff_np))
    # if np.nanmin(dem_diff_np_cm) < range.min or np.nanmax(dem_diff_np_cm) > range.max:

    if b_save_cm is False:
        # save dem diff to file (float32), in meters
        raster_io.save_numpy_array_to_rasterfile(dem_diff_np, out_dem_diff, dem_tif_list[0],
                                                 nodata=-9999, compress='lzw', tiled='yes',
                                                 bigtiff='if_safer')
    else:
        # save dem diff to int16, in centimeters; can only handle diff from -327.67 to 327.67 meters
        bit16_nodata = 32767
        range_int16 = np.iinfo(np.int16)
        dem_diff_np_cm = dem_diff_np * 100
        dem_diff_np_cm[dem_diff_np_cm < range_int16.min] = range_int16.min
        dem_diff_np_cm[dem_diff_np_cm > range_int16.max] = range_int16.max
        dem_diff_np_cm[np.isnan(dem_diff_np_cm)] = bit16_nodata  # set the nodata for int16
        dem_diff_np_cm = dem_diff_np_cm.astype(np.int16)  # save to int16
        out_dem_diff_cm = out_dem_diff
        basic.outputlogMessage('note, save DEM difference (%s) to centimeter, int16, '
                               'range: -327.68 to 327.67 m' % os.path.basename(out_dem_diff_cm))
        raster_io.save_numpy_array_to_rasterfile(dem_diff_np_cm, out_dem_diff_cm, dem_tif_list[0],
                                                 nodata=bit16_nodata, compress='lzw', tiled='yes',
                                                 bigtiff='if_safer')

    return True
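# Round-trip sketch for the int16-centimeter encoding used above: values were
# multiplied by 100, clipped to the int16 range (about ±327.67 m), and 32767
# marks nodata. This inverse is illustrative, not part of the original module.
import numpy as np

def decode_cm_int16(arr_int16, nodata=32767):
    out = arr_int16.astype(np.float32) / 100.0  # centimeters back to meters
    out[arr_int16 == nodata] = np.nan
    return out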
def main(options, args):
    time0 = time.time()
    image_dir = args[0]
    geojson_list = io_function.get_file_list_by_ext('.geojson', image_dir, bsub_folder=False)
    # remove some scenes, or maybe we should set bsub_folder=False
    # geojson_list = [item for item in geojson_list if 'incomplete_scenes' not in item]  # remove those in "incomplete_scenes"
    # geojson_list = [item for item in geojson_list if 'scenes_high_cloud_cover' not in item]  # remove those in "scenes_high_cloud_cover"

    if len(geojson_list) < 1:
        raise ValueError('There is no geojson file in %s' % image_dir)

    basic.outputlogMessage('Image Dir: %s' % image_dir)
    basic.outputlogMessage("Number of geojson files: %d" % len(geojson_list))

    # the polygons should be in a projected Cartesian coordinate system (e.g., UTM)
    grid_polygon_shp = args[1]
    basic.outputlogMessage('Image grid polygon shapefile: %s' % grid_polygon_shp)
    process_num = options.process_num
    basic.outputlogMessage('The number of processes for creating the mosaic is: %d' % process_num)

    # read grid polygons
    grid_polygons = vector_gpd.read_polygons_gpd(grid_polygon_shp)
    grid_ids = vector_gpd.read_attribute_values_list(grid_polygon_shp, 'id')
    if grid_ids is None:
        basic.outputlogMessage('Warning, field: id is not in %s, will create a default ID for each grid'
                               % grid_polygon_shp)
        grid_ids = [id + 1 for id in range(len(grid_polygons))]

    shp_prj = map_projection.get_raster_or_vector_srs_info_proj4(grid_polygon_shp).strip()
    # print(shp_prj)
    grid_polygons_latlon = grid_polygons
    if shp_prj != '+proj=longlat +datum=WGS84 +no_defs':
        # read the polygons and reproject them to EPSG:4326
        grid_polygons_latlon = vector_gpd.read_shape_gpd_to_NewPrj(grid_polygon_shp, 'EPSG:4326')
    # else:
    #     raise ValueError('%s should be in a projected Cartesian coordinate system' % grid_polygon_shp)
    shp_prj_wkt = map_projection.get_raster_or_vector_srs_info_wkt(grid_polygon_shp)

    max_sr = options.max_sr
    min_sr = options.min_sr

    original_img_copy_dir = options.original_img_copy_dir
    b_to_rgb_8bit = options.to_rgb
    basic.outputlogMessage('Convert to 8bit RGB images: %s' % str(b_to_rgb_8bit))

    # group Planet images based on acquisition date
    b_group_date = options.group_date
    basic.outputlogMessage('Group Planet images based on acquisition date: %s' % str(b_group_date))
    if b_group_date:
        # with diff_days as 0, group images acquired on the same date
        geojson_groups = group_planet_images_date(geojson_list, diff_days=0)

        # sort by date in ascending order: operator.itemgetter(0)
        geojson_groups = dict(sorted(geojson_groups.items(), key=operator.itemgetter(0)))

        save_group_txt = 'geojson_groups_input_folder.txt'
        basic.outputlogMessage('images are divided into %d groups, saved to %s'
                               % (len(geojson_groups.keys()), save_group_txt))
        io_function.save_dict_to_txt_json(save_group_txt, geojson_groups)
    else:
        geojson_groups = {'all': geojson_list}

    # create a mosaic for each grid
    cloud_cover_thr = options.cloud_cover
    cloud_cover_thr = cloud_cover_thr * 100  # for Planet images, it is a percentage
    out_res = options.out_res
    cur_dir = os.getcwd()
    resampling_method = options.merged_method

    for key in geojson_groups.keys():
        # # test
        # if key != '20200701':
        #     continue

        geojson_list = geojson_groups[key]
        save_dir = os.path.basename(cur_dir) + '_mosaic_' + str(out_res) + '_' + key
        # print(save_dir)

        if process_num == 1:
            for id, polygon, poly_latlon in zip(grid_ids, grid_polygons, grid_polygons_latlon):
                # if id != 34:
                #     continue
                create_moasic_of_each_grid_polygon(id, polygon, poly_latlon, out_res,
                                                   cloud_cover_thr, geojson_list, save_dir,
                                                   new_prj_wkt=shp_prj_wkt, new_prj_proj4=shp_prj,
                                                   sr_min=min_sr, sr_max=max_sr,
                                                   to_rgb=b_to_rgb_8bit,
                                                   save_org_dir=original_img_copy_dir,
                                                   resampling_method=resampling_method)
        elif process_num > 1:
            threadPool = Pool(process_num)  # multi processes
            parameters_list = [(id, polygon, poly_latlon, out_res, cloud_cover_thr, geojson_list,
                                save_dir, shp_prj_wkt, shp_prj, min_sr, max_sr, b_to_rgb_8bit,
                                0, original_img_copy_dir)
                               for id, polygon, poly_latlon in zip(grid_ids, grid_polygons,
                                                                   grid_polygons_latlon)]
            results = threadPool.starmap(create_moasic_of_each_grid_polygon, parameters_list)  # need python3
            threadPool.close()
        else:
            raise ValueError('incorrect process number: %d' % process_num)

    cost_time_sec = time.time() - time0
    basic.outputlogMessage('Done, total time cost %.2f seconds (%.2f minutes or %.2f hours)'
                           % (cost_time_sec, cost_time_sec / 60, cost_time_sec / 3600))
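# Hedged sketch of what group_planet_images_date(..., diff_days=0) is assumed
# to do in the diff_days=0 case: bucket files by an 8-digit yyyymmdd substring
# in the file name. The real function may instead parse acquisition dates from
# the geojson metadata; this is illustrative only.
import os
import re
from collections import defaultdict

def group_by_date_str(file_list):
    groups = defaultdict(list)
    for path in file_list:
        m = re.search(r'\d{8}', os.path.basename(path))
        key = m.group() if m else 'unknown'
        groups[key].append(path)
    # sorted by date string, matching the ascending sort above
    return dict(sorted(groups.items()))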
def build_dict_of_dem_cover_grid_ids(dem_info_shp, grid_20_shp, save_dict_txt):
    # this will take time, but only needs to run once at the beginning
    if os.path.isfile(save_dict_txt):
        print('warning, %s exists, skip build_dict_of_dem_cover_grid_ids' % save_dict_txt)
        return True

    # extent polygons and projection (proj4)
    dem_shp_prj = map_projection.get_raster_or_vector_srs_info_proj4(dem_info_shp)
    if dem_shp_prj == '':
        raise ValueError('get proj4 of %s failed' % dem_info_shp)
    grid_shp_prj = map_projection.get_raster_or_vector_srs_info_proj4(grid_20_shp)
    if grid_shp_prj == '':
        raise ValueError('get proj4 of %s failed' % grid_20_shp)

    if dem_shp_prj != grid_shp_prj:
        raise ValueError('%s and %s do not have the same projection' % (dem_info_shp, grid_20_shp))

    # read DEM info
    dem_polygons, dem_names = vector_gpd.read_polygons_attributes_list(dem_info_shp, 'name',
                                                                       b_fix_invalid_polygon=False)
    # dem_name: e.g., SETSM_GE01_20090818_1050410001E0CF00_1050410001D80200_seg1_2m_v3.0 or 11_27_2_1_2m_v3.0
    dem_poly_count = len(dem_polygons)
    # check if there are duplicated dem names
    if len(dem_names) != len(set(dem_names)):
        raise ValueError('some duplicated dem names in %s' % dem_info_shp)

    # read grid polygons and ids
    all_grid_polys, all_ids = vector_gpd.read_polygons_attributes_list(grid_20_shp, 'id')

    dem_cover_grids = {}
    # this will take time
    # ### serial version
    # for idx, (dem_poly, dem_name) in enumerate(zip(dem_polygons, dem_names)):
    #     print(timeTools.get_now_time_str(), idx, dem_poly_count)
    #     index = vector_gpd.get_poly_index_within_extent(all_grid_polys, dem_poly)
    #     grid_ids = [all_ids[idx] for idx in index]
    #     # if dem_name in dem_cover_grids.keys():
    #     #     basic.outputlogMessage('\n Warning, %s already in dict \n' % dem_name)
    #     dem_cover_grids[dem_name] = grid_ids

    ### parallel version
    threadPool = Pool(multiprocessing.cpu_count())  # multi processes
    parameters_list = [(all_ids, all_grid_polys, dem_poly, dem_name, idx, dem_poly_count)
                       for idx, (dem_poly, dem_name) in enumerate(zip(dem_polygons, dem_names))]
    results = threadPool.starmap(get_overlap_grids_for_one_extent, parameters_list)  # need python3
    for res in results:
        dem_name, grid_ids = res
        dem_cover_grids[dem_name] = grid_ids

    # save to dict
    io_function.save_dict_to_txt_json(save_dict_txt, dem_cover_grids)
    threadPool.close()
    return True
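# Follow-up sketch: invert the dict saved above (dem name -> grid ids) to look
# up which DEMs cover a given grid id. Illustrative helper, not in the module.
def grids_to_dem_names(dem_cover_grids):
    grid_to_dems = {}
    for dem_name, grid_ids in dem_cover_grids.items():
        for gid in grid_ids:
            grid_to_dems.setdefault(gid, []).append(dem_name)
    return grid_to_dems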
def proc_dem_mosaic_diff(dem_tif_list, save_dir, extent_id, extent_poly, b_mosaic_id,
                         b_mosaic_date, process_num, keep_dem_percent, o_res, b_dem_diff,
                         pre_name, b_rm_inter, resample_method='average'):

    if len(dem_tif_list) < 1:
        basic.outputlogMessage('No input dem files')
        return False

    # area pixel count
    area_pixel_count = int(extent_poly.area / (o_res * o_res))
    basic.outputlogMessage('Area pixel count: %d' % area_pixel_count)

    # group DEMs
    dem_groups = group_demTif_strip_pair_ID(dem_tif_list)

    # create mosaic (DEMs with the same strip pair ID)
    mosaic_dir = os.path.join(save_dir, 'dem_stripID_mosaic_sub_%d' % extent_id)
    if b_mosaic_id:
        if os.path.isfile(os.path.join(mosaic_dir, 'dem_valid_percent.txt')):
            basic.outputlogMessage('mosaic based on stripID exists, skip mosaicking')
            with open(os.path.join(mosaic_dir, 'dem_valid_percent.txt')) as f_job:
                tif_names = [line.split()[0] for line in f_job.readlines()]
                dem_tif_list = [os.path.join(mosaic_dir, item) for item in tif_names]
                # print(dem_tif_list)
        else:
            io_function.mkdir(mosaic_dir)
            # creating the mosaic as VRT ends in some wrong results, so use 'GTiff';
            # for a mosaic in VRT format, we should use "gdalbuildvrt"
            mosaic_list = mosaic_dem_same_stripID(dem_groups, mosaic_dir, resample_method,
                                                  process_num=process_num, o_format='GTiff')
            dem_tif_list = mosaic_list
            # get valid pixel percentage
            dem_tif_list = check_dem_valid_per(dem_tif_list, mosaic_dir, process_num=process_num,
                                               move_dem_threshold=keep_dem_percent,
                                               area_pixel_num=area_pixel_count)

    # group DEMs whose original images were acquired in the same year and month
    dem_groups_date = group_demTif_yearmonthDay(dem_tif_list, diff_days=31)
    # sort by date in ascending order: operator.itemgetter(0)
    dem_groups_date = dict(sorted(dem_groups_date.items(), key=operator.itemgetter(0)))
    # save to txt (json format)
    year_date_txt = os.path.join(mosaic_dir, 'year_date_tif.txt')
    io_function.save_dict_to_txt_json(year_date_txt, dem_groups_date)

    # merge DEMs with close acquisition dates
    mosaic_yeardate_dir = os.path.join(save_dir, 'dem_date_mosaic_sub_%d' % extent_id)
    if b_mosaic_date:
        if os.path.isfile(os.path.join(mosaic_yeardate_dir, 'dem_valid_percent.txt')):
            basic.outputlogMessage('mosaic based on acquisition date exists, skip mosaicking')
            with open(os.path.join(mosaic_yeardate_dir, 'dem_valid_percent.txt')) as f_job:
                tif_names = [line.split()[0] for line in f_job.readlines()]
                dem_tif_list = [os.path.join(mosaic_yeardate_dir, item) for item in tif_names]
                # print(dem_tif_list)
        else:
            io_function.mkdir(mosaic_yeardate_dir)
            # this is the output of the mosaic, saved in 'GTiff' format
            mosaic_list = mosaic_dem_date(dem_groups_date, mosaic_yeardate_dir, resample_method,
                                          process_num=process_num, save_source=True,
                                          o_format='GTiff')
            dem_tif_list = mosaic_list
            # get valid pixel percentage
            dem_tif_list = check_dem_valid_per(dem_tif_list, mosaic_yeardate_dir,
                                               process_num=process_num,
                                               move_dem_threshold=keep_dem_percent,
                                               area_pixel_num=area_pixel_count)

    # co-registration

    # do DEM difference
    if b_dem_diff:
        save_dem_diff = os.path.join(save_dir, pre_name + '_DEM_diff_sub_%d.tif' % extent_id)
        save_date_diff = os.path.join(save_dir, pre_name + '_date_diff_sub_%d.tif' % extent_id)
        dem_diff_newest_oldest(dem_tif_list, save_dem_diff, save_date_diff)

    # remove intermediate files
    if b_rm_inter:
        # remove the mosaic folders
        # if b_dem_diff:
        #     io_function.delete_file_or_dir(mosaic_dir)
        #     io_function.delete_file_or_dir(mosaic_yeardate_dir)
        pass
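# Hypothetical call to proc_dem_mosaic_diff (all paths, the extent polygon, and
# the parameter values below are placeholders, not from the original code):
#   proc_dem_mosaic_diff(dem_tifs, 'dem_processing', 1, extent_poly,
#                        b_mosaic_id=True, b_mosaic_date=True, process_num=4,
#                        keep_dem_percent=30, o_res=2.0, b_dem_diff=True,
#                        pre_name='alaska', b_rm_inter=False)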
def dem_diff_newest_oldest(dem_tif_list, out_dem_diff, out_date_diff):
    '''
    get the DEM difference; for each pixel: newest valid value - oldest valid value
    :param dem_tif_list:
    :param out_dem_diff:
    :param out_date_diff:
    :return:
    '''
    if len(dem_tif_list) < 2:
        basic.outputlogMessage('error, the count of DEM is smaller than 2')
        return False

    # group DEMs whose original images were acquired on the same date
    dem_groups_date = group_demTif_yearmonthDay(dem_tif_list, diff_days=0)
    # sort by date in ascending order: operator.itemgetter(0)
    dem_groups_date = dict(sorted(dem_groups_date.items(), key=operator.itemgetter(0)))
    txt_save_path = os.path.splitext(out_date_diff)[0] + '.txt'
    io_function.save_dict_to_txt_json(txt_save_path, dem_groups_date)

    date_list = list(dem_groups_date.keys())
    dem_tif_list = [dem_groups_date[key][0] for key in dem_groups_date.keys()]  # each date only has one tif
    tif_obj_list = [raster_io.open_raster_read(tif) for tif in dem_tif_list]

    height, width, _ = raster_io.get_width_heigth_bandnum(tif_obj_list[0])

    # check that they all have the same width and height
    for tif, obj in zip(dem_tif_list[1:], tif_obj_list[1:]):
        h, w, _ = raster_io.get_width_heigth_bandnum(obj)
        if h != height or w != width:
            raise ValueError('the height and width of %s is different from others' % tif)

    # divide the image into many small patches, then calculate one by one, solving memory issues
    image_patches = split_image.sliding_window(width, height, 1024, 1024,
                                               adj_overlay_x=0, adj_overlay_y=0)
    patch_count = len(image_patches)
    tif_obj_list = None

    # read all dates and build date pairs
    date_pair_list = list(combinations(date_list, 2))
    date_diff_list = [(item[1] - item[0]).days for item in date_pair_list]
    # sort based on day difference (from max to min)
    date_pair_list_sorted = [x for _, x in sorted(zip(date_diff_list, date_pair_list),
                                                  reverse=True)]  # descending

    # get the difference
    date_diff_np = np.zeros((height, width), dtype=np.uint16)
    dem_diff_np = np.empty((height, width), dtype=np.float32)
    dem_diff_np[:] = np.nan

    for idx, patch in enumerate(image_patches):
        print('tile: %d / %d' % (idx + 1, patch_count))
        patch_w = patch[2]
        patch_h = patch[3]
        patch_date_diff = np.zeros((patch_h, patch_w), dtype=np.uint16)
        patch_dem_diff = np.empty((patch_h, patch_w), dtype=np.float32)
        patch_dem_diff[:] = np.nan

        # use a dict to cache data read from disk (only what is needed)
        dem_data_dict = {}
        for p_idx, pair in enumerate(date_pair_list_sorted):
            diff_days = (pair[1] - pair[0]).days
            basic.outputlogMessage('Getting DEM difference using the one on %s and %s, total day diff: %d'
                                   % (timeTools.date2str(pair[1]), timeTools.date2str(pair[0]), diff_days))
            # print(pair, ':', (pair[1] - pair[0]).days)

            data_old, data_new = read_date_dem_to_memory(p_idx, pair, date_pair_list_sorted,
                                                         dem_data_dict, dem_groups_date,
                                                         boundary=patch)
            # print('data_old shape:', data_old.shape)
            # print('data_new shape:', data_new.shape)

            diff_two = data_new - data_old
            # print(diff_two)

            # fill the elements that are still NaN
            new_ele = np.where(np.logical_and(np.isnan(patch_dem_diff), ~np.isnan(diff_two)))
            patch_dem_diff[new_ele] = diff_two[new_ele]
            patch_date_diff[new_ele] = diff_days

            # check if all pixels have been filled (no NaN pixels left)
            diff_remain_hole = np.where(np.isnan(patch_dem_diff))
            # basic.outputlogMessage(' remain %.4f percent pixels need to be filled'
            #                        % (100.0 * diff_remain_hole[0].size / patch_dem_diff.size))
            if diff_remain_hole[0].size < 1:
                break

        # copy to the entire image
        row_s = patch[1]
        row_e = patch[1] + patch[3]
        col_s = patch[0]
        col_e = patch[0] + patch[2]
        dem_diff_np[row_s:row_e, col_s:col_e] = patch_dem_diff
        date_diff_np[row_s:row_e, col_s:col_e] = patch_date_diff

    # save date diff to tif (16 bit)
    raster_io.save_numpy_array_to_rasterfile(date_diff_np, out_date_diff, dem_tif_list[0],
                                             nodata=0, compress='lzw', tiled='yes',
                                             bigtiff='if_safer')

    # # stretch the DEM difference, save to 8 bit
    # dem_diff_np_8bit = raster_io.image_numpy_to_8bit(dem_diff_np, 10, -10, dst_nodata=0)
    # out_dem_diff_8bit = io_function.get_name_by_adding_tail(out_dem_diff, '8bit')
    # raster_io.save_numpy_array_to_rasterfile(dem_diff_np_8bit, out_dem_diff_8bit, dem_tif_list[0], nodata=0)

    # if possible, save to 16 bit to reduce disk usage
    # dem_diff_np[0:5, 0] = -500
    # dem_diff_np[0, 0:5] = 500
    # print(np.nanmin(dem_diff_np))
    # print(np.nanmax(dem_diff_np))
    # range_int16 = np.iinfo(np.int16)
    # dem_diff_np_cm = dem_diff_np * 100
    # if np.nanmin(dem_diff_np_cm) < range_int16.min or np.nanmax(dem_diff_np_cm) > range_int16.max:

    # save dem diff to file (float32), in meters
    raster_io.save_numpy_array_to_rasterfile(dem_diff_np, out_dem_diff, dem_tif_list[0],
                                             nodata=-9999, compress='lzw', tiled='yes',
                                             bigtiff='if_safer')
    # else:
    #     # save dem diff to int16, in centimeters; can only handle diff from -327.67 to 327.67 meters
    #     dem_diff_np_cm = dem_diff_np_cm.astype(np.int16)  # save to int16
    #     raster_io.save_numpy_array_to_rasterfile(dem_diff_np_cm, out_dem_diff_cm, dem_tif_list[0],
    #                                              nodata=32767, compress='lzw', tiled='yes', bigtiff='if_safer')

    return True
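# The core fill rule used in the patch loop above, isolated for clarity: pairs
# are visited from the widest date span to the narrowest, and each pass only
# fills pixels that are still NaN. Illustrative helper, not in the module.
import numpy as np

def fill_first_valid(acc, new_vals):
    # fill only positions that are NaN in the accumulator but valid in new_vals
    holes = np.logical_and(np.isnan(acc), ~np.isnan(new_vals))
    acc[holes] = new_vals[holes]
    return acc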