def dem_to_relative_dem(input, output, patch_width, patch_height, process_num):
    """Convert an absolute-elevation DEM to an 8-bit relative DEM, patch by patch.

    The image is split into patches (to limit memory use); each patch is
    converted by ``dem_to_relative_8bit_a_patch`` and pasted into one mosaic,
    which is then written to ``output``.

    :param input: path of the input DEM raster
    :param output: path of the output 8-bit relative-DEM raster
    :param patch_width: width (pixels) of each processing patch
    :param patch_height: height (pixels) of each processing patch
    :param process_num: number of worker processes (1 means run serially)
    :return: True on success (also when the output already exists)
    """
    if os.path.isfile(output):
        basic.outputlogMessage('%s exists, skip' % output)
        return True

    height, width, _, _ = raster_io.get_height_width_bandnum_dtype(input)
    dst_nodata = 255

    # divide the image into many small patches, then calculate one by one, solving memory issues.
    image_patches = split_image.sliding_window(width, height, patch_width, patch_height,
                                               adj_overlay_x=0, adj_overlay_y=0)
    patch_count = len(image_patches)

    # mosaic for the whole image; patches are pasted into it as they complete
    dem_relative_8bit_np = np.zeros((height, width), dtype=np.uint8)

    def _paste_patch(patch, patch_data):
        # paste one patch result into the mosaic; patch is (xoff, yoff, xsize, ysize)
        row_s = patch[1]
        row_e = patch[1] + patch[3]
        col_s = patch[0]
        col_e = patch[0] + patch[2]
        dem_relative_8bit_np[row_s:row_e, col_s:col_e] = patch_data

    if process_num == 1:
        for idx, patch in enumerate(image_patches):
            _, patch_rel_dem_8bit = dem_to_relative_8bit_a_patch(
                idx, patch, patch_count, input, dst_nodata)
            _paste_patch(patch, patch_rel_dem_8bit)
    else:
        # use a context manager so worker processes are always cleaned up,
        # even if a worker raises (the original leaked the pool on error)
        with Pool(process_num) as theadPool:
            parameters_list = [(idx, patch, patch_count, input, dst_nodata)
                               for idx, patch in enumerate(image_patches)]
            results = theadPool.starmap(dem_to_relative_8bit_a_patch, parameters_list)
        for res in results:
            patch, patch_rel_dem_8bit = res
            _paste_patch(patch, patch_rel_dem_8bit)

    # save the relative DEM to tif (8 bit); nodata matches dst_nodata used per patch
    # (the original comment said "date diff (16 bit)" -- copied from a sibling function)
    raster_io.save_numpy_array_to_rasterfile(dem_relative_8bit_np, output, input,
                                             nodata=dst_nodata, compress='lzw',
                                             tiled='yes', bigtiff='if_safer')
    return True
def main(options, args): msi_files = args # all images in this file should have the same width and height # output = options.output name_index = options.name_index if len(msi_files) < 1: raise IOError('NO input images') # test_TheilSen() annual_based = options.annual_based # sort the file to make msi_files = sorted(msi_files) # for file in msi_files: # print(file) # test aoi = (300, 250, 600, 300) # (xoff, yoff ,xsize, ysize) in pixels # aoi = (300, 250, 10, 20) # band_index = [1,2,3] # for test valid_month = [7, 8] confidence_inter = 0.95 # split the image and label img_obj = RSImageclass() if img_obj.open(msi_files[0]) is False: raise IOError('Open %s failed' % msi_files[0]) width = img_obj.GetWidth() height = img_obj.GetHeight() patch_w = 200 patch_h = 200 patch_boundary = split_image.sliding_window( width, height, patch_w, patch_h, 0, 0) # boundary of patch (xoff,yoff ,xsize, ysize) # use multiple thread num_cores = multiprocessing.cpu_count() print('number of thread %d' % num_cores) # theadPool = mp.Pool(num_cores) # multi threads, can not utilize all the CPUs? not sure hlc 2018-4-19 theadPool = Pool(num_cores) # multi processes # for idx, aoi in enumerate(patch_boundary): # print(idx, aoi) tmp_dir = '%s_trend_patches' % name_index parameters_list = [(msi_files, aoi, name_index, valid_month, confidence_inter, os.path.join(tmp_dir, '%d.tif' % idx), annual_based) for idx, aoi in enumerate(patch_boundary)] results = theadPool.map(cal_trend_for_one_index_parallel, parameters_list)
def segment_a_grey_image(img_path, save_dir, process_num, org_raster=None, b_save_patch_label=False):
    """Segment a grey (single-band) image into labeled objects, patch by patch.

    The image is processed in 1024x1024 patches by ``segment_a_patch`` in a
    multiprocessing pool; per-patch label arrays are merged into one int32
    label raster, with each patch's labels offset so IDs stay unique across
    patches.

    :param img_path: path of the grey image to segment
    :param save_dir: directory for the output label raster and attribute file
    :param process_num: number of worker processes
    :param org_raster: optional original raster (same size as img_path) used
        by segment_a_patch to compute object attributes
    :param b_save_patch_label: if True, return the list of per-patch label
        files instead of merging into one raster
    :return: path of the saved label raster, or a list of patch label files
        when b_save_patch_label is True
    """
    out_pre = os.path.splitext(os.path.basename(img_path))[0]
    label_path = os.path.join(save_dir, out_pre + '_label.tif')
    if os.path.isfile(label_path):
        basic.outputlogMessage('%s exist, skip segmentation' % label_path)
        return label_path

    height, width, band_num, date_type = raster_io.get_height_width_bandnum_dtype(
        img_path)
    print('input image: height, width, band_num, date_type', height, width,
          band_num, date_type)

    # if the original data is available, then calculate the attributes based on that
    if org_raster is not None:
        org_height, org_width, org_band_num, org_date_type = raster_io.get_height_width_bandnum_dtype(
            org_raster)
        if org_height != height or org_width != width:
            raise ValueError('%s and %s do not have the same size' %
                             (img_path, org_raster))

    save_labes = np.zeros((height, width), dtype=np.int32)

    # divide the image the many small patches, then calcuate one by one, solving memory issues.
    image_patches = split_image.sliding_window(width, height, 1024, 1024,
                                               adj_overlay_x=0, adj_overlay_y=0)
    patch_count = len(image_patches)

    # for idx, patch in enumerate(image_patches):
    #     out_patch,out_labels = segment_a_patch(idx, patch, patch_count,img_path)
    #     # copy to the entire image
    #     row_s = patch[1]
    #     row_e = patch[1] + patch[3]
    #     col_s = patch[0]
    #     col_e = patch[0] + patch[2]
    #     save_labes[row_s:row_e, col_s:col_e] = out_labels

    theadPool = Pool(process_num)
    parameters_list = [(idx, patch, patch_count, img_path, org_raster, b_save_patch_label)
                       for idx, patch in enumerate(image_patches)]
    results = theadPool.starmap(segment_a_patch, parameters_list)

    patch_label_path_list = []
    patch_label_id_range = []
    object_attributes = {}  # object id (label) and attributes (list)
    for res in results:
        patch, out_labels, nodata, attributes = res
        if isinstance(out_labels, str) and os.path.isfile(out_labels):  # if it's a label file
            patch_label_path_list.append(out_labels)
        else:
            # copy to the entire image; patch is (xoff, yoff, xsize, ysize)
            row_s = patch[1]
            row_e = patch[1] + patch[3]
            col_s = patch[0]
            col_e = patch[0] + patch[2]
            # despite the name, this holds the current MAXIMUM label in the
            # mosaic; it is the offset applied to this patch's labels
            current_min = np.max(save_labes)
            print('current_max', current_min)
            patch_label_id_range.append(current_min)
            save_labes[row_s:row_e, col_s:col_e] = out_labels + current_min + 1
            if attributes is not None:
                update_label_attr = {}
                for key in attributes:
                    # NOTE(review): labels are written with offset
                    # current_min + 1, but attribute keys are shifted by
                    # current_min only -- looks like an off-by-one unless
                    # segment_a_patch returns 1-based attribute keys; verify.
                    update_label_attr[key + current_min] = attributes[key]
                # add to the attributes
                object_attributes.update(update_label_attr)

    # # apply median filter (remove some noise),
    # we should not use median filter, because it's labels, not images.
    # label_blurs = cv2.medianBlur(np.float32(save_labes), 3)  # with kernal=3, cannot accept int32
    # # print(label_blurs, label_blurs.dtype)
    # save_labes = label_blurs.astype(np.int32)

    # return a list of labels saved in current working folder.
    if b_save_patch_label:
        return patch_label_path_list

    if os.path.isdir(save_dir) is False:
        io_function.mkdir(save_dir)

    # save attributes (if not empty)
    if object_attributes:
        attribute_path = os.path.join(save_dir, out_pre + '_attributes.txt')
        io_function.save_dict_to_txt_json(attribute_path, object_attributes)

    # save the label
    raster_io.save_numpy_array_to_rasterfile(save_labes, label_path, img_path)  # do not set nodata

    # save id ranges to txt
    label_id_range_txt = os.path.splitext(label_path)[0] + '_IDrange.txt'
    patch_label_id_range = [str(item) for item in patch_label_id_range]
    io_function.save_list_to_txt(label_id_range_txt, patch_label_id_range)
    return label_path
def make_dataset(root,list_txt,patch_w,patch_h,adj_overlay_x,adj_overlay_y,train=True):
    """
    get the patches information of the remote sensing images.
    :param root: data root
    :param list_txt: a list file contain images (one row contain one train image and one label image
    with space in the center if the input is for training; one row contain one image if it is for inference)
    :param patch_w: the width of the expected patch
    :param patch_h: the height of the expected patch
    :param adj_overlay_x: the extended distance (in pixel) to adjacent patch in x direction,
    make each patch has overlay with adjacent patch
    :param adj_overlay_y: the extended distance (in pixel) to adjacent patch in y direction
    :param train: indicate training or inference
    :return: dataset (list)
    """
    dataset = []
    # full crop size once the overlay is added on both sides
    crop_height=patch_h+2*adj_overlay_y
    crop_width=patch_w+2*adj_overlay_x
    if os.path.isfile(list_txt) is False:
        basic.outputlogMessage("error, file %s not exist"%list_txt)
        assert False
    with open(list_txt) as file_obj:
        files_list = file_obj.readlines()
    if len(files_list) < 1:
        basic.outputlogMessage("error, no file name in the %s" % list_txt)
        assert False

    if train:
        abandon_number = 0
        count_for_pure_image = 0
        for line in files_list:
            names_list = line.split()
            if len(names_list) < 1:  # empty line
                continue
            image_name = names_list[0]
            label_name = names_list[1].strip()
            # img_path = os.path.join(root,image_name)
            # label_path = os.path.join(root,label_name)
            # paths in the list file are used as-is (not joined with root)
            img_path=image_name
            label_path=label_name
            #
            (width,height) = check_input_image_and_label(img_path,label_path)
            # split the image and label
            patch_boundary = split_image.sliding_window(width, height, patch_w, patch_h,
                                                        adj_overlay_x,adj_overlay_y)
            for patch in patch_boundary:
                # remove the patch small than model input size
                # if patch[2] < crop_width or patch[3] < crop_height:# xSize < 480 or ySize < 480
                #     # # print ('not in edge mode')
                #     continue
                img_patch = patchclass(img_path,patch)
                label_patch = patchclass(label_path,patch)
                gt_test = read_patch2(label_patch)
                # a "pure" patch has a constant label (all background or all
                # foreground); keep only a random ~1/5 of those to balance the set
                max=np.amax(gt_test)
                min=np.amin(gt_test)
                if max==min:
                    count_for_pure_image = count_for_pure_image+1
                    #print ("this is %d image"%min)
                    a=random.randint(0,4)
                    if a == 1:
                        dataset.append([img_patch, label_patch])
                    else:
                        abandon_number=abandon_number+1
                    continue
                else:
                    dataset.append([img_patch, label_patch])
        print("%d images are abandoned"%abandon_number)
        print("%d images are 1 or 0 images"%count_for_pure_image)
        print("%d images in total"%len(dataset))
    else:
        for line in files_list:
            names_list = line.split()
            image_name = names_list[0]
            label_name = names_list[1].strip()
            img_path = image_name
            label_path = label_name
            (width, height) = check_input_image_and_label(img_path, label_path)
            #
            # NOTE(review): width/height are re-read from the image below,
            # overriding the values returned above -- presumably equivalent; verify.
            img_obj = RSImageclass()
            if img_obj.open(img_path) is False:
                assert False
            width = img_obj.GetWidth()
            height = img_obj.GetHeight()
            # split the image and label (test variant keeps edge patches)
            patch_boundary = split_image.sliding_window_test(width, height, patch_w, patch_h,
                                                             adj_overlay_x,adj_overlay_y)
            for patch in patch_boundary:
                # need to handle the patch with smaller size
                # if patch[2] < crop_width or patch[3] < crop_height:  # xSize < 480 or ySize < 480
                #     continue
                img_patch = patchclass(img_path, patch)
                label_patch = patchclass(label_path, patch)
                dataset.append([img_patch, label_patch])
    return dataset
def dem_diff_newest_oldest(dem_tif_list, out_dem_diff, out_date_diff, process_num,
                           b_max_subsidence=False, b_save_cm=False):
    '''
    get DEM difference, for each pixel, newest vaild value - oldest valid value

    Processes the rasters in 1024x1024 patches (serially or in a
    multiprocessing pool) and writes: the day difference (uint16), the
    old/new date-index rasters (uint8), and the DEM difference (float32
    meters, or int16 centimeters when b_save_cm is True).

    :param dem_tif_list: list of DEM tif paths (must all share one size)
    :param out_dem_diff: output path for the DEM difference raster
    :param out_date_diff: output path for the day-difference raster
    :param process_num: number of worker processes (1 = serial)
    :param b_max_subsidence: if True, use the max-subsidence patch function
        instead of newest-minus-oldest
    :param b_save_cm: if True, save the DEM difference as int16 centimeters
    :return: True on success, False when fewer than 2 DEMs are given
    '''
    if len(dem_tif_list) < 2:
        basic.outputlogMessage('error, the count of DEM is smaller than 2')
        return False

    # groups DEM with original images acquired at the same year months
    dem_groups_date = group_demTif_yearmonthDay(dem_tif_list, diff_days=0)
    # sort based on yeardate in accending order : operator.itemgetter(0)
    dem_groups_date = dict(
        sorted(dem_groups_date.items(), key=operator.itemgetter(0)))
    txt_save_path = os.path.splitext(out_date_diff)[0] + '.txt'

    # change the key to integer number after sorting and save to txt file
    dem_groups_date_sort_idx = {}
    for idx, key in enumerate(dem_groups_date.keys()):
        dem_groups_date_sort_idx[idx] = dem_groups_date[key]
    io_function.save_dict_to_txt_json(txt_save_path, dem_groups_date_sort_idx)

    date_list = list(dem_groups_date.keys())
    dem_tif_list = [dem_groups_date[key][0] for key in dem_groups_date.keys()
                    ]  # each date, only have one tif
    tif_obj_list = [raster_io.open_raster_read(tif) for tif in dem_tif_list]

    height, width, _ = raster_io.get_width_heigth_bandnum(tif_obj_list[0])

    # check them have the width and height
    for tif, obj in zip(dem_tif_list[1:], tif_obj_list[1:]):
        h, w, _ = raster_io.get_width_heigth_bandnum(obj)
        if h != height or w != width:
            raise ValueError(
                'the height and width of %s is different from others' % tif)

    # divide the image the many small patches, then calcuate one by one, solving memory issues.
    image_patches = split_image.sliding_window(width, height, 1024, 1024,
                                               adj_overlay_x=0, adj_overlay_y=0)
    patch_count = len(image_patches)
    tif_obj_list = None  # release the open raster objects before forking workers

    # read all and their date
    date_pair_list = list(combinations(date_list, 2))
    date_diff_list = [(item[1] - item[0]).days for item in date_pair_list]
    # sort based on day difference (from max to min)
    date_pair_list_sorted = [
        x for _, x in sorted(zip(date_diff_list, date_pair_list), reverse=True)
    ]  # descending

    # get the difference; full-image mosaics filled patch by patch
    date_diff_np = np.zeros((height, width), dtype=np.uint16)
    old_date_index = np.zeros((height, width), dtype=np.uint8)
    new_date_index = np.zeros((height, width), dtype=np.uint8)
    dem_diff_np = np.empty((height, width), dtype=np.float32)
    dem_diff_np[:] = np.nan

    if process_num == 1:
        for idx, patch in enumerate(image_patches):
            _,patch_dem_diff,patch_date_diff, patch_old_date_idx,patch_new_date_idx = \
                dem_diff_newest_oldest_a_patch(idx, patch, patch_count,date_pair_list_sorted,dem_groups_date)
            # copy to the entire image; patch is (xoff, yoff, xsize, ysize)
            row_s = patch[1]
            row_e = patch[1] + patch[3]
            col_s = patch[0]
            col_e = patch[0] + patch[2]
            dem_diff_np[row_s:row_e, col_s:col_e] = patch_dem_diff
            date_diff_np[row_s:row_e, col_s:col_e] = patch_date_diff
            old_date_index[row_s:row_e, col_s:col_e] = patch_old_date_idx
            new_date_index[row_s:row_e, col_s:col_e] = patch_new_date_idx
    else:
        theadPool = Pool(process_num)
        parameters_list = [(idx, patch, patch_count, date_pair_list_sorted, dem_groups_date)
                           for idx, patch in enumerate(image_patches)]
        if b_max_subsidence is False:
            results = theadPool.starmap(dem_diff_newest_oldest_a_patch,
                                        parameters_list)
        else:
            results = theadPool.starmap(dem_diff_new_old_min_neg_diff_patch,
                                        parameters_list)
        for res in results:
            patch, patch_dem_diff, patch_date_diff, patch_old_date_idx, patch_new_date_idx = res
            # copy to the entire image
            row_s = patch[1]
            row_e = patch[1] + patch[3]
            col_s = patch[0]
            col_e = patch[0] + patch[2]
            dem_diff_np[row_s:row_e, col_s:col_e] = patch_dem_diff
            date_diff_np[row_s:row_e, col_s:col_e] = patch_date_diff
            old_date_index[row_s:row_e, col_s:col_e] = patch_old_date_idx
            new_date_index[row_s:row_e, col_s:col_e] = patch_new_date_idx
        theadPool.close()

    # save date diff to tif (16 bit)
    raster_io.save_numpy_array_to_rasterfile(date_diff_np, out_date_diff,
                                             dem_tif_list[0], nodata=0,
                                             compress='lzw', tiled='yes',
                                             bigtiff='if_safer')
    # save old and new date index to tif (8 bit)
    out_old_date_idx = io_function.get_name_by_adding_tail(
        out_date_diff, 'oldIndex')
    out_new_date_idx = io_function.get_name_by_adding_tail(
        out_date_diff, 'newIndex')
    raster_io.save_numpy_array_to_rasterfile(old_date_index, out_old_date_idx,
                                             dem_tif_list[0], nodata=255,
                                             compress='lzw', tiled='yes',
                                             bigtiff='if_safer')
    raster_io.save_numpy_array_to_rasterfile(new_date_index, out_new_date_idx,
                                             dem_tif_list[0], nodata=255,
                                             compress='lzw', tiled='yes',
                                             bigtiff='if_safer')

    # # stretch the DEM difference, save to 8 bit.
    # dem_diff_np_8bit = raster_io.image_numpy_to_8bit(dem_diff_np,10,-10,dst_nodata=0)
    # out_dem_diff_8bit = io_function.get_name_by_adding_tail(out_dem_diff, '8bit')
    # raster_io.save_numpy_array_to_rasterfile(dem_diff_np_8bit, out_dem_diff_8bit, dem_tif_list[0], nodata=0)

    # if possible, save to 16 bit, to save the disk storage.
    # dem_diff_np[0:5,0] = -500
    # dem_diff_np[0,0:5] = 500
    # print(np.nanmin(dem_diff_np))
    # print(np.nanmax(dem_diff_np))
    # if np.nanmin(dem_diff_np_cm) < range.min or np.nanmax(dem_diff_np_cm) > range.max:
    # save dem diff to files (float), meter
    if b_save_cm is False:
        raster_io.save_numpy_array_to_rasterfile(dem_diff_np, out_dem_diff,
                                                 dem_tif_list[0], nodata=-9999,
                                                 compress='lzw', tiled='yes',
                                                 bigtiff='if_safer')
    else:
        # save dem diff to 16bit, centimeter, only handle diff from -327.67 to 327.67 meters
        bit16_nodata = 32767
        range = np.iinfo(np.int16)  # NOTE: shadows the builtin `range` within this branch
        dem_diff_np_cm = dem_diff_np * 100
        # clip to the int16 value range before casting
        dem_diff_np_cm[dem_diff_np_cm < range.min] = range.min
        dem_diff_np_cm[dem_diff_np_cm > range.max] = range.max
        dem_diff_np_cm[np.isnan(
            dem_diff_np_cm)] = bit16_nodata  # set the nodata for int16
        dem_diff_np_cm = dem_diff_np_cm.astype(np.int16)  # save to int16
        out_dem_diff_cm = out_dem_diff
        basic.outputlogMessage(
            'note, save DEM difference (%s) to centimeter, int16, range: -327.68 to 327.67 m'
            % os.path.basename(out_dem_diff_cm))
        raster_io.save_numpy_array_to_rasterfile(dem_diff_np_cm, out_dem_diff_cm,
                                                 dem_tif_list[0],
                                                 nodata=bit16_nodata,
                                                 compress='lzw', tiled='yes',
                                                 bigtiff='if_safer')
    return True
def make_dataset(root, list_txt, patch_w, patch_h, adj_overlay, train=True):
    """
    get the patches information of the remote sensing images.
    :param root: data root
    :param list_txt: a list file contain images (one row contain one train image and one label image
    with space in the center if the input is for training; one row contain one image if it is for inference)
    :param patch_w: the width of the expected patch
    :param patch_h: the height of the expected patch
    :param adj_overlay: the extended distance (in pixel) to adjacent patch, make each patch has
    overlay with adjacent patch
    :param train: indicate training or inference
    :return: dataset (list)
    """
    dataset = []

    if os.path.isfile(list_txt) is False:
        basic.outputlogMessage("error, file %s not exist" % list_txt)
        assert False
    with open(list_txt) as file_obj:
        files_list = file_obj.readlines()
    if len(files_list) < 1:
        basic.outputlogMessage("error, no file name in the %s" % list_txt)
        assert False

    if train:
        for line in files_list:
            names_list = line.split()
            if len(names_list) < 1:  # empty line
                continue
            image_name = names_list[0]
            label_name = names_list[1].strip()
            img_path = os.path.join(root, image_name)
            label_path = os.path.join(root, label_name)
            #
            (width, height) = check_input_image_and_label(img_path, label_path)
            # split the image and label
            patch_boundary = split_image.sliding_window(
                width, height, patch_w, patch_h, adj_overlay)
            for patch in patch_boundary:
                # remove the patch small than model input size
                # NOTE(review): crop_width/crop_height are not defined in this
                # function -- presumably module-level globals; verify, otherwise
                # this raises NameError on the first small patch.
                if patch[2] < crop_width or patch[
                        3] < crop_height:  # xSize < 480 or ySize < 480
                    continue
                img_patch = patchclass(img_path, patch)
                label_patch = patchclass(label_path, patch)
                dataset.append([img_patch, label_patch])
    else:
        for line in files_list:
            names_list = line.split()
            image_name = names_list[0].strip()
            img_path = os.path.join(root, image_name)
            #
            img_obj = RSImageclass()
            if img_obj.open(img_path) is False:
                assert False
            width = img_obj.GetWidth()
            height = img_obj.GetHeight()
            # split the image and label
            patch_boundary = split_image.sliding_window(
                width, height, patch_w, patch_h, adj_overlay)
            for patch in patch_boundary:
                # need to handle the patch with smaller size
                # if patch[2] < crop_width or patch[3] < crop_height:  # xSize < 480 or ySize < 480
                #     continue
                img_patch = patchclass(img_path, patch)
                dataset.append(img_patch)
    return dataset
def dem_diff_newest_oldest(dem_tif_list, out_dem_diff, out_date_diff):
    '''
    get DEM difference, for each pixel, newest vaild value - oldest valid value

    Serial variant: processes the rasters in 1024x1024 patches, and for each
    patch walks the date pairs from the largest day span downward, filling
    each still-NaN pixel with the first valid difference; stops early once a
    patch has no NaN pixels left.

    :param dem_tif_list: list of DEM tif paths (must all share one size)
    :param out_dem_diff: output path for the DEM difference raster (float32, meters)
    :param out_date_diff: output path for the day-difference raster (uint16)
    :return: True on success, False when fewer than 2 DEMs are given
    '''
    if len(dem_tif_list) < 2:
        basic.outputlogMessage('error, the count of DEM is smaller than 2')
        return False

    # groups DEM with original images acquired at the same year months
    dem_groups_date = group_demTif_yearmonthDay(dem_tif_list, diff_days=0)
    # sort based on yeardate in accending order : operator.itemgetter(0)
    dem_groups_date = dict(
        sorted(dem_groups_date.items(), key=operator.itemgetter(0)))
    txt_save_path = os.path.splitext(out_date_diff)[0] + '.txt'
    io_function.save_dict_to_txt_json(txt_save_path, dem_groups_date)

    date_list = list(dem_groups_date.keys())
    dem_tif_list = [dem_groups_date[key][0] for key in dem_groups_date.keys()
                    ]  # each date, only have one tif
    tif_obj_list = [raster_io.open_raster_read(tif) for tif in dem_tif_list]

    height, width, _ = raster_io.get_width_heigth_bandnum(tif_obj_list[0])

    # check them have the width and height
    for tif, obj in zip(dem_tif_list[1:], tif_obj_list[1:]):
        h, w, _ = raster_io.get_width_heigth_bandnum(obj)
        if h != height or w != width:
            raise ValueError(
                'the height and width of %s is different from others' % tif)

    # divide the image the many small patches, then calcuate one by one, solving memory issues.
    image_patches = split_image.sliding_window(width, height, 1024, 1024,
                                               adj_overlay_x=0, adj_overlay_y=0)
    patch_count = len(image_patches)
    tif_obj_list = None  # release the open raster objects

    # read all and their date
    date_pair_list = list(combinations(date_list, 2))
    date_diff_list = [(item[1] - item[0]).days for item in date_pair_list]
    # sort based on day difference (from max to min)
    date_pair_list_sorted = [
        x for _, x in sorted(zip(date_diff_list, date_pair_list), reverse=True)
    ]  # descending

    # get the difference; full-image mosaics filled patch by patch
    date_diff_np = np.zeros((height, width), dtype=np.uint16)
    dem_diff_np = np.empty((height, width), dtype=np.float32)
    dem_diff_np[:] = np.nan

    for idx, patch in enumerate(image_patches):
        print('tile: %d / %d' % (idx + 1, patch_count))
        patch_w = patch[2]
        patch_h = patch[3]
        patch_date_diff = np.zeros((patch_h, patch_w), dtype=np.uint16)
        patch_dem_diff = np.empty((patch_h, patch_w), dtype=np.float32)
        patch_dem_diff[:] = np.nan

        # use dict to read data from disk (only need)
        dem_data_dict = {}
        for p_idx, pair in enumerate(date_pair_list_sorted):
            diff_days = (pair[1] - pair[0]).days
            basic.outputlogMessage(
                'Getting DEM difference using the one on %s and %s, total day diff: %d'
                % (timeTools.date2str(pair[1]), timeTools.date2str(
                    pair[0]), diff_days))
            # print(pair,':',(pair[1] - pair[0]).days)
            data_old, data_new = read_date_dem_to_memory(p_idx, pair,
                                                         date_pair_list_sorted,
                                                         dem_data_dict,
                                                         dem_groups_date,
                                                         boundary=patch)
            # print('data_old shape:',data_old.shape)
            # print('data_new shape:',data_new.shape)
            diff_two = data_new - data_old
            # print(diff_two)
            # fill the element: only pixels still NaN that have a valid
            # difference in this (widest-span-first) pair
            new_ele = np.where(
                np.logical_and(np.isnan(patch_dem_diff), ~np.isnan(diff_two)))
            patch_dem_diff[new_ele] = diff_two[new_ele]
            patch_date_diff[new_ele] = diff_days

            # check if all have been filled ( nan pixels)
            diff_remain_hole = np.where(np.isnan(patch_dem_diff))
            # basic.outputlogMessage(' remain %.4f percent pixels need to be filled'% (100.0*diff_remain_hole[0].size/patch_dem_diff.size) )
            if diff_remain_hole[0].size < 1:
                break

        # copy to the entire image; patch is (xoff, yoff, xsize, ysize)
        row_s = patch[1]
        row_e = patch[1] + patch[3]
        col_s = patch[0]
        col_e = patch[0] + patch[2]
        dem_diff_np[row_s:row_e, col_s:col_e] = patch_dem_diff
        date_diff_np[row_s:row_e, col_s:col_e] = patch_date_diff

    # save date diff to tif (16 bit)
    raster_io.save_numpy_array_to_rasterfile(date_diff_np, out_date_diff,
                                             dem_tif_list[0], nodata=0,
                                             compress='lzw', tiled='yes',
                                             bigtiff='if_safer')

    # # stretch the DEM difference, save to 8 bit.
    # dem_diff_np_8bit = raster_io.image_numpy_to_8bit(dem_diff_np,10,-10,dst_nodata=0)
    # out_dem_diff_8bit = io_function.get_name_by_adding_tail(out_dem_diff, '8bit')
    # raster_io.save_numpy_array_to_rasterfile(dem_diff_np_8bit, out_dem_diff_8bit, dem_tif_list[0], nodata=0)

    # if possible, save to 16 bit, to save the disk storage.
    # dem_diff_np[0:5,0] = -500
    # dem_diff_np[0,0:5] = 500
    # print(np.nanmin(dem_diff_np))
    # print(np.nanmax(dem_diff_np))
    # NOTE: `range` shadows the builtin and is unused now that the cm branch
    # below is commented out; kept as-is.
    range = np.iinfo(np.int16)
    # dem_diff_np_cm = dem_diff_np*100
    # if np.nanmin(dem_diff_np_cm) < range.min or np.nanmax(dem_diff_np_cm) > range.max:
    # save dem diff to files (float), meter
    raster_io.save_numpy_array_to_rasterfile(dem_diff_np, out_dem_diff,
                                             dem_tif_list[0], nodata=-9999,
                                             compress='lzw', tiled='yes',
                                             bigtiff='if_safer')
    # else:
    #     # save dem diff to 16bit, centimeter, only handle diff from -327.67 to 327.67 meters
    #     dem_diff_np_cm = dem_diff_np_cm.astype(np.int16)  # save to int16
    #     raster_io.save_numpy_array_to_rasterfile(dem_diff_np_cm, out_dem_diff_cm, dem_tif_list[0],nodata=32767,compress='lzw',tiled='yes',bigtiff='if_safer')

    return True