def hoekman(src_path: str, norm=False) -> None:
    ''' Hoekman decomposition, saved to file. hoekman_and_norm() is an older
    version of this function and a subset of it.
    @in     -src_path   -source path, which should contain a 'C3' folder
    @in     -norm       -normalize or not. Default: False
    '''
    if 'C3' in os.listdir(src_path):
        # read C3 file
        print(f'hoekman on dir (norm={norm}): {src_path}', end='')
        c3 = psr.read_c3(osp.join(src_path, 'C3'))
        h = psr.Hokeman_decomposition(c3)

        dst_path = osp.join(src_path, 'Hoekman')
        fu.mkdir_if_not_exist(dst_path)
        # np.save(osp.join(dst_path, 'ori'), h)    # save the unnormalized file

        # normalize
        if norm:
            for ii in range(9):
                h[ii, :, :] = psr.min_max_contrast_median_map(
                    10 * np.log10(h[ii, :, :]))
                # cv2.imwrite(osp.join(dst_path, f'{ii}.jpg'), (h[0, :, :]*255).astype(np.uint8))
                # plt.hist() can take a very long time to process a 2D array
                # but little time for a 1D array, so flatten the array if
                # possible
                # plt.hist(h[ii, :, :].flatten())
                # plt.savefig(osp.join(dst_path, f'log-hist-{ii}.jpg'))

        # save to file
        if norm:
            np.save(osp.join(dst_path, 'normed'), h)
        else:
            np.save(osp.join(dst_path, 'unnormed'), h)
        print('\tdone')
    else:
        raise ValueError('wrong src path')
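# A minimal usage sketch for hoekman() (the path below is hypothetical; it
# assumes a directory that contains a 'C3' folder, as required above):
#
#   hoekman('./data/GF3/anshou/20190223', norm=True)
#   h = np.load('./data/GF3/anshou/20190223/Hoekman/normed.npy')
#   print(h.shape)    # the decomposition yields 9 channels: (9, height, width)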
def extract_H_A_alpha_span(path):
    ''' Extract H/A/alpha/Span data.

    Args:
        path (str): path to the H/A/alpha data
    '''
    if osp.isdir(path):
        path = osp.join(path, 'unnormed.npy')
    if not osp.isfile(path):
        raise IOError(f'{path} is not a valid path')
    HAalpha = np.load(path)
    assert HAalpha.shape[0] == 3, 'Wrong shape of H/A/alpha data'
    c3 = psr.read_c3(osp.dirname(path.replace('HAalpha', 'C3')),
                     out='save_space')
    assert np.array_equal(c3.shape[1:], HAalpha.shape[1:]), \
        'Unmatched C3 and H/A/alpha data pair'

    # span = C11 + C22 + C33, clipped to eps before taking the logarithm
    span = c3[0, ...] + c3[5, ...] + c3[8, ...]
    span[span < mathlib.eps] = mathlib.eps
    span = np.expand_dims(np.log(span), 0)
    HAalphaSpan = np.concatenate((HAalpha, span), axis=0)

    dst_file = path.replace('HAalpha', 'HAalphaSpan')
    print(f'extract H/A/alpha/Span data from {path} to {dst_file}')
    fu.mkdir_if_not_exist(osp.dirname(dst_file))
    np.save(dst_file, HAalphaSpan)
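# A minimal usage sketch for extract_H_A_alpha_span() (hypothetical path; it
# assumes a sibling 'C3' directory so the span can be computed from
# C11 + C22 + C33, i.e. elements 0, 5 and 8 of the save_space layout):
#
#   extract_H_A_alpha_span('./data/GF3/anshou/20190223/HAalpha/0')
#   # writes ./data/GF3/anshou/20190223/HAalphaSpan/0/unnormed.npy, (4, H, W)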
def split_train_val_test(src_path, dst_path, data_format, train_ratio=0.8):
    ''' Randomly split data into training, validation and test sets using a
    specified ratio of training samples.

    Args:
        src_path (str): PolSAR file path
        dst_path (str): path to write the split files
        data_format (str): 'Hoekman' or 's2' or 'C3'
        train_ratio (float): ratio of training samples. Default: 0.8
    '''
    # collect all files
    all_filenames = dict()
    for location in os.listdir(src_path):
        all_filenames[location] = []
        for root, dirs, _ in os.walk(osp.join(src_path, location)):
            if root.endswith(data_format):
                for dir in dirs:
                    all_filenames[location].append(osp.join(root, dir))

    # split: the non-training samples are divided evenly between val and test
    val_ratio = (1 - train_ratio) / 2
    test_ratio = (1 - train_ratio) / 2
    val_split = []
    test_split = []
    train_split = []
    num_all_files = 0
    for k, v in all_filenames.items():
        num_all_files += len(v)
        num_val = round(len(v) * val_ratio)
        num_test = round(len(v) * test_ratio)
        val_test_idx = random.sample(range(len(v)), num_val + num_test)
        val_test, train = types.list_pop(v, val_test_idx)
        train_split += train
        val_split += val_test[:len(val_test) // 2]
        test_split += val_test[len(val_test) // 2:]
    print(f'num of all files: {num_all_files}')
    print('num of train split: ', len(train_split))
    print('num of val split: ', len(val_split))
    print('num of test split: ', len(test_split))
    assert len(train_split) + len(val_split) + len(test_split) == num_all_files

    # save to file
    dst_path = osp.join(dst_path, data_format, str(train_ratio))
    fu.mkdir_if_not_exist(dst_path)
    with open(osp.join(dst_path, 'val.txt'), 'w') as f:
        for item in val_split:
            f.write(f'{item}\n')
    with open(osp.join(dst_path, 'train.txt'), 'w') as f:
        for item in train_split:
            f.write(f'{item}\n')
    with open(osp.join(dst_path, 'test.txt'), 'w') as f:
        for item in test_split:
            f.write(f'{item}\n')
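# A minimal usage sketch for split_train_val_test() (hypothetical paths; with
# train_ratio=0.8 the remaining 20% is divided evenly between val and test):
#
#   split_train_val_test('./data/GF3', './split', 'Hoekman', train_ratio=0.8)
#   with open('./split/Hoekman/0.8/train.txt') as f:
#       train_dirs = [line.strip() for line in f]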
def split_hoekman_file(src_path: str, patch_size=(512, 512),
                       filename='normed.npy') -> None:
    ''' Split a Hoekman file into patches.
    @in     -src_path   -source path, which should contain a 'Hoekman' folder
            -patch_size -size of a patch, in [height, width] format
            -filename   -name of the Hoekman data file
    '''
    if src_path.split('/')[-1] != 'Hoekman':
        src_path = osp.join(src_path, 'Hoekman')
    print(f'splitting hoekman data on: {src_path}')
    whole_data = np.load(osp.join(src_path, filename))
    whole_het, whole_wes = whole_data.shape[1:]
    idx = 0
    start_x = 0
    start_y = 0
    p_het, p_wes = patch_size
    while start_x < whole_wes and start_y < whole_het:
        # print(f'    splitting the {idx}-th patch')
        # write bin file
        p_data = whole_data[:, start_y:start_y + p_het,
                            start_x:start_x + p_wes]
        p_folder = osp.join(src_path, str(idx))
        fu.mkdir_if_not_exist(p_folder)
        np.save(osp.join(p_folder, 'normed'), p_data)
        # write image, which is cut from the big picture, not re-generated
        # p_img = (p_data[0, :, :]*255).astype(np.uint8)
        # cv2.imwrite(osp.join(p_folder, 'img.jpg'), p_img)

        # advance to the next patch position
        idx += 1
        start_x += p_wes
        if start_x >= whole_wes:
            # next row
            start_x = 0
            start_y += p_het
            if start_y >= whole_het:
                # finished
                print('split', idx, 'patches in total')
                return
            elif start_y + p_het > whole_het:
                # last row: shift up so the patch fits inside the image
                start_y = whole_het - p_het
        elif start_x + p_wes > whole_wes:
            # last column: shift left so the patch fits inside the image
            start_x = whole_wes - p_wes
    print('all split')
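# A minimal usage sketch for split_hoekman_file() (hypothetical path; assumes
# a 'normed.npy' of shape (9, H, W) under the 'Hoekman' folder). Note that
# edge patches are shifted inward to fit, so the last row/column of patches
# may overlap their neighbours:
#
#   split_hoekman_file('./data/GF3/anshou/20190223', patch_size=(512, 512))
#   p0 = np.load('./data/GF3/anshou/20190223/Hoekman/0/normed.npy')  # (9, 512, 512)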
def exact_patch(self, dst_path, rois):
    ''' Extract patches of PolSAR data.

    Args:
        dst_path (str): destination folder
        rois (list): windows specifying the positions of the patches, each in
            the form [x, y, w, h], where x and y are the coordinates of the
            lower right corner of the patch
    '''
    pauli = self.to_rgb()
    for ii, roi in enumerate(rois):
        dst_folder = osp.join(dst_path, str(ii))
        fu.mkdir_if_not_exist(dst_folder)
        with open(osp.join(dst_folder, 'README.txt'), 'w') as f:
            f.write(f'Original file path: {self.path}\nROI: {roi}\nin the'
                    f' format of (x, y, w, h), where x and y are the'
                    f' coordinates of the lower right corner')
    raise NotImplementedError
def hoekman_and_norm(src_path: str) -> None:
    ''' Hoekman decomposition, normalization as for pauliRGB, saved to file.
    @in     -src_path   -source path, which should contain a 'C3' folder
    '''
    if 'C3' in os.listdir(src_path):
        print(f'hoekman and norm on dir: {src_path}', end='')
        c3 = psr.read_c3(osp.join(src_path, 'C3'))
        h = psr.Hokeman_decomposition(c3)

        dst_path = osp.join(src_path, 'Hoekman')
        fu.mkdir_if_not_exist(dst_path)
        # np.save(osp.join(dst_path, 'ori'), h)    # save the unnormalized file

        for ii in range(9):
            h[ii, :, :] = psr.min_max_contrast_median_map(
                10 * np.log10(h[ii, :, :]))
            # cv2.imwrite(osp.join(dst_path, f'{ii}.jpg'), (h[0, :, :]*255).astype(np.uint8))
            # plt.hist() can take a very long time to process a 2D array but
            # little time for a 1D array, so flatten the array if possible
            # plt.hist(h[ii, :, :].flatten())
            # plt.savefig(osp.join(dst_path, f'log-hist-{ii}.jpg'))

        np.save(osp.join(dst_path, 'normed'), h)
        print('\tdone')
    else:
        raise ValueError('wrong src path')
def PCA_uni_rot(path, n_components, num_channels=48, save_model=True,
                save_data=False) -> None:
    ''' Transform z-scored uni_rot data using PCA; also applicable to
    coherent pattern files.

    Args:
        path (str): path to the z-scored data
        n_components (float, or list/tuple of float): param for the PCA
            function
        num_channels (int): number of channels. Default: 48
        save_model (bool): whether to save the PCA model. Default: True
        save_data (bool): whether to save the transformed data. Default: False

    Note: the transform matrix is obtained from the training set only, and
    then applied to the val and test sets.
    '''
    print('transform PolSAR rotation matrix using PCA on:', path)

    # check input
    if osp.isdir(osp.join(path, 'training')):
        train_filenames = os.listdir(osp.join(path, 'training'))
    # elif osp.isdir(osp.join(path, 'train')):
    #     train_filenames = os.listdir(osp.join(path, 'train'))
    else:
        raise IOError('Can not find training set')

    # load training set data
    # train_files = np.expand_dims(np.load(r'data/PolSAR_building_det/zscored/GF3/training/anshou20190223_080.npy'), 0)
    train_files = np.empty((len(train_filenames), num_channels, 512, 512))
    print('loading training files')
    for ii in tqdm.trange(len(train_filenames)):
        filename = train_filenames[ii]
        full_file_path = osp.join(path, 'training', filename)
        train_files[ii, ...] = np.load(full_file_path)

    # fit the PCA model on the training data and save the transformed data to
    # file; flatten pixels to (num_pixels, num_channels) once, so the same
    # array can be reused for every value of n_components
    print('fitting PCA model')
    if not isinstance(n_components, (list, tuple)):
        n_components = [n_components]
    train_flat = train_files.transpose(0, 2, 3, 1).reshape(-1, num_channels)
    for n_component in n_components:
        print(f'\nn_component={n_component}')
        pca = PCA(n_components=n_component)
        train_reduced = pca.fit_transform(train_flat)
        train_reduced = train_reduced.reshape(len(train_filenames), 512, 512, -1)
        print(f'reduced dimension: {train_reduced.shape}\n'
              f'var: {pca.explained_variance_}, \n'
              f'var ratio: {pca.explained_variance_ratio_}\n'
              f'cumsum:\n{np.cumsum(pca.explained_variance_ratio_)}')
        if save_model:
            model_path = osp.split(osp.split(path)[0])[0]
            model_path = osp.join(model_path, f'PCA_{n_component}.p')
            pickle.dump(pca, open(model_path, 'wb'))
            print(f'saved model on {model_path}')
        if save_data:
            print('dumping training data files')
            fu.mkdir_if_not_exist(
                osp.join(path, 'training').replace('zscored',
                                                   f'PCA_{n_component}'))
            for ii in tqdm.trange(len(train_filenames)):
                filename = train_filenames[ii]
                full_file_path = osp.join(path, 'training', filename).replace(
                    'zscored', f'PCA_{n_component}')
                np.save(full_file_path, train_reduced[ii, ...])
        del train_reduced

        # apply the model to the val set
        val_filenames = os.listdir(osp.join(path, 'validation'))
        val_files = np.empty((len(val_filenames), num_channels, 512, 512))
        print('loading val files')
        for ii in tqdm.trange(len(val_filenames)):
            filename = val_filenames[ii]
            full_file_path = osp.join(path, 'validation', filename)
            val_files[ii, ...] = np.load(full_file_path)
        print('PCA transform on val set')
        val_files = val_files.transpose(0, 2, 3, 1).reshape(-1, num_channels)
        val_files = pca.transform(val_files).reshape(
            len(val_filenames), 512, 512, -1)
        print('dumping val data files')
        fu.mkdir_if_not_exist(
            osp.join(path, 'validation').replace('zscored',
                                                 f'PCA_{n_component}'))
        for ii in tqdm.trange(len(val_filenames)):
            filename = val_filenames[ii]
            full_file_path = osp.join(path, 'validation', filename).replace(
                'zscored', f'PCA_{n_component}')
            np.save(full_file_path, val_files[ii, ...])
        del val_files, val_filenames

        # apply the model to the test set
        test_filenames = os.listdir(osp.join(path, 'test'))
        test_files = np.empty((len(test_filenames), num_channels, 512, 512))
        print('loading test files')
        for ii in tqdm.trange(len(test_filenames)):
            filename = test_filenames[ii]
            full_file_path = osp.join(path, 'test', filename)
            test_files[ii, ...] = np.load(full_file_path)
        print('PCA transform on test set')
        test_files = test_files.transpose(0, 2, 3, 1).reshape(-1, num_channels)
        test_files = pca.transform(test_files).reshape(
            len(test_filenames), 512, 512, -1)
        print('dumping test data files')
        fu.mkdir_if_not_exist(
            osp.join(path, 'test').replace('zscored', f'PCA_{n_component}'))
        for ii in tqdm.trange(len(test_filenames)):
            filename = test_filenames[ii]
            full_file_path = osp.join(path, 'test', filename).replace(
                'zscored', f'PCA_{n_component}')
            np.save(full_file_path, test_files[ii, ...])
        del test_files, test_filenames
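# A minimal usage sketch for PCA_uni_rot() (hypothetical path and parameters;
# assumes 'training', 'validation' and 'test' subfolders of z-scored 512x512
# .npy files with 48 channels each):
#
#   PCA_uni_rot('./data/uni_rot/zscored/GF3', n_components=[16, 0.95],
#               num_channels=48, save_model=True, save_data=True)
#   pca = pickle.load(open('./data/uni_rot/PCA_16.p', 'rb'))  # reuse the model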
def zscore_uni_rot(path: str, num_channels=7, type='GF3') -> None:
    ''' Z-score the PolSAR rotation matrix, using statistics from the
    training set only, and apply them to the train, val and test sets; also
    applicable to coherent pattern files.

    Args:
        path (str): path to the files to be z-scored
        num_channels (int): number of channels. Default: 7
        type (str): 'GF3' or 'RS2'
    '''
    print('zscore process:')

    # get mean and std values of the training set
    all_train_files = fu.read_file_as_list(
        osp.join(work_dir, type + '_training.txt'))
    full_data = np.empty((len(all_train_files), num_channels, 512, 512))
    for ii in tqdm.trange(len(all_train_files)):
        filename = osp.join(path, type,
                            all_train_files[ii]).replace('png', 'npy')
        full_data[ii, ...] = np.load(filename)
    m = np.mean(full_data, axis=(0, 2, 3), keepdims=True).squeeze(0)
    std = np.std(full_data, axis=(0, 2, 3), ddof=1, keepdims=True).squeeze(0)
    print(f'mean: {m}, \nunbiased std: {std}')

    # apply the derived mean and std values to the val, test and training sets
    print('zscore val files')
    all_val_files = fu.read_file_as_list(
        osp.join(work_dir, type + '_validation.txt'))
    for ii in tqdm.trange(len(all_val_files)):
        file = all_val_files[ii].replace('png', 'npy')
        src_path = osp.join(path, type, file)
        f = np.load(src_path)
        f = (f - m) / std
        dst_path = osp.join(path, type, 'validation',
                            file).replace('unnormed', 'zscored')
        fu.mkdir_if_not_exist(osp.split(dst_path)[0])
        np.save(dst_path, f)

    print('zscore test files')
    all_test_files = fu.read_file_as_list(
        osp.join(work_dir, type + '_test.txt'))
    for ii in tqdm.trange(len(all_test_files)):
        file = all_test_files[ii].replace('png', 'npy')
        src_path = osp.join(path, type, file)
        f = np.load(src_path)
        f = (f - m) / std
        dst_path = osp.join(path, type, 'test',
                            file).replace('unnormed', 'zscored')
        fu.mkdir_if_not_exist(osp.split(dst_path)[0])
        np.save(dst_path, f)

    print('zscore train files')
    # reuse the training file list loaded above
    for ii in tqdm.trange(len(all_train_files)):
        file = all_train_files[ii].replace('png', 'npy')
        src_path = osp.join(path, type, file)
        f = np.load(src_path)
        f = (f - m) / std
        dst_path = osp.join(path, type, 'training',
                            file).replace('unnormed', 'zscored')
        fu.mkdir_if_not_exist(osp.split(dst_path)[0])
        np.save(dst_path, f)
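# A minimal usage sketch for zscore_uni_rot() (hypothetical path; assumes
# work_dir holds 'GF3_training.txt' etc., and that each listed file exists as
# an 'unnormed' .npy). The per-channel standardization is f' = (f - mean) / std,
# with both statistics computed on the training set only:
#
#   zscore_uni_rot('./data/uni_rot/unnormed', num_channels=7, type='GF3')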
def flatten_directory(path: str, folder='uni_rot', ori_data_type='mat',
                      select_features=None):
    ''' Flatten a hierarchical directory according to label files.

    Args:
        path (str): path to the label files, which have not been divided into
            train, val and test sets
        folder (str): folder in which the file data is stored; also indicates
            the data type
        ori_data_type (str): original data type. 'mat': Matlab file,
            'npy': numpy file, 'C3': diagonal elements of the C3 matrix with
            logarithm applied
        select_features (tuple): features to be selected; None selects all.
            Default: None
    '''
    src_path = path.replace('label', 'data')
    pngs = glob.glob(osp.join(path, '*.png'))
    for png in pngs:
        # parse location, date and patch index from the label file name
        png = osp.basename(png)
        loc = re.findall(r'[a-z]+', png)[0]
        date = re.findall(r'\d{8}', png)[0]
        idx = re.findall(r'_(\d{3}).', png)[0]
        idx = idx.lstrip('0') if idx.lstrip('0') else '0'
        if folder == 'C3':
            src_file_path = osp.join(src_path, loc, date, folder, idx)
        else:
            src_file_path = osp.join(src_path, loc, date, folder, idx,
                                     'unnormed.' + ori_data_type)
        dst_file_path = osp.join(path.replace('label', folder + '_unnormed'),
                                 png.replace('png', 'npy'))
        fu.mkdir_if_not_exist(osp.dirname(dst_file_path))
        print(f'copy {src_file_path.replace(src_path, "")} to '
              f'{dst_file_path.replace(src_path, "")}')

        # handle the different original file types
        if ori_data_type == 'mat':
            file = load_uni_rot_mat_file(src_file_path,
                                         select_features=select_features)
        elif ori_data_type == 'npy':
            file = np.load(src_file_path)
            # check for nan or inf
            num_nan = np.isnan(file).sum()
            num_inf = np.isinf(file).sum()
            if num_nan > 0:
                raise ValueError(f'{src_file_path}: nan value exist')
            if num_inf > 0:
                raise ValueError(f'{src_file_path}: inf value exist')
        elif ori_data_type == 'C3':
            file = psr.read_c3(src_file_path, out='save_space')
            file = file[(0, 5, 8), :, :]
            file[file < mathlib.eps] = mathlib.eps
            file = np.log(file)
            mathlib.check_inf_nan(file)
        np.save(dst_file_path, file)
    print('flatten directory finished\n')
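# A minimal usage sketch for flatten_directory() (hypothetical path; assumes
# label files named like 'anshou20190223_080.png', from which location, date
# and patch index are parsed):
#
#   flatten_directory('./data/PolSAR_building_det/label', folder='uni_rot',
#                     ori_data_type='mat')
#   # writes ./data/PolSAR_building_det/uni_rot_unnormed/anshou20190223_080.npy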
def test(cfg, logger, run_id):
    # Setup Augmentations
    augmentations = cfg.test.augments
    logger.info(f'using augments: {augmentations}')
    data_aug = get_composed_augmentations(augmentations)

    # Setup Dataloader
    data_loader = get_loader(cfg.data.dataloader)
    data_path = cfg.data.path
    data_loader = data_loader(
        data_path,
        data_format=cfg.data.format,
        norm=cfg.data.norm,
        split=cfg.test.dataset,
        split_root=cfg.data.split,
        log=cfg.data.log,
        augments=data_aug,
        logger=logger,
        ENL=cfg.data.ENL,
    )
    run_id = osp.join(run_id, cfg.test.dataset)
    os.mkdir(run_id)
    logger.info("data path: {}".format(data_path))
    logger.info(f'num of {cfg.test.dataset} set samples: {len(data_loader)}')

    loader = data.DataLoader(
        data_loader,
        batch_size=cfg.test.batch_size,
        num_workers=cfg.test.n_workers,
        shuffle=False,
        persistent_workers=True,
        drop_last=False,
    )

    # Setup Model
    device = f'cuda:{cfg.gpu[0]}'
    model = get_model(cfg.model).to(device)
    input_size = (cfg.model.in_channels, 512, 512)
    logger.info(f'using model: {cfg.model.arch}')
    model = torch.nn.DataParallel(model, device_ids=cfg.gpu)

    # load model params
    if osp.isfile(cfg.test.pth):
        logger.info("Loading model from checkpoint '{}'".format(cfg.test.pth))
        # load model state
        checkpoint = torch.load(cfg.test.pth)
        model.load_state_dict(checkpoint["model_state"])
    else:
        raise FileNotFoundError(f'{cfg.test.pth} file not found')

    # Setup Metrics
    running_metrics_val = runningScore(2)
    running_metrics_train = runningScore(2)
    metrics = runningScore(2)
    test_psnr_meter = averageMeter()
    test_ssim_meter = averageMeter()
    img_cnt = 0
    data_range = 255
    if cfg.data.log:
        data_range = np.log(data_range)

    # test
    model.eval()
    with torch.no_grad():
        for clean, noisy, files_path in loader:
            noisy = noisy.to(device, dtype=torch.float32)
            noisy_denoised = model(noisy)
            psnr = []
            ssim = []
            if cfg.data.simulate:
                clean = clean.to(device, dtype=torch.float32)
                for ii in range(clean.shape[0]):
                    psnr.append(
                        piq.psnr(noisy_denoised[ii, ...], clean[ii, ...],
                                 data_range=data_range).cpu())
                    ssim.append(
                        piq.ssim(noisy_denoised[ii, ...], clean[ii, ...],
                                 data_range=data_range).cpu())
                test_psnr_meter.update(np.array(psnr).mean(), n=clean.shape[0])
                test_ssim_meter.update(np.array(ssim).mean(), n=clean.shape[0])

            clean = clean.cpu().numpy()
            noisy = noisy.cpu().numpy()
            noisy_denoised = noisy_denoised.cpu().numpy()

            # save images
            for ii in range(clean.shape[0]):
                file_path = files_path[ii][29:]
                file_path = file_path.replace(r'/', '_')
                file_path = osp.splitext(file_path)[0]
                file_ori = noisy[ii, ...]
                file_clean = clean[ii, ...]
                file_denoise = noisy_denoised[ii, ...]
                path_ori = osp.join(run_id, file_path)
                path_denoise = osp.join(run_id, file_path)
                path_clean = osp.join(run_id, file_path)
                if cfg.data.simulate:
                    metric_str = f'_{psnr[ii].item():.3f}_{ssim[ii].item():.3f}'
                    path_ori += metric_str
                    path_denoise += metric_str
                    path_clean += metric_str
                path_ori = osp.join(path_ori, 'original')
                path_denoise = osp.join(path_denoise, 'denoise')
                path_clean = osp.join(path_clean, 'clean')
                fu.mkdir_if_not_exist(path_ori)
                fu.mkdir_if_not_exist(path_denoise)
                fu.mkdir_if_not_exist(path_clean)
                print('clean')
                psr.write_hoekman_image(file_clean, path_clean, is_print=True)
                print('noisy')
                psr.write_hoekman_image(file_ori, path_ori, is_print=True)
                print('denoise')
                psr.write_hoekman_image(file_denoise, path_denoise,
                                        is_print=True)

    if cfg.data.simulate:
        logger.info(
            f'overall psnr: {test_psnr_meter.avg}, ssim: {test_ssim_meter.avg}')
    logger.info('\ndone')
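# A minimal sketch of the config fields test() reads (names inferred from the
# attribute accesses above; any values shown are hypothetical):
#
#   cfg.data:  dataloader, path, format, norm, split, log, ENL, simulate
#   cfg.test:  augments, dataset, batch_size, n_workers, pth
#   cfg.model: arch, in_channels
#   cfg.gpu:   list of device ids, e.g. [0]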