Example #1
# Imports assumed by these snippets. psr, fu, mathlib and types are
# project-local helper modules (PolSAR utilities, file utilities, math helpers
# and list helpers), and work_dir is a module-level constant defined elsewhere;
# Example #10 additionally relies on torch, piq and the project's
# loader/model/metric utilities.
import glob
import os
import os.path as osp
import pickle
import random
import re

import numpy as np
import tqdm
from sklearn.decomposition import PCA
def hoekman(src_path: str, norm=False) -> None:
    ''' Hoekman decomposition, saved to file. hoekman_and_norm() is an older
    version of this function and a subset of it
    @in     -src_path       -source path, which should contain a 'C3' folder
    @in     -norm           -whether to normalize, default: False
    '''
    if 'C3' in os.listdir(src_path):
        # read C3 file
        print(f'hoekman on dir (norm={norm}): {src_path}', end='')
        c3 = psr.read_c3(osp.join(src_path, 'C3'))
        h = psr.Hokeman_decomposition(c3)

        dst_path = osp.join(src_path, 'Hoekman')
        fu.mkdir_if_not_exist(dst_path)
        # np.save(osp.join(dst_path, 'ori'), h)                 # save the unnormalized file

        # normalize
        if norm:
            for ii in range(9):
                h[ii, :, :] = psr.min_max_contrast_median_map(
                    10 * np.log10(h[ii, :, :]))
                # cv2.imwrite(osp.join(dst_path, f'{ii}.jpg'), (h[0, :, :]*255).astype(np.uint8))
                # plt.hist() can take a very long time on a 2D array but almost
                # none on a 1D array, so flatten the array first if possible
                # plt.hist(h[ii, :, :].flatten())
                # plt.savefig(osp.join(dst_path, f'log-hist-{ii}.jpg'))

        # save to file
        np.save(osp.join(dst_path, 'normed' if norm else 'unnormed'), h)
        print('\tdone')

    else:
        raise ValueError(f"no 'C3' folder found in {src_path}")
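# Hedged usage sketch: the path below is hypothetical; any directory that
# directly contains a 'C3' subfolder works. With norm=True each of the 9
# channels is log-scaled and contrast-mapped before being saved to
# <src_path>/Hoekman/normed.npy (unnormed.npy otherwise).
def _demo_hoekman():
    hoekman('/data/GF3/anshou/20190223', norm=True)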
def extract_H_A_alpha_span(path):
    ''' Extract H/A/alpha/Span data
    Args:
        path (str): path to the H/A/alpha data, either the .npy file itself or
            a directory containing 'unnormed.npy'
    '''

    if osp.isdir(path):
        path = osp.join(path, 'unnormed.npy')
    if not osp.isfile(path):
        raise IOError(f'{path} is not a valid path')

    HAalpha = np.load(path)
    assert HAalpha.shape[0] == 3, 'Wrong shape of H/A/alpha data'

    c3 = psr.read_c3(osp.dirname(path.replace('HAalpha', 'C3')),
                     out='save_space')
    assert np.array_equal(
        c3.shape[1:],
        HAalpha.shape[1:]), 'Unmatched C3 and H/A/alpha data pair'

    span = c3[0, ...] + c3[5, ...] + c3[8, ...]
    span[span < mathlib.eps] = mathlib.eps
    span = np.expand_dims(np.log(span), 0)

    HAalphaSpan = np.concatenate((HAalpha, span), axis=0)

    dst_file = path.replace('HAalpha', 'HAalphaSpan')
    print(f'extract H/A/alpha/Span data from {path}   to   {dst_file}')
    fu.mkdir_if_not_exist(osp.dirname(dst_file))
    np.save(dst_file, HAalphaSpan)
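# Hedged usage sketch: assumes a hypothetical layout where
# <root>/HAalpha/0/unnormed.npy and the matching <root>/C3/0 folder both
# exist. span = C11 + C22 + C33 (total power), i.e. channels 0, 5, 8 of the
# 'save_space' C3 layout, and is stored log-scaled as a fourth channel.
def _demo_extract_H_A_alpha_span():
    extract_H_A_alpha_span('/data/GF3/anshou/20190223/HAalpha/0')
    # -> writes /data/GF3/anshou/20190223/HAalphaSpan/0/unnormed.npy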
Example #3
def split_train_val_test(src_path, dst_path, data_format, train_ratio=0.8):
    ''' Randomly split the data into training, validation and test sets using a
    specified ratio of training samples

    Args:
        src_path (str): PolSAR file path
        dst_path (str): path to write the split files
        data_format (str): 'Hoekman', 's2' or 'C3'
        train_ratio (float): ratio of training samples. Default: 0.8
    '''

    # collect all files
    all_filenames = dict()
    for location in os.listdir(src_path):
        all_filenames[location] = []
        for root, dirs, _ in os.walk(osp.join(src_path, location)):
            if root.endswith(data_format):
                for d in dirs:
                    all_filenames[location].append(osp.join(root, d))
    
    # split: val and test each take half of the non-training share
    val_ratio = (1 - train_ratio) / 2
    test_ratio = (1 - train_ratio) / 2
    val_split = []
    test_split = []
    train_split = []
    num_all_files = 0
    for k, v in all_filenames.items():
        num_all_files += len(v)
        num_val = round(len(v)*val_ratio)
        num_test = round(len(v)*test_ratio)
        val_test_idx = random.sample(range(len(v)), num_val+num_test)
        val_test, train = types.list_pop(v, val_test_idx)
        train_split += train
        val_split += val_test[:len(val_test)//2]
        test_split += val_test[len(val_test)//2:]

    print(f'num of all files: {num_all_files}')
    print('num of train split: ', len(train_split))
    print('num of val split: ', len(val_split))
    print('num of test split: ', len(test_split))

    assert len(train_split) + len(val_split) + len(test_split) == num_all_files
    # save to file
    dst_path = osp.join(dst_path, data_format, str(train_ratio))
    fu.mkdir_if_not_exist(dst_path)

    with open(osp.join(dst_path, 'val.txt'), 'w') as f:
        for item in val_split:
            f.write(f'{item}\n')

    with open(osp.join(dst_path, 'train.txt'), 'w') as f:
        for item in train_split:
            f.write(f'{item}\n')
            
    with open(osp.join(dst_path, 'test.txt'), 'w') as f:
        for item in test_split:
            f.write(f'{item}\n')
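# Worked example of the ratio logic (hedged; paths are hypothetical): with
# train_ratio=0.8, a location holding 50 patches gets val_ratio = test_ratio =
# 0.1, so 5 + 5 indices are sampled, the popped files are halved into val and
# test, and the remaining 40 go to train. The lists are written to
# <dst_path>/<data_format>/<train_ratio>/{train,val,test}.txt.
def _demo_split_train_val_test():
    split_train_val_test('/data/GF3', '/data/split', 'Hoekman', train_ratio=0.8)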
def split_hoekman_file(src_path: str,
                       patch_size=(512, 512),
                       filename='normed.npy') -> None:
    ''' Split a hoekman file into patches
    @in     -src_path       -source path, which should contain a 'Hoekman' folder
            -patch_size     -size of a patch, in [height, width] format
            -filename       -name of the hoekman data file
    '''
    if osp.basename(src_path) != 'Hoekman':
        src_path = osp.join(src_path, 'Hoekman')
    print(f'splitting hoekman data on: {src_path}')
    whole_data = np.load(osp.join(src_path, filename))
    whole_hgt, whole_wid = whole_data.shape[1:]
    idx = 0
    start_x = 0
    start_y = 0
    p_hgt, p_wid = patch_size
    while start_x < whole_wid and start_y < whole_hgt:
        # print(f'    splitting the {idx}-th patch')

        # write bin file
        p_data = whole_data[:, start_y:start_y + p_hgt,
                            start_x:start_x + p_wid]
        p_folder = osp.join(src_path, str(idx))
        fu.mkdir_if_not_exist(p_folder)
        np.save(osp.join(p_folder, 'normed'), p_data)

        # write image, which is cut from the big picture, not re-generated
        # p_img = (p_data[0, :, :]*255).astype(np.uint8)
        # cv2.imwrite(osp.join(p_folder, 'img.jpg'), p_img)

        # advance the patch index and position
        idx += 1
        start_x += p_wid
        if start_x >= whole_wid:  # next row
            start_x = 0
            start_y += p_hgt
            if start_y >= whole_hgt:  # finished
                print('total of', idx, 'patches split')
                return
            elif start_y + p_hgt > whole_hgt:  # clamp the last row to the bottom edge
                start_y = whole_hgt - p_hgt
        elif start_x + p_wid > whole_wid:  # clamp the last column to the right edge
            start_x = whole_wid - p_wid

    print('all split')
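# Worked example (hedged, hypothetical sizes): for a 9 x 1300 x 1800
# 'normed.npy' with the default 512 x 512 patch, columns start at x = 0, 512,
# 1024 plus a final column clamped to x = 1288 (= 1800 - 512), and rows start
# at y = 0, 512 plus a final row clamped to y = 788 (= 1300 - 512). Edge
# patches therefore overlap their neighbours, giving 4 x 3 = 12 patches in
# <src_path>/Hoekman/<idx>/normed.npy.
def _demo_split_hoekman_file():
    split_hoekman_file('/data/GF3/anshou/20190223', patch_size=(512, 512))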
Example #5
    def exact_patch(self, dst_path, rois):
        ''' Extract patches of PolSAR data

        Args:
            dst_path (str): destination folder
            rois (list): windows specifying the patch positions, each in the
                form [x, y, w, h], where x and y are the coordinates of the
                lower right corner of the patch
        '''

        pauli = self.to_rgb()
        for ii, roi in enumerate(rois):
            dst_folder = osp.join(dst_path, str(ii))
            fu.mkdir_if_not_exist(dst_folder)

            with open(osp.join(dst_folder, 'README.txt'), 'w') as f:
                f.write(
                    f'Original file path: {self.path}\nROI: {roi}\nin the format of (x, y, w, h), where x and y are the coordinates of the lower right corner'
                )
                raise NotImplementedError
def hoekman_and_norm(src_path: str) -> None:
    ''' Hoekman decomposition, normalized in the same way as PauliRGB, and saved to file
    @in     -src_path       -source path, which should contain a 'C3' folder
    '''
    if 'C3' in os.listdir(src_path):
        print(f'hoekman and norm on dir: {src_path}', end='')
        c3 = psr.read_c3(osp.join(src_path, 'C3'))
        h = psr.Hokeman_decomposition(c3)

        dst_path = osp.join(src_path, 'Hoekman')
        fu.mkdir_if_not_exist(dst_path)
        # np.save(osp.join(dst_path, 'ori'), h)                 # save the unnormalized file

        for ii in range(9):
            h[ii, :, :] = psr.min_max_contrast_median_map(
                10 * np.log10(h[ii, :, :]))
            # cv2.imwrite(osp.join(dst_path, f'{ii}.jpg'), (h[0, :, :]*255).astype(np.uint8))
            # plt.hist() can take a very long time on a 2D array but almost
            # none on a 1D array, so flatten the array first if possible
            # plt.hist(h[ii, :, :].flatten())
            # plt.savefig(osp.join(dst_path, f'log-hist-{ii}.jpg'))
        np.save(osp.join(dst_path, 'normed'), h)
        print('\tdone')
    else:
        raise ValueError(f"no 'C3' folder found in {src_path}")
def PCA_uni_rot(path,
                n_components,
                num_channels=48,
                save_model=True,
                save_data=False) -> None:
    ''' Transform zscored uni_rot data using PCA; also applicable to coherent pattern files

    Args:
        path (str): path to the zscored data
        n_components (float or list): param for the PCA function
        num_channels (int): number of channels. Default: 48
        save_model (bool): whether to save the PCA model. Default: True
        save_data (bool): whether to save the transformed data. Default: False

    Note: the transform matrix is derived from the training set only, then
        applied to the val and test sets
    '''

    print('transform PolSAR rotation matrix using PCA on:', path)

    # check input
    if osp.isdir(osp.join(path, 'training')):
        train_filenames = os.listdir(osp.join(path, 'training'))
    # elif osp.isdir(osp.join(path, 'train')):
    #     train_filenames = os.listdir(osp.join(path, 'train'))
    else:
        raise IOError('Can not find training set')

    # load training set data
    # train_files = np.expand_dims(np.load(r'data/PolSAR_building_det/zscored/GF3/training/anshou20190223_080.npy'), 0)
    train_files = np.empty((len(train_filenames), num_channels, 512, 512))
    print('loading training files')
    for ii in tqdm.trange(len(train_filenames)):
        filename = train_filenames[ii]
        full_file_path = osp.join(path, 'training', filename)
        train_files[ii, ...] = np.load(full_file_path)

    # fit training data to pca model, and save the transformed data to file
    print('fitting PCA model')
    if not isinstance(n_components, (list, tuple)):
        n_components = [n_components]

    # flatten to (pixels, channels) once, so that every n_component iteration
    # fits on the original data rather than on an already-transformed copy
    train_flat = train_files.transpose(0, 2, 3, 1).reshape(-1, num_channels)

    for n_component in n_components:
        print(f'\nn_component={n_component}')
        pca = PCA(n_components=n_component)
        train_transformed = pca.fit_transform(train_flat).reshape(
            len(train_filenames), 512, 512, -1)
        print(
            f'reduced dimension: {train_transformed.shape}\nvar: {pca.explained_variance_}, \nvar ratio: {pca.explained_variance_ratio_}\n cumsum:\n{np.cumsum(pca.explained_variance_ratio_)}'
        )

        if save_model:
            model_path = osp.split(osp.split(path)[0])[0]
            model_path = osp.join(model_path, f'PCA_{n_component}.p')
            pickle.dump(pca, open(model_path, 'wb'))
            print(f'saved model to {model_path}')

        if save_data:
            print('dumping training data files')
            fu.mkdir_if_not_exist(
                osp.join(path, 'training').replace('zscored',
                                                   f'PCA_{n_component}'))
            for ii in tqdm.trange(len(train_filenames)):
                filename = train_filenames[ii]
                full_file_path = osp.join(path, 'training', filename).replace(
                    'zscored', f'PCA_{n_component}')
                np.save(full_file_path, train_transformed[ii, ...])

            del train_transformed

            # apply model to val set
            val_filenames = os.listdir(osp.join(path, 'validation'))
            val_files = np.empty((len(val_filenames), num_channels, 512, 512))
            print('loading val files')
            for ii in tqdm.trange(len(val_filenames)):
                filename = val_filenames[ii]
                full_file_path = osp.join(path, 'validation', filename)
                val_files[ii, ...] = np.load(full_file_path)

            print('PCA transform on val set')
            val_files = val_files.transpose(0, 2, 3,
                                            1).reshape(-1, num_channels)
            val_files = pca.transform(val_files).reshape(
                len(val_filenames), 512, 512, -1)

            print('dumping val data files')
            fu.mkdir_if_not_exist(
                osp.join(path, 'validation').replace('zscored',
                                                     f'PCA_{n_component}'))
            for ii in tqdm.trange(len(val_filenames)):
                filename = val_filenames[ii]
                full_file_path = osp.join(path, 'validation',
                                          filename).replace(
                                              'zscored', f'PCA_{n_component}')
                np.save(full_file_path, val_files[ii, ...])

            del val_files, val_filenames

            # apply model to test set
            test_filenames = os.listdir(osp.join(path, 'test'))
            test_files = np.empty(
                (len(test_filenames), num_channels, 512, 512))
            print('loading test files')
            for ii in tqdm.trange(len(test_filenames)):
                filename = test_filenames[ii]
                full_file_path = osp.join(path, 'test', filename)
                test_files[ii, ...] = np.load(full_file_path)

            print('PCA transform on test set')
            test_files = test_files.transpose(0, 2, 3,
                                              1).reshape(-1, num_channels)
            test_files = pca.transform(test_files).reshape(
                len(test_filenames), 512, 512, -1)

            print('dumping test data files')
            fu.mkdir_if_not_exist(
                osp.join(path, 'test').replace('zscored',
                                               f'PCA_{n_component}'))
            for ii in tqdm.trange(len(test_filenames)):
                filename = test_filenames[ii]
                full_file_path = osp.join(path, 'test', filename).replace(
                    'zscored', f'PCA_{n_component}')
                np.save(full_file_path, test_files[ii, ...])

            del test_files, test_filenames
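# Hedged usage sketch: assumes a zscored root (hypothetical path) holding
# 'training', 'validation' and 'test' subfolders of 512 x 512 .npy files.
# Passing several n_components fits one PCA per value; with save_model=True
# each model is pickled two directory levels above path as PCA_<n>.p.
def _demo_PCA_uni_rot():
    PCA_uni_rot('/data/zscored/GF3', n_components=[0.95, 16], save_model=True)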
def zscore_uni_rot(path: str, num_channels=7, type='GF3') -> None:
    ''' Zscore the PolSAR rotation matrix using statistics from the training
    set only, then apply them to the train, val and test sets; also applicable
    to coherent pattern files

    Args:
        path (str): path to the files to be zscored
        num_channels (int): number of channels. Default: 7
        type (str): 'GF3' or 'RS2'
     '''

    print('zscore process:')
    #  get mean and std value of the training set
    all_train_files = fu.read_file_as_list(
        osp.join(work_dir, type + '_training.txt'))

    full_data = np.empty((len(all_train_files), num_channels, 512, 512))
    for ii in tqdm.trange(len(all_train_files)):
        filename = osp.join(path, type,
                            all_train_files[ii]).replace('png', 'npy')
        f = np.load(filename)
        full_data[ii, ...] = f

    m = np.mean(full_data, axis=(0, 2, 3), keepdims=True).squeeze(0)
    std = np.std(full_data, axis=(0, 2, 3), ddof=1, keepdims=True).squeeze(0)
    print(f'mean: {m}, \nunbiased std: {std}')

    # apply zscore to the training, val, test sets using the derived mean and std value
    print('zscore val files')
    all_val_files = fu.read_file_as_list(
        osp.join(work_dir, type + '_validation.txt'))
    for ii in tqdm.trange(len(all_val_files)):
        file = all_val_files[ii].replace('png', 'npy')
        src_path = osp.join(path, type, file)
        f = np.load(src_path)
        f = (f - m) / std
        dst_path = osp.join(path, type, 'validation',
                            file).replace('unnormed', 'zscored')
        fu.mkdir_if_not_exist(osp.split(dst_path)[0])
        np.save(dst_path, f)

    print('zscore test files')
    all_test_files = fu.read_file_as_list(
        osp.join(work_dir, type + '_test.txt'))
    for ii in tqdm.trange(len(all_test_files)):
        file = all_test_files[ii].replace('png', 'npy')
        src_path = osp.join(path, type, file)
        f = np.load(src_path)
        f = (f - m) / std
        dst_path = osp.join(path, type, 'test',
                            file).replace('unnormed', 'zscored')
        fu.mkdir_if_not_exist(osp.split(dst_path)[0])
        np.save(dst_path, f)

    print('zscore train files')
    all_train_files = fu.read_file_as_list(
        osp.join(work_dir, type + '_training.txt'))
    for ii in tqdm.trange(len(all_train_files)):
        file = all_train_files[ii].replace('png', 'npy')
        src_path = osp.join(path, type, file)
        f = np.load(src_path)
        f = (f - m) / std
        dst_path = osp.join(path, type, 'training',
                            file).replace('unnormed', 'zscored')
        fu.mkdir_if_not_exist(osp.split(dst_path)[0])
        np.save(dst_path, f)
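# Hedged usage sketch (hypothetical path; work_dir is the module-level
# constant pointing at the split lists): the channel-wise mean and unbiased
# std come from the files listed in <work_dir>/GF3_training.txt only, and the
# zscored outputs mirror the input tree with 'unnormed' replaced by 'zscored'.
def _demo_zscore_uni_rot():
    zscore_uni_rot('/data/uni_rot_unnormed', num_channels=7, type='GF3')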
def flatten_directory(path: str,
                      folder='uni_rot',
                      ori_data_type='mat',
                      select_features=None):
    ''' Flatten a hierarchical directory according to label files

    Args:
        path (str): path to the label files, not yet divided into train, val
            and test sets
        folder (str): folder in which the data files are stored; also indicates
            the data type
        ori_data_type (str): original data type; 'mat': Matlab file, 'npy':
            numpy file, 'C3': diagonal elements of the C3 matrix with logarithm
        select_features (tuple): features to be selected, None selects all.
            Default: None
    '''

    src_path = path.replace('label', 'data')
    pngs = glob.glob(osp.join(path, '*.png'))
    for png in pngs:
        png = osp.basename(png)
        loc = re.findall(r'[a-z]+', png)[0]
        date = re.findall(r'\d{8}', png)[0]
        idx = re.findall(r'_(\d{3}).', png)[0]
        idx = idx.lstrip('0') if idx.lstrip('0') else '0'
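        # e.g. png = 'anshou20190223_080.png' -> loc = 'anshou',
        # date = '20190223', idx = '080'.lstrip('0') = '80' ('000' maps to '0')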

        if folder == 'C3':
            src_file_path = osp.join(src_path, loc, date, folder, idx)
        else:
            src_file_path = osp.join(src_path, loc, date, folder, idx,
                                     'unnormed.' + ori_data_type)
        dst_file_path = osp.join(path.replace('label', folder + '_unnormed'),
                                 png.replace('png', 'npy'))
        fu.mkdir_if_not_exist(osp.dirname(dst_file_path))
        print(
            f'copy {src_file_path.replace(src_path, "")}   to   {dst_file_path.replace(src_path, "")}'
        )

        # different original file type
        if ori_data_type == 'mat':
            file = load_uni_rot_mat_file(src_file_path,
                                         select_features=select_features)
        elif ori_data_type == 'npy':
            file = np.load(src_file_path)
            # check for nan or inf
            num_nan = np.isnan(file).sum()
            num_inf = np.isinf(file).sum()
            if num_nan > 0:
                raise ValueError(f'{src_file_path}: nan value exist')
            if num_inf > 0:
                raise ValueError(f'{src_file_path}: inf value exist')
        elif ori_data_type == 'C3':
            file = psr.read_c3(src_file_path, out='save_space')
            file = file[(0, 5, 8), :, :]
            file[file < mathlib.eps] = mathlib.eps
            file = np.log(file)
            mathlib.check_inf_nan(file)

        np.save(dst_file_path, file)

    print('flatten directory finished\n')
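# Hedged usage sketch (hypothetical paths): label pngs named like
# <loc><date>_<idx>.png are resolved back to <...data>/<loc>/<date>/<folder>/<idx>/
# and copied into a flat <...label -> <folder>_unnormed> directory as .npy files.
def _demo_flatten_directory():
    flatten_directory('/data/PolSAR_building_det/label', folder='uni_rot',
                      ori_data_type='mat')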
Example #10
def test(cfg, logger, run_id):
    # Setup Augmentations
    augmentations = cfg.test.augments
    logger.info(f'using augments: {augmentations}')
    data_aug = get_composed_augmentations(augmentations)

    # Setup Dataloader
    data_loader = get_loader(cfg.data.dataloader)
    data_path = cfg.data.path
    data_loader = data_loader(
        data_path,
        data_format=cfg.data.format,
        norm=cfg.data.norm,
        split=cfg.test.dataset,
        split_root=cfg.data.split,
        log=cfg.data.log,
        augments=data_aug,
        logger=logger,
        ENL=cfg.data.ENL,
    )
    run_id = osp.join(run_id, cfg.test.dataset)
    os.mkdir(run_id)

    logger.info("data path: {}".format(data_path))
    logger.info(f'num of {cfg.test.dataset} set samples: {len(data_loader)}')

    loader = data.DataLoader(
        data_loader,
        batch_size=cfg.test.batch_size,
        num_workers=cfg.test.n_workers,
        shuffle=False,
        persistent_workers=True,
        drop_last=False,
    )

    # Setup Model
    device = f'cuda:{cfg.gpu[0]}'
    model = get_model(cfg.model).to(device)
    input_size = (cfg.model.in_channels, 512, 512)
    logger.info(f'using model: {cfg.model.arch}')

    model = torch.nn.DataParallel(model, device_ids=cfg.gpu)

    # load model params
    if osp.isfile(cfg.test.pth):
        logger.info("Loading model from checkpoint '{}'".format(cfg.test.pth))

        # load model state
        checkpoint = torch.load(cfg.test.pth)
        model.load_state_dict(checkpoint["model_state"])
    else:
        raise FileNotFoundError(f'{cfg.test.pth} file not found')

    # Setup Metrics
    running_metrics_val = runningScore(2)
    running_metrics_train = runningScore(2)
    metrics = runningScore(2)
    test_psnr_meter = averageMeter()
    test_ssim_meter = averageMeter()
    img_cnt = 0
    data_range = 255
    if cfg.data.log:
        data_range = np.log(data_range)

    # test
    model.eval()
    with torch.no_grad():
        for clean, noisy, files_path in loader:

            noisy = noisy.to(device, dtype=torch.float32)
            noisy_denoised = model(noisy)

            psnr = []
            ssim = []
            if cfg.data.simulate:
                clean = clean.to(device, dtype=torch.float32)
                for ii in range(clean.shape[0]):
                    psnr.append(
                        piq.psnr(noisy_denoised[ii, ...],
                                 clean[ii, ...],
                                 data_range=data_range).cpu())
                    ssim.append(
                        piq.ssim(noisy_denoised[ii, ...],
                                 clean[ii, ...],
                                 data_range=data_range).cpu())

                test_psnr_meter.update(np.array(psnr).mean(), n=clean.shape[0])
                test_ssim_meter.update(np.array(ssim).mean(), n=clean.shape[0])

            clean = clean.cpu().numpy()
            noisy = noisy.cpu().numpy()
            noisy_denoised = noisy_denoised.cpu().numpy()

            # save images
            for ii in range(clean.shape[0]):

                # strip the (hard-coded, 29-character) data-root prefix
                file_path = files_path[ii][29:]
                file_path = file_path.replace(r'/', '_')
                file_path = osp.splitext(file_path)[0]
                file_ori = noisy[ii, ...]
                file_clean = clean[ii, ...]
                file_denoise = noisy_denoised[ii, ...]

                path_ori = osp.join(run_id, file_path)
                path_denoise = osp.join(run_id, file_path)
                path_clean = osp.join(run_id, file_path)
                if cfg.data.simulate:
                    metric_str = f'_{psnr[ii].item():.3f}_{ssim[ii].item():.3f}'
                    path_ori += metric_str
                    path_denoise += metric_str
                    path_clean += metric_str

                path_ori = osp.join(path_ori, 'original')
                path_denoise = osp.join(path_denoise, 'denoise')
                path_clean = osp.join(path_clean, 'clean')

                fu.mkdir_if_not_exist(path_ori)
                fu.mkdir_if_not_exist(path_denoise)
                fu.mkdir_if_not_exist(path_clean)

                print('clean')
                psr.write_hoekman_image(file_clean, path_clean, is_print=True)
                print('noisy')
                psr.write_hoekman_image(file_ori, path_ori, is_print=True)
                print('denoise')
                psr.write_hoekman_image(file_denoise,
                                        path_denoise,
                                        is_print=True)

        if cfg.data.simulate:
            logger.info(
                f'overall psnr: {test_psnr_meter.avg}, ssim: {test_ssim_meter.avg}'
            )

        logger.info('\ndone')