Ejemplo n.º 1
0
def extract_patches_multifiles(data_dir, names, target_label, patch_size,
                               patch_step, save_dir):
    """Extract square patches from every case in `names` that belongs to
    `target_label`.

    For each case directory ``data_dir/name`` whose name ends with
    `target_label`, a window of side ``2 * (patch_size // 2) + 1`` is slid
    over the bounding box of the mask with stride `patch_step`. Windows whose
    mask sum divided by ``patch_size ** 2`` is below 0.1 are dropped.

    :param data_dir: directory containing one sub-directory per case.
    :param names: iterable of case directory names; ``None`` entries are
        skipped.
    :param target_label: label as a string; used as the required name suffix
        and converted with ``int()`` for the returned labels.
    :param patch_size: side length used for the window radius and for the
        coverage normalisation.
    :param patch_step: stride of the sliding window in both directions.
    :param save_dir: if not ``None``, every patch is written to
        ``save_dir/<coding_label>/<target_label>/<n>.npy`` and nothing is
        accumulated in memory.
    :return: ``(patches, coding_labeles, labeles)``. One entry per case that
        produced at least one patch: a list of flattened patches, the list of
        their coding labels, and ``int(target_label)``. All three lists are
        empty when `save_dir` is given.
    """
    patches = []
    labeles = []
    coding_labeles = []
    # Floor division keeps the radius an integer on Python 3 as well; it is
    # loop-invariant, so compute it once.
    r = patch_size // 2
    patch_area = 1.0 * patch_size * patch_size
    for name in names:
        # Skip missing entries (os.path.join would fail on None) and cases
        # that do not belong to the requested label.
        if name is None or not name.endswith(target_label):
            continue
        cur_data_dir = os.path.join(data_dir, name)
        # Single-argument print produces the same text on Python 2 and 3.
        print('extract patches from  %s  at  %s' % (cur_data_dir,
                                                    os.getpid()))
        pv_mask_image, pv_mhd_image = read_from_dir(cur_data_dir)
        coding_image = convert_coding(cur_data_dir)
        x_min, x_max, y_min, y_max = get_boundingbox(pv_mask_image)
        cur_patches = []
        cur_coding_labeles = []
        # NOTE(review): when x_min or y_min is smaller than r the slice start
        # becomes negative and wraps around -- confirm the bounding box always
        # leaves a margin of at least r pixels.
        for i in range(x_min, x_max, patch_step):
            for j in range(y_min, y_max, patch_step):
                cur_patch = pv_mhd_image[i - r:i + r + 1, j - r:j + r + 1]
                # The coverage test must look at the mask, not at the image
                # intensities (the original sliced pv_mhd_image here).
                # Assumes foreground mask pixels are 1 -- TODO confirm.
                cur_mask_patch = pv_mask_image[i - r:i + r + 1,
                                               j - r:j + r + 1]
                if np.sum(cur_mask_patch) / patch_area < 0.1:
                    continue
                cur_label = target_label
                # coding_image is indexed relative to the bounding box origin.
                cur_coding_label = coding_image[i - x_min, j - y_min]

                if save_dir is not None:
                    save_path_dir = os.path.join(save_dir,
                                                 str(cur_coding_label),
                                                 str(cur_label))
                    # The next file id is the number of files already present
                    # in the target directory.
                    if os.path.exists(save_path_dir):
                        cur_id = len(os.listdir(save_path_dir))
                    else:
                        cur_id = 0
                    save_path = os.path.join(save_path_dir,
                                             str(cur_id) + '.npy')
                    check_save_path(save_path)
                    np.save(save_path, cur_patch)
                else:
                    cur_patches.append(np.array(cur_patch).flatten())
                    cur_coding_labeles.append(cur_coding_label)
        # Only in-memory mode contributes to the returned lists, and only
        # for cases that yielded at least one patch.
        if save_dir is None and cur_patches:
            patches.append(cur_patches)
            coding_labeles.append(cur_coding_labeles)
            labeles.append(int(target_label))
    print('%d %d %d' % (len(patches), len(coding_labeles), len(labeles)))
    return patches, coding_labeles, labeles
Ejemplo n.º 2
0
def extract_patches_multidir(
        data_dir,
        subclasses=('train', 'val', 'test'),
        target_labels=(0, 1, 2, 3),
        patch_size=7,
        patch_step=1,
        save_dir='/home/give/Documents/dataset/ICPR2018/BoVW-TextSpecific-multiphase',
        return_flag=False):
    """Collect patches for every (subclass, label) pair and return or save
    them.

    Patches are gathered by calling ``extract_patches_singledir`` on
    ``data_dir/<subclass>`` once per target label (always in in-memory mode,
    with ``multiprocess=8``).

    :param data_dir: root directory containing one sub-directory per subclass.
    :param subclasses: iterable of sub-directory names to process.
    :param target_labels: iterable of labels; each is passed on as ``str``.
    :param patch_size: forwarded to ``extract_patches_singledir``.
    :param patch_step: forwarded to ``extract_patches_singledir``.
    :param save_dir: when `return_flag` is false and this is not ``None``,
        patches are grouped by coding label and label and each group is
        written to ``save_dir/<coding_label>/<label>/data.npy``.
    :param return_flag: if true, return the collected data instead of saving.
    :return: ``(patches, coding_labeles, labeles)`` when `return_flag` is
        true, otherwise ``None``.
    """
    patches = []
    labeles = []
    coding_labeles = []
    for subclass in subclasses:
        # Does not depend on the label, so build it once per subclass.
        cur_data_dir = os.path.join(data_dir, subclass)
        for target_label in target_labels:
            cur_patches, cur_coding_labels, cur_labeles = extract_patches_singledir(
                cur_data_dir,
                str(target_label),
                patch_size=patch_size,
                patch_step=patch_step,
                save_dir=None,
                multiprocess=8)
            patches.extend(cur_patches)
            coding_labeles.extend(cur_coding_labels)
            labeles.extend(cur_labeles)
    if return_flag:
        return patches, coding_labeles, labeles
    if save_dir is None:
        return None

    # Group patches as save_dict[coding_label][label] -> list of patches.
    save_dict = {}
    for cur_label, cur_patches, cur_coding_labels in zip(
            labeles, patches, coding_labeles):
        for cur_patch, cur_coding_label in zip(cur_patches,
                                               cur_coding_labels):
            by_label = save_dict.setdefault(cur_coding_label, {})
            by_label.setdefault(cur_label, []).append(cur_patch)

    # No 'save_dict.mat' file is written; the check_save_path call on that
    # path is kept from the original (presumably it prepares save_dir --
    # TODO confirm against check_save_path).
    check_save_path(os.path.join(save_dir, 'save_dict.mat'))
    for subkey, by_label in save_dict.items():
        for subsubkey, group in by_label.items():
            save_path = os.path.join(save_dir, str(subkey), str(subsubkey),
                                     'data.npy')
            check_save_path(save_path)
            np.save(save_path, group)
Ejemplo n.º 3
0
def generate_representor_multidir(data_dir,
                                  patch_dir,
                                  reload=None,
                                  save_dir=None):
    """Build (or reload) train/test/val features and run the classifier.

    :param data_dir: forwarded to ``generate_representor``.
    :param patch_dir: forwarded to ``generate_representor`` as ``patch_dir``.
    :param reload: if ``None``, features are computed with
        ``generate_representor``; any other value loads them from the
        ``.mat`` files in `save_dir` instead.
    :param save_dir: directory holding ``training.mat``, ``testing.mat`` and
        ``validation.mat``. Optional when computing (the files are then not
        written), required when reloading.
    :return: the value returned by ``execute_classify``.
    :raises ValueError: if `reload` is set but `save_dir` is ``None``.
    """
    # Fail fast: reloading needs a directory to read from. (Without this
    # check os.path.join(None, ...) fails with a far less helpful error.)
    if reload is not None and save_dir is None:
        raise ValueError('save_dir must be given when reload is set')

    import scipy.io as scio

    # (subclass passed to generate_representor, file name inside save_dir),
    # in the order the splits are processed.
    splits = (('train', 'training.mat'),
              ('test', 'testing.mat'),
              ('val', 'validation.mat'))
    features = {}
    labels = {}
    for subclass, mat_name in splits:
        if reload is None:
            cur_features, cur_labels = generate_representor(
                data_dir, patch_dir=patch_dir, subclass=subclass)
            # Saving is optional so that the default save_dir=None no longer
            # crashes after the features have already been computed.
            if save_dir is not None:
                mat_path = os.path.join(save_dir, mat_name)
                check_save_path(mat_path)
                scio.savemat(mat_path, {
                    'features': cur_features,
                    'labels': cur_labels
                })
        else:
            # NOTE(review): scipy.io.loadmat returns arrays that are at least
            # 2-D by default, so reloaded labels may have a different shape
            # than freshly computed ones -- confirm execute_classify accepts
            # both.
            mat_data = scio.loadmat(os.path.join(save_dir, mat_name))
            cur_features = mat_data['features']
            cur_labels = mat_data['labels']
        features[subclass] = cur_features
        labels[subclass] = cur_labels

    acc = execute_classify(features['train'], labels['train'],
                           features['val'], labels['val'],
                           features['test'], labels['test'])
    return acc
Ejemplo n.º 4
0
def extract_patches_multifiles(data_dir, names, target_label, patch_size, patch_step, save_dir):
    """Extract multi-phase patches from every case in `names` that belongs to
    `target_label`.

    For each case directory ``data_dir/name`` whose name ends with
    `target_label`, the NC/ART/PV images are cropped to their mask bounding
    box, stacked with ``convert2depthlaster`` and scanned with a window of
    side ``2 * (patch_size // 2) + 1`` and stride `patch_step`.

    In in-memory mode (``save_dir is None``) the flattened patches and their
    coding labels are cached in ``patches.npy`` / ``coding_labeles.npy``
    inside the case directory and reused on later calls. When `save_dir` is
    given the cache is neither read nor written: the original wrote empty
    cache files in that mode, which made every later call load empty data.

    :param data_dir: directory containing one sub-directory per case.
    :param names: iterable of case directory names; ``None`` entries are
        skipped.
    :param target_label: label as a string; used as the required name suffix
        and converted with ``int()`` for the returned labels.
    :param patch_size: window size; the window radius is ``patch_size // 2``.
    :param patch_step: stride of the sliding window in both directions.
    :param save_dir: if not ``None``, every patch is written to
        ``save_dir/<coding_label>/<target_label>/<n>.npy`` and nothing is
        accumulated in memory.
    :return: ``(patches, coding_labeles, labeles)`` with one entry per case
        that produced at least one patch; all three are empty when
        `save_dir` is given.
    """
    patches = []
    labeles = []
    coding_labeles = []
    # Floor division keeps the radius an integer on Python 3 as well.
    half = patch_size // 2
    use_cache = save_dir is None
    for name in names:
        # Skip missing entries (os.path.join would fail on None) and cases
        # that do not belong to the requested label.
        if name is None or not name.endswith(target_label):
            continue

        cur_data_dir = os.path.join(data_dir, name)
        patches_save_path = os.path.join(cur_data_dir, 'patches.npy')
        coding_labeles_path = os.path.join(cur_data_dir, 'coding_labeles.npy')

        if use_cache and os.path.exists(patches_save_path):
            cur_patches = np.load(patches_save_path)
            cur_coding_labeles = np.load(coding_labeles_path)
        else:
            phase_images = _read_masked_phase_images(cur_data_dir)
            coding_image = convert_coding(cur_data_dir)
            mhd_images = convert2depthlaster(phase_images)
            width, height, _ = np.shape(mhd_images)
            # Single-argument print produces the same text on Python 2 and 3.
            print('extract patches from  %s  at  %s  corresponding size is [ %s %s ]'
                  % (cur_data_dir, os.getpid(), width, height))
            cur_patches = []
            cur_coding_labeles = []
            for i in range(half, width - half, patch_step):
                for j in range(half, height - half, patch_step):
                    cur_patch = mhd_images[i - half:i + half + 1, j - half:j + half + 1, :]
                    cur_label = target_label
                    cur_coding_label = coding_image[i, j]
                    if save_dir is not None:
                        save_path_dir = os.path.join(save_dir, str(cur_coding_label), str(cur_label))
                        # The next file id is the number of files already
                        # present in the target directory.
                        if os.path.exists(save_path_dir):
                            cur_id = len(os.listdir(save_path_dir))
                        else:
                            cur_id = 0
                        save_path = os.path.join(save_path_dir, str(cur_id) + '.npy')
                        check_save_path(save_path)
                        np.save(save_path, cur_patch)
                    else:
                        cur_patches.append(flatten_multiphase(np.array(cur_patch)))
                        cur_coding_labeles.append(cur_coding_label)
            if not use_cache:
                # Patches went to save_dir; nothing to cache or return.
                continue
            if cur_patches:
                np.save(patches_save_path, cur_patches)
                np.save(coding_labeles_path, cur_coding_labeles)

        # len() works for both lists and loaded arrays; it also skips empty
        # cache files left behind by earlier runs.
        if len(cur_patches) == 0:
            continue
        patches.append(cur_patches)
        coding_labeles.append(cur_coding_labeles)
        labeles.append(int(target_label))
    print('%d %d %d' % (len(patches), len(coding_labeles), len(labeles)))
    return patches, coding_labeles, labeles


def _read_masked_phase_images(case_dir):
    """Return the NC, ART and PV images of one case, each cropped to its mask
    bounding box with every pixel where the mask is not 1 set to 0."""
    mhd_images = []
    for phasename in ['NC', 'ART', 'PV']:
        # Raises IndexError when no image file matches the pattern.
        image_path = glob(os.path.join(case_dir, phasename + '_Image*.mhd'))[0]
        mask_path = os.path.join(case_dir, phasename + '_Registration.mhd')
        mhd_image = np.squeeze(read_mhd_image(image_path, rejust=True))
        mask_image = np.squeeze(read_mhd_image(mask_path))
        xmin, xmax, ymin, ymax = get_boundingbox(mask_image)
        mask_image = mask_image[xmin:xmax, ymin:ymax]
        mhd_image = mhd_image[xmin:xmax, ymin:ymax]
        mhd_image[mask_image != 1] = 0
        mhd_images.append(mhd_image)
    return mhd_images