def __getitem__(self, index):
        """Return one (image, depth) pair, resized to 228 and converted to tensors.

        Args:
            index: Position within ``self.lists``; resolved to the actual
                array index ``img_idx``.

        Returns:
            Tuple ``(img, dpt)`` of the transformed image and depth map.
        """
        img_idx = self.lists[index]
        # transpose(2, 1, 0) puts channels first — assumes the raw array is
        # stored channels-last (HWC-style); TODO confirm against the loader.
        img = self.imgs[img_idx].transpose(2, 1, 0)
        dpt = self.dpts[img_idx].transpose(1, 0)

        # Scale to 228 then convert numpy arrays to tensors.
        input_transform = transforms.Compose(
            [flow_transforms.Scale(228),
             flow_transforms.ArrayToTensor()])
        # Depth is a single-channel array, hence the Scale_Single variant.
        target_depth_transform = transforms.Compose([
            flow_transforms.Scale_Single(228),
            flow_transforms.ArrayToTensor()
        ])

        img = input_transform(img)
        dpt = target_depth_transform(dpt)

        return img, dpt
# Example #2 — snippet separator (extraction artifact)
def load_data(is_training = True):
    """Build the NYU-Depth-v2 ``ListDataset`` for training or evaluation.

    Reads the train/test index files, sets up input/target transforms, and
    constructs a ``ListDataset``.  As a side effect, updates the module-level
    ``IMAGE_NUM`` with the dataset length.

    Args:
        is_training: When True, use the training index list and apply the
            random-augmentation co-transform; otherwise use the test index
            list with no augmentation.

    Returns:
        The constructed ``ListDataset``.
    """
    global IMAGE_NUM
    train_idx_path = "data/trainNdxs.txt"
    test_idx_path = "data/testNdxs.txt"
    # NOTE(review): these three directory paths are assigned but never used
    # in this function — presumably consumed elsewhere; verify before removal.
    input_rgb_images_dir = 'data/nyu_datasets_changed/input/'
    target_depth_images_dir = 'data/nyu_datasets_changed/target_depths/'
    target_labels_images_dir = 'data/nyu_datasets_changed/labels_38/'

    data_path = "data/nyu_depth_v2_labeled.mat"
    train_idx = np.loadtxt(train_idx_path, dtype='int')
    test_idx = np.loadtxt(test_idx_path, dtype='int')

    # Inputs are scaled to 120, target depths to 60 (half resolution).
    input_transform = flow_transforms.Compose([flow_transforms.Scale(120)])
    target_depth_transform = flow_transforms.Compose([flow_transforms.Scale_Single(60)])
    target_labels_transform = flow_transforms.Compose([])

    # Joint random augmentation of inputs and targets, used for training only.
    co_transform = flow_transforms.Compose([
        flow_transforms.RandomRotate(4),
        flow_transforms.RandomCrop((480, 640)),
        flow_transforms.RandomVerticalFlip()
    ])

    if is_training:
        data = ListDataset(data_path, train_idx, input_transform,
                           target_depth_transform, target_labels_transform,
                           co_transform)
    else:
        data = ListDataset(data_path, test_idx, input_transform,
                           target_depth_transform, target_labels_transform)
    IMAGE_NUM = len(data)
    return data
# Split sizes for the NYU dataset: 1000 + 300 + 149 = 1449 images total.
NUM_TRAIN = 1000
NUM_VAL = 300
NUM_TEST = 149

# Shuffle all 1449 file names once, then carve out the three disjoint splits.
# Each split is additionally capped by the requested train_on/val_on/test_on
# counts.
listing = random.sample(os.listdir(input_rgb_images_dir), 1449)
train_listing = listing[:min(NUM_TRAIN, train_on)]
val_listing = listing[NUM_TRAIN:min(NUM_VAL + NUM_TRAIN, val_on + NUM_TRAIN)]
test_listing = listing[NUM_VAL + NUM_TRAIN:min(NUM_VAL + NUM_TRAIN + NUM_TEST,
                                               test_on + NUM_VAL + NUM_TRAIN)]

# Directories for input RGB, target depth, and label images, bundled together.
data_dir = (input_rgb_images_dir, target_depth_images_dir,
            target_labels_images_dir)

# Per-modality transforms: scale to 228 where applicable, then to tensor.
input_transform = transforms.Compose([
    flow_transforms.Scale(228),
    flow_transforms.ArrayToTensor(),
])
target_depth_transform = transforms.Compose([
    flow_transforms.Scale_Single(228),
    flow_transforms.ArrayToTensor(),
])
target_labels_transform = transforms.Compose([
    flow_transforms.ArrayToTensor(),
])

## Apply this transform on input, ground truth depth images and labeled images

co_transform = flow_transforms.Compose([
    flow_transforms.RandomCrop((480, 640)),
    flow_transforms.RandomHorizontalFlip(),
])

## Splitting in train, val and test sets [No data augmentation on val and test, only on train]
# Example #4 — snippet separator (extraction artifact)
    def __getitem__(self, index):
        """Load a stereo pair with disparity and apply training augmentation.

        Pipeline: load left/right images and the left disparity (right
        disparity too when available), apply per-eye photometric jitter,
        optionally swap-and-mirror the pair (only when a right disparity
        exists), apply a random vertical-disparity/scale/crop co-transform,
        optionally paste a mean-color occlusion patch onto the right image,
        pad everything to a fixed (512, 768) canvas, and normalize.

        Returns:
            Tuple ``(left_img, right_img, dataL)`` of the two processed
            images and the padded float32 left-disparity map.
        """
        left = self.left[index]
        right = self.right[index]
        left_img = self.loader(left)
        right_img = self.loader(right)
        disp_L = self.disp_L[index]
        dataL = self.dploader(disp_L)
        # Infinite disparities mark invalid pixels; zero them out.
        dataL[dataL == np.inf] = 0

        # Right disparity is optional; only load it when provided.
        if not (self.disp_R is None):
            disp_R = self.disp_R[index]
            dataR = self.dploader(disp_R)
            dataR[dataR == np.inf] = 0

        # Fixed output canvas: 2048//4 = 512 high, 3072//4 = 768 wide.
        max_h = 2048 // 4
        max_w = 3072 // 4

        # photometric unsymmetric-augmentation
        # Draw two values per parameter so each eye is jittered independently.
        random_brightness = np.random.uniform(0.5, 2., 2)
        random_gamma = np.random.uniform(0.8, 1.2, 2)
        random_contrast = np.random.uniform(0.8, 1.2, 2)
        left_img = torchvision.transforms.functional.adjust_brightness(
            left_img, random_brightness[0])
        left_img = torchvision.transforms.functional.adjust_gamma(
            left_img, random_gamma[0])
        left_img = torchvision.transforms.functional.adjust_contrast(
            left_img, random_contrast[0])
        right_img = torchvision.transforms.functional.adjust_brightness(
            right_img, random_brightness[1])
        right_img = torchvision.transforms.functional.adjust_gamma(
            right_img, random_gamma[1])
        right_img = torchvision.transforms.functional.adjust_contrast(
            right_img, random_contrast[1])
        right_img = np.asarray(right_img)
        left_img = np.asarray(left_img)

        # horizontal flip
        # With p=0.5, swap the eyes and mirror both images and both disparity
        # maps; valid only when the right disparity is available.
        if not (self.disp_R is None):
            if np.random.binomial(1, 0.5):
                tmp = right_img
                right_img = left_img[:, ::-1]
                left_img = tmp[:, ::-1]
                tmp = dataR
                dataR = dataL[:, ::-1]
                dataL = tmp[:, ::-1]

        # geometric unsymmetric-augmentation
        # With p=0.5, enable a small vertical-disparity perturbation.
        angle = 0
        px = 0
        if np.random.binomial(1, 0.5):
            angle = 0.1
            px = 2
        # Joint transform applied to both images and the left disparity:
        # random vertical disparity, random rescale within self.rand_scale,
        # then a random crop to the output canvas size.
        co_transform = flow_transforms.Compose([
            flow_transforms.RandomVdisp(angle, px),
            flow_transforms.Scale(np.random.uniform(self.rand_scale[0],
                                                    self.rand_scale[1]),
                                  order=self.order),
            flow_transforms.RandomCrop((max_h, max_w)),
        ])
        augmented, dataL = co_transform([left_img, right_img], dataL)
        left_img = augmented[0]
        right_img = augmented[1]

        # randomly occlude a region
        # With p=0.5, overwrite a random rectangle (half-extents 50-150 px)
        # of the right image with its per-channel mean color.
        if np.random.binomial(1, 0.5):
            sx = int(np.random.uniform(50, 150))
            sy = int(np.random.uniform(50, 150))
            cx = int(np.random.uniform(sx, right_img.shape[0] - sx))
            cy = int(np.random.uniform(sy, right_img.shape[1] - sy))
            right_img[cx - sx:cx + sx,
                      cy - sy:cy + sy] = np.mean(np.mean(right_img, 0),
                                                 0)[np.newaxis, np.newaxis]

        # Zero-pad images on top and right so they reach (max_h, max_w).
        h, w, _ = left_img.shape
        top_pad = max_h - h
        left_pad = max_w - w
        left_img = np.lib.pad(left_img, ((top_pad, 0), (0, left_pad), (0, 0)),
                              mode='constant',
                              constant_values=0)
        right_img = np.lib.pad(right_img,
                               ((top_pad, 0), (0, left_pad), (0, 0)),
                               mode='constant',
                               constant_values=0)

        # Pad the disparity the same way (via temporary 4-D expand/squeeze).
        dataL = np.expand_dims(np.expand_dims(dataL, 0), 0)
        dataL = np.lib.pad(dataL,
                           ((0, 0), (0, 0), (top_pad, 0), (0, left_pad)),
                           mode='constant',
                           constant_values=0)[0, 0]
        dataL = np.ascontiguousarray(dataL, dtype=np.float32)

        # Final normalization / tensor conversion via the project's preprocess
        # helper — exact semantics defined elsewhere; see preprocess module.
        processed = preprocess.get_transform()
        left_img = processed(left_img)
        right_img = processed(right_img)
        return (left_img, right_img, dataL)