Python get_transformの例、datasets.data_io.get_transform Pythonの例

コード例 #1

0

ファイルを表示

ファイル: read_driving.py プロジェクト: gpcv-luochong/zongshu

    def __getitem__(self, index):
        """Load the stereo pair and disparity at ``index`` and crop to 512x256.

        In training mode the crop origin is drawn at random; otherwise the
        crop is anchored to the bottom-right corner of the image and the
        returned dict additionally carries zero ``top_pad`` / ``right_pad``.

        Returns:
            dict with keys ``"left"``, ``"right"`` (outputs of the callable
            returned by ``get_transform()``) and ``"disp"`` (cropped
            disparity); evaluation mode adds ``"top_pad"`` and
            ``"right_pad"``, both 0.
        """
        def _resolve(relative_name):
            return os.path.join(self.datapath, relative_name)

        left_img = self.load_image(_resolve(self.left_filenames[index]))
        right_img = self.load_image(_resolve(self.right_filenames[index]))
        disparity = self.load_disp(_resolve(self.disp_filenames[index]))

        is_training = self.training
        w, h = left_img.size
        crop_w, crop_h = 512, 256

        if is_training:
            # random crop origin (x is drawn before y)
            x1 = random.randint(0, w - crop_w)
            y1 = random.randint(0, h - crop_h)
        else:
            # fixed crop flush with the bottom-right corner
            x1 = w - crop_w
            y1 = h - crop_h
        x2 = x1 + crop_w
        y2 = y1 + crop_h

        box = (x1, y1, x2, y2)
        left_img = left_img.crop(box)
        right_img = right_img.crop(box)
        disparity = disparity[y1:y2, x1:x2]

        # to tensor, normalize
        transform = get_transform()
        left_img = transform(left_img)
        right_img = transform(right_img)

        sample = {"left": left_img, "right": right_img, "disp": disparity}
        if not is_training:
            sample["top_pad"] = 0
            sample["right_pad"] = 0
        return sample

コード例 #2

0

ファイルを表示

import numpy as np
from PIL import Image
from datasets.data_io import get_transform
from models.bgnet import BGNet
from models.bgnet_plus import BGNet_Plus

# Build the network on the GPU and restore the pretrained weights.
model = BGNet_Plus().cuda()

# The map_location callable returns each storage unchanged, so the checkpoint
# is deserialized on the CPU; load_state_dict then copies the values into the
# model's (CUDA) parameters.
checkpoint = torch.load('models/Sceneflow-IRS-BGNet-Plus.pth',
                        map_location=lambda storage, loc: storage)
model.load_state_dict(checkpoint)
model.eval()

# Both views are read as single-channel grayscale images.
left_img = Image.open('sample/im0.png').convert('L')
right_img = Image.open('sample/im1.png').convert('L')

# Round both dimensions down to the nearest multiple of 64.
w, h = left_img.size
h1 = h - h % 64
w1 = w - w % 64

# Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS names the same
# filter and is available in both old and new Pillow releases.
left_img = left_img.resize((w1, h1), Image.LANCZOS)
right_img = right_img.resize((w1, h1), Image.LANCZOS)
left_img = np.ascontiguousarray(left_img, dtype=np.float32)
right_img = np.ascontiguousarray(right_img, dtype=np.float32)

preprocess = get_transform()
left_img = preprocess(left_img)
right_img = preprocess(right_img)

# Inference only: skip autograd bookkeeping to save memory.
with torch.no_grad():
    pred, _ = model(left_img.unsqueeze(0).cuda(),
                    right_img.unsqueeze(0).cuda())

# Scale by 256 before the uint16 cast so fractional disparities survive
# in the saved PNG.
pred = pred[0].detach().cpu().numpy() * 256
skimage.io.imsave('sample_disp.png', pred.astype('uint16'))

コード例 #3

0

ファイルを表示

ファイル: kitti_dataset.py プロジェクト: zjjerica/cascade-stereo

    def __getitem__(self, index):
        """Return the stereo sample at ``index`` as a dict.

        Training mode takes a random ``self.crop_width`` x
        ``self.crop_height`` crop of both views and of the disparity map.
        Evaluation mode keeps the full images and pads them (top and right)
        up to ``self.test_crop_width`` x ``self.test_crop_height``.

        Returns:
            Training: ``{"left", "right", "disparity"}``.
            Evaluation with ground truth: ``{"left", "right", "disparity",
            "top_pad", "right_pad"}``.
            Evaluation without ground truth: ``{"left", "right", "top_pad",
            "right_pad", "left_filename", "right_filename"}``.

        Raises:
            AssertionError: in evaluation mode, if the image is not strictly
                smaller than the target test size in both dimensions.
        """
        left_img = self.load_image(
            os.path.join(self.datapath, self.left_filenames[index]))
        right_img = self.load_image(
            os.path.join(self.datapath, self.right_filenames[index]))

        if self.disp_filenames:  # has disparity ground truth
            disparity = self.load_disp(
                os.path.join(self.datapath, self.disp_filenames[index]))
        else:
            disparity = None

        if self.training:
            w, h = left_img.size
            crop_w, crop_h = self.crop_width, self.crop_height

            x1 = random.randint(0, w - crop_w)
            y1 = random.randint(0, h - crop_h)

            # random crop
            # NOTE(review): this path indexes ``disparity`` directly, so it
            # presumably expects ground truth to be present when training.
            left_img = left_img.crop((x1, y1, x1 + crop_w, y1 + crop_h))
            right_img = right_img.crop((x1, y1, x1 + crop_w, y1 + crop_h))
            disparity = disparity[y1:y1 + crop_h, x1:x1 + crop_w]

            # to tensor, normalize
            processed = get_transform()
            left_img = processed(left_img)
            right_img = processed(right_img)

            return {
                "left": left_img,
                "right": right_img,
                "disparity": disparity
            }
        else:
            w, h = left_img.size

            # normalize
            processed = get_transform()
            left_img = processed(left_img).numpy()
            right_img = processed(right_img).numpy()

            # pad up to test_crop_width x test_crop_height
            top_pad = self.test_crop_height - h
            right_pad = self.test_crop_width - w
            assert top_pad > 0 and right_pad > 0

            # pad images: nothing on axis 0, ``top_pad`` before axis 1 and
            # ``right_pad`` after axis 2.  ``np.pad`` is the public entry
            # point; the ``np.lib.pad`` alias is not exposed by NumPy 2.x.
            image_pad = ((0, 0), (top_pad, 0), (0, right_pad))
            left_img = np.pad(left_img,
                              image_pad,
                              mode='constant',
                              constant_values=0)
            right_img = np.pad(right_img,
                               image_pad,
                               mode='constant',
                               constant_values=0)

            # pad disparity gt
            if disparity is not None:
                assert len(disparity.shape) == 2
                disparity = np.pad(disparity,
                                   ((top_pad, 0), (0, right_pad)),
                                   mode='constant',
                                   constant_values=0)

            if disparity is not None:
                return {
                    "left": left_img,
                    "right": right_img,
                    "disparity": disparity,
                    "top_pad": top_pad,
                    "right_pad": right_pad
                }
            else:
                # No ground truth: return the file names instead.
                return {
                    "left": left_img,
                    "right": right_img,
                    "top_pad": top_pad,
                    "right_pad": right_pad,
                    "left_filename": self.left_filenames[index],
                    "right_filename": self.right_filenames[index]
                }

コード例 #4

0

ファイルを表示

ファイル: sceneflow_dataset.py プロジェクト: yzxstore/PAM

    def __getitem__(self, index):
        """Load a stereo pair with left and right disparity maps and crop them.

        Training mode uses a random 512x256 window; evaluation mode uses a
        960x512 window anchored at the bottom-right corner and also reports
        zero ``top_pad`` / ``right_pad``.

        Returns:
            dict with keys ``"left"``, ``"right"``, ``"left_disp"`` and
            ``"right_disp"``; evaluation mode adds ``"top_pad"`` and
            ``"right_pad"``, both 0.
        """
        def _resolve(relative_name):
            return os.path.join(self.datapath, relative_name)

        left_img = self.load_image(_resolve(self.left_filenames[index]))
        right_img = self.load_image(_resolve(self.right_filenames[index]))
        left_disp = self.load_disp(_resolve(self.left_disp_filenames[index]))
        right_disp = self.load_disp(_resolve(self.right_disp_filenames[index]))

        is_training = self.training
        w, h = left_img.size

        if is_training:
            crop_w, crop_h = 512, 256
            # random crop origin (x is drawn before y)
            x1 = random.randint(0, w - crop_w)
            y1 = random.randint(0, h - crop_h)
        else:
            crop_w, crop_h = 960, 512
            # fixed window flush with the bottom-right corner
            x1 = w - crop_w
            y1 = h - crop_h
        x2 = x1 + crop_w
        y2 = y1 + crop_h

        window = (x1, y1, x2, y2)
        left_img = left_img.crop(window)
        right_img = right_img.crop(window)
        left_disp = left_disp[y1:y2, x1:x2]
        right_disp = right_disp[y1:y2, x1:x2]

        # to tensor, normalize
        transform = get_transform()
        left_img = transform(left_img)
        right_img = transform(right_img)

        # NOTE: a random flip augmentation for the training path (flipping
        # tensor dim 1 of the images and axis 0 of the disparities) is
        # intentionally disabled.

        sample = {
            "left": left_img,
            "right": right_img,
            "left_disp": left_disp,
            "right_disp": right_disp,
        }
        if not is_training:
            sample["top_pad"] = 0
            sample["right_pad"] = 0
        return sample

コード例 #5

0

ファイルを表示

ファイル: middlebury_dataset.py プロジェクト: mli0603/GwcNet

    def __getitem__(self, index):
        """Return the stereo sample at ``index`` with occluded pixels zeroed.

        The occlusion image at ``self.occ_data[index]`` is read, and every
        disparity pixel whose occlusion value differs from 255 is set to 0.
        Training mode then takes a random 512x256 crop; evaluation mode pads
        the top and right of the images and disparity so that height and
        width become multiples of 32.

        Returns:
            Training: ``{"left", "right", "disparity"}``.
            Evaluation: ``{"left", "right", "disparity", "top_pad",
            "right_pad"}``.
        """
        left_img = self.load_image(
            os.path.join(self.datapath, self.left_filenames[index]))
        right_img = self.load_image(
            os.path.join(self.datapath, self.right_filenames[index]))
        disparity = self.load_disp(
            os.path.join(self.datapath, self.disp_filenames[index]))

        # occ: any value other than 255 marks the pixel as invalid
        occ = np.array(Image.open(self.occ_data[index])) != 255
        disparity[occ] = 0.0

        if self.training:
            w, h = left_img.size
            crop_w, crop_h = 512, 256

            x1 = random.randint(0, w - crop_w)
            y1 = random.randint(0, h - crop_h)

            # random crop
            left_img = left_img.crop((x1, y1, x1 + crop_w, y1 + crop_h))
            right_img = right_img.crop((x1, y1, x1 + crop_w, y1 + crop_h))
            disparity = disparity[y1:y1 + crop_h, x1:x1 + crop_w]

            # to tensor, normalize
            processed = get_transform()
            left_img = processed(left_img)
            right_img = processed(right_img)

            return {
                "left": left_img,
                "right": right_img,
                "disparity": disparity
            }
        else:
            w, h = left_img.size
            # Amount needed to reach the next multiple of 32 (0 if already
            # aligned).
            top_pad = math.ceil(h / 32) * 32 - h
            right_pad = math.ceil(w / 32) * 32 - w

            processed = get_transform()
            left_img = processed(left_img)
            right_img = processed(right_img)

            # pad images: nothing on axis 0, ``top_pad`` before axis 1 and
            # ``right_pad`` after axis 2.  ``np.pad`` is the public entry
            # point; the ``np.lib.pad`` alias is not exposed by NumPy 2.x.
            image_pad = ((0, 0), (top_pad, 0), (0, right_pad))
            left_img = np.pad(left_img,
                              image_pad,
                              mode='constant',
                              constant_values=0)
            right_img = np.pad(right_img,
                               image_pad,
                               mode='constant',
                               constant_values=0)

            # pad disparity gt
            if disparity is not None:
                assert len(disparity.shape) == 2
                disparity = np.pad(disparity,
                                   ((top_pad, 0), (0, right_pad)),
                                   mode='constant',
                                   constant_values=0)

            return {
                "left": left_img,
                "right": right_img,
                "disparity": disparity,
                "top_pad": top_pad,
                "right_pad": right_pad
            }

コード例 #6

0

ファイルを表示

ファイル: kitti_dataset.py プロジェクト: yzxstore/PAM

    def __getitem__(self, index):
        """Return the stereo sample at ``index`` as a dict.

        Training mode takes a random 512x256 crop and derives an occlusion
        mask from the pixels where ``disp_occ - disp_noc`` is positive.
        Evaluation mode pads the images (edge replication) and the disparity
        (zeros) on the top and right up to 1248x384.

        Returns:
            Training: ``{"left", "right", "left_disp", "occ_mask"}``.
            Evaluation with ground truth: ``{"left", "right", "left_disp",
            "top_pad", "right_pad"}``.
            Evaluation without ground truth: ``{"left", "right", "top_pad",
            "right_pad", "left_filename", "right_filename"}``.

        Raises:
            AssertionError: in evaluation mode, if the image is not strictly
                smaller than 1248x384 in both dimensions.
        """
        left_img = self.load_image(os.path.join(self.datapath, self.left_filenames[index]))
        right_img = self.load_image(os.path.join(self.datapath, self.right_filenames[index]))

        if self.disp_occ_filenames:  # has disparity ground truth
            disp_occ = self.load_disp(os.path.join(self.datapath, self.disp_occ_filenames[index]))
            disp_noc = self.load_disp(os.path.join(self.datapath, self.disp_noc_filenames[index]))
        else:
            disp_occ = None
            disp_noc = None

        if self.training:
            w, h = left_img.size
            crop_w, crop_h = 512, 256

            x1 = random.randint(0, w - crop_w)
            y1 = random.randint(0, h - crop_h)

            # random crop
            # NOTE(review): this path indexes the disparities directly, so it
            # presumably expects ground truth to be present when training.
            left_img = left_img.crop((x1, y1, x1 + crop_w, y1 + crop_h))
            right_img = right_img.crop((x1, y1, x1 + crop_w, y1 + crop_h))
            disp_occ = disp_occ[y1:y1 + crop_h, x1:x1 + crop_w]
            disp_noc = disp_noc[y1:y1 + crop_h, x1:x1 + crop_w]
            occ_mask = ((disp_occ - disp_noc) > 0).astype(np.float32)

            # to tensor, normalize
            processed = get_transform()
            left_img = processed(left_img)
            right_img = processed(right_img)

            # NOTE: a random flip augmentation (tensor dim 1 of the images,
            # axis 0 of disp_occ) is intentionally disabled here.

            return {"left": left_img,
                    "right": right_img,
                    "left_disp": disp_occ,
                    "occ_mask": occ_mask}
        else:
            w, h = left_img.size

            # normalize
            processed = get_transform()
            left_img = processed(left_img).numpy()
            right_img = processed(right_img).numpy()

            # pad to size 1248x384
            top_pad = 384 - h
            right_pad = 1248 - w
            assert top_pad > 0 and right_pad > 0

            # pad images: nothing on axis 0, ``top_pad`` before axis 1 and
            # ``right_pad`` after axis 2.  ``np.pad`` is the public entry
            # point; the ``np.lib.pad`` alias is not exposed by NumPy 2.x.
            image_pad = ((0, 0), (top_pad, 0), (0, right_pad))
            left_img = np.pad(left_img, image_pad, mode='edge')
            right_img = np.pad(right_img, image_pad, mode='edge')

            # Key on the loaded array rather than on the filename list: the
            # list is tested for truthiness when loading, so an empty (but
            # non-None) list leaves disp_occ as None and must take the
            # no-ground-truth path instead of being padded.
            if disp_occ is not None:
                # pad disparity gt
                disp_occ = np.pad(disp_occ, ((top_pad, 0), (0, right_pad)), mode='constant', constant_values=0)
                return {"left": left_img,
                        "right": right_img,
                        "left_disp": disp_occ,
                        "top_pad": top_pad,
                        "right_pad": right_pad
                        }
            else:
                return {"left": left_img,
                        "right": right_img,
                        "top_pad": top_pad,
                        "right_pad": right_pad,
                        "left_filename": self.left_filenames[index],
                        "right_filename": self.right_filenames[index]
                        }

コード例 #7

0

ファイルを表示

    def __getitem__(self, index):
        """Return the stereo sample at ``index`` as a dict.

        If mask files are configured, the disparity is multiplied by the
        boolean mask ``mask > 0``.  Training mode converts both views to
        grayscale and takes a random 512x256 crop; evaluation mode pads the
        images (symmetric reflection) and the disparity (zeros) on the top
        and right up to 1280x384.  In both modes the disparity gets a new
        leading axis before being returned.

        Returns:
            ``{"left", "right", "disparity"}``.

        Raises:
            AssertionError: in evaluation mode, if the image is not strictly
                smaller than 1280x384 in both dimensions.
        """
        left_img = self.load_image(
            os.path.join(self.datapath, self.left_filenames[index]))
        right_img = self.load_image(
            os.path.join(self.datapath, self.right_filenames[index]))
        if self.mask_filenames:
            mask = self.load_image(
                os.path.join(self.datapath, self.mask_filenames[index]))
        else:
            mask = None
        if self.disp_filenames:  # has disparity ground truth
            disparity = self.load_disp(
                os.path.join(self.datapath, self.disp_filenames[index]))
            if self.mask_filenames:
                # Zero out disparity wherever the mask is not positive.
                mask = np.asarray(mask)
                temp = mask > 0
                disparity = disparity * temp
        else:
            disparity = None

        if self.training:
            # rgb2gray
            left_img = left_img.convert('L')
            right_img = right_img.convert('L')

            w, h = left_img.size
            crop_w, crop_h = 512, 256

            x1 = random.randint(0, w - crop_w)
            y1 = random.randint(0, h - crop_h)

            # random crop
            left_img = left_img.crop((x1, y1, x1 + crop_w, y1 + crop_h))
            right_img = right_img.crop((x1, y1, x1 + crop_w, y1 + crop_h))
            disparity = disparity[y1:y1 + crop_h, x1:x1 + crop_w]

            left_img = np.ascontiguousarray(left_img, dtype=np.float32)
            right_img = np.ascontiguousarray(right_img, dtype=np.float32)
            # to tensor, normalize
            preprocess = get_transform()
            left_img = preprocess(left_img)
            right_img = preprocess(right_img)
            disparity = np.expand_dims(disparity, 0)
            return {
                "left": left_img,
                "right": right_img,
                "disparity": disparity
            }

        else:
            w, h = left_img.size
            top_pad = 384 - h
            right_pad = 1280 - w
            assert top_pad > 0 and right_pad > 0

            left_img = np.ascontiguousarray(left_img, dtype=np.float32)
            right_img = np.ascontiguousarray(right_img, dtype=np.float32)

            # Two-axis pad: ``top_pad`` before axis 0, ``right_pad`` after
            # axis 1.  ``np.pad`` is the public entry point; the
            # ``np.lib.pad`` alias is not exposed by NumPy 2.x.
            # NOTE(review): the two-axis pad width assumes load_image yields
            # a single-channel image on this path - confirm, since only the
            # training path converts to 'L' explicitly.
            pad_width = ((top_pad, 0), (0, right_pad))
            left_img = np.pad(left_img, pad_width, mode='symmetric')
            right_img = np.pad(right_img, pad_width, mode='symmetric')
            # NOTE(review): disparity is padded unconditionally here although
            # it may be None when no disparity files are configured.
            disparity = np.pad(disparity, pad_width,
                               mode='constant',
                               constant_values=0)

            preprocess = get_transform()
            left_img = preprocess(left_img)
            right_img = preprocess(right_img)

            disparity = np.expand_dims(disparity, 0)
            return {
                "left": left_img,
                "right": right_img,
                "disparity": disparity
            }