def define_transforms(self, input, gt, img=None):
        # Define random variables
        hflip_input = np.random.uniform(0.0,
                                        1.0) > 0.5 and self.flip == 'hflip'

        if self.train:
            i, j, h, w = transforms.RandomCrop.get_params(
                input, output_size=self.crop)
            input = F.crop(input, i, j, h, w)
            gt = F.crop(gt, i, j, h, w)
            if hflip_input:
                input, gt = F.hflip(input), F.hflip(gt)

            if self.use_rgb:
                img = F.crop(img, i, j, h, w)
                if hflip_input:
                    img = F.hflip(img)
            input, gt = depth_read(input, self.sparse_val), depth_read(
                gt, self.sparse_val)

        else:
            input, gt = self.center_crop(input), self.center_crop(gt)
            if self.use_rgb:
                img = self.center_crop(img)
            input, gt = depth_read(input, self.sparse_val), depth_read(
                gt, self.sparse_val)

        return input, gt, img
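depth_read itself is not shown in these snippets. Below is a minimal sketch, assuming the standard KITTI depth-completion encoding (uint16 PNG, depth in metres = pixel value / 256, pixel value 0 = no measurement); the default value for sparse_val is an assumption, not taken from the original code.

import numpy as np


def depth_read(img, sparse_val=0):
    # Sketch only: assumes the KITTI depth-completion PNG convention,
    # i.e. a uint16 image where depth in metres = pixel value / 256
    # and a pixel value of 0 means "no LiDAR return".
    depth_png = np.array(img, dtype=int)
    assert np.max(depth_png) > 255, "expected a 16-bit depth map, got 8-bit"
    depth = depth_png.astype(np.float32) / 256.0
    depth[depth_png == 0] = sparse_val  # mark missing measurements
    return depth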
Example #2
    def define_transforms(self, input, gt, img=None):
        input, gt = self.center_crop(input), self.center_crop(gt)
        if self.use_rgb:
            img = self.center_crop(img)
        if self.carla:
            input, gt = depth_read_carla(input), depth_read_carla(gt)
        else:
            input, gt = depth_read(input, self.sparse_val), depth_read(
                gt, self.sparse_val)

        return input, gt, img
Example #3
    def define_transforms(self, input, gt, img=None):

        # Define random variables
        angle = np.random.uniform(-5.0, 5.0) if self.rotate else 0.0
        hflip_input = np.random.uniform(0.0,
                                        1.0) > 0.5 and self.flip == 'hflip'
        vflip_input = np.random.uniform(0.0,
                                        1.0) > 0.5 and self.flip == 'vflip'
        self.scale = np.random.uniform(self.lowerbound, self.bound)

        if self.train:
            input, gt = F.rotate(input, angle), F.rotate(gt, angle)
            i, j, h, w = transforms.RandomCrop.get_params(
                input, output_size=self.crop)
            input = F.crop(input, i, j, h, w)
            gt = F.crop(gt, i, j, h, w)
            if hflip_input:
                input, gt = F.hflip(input), F.hflip(gt)
            elif vflip_input:
                input, gt = F.vflip(input), F.vflip(gt)

            if self.use_rgb:
                img = F.rotate(img, angle)
                # img = misc.imresize(img, self.scale) #'nearest')
                # img = Image.fromarray(img)
                img = F.crop(img, i, j, h, w)
                # img = self.center_crop(img)
                if hflip_input:
                    img = F.hflip(img)
                elif vflip_input:
                    img = F.vflip(img)
                # img = self.color_jitter(img)

            input, gt = depth_read(input,
                                   self.sparse_val) / self.scale, depth_read(
                                       gt, self.sparse_val) / self.scale
        else:
            # input, gt = F.crop(input, 130, 10, 240, 1200), F.crop(gt, 130, 10, 240, 1200)
            input, gt = self.center_crop(input), self.center_crop(gt)
            if self.use_rgb:
                # img = F.crop(img)

                img = self.center_crop(img)
            input, gt = depth_read(input, self.sparse_val), depth_read(
                gt, self.sparse_val)

        return input, gt, img
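For context, here is a hypothetical __getitem__ showing how define_transforms above is typically invoked from the dataset class. The attribute names lidar_paths, gt_paths and img_paths are assumptions, and __init__/__len__ are omitted; this is a sketch, not the original code.

import torch
from PIL import Image
from torchvision import transforms


class KittiDepthDataset(torch.utils.data.Dataset):
    # Hypothetical caller for define_transforms; constructor omitted.
    def __getitem__(self, idx):
        sparse = Image.open(self.lidar_paths[idx])   # sparse LiDAR projection
        gt = Image.open(self.gt_paths[idx])          # semi-dense ground truth
        img = Image.open(self.img_paths[idx]) if self.use_rgb else None

        # Random crop/flip (train) or center crop (val) plus depth decoding.
        sparse_np, gt_np, img_pil = self.define_transforms(sparse, gt, img)

        to_tensor = transforms.ToTensor()
        sparse_t = to_tensor(sparse_np).float()
        gt_t = to_tensor(gt_np).float()
        if self.use_rgb:
            rgb_t = to_tensor(img_pil).float() * 255.0   # as in Example #5
            sparse_t = torch.cat((sparse_t, rgb_t), 0)   # 4-channel rgb+d input
        return sparse_t, gt_t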
Example #4

    def compute_mean_std(self):
        nums = np.array([])
        means = np.array([])
        stds = np.array([])
        max_lst = np.array([])
        for i, raw_img_path in tqdm.tqdm(enumerate(self.train_paths['lidar_in'])):
            raw_img = Image.open(raw_img_path)
            raw_np = depth_read(raw_img)
            vec = raw_np[raw_np >= 0]
            # vec = vec/84.0
            means = np.append(means, np.mean(vec))
            stds = np.append(stds, np.std(vec))
            nums = np.append(nums, len(vec))
            max_lst = np.append(max_lst, np.max(vec))
        mean = np.dot(nums, means) / np.sum(nums)
        std = np.sqrt((np.dot(nums, stds**2) + np.dot(nums, (means - mean)**2)) / np.sum(nums))
        return mean, std, max_lst
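compute_mean_std pools per-image statistics into dataset-level ones: the mean is the count-weighted average of per-image means, and the variance combines the within-image variances with the spread of the per-image means, std = sqrt((sum_i n_i*s_i^2 + sum_i n_i*(m_i - m)^2) / sum_i n_i). Below is a small self-contained check of that pooling rule on synthetic data (not part of the original code):

import numpy as np

# Verify that pooling per-chunk (n, mean, std) reproduces the global mean/std.
rng = np.random.default_rng(0)
chunks = [rng.uniform(0, 85, size=n) for n in (1000, 2500, 400)]

nums = np.array([len(c) for c in chunks], dtype=float)
means = np.array([c.mean() for c in chunks])
stds = np.array([c.std() for c in chunks])

mean = np.dot(nums, means) / nums.sum()
std = np.sqrt((np.dot(nums, stds**2) + np.dot(nums, (means - mean)**2)) / nums.sum())

all_vals = np.concatenate(chunks)
assert np.isclose(mean, all_vals.mean()) and np.isclose(std, all_vals.std())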
Example #5
    def __getitem__(self, idx):
        lidar = self.lidar_files[idx]
        raw_path = os.path.join(lidar)
        raw_pil = Image.open(raw_path)

        assert raw_pil.size == (1216, 352)

        crop = 352 - args.crop_h
        raw_pil_crop = raw_pil.crop((0, crop, 1216, 352))

        raw = depth_read(raw_pil_crop, args.sparse_val)
        raw = self.to_tensor(raw).float()

        rgb = self.rgb_files[idx]
        rgb_path = os.path.join(rgb)
        rgb_pil = Image.open(rgb_path)
        assert rgb_pil.size == (1216, 352)
        rgb_pil_crop = rgb_pil.crop((0, crop, 1216, 352))
        rgb = self.to_tensor(rgb_pil_crop).float()
        rgb = rgb * 255.0

        return raw, rgb, os.path.basename(lidar), crop
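A minimal sketch of feeding such a test dataset to a PyTorch DataLoader; TestDataset, lidar_files and rgb_files are hypothetical names and not part of the original code.

# Sketch only: wrapping the __getitem__ above in a DataLoader.
# TestDataset, lidar_files and rgb_files are placeholder names.
from torch.utils.data import DataLoader

test_set = TestDataset(lidar_files, rgb_files)
test_loader = DataLoader(test_set, batch_size=1, shuffle=False, num_workers=4)

for raw, rgb, name, crop in test_loader:
    # raw: 1 x 1 x crop_h x 1216 sparse depth, rgb: 1 x 3 x crop_h x 1216 image
    ...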
Example #6
def main():
    global args
    global dataset
    args = parser.parse_args()

    torch.backends.cudnn.benchmark = args.cudnn

    best_file_name = glob.glob(os.path.join(args.save_path, 'model_best*'))[0]

    save_root = os.path.join(os.path.dirname(best_file_name), 'results')
    if not os.path.isdir(save_root):
        os.makedirs(save_root)

    print("==========\nArgs:{}\n==========".format(args))
    # INIT
    print("Init model: '{}'".format(args.mod))
    channels_in = 1 if args.input_type == 'depth' else 4
    model = Models.define_model(mod=args.mod, in_channels=channels_in)
    print("Number of parameters in model {} is {:.3f}M".format(
        args.mod.upper(),
        sum(tensor.numel() for tensor in model.parameters()) / 1e6))
    if not args.no_cuda:
        # Load on gpu before passing params to optimizer
        if not args.multi:
            model = model.cuda()
        else:
            model = torch.nn.DataParallel(model).cuda()
    if os.path.isfile(best_file_name):
        print("=> loading checkpoint '{}'".format(best_file_name))
        checkpoint = torch.load(best_file_name)
        model.load_state_dict(checkpoint['state_dict'])
        lowest_loss = checkpoint['loss']
        best_epoch = checkpoint['best epoch']
        print(
            'Lowest RMSE for selection validation set was {:.4f} in epoch {}'.
            format(lowest_loss, best_epoch))
    else:
        print("=> no checkpoint found at '{}'".format(best_file_name))
        return

    if not args.no_cuda:
        model = model.cuda()
    print("Initializing dataset {}".format(args.dataset))
    dataset = Datasets.define_dataset(args.dataset, args.data_path,
                                      args.input_type)
    dataset.prepare_dataset()
    to_pil = transforms.ToPILImage()
    to_tensor = transforms.ToTensor()
    norm = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                std=[0.229, 0.224, 0.225])
    depth_norm = transforms.Normalize(mean=[14.97 / args.max_depth],
                                      std=[11.15 / args.max_depth])
    model.eval()
    print("===> Start testing")
    total_time = []
    with torch.no_grad():
        for i, (img, rgb, gt) in tqdm.tqdm(
                enumerate(
                    zip(dataset.selected_paths['lidar_in'],
                        dataset.selected_paths['img'],
                        dataset.selected_paths['gt']))):

            raw_path = os.path.join(img)
            raw_pil = Image.open(raw_path)
            gt_path = os.path.join(gt)
            gt_pil = Image.open(gt_path)
            assert raw_pil.size == (1216, 352)

            crop = 352 - args.crop_h
            raw_pil_crop = raw_pil.crop((0, crop, 1216, 352))
            gt_pil_crop = gt_pil.crop((0, crop, 1216, 352))

            raw = depth_read(raw_pil_crop, args.sparse_val)
            raw = to_tensor(raw).float()
            gt = depth_read(gt_pil_crop, args.sparse_val)
            gt = to_tensor(gt).float()
            valid_mask = (raw > 0).detach().float()

            input = torch.unsqueeze(raw, 0).cuda()
            gt = torch.unsqueeze(gt, 0).cuda()

            if args.normal:
                # Put in {0-1} range and then normalize
                input = input / args.max_depth
                # input = depth_norm(input)

            if args.input_type == 'rgb':
                rgb_path = os.path.join(rgb)
                rgb_pil = Image.open(rgb_path)
                assert rgb_pil.size == (1216, 352)
                rgb_pil_crop = rgb_pil.crop((0, crop, 1216, 352))
                rgb = to_tensor(rgb_pil_crop).float()
                rgb = torch.unsqueeze(rgb, 0).cuda()
                if not args.normal:
                    rgb = rgb * 255.0

                input = torch.cat((input, rgb), 1)

            torch.cuda.synchronize()
            a = time.perf_counter()
            output, _, _, _ = model(input)
            torch.cuda.synchronize()
            b = time.perf_counter()
            total_time.append(b - a)
            if args.normal:
                output = output * args.max_depth
            output = torch.clamp(output, min=0, max=85)

            output = output * 256.
            raw = raw * 256.
            output = output[0][0:1].cpu()
            data = output[0].numpy()

            if crop != 0:
                padding = (0, 0, crop, 0)
                output = torch.nn.functional.pad(output, padding, "constant",
                                                 0)
                output[:, 0:crop] = output[:, crop].repeat(crop, 1)

            pil_img = to_pil(output.int())
            assert pil_img.size == (1216, 352)
            pil_img.save(os.path.join(save_root, os.path.basename(img)))
    # Skip the first 100 iterations as GPU warm-up when enough samples are available.
    timings = total_time[100:] if len(total_time) > 100 else total_time
    print('average_time: ', sum(timings) / len(timings))
    print('num imgs: ', i + 1)
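The predictions are written with the same *256 scaling as the 16-bit input depth maps, so a saved result can be converted back to metres as follows; the file name below is a placeholder.

# Read a saved prediction back into metres (inverse of the *256 scaling above).
import numpy as np
from PIL import Image

pred = np.array(Image.open('results/0000000000.png'), dtype=np.float32) / 256.0  # placeholder path
print('depth range [m]:', pred.min(), pred.max())  # 0 means "no prediction"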