Example #1
    def __init__(self, root, isTrain=True):
        self.images_root = os.path.join(root, 'img')
        self.labels_root = os.path.join(root, 'gt')
        self.list_root = os.path.join(root, 'list')

        # print('image root = ', self.images_root)
        # print('labels root = ', self.labels_root)

        if isTrain:
            list_path = os.path.join(self.list_root, 'train_aug.txt')
            self.input_transform = transforms.Compose([
                transforms.RandomRotation(10),  # random rotation
                transforms.CenterCrop(256),
                transforms.RandomHorizontalFlip(),  # random horizontal flip
                transforms.ToTensor(),
                transforms.Normalize([.485, .456, .406], [.229, .224, .225])
            ])
            self.target_transform = transforms.Compose(
                [transforms.CenterCrop(256),
                 transform.ToLabel()])
        else:
            list_path = os.path.join(self.list_root, 'val.txt')
            self.input_transform = transforms.Compose([
                transforms.CenterCrop(256),
                transforms.ToTensor(),
                transforms.Normalize([.485, .456, .406], [.229, .224, .225])
            ])
            self.target_transform = transforms.Compose(
                [transforms.CenterCrop(256),
                 transform.ToLabel()])

        self.filenames = [i_id.strip() for i_id in open(list_path)]
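The pipeline above applies RandomRotation and RandomHorizontalFlip to the input image only, so the image and its label can end up geometrically misaligned. A minimal sketch (not part of the original example; the function name is hypothetical) that keeps the two in sync by drawing the random parameters once and applying them to both via torchvision's functional API:

import random
import torchvision.transforms.functional as TF

def joint_geometric_transform(image, label, max_angle=10, crop_size=256):
    # sample the random parameters once, then apply them to both inputs
    angle = random.uniform(-max_angle, max_angle)
    image, label = TF.rotate(image, angle), TF.rotate(label, angle)
    image = TF.center_crop(image, crop_size)
    label = TF.center_crop(label, crop_size)
    if random.random() < 0.5:
        image, label = TF.hflip(image), TF.hflip(label)
    return image, label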
Example #2
    def __getitem__(self, index):
        rgb, depth = self.__getraw__(index)
        # reject malformed samples (either a bad rgb or a bad depth map)
        if rgb.ndim < 2 or depth.ndim != 2:
            print("Wrong DEPTH ", depth)
            return None
        if self.transform is not None:
            rgb_np, depth_np = self.transform(rgb, depth)
        else:
            raise RuntimeError("transform not defined")

        # color normalization
        # rgb_tensor = normalize_rgb(rgb_tensor)
        # rgb_np = normalize_np(rgb_np)

        if self.modality == 'rgb':
            input_np = rgb_np
        else:  # avoid a NameError below for unsupported modalities
            raise RuntimeError("unsupported modality: " + str(self.modality))

        to_tensor = transforms.ToTensor()
        input_tensor = to_tensor(input_np)
        while input_tensor.dim() < 3:
            input_tensor = input_tensor.unsqueeze(0)
        depth_tensor = to_tensor(depth_np)
        depth_tensor = depth_tensor.unsqueeze(0)

        return input_tensor, depth_tensor
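Because __getitem__ can return None for a malformed sample, PyTorch's default collate function would fail on any batch containing one. A hedged workaround (not from the original code) is a collate_fn that drops the None entries before stacking:

from torch.utils.data.dataloader import default_collate

def skip_none_collate(batch):
    # filter out the samples that __getitem__ rejected
    batch = [sample for sample in batch if sample is not None]
    return default_collate(batch)

# loader = DataLoader(dataset, batch_size=8, collate_fn=skip_none_collate)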
Example #3
    def __init__(self, opt, set_name='train', train=True):
        """
        Args:
            root_dir (string): COCO directory.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        # self.opt = opt
        if train:
            self.root_dir = osp.join(opt.root_dir, 'VisDrone2019-DET-train')
        else:
            self.root_dir = osp.join(opt.root_dir, 'VisDrone2019-DET-val')
        self.anno_dir = osp.join(self.root_dir, 'annotations_json')
        self.img_dir = osp.join(self.root_dir, 'images')
        self.set_name = set_name
        self.train = train

        self.coco = COCO(osp.join(self.anno_dir, INSTANCES_SET.format(self.set_name)))
        self.image_ids = self.coco.getImgIds()

        self.load_classes()

        self.input_size = opt.input_size

        if self.train:
            self.transform = transforms.Compose([
                # tsf.RandomColorJeter(0.3, 0.3, 0.3, 0.3),
                # tsf.RandomGaussianBlur(),
                tsf.RandomHorizontalFlip(),
                tsf.Resizer(self.input_size),
                tsf.Normalizer(**opt.norm_cfg),
                tsf.ToTensor()
            ])
        else:
            self.transform = transforms.Compose([
                tsf.Resizer(self.input_size),
                tsf.Normalizer(**opt.norm_cfg),
                tsf.ToTensor()
            ])
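The constructor only reads root_dir, input_size and norm_cfg from opt, so a lightweight namespace is enough to instantiate it. A hypothetical usage sketch (the dataset class name and the data layout are assumptions, not taken from the original):

from argparse import Namespace

opt = Namespace(root_dir='data/VisDrone',  # assumed layout
                input_size=512,
                norm_cfg=dict(mean=[0.485, 0.456, 0.406],
                              std=[0.229, 0.224, 0.225]))
train_set = VisDroneDataset(opt, set_name='train', train=True)  # class name assumed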
Example #4
    def __init__(self, opt, set_name='train', train=True):
        """
        Args:
            root_dir (string): COCO directory.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        # self.opt = opt
        self.root_dir = opt.root_dir

        self.anno_dir = osp.join(self.root_dir, ANNO_ROOT)
        self.img_dir = osp.join(self.root_dir, IMG_ROOT)
        self.set_name = set_name
        self.train = train

        self.coco = COCO(
            osp.join(self.anno_dir, INSTANCES_SET.format(self.set_name)))
        self.image_ids = self.coco.getImgIds()

        self.load_classes()

        self.min_size = opt.min_size
        self.max_size = opt.max_size
        self.input_size = (self.min_size, self.max_size)
        self.resize = self.resizes(opt.resize_type)

        if self.train:
            self.transform = transforms.Compose([
                tsf.RandomColorJeter(0.3, 0.3, 0.3, 0.3),
                tsf.RandomGaussianBlur(), self.resize,
                tsf.Normalizer(**opt.norm_cfg),
                tsf.ToTensor()
            ])
        else:
            self.transform = transforms.Compose(
                [self.resize,
                 tsf.Normalizer(**opt.norm_cfg),
                 tsf.ToTensor()])
Example #5
def create_loader(args):
    if args.dataset == 'vocaug':
        composed_transforms_tr = transforms.Compose([
            tr.RandomSized(512),
            tr.RandomRotate(15),
            tr.RandomHorizontalFlip(),
            tr.Normalize(mean=(0.485, 0.456, 0.406),
                         std=(0.229, 0.224, 0.225)),
            tr.ToTensor()
        ])

        composed_transforms_ts = transforms.Compose([
            tr.FixedResize(size=(512, 512)),
            tr.Normalize(mean=(0.485, 0.456, 0.406),
                         std=(0.229, 0.224, 0.225)),
            tr.ToTensor()
        ])

        train_set = VOCAug(split='train', transform=composed_transforms_tr)
        val_set = VOCAug(split='val', transform=composed_transforms_ts)
    else:
        raise NotImplementedError(
            'Database {} not available.'.format(args.dataset))

    train_loader = DataLoader(train_set,
                              batch_size=args.batch_size,
                              shuffle=True,
                              num_workers=args.workers,
                              pin_memory=True)
    val_loader = DataLoader(val_set,
                            batch_size=16,
                            shuffle=False,
                            num_workers=args.workers,
                            pin_memory=True)

    return train_loader, val_loader
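create_loader only touches args.dataset, args.batch_size and args.workers, so a minimal driver might look like the sketch below (the flag names are assumptions chosen to match the fields read above):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--dataset', default='vocaug')
parser.add_argument('--batch_size', type=int, default=8)
parser.add_argument('--workers', type=int, default=4)
args = parser.parse_args()

train_loader, val_loader = create_loader(args)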
Example #6
    def __getitem__(self, index):
        rgb, depth, pose = self.__getraw__(index)
        if self.transform is not None:
            rgb_np, depth_np, pose = self.transform(rgb, depth, pose)
        else:
            raise RuntimeError("transform not defined")

        # color normalization
        # rgb_tensor = normalize_rgb(rgb_tensor)
        # rgb_np = normalize_np(rgb_np)

        if self.modality == 'rgb':
            input_np = rgb_np
        else:  # avoid a NameError below for unsupported modalities
            raise RuntimeError("unsupported modality: " + str(self.modality))

        to_tensor = transforms.ToTensor()
        input_tensor = to_tensor(input_np)
        while input_tensor.dim() < 3:
            input_tensor = input_tensor.unsqueeze(0)
        depth_tensor = to_tensor(depth_np).unsqueeze(0)
        pose_tensor = to_tensor(pose)  #.unsqueeze(0)

        return input_tensor, depth_tensor, pose_tensor
Example #7
def apply_transform(transform, rgb, sparse, target, rgb_near):
    # hypothetical signature: the source snippet begins mid-function with
    # `transform` already built; the body applies it to each input in turn
    if rgb is not None:
        rgb = transform(rgb)
    if sparse is not None:
        sparse = transform(sparse)
    if target is not None:
        target = transform(target)
    if rgb_near is not None:
        rgb_near = transform(rgb_near)
    return rgb, sparse, target, rgb_near


def no_transform(rgb, sparse, target, rgb_near, args):
    return rgb, sparse, target, rgb_near


to_tensor = transforms.ToTensor()
def to_float_tensor(x):
    return to_tensor(x).float()


def handle_gray(rgb, args):
    if rgb is None:
        return None, None
    if not args.use_g:
        return rgb, None
    else:
        img = np.array(Image.fromarray(rgb).convert('L'))
        img = np.expand_dims(img, -1)
        if not args.use_rgb:
            rgb_ret = None
        else:
            rgb_ret = rgb
        return rgb_ret, img
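A hedged usage sketch for handle_gray with dummy data (the flag names mirror the fields the function reads; PIL's Image is assumed to be imported in the snippet's module):

import numpy as np
from argparse import Namespace

frame = np.zeros((240, 320, 3), dtype=np.uint8)  # dummy RGB frame
flags = Namespace(use_g=True, use_rgb=False)     # hypothetical args object
rgb_ret, gray = handle_gray(frame, flags)        # rgb_ret is None, gray has shape (240, 320, 1)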
Example #8
    def __repr__(self):  # signature restored; the source snippet starts mid-class
        return 'VOCAug(split=' + str(self.split) + ')'


if __name__ == '__main__':
    from dataloaders import transforms as tr
    from libs.utils import decode_segmap
    from torch.utils.data import DataLoader
    from torchvision import transforms
    import matplotlib.pyplot as plt
    import numpy as np

    composed_transforms_tr = transforms.Compose([
        tr.RandomHorizontalFlip(),
        tr.RandomSized(512),
        tr.RandomRotate(15),
        tr.ToTensor()
    ])

    voc_train = VOCAug(split='train', transform=composed_transforms_tr)

    dataloader = DataLoader(voc_train,
                            batch_size=5,
                            shuffle=True,
                            num_workers=1)

    print(len(dataloader))

    for ii, sample in enumerate(dataloader):
        print(sample['image'].size())
        img = sample['image'].numpy()
        gt = sample['label'].numpy()
Example #9
def make_im_gt_paths(file_path, root_dir):
    # hypothetical signature: the source snippet begins mid-function; it
    # collects (image path, ground-truth path) pairs from a list file
    im_gt_paths = []
    with open(file_path, 'r') as f:
        lines = f.readlines()

        for line in lines:
            im_path = os.path.join(root_dir, line.split()[0])
            gt_path = os.path.join(root_dir, line.split()[1])

            im_gt_paths.append((im_path, gt_path))

    return im_gt_paths


# array to tensor
from dataloaders import transforms as my_transforms
to_tensor = my_transforms.ToTensor()


class KittiFolder(Dataset):
    """
        RGB:
        kitti_raw_data/2011-xx-xx/2011_xx_xx_drive_xxxx_sync/image_02/data/xxxxxxxx01.png
        Depth:
        train: train_gt16bit/xxxxx.png
        val: val_gt16bit/xxxxx.png
        test: test_gt16bit/xxxxx.png
    """
    def __init__(self,
                 root_dir='/home/data/UnsupervisedDepth/wangixn/KITTI',
                 mode='train',
                 loader=pil_loader):
        ...  # the rest of this snippet is truncated in the source
Example #10
def main():
    print('Testing data on ' + args.camera + '!')

    assert args.data == 'nyudepthv2', \
        '=> only nyudepthv2 available at this point'

    to_tensor = transforms.ToTensor()

    assert not (args.camera == 'webcam' and args.modality != 'rgb'), \
        '=> webcam only accepts an RGB model'

    output_directory = utils.get_output_directory(args)
    best_model_filename = os.path.join(output_directory,
                                       'model_best.pth.tar')
    assert os.path.isfile(best_model_filename), \
        "=> no best model found at '{}'".format(
            best_model_filename)
    print("=> loading best model '{}'".format(
        best_model_filename))
    checkpoint = torch.load(best_model_filename)
    args.start_epoch = checkpoint['epoch']
    model = checkpoint['model']
    model.eval()

    switch = True

    if args.camera == 'kinect':
        kinect = PyKinectRuntime.PyKinectRuntime(
            PyKinectV2.FrameSourceTypes_Color |
            PyKinectV2.FrameSourceTypes_Depth)
        counter = 0

        assert kinect._sensor is not None, \
            '=> no Kinect device detected!'

        while True:
            if kinect.has_new_color_frame() and \
                    kinect.has_new_depth_frame():
                bgra_frame = kinect.get_last_color_frame()
                bgra_frame = bgra_frame.reshape(
                    (kinect.color_frame_desc.Height,
                     kinect.color_frame_desc.Width, 4),
                    order='C')
                rgb_frame = cv2.cvtColor(bgra_frame, cv2.COLOR_BGRA2RGB)

                depth_frame = kinect.get_last_depth_frame()

                # print the sample count only on the first frame
                merged_image, rmse = depth_estimate(model,
                                                    rgb_frame,
                                                    depth_frame,
                                                    save=False,
                                                    switch=switch)
                switch = False

                merged_image_bgr = cv2.cvtColor(
                    merged_image.astype('uint8'), cv2.COLOR_RGB2BGR)
                cv2.imshow('my webcam',
                           merged_image_bgr.astype('uint8'))
                if counter == 15:
                    print('RMSE = ' + str(rmse))
                counter = counter + 1
                if counter == 16:
                    counter = 0

            if cv2.waitKey(1) == 27:
                break

    elif args.camera == 'webcam':

        cam = cv2.VideoCapture(0)

        while True:

            ret_val, img = cam.read()

            img = cv2.flip(img, 1)

            # webcam frames are 3-channel BGR, so convert BGR -> RGB
            rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

            # torchvision's Resize expects a PIL image; resize with OpenCV
            # instead and stay in numpy (cv2.resize takes width x height)
            rgb_image = cv2.resize(rgb, (304, 228))

            if args.modality == 'rgbd':
                raise RuntimeError("=> can't test webcam with depth "
                                   "information!")

            rgb_np = np.asfarray(rgb_image,
                                 dtype='float') / 255
            input_tensor = to_tensor(rgb_np)
            while input_tensor.dim() < 4:
                input_tensor = input_tensor.unsqueeze(0)
            input_tensor = input_tensor.cuda()
            torch.cuda.synchronize()
            end = time.time()
            with torch.no_grad():
                pred = model(input_tensor)
            torch.cuda.synchronize()
            gpu_time = time.time() - end

            pred_depth = np.squeeze(pred.cpu().numpy())

            d_min = np.min(pred_depth)
            d_max = np.max(pred_depth)
            pred_color_map = color_map(pred_depth, d_min,
                                       d_max,
                                       plt.cm.viridis)

            merged_image = np.hstack(
                [rgb_image, pred_color_map])
            merged_image_bgr = cv2.cvtColor(
                merged_image.astype('uint8'),
                cv2.COLOR_RGB2BGR)
            cv2.imshow('my webcam',
                       merged_image_bgr.astype('uint8'))

            if cv2.waitKey(1) == 27:
                break  # esc to quit

    else:
        file_name = args.kinectdata + '.p'
        pickle_path = os.path.join('CameraData', file_name)
        print(pickle_path)

        if not os.path.exists(pickle_path):
            raise FileNotFoundError('=> no data found at ' + pickle_path)

        with open(pickle_path, 'rb') as f:
            pickle_file = pickle.load(f)

        bgr_frame = pickle_file['rgb']
        depth = pickle_file['depth']

        rgb_frame = cv2.cvtColor(bgr_frame,
                                 cv2.COLOR_BGR2RGB)

        merged_image, rmse = depth_estimate(model,
                                            rgb_frame,
                                            depth,
                                            save=True,
                                            switch=True)
        plt.figure('Merged Image')
        plt.imshow(merged_image.astype('uint8'))
        plt.show()
        print('RMSE = ' + str(rmse))

    cv2.destroyAllWindows()
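Both this script and the next one call a color_map helper that is not shown. A plausible sketch of what it does, under the assumption that it normalizes the depth map to [0, 1] and colorizes it with the given matplotlib colormap (the original implementation may differ):

def color_map(depth, d_min, d_max, cmap):
    # normalize to [0, 1], guard against a flat range, then colorize to 0-255 RGB
    rel = (depth - d_min) / max(d_max - d_min, 1e-8)
    return 255 * cmap(rel)[:, :, :3]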
Example #11
def depth_estimate(model, rgb_frame, depth, save=False,
                   switch=False):
    to_tensor = transforms.ToTensor()
    cmap_depth = plt.cm.viridis
    cmap_error = plt.cm.inferno

    rgb_np, depth_np = image_transform(rgb_frame, depth)

    ## create sparse depth
    mask_keep = sampler(depth_np, args.sample_spacing)
    sample_number = np.count_nonzero(
        mask_keep.astype('int'))
    if args.modality == 'rgb':
        sample_number = 0
    if switch:
        print('Total samples = ' + str(sample_number))
    sparse_depth = np.zeros(depth_np.shape)
    sparse_depth[mask_keep] = depth_np[mask_keep]
    sparse_depth_np = sparse_depth

    ##choose input
    if args.modality == 'rgb':
        input_np = rgb_np
    elif args.modality == 'rgbd':
        rgbd = np.append(rgb_np,
                         np.expand_dims(sparse_depth_np,
                                        axis=2), axis=2)
        input_np = np.asfarray(rgbd, dtype='float')
    elif args.modality == 'd':
        input_np = sparse_depth_np

    input_tensor = to_tensor(input_np)

    while input_tensor.dim() < 4:
        input_tensor = input_tensor.unsqueeze(0)

    input_tensor = input_tensor.cuda()
    torch.cuda.synchronize()

    ##get prediction
    end = time.time()
    with torch.no_grad():
        pred = model(input_tensor)
    torch.cuda.synchronize()
    gpu_time = time.time() - end

    ##get result
    target_tensor = to_tensor(depth_np)
    while target_tensor.dim() < 4:
        target_tensor = target_tensor.unsqueeze(0)
    target_tensor = target_tensor.cuda()
    torch.cuda.synchronize()
    result = Result()
    result.evaluate(pred.data, target_tensor.data)

    pred_depth = np.squeeze(pred.data.cpu().numpy())

    ##convert depth to colour map
    d_min = min(np.min(pred_depth), np.min(depth_np))
    d_max = max(np.max(pred_depth), np.max(depth_np))
    # # d_min = float(0.5)
    # # d_max = float(6)

    pred_depth_rgb_map = color_map(pred_depth, d_min, d_max,
                                   cmap_depth)
    input_depth_color_map = color_map(depth_np, d_min,
                                      d_max, cmap_depth)
    sparse_depth_color_map = color_map(sparse_depth, d_min,
                                       d_max, cmap_depth)

    ##get error map
    mask = depth_np <= 0
    combined_map = depth_np.copy()  # copy so the raw depth map is not mutated
    combined_map[mask] = pred_depth[mask]
    abs_error_map = np.absolute(combined_map - pred_depth)
    # error_min = min(np.min(combined_map), np.min(
    # pred_depth))
    # error_max = max(np.max(combined_map), np.max(
    # pred_depth))
    error_min = np.min(abs_error_map)
    error_max = np.max(abs_error_map)
    error_map_color = color_map(abs_error_map, error_min,
                                error_max, cmap_error)

    ##show images
    rgb_image = rgb_np * 255
    merged_image = np.hstack([rgb_image, pred_depth_rgb_map,
                              input_depth_color_map,
                              sparse_depth_color_map,
                              error_map_color])

    ##save image
    if args.write and save:
        images_save(sample_number, pred_depth_rgb_map,
                    sparse_depth_color_map, error_map_color,
                    rgb_image, input_depth_color_map,
                    result)

    return merged_image, result.rmse
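The sampler helper used above is also not shown. A minimal sketch under the assumption that it keeps one valid depth pixel per sample_spacing grid cell (the original may sample differently, e.g. uniformly at random):

import numpy as np

def sampler(depth, spacing):
    # boolean mask: a regular grid intersected with valid (positive) depth
    mask = np.zeros(depth.shape, dtype=bool)
    mask[::spacing, ::spacing] = True
    return np.logical_and(mask, depth > 0)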
Example #12
    def __init__(self, directory):
        self.directory = directory
        self.data = fetch_data(self.directory)
        self.transform = None

        self.to_tensor = transforms.ToTensor()