def val_transform(self, rgb, depth):
        s = self.getFocalScale()

        # Convert to float32 before transforming; scipy's affine_transform
        # raised a RuntimeError when given the raw depth ndarray.
        depth = np.asfarray(depth, dtype='float32')
        if self.augArgs.varScale:  # Variable global scale simulation
            scale = self.getDepthGroup()
            depth_np = depth * scale
        else:
            depth_np = depth

        if self.augArgs.varFocus:
            transform = transforms.Compose([
                transforms.Crop(130, 10, 240, 1200),
                transforms.Resize(s),  # Resize both images without correcting the depth values
                transforms.CenterCrop(self.output_size),
            ])
        else:
            transform = transforms.Compose([
                transforms.Crop(130, 10, 240, 1200),
                transforms.CenterCrop(self.output_size),
            ])

        rgb_np = transform(rgb)
        rgb_np = np.asfarray(rgb_np, dtype='float') / 255
        depth_np = transform(depth_np)
        return rgb_np, depth_np
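# The snippets here rely on a project-specific `transforms` module rather than
# torchvision. A minimal sketch of the assumed interface is below: callables acting
# on H x W (x C) numpy arrays, with Crop taking (top, left, height, width) and
# CenterCrop taking a (height, width) output size. This only illustrates the assumed
# behavior; it is not the project's actual implementation.
class Crop(object):
    def __init__(self, top, left, height, width):
        self.top, self.left = top, left
        self.height, self.width = height, width

    def __call__(self, img):
        return img[self.top:self.top + self.height,
                   self.left:self.left + self.width]


class CenterCrop(object):
    def __init__(self, size):
        self.h, self.w = size

    def __call__(self, img):
        top = (img.shape[0] - self.h) // 2
        left = (img.shape[1] - self.w) // 2
        return img[top:top + self.h, left:left + self.w]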
    def train_transform(self, im, gt):
        im = np.array(im).astype(np.float32)
        gt = np.array(gt).astype(np.float32)

        s = np.random.uniform(1.0, 1.5)  # random scaling
        angle = np.random.uniform(-5.0, 5.0)  # random rotation degrees
        do_flip = np.random.uniform(0.0, 1.0) < 0.5  # random horizontal flip
        color_jitter = my_transforms.ColorJitter(0.4, 0.4, 0.4)

        transform = my_transforms.Compose([
            my_transforms.Crop(130, 10, 240, 1200),
            my_transforms.Resize(460 / 240, interpolation='bilinear'),
            my_transforms.Rotate(angle),
            my_transforms.Resize(s),
            my_transforms.CenterCrop(self.size),
            my_transforms.HorizontalFlip(do_flip)
        ])

        im_ = transform(im)
        im_ = color_jitter(im_)

        gt_ = transform(gt)

        im_ = np.array(im_).astype(np.float32)
        gt_ = np.array(gt_).astype(np.float32)

        im_ /= 255.0
        gt_ /= 100.0 * s  # divide by the depth divider (100) and compensate for the random resize by s
        im_ = to_tensor(im_)
        gt_ = to_tensor(gt_)

        gt_ = gt_.unsqueeze(0)

        return im_, gt_
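# `to_tensor` is not defined in this excerpt; a minimal sketch, assuming it turns an
# H x W (x C) numpy array into a (C x) H x W float32 torch tensor. The line
# `gt_ /= 100.0 * s` above presumably divides out the depth unit (100) and
# compensates for the random resize: enlarging the image by s makes the scene look
# s times closer, so the depth target is divided by s as well.
import numpy as np
import torch


def to_tensor(arr):
    arr = np.asarray(arr, dtype=np.float32)
    if arr.ndim == 3:  # H x W x C -> C x H x W
        arr = np.transpose(arr, (2, 0, 1))
    return torch.from_numpy(arr.copy())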
    def train_transform(self, rgb, depth):
        #s = np.random.uniform(1.0, 1.5)  # random scaling
        #depth_np = depth / s
        s = self.getFocalScale()

        if self.augArgs.varFocus:  # Variable focal length simulation
            depth_np = depth
        else:
            depth_np = depth / s  # Correct for focal length

        if self.augArgs.varScale:  # Variable global scale simulation
            scale = self.getDepthGroup()
            depth_np = depth_np * scale

        angle = np.random.uniform(-5.0, 5.0)  # random rotation degrees
        do_flip = np.random.uniform(0.0, 1.0) < 0.5  # random horizontal flip

        # perform 1st step of data augmentation
        transform = transforms.Compose([
            transforms.Crop(130, 10, 240, 1200),
            transforms.Rotate(angle),
            transforms.Resize(s),
            transforms.CenterCrop(self.output_size),
            transforms.HorizontalFlip(do_flip)
        ])
        rgb_np = transform(rgb)
        rgb_np = self.color_jitter(rgb_np)  # random color jittering
        rgb_np = np.asfarray(rgb_np, dtype='float') / 255
        # Scipy affine_transform produced RuntimeError when the depth map was
        # given as a 'numpy.ndarray'
        depth_np = np.asfarray(depth_np, dtype='float32')
        depth_np = transform(depth_np)

        return rgb_np, depth_np
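# A small numeric illustration of the focal-length logic above (illustrative only):
# resizing by s = 1.2 mimics a 1.2x longer focal length. With varFocus disabled the
# depth target is divided by s so that image and depth stay metrically consistent;
# with varFocus enabled the division is skipped on purpose, leaving the network to
# cope with the resulting scale ambiguity.
s = 1.2
depth_value_m = 10.0
corrected_depth = depth_value_m / s  # ~8.33 m, consistent with the 1.2x-resized view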
    def val_transform(self, rgb, depth):
        depth_np = depth / self.depth_divider
        transform = transforms.Compose([
            transforms.Crop(130, 10, 240, 1200),
            transforms.CenterCrop(self.output_size),
        ])
        rgb_np = transform(rgb)
        rgb_np = np.asfarray(rgb_np, dtype='float') / 255
        depth_np = np.asfarray(depth_np, dtype='float32')
        depth_np = transform(depth_np)

        return rgb_np, depth_np
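# `self.depth_divider` is not defined in this excerpt. A plausible reading, given the
# explicit divisions by 100.0 elsewhere in these snippets, is that it converts the
# stored depth encoding into meters (e.g. 100 for centimeter-encoded maps, or 256
# for KITTI-style uint16 PNGs that store depth * 256). Illustrative decoding sketch
# with assumed values:
import numpy as np

raw = np.array([[25600, 0], [51200, 12800]], dtype=np.uint16)  # hypothetical stored depth
depth_m = raw.astype(np.float32) / 256.0  # assuming a divider of 256 -> 100 m, 0, 200 m, 50 m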
    def val_transform(self, rgb, depth):
        depth_np = depth
        transform = transforms.Compose([
            transforms.Crop(130, 10, 220, 1200),
            transforms.CenterCrop(self.output_size)
        ])
        rgb_np = transform(rgb)
        rgb_np = np.asfarray(rgb_np, dtype='float') / 255  # scale RGB from [0, 255] to [0, 1]
        depth_np = np.asfarray(depth_np, dtype='float32')
        depth_np = transform(depth_np)

        return rgb_np, depth_np
    def val_transform(self, rgb, depth):
        depth_np = depth
        transform = transforms.Compose([
            transforms.Crop(0, 20, 750, 2000),
            transforms.Resize(500 / 750),
            transforms.CenterCrop(self.output_size),
        ])
        rgb_np = transform(rgb)
        rgb_np = np.asfarray(rgb_np, dtype='float') / 255
        depth_np = np.asfarray(depth_np, dtype='float32')
        depth_np = transform(depth_np)

        return rgb_np, depth_np
    def val_transform(self, rgb, depth):
        depth_np = depth
        transform = transforms.Compose([
            # transforms.Resize(250.0 / iheight),
            transforms.Crop(130, 10, 240, 1200),
            transforms.CenterCrop(self.output_size),
            transforms.Resize(self.output_size),
        ])
        rgb_np = transform(rgb)
        rgb_np = np.asfarray(rgb_np, dtype='float') / 255
        depth_np = np.asfarray(depth_np, dtype='float32')
        depth_np = transform(depth_np)

        return rgb_np, depth_np
    def _val_transform(self, rgb, sparse_depth, depth_gt):
        transform = transforms.Compose([
            transforms.Crop(*self._road_crop),
            transforms.CenterCrop(self.output_size),
        ])
        rgb = transform(rgb)
        rgb = np.asfarray(rgb, dtype='float') / 255

        sparse_depth = np.asfarray(sparse_depth, dtype='float32')
        sparse_depth = transform(sparse_depth)

        depth_gt = np.asfarray(depth_gt, dtype='float32')
        depth_gt = transform(depth_gt)

        return rgb, sparse_depth, depth_gt
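# The rgb / sparse_depth pair produced above is the usual input for a depth-completion
# network. One common packing (assumed here, not necessarily what this codebase does)
# is a 4-channel "RGBd" tensor: three color channels plus the sparse depth channel.
import numpy as np
import torch


def to_rgbd_tensor(rgb_np, sparse_np):
    # rgb_np: H x W x 3 in [0, 1], sparse_np: H x W sparse depth in meters
    rgbd = np.concatenate([rgb_np, sparse_np[..., None]], axis=2)  # H x W x 4
    return torch.from_numpy(np.transpose(rgbd, (2, 0, 1)).astype(np.float32))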
    def val_transform(self, im, gt):
        im = np.array(im).astype(np.float32)
        gt = np.array(gt).astype(np.float32)

        transform = my_transforms.Compose([
            my_transforms.Crop(130, 10, 240, 1200),
            my_transforms.Resize(460 / 240, interpolation='bilinear'),
            my_transforms.CenterCrop(self.size)
        ])

        im_ = transform(im)
        gt_ = transform(gt)

        im_ = np.array(im_).astype(np.float32)
        gt_ = np.array(gt_).astype(np.float32)

        im_ /= 255.0
        gt_ /= 100.0
        im_ = to_tensor(im_)
        gt_ = to_tensor(gt_)

        gt_ = gt_.unsqueeze(0)
        return im_, gt_
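# Sketch of how paired transforms like the ones above are typically wired into a
# PyTorch Dataset (illustrative; `samples` and `transform` are hypothetical names,
# and the real dataset class is not part of these snippets). Each __getitem__ call
# re-samples the random augmentation parameters, while RGB and depth still receive
# the identical warp because the parameters are fixed within one transform call.
from torch.utils.data import Dataset


class DepthDataset(Dataset):
    def __init__(self, samples, transform):
        self.samples = samples      # list of (rgb_array, depth_array) pairs
        self.transform = transform  # e.g. a bound train_transform or val_transform

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        rgb, depth = self.samples[idx]
        return self.transform(rgb, depth)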
    def train_transform(self, rgb, depth):
        s = np.random.uniform(1.0, 1.5)  # random scaling
        depth_np = depth / (s * self.depth_divider)
        angle = np.random.uniform(-5.0, 5.0)  # random rotation degrees
        do_flip = np.random.uniform(0.0, 1.0) < 0.5  # random horizontal flip

        # perform 1st step of data augmentation
        transform = transforms.Compose([
            transforms.Crop(130, 10, 240, 1200),
            transforms.Rotate(angle),
            transforms.Resize(s),
            transforms.CenterCrop(self.output_size),
            transforms.HorizontalFlip(do_flip)
        ])
        rgb_np = transform(rgb)
        rgb_np = self.color_jitter(rgb_np)  # random color jittering
        rgb_np = np.asfarray(rgb_np, dtype='float') / 255
        # Scipy affine_transform produced RuntimeError when the depth map was
        # given as a 'numpy.ndarray'
        depth_np = np.asfarray(depth_np, dtype='float32')
        depth_np = transform(depth_np)

        return rgb_np, depth_np
    def _train_transform(self, rgb, sparse_depth, depth_gt):
        s = np.random.uniform(1.0, 1.5)  # random scaling
        depth_gt = depth_gt / s

        # TODO critical why is the input not scaled in original implementation?
        sparse_depth = sparse_depth / s

        # TODO adapt and refactor
        angle = np.random.uniform(-5.0, 5.0)  # random rotation degrees
        do_flip = np.random.uniform(0.0, 1.0) < 0.5  # random horizontal flip

        # perform 1st step of data augmentation
        # TODO critical adjust sizes
        transform = transforms.Compose([
            transforms.Crop(*self._road_crop),
            transforms.Rotate(angle),
            transforms.Resize(s),
            transforms.CenterCrop(self.output_size),
            transforms.HorizontalFlip(do_flip)
        ])

        rgb = transform(rgb)
        sparse_depth = transform(sparse_depth)

        # TODO needed?
        # Scipy affine_transform produced RuntimeError when the depth map was
        # given as a 'numpy.ndarray'
        depth_gt = np.asfarray(depth_gt, dtype='float32')
        depth_gt = transform(depth_gt)

        rgb = self._color_jitter(rgb)  # random color jittering

        # convert color [0,255] -> [0.0, 1.0] floats
        rgb = np.asfarray(rgb, dtype='float') / 255

        return rgb, sparse_depth, depth_gt
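# A quick sanity check one might run on the augmentation above (illustrative, using a
# hypothetical `dataset` instance and already-loaded numpy inputs): because `angle`,
# `s` and `do_flip` are sampled once and baked into the Compose, the same geometric
# warp hits RGB, sparse depth and ground truth, so all three outputs should share the
# same spatial size and remain pixel-aligned.
rgb_t, sparse_t, gt_t = dataset._train_transform(rgb, sparse_depth, depth_gt)
assert rgb_t.shape[:2] == sparse_t.shape[:2] == gt_t.shape[:2]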
def main(cfg, gpus):
    # Network Builders
    torch.cuda.set_device(gpus[0])
    print('###### Create model ######')
    net_objectness = ModelBuilder.build_objectness(
        arch=cfg.MODEL.arch_objectness,
        weights=cfg.MODEL.weights_enc_query,
        fix_encoder=cfg.TRAIN.fix_encoder)
    net_decoder = ModelBuilder.build_decoder(
        arch=cfg.MODEL.arch_decoder.lower(),
        input_dim=cfg.MODEL.decoder_dim,
        fc_dim=cfg.MODEL.fc_dim,
        ppm_dim=cfg.MODEL.ppm_dim,
        num_class=2,
        weights=cfg.MODEL.weights_decoder,
        dropout_rate=cfg.MODEL.dropout_rate,
        use_dropout=cfg.MODEL.use_dropout)

    crit = nn.NLLLoss(ignore_index=255)

    print('###### Load data ######')
    data_name = cfg.DATASET.name
    if data_name == 'VOC':
        from dataloaders.customized_objectness import voc_fewshot
        make_data = voc_fewshot
        max_label = 20
    elif data_name == 'COCO':
        from dataloaders.customized_objectness import coco_fewshot
        make_data = coco_fewshot
        max_label = 80
    else:
        raise ValueError('Wrong config for dataset!')
    labels = CLASS_LABELS[data_name][cfg.TASK.fold_idx]
    labels_val = CLASS_LABELS[data_name]['all'] - CLASS_LABELS[data_name][
        cfg.TASK.fold_idx]
    exclude_labels = labels_val

    value_scale = 255
    mean = [0.485, 0.456, 0.406]
    mean = [item * value_scale for item in mean]
    std = [0.229, 0.224, 0.225]
    std = [item * value_scale for item in std]

    train_transform = [
        transforms.ToNumpy(),
        transforms.RandScale([0.9, 1.1]),
        transforms.RandRotate([-10, 10], padding=mean, ignore_label=0),
        transforms.RandomGaussianBlur(),
        transforms.RandomHorizontalFlip(),
        transforms.Crop([cfg.DATASET.input_size[0], cfg.DATASET.input_size[1]],
                        crop_type='rand',
                        padding=mean,
                        ignore_label=0)
    ]

    train_transform = Compose(train_transform)

    val_transform = Compose([
        transforms.ToNumpy(),
        transforms.Resize_pad(size=cfg.DATASET.input_size[0])
    ])

    dataset = make_data(base_dir=cfg.DATASET.data_dir,
                        split=cfg.DATASET.data_split,
                        transforms=train_transform,
                        to_tensor=transforms.ToTensorNormalize_noresize(),
                        labels=labels,
                        max_iters=cfg.TRAIN.n_iters * cfg.TRAIN.n_batch,
                        n_ways=cfg.TASK.n_ways,
                        n_shots=cfg.TASK.n_shots,
                        n_queries=cfg.TASK.n_queries,
                        permute=cfg.TRAIN.permute_labels,
                        exclude_labels=exclude_labels)
    trainloader = DataLoader(dataset,
                             batch_size=cfg.TRAIN.n_batch,
                             shuffle=True,
                             num_workers=4,
                             pin_memory=True,
                             drop_last=True)

    #segmentation_module = nn.DataParallel(segmentation_module, device_ids=gpus)
    net_objectness.cuda()
    net_decoder.cuda()

    # Set up optimizers
    nets = (net_objectness, net_decoder, crit)
    optimizers = create_optimizers(nets, cfg)

    batch_time = AverageMeter()
    data_time = AverageMeter()
    ave_total_loss = AverageMeter()
    ave_acc = AverageMeter()

    history = {'train': {'iter': [], 'loss': [], 'acc': []}}

    net_objectness.train(not cfg.TRAIN.fix_bn)
    net_decoder.train(not cfg.TRAIN.fix_bn)

    best_iou = 0
    # main loop
    tic = time.time()

    print('###### Training ######')
    for i_iter, sample_batched in enumerate(trainloader):
        # Prepare input
        feed_dict = data_preprocess(sample_batched, cfg)

        data_time.update(time.time() - tic)
        net_objectness.zero_grad()
        net_decoder.zero_grad()

        # adjust learning rate
        adjust_learning_rate(optimizers, i_iter, cfg)

        # forward pass
        feat = net_objectness(feed_dict['img_data'], return_feature_maps=True)
        pred = net_decoder(feat)
        loss = crit(pred, feed_dict['seg_label'])
        acc = pixel_acc(pred, feed_dict['seg_label'])
        loss = loss.mean()
        acc = acc.mean()

        # Backward
        loss.backward()
        for optimizer in optimizers:
            if optimizer:
                optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - tic)
        tic = time.time()

        # update average loss and acc
        ave_total_loss.update(loss.data.item())
        ave_acc.update(acc.data.item() * 100)

        # calculate accuracy, and display
        if i_iter % cfg.TRAIN.disp_iter == 0:
            print('Iter: [{}/{}], Time: {:.2f}, Data: {:.2f}, '
                  'lr_encoder: {:.6f}, lr_decoder: {:.6f}, '
                  'Accuracy: {:4.2f}, Loss: {:.6f}'.format(
                      i_iter, cfg.TRAIN.n_iters, batch_time.average(),
                      data_time.average(), cfg.TRAIN.running_lr_encoder,
                      cfg.TRAIN.running_lr_decoder, ave_acc.average(),
                      ave_total_loss.average()))

            history['train']['iter'].append(i_iter)
            history['train']['loss'].append(loss.data.item())
            history['train']['acc'].append(acc.data.item())

        if (i_iter + 1) % cfg.TRAIN.save_freq == 0:
            checkpoint(nets, history, cfg, i_iter + 1)

        if (i_iter + 1) % cfg.TRAIN.eval_freq == 0:
            metric = Metric(max_label=max_label, n_runs=cfg.VAL.n_runs)
            with torch.no_grad():
                print('----Evaluation----')
                net_objectness.eval()
                net_decoder.eval()
                net_decoder.use_softmax = True
                for run in range(cfg.VAL.n_runs):
                    print(f'### Run {run + 1} ###')
                    set_seed(cfg.VAL.seed + run)

                    print('### Load validation data ###')
                    dataset_val = make_data(
                        base_dir=cfg.DATASET.data_dir,
                        split='val',
                        transforms=val_transform,
                        to_tensor=transforms.ToTensorNormalize_noresize(),
                        labels=labels_val,
                        max_iters=cfg.VAL.n_iters * cfg.VAL.n_batch,
                        n_ways=cfg.TASK.n_ways,
                        n_shots=cfg.TASK.n_shots,
                        n_queries=cfg.TASK.n_queries,
                        permute=cfg.VAL.permute_labels,
                        exclude_labels=[])
                    if data_name == 'COCO':
                        coco_cls_ids = dataset_val.datasets[
                            0].dataset.coco.getCatIds()
                    testloader = DataLoader(dataset_val,
                                            batch_size=cfg.VAL.n_batch,
                                            shuffle=False,
                                            num_workers=1,
                                            pin_memory=True,
                                            drop_last=False)
                    print(f"Total # of validation data: {len(dataset_val)}")

                    #for sample_batched in tqdm.tqdm(testloader):
                    for sample_batched in testloader:
                        feed_dict = data_preprocess(sample_batched,
                                                    cfg,
                                                    is_val=True)
                        if data_name == 'COCO':
                            label_ids = [
                                coco_cls_ids.index(x) + 1
                                for x in sample_batched['class_ids']
                            ]
                        else:
                            label_ids = list(sample_batched['class_ids'])

                        feat = net_objectness(feed_dict['img_data'],
                                              return_feature_maps=True)
                        query_pred = net_decoder(
                            feat, segSize=cfg.DATASET.input_size)
                        metric.record(
                            np.array(query_pred.argmax(dim=1)[0].cpu()),
                            np.array(feed_dict['seg_label'][0].cpu()),
                            labels=label_ids,
                            n_run=run)

                    classIoU, meanIoU = metric.get_mIoU(
                        labels=sorted(labels_val), n_run=run)
                    classIoU_binary, meanIoU_binary = metric.get_mIoU_binary(
                        n_run=run)

            classIoU, classIoU_std, meanIoU, meanIoU_std = metric.get_mIoU(
                labels=sorted(labels_val))
            (classIoU_binary, classIoU_std_binary, meanIoU_binary,
             meanIoU_std_binary) = metric.get_mIoU_binary()

            print('----- Evaluation Result -----')
            print(f'best meanIoU_binary: {best_iou}')
            print(f'meanIoU mean: {meanIoU}')
            print(f'meanIoU std: {meanIoU_std}')
            print(f'meanIoU_binary mean: {meanIoU_binary}')
            print(f'meanIoU_binary std: {meanIoU_std_binary}')

            if meanIoU_binary > best_iou:
                best_iou = meanIoU_binary
                checkpoint(nets, history, cfg, 'best')
            net_objectness.train(not cfg.TRAIN.fix_bn)
            net_decoder.train(not cfg.TRAIN.fix_bn)
            net_decoder.use_softmax = False

    print('Training Done!')
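# `AverageMeter`, `adjust_learning_rate` and `pixel_acc` are referenced above but not
# defined in this excerpt. Minimal sketches of the assumed behavior follow: a running
# average tracker, a polynomial ("poly") learning-rate decay, and a pixel accuracy
# that skips void pixels. These are illustrative assumptions, not the project's code.
import torch


class AverageMeter(object):
    def __init__(self):
        self.sum = 0.0
        self.count = 0

    def update(self, val, n=1):
        self.sum += val * n
        self.count += n

    def average(self):
        return self.sum / max(self.count, 1)


def poly_lr(base_lr, cur_iter, max_iter, power=0.9):
    # assumed schedule: lr = base_lr * (1 - iter / max_iter) ** power
    return base_lr * (1.0 - float(cur_iter) / max_iter) ** power


def pixel_acc(pred, label, ignore_index=255):
    # pred: N x C x H x W logits, label: N x H x W with `ignore_index` marking void pixels
    preds = pred.argmax(dim=1)
    valid = (label != ignore_index)
    correct = ((preds == label) & valid).sum().float()
    return correct / valid.sum().clamp(min=1).float()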