Example #1
    def forward(self, es, ta):
        if self.mod is None:
            sys.path.append(str(config.lpips_root))
            import PerceptualSimilarity.models as ps

            self.mod = ps.PerceptualLoss()

        if self.clip:
            es = torch.clamp(es, -1, 1)
        out = self.mod(es, ta, normalize=False)

        return out.mean()
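For context, a minimal sketch of how the enclosing module might look; the class name, the clip flag default, and the lazy self.mod = None initialisation are assumptions inferred from the forward() above, not part of the original snippet:

import torch
import torch.nn as nn

class LPIPSLoss(nn.Module):  # hypothetical name for the enclosing module
    def __init__(self, clip=True):
        super().__init__()
        self.mod = None   # the PerceptualSimilarity model is imported and built lazily in forward()
        self.clip = clip  # whether to clamp estimates into the [-1, 1] range LPIPS expects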
Example #2
def compute_lpips(gt_path, inp_path, version='0.0', use_gpu=True):
    model = models.PerceptualLoss(model='net-lin',
                                  net='alex',
                                  use_gpu=use_gpu,
                                  version=version)
    img0_np = util.load_image(gt_path)
    img1_np = util.load_image(inp_path)
    img0 = util.im2tensor(img0_np)
    img1 = util.im2tensor(img1_np)
    if use_gpu:
        img0 = img0.cuda()
        img1 = img1.cuda()

    dist01 = model.forward(img0, img1)
    if use_gpu:
        return dist01.item()
    return dist01
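A minimal, hypothetical invocation of the function above; the file names are placeholders, and torch is assumed to be imported alongside models and util:

if __name__ == '__main__':
    dist = compute_lpips('gt.png', 'restored.png', use_gpu=torch.cuda.is_available())
    print('LPIPS:', dist)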
Example #3
def main(args):
    ## Distributed computing

    # utility for synchronization
    def reduce_tensor(tensor):
        rt = tensor.clone()
        torch.distributed.all_reduce(rt, op = torch.distributed.ReduceOp.SUM)
        return rt

    # enable distributed computing
    if args.distributed:
        set_affinity(args.local_rank)
        num_devices = torch.cuda.device_count()
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend = 'nccl', init_method = 'env://')

        world_size  = torch.distributed.get_world_size() #os.environ['WORLD_SIZE']
        print('num_devices', num_devices, 
              'local_rank', args.local_rank, 
              'world_size', world_size)
    else: # if not args.distributed:
        num_devices, world_size = 1, 1

    ## Model preparation (Conv-LSTM or Conv-TT-LSTM)

    # construct the model with the specified hyper-parameters
    model = ConvLSTMNet(
        input_channels = args.img_channels, 
        output_sigmoid = args.use_sigmoid,
        # model architecture
        layers_per_block = (3, 3, 3, 3), 
        hidden_channels  = (32, 48, 48, 32), 
        skip_stride = 2,
        # convolutional tensor-train layers
        cell = args.model,
        cell_params = {
            "order": args.model_order, 
            "steps": args.model_steps, 
            "ranks": args.model_ranks},
        # convolutional parameters
        kernel_size = args.kernel_size).cuda()

    if args.distributed:
        if args.use_apex: # use DDP from apex.parallel
            from apex.parallel import DistributedDataParallel as DDP
            model = DDP(model, delay_allreduce = True)
        else: # use DDP from nn.parallel
            from torch.nn.parallel import DistributedDataParallel as DDP
            model = DDP(model, device_ids = [args.local_rank])

    PSmodel = PSmodels.PerceptualLoss(model = 'net-lin', 
        net = 'alex', use_gpu = True, gpu_ids = [args.local_rank])

    ## Dataset Preparation (MNIST, KTH)
    Dataset = {"KTH": KTH_Dataset, "MNIST": MNIST_Dataset}[args.dataset]

    DATA_DIR = os.path.join("../data", 
        {"MNIST": "mnist", "KTH": "kth"}[args.dataset])

    # batch size for each process
    total_batch_size  = args.batch_size
    assert total_batch_size % world_size == 0, \
        'The batch_size is not divisible by world_size.'
    batch_size = total_batch_size // world_size

    total_frames = args.input_frames + args.future_frames

    # dataloader for the test dataset
    test_data_path = os.path.join(DATA_DIR, args.test_data_file)
    assert os.path.exists(test_data_path), \
        "The test dataset does not exist. "+test_data_path

    test_dataset = Dataset({"path": test_data_path, 
        "unique_mode": True, "num_frames": total_frames, "num_samples": args.test_samples,
        "height": args.img_height, "width": args.img_width, "channels": args.img_channels, 'training': False})

    test_sampler = torch.utils.data.distributed.DistributedSampler(
        test_dataset, num_replicas = world_size, rank = args.local_rank, shuffle = False)
    test_loader  = torch.utils.data.DataLoader(
        test_dataset, batch_size = batch_size, drop_last = True, 
        num_workers = num_devices * 4, pin_memory = True, sampler = test_sampler)

    test_samples = len(test_loader) * total_batch_size
    print(test_samples)

    ## Main script for test phase 
    MSE_  = torch.zeros((args.future_frames), dtype = torch.float32).cuda()
    PSNR_ = torch.zeros((args.future_frames), dtype = torch.float32).cuda()
    SSIM_ = torch.zeros((args.future_frames), dtype = torch.float32).cuda()
    PIPS_ = torch.zeros((args.future_frames), dtype = torch.float32).cuda()

    with torch.no_grad():
        model.eval()

        for it, frames in enumerate(test_loader):

            frames = frames.permute(0, 1, 4, 2, 3).cuda()
            inputs = frames[:,  :args.input_frames]
            origin = frames[:, -args.future_frames:]

            pred = model(inputs, 
                input_frames  =  args.input_frames, 
                future_frames = args.future_frames, 
                output_frames = args.future_frames, 
                teacher_forcing = False)

            # accumulate the statistics per frame
            for t in range(-args.future_frames, 0):
                origin_, pred_ = origin[:, t], pred[:, t]

                if args.img_channels == 1:
                    origin_ = origin_.repeat([1, 3, 1, 1])
                    pred_   =   pred_.repeat([1, 3, 1, 1])

                dist = PSmodel(origin_, pred_)
                PIPS_[t] += torch.sum(dist).item()

            origin = origin.permute(0, 1, 3, 4, 2).cpu().numpy()
            pred   =   pred.permute(0, 1, 3, 4, 2).cpu().numpy()

            for t in range(-args.future_frames, 0):
                for i in range(batch_size):
                    origin_, pred_ = origin[i, t], pred[i, t]

                    if args.img_channels == 1:
                        origin_ = np.squeeze(origin_, axis = -1)
                        pred_   = np.squeeze(pred_,   axis = -1)

                    MSE_[t]  += skimage.metrics.mean_squared_error(origin_, pred_)
                    PSNR_[t] += skimage.metrics.peak_signal_noise_ratio(origin_, pred_)
                    SSIM_[t] += skimage.metrics.structural_similarity(origin_, pred_, multichannel = args.img_channels > 1)

        if args.distributed:
            MSE  = reduce_tensor( MSE_) / test_samples
            PSNR = reduce_tensor(PSNR_) / test_samples
            SSIM = reduce_tensor(SSIM_) / test_samples
            PIPS = reduce_tensor(PIPS_) / test_samples
        else: # if not args.distributed:
            MSE  = MSE_  / test_samples
            PSNR = PSNR_ / test_samples
            SSIM = SSIM_ / test_samples
            PIPS = PIPS_ / test_samples

    if args.local_rank == 0:
        print("MSE: {} (x1e-3)\nPSNR: {}\nSSIM: {}\nLPIPS: {}".format(
            1e3 * torch.mean(MSE).cpu().item(), torch.mean(PSNR).cpu().item(), 
            torch.mean(SSIM).cpu().item(), torch.mean(PIPS).cpu().item()))

    print( "MSE:",  MSE.cpu().numpy())
    print("PSNR:", PSNR.cpu().numpy())
    print("SSIM:", SSIM.cpu().numpy())
    print("PIPS:", PIPS.cpu().numpy())
Example #4
def main(args):
    ## Model preparation (Conv-LSTM or Conv-TT-LSTM)

    # whether to use GPU (or CPU)
    use_cuda = args.use_cuda and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    # whether to use multi-GPU (or single-GPU)
    multi_gpu = use_cuda and args.multi_gpu and torch.cuda.device_count() > 1
    num_gpus = (
        torch.cuda.device_count() if multi_gpu else 1) if use_cuda else 0

    # construct the model with the specified hyper-parameters
    model = ConvLSTMNet(
        # input to the model
        input_channels=args.img_channels,
        # architecture of the model
        layers_per_block=(3, 3, 3, 3),
        hidden_channels=(32, 48, 48, 32),
        skip_stride=2,
        # parameters of convolutional tensor-train layers
        cell=args.model,
        cell_params={
            "order": args.model_order,
            "steps": args.model_steps,
            "rank": args.model_rank
        },
        # parameters of convolutional operations
        kernel_size=args.kernel_size,
        bias=True,
        # output function and output format
        output_sigmoid=args.use_sigmoid)

    # move the model to the device (CPU, GPU, multi-GPU)
    model.to(device)
    if multi_gpu:
        model = nn.DataParallel(model)

    # load the model parameters from checkpoint
    model.load_state_dict(torch.load(args.checkpoint))

    ## Dataset Preparation (Moving-MNIST, KTH)
    Dataset = {"MNIST": MNIST_Dataset, "KTH": KTH_Dataset}[args.dataset]

    DATA_DIR = os.path.join("../../datasets", {
        "MNIST": "moving-mnist",
        "KTH": "kth"
    }[args.dataset])

    # number of total frames
    total_frames = args.input_frames + args.future_frames

    # dataloader for the test set
    test_data_path = os.path.join(DATA_DIR, args.test_data_file)

    test_data = Dataset({
        "path": test_data_path,
        "unique_mode": True,
        "num_frames": total_frames,
        "num_samples": args.test_samples,
        "height": args.img_height,
        "width": args.img_width,
        "channels": args.img_channels
    })

    test_data_loader = torch.utils.data.DataLoader(test_data,
                                                   batch_size=args.batch_size,
                                                   shuffle=False,
                                                   num_workers=5 *
                                                   max(num_gpus, 1),
                                                   drop_last=True)

    test_size = len(test_data_loader) * args.batch_size

    ## Main script for test phase
    model.eval()

    MSE = np.zeros(args.future_frames, dtype=np.float32)
    PSNR = np.zeros(args.future_frames, dtype=np.float32)
    SSIM = np.zeros(args.future_frames, dtype=np.float32)
    PIPS = np.zeros(args.future_frames, dtype=np.float32)

    PSmodel = PSmodels.PerceptualLoss(model='net-lin',
                                      net='alex',
                                      use_gpu=use_cuda,
                                      gpu_ids=[0])

    with torch.no_grad():

        for frames in test_data_loader:

            # 5-th order: batch_size x total_frames x channels x height x width
            frames = frames.permute(0, 1, 4, 2, 3).to(device)

            inputs = frames[:, :args.input_frames]
            origin = frames[:, -args.future_frames:]

            pred = model(inputs,
                         input_frames=args.input_frames,
                         future_frames=args.future_frames,
                         output_frames=args.future_frames,
                         teacher_forcing=False)

            # clamp the output to [0, 1]
            pred = torch.clamp(pred, min=0, max=1)

            # accumulate the statistics per frame
            for t in range(-args.future_frames, 0):
                origin_, pred_ = origin[:, t], pred[:, t]
                if args.img_channels == 1:
                    origin_ = origin_.repeat([1, 3, 1, 1])
                    pred_ = pred_.repeat([1, 3, 1, 1])

                dist = PSmodel(origin_, pred_)
                PIPS[t] += torch.sum(dist).item() / test_size

            origin = origin.permute(0, 1, 3, 4, 2).cpu().numpy()
            pred = pred.permute(0, 1, 3, 4, 2).cpu().numpy()
            for t in range(-args.future_frames, 0):
                for i in range(args.batch_size):
                    origin_, pred_ = origin[i, t], pred[i, t]
                    if args.img_channels == 1:
                        origin_ = np.squeeze(origin_, axis=-1)
                        pred_ = np.squeeze(pred_, axis=-1)

                    MSE[t] += skimage.measure.compare_mse(origin_,
                                                          pred_) / test_size
                    PSNR[t] += skimage.measure.compare_psnr(origin_,
                                                            pred_) / test_size
                    SSIM[t] += skimage.measure.compare_ssim(
                        origin_, pred_,
                        multichannel=(args.img_channels > 1)) / test_size

    print("MSE: {} (x1e-3); PSNR: {}, SSIM: {}, LPIPS: {}".format(
        1e3 * np.mean(MSE), np.mean(PSNR), np.mean(SSIM), np.mean(PIPS)))

    print("MSE:", MSE)
    print("PSNR:", PSNR)
    print("SSIM:", SSIM)
    print("PIPS:", PIPS)
Example #5
def eval_model(model,
               loader,
               device,
               vocab,
               use_gt_boxes=False,
               use_feats=False,
               filter_box=False):
    all_boxes = defaultdict(list)
    total_iou = []
    total_boxes = 0
    num_batches = 0
    num_samples = 0
    mae_per_image = []
    mae_roi_per_image = []
    roi_only_iou = []
    ssim_per_image = []
    ssim_rois = []
    rois = 0
    margin = 2

    ## Initializing the perceptual loss model
    lpips_model = models.PerceptualLoss(model='net-lin',
                                        net='alex',
                                        use_gpu=True)
    perceptual_error_image = []
    # ---------------------------------------

    img_idx = 0

    with torch.no_grad():
        for batch in tqdm.tqdm(loader):
            num_batches += 1
            # if num_batches > 10:
            #     break
            batch = [tensor.to(device) for tensor in batch]
            masks = None
            #len", len(batch))

            imgs, objs, boxes, triples, obj_to_img, triple_to_img, imgs_in = [
                b.to(device) for b in batch
            ]
            predicates = triples[:, 1]

            #EVAL_ALL = True
            if not args.generative:
                imgs, imgs_in, objs, boxes, triples, obj_to_img, \
                dropimage_indices, dropfeats_indices = [b.to(device) for b in process_batch(
                    imgs, imgs_in, objs, boxes, triples, obj_to_img, triple_to_img, device,
                    use_feats=use_feats, filter_box=filter_box)]

                dropbox_indices = dropimage_indices
            else:
                dropbox_indices = torch.ones_like(
                    objs.unsqueeze(1).float()).to(device)
                dropfeats_indices = torch.ones_like(
                    objs.unsqueeze(1).float()).to(device)
                dropimage_indices = torch.zeros_like(
                    objs.unsqueeze(1).float()).to(device)

            if imgs.shape[0] == 0:
                continue

            if args.visualize_graphs:
                # visualize scene graphs for debugging purposes
                visualize_scene_graphs(obj_to_img, objs, triples, vocab,
                                       device)

            if use_gt_boxes:
                model_out = model(
                    objs,
                    triples,
                    obj_to_img,
                    boxes_gt=boxes,
                    masks_gt=masks,
                    src_image=imgs_in,
                    keep_box_idx=torch.ones_like(dropimage_indices),
                    keep_feat_idx=dropfeats_indices,
                    keep_image_idx=dropimage_indices,
                    mode='eval')
            else:
                model_out = model(objs,
                                  triples,
                                  obj_to_img,
                                  boxes_gt=boxes,
                                  src_image=imgs_in,
                                  keep_box_idx=dropimage_indices,
                                  keep_feats_idx=dropfeats_indices,
                                  keep_image_idx=dropimage_indices,
                                  mode='eval')

            # OUTPUT
            imgs_pred, boxes_pred, masks_pred, _, _ = model_out
            # ----------------------------------------------------------------------------------------------------------

            # Save all box predictions
            all_boxes['boxes_gt'].append(boxes)
            all_boxes['objs'].append(objs)
            all_boxes['boxes_pred'].append(boxes_pred)
            all_boxes['drop_targets'].append(dropbox_indices)

            # IoU over all
            total_iou.append(jaccard(boxes_pred, boxes).detach().cpu().numpy())
            total_boxes += boxes_pred.size(0)

            # IoU over targets only
            pred_dropbox = boxes_pred[dropbox_indices.squeeze() == 0, :]
            gt_dropbox = boxes[dropbox_indices.squeeze() == 0, :]
            roi_only_iou.append(
                jaccard(pred_dropbox, gt_dropbox).detach().cpu().numpy())
            rois += pred_dropbox.size(0)

            num_samples += imgs.shape[0]
            imgs = imagenet_deprocess_batch(imgs).float()
            imgs_pred = imagenet_deprocess_batch(imgs_pred).float()

            if args.visualize_imgs_boxes:
                # visualize images with drawn boxes for debugging purposes
                visualize_imgs_boxes(imgs, imgs_pred, boxes, boxes_pred)

            if args.save_images:
                # save reconstructed images for later FID and Inception computation
                if args.save_gt_images:
                    # pass imgs as argument to additionally save gt images
                    save_images(imgs_pred, img_idx, imgs)
                else:
                    save_images(imgs_pred, img_idx)

            # MAE per image
            mae_per_image.append(
                torch.mean(
                    torch.abs(imgs - imgs_pred).view(imgs.shape[0], -1),
                    1).cpu().numpy())

            for s in range(imgs.shape[0]):
                # get coordinates of target
                left, right, top, bottom = bbox_coordinates_with_margin(
                    boxes[s, :], margin, imgs)
                if left > right or top > bottom:
                    continue

                # calculate errors only in RoI one by one
                mae_roi_per_image.append(
                    torch.mean(
                        torch.abs(imgs[s, :, top:bottom, left:right] -
                                  imgs_pred[s, :, top:bottom,
                                            left:right])).cpu().item())

                ssim_per_image.append(
                    pytorch_ssim.ssim(imgs[s:s + 1, :, :, :] / 255.0,
                                      imgs_pred[s:s + 1, :, :, :] / 255.0,
                                      window_size=3).cpu().item())
                ssim_rois.append(
                    pytorch_ssim.ssim(
                        imgs[s:s + 1, :, top:bottom, left:right] / 255.0,
                        imgs_pred[s:s + 1, :, top:bottom, left:right] / 255.0,
                        window_size=3).cpu().item())

                # normalize as expected from the LPIPS model
                imgs_pred_norm = imgs_pred[s:s + 1, :, :, :] / 127.5 - 1
                imgs_gt_norm = imgs[s:s + 1, :, :, :] / 127.5 - 1
                perceptual_error_image.append(
                    lpips_model.forward(imgs_pred_norm,
                                        imgs_gt_norm).detach().cpu().numpy())

            if num_batches % args.print_every == 0:
                calculate_scores(mae_per_image, mae_roi_per_image, total_iou,
                                 roi_only_iou, ssim_per_image, ssim_rois,
                                 perceptual_error_image)

            if num_batches % args.save_every == 0:
                save_results(mae_per_image, mae_roi_per_image, total_iou,
                             roi_only_iou, ssim_per_image, ssim_rois,
                             perceptual_error_image, all_boxes, num_batches)

            img_idx += 1

    calculate_scores(mae_per_image, mae_roi_per_image, total_iou, roi_only_iou,
                     ssim_per_image, ssim_rois, perceptual_error_image)
    save_results(mae_per_image, mae_roi_per_image, total_iou, roi_only_iou,
                 ssim_per_image, ssim_rois, perceptual_error_image, all_boxes,
                 'final')
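Several of these examples call a jaccard() helper that is not shown. A rough sketch of such a pairwise IoU, assuming corresponding boxes are given as (N, 4) tensors in (x0, y0, x1, y1) format (the box format and the epsilon are assumptions, not taken from the original code):

import torch

def jaccard_sketch(boxes_a, boxes_b):
    # element-wise IoU between corresponding boxes of two (N, 4) tensors
    x0 = torch.max(boxes_a[:, 0], boxes_b[:, 0])
    y0 = torch.max(boxes_a[:, 1], boxes_b[:, 1])
    x1 = torch.min(boxes_a[:, 2], boxes_b[:, 2])
    y1 = torch.min(boxes_a[:, 3], boxes_b[:, 3])
    inter = (x1 - x0).clamp(min=0) * (y1 - y0).clamp(min=0)
    area_a = (boxes_a[:, 2] - boxes_a[:, 0]) * (boxes_a[:, 3] - boxes_a[:, 1])
    area_b = (boxes_b[:, 2] - boxes_b[:, 0]) * (boxes_b[:, 3] - boxes_b[:, 1])
    return inter / (area_a + area_b - inter + 1e-8)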
Example #6
def main(args):
    ## Distributed computing

    # utility for synchronization
    def reduce_tensor(tensor, reduce_sum=False):
        rt = tensor.clone()
        torch.distributed.all_reduce(rt, op=torch.distributed.ReduceOp.SUM)
        return rt if reduce_sum else (rt / world_size)

    # enable distributed computing
    if args.distributed:
        set_affinity(args.local_rank)
        num_devices = torch.cuda.device_count()
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend='nccl',
                                             init_method='env://')
        node_rank = args.node_rank
        global_rank = node_rank * num_devices + args.local_rank
        world_size = torch.distributed.get_world_size(
        )  #os.environ['WORLD_SIZE']
    else:
        global_rank, num_devices, world_size = 0, 1, 1

    ## Data format: batch(0) x steps(1) x height(2) x width(3) x channels(4)

    # batch_size (0)
    total_batch_size = args.batch_size
    assert total_batch_size % world_size == 0, \
        'The batch_size is not divisible by world_size.'
    batch_size = total_batch_size // world_size

    # steps (1)
    total_frames = args.future_frames + args.input_frames

    # frame format (2, 3)
    img_resize = (args.img_height != args.img_height_u) or (args.img_width !=
                                                            args.img_width_u)

    ## Model preparation (Conv-LSTM or Conv-TT-LSTM)

    # size of the neural network model (depth and width)
    layers_per_block = (3, 3, 3, 3)
    hidden_channels = (32, 48, 48, 32)
    skip_stride = 2

    # construct the model with the specified hyper-parameters
    model = ConvLSTMNet(
        # architecture of the model
        layers_per_block=layers_per_block,
        hidden_channels=hidden_channels,
        input_channels=1,
        skip_stride=skip_stride,
        cell_params={
            "steps": 3,
            "order": 3,
            "ranks": 8
        },
        # parameters of convolutional operation
        kernel_size=5,
        bias=True).cuda()

    if args.distributed:
        model = DDP(model, device_ids=[args.local_rank])

    PSmodel = PSmodels.PerceptualLoss(model='net-lin',
                                      net='alex',
                                      use_gpu=True,
                                      gpu_ids=[args.local_rank])

    ## Dataset Preparation (MNIST, KTH)
    assert args.dataset in ["MNIST", "KTH"], \
        "The dataset is not currently supported."

    Dataset = {"KTH": KTH_Dataset, "MNIST": MNIST_Dataset}[args.dataset]

    # path to the dataset folder
    DATA_DIR = args.data_path

    assert os.path.exists(DATA_DIR), \
        "The dataset folder does not exist. "+DATA_DIR

    test_dataset = Dataset({
        "path": DATA_DIR,
        "unique_mode": True,
        "num_frames": total_frames,
        "num_samples": args.test_samples,
        "height": args.img_height,
        "width": args.img_width,
        "channels": 1,
        'training': False
    })

    test_sampler = torch.utils.data.distributed.DistributedSampler(
        test_dataset, num_replicas=world_size, rank=global_rank, shuffle=False)
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=batch_size,
                                              drop_last=True,
                                              num_workers=num_devices * 4,
                                              pin_memory=True,
                                              sampler=test_sampler)

    test_samples = len(test_loader) * total_batch_size

    MODEL_FILE = args.model_path

    assert os.path.exists(MODEL_FILE), \
        "The specified model is not found in the folder."

    checkpoint = torch.load(MODEL_FILE)
    eval_epoch = checkpoint.get("epoch", 0)
    model.load_state_dict(checkpoint["model_state_dict"])

    ## Main script for test phase
    MSE_ = torch.zeros((args.future_frames), dtype=torch.float32).cuda()
    PSNR_ = torch.zeros((args.future_frames), dtype=torch.float32).cuda()
    SSIM_ = torch.zeros((args.future_frames), dtype=torch.float32).cuda()
    PIPS_ = torch.zeros((args.future_frames), dtype=torch.float32).cuda()

    with torch.no_grad():
        model.eval()

        samples = 0
        for it, frames in enumerate(test_loader):
            samples += total_batch_size

            frames = torch.mean(frames, dim=-1, keepdim=True)

            if img_resize:
                frames_ = frames.cpu().numpy()
                frames = np.zeros((batch_size, total_frames, args.img_height_u,
                                   args.img_width_u, 1),
                                  dtype=np.float32)

                for b in range(batch_size):
                    for t in range(total_frames):
                        frames[b, t] = skimage.transform.resize(
                            frames_[b, t],
                            (args.img_height_u, args.img_width_u))

                frames = torch.from_numpy(frames)

            # 5-th order: batch_size x total_frames x channels x height x width
            frames = frames.permute(0, 1, 4, 2, 3).cuda()
            inputs = frames[:, :args.input_frames]
            origin = frames[:, -args.future_frames:]

            pred = model(inputs,
                         input_frames=args.input_frames,
                         future_frames=args.future_frames,
                         output_frames=args.future_frames,
                         teacher_forcing=False)

            # clamp the output to [0, 1]
            pred = torch.clamp(pred, min=0, max=1)

            # accumulate the statistics per frame
            for t in range(-args.future_frames, 0):
                origin_, pred_ = origin[:, t], pred[:, t]

                origin_ = origin_.repeat([1, 3, 1, 1])
                pred_ = pred_.repeat([1, 3, 1, 1])

                dist = PSmodel(origin_, pred_)
                PIPS_[t] += torch.sum(dist).item()

            origin = origin.permute(0, 1, 3, 4, 2).cpu().numpy()
            pred = pred.permute(0, 1, 3, 4, 2).cpu().numpy()

            for t in range(-args.future_frames, 0):
                for i in range(batch_size):
                    origin_, pred_ = origin[i, t], pred[i, t]

                    origin_ = np.squeeze(origin_, axis=-1)
                    pred_ = np.squeeze(pred_, axis=-1)

                    MSE_[t] += skimage.metrics.mean_squared_error(
                        origin_, pred_)
                    PSNR_[t] += skimage.metrics.peak_signal_noise_ratio(
                        origin_, pred_)
                    SSIM_[t] += skimage.metrics.structural_similarity(
                        origin_, pred_)

            if args.distributed:
                MSE = reduce_tensor(MSE_, reduce_sum=True) / samples
                PSNR = reduce_tensor(PSNR_, reduce_sum=True) / samples
                SSIM = reduce_tensor(SSIM_, reduce_sum=True) / samples
                PIPS = reduce_tensor(PIPS_, reduce_sum=True) / samples
            else:
                MSE = MSE_ / samples
                PSNR = PSNR_ / samples
                SSIM = SSIM_ / samples
                PIPS = PIPS_ / samples

            if ((it + 1) % 50 == 0
                    or it + 1 == len(test_loader)) and args.local_rank == 0:
                print((it + 1) * total_batch_size, '/', test_samples,
                      ": MSE:  ",
                      torch.mean(MSE).cpu().item() * 1e3, "; PSNR: ",
                      torch.mean(PSNR).cpu().item(), "; SSIM: ",
                      torch.mean(SSIM).cpu().item(), ";LPIPS: ",
                      torch.mean(PIPS).cpu().item())

        if args.distributed:
            MSE = reduce_tensor(MSE_, reduce_sum=True) / test_samples
            PSNR = reduce_tensor(PSNR_, reduce_sum=True) / test_samples
            SSIM = reduce_tensor(SSIM_, reduce_sum=True) / test_samples
            PIPS = reduce_tensor(PIPS_, reduce_sum=True) / test_samples
        else:
            MSE = MSE_ / test_samples
            PSNR = PSNR_ / test_samples
            SSIM = SSIM_ / test_samples
            PIPS = PIPS_ / test_samples

        MSE_AVG = torch.mean(MSE).cpu().item()
        PSNR_AVG = torch.mean(PSNR).cpu().item()
        SSIM_AVG = torch.mean(SSIM).cpu().item()
        PIPS_AVG = torch.mean(PIPS).cpu().item()

        if args.local_rank == 0:
            print(
                "Epoch \t{} \tMSE: \t{} (x1e-3) \tPSNR: \t{} \tSSIM: \t{} \tLPIPS: \t{}"
                .format(eval_epoch, 1e3 * MSE_AVG, PSNR_AVG, SSIM_AVG,
                        PIPS_AVG))
Example #7
    def __init__(self):
        super(PerceptualLossLPIPS, self).__init__()
        # self.loss_network = ps.PerceptualLoss(use_gpu=torch.cuda.is_available())
        self.loss_network = models.PerceptualLoss(
            use_gpu=torch.cuda.is_available())
def main(ref_dir, generated_dir, version='0.0', use_gpu=True):
    """
    Compute the mean and standard deviation of the LPIPS, PSNR and SSIM metrics over an image directory
    
    Args:
        ref_dir: reference images directory
        generated_dir: generated images directory
        version: version of LPIPS to use, default 0.0
        use_gpu: whether to use gpu for faster computation
    """

    ## Initialize the LPIPS model
    model = models.PerceptualLoss(model='net-lin',
                                  net='alex',
                                  use_gpu=use_gpu,
                                  version=version)

    files = os.listdir(ref_dir)

    lpips_list = np.empty(len(files))
    psnr_list = np.empty(len(files))
    ssim_list = np.empty(len(files))

    for i, file in enumerate(files):
        if os.path.exists(os.path.join(generated_dir, file)):

            # Load images
            img0_np = util.load_image(os.path.join(ref_dir, file))
            img1_np = util.load_image(os.path.join(generated_dir, file))

            img0 = util.im2tensor(img0_np)
            img1 = util.im2tensor(img1_np)

            if use_gpu:
                img0 = img0.cuda()
                img1 = img1.cuda()

            # Compute LPIPS distance
            dist01 = model.forward(img0, img1)
            lpips_list[i] = dist01.item()

            # Compute PSNR value
            psnr = metrics.peak_signal_noise_ratio(img0_np, img1_np)
            psnr_list[i] = psnr

            # Compute SSIM value
            ssim = metrics.structural_similarity(img0_np,
                                                 img1_np,
                                                 multichannel=True)
            ssim_list[i] = ssim

            print('%s: %.4f, %.4f, %.4f' % (file, dist01, psnr, ssim))

    print("LPIPS mean: {:.4f}".format(lpips_list.mean()))
    print("LPIPS std: {:.4f}".format(lpips_list.std()))

    print("PSNR mean: {:.4f}".format(psnr_list.mean()))
    print("PSNR std: {:.4f}".format(psnr_list.std()))

    print("SSIM mean: {:.4f}".format(ssim_list.mean()))
    print("SSIM std: {:.4f}".format(ssim_list.std()))
Example #9
def main(args):
    ## Data format: batch_size(0) x time_steps(1) x
    #  img_height(2) x img_width(3) x channels(4)

    # batch size (0)
    assert args.log_samples % args.batch_size == 0, \
        "The argument log_samples should be a multiple of batch_size."

    # frame split (1)
    input_frames = args.input_frames
    future_frames = args.future_frames
    total_frames = input_frames + future_frames

    log_frames = args.log_frames

    list_input_frames = list(range(0, input_frames, log_frames))
    plot_input_frames = len(list_input_frames)

    list_future_frames = list(range(0, future_frames, log_frames))
    plot_future_frames = len(list_future_frames)

    assert args.img_channels in [1, 3], \
        "The number of channels is either 1 or 3."

    img_colored = (args.img_channels == 3)

    ## Model preparation (Conv-LSTM)

    # whether to use GPU (or CPU)
    use_cuda = args.use_cuda and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    # whether to use multi-GPU (or single-GPU)
    multi_gpu = (use_cuda and args.multi_gpu and torch.cuda.device_count() > 1)

    # number of GPUs used for training
    num_gpus = (
        torch.cuda.device_count() if multi_gpu else 1) if use_cuda else 0

    print("Device: %s (# of GPUs: %d)" % (device, num_gpus))

    # size of the Conv-LSTM network
    if args.model_size == "origin":  # 12-layers
        layers_per_block = (3, ) * 4
        hidden_channels = (32, 48, 48, 32)
        skip_stride = 2
    elif args.model_size == "small":
        layers_per_block = (3, ) * 4
        hidden_channels = (32, ) * 4
        skip_stride = 2
    elif args.model_size == "shallow":  # 4-layers
        layers_per_block = (4, )
        hidden_channels = (128, )
        skip_stride = None
    else:
        raise NotImplementedError

    # construct the model with the specified hyper-parameters
    model = ConvLSTMNet(
        # model architecture
        layers_per_block,
        hidden_channels,
        skip_stride=skip_stride,
        # input/output interfaces
        input_channels=args.img_channels,
        output_sigmoid=args.use_sigmoid,
        input_height=args.img_height,
        input_width=args.img_width,
        # non-local blocks
        non_local=args.use_non_local,
        pairwise_function=args.pairwise_function,
        use_norm=args.use_norm,
        sub_sampling=args.use_sub_sample,
        # convolutional layers
        arma=args.use_arma,
        w_dilation=args.w_dilation,
        w_kernel_size=args.w_kernel_size,
        w_bias=args.use_bias,
        a_kernel_size=args.a_kernel_size,
        a_padding_mode=args.a_padding_mode)

    # count the total number of model parameters
    num_params = sum(param.numel() for param in model.parameters()
                     if param.requires_grad)
    print("# of params. = ", num_params)

    # move the model to the device (CPU, GPU, multi-GPU)
    model.to(device)
    if multi_gpu: model = nn.DataParallel(model)

    # create the name and timestamp of the model
    model_name = args.model_name + '_' + args.model_stamp

    print("Model name:", model_name)
    print("# of future frames:", future_frames)

    PSmodel = PSmodels.PerceptualLoss(model='net-lin',
                                      net='alex',
                                      use_gpu=use_cuda,
                                      gpu_ids=[0])

    ## Dataset Preparation (Moving-MNIST)
    dataset = args.dataset
    Dataset = {"MNIST": MNIST_Dataset}[dataset]

    # path to the dataset folder
    if args.data_path == "default":
        DATA_DIR = {"MNIST": "moving-mnist"}[dataset]
        DATA_DIR = os.path.join("../datasets", DATA_DIR)
    else:  # if args.data_path != "default":
        DATA_DIR = args.data_path

    assert os.path.exists(DATA_DIR), \
        "The dataset folder does not exist."

    # number of workers for the dataloaders
    num_workers = 5 * max(num_gpus, 1)

    # dataloader for the test set
    test_data_path = os.path.join(DATA_DIR, args.test_data_file)
    assert os.path.exists(test_data_path), \
        "The test set does not exist."

    test_data = Dataset({
        "path": test_data_path,
        "unique_mode": True,
        "num_frames": total_frames,
        "num_samples": args.test_samples,
        "height": args.img_height,
        "width": args.img_width,
        "channels": args.img_channels
    })

    test_data_loader = torch.utils.data.DataLoader(test_data,
                                                   batch_size=args.batch_size,
                                                   shuffle=False,
                                                   num_workers=num_workers,
                                                   drop_last=True)

    test_size = len(test_data_loader) * args.batch_size
    print("# of test samples:", test_size)

    ## Outputs (Models and Results)
    if args.output_path == "default":
        OUTPUT_DIR = {"MNIST": "./moving-mnist"}[dataset]
    else:  # if args.output_path != "default":
        OUTPUT_DIR = args.output_path

    OUTPUT_DIR = os.path.join(OUTPUT_DIR, model_name)
    if not os.path.exists(OUTPUT_DIR):
        os.makedirs(OUTPUT_DIR)

    # path to the models
    MODEL_DIR = os.path.join(OUTPUT_DIR, "models")
    if not os.path.exists(MODEL_DIR):
        os.makedirs(MODEL_DIR)

    # load the best / last / specified model
    if args.eval_auto:
        if args.eval_best:
            MODEL_FILE = os.path.join(MODEL_DIR, 'training_best.pt')
        else:  # if args.eval_last:
            MODEL_FILE = os.path.join(MODEL_DIR, 'training_last.pt')
    else:  # if args.eval_spec:
        MODEL_FILE = os.path.join(MODEL_DIR,
                                  'training_%d.pt' % args.eval_epoch)

    assert os.path.exists(MODEL_FILE), \
        "The specified model is not found in the folder."

    checkpoint = torch.load(MODEL_FILE)
    eval_epoch = checkpoint.get("epoch", args.eval_epoch)
    model.load_state_dict(checkpoint["model_state_dict"])

    # path to the results (images and statistics)
    RESULT_DIR = os.path.join(OUTPUT_DIR, "results")
    if not os.path.exists(RESULT_DIR):
        os.makedirs(RESULT_DIR)

    RESULT_IMG = os.path.join(
        RESULT_DIR,
        "test_images_" + str(eval_epoch) + "_" + str(future_frames))
    if not os.path.exists(RESULT_IMG):
        os.makedirs(RESULT_IMG)

    RESULT_STAT = os.path.join(RESULT_DIR, "test_stats")
    if not os.path.exists(RESULT_STAT):
        os.makedirs(RESULT_STAT)

    RESULT_STAT = os.path.join(RESULT_STAT, 'epoch_%d' % eval_epoch)

    ## Main script for test phase
    MSE = [0.] * future_frames
    PSNR = [0.] * future_frames
    SSIM = [0.] * future_frames
    PIPS = [0.] * future_frames

    with torch.no_grad():
        model.eval()

        samples = 0
        for frames in test_data_loader:
            samples += args.batch_size

            # 5-th order: batch_size x total_frames x channels x height x width
            frames = frames.permute(0, 1, 4, 2, 3).to(device)

            inputs = frames[:, :input_frames]
            origin = frames[:, -future_frames:]

            pred = model(inputs,
                         input_frames=input_frames,
                         future_frames=future_frames,
                         output_frames=future_frames,
                         teacher_forcing=False)

            # clamp the output to [0, 1]
            pred = torch.clamp(pred, min=0, max=1)

            # save the first sample for each batch to the folder
            if samples % args.log_samples == 0:
                print("samples: ", samples)

                input_0 = inputs[0, list_input_frames]
                origin_0 = origin[0, list_future_frames]
                pred_0 = pred[0, list_future_frames]

                # pad the input with zeros (if needed)
                if plot_input_frames < plot_future_frames:
                    input_0 = torch.cat([
                        torch.zeros(plot_future_frames - plot_input_frames,
                                    args.img_channels,
                                    args.img_height,
                                    args.img_width,
                                    device=device), input_0
                    ],
                                        dim=0)

                img = torchvision.utils.make_grid(torch.cat(
                    [input_0, origin_0, pred_0], dim=0),
                                                  nrow=plot_future_frames)

                RESULT_FILE = os.path.join(
                    RESULT_IMG, "cmp_%d_%d.jpg" % (eval_epoch, samples))
                torchvision.utils.save_image(img, RESULT_FILE)

            # accumulate the statistics per frame
            for t in range(-future_frames, 0):
                origin_, pred_ = origin[:, t], pred[:, t]
                if not img_colored:
                    origin_ = origin_.repeat([1, 3, 1, 1])
                    pred_ = pred_.repeat([1, 3, 1, 1])

                dist = PSmodel(origin_, pred_)
                PIPS[t] += torch.sum(dist).item()

            origin = origin.permute(0, 1, 3, 4, 2).cpu().numpy()
            pred = pred.permute(0, 1, 3, 4, 2).cpu().numpy()
            for t in range(-future_frames, 0):
                for i in range(args.batch_size):
                    origin_, pred_ = origin[i, t], pred[i, t]
                    if not img_colored:
                        origin_ = np.squeeze(origin_, axis=-1)
                        pred_ = np.squeeze(pred_, axis=-1)

                    MSE[t] += skimage.measure.compare_mse(origin_, pred_)
                    PSNR[t] += skimage.measure.compare_psnr(origin_, pred_)
                    SSIM[t] += skimage.measure.compare_ssim(
                        origin_, pred_, multichannel=img_colored)

    for t in range(future_frames):
        MSE[t] /= test_size
        PSNR[t] /= test_size
        SSIM[t] /= test_size
        PIPS[t] /= test_size

    # compute the average statistics
    MSE_AVG = sum(MSE) / future_frames
    PSNR_AVG = sum(PSNR) / future_frames
    SSIM_AVG = sum(SSIM) / future_frames
    PIPS_AVG = sum(PIPS) / future_frames

    print("Epoch {}, MSE: {} (x1e-3); PSNR: {}, SSIM: {}, PIPS: {}".format(
        eval_epoch, 1e3 * MSE_AVG, PSNR_AVG, SSIM_AVG, PIPS_AVG))

    print("PSNR:", PSNR)
    print("SSIM:", SSIM)
    print("PIPS:", PIPS)

    np.savez(RESULT_STAT, MSE=MSE, PSNR=PSNR, SSIM=SSIM, PIPS=PIPS)
    print('--------------------------------------------------------------')
def eval_model(args,
               model,
               loader,
               device,
               use_gt=False,
               use_feats=False,
               filter_box=False):
    all_losses = defaultdict(list)
    all_boxes = defaultdict(list)
    total_iou = []
    total_boxes = 0
    num_batches = 0
    num_samples = 0
    mae_per_image = []
    mae_roi_per_image = []
    roi_only_iou = []
    ssim_per_image = []
    ssim_rois = []
    rois = 0
    margin = 2

    ## Initializing the perceptual loss model
    lpips_model = models.PerceptualLoss(model='net-lin',
                                        net='alex',
                                        use_gpu=True)
    perceptual_error_image = []
    perceptual_error_roi = []
    # ---------------------------------------

    with torch.no_grad():
        for batch in tqdm.tqdm(loader):
            num_batches += 1
            # if num_batches > 10:
            #     break
            batch = [tensor.to(device) for tensor in batch]
            masks = None
            if len(batch) == 6:
                imgs, objs, boxes, triples, obj_to_img, triple_to_img = batch
            elif len(batch) == 7:
                imgs, objs, boxes, masks, triples, obj_to_img, triple_to_img = batch
            elif len(batch) == 12:
                imgs, objs, boxes, triples, obj_to_img, triple_to_img, \
                objs_r, boxes_r, triples_r, obj_to_img_r, triple_to_img_r, imgs_in = batch
            elif len(batch) == 13:
                imgs, objs, boxes, triples, obj_to_img, triple_to_img, attributes, \
                objs_r, boxes_r, triples_r, obj_to_img_r, triple_to_img_r, imgs_in = batch
            else:
                assert False
            predicates = triples[:, 1]

            # #EVAL_ALL = True
            if EVAL_ALL:
                imgs, imgs_in, objs, boxes, triples, obj_to_img, \
                dropbox_indices, dropfeats_indices = process_batch(
                    imgs, imgs_in, objs, boxes, triples, obj_to_img, triple_to_img, device,
                    use_feats=use_feats, filter_box=filter_box)
            else:
                dropbox_indices = None
                dropfeats_indices = None
            #
            # if use_gt: # gt boxes
            #     model_out = model(objs, triples, obj_to_img, boxes_gt=boxes, masks_gt=masks, src_image=imgs_in,
            #                       drop_box_idx=None, drop_feat_idx=dropfeats_indices, mode='eval')
            # else:
            #     model_out = model(objs, triples, obj_to_img, boxes_gt=boxes, src_image=imgs_in,
            #                       drop_box_idx=dropbox_indices, drop_feats_idx=dropfeats_indices, mode='eval')

            masks_gt = None
            gt_train = False

            attributes = torch.zeros_like(attributes)

            all_features = None
            # Run the model with predicted masks
            model_out = model(imgs,
                              objs,
                              triples,
                              obj_to_img,
                              boxes_gt=boxes,
                              masks_gt=masks_gt,
                              attributes=attributes,
                              gt_train=gt_train,
                              test_mode=False,
                              use_gt_box=True,
                              features=all_features,
                              drop_box_idx=dropbox_indices,
                              drop_feat_idx=dropfeats_indices,
                              src_image=imgs_in)
            #imgs_pred, boxes_pred, masks_pred, _, layout, _ = model_out

            # OUTPUT
            imgs_pred, boxes_pred, masks_pred, predicate_scores, layout, _ = model_out
            # --------------------------------------------------------------------------------------------------------------
            #imgs_pred *= 3
            #print(imgs_pred.min(), imgs_pred.max())

            # Save all box predictions
            all_boxes['boxes_gt'].append(boxes)
            all_boxes['objs'].append(objs)
            all_boxes['boxes_pred'].append(boxes_pred)
            all_boxes['drop_targets'].append(dropbox_indices)

            # IoU over all
            total_iou.append(jaccard(boxes_pred,
                                     boxes).cpu().numpy())  #.detach()
            total_boxes += boxes_pred.size(0)

            # IoU over targets only
            pred_dropbox = boxes_pred[dropbox_indices.squeeze() == 0, :]
            gt_dropbox = boxes[dropbox_indices.squeeze() == 0, :]
            roi_only_iou.append(
                jaccard(pred_dropbox, gt_dropbox).detach().cpu().numpy())
            rois += pred_dropbox.size(0)
            # assert(pred_dropbox.size(0) == imgs.size(0))

            num_samples += imgs.shape[0]
            imgs = imagenet_deprocess_batch(imgs).float()
            imgs_pred = imagenet_deprocess_batch(imgs_pred).float()

            # Uncomment to plot images (for debugging purposes)
            #visualize_imgs_boxes(imgs, imgs_pred, boxes, boxes)

            # MAE per image
            mae_per_image.append(
                torch.mean(
                    torch.abs(imgs - imgs_pred).view(imgs.shape[0], -1),
                    1).cpu().numpy())

            for s in range(imgs.shape[0]):
                # get coordinates of target
                left, right, top, bottom = bbox_coordinates_with_margin(
                    boxes[s, :], margin, imgs)
                # calculate errors only in RoI one by one
                mae_roi_per_image.append(
                    torch.mean(
                        torch.abs(imgs[s, :, top:bottom, left:right] -
                                  imgs_pred[s, :, top:bottom,
                                            left:right])).cpu().item())

                ssim_per_image.append(
                    pytorch_ssim.ssim(imgs[s:s + 1, :, :, :] / 255.0,
                                      imgs_pred[s:s + 1, :, :, :] / 255.0,
                                      window_size=3).cpu().item())
                ssim_rois.append(
                    pytorch_ssim.ssim(
                        imgs[s:s + 1, :, top:bottom, left:right] / 255.0,
                        imgs_pred[s:s + 1, :, top:bottom, left:right] / 255.0,
                        window_size=3).cpu().item())

                imgs_pred_norm = imgs_pred[
                    s:s +
                    1, :, :, :] / 127.5 - 1  # = util.im2tensor(imgs_pred[s:s+1, :, :, :].detach().cpu().numpy())
                imgs_gt_norm = imgs[
                    s:s +
                    1, :, :, :] / 127.5 - 1  # util.im2tensor(imgs[s:s+1, :, :, :].detach().cpu().numpy())

                #perceptual_error_roi.append(lpips_model.forward(imgs_pred_norm[:,:, top:bottom, left:right],
                #                                                  imgs_gt_norm[:,:, top:bottom, left:right]))

                #print(imgs_pred_norm.shape)
                perceptual_error_image.append(
                    lpips_model.forward(imgs_pred_norm,
                                        imgs_gt_norm).detach().cpu().numpy())

            if num_batches % PRINT_EVERY == 0:
                calculate_scores(mae_per_image, mae_roi_per_image, total_iou,
                                 roi_only_iou, ssim_per_image, ssim_rois,
                                 perceptual_error_image, perceptual_error_roi)

            if num_batches % SAVE_EVERY == 0:
                save_results(mae_per_image, mae_roi_per_image, total_iou,
                             roi_only_iou, ssim_per_image, ssim_rois,
                             perceptual_error_image, perceptual_error_roi,
                             all_boxes, num_batches)

    # mean_losses = {k: np.mean(v) for k, v in all_losses.items()}

    save_results(mae_per_image, mae_roi_per_image, total_iou, roi_only_iou,
                 ssim_per_image, ssim_rois, perceptual_error_image,
                 perceptual_error_roi, all_boxes, 'final')

    # masks_to_store = masks
    # if masks_to_store is not None:
    #     masks_to_store = masks_to_store.data.cpu().clone()

    # masks_pred_to_store = masks_pred
    # if masks_pred_to_store is not None:
    #     masks_pred_to_store = masks_pred_to_store.data.cpu().clone()

    # batch_data = {
    #     'objs': objs.detach().cpu().clone(),
    #     'boxes_gt': boxes.detach().cpu().clone(),
    #     'masks_gt': masks_to_store,
    #     'triples': triples.detach().cpu().clone(),
    #     'obj_to_img': obj_to_img.detach().cpu().clone(),
    #     'triple_to_img': triple_to_img.detach().cpu().clone(),
    #     'boxes_pred': boxes_pred.detach().cpu().clone(),
    #     'masks_pred': masks_pred_to_store
    # }
    # out = [mean_losses, samples, batch_data, avg_iou]
    # out = [mean_losses, mean_L1, avg_iou]

    return  # mae_per_image, mae_roi_per_image, total_iou, roi_only_iou
Example #11
def run_model(args, checkpoint, loader=None):

  output_dir = args.exp_dir
  model = build_model(args, checkpoint)
  if loader is None:
    loader = build_eval_loader(args, checkpoint, vocab_t)

  img_dir = makedir(output_dir, 'images_' + SPLIT)
  graph_json_dir = makedir(output_dir, 'graphs_json')

  f = open(output_dir + "/result_ids.txt", "w")

  img_idx = 0
  total_iou_all = []
  total_iou = get_def_dict()
  total_boxes = 0
  mae_per_image_all = []
  mae_per_image = get_def_dict()
  mae_roi_per_image_all = []
  mae_roi_per_image = get_def_dict()
  roi_only_iou_all = []
  roi_only_iou = get_def_dict()
  ssim_per_image_all = []
  ssim_per_image = get_def_dict()
  ssim_rois_all = []
  ssim_rois = get_def_dict()
  rois = 0
  margin = 2

  ## Initializing the perceptual loss model
  lpips_model = models.PerceptualLoss(model='net-lin', net='alex', use_gpu=True)
  perceptual_error_image_all = []
  perceptual_error_image = get_def_dict()
  perceptual_error_roi_all = []
  perceptual_error_roi = get_def_dict()

  for batch in loader:

    imgs, imgs_src, objs, objs_src, boxes, boxes_src, triples, triples_src, obj_to_img, \
        triple_to_img, imgs_in = [x.cuda() for x in batch]

    imgs_gt = imagenet_deprocess_batch(imgs_src)
    imgs_target_gt = imagenet_deprocess_batch(imgs)

    # Get mode from target scene - source scene, or image id, using sets
    graph_set_bef = Counter(tuple(row) for row in tripleToObjID(triples_src, objs_src))
    obj_set_bef = Counter([int(obj.cpu()) for obj in objs_src])
    graph_set_aft = Counter(tuple(row) for row in tripleToObjID(triples, objs))
    obj_set_aft = Counter([int(obj.cpu()) for obj in objs])

    if len(objs) > len(objs_src):
      mode = "addition"
      changes = graph_set_aft - graph_set_bef
      obj_ids = list(obj_set_aft - obj_set_bef)
      new_ids = (objs == obj_ids[0]).nonzero()
    elif len(objs) < len(objs_src):
      mode = "remove"
      changes = graph_set_bef - graph_set_aft
      obj_ids = list(obj_set_bef - obj_set_aft)
      new_ids_src = (objs_src == obj_ids[0]).nonzero()
      new_objs = [obj for obj in objs]
      new_objs.append(objs_src[new_ids_src[0]])
      objs = torch.tensor(new_objs).cuda()
      num_objs = len(objs)
      new_ids = [torch.tensor(num_objs-1)]
      new_boxes = [bbox for bbox in boxes]
      new_boxes.append(boxes_src[new_ids_src[0]][0])
      boxes = torch.stack(new_boxes)
      obj_to_img = torch.zeros(num_objs, dtype=objs.dtype, device=objs.device)
    elif torch.all(torch.eq(objs, objs_src)):
      mode = "reposition"
      changes = (graph_set_bef - graph_set_aft) + (graph_set_aft - graph_set_bef)
      idx_cnt = np.zeros((25,1))
      for [s,p,o] in list(changes):
        idx_cnt[s] += 1
        idx_cnt[o] += 1

      obj_ids = idx_cnt.argmax(0)
      id_src = (objs_src == obj_ids[0]).nonzero()
      box_src = boxes_src[id_src[0]]
      new_ids = (objs == obj_ids[0]).nonzero()
      boxes[new_ids[0]] = box_src

    elif len(objs) == len(objs_src):
      mode = "replace"
      changes = (graph_set_bef - graph_set_aft) + (graph_set_aft - graph_set_bef)
      obj_ids = [list(obj_set_bef - obj_set_aft)[0], list(obj_set_aft - obj_set_bef)[0]]
      new_ids = (objs == obj_ids[1]).nonzero()
    else:
      assert False

    new_ids = [int(new_id.cpu()) for new_id in new_ids]

    show_im = False
    if show_im:
      img_gt = imgs_gt[0].numpy().transpose(1, 2, 0)
      img_gt_target = imgs_target_gt[0].numpy().transpose(1, 2, 0)
      fig = plt.figure()
      fig.add_subplot(1, 2, 1)
      plt.imshow(img_gt)
      fig.add_subplot(1, 2, 2)
      plt.imshow(img_gt_target)
      plt.show(block=True)

    query_feats = None

    if args.with_query_image:
      img, box = query_image_by_semantic_id(new_ids, img_idx, loader)
      query_feats = model.forward_visual_feats(img, box)

      img_filename_query = '%04d_query.png' % (img_idx)
      img = imagenet_deprocess_batch(img)
      img_np = img[0].numpy().transpose(1, 2, 0).astype(np.uint8)
      img_path = os.path.join(img_dir, img_filename_query)
      imsave(img_path, img_np)


    img_gt_filename = '%04d_gt_src.png' % (img_idx)
    img_target_gt_filename = '%04d_gt_target.png' % (img_idx)
    img_pred_filename = '%04d_changed.png' % (img_idx)
    img_filename_noised = '%04d_noised.png' % (img_idx)

    triples_ = triples

    boxes_gt = boxes

    keep_box_idx = torch.ones_like(objs.unsqueeze(1), dtype=torch.float)
    keep_feat_idx = torch.ones_like(objs.unsqueeze(1), dtype=torch.float)
    keep_image_idx = torch.ones_like(objs.unsqueeze(1), dtype=torch.float)

    subject_node = new_ids[0]
    keep_image_idx[subject_node] = 0

    if mode == 'reposition':
      keep_box_idx[subject_node] = 0
    elif mode == "remove":
      keep_feat_idx[subject_node] = 0
    else:
      if mode == "replace":
        keep_feat_idx[subject_node] = 0
      if mode == 'auto_withfeats':
        keep_image_idx[subject_node] = 0

      if mode == 'auto_nofeats':
        if not args.with_query_image:
          keep_feat_idx[subject_node] = 0

    model_out = model(objs, triples_, obj_to_img,
        boxes_gt=boxes_gt, masks_gt=None, src_image=imgs_in, mode=mode,
        query_feats=query_feats, keep_box_idx=keep_box_idx, keep_feat_idx=keep_feat_idx,
        keep_image_idx=keep_image_idx)

    imgs_pred, boxes_pred_o, masks_pred, noised_srcs, _ = model_out

    imgs = imagenet_deprocess_batch(imgs).float()
    imgs_pred = imagenet_deprocess_batch(imgs_pred).float()

    #Metrics

    # IoU over all
    curr_iou = jaccard(boxes_pred_o, boxes).detach().cpu().numpy()
    total_iou_all.append(curr_iou)
    total_iou[mode].append(curr_iou)
    total_boxes += boxes_pred_o.size(0)

    # IoU over targets only
    pred_dropbox = boxes_pred_o[keep_box_idx.squeeze() == 0, :]
    gt_dropbox = boxes[keep_box_idx.squeeze() == 0, :]
    curr_iou_roi = jaccard(pred_dropbox, gt_dropbox).detach().cpu().numpy()
    roi_only_iou_all.append(curr_iou_roi)
    roi_only_iou[mode].append(curr_iou_roi)
    rois += pred_dropbox.size(0)

    # MAE per image
    curr_mae = torch.mean(
      torch.abs(imgs - imgs_pred).view(imgs.shape[0], -1), 1).cpu().numpy()
    mae_per_image[mode].append(curr_mae)
    mae_per_image_all.append(curr_mae)

    for s in range(imgs.shape[0]):
      # get coordinates of target
      left, right, top, bottom = bbox_coordinates_with_margin(boxes[s, :], margin, imgs)
      if left > right or top > bottom:
        continue
      # print("bboxes with margin: ", left, right, top, bottom)

      # calculate errors only in RoI one by one
      curr_mae_roi = torch.mean(
        torch.abs(imgs[s, :, top:bottom, left:right] - imgs_pred[s, :, top:bottom, left:right])).cpu().item()
      mae_roi_per_image[mode].append(curr_mae_roi)
      mae_roi_per_image_all.append(curr_mae_roi)

      curr_ssim = pytorch_ssim.ssim(imgs[s:s + 1, :, :, :] / 255.0,
                          imgs_pred[s:s + 1, :, :, :] / 255.0, window_size=3).cpu().item()
      ssim_per_image_all.append(curr_ssim)
      ssim_per_image[mode].append(curr_ssim)

      curr_ssim_roi = pytorch_ssim.ssim(imgs[s:s + 1, :, top:bottom, left:right] / 255.0,
                          imgs_pred[s:s + 1, :, top:bottom, left:right] / 255.0, window_size=3).cpu().item()
      ssim_rois_all.append(curr_ssim_roi)
      ssim_rois[mode].append(curr_ssim_roi)

      imgs_pred_norm = imgs_pred[s:s + 1, :, :, :] / 127.5 - 1
      imgs_gt_norm = imgs[s:s + 1, :, :, :] / 127.5 - 1

      curr_lpips = lpips_model.forward(imgs_pred_norm, imgs_gt_norm).detach().cpu().numpy()
      perceptual_error_image_all.append(curr_lpips)
      perceptual_error_image[mode].append(curr_lpips)

    for i in range(imgs_pred.size(0)):

      if args.save_imgs:
        img_gt = imgs_gt[i].numpy().transpose(1, 2, 0).astype(np.uint8)
        img_gt = cv2.resize(img_gt, (128, 128))
        img_gt_path = os.path.join(img_dir, img_gt_filename)
        imsave(img_gt_path, img_gt)

        img_gt_target = imgs_target_gt[i].numpy().transpose(1, 2, 0).astype(np.uint8)
        img_gt_target = cv2.resize(img_gt_target, (128, 128))
        img_gt_target_path = os.path.join(img_dir, img_target_gt_filename)
        imsave(img_gt_target_path, img_gt_target)

        noised_src_np = imagenet_deprocess_batch(noised_srcs[:, :3, :, :])
        noised_src_np = noised_src_np[i].numpy().transpose(1, 2, 0).astype(np.uint8)
        noised_src_np = cv2.resize(noised_src_np, (128, 128))
        img_path_noised = os.path.join(img_dir, img_filename_noised)
        imsave(img_path_noised, noised_src_np)

        img_pred_np = imgs_pred[i].numpy().transpose(1, 2, 0).astype(np.uint8)
        img_pred_np = cv2.resize(img_pred_np, (128, 128))
        img_path = os.path.join(img_dir, img_pred_filename)
        imsave(img_path, img_pred_np)

      save_graph_json(objs, triples, boxes, "after", graph_json_dir, img_idx)


    img_idx += 1

    if img_idx % print_every == 0:
      calculate_scores(mae_per_image_all, mae_roi_per_image_all, total_iou_all, roi_only_iou_all, ssim_per_image_all,
                       ssim_rois_all, perceptual_error_image_all, perceptual_error_roi_all)
      calculate_scores_modes(mae_per_image, mae_roi_per_image, total_iou, roi_only_iou, ssim_per_image, ssim_rois,
                       perceptual_error_image, perceptual_error_roi)

    print('Saved %d images' % img_idx)

  f.close()
Example #12
    def __init__(self, weight=1.0, net='alex', use_gpu=True):
        """
        Wrapper for PerceptualSimilarity.models.PerceptualLoss
        """
        self.model = models.PerceptualLoss(net=net, use_gpu=use_gpu)
        self.weight = weight
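The snippet only shows the constructor; a hypothetical __call__ along these lines would complete the wrapper, assuming pred and target are image batches already scaled to [-1, 1]:

    def __call__(self, pred, target):
        # LPIPS distance scaled by the configured weight
        return self.weight * self.model.forward(pred, target).mean()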