Example #1
    def __init__(self,
                 root,
                 split="train_aug",
                 is_transform=False,
                 img_size=512):
        self.root = root
        self.split = split
        self.is_transform = is_transform
        self.ignore_index = 255
        self.n_classes = 21
        self.img_size = img_size if isinstance(img_size, tuple) else (img_size,
                                                                      img_size)
        self.files = collections.defaultdict(list)

        self.image_transform = Compose([
            ToTensor(),
            Normalize([.485, .456, .406], [.229, .224, .225]),
        ])
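        # (These are the standard ImageNet channel means/stds used with torchvision's pretrained models.)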
        self.filler = [0, 0, 0]

        # Reading pascal VOC dataset list
        self.voc_path = get_data_path('pascal')
        for split in ["train", "val", "trainval", "test"]:
            file_list = tuple(
                open(
                    self.voc_path + '/ImageSets/Segmentation/' + split +
                    '.txt', 'r'))
            file_list = [id_.rstrip() for id_ in file_list]
            self.files[split] = file_list

        # Reading SBD dataset list
        self.sbd_path = get_data_path('sbd')
        self.sbd_train_list = tuple(
            open(self.sbd_path + 'dataset/train_withValdata.txt', 'r'))
        self.sbd_train_list = [id_.rstrip() for id_ in self.sbd_train_list]

        self.sbd_val_list = tuple(open(self.sbd_path + 'dataset/val.txt', 'r'))
        self.sbd_val_list = [id_.rstrip() for id_ in self.sbd_val_list]

        # Augmenting pascal and SBD dataset list
        self.files['trainval_aug'] = (self.sbd_train_list +
                                      self.sbd_val_list + self.files['train'])
        self.files['train_aug'] = list(
            set(self.files['trainval_aug']) - set(self.files['val']))

        # Needed for extracting the ground truth of the SBD and PASCAL datasets
        if not os.path.isdir(self.root + '/pre_encoded'):
            self.setup(pre_encode=True)
        else:
            self.setup(pre_encode=False)

        self.files = self.files[self.split]
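
A minimal sketch of the list arithmetic above, with three made-up ID lists standing in for the real SBD/VOC splits: trainval_aug is the concatenation of the SBD train/val IDs and the VOC train IDs, and train_aug drops anything that also appears in the VOC val split.

    sbd_train = ['2008_000002', '2008_000003']
    sbd_val = ['2008_000007']
    voc_train = ['2008_000003', '2008_000008']
    voc_val = ['2008_000007', '2008_000009']

    trainval_aug = sbd_train + sbd_val + voc_train
    # set() both deduplicates and keeps val images out of the training split
    train_aug = list(set(trainval_aug) - set(voc_val))
    # train_aug contains '2008_000002', '2008_000003', '2008_000008' (in arbitrary order)
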
Example #2
    def filtertraindata(self):
        datapath = get_data_path('coco')
        train_list = tuple(open(datapath + 'annotations/train2014.txt', 'r'))
        val_list = tuple(open(datapath + 'annotations/val2014.txt', 'r'))
        total_list = ['/train2014/'+id_.rstrip() for id_ in train_list] + ['/val2014/'+id_.rstrip() for id_ in val_list]

        annotation_path = os.path.join(datapath, 'seg_mask')
        aug_list = []
        for filename in total_list:
            lbl_path = annotation_path + filename + '.png'
            lbl = Image.open(lbl_path).convert('P')
            lbl = np.array(lbl, dtype=np.int32)
            if np.sum(pascal_map[lbl] != 0) > 1000 and np.intersect1d(np.unique(lbl),pascal_classes).any():
                aug_list.append(filename)

        val_aug_list = random.sample(aug_list, 1500)
        train_aug_list = list(set(aug_list) - set(val_aug_list))
        with open(os.path.join(datapath, 'annotations', 'train_aug.txt'), 'w') as txtfile:
            for file in train_aug_list:
                txtfile.write(file + '\n')
        with open(os.path.join(datapath, 'annotations', 'val.txt'), 'w') as txtfile:
            for file in val_aug_list:
                txtfile.write(file + '\n')
Example #3
    def __init__(self, root, split="train_aug", is_transform=False, img_size=512):
        self.root = root
        self.split = split
        self.is_transform = is_transform
        self.ignore_index = 91
        self.n_classes = 21
        self.img_size = img_size if isinstance(img_size, tuple) else (img_size, img_size)
        self.files = collections.defaultdict(list)

        self.image_transform = Compose([
            ToTensor(),
            Normalize([.485, .456, .406], [.229, .224, .225]),
        ])

        self.filler = [0, 0, 0]

        # Reading COCO dataset list - train2014, val2014, train_aug, val, test2014, test2015
        self.data_path = get_data_path('coco')
        filepath = self.data_path + '/annotations/' + split + '.txt'
        if split == "train_aug" and not os.path.exists(filepath):
            self.filtertraindata()
        file_list = tuple(open(filepath, 'r'))
        file_list = [id_.rstrip() for id_ in file_list]
        self.files = file_list
Example #4
def train(args):
    global n_classes

    # Set the seed for reproducing the results
    random.seed(args.manualSeed)
    np.random.seed(args.manualSeed)
    torch.manual_seed(args.manualSeed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(args.manualSeed)
        cudnn.benchmark = True

    # Set up results folder
    if not os.path.exists('results/saved_val_images'):
        os.makedirs('results/saved_val_images')
    if not os.path.exists('results/saved_train_images'):
        os.makedirs('results/saved_train_images')

    # Setup Dataloader
    data_loader = get_loader(args.dataset)
    data_path = get_data_path(args.dataset)

    traindata = data_loader(data_path,
                            split=args.split,
                            is_transform=True,
                            img_size=(args.img_rows, args.img_cols))
    trainloader = data.DataLoader(traindata,
                                  batch_size=args.batch_size,
                                  num_workers=7,
                                  shuffle=True)

    valdata = data_loader(data_path,
                          split="val",
                          is_transform=False,
                          img_size=(args.img_rows, args.img_cols))
    valloader = data.DataLoader(valdata,
                                batch_size=args.batch_size,
                                num_workers=7,
                                shuffle=False)

    n_classes = traindata.n_classes
    n_trainsamples = len(traindata)
    n_iters_per_epoch = np.ceil(n_trainsamples /
                                float(args.batch_size * args.iter_size))
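    # args.iter_size presumably counts gradient-accumulation steps, so one
    # "iteration" here corresponds to batch_size * iter_size samples.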

    # Setup Model
    model = torch.nn.DataParallel(
        get_model(args.arch,
                  n_classes,
                  ignore_index=traindata.ignore_index,
                  output_stride=args.ost))

    if torch.cuda.is_available():
        model.cuda()

    epochs_done = 0
    X = []
    Y = []
    Y_test = []
    avg_pixel_acc = 0
    mean_class_acc = 0
    mIoU = 0
    avg_pixel_acc_test = 0
    mean_class_acc_test = 0
    mIoU_test = 0

    if args.model_path:
        model_name = args.model_path.split('.')
        checkpoint_name = model_name[0] + '_optimizer.pkl'
        checkpoint = torch.load(checkpoint_name)
        optm = checkpoint['optimizer']
        model.load_state_dict(checkpoint['state_dict'])
        split_str = model_name[0].split('_')
        epochs_done = int(split_str[-1])
        saved_loss = pickle.load(open("results/saved_loss.p", "rb"))
        saved_accuracy = pickle.load(open("results/saved_accuracy.p", "rb"))
        X = saved_loss["X"][:epochs_done]
        Y = saved_loss["Y"][:epochs_done]
        Y_test = saved_loss["Y_test"][:epochs_done]
        avg_pixel_acc = saved_accuracy["P"][:epochs_done, :]
        mean_class_acc = saved_accuracy["M"][:epochs_done, :]
        mIoU = saved_accuracy["I"][:epochs_done, :]
        avg_pixel_acc_test = saved_accuracy["P_test"][:epochs_done, :]
        mean_class_acc_test = saved_accuracy["M_test"][:epochs_done, :]
        mIoU_test = saved_accuracy["I_test"][:epochs_done, :]

    # Learning rates: for new layers (such as the final layer), we set the lr to be 10x that of the already-trained layers (20x for their biases, per the parameter groups below)
    bias_10x_params = filter(
        lambda x: ('bias' in x[0]) and ('final' in x[0]) and ('conv' in x[0]),
        model.named_parameters())
    bias_10x_params = list(map(lambda x: x[1], bias_10x_params))

    bias_params = filter(lambda x: ('bias' in x[0]) and ('final' not in x[0]),
                         model.named_parameters())
    bias_params = list(map(lambda x: x[1], bias_params))

    nonbias_10x_params = filter(
        lambda x:
        (('bias' not in x[0]) or ('bn' in x[0])) and ('final' in x[0]),
        model.named_parameters())
    nonbias_10x_params = list(map(lambda x: x[1], nonbias_10x_params))

    nonbias_params = filter(
        lambda x: ('bias' not in x[0]) and ('final' not in x[0]),
        model.named_parameters())
    nonbias_params = list(map(lambda x: x[1], nonbias_params))

    optimizer = torch.optim.SGD([
        {
            'params': bias_params,
            'lr': args.l_rate
        },
        {
            'params': bias_10x_params,
            'lr': 20 * args.l_rate
        },
        {
            'params': nonbias_10x_params,
            'lr': 10 * args.l_rate
        },
        {
            'params': nonbias_params,
            'lr': args.l_rate
        },
    ],
                                lr=args.l_rate,
                                momentum=args.momentum,
                                weight_decay=args.wd,
                                nesterov=(args.optim == 'Nesterov'))
    numgroups = 4

    # Setting up scheduler
    if args.model_path and args.restore:
        # Here we restore all states of optimizer
        optimizer.load_state_dict(optm)
        total_iters = n_iters_per_epoch * args.n_epoch
        lambda1 = lambda step: 0.5 + 0.5 * math.cos(np.pi * step / total_iters)
        scheduler = lr_scheduler.LambdaLR(optimizer,
                                          lr_lambda=[lambda1] * numgroups,
                                          last_epoch=int(epochs_done *
                                                         n_iters_per_epoch))
    else:
        # Here we simply restart the training
        if args.T0:
            total_iters = args.T0 * n_iters_per_epoch
        else:
            total_iters = ((args.n_epoch - epochs_done) * n_iters_per_epoch)
        lambda1 = lambda step: 0.5 + 0.5 * math.cos(np.pi * step / total_iters)
        scheduler = lr_scheduler.LambdaLR(optimizer,
                                          lr_lambda=[lambda1] * numgroups)
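    # In both branches the lambda implements cosine annealing: the multiplier
    # 0.5 + 0.5*cos(pi * step / total_iters) is 1.0 at step 0, 0.5 halfway
    # through, and 0.0 at total_iters, so every group's lr decays smoothly
    # from its initial value to zero.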

    global l_avg, totalclasswise_pixel_acc, totalclasswise_gtpixels, totalclasswise_predpixels
    global l_avg_test, totalclasswise_pixel_acc_test, totalclasswise_gtpixels_test, totalclasswise_predpixels_test
    global steps, steps_test

    scheduler.step()

    for epoch in range(epochs_done, args.n_epoch):
        # Reset all variables every epoch
        l_avg = 0
        totalclasswise_pixel_acc = 0
        totalclasswise_gtpixels = 0
        totalclasswise_predpixels = 0
        l_avg_test = 0
        totalclasswise_pixel_acc_test = 0
        totalclasswise_gtpixels_test = 0
        totalclasswise_predpixels_test = 0
        steps = 0
        steps_test = 0

        trainmodel(model, optimizer, trainloader, epoch, scheduler, traindata)
        valmodel(model, valloader, epoch)

        # save the model every 10 epochs
        if (epoch + 1) % 10 == 0 or epoch == args.n_epoch - 1:
            torch.save(
                model, "results/{}_{}_{}.pkl".format(args.arch, args.dataset,
                                                     epoch + 1))
            torch.save(
                {
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict()
                }, "results/{}_{}_{}_optimizer.pkl".format(
                    args.arch, args.dataset, epoch + 1))

        if os.path.isfile("results/saved_loss.p"):
            os.remove("results/saved_loss.p")
        if os.path.isfile("results/saved_accuracy.p"):
            os.remove("results/saved_accuracy.p")

        # saving train and validation loss
        X.append(epoch + 1)
        Y.append(l_avg / steps)
        Y_test.append(l_avg_test / steps_test)
        saved_loss = {"X": X, "Y": Y, "Y_test": Y_test}
        pickle.dump(saved_loss, open("results/saved_loss.p", "wb"))

        # pixel accuracy
        totalclasswise_pixel_acc = totalclasswise_pixel_acc.reshape(
            (-1, n_classes)).astype(np.float32)
        totalclasswise_gtpixels = totalclasswise_gtpixels.reshape(
            (-1, n_classes))
        totalclasswise_predpixels = totalclasswise_predpixels.reshape(
            (-1, n_classes))
        totalclasswise_pixel_acc_test = totalclasswise_pixel_acc_test.reshape(
            (-1, n_classes)).astype(np.float32)
        totalclasswise_gtpixels_test = totalclasswise_gtpixels_test.reshape(
            (-1, n_classes))
        totalclasswise_predpixels_test = totalclasswise_predpixels_test.reshape(
            (-1, n_classes))

        if isinstance(avg_pixel_acc, np.ndarray):
            avg_pixel_acc = np.vstack(
                (avg_pixel_acc, np.sum(totalclasswise_pixel_acc, axis=1) /
                 np.sum(totalclasswise_gtpixels, axis=1)))
            mean_class_acc = np.vstack(
                (mean_class_acc,
                 np.mean(totalclasswise_pixel_acc / totalclasswise_gtpixels,
                         axis=1)))
            mIoU = np.vstack(
                (mIoU,
                 np.mean(totalclasswise_pixel_acc /
                         (totalclasswise_gtpixels + totalclasswise_predpixels -
                          totalclasswise_pixel_acc),
                         axis=1)))

            avg_pixel_acc_test = np.vstack(
                (avg_pixel_acc_test,
                 np.sum(totalclasswise_pixel_acc_test, axis=1) /
                 np.sum(totalclasswise_gtpixels_test, axis=1)))
            mean_class_acc_test = np.vstack(
                (mean_class_acc_test,
                 np.mean(totalclasswise_pixel_acc_test /
                         totalclasswise_gtpixels_test,
                         axis=1)))
            mIoU_test = np.vstack((mIoU_test,
                                   np.mean(totalclasswise_pixel_acc_test /
                                           (totalclasswise_gtpixels_test +
                                            totalclasswise_predpixels_test -
                                            totalclasswise_pixel_acc_test),
                                           axis=1)))
        else:
            avg_pixel_acc = np.sum(totalclasswise_pixel_acc, axis=1) / np.sum(
                totalclasswise_gtpixels, axis=1)
            mean_class_acc = np.mean(totalclasswise_pixel_acc /
                                     totalclasswise_gtpixels,
                                     axis=1)
            mIoU = np.mean(
                totalclasswise_pixel_acc /
                (totalclasswise_gtpixels + totalclasswise_predpixels -
                 totalclasswise_pixel_acc),
                axis=1)

            avg_pixel_acc_test = np.sum(
                totalclasswise_pixel_acc_test, axis=1) / np.sum(
                    totalclasswise_gtpixels_test, axis=1)
            mean_class_acc_test = np.mean(totalclasswise_pixel_acc_test /
                                          totalclasswise_gtpixels_test,
                                          axis=1)
            mIoU_test = np.mean(
                totalclasswise_pixel_acc_test /
                (totalclasswise_gtpixels_test + totalclasswise_predpixels_test
                 - totalclasswise_pixel_acc_test),
                axis=1)

        saved_accuracy = {
            "X": X,
            "P": avg_pixel_acc,
            "M": mean_class_acc,
            "I": mIoU,
            "P_test": avg_pixel_acc_test,
            "M_test": mean_class_acc_test,
            "I_test": mIoU_test
        }
        pickle.dump(saved_accuracy, open("results/saved_accuracy.p", "wb"))
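
The accuracy bookkeeping above reduces to three per-class pixel counts: true positives (totalclasswise_pixel_acc), ground-truth pixels, and predicted pixels. A minimal sketch of the same three metrics on made-up counts for three classes:

    import numpy as np

    tp = np.array([[80., 50., 10.]])    # correctly classified pixels per class
    gt = np.array([[100., 60., 20.]])   # ground-truth pixels per class
    pr = np.array([[90., 70., 15.]])    # predicted pixels per class

    avg_pixel_acc = np.sum(tp, axis=1) / np.sum(gt, axis=1)  # overall pixel accuracy
    mean_class_acc = np.mean(tp / gt, axis=1)                # per-class recall, averaged
    mIoU = np.mean(tp / (gt + pr - tp), axis=1)              # intersection over union
    print(avg_pixel_acc, mean_class_acc, mIoU)
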
Example #5
def test(args):

    data_loader = get_loader(args.dataset)
    data_path = get_data_path(args.dataset)
    testdata = data_loader(data_path,
                           split="val",
                           is_transform=False,
                           img_size=(512, 512))
    n_classes = testdata.n_classes
    eps = 1e-10

    # (TODO): Choose the scale according to dataset requirements
    scales = [0.5, 0.75, 1.0, 1.25]
    base_size = min(testdata.img_size)
    crop_size = (args.img_rows, args.img_cols)
    stride = [0, 0]
    stride[0] = int(np.ceil(float(crop_size[0]) * 2 / 3))
    stride[1] = int(np.ceil(float(crop_size[1]) * 2 / 3))
    size_transform_img = [Scale(int(base_size * i)) for i in scales]

    mask1_len = np.zeros(n_classes, dtype=float)
    mask2_len = np.zeros(n_classes, dtype=float)
    correct_len = np.zeros(n_classes, dtype=float)

    # Setup Model
    model = torch.nn.DataParallel(
        get_model(args.arch, n_classes, ignore_index=testdata.ignore_index))
    model_name = args.model_path.split('.')
    checkpoint_name = model_name[0] + '_optimizer.pkl'
    checkpoint = torch.load(checkpoint_name)
    model.load_state_dict(checkpoint['state_dict'])
    model.eval()

    soft = nn.Softmax2d()
    cm = np.zeros((n_classes, n_classes), dtype=np.float64)
    if torch.cuda.is_available():
        model.cuda()
        soft.cuda()

    for f_no, line in enumerate(testdata.files):
        imgr, lblr = testdata.readfile(line)
        lbl = np.array(lblr)
        origw, origh = imgr.size

        # Maintain final prediction array for each image
        pred = np.zeros((n_classes, origh, origw), dtype=np.float32)

        # Loop over all scales for single image
        for i in range(len(scales)):
            img = size_transform_img[i](imgr)
            imsw, imsh = img.size

            imwstart, imhstart = 0, 0
            imw, imh = imsw, imsh
            # Zero padding if either size is smaller than crop_size
            if imsw < crop_size[1] or imsh < crop_size[0]:
                padw, padh = max(crop_size[1] - imsw,
                                 0), max(crop_size[0] - imsh, 0)
                imw += padw
                imh += padh
                im = Image.new(img.mode, (imw, imh), tuple(testdata.filler))
                im.paste(img, (int(padw / 2), int(padh / 2)))
                imwstart += int(padw / 2)
                imhstart += int(padh / 2)
                img = im

            # Now tile the image into crop_size patches and loop over them
            h_grid = int(np.ceil(float(imh - crop_size[0]) / stride[0])) + 1
            w_grid = int(np.ceil(float(imw - crop_size[1]) / stride[1])) + 1
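            # With stride = ceil(2/3 * crop_size), adjacent tiles overlap by
            # roughly one third of the crop; the +1 ensures the grid also
            # covers the last partial window at the right/bottom border.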

            # maintain prediction probability for each pixel
            datascale = torch.zeros(n_classes, imh, imw).cuda()
            countscale = torch.zeros(n_classes, imh, imw).cuda()
            for w in range(w_grid):
                for h in range(h_grid):
                    # crop portion from image - crop_size
                    x1, y1 = w * stride[1], h * stride[0]
                    x2, y2 = int(min(x1 + crop_size[1],
                                     imw)), int(min(y1 + crop_size[0], imh))
                    x1, y1 = x2 - crop_size[1], y2 - crop_size[0]
                    img_cropped = img.crop((x1, y1, x2, y2))

                    # Input the image as well as its flipped version
                    img1 = testdata.image_transform(img_cropped)
                    img2 = testdata.image_transform(
                        img_cropped.transpose(Image.FLIP_LEFT_RIGHT))
                    images = torch.stack((img1, img2), dim=0)

                    if torch.cuda.is_available():
                        images = Variable(images.cuda(), volatile=True)
                    else:
                        images = Variable(images, volatile=True)

                    # Output prediction for image and its flip version
                    outputs = model(images)

                    # Sum prediction from image and its flip and then normalize
                    flip_idx = torch.arange(outputs.size(3) - 1, -1, -1).cuda().long()
                    prob = outputs[0] + outputs[1][:, :, flip_idx]
                    prob = soft(prob.view(-1, *prob.size()))

                    # Place the score in the proper position
                    datascale[:, y1:y2, x1:x2] += prob.data
                    countscale[:, y1:y2, x1:x2] += 1
            # After looping over all tiles of the image, normalize the scores and bilinearly interpolate to the original image size
            datascale /= (countscale + eps)
            datascale = datascale[:, imhstart:imhstart + imsh,
                                  imwstart:imwstart + imsw]
            datascale = datascale.cpu().numpy()
            datascale = np.transpose(datascale, (1, 2, 0))
            datascale = resize(datascale, (origh, origw),
                               order=1,
                               preserve_range=True,
                               mode='symmetric',
                               clip=False)
            datascale = np.transpose(datascale, (2, 0, 1))

            # Sum up all the scores for all scales
            pred += (datascale / (np.sum(datascale, axis=0) + eps))
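            # Dividing by the per-pixel sum over classes turns the accumulated
            # scores into a distribution, so every scale contributes on equal
            # footing when the predictions are averaged below.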

        pred = pred / len(scales)
        pred = pred.argmax(0)

        pred[lbl == testdata.ignore_index] = testdata.ignore_index

        for m in range(n_classes):
            mask1 = lbl == m
            mask2 = pred == m
            diff = pred[mask1] - lbl[mask1]
            mask1_len[m] += float(np.sum(mask1))
            mask2_len[m] += float(np.sum(mask2))
            correct_len[m] += np.sum(diff == 0)

        cm += confusion_matrix(lbl.ravel(),
                               pred.ravel(),
                               labels=range(n_classes))
        indexes_to_avg = mask1_len > 0
        print("pixel accuracy")
        print(
            np.sum(correct_len[indexes_to_avg]) /
            np.sum(mask1_len[indexes_to_avg]))
        print("Class_wise_IOU")
        print(correct_len[indexes_to_avg] /
              (mask1_len[indexes_to_avg] + mask2_len[indexes_to_avg] -
               correct_len[indexes_to_avg]))
        print("mean IOU")
        print(
            np.mean(correct_len[indexes_to_avg] /
                    (mask1_len[indexes_to_avg] + mask2_len[indexes_to_avg] -
                     correct_len[indexes_to_avg])))
        print("mean accuracy")
        print(np.mean(correct_len[indexes_to_avg] / mask1_len[indexes_to_avg]))

        decoded = testdata.decode_segmap(pred)
        pickle.dump(
            np.transpose(np.array(imgr, dtype=np.uint8), [2, 0, 1]),
            open("results/saved_test_images/" + str(f_no) + "_input.p", "wb"))
        pickle.dump(
            np.transpose(decoded, [2, 0, 1]),
            open("results/saved_test_images/" + str(f_no) + "_output.p", "wb"))
        pickle.dump(
            np.transpose(testdata.decode_segmap(lbl), [2, 0, 1]),
            open("results/saved_test_images/" + str(f_no) + "_target.p", "wb"))

    sio.savemat("results/cm.mat", {'cm': cm})
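
The metrics printed inside the loop can also be recovered from the saved confusion matrix alone. A minimal sketch, assuming cm[i, j] counts pixels of true class i predicted as class j (which matches the confusion_matrix call above):

    import numpy as np

    cm = np.array([[50., 5.],
                   [10., 35.]])
    tp = np.diag(cm)
    gt = cm.sum(axis=1)   # row sums: ground-truth pixels per class
    pr = cm.sum(axis=0)   # column sums: predicted pixels per class

    pixel_acc = tp.sum() / cm.sum()
    iou = tp / (gt + pr - tp)
    print(pixel_acc, iou, iou.mean())
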
Example #6
def test(args):

    data_loader = get_loader(args.dataset)
    data_path = get_data_path(args.dataset)
    testdata = data_loader(data_path, split=args.split, is_transform=False, img_size=(512, 512))
    n_classes = testdata.n_classes
    eps = 1e-10

    args.coco += 5

    scales = [0.5, 0.75, 1.0, 1.25]
    base_size = min(testdata.img_size)
    crop_size = (args.img_rows, args.img_cols)
    stride = [0, 0]
    stride[0] = int(np.ceil(float(crop_size[0]) * 2/3))
    stride[1] = int(np.ceil(float(crop_size[1]) * 2/3))
    size_transform_img = [Scale(int(base_size*i)) for i in scales]

    # Setup Model
    model = torch.nn.DataParallel(get_model(args.arch, n_classes, ignore_index=testdata.ignore_index, output_stride=args.ost))
    model_name = args.model_path.split('.')
    checkpoint_name = model_name[0] + '_optimizer.pkl'
    checkpoint = torch.load(checkpoint_name)
    model.load_state_dict(checkpoint['state_dict'])
    model.eval()

    soft = nn.Softmax2d()
    if torch.cuda.is_available():
        model.cuda()
        soft.cuda()

    for f_no, line in enumerate(testdata.files):
        imgr = readfile(args.img_path, line)
        origw, origh = imgr.size

        # Maintain final prediction array for each image
        pred = np.zeros((n_classes, origh, origw), dtype=np.float32)

        # Loop over all scales for single image
        for i in range(len(scales)):
            img = size_transform_img[i](imgr)
            imsw, imsh = img.size

            imwstart, imhstart = 0, 0
            imw, imh = imsw, imsh
            # Zero padding if either size is smaller than crop_size
            if imsw < crop_size[1] or imsh < crop_size[0]:
                padw, padh = max(crop_size[1] - imsw, 0), max(crop_size[0] - imsh, 0)
                imw += padw
                imh += padh
                im = Image.new(img.mode, (imw, imh), tuple(testdata.filler))
                im.paste(img, (int(padw / 2), int(padh / 2)))
                imwstart += int(padw / 2)
                imhstart += int(padh / 2)
                img = im

            # Now tile the image into crop_size patches and loop over them
            h_grid = int(np.ceil(float(imh - crop_size[0]) / stride[0])) + 1
            w_grid = int(np.ceil(float(imw - crop_size[1]) / stride[1])) + 1

            # maintain prediction probability for each pixel
            datascale = torch.zeros(n_classes, imh, imw).cuda()
            countscale = torch.zeros(n_classes, imh, imw).cuda()
            for w in range(w_grid):
                for h in range(h_grid):
                    # crop portion from image - crop_size
                    x1, y1 = w * stride[1], h * stride[0]
                    x2, y2 = int(min(x1 + crop_size[1], imw)), int(min(y1 + crop_size[0], imh))
                    x1, y1 = x2 - crop_size[1], y2 - crop_size[0]
                    img_cropped = img.crop((x1, y1, x2, y2))

                    # Input the image as well as its flipped version
                    img1 = testdata.image_transform(img_cropped)
                    img2 = testdata.image_transform(img_cropped.transpose(Image.FLIP_LEFT_RIGHT))
                    images = torch.stack((img1, img2), dim=0)

                    if torch.cuda.is_available():
                        images = Variable(images.cuda(), volatile=True)
                    else:
                        images = Variable(images, volatile=True)

                    # Output prediction for image and its flip version
                    outputs = model(images)

                    # Sum prediction from image and its flip and then normalize
                    flip_idx = torch.arange(outputs.size(3) - 1, -1, -1).cuda().long()
                    prob = outputs[0] + outputs[1][:, :, flip_idx]
                    prob = soft(prob.view(-1, *prob.size()))

                    # Place the score in the proper position
                    datascale[:, y1:y2, x1:x2] += prob.data
                    countscale[:, y1:y2, x1:x2] += 1
            # After looping over all tiles of the image, normalize the scores and bilinearly interpolate to the original image size
            datascale /= (countscale + eps)
            datascale = datascale[:, imhstart:imhstart+imsh, imwstart:imwstart+imsw]
            datascale = datascale.cpu().numpy()
            datascale = np.transpose(datascale, (1, 2, 0))
            datascale = resize(datascale, (origh, origw), order=1, preserve_range=True, mode='symmetric', clip=False)
            datascale = np.transpose(datascale, (2, 0, 1))

            # Sum up all the scores for all scales
            pred += (datascale / (np.sum(datascale, axis=0) + eps))

        pred = pred / len(scales)
        # Class indices fit in uint8; PIL cannot create an image from a uint32 array
        pred = pred.argmax(0).astype(np.uint8)

        im = Image.fromarray(pred)
        im.save(os.path.join(args.outpath, str(args.coco) + "_" + str(args.split) + "_cls/" + line + ".png"))
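
The masks saved above are flat index images. To view them with the usual PASCAL colors, one option is to attach a palette before saving; a sketch, assuming voc_palette is a flat 256*3 [R, G, B, ...] list such as the one the PASCAL devkit generates (hypothetical here):

    im = Image.fromarray(pred)      # mode 'L' for uint8 class indices
    im = im.convert('P')
    im.putpalette(voc_palette)      # hypothetical palette list, not defined above
    im.save('pred_color.png')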