Example #1
def eval(dataloader, head_detector):
    """
    Given the dataloader of the test split compute the
    average corLoc of the dataset using the head detector 
    model given as the argument to the function. 
    """
    test_img_num = 0
    test_corrLoc = 0.0
    for _, (img, bbox_, scale) in enumerate(dataloader):
        scale = at.scalar(scale)
        img, bbox = img.cuda().float(), bbox_.cuda()
        img, bbox = Variable(img), Variable(bbox)
        pred_bboxes_, _ = head_detector.predict(img, scale, mode='evaluate')
        gt_bboxs = at.tonumpy(bbox_)[0]
        pred_bboxes_ = at.tonumpy(pred_bboxes_)
        if pred_bboxes_.shape[0] == 0:
            # No predictions for this image: it still counts toward the
            # average with zero correct localizations.
            test_img_num += 1
            continue
        else:
            # Per-image corLoc: number of predictions whose best IoU with any
            # ground-truth box is >= 0.5, divided by the number of
            # ground-truth boxes.
            ious = bbox_iou(pred_bboxes_, gt_bboxs)
            max_ious = ious.max(axis=1)
            corr_preds = np.where(max_ious >= 0.5)[0]
            num_boxs = gt_bboxs.shape[0]
            num_corr_preds = len(corr_preds)
            test_corrLoc += num_corr_preds / num_boxs
            test_img_num += 1
    return test_corrLoc / test_img_num
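
A minimal usage sketch for eval, assuming the project pieces shown in the train() example below (utils.get_phase_data_list, Dataset, data_.DataLoader, Head_Detector_VGG16, Head_Detector_Trainer); the checkpoint path passed to trainer.load is a hypothetical placeholder:

# Sketch only: the checkpoint path below is a placeholder, not a file shipped
# with the project.
test_data_list = utils.get_phase_data_list(
    os.path.join(opt.brainwash_dataset_root_path, 'brainwash_test.idl'),
    'brainwash')
test_dataloader = data_.DataLoader(Dataset(test_data_list),
                                   batch_size=1, shuffle=False, num_workers=1)

head_detector = Head_Detector_VGG16(ratios=[1], anchor_scales=[8, 16])
trainer = Head_Detector_Trainer(head_detector).cuda()
trainer.load('checkpoints/head_detector_final.pth')  # hypothetical path

print("Average corLoc: {:.3f}".format(eval(test_dataloader, head_detector)))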
Example #2
    def forward(self, imgs, bboxs, scale):
        n, _, _ = bboxs.size()
        if n != 1:
            raise ValueError('Currently only batch size 1 is supported.')        
        _, _, H, W = imgs.size()
        img_size = (H, W)
        features = self.head_detector.extractor(imgs)
        rpn_locs, rpn_scores, rois, rois_scores, anchor = self.head_detector.rpn(features, img_size, scale)
        bbox = bboxs[0]
        rpn_score = rpn_scores[0]
        rpn_loc = rpn_locs[0]

        # ------------------ RPN losses -------------------#
        gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
            at.tonumpy(bbox), anchor, img_size)
        gt_rpn_label = at.tovariable(gt_rpn_label).long()
        gt_rpn_loc = at.tovariable(gt_rpn_loc)
        rpn_loc_loss = head_detector_loss(
            rpn_loc,
            gt_rpn_loc,
            gt_rpn_label.data,
            self.rpn_sigma)

        rpn_cls_loss = F.cross_entropy(rpn_score, gt_rpn_label.cuda(), ignore_index=-1)
        # Track classification quality on the labelled anchors only (label > -1).
        _gt_rpn_label = gt_rpn_label[gt_rpn_label > -1]
        _rpn_score = at.tonumpy(rpn_score)[at.tonumpy(gt_rpn_label) > -1]
        self.rpn_cm.add(at.totensor(_rpn_score, False), _gt_rpn_label.data.long())
        losses = [rpn_loc_loss, rpn_cls_loss]
        losses = losses + [sum(losses)]  # append the total loss

        return LossTuple(*losses), rois, rois_scores
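
For reference, a sketch of what head_detector_loss is assumed to compute: the smooth-L1 localization loss standard in Faster R-CNN style RPNs, applied only to positive anchors and normalized by the number of labelled anchors. This is an assumption; the helper's actual body is not shown in these examples.

import torch

def smooth_l1_loss_sketch(pred_loc, gt_loc, gt_label, sigma):
    # Only positive anchors (label == 1) contribute to the localization loss.
    in_weight = torch.zeros_like(gt_loc)
    in_weight[(gt_label > 0).view(-1, 1).expand_as(in_weight)] = 1
    diff = in_weight * (pred_loc - gt_loc)
    abs_diff = diff.abs()
    sigma2 = sigma ** 2
    flag = (abs_diff < (1.0 / sigma2)).float()
    loss = (flag * (sigma2 / 2.0) * diff ** 2 +
            (1 - flag) * (abs_diff - 0.5 / sigma2))
    # Normalize by the number of anchors that received a label (not ignored).
    return loss.sum() / (gt_label >= 0).sum().float()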
Example #3
    def update_meters(self, losses):
        loss_d = {
            k: at.scalar(at.tonumpy(v))
            for k, v in losses._asdict().items()
        }
        for key, meter in self.meters.items():
            meter.add(loss_d[key])
Example #4
    def predict(self, x, scale=1., mode='evaluate', thresh=0.01):
        
        if mode == 'evaluate':
            nms_thresh = 0.3
            score_thresh = thresh
        elif mode == 'visualize':
            nms_thresh = 0.3
            score_thresh = thresh
        else:
            raise ValueError("mode must be 'evaluate' or 'visualize'")

        _, _, rois, rois_scores, _ = self.forward(x, scale=scale)
        roi = at.totensor(rois)
        probabilities = at.tonumpy(F.softmax(at.tovariable(rois_scores)))
        _, _, H, W = x.size()
        size = (H, W)
        # Clip proposals to the image: columns 0 and 2 are y coordinates
        # (clamped to [0, H]), columns 1 and 3 are x coordinates (clamped to [0, W]).
        roi[:, 0::2] = (roi[:, 0::2]).clamp(min=0, max=size[0])
        roi[:, 1::2] = (roi[:, 1::2]).clamp(min=0, max=size[1])
        roi_raw = at.tonumpy(roi)
        probabilities = np.squeeze(probabilities)
        bbox, score = self._suppress(roi_raw, probabilities, nms_thresh, score_thresh)
        return bbox, score
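
A sketch of what _suppress is assumed to do with the two thresholds: discard proposals scoring below score_thresh, then apply non-maximum suppression at nms_thresh. The method body is not shown above, so this is an assumption; torchvision.ops.nms stands in for whatever NMS routine the project uses, and boxes are taken to be in (ymin, xmin, ymax, xmax) order, consistent with the clamping step above.

import numpy as np
import torch
from torchvision.ops import nms

def suppress_sketch(roi_raw, probabilities, nms_thresh, score_thresh):
    # Keep proposals whose score clears the threshold.
    keep = probabilities >= score_thresh
    boxes, scores = roi_raw[keep], probabilities[keep]
    if boxes.shape[0] == 0:
        return (np.zeros((0, 4), dtype=np.float32),
                np.zeros((0,), dtype=np.float32))
    # torchvision.ops.nms expects (xmin, ymin, xmax, ymax) boxes, so swap the
    # coordinate pairs before running NMS.
    boxes_xyxy = torch.from_numpy(boxes[:, [1, 0, 3, 2]].astype(np.float32))
    scores_t = torch.from_numpy(scores.astype(np.float32))
    keep_idx = nms(boxes_xyxy, scores_t, nms_thresh).numpy()
    return boxes[keep_idx], scores[keep_idx]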
Example #5
def train():
    # Build the train/val/test data lists. `phases`, `dataset_name`, `opt`,
    # and `data_check_flag` are assumed to be defined at module level.
    for phase in phases:
        if phase == 'train':
            if dataset_name == 'hollywood':
                train_data_list_path = os.path.join(
                    opt.hollywood_dataset_root_path, 'hollywood_train.idl')
                train_data_list = utils.get_phase_data_list(
                    train_data_list_path, dataset_name)
            if dataset_name == 'brainwash':
                train_data_list_path = os.path.join(
                    opt.brainwash_dataset_root_path, 'brainwash_train.idl')
                train_data_list = utils.get_phase_data_list(
                    train_data_list_path, dataset_name)
        elif phase == 'val':
            if dataset_name == 'hollywood':
                val_data_list_path = os.path.join(
                    opt.hollywood_dataset_root_path, 'hollywood_val.idl')
                val_data_list = utils.get_phase_data_list(
                    val_data_list_path, dataset_name)
            if dataset_name == 'brainwash':
                val_data_list_path = os.path.join(
                    opt.brainwash_dataset_root_path, 'brainwash_val.idl')
                val_data_list = utils.get_phase_data_list(
                    val_data_list_path, dataset_name)
        elif phase == 'test':
            if dataset_name == 'hollywood':
                test_data_list_path = os.path.join(
                    opt.hollywood_dataset_root_path, 'hollywood_test.idl')
                test_data_list = utils.get_phase_data_list(
                    test_data_list_path, dataset_name)
            if dataset_name == 'brainwash':
                test_data_list_path = os.path.join(
                    opt.brainwash_dataset_root_path, 'brainwash_test.idl')
                test_data_list = utils.get_phase_data_list(
                    test_data_list_path, dataset_name)

    print("Number of images for training: %s" % (len(train_data_list)))
    print("Number of images for val: %s" % (len(val_data_list)))
    print("Number of images for test: %s" % (len(test_data_list)))

    if data_check_flag:
        # random.randint is inclusive on both ends, so sample indices
        # from [0, len - 1] to avoid an IndexError.
        utils.check_loaded_data(train_data_list[random.randint(
            0, len(train_data_list) - 1)])
        utils.check_loaded_data(val_data_list[random.randint(
            0, len(val_data_list) - 1)])
        utils.check_loaded_data(test_data_list[random.randint(
            0, len(test_data_list) - 1)])

    # Build the training dataset; the validation split is used as the
    # per-epoch evaluation set below.
    train_dataset = Dataset(train_data_list)
    test_dataset = Dataset(val_data_list)
    print("Data loaded.")

    train_dataloader = data_.DataLoader(train_dataset,
                                        batch_size=1,
                                        shuffle=True,
                                        num_workers=1)
    test_dataloader = data_.DataLoader(test_dataset,
                                       batch_size=1,
                                       shuffle=True,
                                       num_workers=1)
    # Initialize the head detector.
    head_detector_vgg16 = Head_Detector_VGG16(ratios=[1],
                                              anchor_scales=[8, 16])
    print("model construct completed")
    trainer = Head_Detector_Trainer(head_detector_vgg16).cuda()
    lr_ = opt.lr
    for epoch in range(opt.epoch):
        trainer.reset_meters()
        for ii, (img, bbox_, scale) in enumerate(train_dataloader):
            scale = at.scalar(scale)
            img, bbox = img.cuda().float(), bbox_.cuda()
            img, bbox = Variable(img), Variable(bbox)
            _, _, _ = trainer.train_step(img, bbox, scale)
            print("Forward and backward pass done.")
            if (ii + 1) % opt.plot_every == 0:
                trainer.vis.plot_many(trainer.get_meter_data())
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_, at.tonumpy(bbox_[0]))
                trainer.vis.img('gt_img', gt_img)
                rois, _ = trainer.head_detector.predict(img,
                                                        scale=scale,
                                                        mode='visualize')
                pred_img = visdom_bbox(ori_img_, at.tonumpy(rois))
                trainer.vis.img('pred_img', pred_img)
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()),
                                 win='rpn_cm')

        avg_test_CorrLoc = eval(test_dataloader, head_detector_vgg16)

        print("Epoch {} of {}.".format(epoch + 1, opt.epoch))
        print("  test average corrLoc accuracy:\t\t{:.3f}".format(
            avg_test_CorrLoc))

        model_save_path = trainer.save(best_map=avg_test_CorrLoc)

        if epoch == 8:
            # After the ninth epoch, reload the checkpoint just saved and
            # decay the learning rate once.
            trainer.load(model_save_path)
            trainer.head_detector.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay