def eval(dataloader, head_detector):
    """Compute the average corLoc of the test split.

    For every image yielded by ``dataloader`` the ``head_detector`` predicts
    head boxes; a ground-truth box counts as localized when some prediction
    overlaps it with IoU >= 0.5. The per-image fraction of correct
    localizations is averaged over all images.

    NOTE: the function name shadows the builtin ``eval``; it is kept
    unchanged for backward compatibility with existing callers.

    :param dataloader: iterable of ``(img, bbox_, scale)`` test batches
        (batch size 1, CUDA-capable tensors).
    :param head_detector: model exposing ``predict(img, scale, mode=...)``.
    :return: mean corLoc over the dataset, or 0.0 for an empty dataloader.
    """
    test_img_num = 0
    test_corrLoc = 0.0
    for img, bbox_, scale in dataloader:
        scale = at.scalar(scale)
        img, bbox = img.cuda().float(), bbox_.cuda()
        img, bbox = Variable(img), Variable(bbox)
        pred_bboxes_, _ = head_detector.predict(img, scale, mode='evaluate')
        gt_bboxs = at.tonumpy(bbox_)[0]
        pred_bboxes_ = at.tonumpy(pred_bboxes_)
        test_img_num += 1
        if pred_bboxes_.shape[0] == 0:
            # No detections at all: the image contributes 0 to the sum.
            continue
        ious = bbox_iou(pred_bboxes_, gt_bboxs)
        # Best ground-truth overlap for each predicted box.
        max_ious = ious.max(axis=1)
        num_corr_preds = int((max_ious >= 0.5).sum())
        test_corrLoc += num_corr_preds / gt_bboxs.shape[0]
    # Guard the previously unchecked division: an empty dataloader used to
    # raise ZeroDivisionError here.
    return test_corrLoc / test_img_num if test_img_num else 0.0
def forward(self, imgs, bboxs, scale):
    """Run one training forward pass and compute the RPN losses.

    :param imgs: image batch tensor of shape ``(N, C, H, W)``; only N == 1
        is supported (enforced below).
    :param bboxs: ground-truth boxes, shape ``(N, R, 4)``.
    :param scale: preprocessing scale factor forwarded to the RPN.
    :return: ``(LossTuple(rpn_loc_loss, rpn_cls_loss, total), rois,
        rois_scores)``.
    :raises ValueError: if the batch size is not 1.
    """
    n,_,_ = bboxs.size()
    if n != 1:
        raise ValueError('Currently only batch size 1 is supported.')
    _, _, H, W = imgs.size()
    img_size = (H, W)
    # Backbone features, then region proposals + raw RPN outputs.
    features = self.head_detector.extractor(imgs)
    rpn_locs, rpn_scores, rois, rois_scores, anchor = self.head_detector.rpn(features, img_size, scale)
    # Batch size is 1, so drop the batch dimension.
    bbox = bboxs[0]
    rpn_score = rpn_scores[0]
    rpn_loc = rpn_locs[0]
    # ------------------ RPN losses -------------------#
    # Assign each anchor a regression target and a {-1, 0, 1} label
    # (-1 = ignored by the classification loss below).
    gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(at.tonumpy(bbox),anchor,img_size)
    gt_rpn_label = at.tovariable(gt_rpn_label).long()
    gt_rpn_loc = at.tovariable(gt_rpn_loc)
    # Smooth-L1 localization loss (only positive anchors contribute).
    rpn_loc_loss = head_detector_loss( rpn_loc, gt_rpn_loc, gt_rpn_label.data, self.rpn_sigma)
    rpn_cls_loss = F.cross_entropy(rpn_score, gt_rpn_label.cuda(), ignore_index=-1)
    # Track the confusion matrix over the non-ignored anchors only.
    _gt_rpn_label = gt_rpn_label[gt_rpn_label > -1]
    _rpn_score = at.tonumpy(rpn_score)[at.tonumpy(gt_rpn_label) > -1]
    self.rpn_cm.add(at.totensor(_rpn_score, False), _gt_rpn_label.data.long())
    # Third element is the sum of the individual losses (total loss).
    losses = [rpn_loc_loss, rpn_cls_loss]
    losses = losses + [sum(losses)]
    return LossTuple(*losses), rois, rois_scores
def update_meters(self, losses):
    """Record the latest loss values on the running-average meters.

    :param losses: a ``LossTuple`` (namedtuple) of scalar loss tensors;
        each field is converted to a plain Python scalar and added to the
        meter registered under the same name.
    """
    latest = {}
    for name, value in losses._asdict().items():
        latest[name] = at.scalar(at.tonumpy(value))
    for name, meter in self.meters.items():
        meter.add(latest[name])
def predict(self, x, scale=1., mode='evaluate', thresh=0.01):
    """Predict head bounding boxes for a preprocessed image tensor.

    :param x: image tensor of shape ``(1, C, H, W)``.
    :param scale: preprocessing scale factor forwarded to ``self.forward``.
    :param mode: either ``'evaluate'`` or ``'visualize'`` — selects the
        score threshold regime (both currently use nms_thresh 0.3).
    :param thresh: score threshold applied during suppression.
    :return: ``(bbox, score)`` arrays after NMS and score filtering.
    :raises ValueError: if ``mode`` is not a recognized value.
    """
    if mode == 'evaluate':
        nms_thresh = 0.3  # 0.3
        score_thresh = thresh
    elif mode == 'visualize':
        nms_thresh = 0.3  # 0.3
        score_thresh = thresh
    else:
        # Previously an unknown mode fell through and crashed later with a
        # confusing NameError on nms_thresh; fail fast instead.
        raise ValueError(
            "mode must be 'evaluate' or 'visualize', got %r" % (mode,))
    _, _, rois, rois_scores, _ = self.forward(x, scale=scale)
    roi = at.totensor(rois)
    probabilities = at.tonumpy(F.softmax(at.tovariable(rois_scores)))
    _, _, H, W = x.size()
    size = (H, W)
    # Clip proposals to the image: even indices are y coords (max H),
    # odd indices are x coords (max W).
    roi[:, 0::2] = (roi[:, 0::2]).clamp(min=0, max=size[0])
    roi[:, 1::2] = (roi[:, 1::2]).clamp(min=0, max=size[1])
    roi_raw = at.tonumpy(roi)
    probabilities = np.squeeze(probabilities)
    bbox, score = self._suppress(roi_raw, probabilities, nms_thresh, score_thresh)
    return bbox, score
def eval(dataloader, head_detector):
    """Average corLoc of ``head_detector`` over ``dataloader``.

    NOTE(review): this is a byte-for-byte duplicate of the documented
    ``eval`` defined earlier in the file — consider deleting one copy.
    The name also shadows the builtin ``eval``; kept for compatibility.

    A ground-truth box counts as correctly localized when some predicted
    box overlaps it with IoU >= 0.5; the per-image correct fraction is
    averaged over all images.
    """
    test_img_num = 0
    test_corrLoc = 0.0
    for img, bbox_, scale in dataloader:
        scale = at.scalar(scale)
        img, bbox = img.cuda().float(), bbox_.cuda()
        img, bbox = Variable(img), Variable(bbox)
        pred_bboxes_, _ = head_detector.predict(img, scale, mode='evaluate')
        gt_bboxs = at.tonumpy(bbox_)[0]
        pred_bboxes_ = at.tonumpy(pred_bboxes_)
        test_img_num += 1
        if pred_bboxes_.shape[0] == 0:
            # No predictions: image contributes 0 correct localizations.
            continue
        ious = bbox_iou(pred_bboxes_, gt_bboxs)
        max_ious = ious.max(axis=1)
        num_corr_preds = int((max_ious >= 0.5).sum())
        test_corrLoc += num_corr_preds / gt_bboxs.shape[0]
    # An empty dataloader previously raised ZeroDivisionError here.
    return test_corrLoc / test_img_num if test_img_num else 0.0
def train():
    """Train the VGG16 head detector and evaluate corLoc after each epoch.

    Relies on module-level configuration: ``phases``, ``dataset_name``,
    ``opt``, ``data_check_flag``, plus the helpers imported at file level
    (``utils``, ``Dataset``, ``data_``, ``at``, ``Variable``,
    ``inverse_normalize``, ``visdom_bbox``, ``eval``).
    """
    # ---- Resolve the .idl annotation list for each dataset phase ------
    for phase in phases:
        if phase == 'train':
            if dataset_name == 'hollywood':
                train_data_list_path = os.path.join(
                    opt.hollywood_dataset_root_path, 'hollywood_train.idl')
                train_data_list = utils.get_phase_data_list(
                    train_data_list_path, dataset_name)
            if dataset_name == 'brainwash':
                train_data_list_path = os.path.join(
                    opt.brainwash_dataset_root_path, 'brainwash_train.idl')
                train_data_list = utils.get_phase_data_list(
                    train_data_list_path, dataset_name)
        elif phase == 'val':
            if dataset_name == 'hollywood':
                val_data_list_path = os.path.join(
                    opt.hollywood_dataset_root_path, 'hollywood_val.idl')
                val_data_list = utils.get_phase_data_list(
                    val_data_list_path, dataset_name)
            if dataset_name == 'brainwash':
                val_data_list_path = os.path.join(
                    opt.brainwash_dataset_root_path, 'brainwash_val.idl')
                val_data_list = utils.get_phase_data_list(
                    val_data_list_path, dataset_name)
        elif phase == 'test':
            if dataset_name == 'hollywood':
                test_data_list_path = os.path.join(
                    opt.hollywood_dataset_root_path, 'hollywood_test.idl')
                test_data_list = utils.get_phase_data_list(
                    test_data_list_path, dataset_name)
            if dataset_name == 'brainwash':
                test_data_list_path = os.path.join(
                    opt.brainwash_dataset_root_path, 'brainwash_test.idl')
                test_data_list = utils.get_phase_data_list(
                    test_data_list_path, dataset_name)
    print("Number of images for training: %s" % (len(train_data_list)))
    print("Number of images for val: %s" % (len(val_data_list)))
    print("Number of images for test: %s" % (len(test_data_list)))
    if data_check_flag:
        # BUGFIX: random.randint(1, len(lst)) is inclusive on both ends,
        # so it could index one past the end (IndexError) and never
        # sampled index 0. random.choice samples the full range safely.
        utils.check_loaded_data(random.choice(train_data_list))
        utils.check_loaded_data(random.choice(val_data_list))
        utils.check_loaded_data(random.choice(test_data_list))
    # Load the train dataset (val split is used as the per-epoch test set).
    train_dataset = Dataset(train_data_list)
    test_dataset = Dataset(val_data_list)
    print("Load data.")
    train_dataloader = data_.DataLoader(train_dataset, batch_size=1,
                                        shuffle=True, num_workers=1)
    test_dataloader = data_.DataLoader(test_dataset, batch_size=1,
                                       shuffle=True, num_workers=1)
    # Initialize the head detector.
    head_detector_vgg16 = Head_Detector_VGG16(ratios=[1],
                                              anchor_scales=[8, 16])
    print("model construct completed")
    trainer = Head_Detector_Trainer(head_detector_vgg16).cuda()
    lr_ = opt.lr
    for epoch in range(opt.epoch):
        trainer.reset_meters()
        for ii, (img, bbox_, scale) in enumerate(train_dataloader):
            scale = at.scalar(scale)
            img, bbox = img.cuda().float(), bbox_.cuda()
            img, bbox = Variable(img), Variable(bbox)
            _, _, _ = trainer.train_step(img, bbox, scale)
            print("Forward and backward pass done.")
            # Periodically push losses and annotated images to visdom.
            if (ii + 1) % opt.plot_every == 0:
                trainer.vis.plot_many(trainer.get_meter_data())
                ori_img_ = inverse_normalize(at.tonumpy(img[0]))
                gt_img = visdom_bbox(ori_img_, at.tonumpy(bbox_[0]))
                trainer.vis.img('gt_img', gt_img)
                rois, _ = trainer.head_detector.predict(img, scale=scale,
                                                        mode='visualize')
                pred_img = visdom_bbox(ori_img_, at.tonumpy(rois))
                trainer.vis.img('pred_img', pred_img)
                trainer.vis.text(str(trainer.rpn_cm.value().tolist()),
                                 win='rpn_cm')
        # End-of-epoch evaluation and checkpoint.
        avg_test_CorrLoc = eval(test_dataloader, head_detector_vgg16)
        print("Epoch {} of {}.".format(epoch + 1, opt.epoch))
        print(" test average corrLoc accuracy:\t\t{:.3f}".format(
            avg_test_CorrLoc))
        model_save_path = trainer.save(best_map=avg_test_CorrLoc)
        # Hard-coded LR decay point: reload the best checkpoint and decay
        # the learning rate once, after the 9th epoch (epoch index 8).
        if epoch == 8:
            trainer.load(model_save_path)
            trainer.head_detector.scale_lr(opt.lr_decay)
            lr_ = lr_ * opt.lr_decay