def box_filter(box, box_scores, thresh, use_nms=True):
    """Keep the boxes scoring above ``thresh``, ordered by descending score.

    :param box: tensor of boxes, N x d_box
    :param box_scores: tensor of N scores
    :param thresh: a box survives only if its score is strictly greater
    :param use_nms: when true, the sorted survivors are additionally passed
        through ``nms`` with threshold ``cfg.TEST.COMMON.NMS``
    :return: ``(boxes, scores, indices)`` as numpy arrays, where ``indices``
        are the row positions of the returned boxes inside ``box``
    """
    box_width = box.size(-1)
    selected = torch.nonzero(box_scores > thresh).view(-1)

    # Nothing above the threshold: hand back correctly-shaped empty arrays.
    if selected.numel() == 0:
        no_order = np.array([], dtype=np.int32)
        return (
            np.zeros(shape=(0, box_width), dtype=np.float32),
            np.zeros(shape=(0,), dtype=np.float32),
            (selected.cpu().numpy())[no_order],
        )

    top_scores = box_scores[selected]
    ranking = torch.sort(top_scores, 0, True)[1]
    top_boxes = box[selected, :]

    if not use_nms:
        out_scores = top_scores[ranking]
        out_boxes = top_boxes[ranking]
    else:
        # nms consumes rows of (box..., score), already sorted by score.
        dets = torch.cat((top_boxes, top_scores.unsqueeze(1)), 1)[ranking]
        survivors = nms(dets, cfg.TEST.COMMON.NMS).view(-1).long()
        kept = dets[survivors]
        out_scores = kept[:, -1]
        out_boxes = kept[:, :-1]
        ranking = ranking[survivors]

    ranking_np = ranking.cpu().numpy()
    return (
        out_boxes.cpu().numpy(),
        out_scores.cpu().numpy(),
        (selected.cpu().numpy())[ranking_np],
    )
def apply_nms(utt2predict, nms_thres, device):
    """Run NMS separately for every speaker label of every utterance.

    ``utt2predict`` maps an utterance key to a numpy array whose columns are
    read as (start_t, end_t, prob_bg, spk_label).  The returned dict maps the
    same keys to arrays with columns (start_t, end_t, spk_label), sorted by
    start time.
    """
    utt2seg = {}
    for utt in list(utt2predict.keys()):
        utt_predict = utt2predict[utt]
        speakers = sorted(set(utt_predict[:, 3]))

        per_speaker = []
        for spk in speakers:
            rows = utt_predict[utt_predict[:, 3] == spk, :]
            # Scale the boundaries by 100 and truncate to whole numbers for NMS.
            rows[:, :2] = (rows[:, :2] * 100.0).astype(int)
            rows = torch.from_numpy(rows).to(device)

            # nms takes 5 columns: the interval goes into columns 0 and 2 and
            # (1 - prob_bg) into column 4; columns 1 and 3 stay zero.
            nms_in = torch.zeros(rows.size(0), 5).type_as(rows)
            nms_in[:, 0] = rows[:, 0]
            nms_in[:, 2] = rows[:, 1]
            nms_in[:, 4] = 1 - rows[:, 2]
            kept = nms(nms_in, nms_thres, force_cpu=True).view(-1).long()

            seg = rows[kept].data.cpu().numpy()
            seg = seg[seg[:, 0].argsort()][:, [0, 1]]
            seg[:, :2] = seg[:, :2] / 100.0  # undo the scaling by 100
            per_speaker.append(np.insert(seg, 2, spk, axis=1))

        merged = np.concatenate(per_speaker, axis=0)
        utt2seg[utt] = merged[merged[:, 0].argsort()]
    return utt2seg
def get_all_boxes(classes, im2show):
    """Store the NMS-filtered detections of every non-background class.

    For each class index ``j >= 1`` the detections scoring above ``thresh``
    are sorted, passed through ``nms`` (threshold ``cfg.TEST.NMS``) and written
    to ``all_boxes[j][i]``; classes without detections get ``empty_array``.
    ``imdb``, ``scores``, ``pred_boxes``, ``thresh``, ``args``, ``vis``,
    ``id2chn``, ``all_boxes``, ``i`` and ``empty_array`` are read from the
    enclosing scope.  ``classes`` is accepted but not used.

    :return: ``(all_boxes, im2show)``; ``im2show`` is redrawn with the top
        detection of each class when ``vis`` or ``args.save_for_vis`` is set
    """
    # range() instead of xrange(): xrange does not exist on Python 3.
    for j in range(1, imdb.num_classes):
        inds = torch.nonzero(scores[:, j] > thresh).view(-1)
        if inds.numel() == 0:
            all_boxes[j][i] = empty_array
            continue

        cls_scores = scores[:, j][inds]
        _, order = torch.sort(cls_scores, 0, True)
        if args.class_agnostic:
            cls_boxes = pred_boxes[inds, :]
        else:
            cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

        cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
        cls_dets = cls_dets[order]
        keep = nms(cls_dets, cfg.TEST.NMS)
        cls_dets = cls_dets[keep.view(-1).long()]

        # Convert once; the array is used both for drawing and for storage.
        dets_np = cls_dets.cpu().numpy()
        if vis or args.save_for_vis:
            # Only the first row after NMS is drawn.
            im2show = vis_detections(
                im2show, id2chn[imdb.classes[j]],
                np.array([dets_np[0, :]]), 0.5)
        all_boxes[j][i] = dets_np
    return all_boxes, im2show
def visualize_without_paths(video_dataset, pred_boxes, scores, pred_trk_boxes, det_classes):
    """Draw the NMS-filtered detections on every frame but the last and save them.

    Each frame is read with OpenCV and its channel order reversed.  For every
    class index >= 1, detections scoring above ``CONF_THRESH`` go through
    ``nms`` (threshold 0.3) and are drawn as labelled rectangles coloured by
    ``COLOR_WHEEL``.  The figure is saved to ``video_dataset._output_dir``
    under the frame's base file name.  ``pred_trk_boxes`` is accepted but not
    used.
    """
    print("Visualizing...")
    frame_paths = video_dataset._frame_paths
    class_count = len(det_classes)
    frame_count = len(frame_paths)

    for i_frame in range(frame_count - 1):
        print('frame: {}/{}'.format(i_frame, frame_count))
        fig, ax = plt.subplots(figsize=(12, 12))
        img_path = frame_paths[i_frame]
        bgr = cv2.imread(img_path)
        rgb = bgr[:, :, (2, 1, 0)]
        disp_image = Image.fromarray(np.uint8(rgb))

        for cls_ind in range(1, class_count):
            ax.imshow(disp_image, aspect='equal')
            class_name = det_classes[cls_ind]
            frame_scores = scores[i_frame][0]
            above = torch.nonzero(
                frame_scores[:, cls_ind] > CONF_THRESH).view(-1)
            if above.numel() == 0:
                # no detections above threshold for this class
                continue

            cls_scores = frame_scores[above][:, cls_ind]
            ranking = torch.sort(cls_scores, 0, True)[1]
            cls_boxes = pred_boxes[i_frame][0][above, :]
            dets = torch.cat(
                [cls_boxes, cls_scores.contiguous().view(-1, 1)], dim=1)
            dets = dets[ranking]
            dets = dets[nms(dets, 0.3).view(-1).long()]

            colour = COLOR_WHEEL[cls_ind]
            for det in dets:
                bbox = det[:4].cpu().numpy().flatten()
                score = det[4]
                ax.add_patch(
                    plt.Rectangle((bbox[0], bbox[1]),
                                  bbox[2] - bbox[0],
                                  bbox[3] - bbox[1],
                                  fill=False,
                                  edgecolor=colour,
                                  linewidth=3.5))
                ax.text(bbox[0],
                        bbox[1] - 2,
                        '{:s} {:.3f}'.format(class_name, score),
                        bbox=dict(facecolor=colour, alpha=0.5),
                        fontsize=14,
                        color='white')

        # Save image with bboxes overlaid
        plt.axis('off')
        plt.tight_layout()
        out_dir = video_dataset._output_dir
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)
        plt.savefig(os.path.join(out_dir, os.path.basename(img_path)))
        plt.clf()
        plt.close('all')
def nms_detections(obj_rois, obj_scores):
    """Return what ``nms`` yields (threshold 0.9) for the rois scored by ``obj_scores``.

    The scores are appended to ``obj_rois`` as a trailing column before the call.
    """
    dets = torch.cat((obj_rois, obj_scores.unsqueeze(1)), 1)
    return nms(dets, 0.9)
def bbox_proposal(obj_prob, att_prob, rois, conf_thresh=0.2, thresh=0.01):
    """Pick the boxes to keep after per-class NMS, with a class and score each.

    For every non-background class, rois scoring above ``thresh`` are passed
    through ``nms`` (threshold 0.5).  Each roi that survives NMS for at least
    one class records its highest such score (``max_conf``) and the class that
    produced it (``max_index``).  Rois with ``max_conf >= conf_thresh`` are
    kept; the selection is then padded up to ``MIN_BOXES`` or truncated to
    ``MAX_BOXES`` by descending ``max_conf``.

    :param obj_prob: 2-D class probabilities, one row per roi, column 0 = background
    :param att_prob: accepted but not used
    :param rois: one row per roi, aligned with ``obj_prob``
    :param conf_thresh: minimum post-NMS confidence for a roi to be kept
    :param thresh: per-class score threshold applied before NMS
    :return: ``(keep_boxes, keep_clss, keep_scores, num_boxes)`` where
        ``num_boxes`` is the count that passed ``conf_thresh`` before any
        padding or truncation
    """
    # Best non-background class and score per roi, used only for padding.
    scores, clss = obj_prob[:, 1:].max(1)
    clss = clss.view(-1) + 1
    scores = scores.view(-1)

    max_conf = obj_prob.new(obj_prob.size(0)).zero_()
    max_index = obj_prob.new(obj_prob.size(0)).zero_().long()

    for obj_cls_ind in range(1, obj_prob.size(1)):
        obj_cls_scores = obj_prob[:, obj_cls_ind]
        inds = torch.nonzero(obj_cls_scores > thresh).view(-1)
        if inds.numel() == 0:
            continue

        obj_cls_boxes_p = rois[inds]
        obj_cls_scores_p = obj_cls_scores[inds]
        _, order = torch.sort(obj_cls_scores_p, 0, True)

        # The scores are 1-D; torch.cat along dim 1 needs them as a column,
        # otherwise it fails on mismatched dimensionality.
        cls_dets = torch.cat(
            (obj_cls_boxes_p, obj_cls_scores_p.unsqueeze(1)), 1)
        cls_dets = cls_dets[order]
        keep = nms(cls_dets, 0.5).view(-1).long()

        # Map NMS indices (into the sorted subset) back to roi rows.
        keep_idx = inds[order[keep]]

        tmp_idx = torch.nonzero(
            obj_cls_scores[keep_idx] > max_conf[keep_idx]).view(-1)
        if tmp_idx.numel() != 0:
            max_index[keep_idx[tmp_idx]] = obj_cls_ind
        max_conf[keep_idx] = torch.max(obj_cls_scores[keep_idx],
                                       max_conf[keep_idx])

    keep_boxes = (max_conf >= conf_thresh).nonzero().view(-1)
    num_boxes = keep_boxes.numel()

    if num_boxes < MIN_BOXES:
        # Pad with the next-best rois; entries beyond the first ``num_boxes``
        # take their class and score from the plain per-roi maximum.
        _, order = torch.sort(max_conf, 0, True)
        keep_boxes = order[:MIN_BOXES]
        keep_clss = max_index[keep_boxes]
        all_clss = clss[keep_boxes]
        keep_clss[num_boxes:] = all_clss[num_boxes:]
        keep_scores = max_conf[keep_boxes]
        all_scores = scores[keep_boxes]
        keep_scores[num_boxes:] = all_scores[num_boxes:]
    elif num_boxes > MAX_BOXES:
        _, order = torch.sort(max_conf, 0, True)
        keep_boxes = order[:MAX_BOXES]
        keep_clss = max_index[keep_boxes]
        keep_scores = max_conf[keep_boxes]
    else:
        keep_clss = max_index[keep_boxes]
        keep_scores = max_conf[keep_boxes]

    return keep_boxes, keep_clss, keep_scores, num_boxes
def _get_single_obj_det_results(self, cls_prob, bbox_pred, im_info):
    """Decode, threshold and NMS the detections of a single image.

    Boxes are decoded from ``self.priors`` (with regression deltas when
    ``cfg.TEST.COMMON.BBOX_REG`` is set).  For each non-background class the
    boxes scoring above 0.05 go through ``nms`` and are then filtered by
    ``cfg.TEST.COMMON.OBJ_DET_THRESHOLD``.

    :return: a tensor with one row per kept detection: 4 box coordinates in
        training mode, or 4 coordinates plus the class index otherwise.
        An empty tensor of ``bbox_pred``'s type when nothing is kept.
    """
    scores = cls_prob.data
    thresh = 0.05  # filter out low confidence boxes for acceleration
    results = []

    if cfg.TEST.COMMON.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = bbox_pred.data
        if cfg.TRAIN.COMMON.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            # Optionally normalize targets by a precomputed mean and stdev
            stds = torch.FloatTensor(
                cfg.TRAIN.COMMON.BBOX_NORMALIZE_STDS).type_as(box_deltas)
            means = torch.FloatTensor(
                cfg.TRAIN.COMMON.BBOX_NORMALIZE_MEANS).type_as(box_deltas)
            box_deltas = box_deltas.view(-1, 4) * stds + means
            box_deltas = box_deltas.view(1, -1, 4)
        pred_boxes = bbox_transform_inv(
            self.priors.type_as(bbox_pred).data, box_deltas, 1)
        pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
    else:
        # Simply repeat the boxes, once for each class
        # NOTE(review): np.tile yields a numpy array while the code below
        # indexes and concatenates with torch tensors - confirm this branch
        # is ever exercised.
        pred_boxes = np.tile(self.priors.data, (1, scores.shape[1]))

    scores = scores.squeeze()
    pred_boxes = pred_boxes.squeeze()

    # range() instead of xrange(): xrange does not exist on Python 3.
    for j in range(1, self.num_classes):
        inds = torch.nonzero(scores[:, j] > thresh).view(-1)
        if inds.numel() == 0:
            continue

        cls_scores = scores[:, j][inds]
        _, order = torch.sort(cls_scores, 0, True)
        cls_boxes = pred_boxes[inds, :]
        cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
        cls_dets = cls_dets[order]
        keep = nms(cls_dets, cfg.TEST.COMMON.NMS)
        cls_dets = cls_dets[keep.view(-1).long()]

        # view(-1) keeps the index 1-D even for a single hit, so the result
        # is always 2-D and needs no re-unsqueezing.
        final_keep = torch.nonzero(
            cls_dets[:, -1] > cfg.TEST.COMMON.OBJ_DET_THRESHOLD).view(-1)
        if final_keep.numel() == 0:
            continue

        result = cls_dets[final_keep]
        if self.training:
            result = result[:, :4]
        else:
            # in testing, concat object labels
            labels = j * torch.ones(result.size(0), 1).type_as(result)
            result = torch.cat([result[:, :4], labels], 1)
        results.append(result)

    if len(results):
        final = torch.cat(results, 0)
    else:
        final = torch.Tensor([]).type_as(bbox_pred)
    return final
def detect(file_path, NMS_THRESH=0.3):
    """Detect class-1 objects in the image at ``file_path``.

    Runs ``im_detect`` with the module-level ``net``, takes the scores of
    class 1 and box columns 4..7, and filters them with ``nms``.

    :return: float32 array of the kept rows, 4 box columns followed by the score
    """
    image = cv2.imread(file_path)
    all_scores, all_boxes = im_detect(net, image)

    score_column = all_scores[:, 1][:, np.newaxis]
    candidates = np.hstack((all_boxes[:, 4:8], score_column))
    candidates = candidates.astype(np.float32)

    survivors = nms(candidates, NMS_THRESH)
    return candidates[survivors, :]
def detect(net, im_path, device, thresh=0.5, visualize=False, timers=None, pyramid=False,
           visualization_folder=None):
    """
    Main module to detect faces
    :param net: The trained network
    :param im_path: The path to the image
    :param device: GPU or CPU device to be used
    :param thresh: Detections with a score lower than thresh are ignored
    :param visualize: Whether to visualize the detections
    :param timers: Timers for calculating detect time (if falsy, new timers are created)
    :param pyramid: Whether to use pyramid during inference (not implemented)
    :param visualization_folder: If set the visualizations would be saved in this folder (if visualize=True)
    :return: cls_dets (bounding boxes concatenated with scores) and the timers
    :raises NotImplementedError: if ``pyramid`` is true
    """
    if not timers:
        timers = {'detect': Timer(), 'misc': Timer()}

    im = cv2.imread(im_path)
    imfname = os.path.basename(im_path)
    sys.stdout.flush()

    timers['detect'].tic()
    if pyramid:
        # An explicit raise instead of ``assert False``: asserts are stripped
        # under ``python -O``, which would leave ``ssh_rois`` unbound below.
        raise NotImplementedError('not implement')
    im_scale = _compute_scaling_factor(im.shape, cfg.TEST.SCALES[0],
                                       cfg.TEST.MAX_SIZE)
    im_blob = _get_image_blob(im, [im_scale])[0]
    ssh_rois = forward(net, im_blob, im_scale, device, thresh)
    timers['detect'].toc()

    timers['misc'].tic()
    nms_keep = nms(ssh_rois, cfg.TEST.RPN_NMS_THRESH)
    cls_dets = ssh_rois[nms_keep, :]

    if visualize:
        plt_name = os.path.splitext(imfname)[0] + '_detections_{}'.format(
            "SSH pytorch")
        visusalize_detections(im,
                              cls_dets,
                              plt_name=plt_name,
                              visualization_folder=visualization_folder)
    timers['misc'].toc()
    return cls_dets, timers
def generate_paths(self):
    """Build linked detection paths for every non-background class.

    For each class and each frame pair, the first-frame detections with a
    positive score are sorted, cut to the 50 best, passed through ``nms``
    (threshold 0.3) and cut again to the first 10 survivors.  The per-frame
    boxes, class scores and full score rows are handed to
    ``self.incremental_linking`` and the result stored in
    ``self.all_paths[cls_ix]``.
    """
    for cls_ix in range(1, self.num_classes):  # skip background
        # Builtin ``object`` instead of ``np.object``: the alias was removed
        # from NumPy, while ``object`` works on every version.
        all_scores = np.ndarray(shape=(self.num_frame_pairs, ),
                                dtype=object)
        cls_boxes = np.ndarray(shape=(self.num_frame_pairs, ),
                               dtype=object)
        cls_scores = np.ndarray(shape=(self.num_frame_pairs, ),
                                dtype=object)

        print('Class: {}'.format(self.classes[cls_ix]))
        self._curr_class = self.classes[cls_ix]

        for pair_ix in range(self.num_frame_pairs):
            boxes_t0 = self.pred_boxes[pair_ix][0].clone()
            scores_t0 = self.scores[pair_ix][0][:, cls_ix].clone()
            pick = torch.nonzero(scores_t0 > 0.0).view(-1)

            # NOTE(review): this assert makes the empty-frame branch below
            # unreachable unless asserts are disabled (python -O); decide
            # which of the two behaviours is intended.
            assert pick.numel() > 0, "No detections found for this class."
            # If no good scores for this frame/class, go to next frame
            if pick.numel() == 0:
                all_scores[pair_ix] = torch.cuda.FloatTensor(
                    0)  # empty tensor
                cls_boxes[pair_ix] = torch.cuda.FloatTensor(
                    0)  # empty tensor
                cls_scores[pair_ix] = torch.cuda.FloatTensor(
                    0)  # empty tensor
                continue

            # Get scores that passed filter and sort highest-->lowest
            scores_t0 = scores_t0[pick]
            boxes_t0 = boxes_t0[pick, :]
            all_scores_t0 = self.scores[pair_ix][0][pick, :]
            _, pick = torch.sort(scores_t0, descending=True)

            # Take at most 50 per frame per class
            to_pick = min(50, pick.numel())
            pick = pick[:to_pick]
            scores_t0 = scores_t0[pick]
            boxes_t0 = boxes_t0[pick, :]
            all_scores_t0 = all_scores_t0[pick, :]

            cls_dets_t0 = torch.cat(
                [boxes_t0, scores_t0.contiguous().view(-1, 1)], dim=1)
            pick = nms(cls_dets_t0, 0.3)
            # TODO check pick is sorted in descending order

            # Take top 10 dets after nms
            pick = pick.view(-1).long()
            pick = pick[:min(10, pick.numel())]
            print(pick)

            cls_boxes[pair_ix] = boxes_t0[pick, :].clone()
            cls_scores[pair_ix] = scores_t0[pick].clone()
            all_scores[pair_ix] = all_scores_t0[pick, :].clone()

        paths = self.incremental_linking(cls_boxes, cls_scores, all_scores)
        self.all_paths[cls_ix] = paths
def load_predict(predict_file, topn):
    """
    Run NMS within each class, then select the ``topn`` highest-scoring
    boxes across all classes.

    :param predict_file: path to a pickle holding ``'bbox'``, ``'cls_prob'``
        and ``'roi_feat'`` entries
    :param topn: number of boxes to select
    :return: ``(select_bbox, select_classme, select_feat)`` as numpy arrays:
        the selected box coordinates, and the class-probability rows and ROI
        features of the rois they came from
    :raises ValueError: if no class yields any detection
    """
    # NOTE: unpickling can execute arbitrary code - only load trusted files.
    with open(predict_file, 'rb') as fp:
        predict = pkl.load(fp)
    pred_boxes = predict['bbox'].squeeze()
    scores = predict['cls_prob'].squeeze()
    roi_feat = predict['roi_feat'].squeeze()
    pthresh = 0.00001

    bbox_chunks = []
    roi_index_chunks = []
    for j in range(1, 81):  # index 0 is the background class
        inds = torch.nonzero(scores[:, j] > pthresh).view(-1)
        if len(inds) == 0:
            continue
        cls_scores = scores[:, j][inds]
        _, order = torch.sort(cls_scores, 0, True)
        cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]
        cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
        cls_dets = cls_dets[order]
        keep = nms(cls_dets, 0.4, force_cpu=0).view(-1).long()
        if len(keep) == 0:
            continue

        bbox_chunks.append(cls_dets[keep].cpu().data.numpy())
        # ``keep`` indexes the thresholded, sorted ``cls_dets``.  Map it back
        # to rows of the original roi arrays: ``scores`` and ``roi_feat`` are
        # indexed with these values further down, so raw NMS indices would
        # pick rows belonging to different rois.
        roi_index_chunks.append(inds[order][keep].cpu().data.numpy())

    if not bbox_chunks:
        raise ValueError(
            'no detections above threshold in {}'.format(predict_file))

    bbox = np.vstack(bbox_chunks)
    keep_inds = np.concatenate(roi_index_chunks)

    # Ascending argsort on the score column; the last ``topn`` are the best.
    rank_ind = bbox[:, -1].argsort()
    select_inds = keep_inds[rank_ind][-topn:]
    select_classme = scores[select_inds, :].squeeze()
    select_feat = roi_feat[select_inds, :].squeeze()
    select_bbox = bbox[rank_ind][-topn:, 0:4]
    return (select_bbox,
            select_classme.cpu().data.numpy(),
            select_feat.cpu().data.numpy())
def detect(self, img_data):
    """Detect class-1 objects in an encoded image.

    :param img_data: bytes-like object holding an encoded image (decoded
        with ``cv2.imdecode``)
    :return: list of 4-element numpy arrays, one box per detection that
        scored above 0.7 and survived NMS, in original-image coordinates;
        an empty list when nothing is detected
    """
    # np.frombuffer replaces binary-mode np.fromstring, which is deprecated
    # and rejected by recent NumPy releases.
    np_arr = np.frombuffer(img_data, np.uint8)
    image = cv2.imdecode(np_arr, cv2.IMREAD_COLOR)

    blobs, im_scales = get_image_blob(image)
    im_blob = blobs
    im_info_np = np.array(
        [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
        dtype=np.float32)

    im_data_pt = torch.from_numpy(im_blob)
    im_data_pt = im_data_pt.permute(0, 3, 1, 2)
    im_info_pt = torch.from_numpy(im_info_np)

    # Copy the inputs into the pre-allocated holders; no ground truth is
    # supplied, so gt_boxes and num_boxes are zeroed.
    self.im_data.data.resize_(im_data_pt.size()).copy_(im_data_pt)
    self.im_info.data.resize_(im_info_pt.size()).copy_(im_info_pt)
    self.gt_boxes.data.resize_(1, 1, 5).zero_()
    self.num_boxes.data.resize_(1).zero_()

    rois, cls_prob, bbox_pred, *_ = \
        self.model(self.im_data, self.im_info, self.gt_boxes, self.num_boxes)

    scores = cls_prob.data
    boxes = rois.data[:, :, 1:5]

    # Undo the target normalisation, then apply the regression deltas.
    box_deltas = bbox_pred.data
    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
    box_deltas = box_deltas.view(1, -1, 4 * len(self.classes))

    pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
    pred_boxes = clip_boxes(pred_boxes, self.im_info.data, 1)
    pred_boxes /= im_scales[0]  # back to original-image scale

    scores = scores.squeeze()
    pred_boxes = pred_boxes.squeeze()

    thresh = 0.7
    inds = torch.nonzero(scores[:, 1] > thresh).view(-1)
    cls_dets = []
    if inds.numel() > 0:
        cls_scores = scores[:, 1][inds]
        _, order = torch.sort(cls_scores, 0, True)
        cls_boxes = pred_boxes[inds][:, 4:8]  # box columns of class 1
        cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
        cls_dets = cls_dets[order]
        keep = nms(cls_dets, cfg.TEST.NMS, force_cpu=False)
        cls_dets = cls_dets[keep.view(-1).long()]
        cls_dets = cls_dets.cpu().numpy()
        # Drop the score column; callers only get the coordinates.
        cls_dets = [cls_det[:4] for cls_det in cls_dets]
    return cls_dets
def from_dets(imdb_name, output_dir, args):
    """Evaluate the detections stored in ``<output_dir>/detections.pkl``.

    :param imdb_name: name passed to ``get_imdb``
    :param output_dir: directory holding ``detections.pkl``; also passed to
        the evaluation
    :param args: options object; ``comp_mode``, ``matlab_eval`` and
        ``apply_nms`` are read
    """
    imdb = get_imdb(imdb_name)
    imdb.competition_mode(args.comp_mode)
    imdb.config['matlab_eval'] = args.matlab_eval

    # NOTE: unpickling can execute arbitrary code - only load trusted files.
    with open(os.path.join(output_dir, 'detections.pkl'), 'rb') as f:
        dets = cPickle.load(f)

    # print() calls instead of print statements: the statement form is a
    # syntax error on Python 3, and the call form prints the same text on both.
    if args.apply_nms:
        print('Applying NMS to all detections')
        # NOTE(review): ``dets`` is passed to ``nms`` unchanged - confirm
        # ``nms`` accepts the pickled structure.
        nms_dets = nms(dets, cfg.TEST.NMS)
    else:
        nms_dets = dets

    print('Evaluating detections')
    imdb.evaluate_detections(nms_dets, output_dir)
def apply_nms(all_boxes, thresh):
    """Apply non-maximum suppression to all predicted boxes output by the
    test_net method.

    :param all_boxes: ``all_boxes[cls][image]`` holds that cell's detections
        as a 2-D numpy array (or an empty list when there are none)
    :param thresh: threshold forwarded to ``nms``
    :return: nested list of the same layout holding the kept rows; cells
        without detections stay ``[]``
    """
    num_classes = len(all_boxes)
    num_images = len(all_boxes[0])
    nms_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]
    for cls_ind in range(num_classes):
        for im_ind in range(num_images):
            dets = all_boxes[cls_ind][im_ind]
            # len() covers both the ``[]`` placeholder and zero-row arrays;
            # ``dets == []`` on an ndarray is an element-wise comparison,
            # not an emptiness test.
            if len(dets) == 0:
                continue
            keep = nms(torch.FloatTensor(dets).cuda(), thresh)
            # ``dets`` is a numpy array, so index it with a flat CPU integer
            # array rather than with the tensor nms hands back.
            keep = keep.view(-1).long().cpu().numpy()
            if len(keep) == 0:
                continue
            nms_boxes[cls_ind][im_ind] = dets[keep, :].copy()
    return nms_boxes
def _make_tubes(self, frameBoxes, max_per_image, box_voting, tracks_cell):
    '''Build tubes for cth class.

    Every frame but the last has its boxes passed through ``nms``
    (threshold 0.3) and cut to at most ``max_per_image``; the per-frame
    results are linked by ``self._zero_jump_link``.

    :param frameBoxes: per-frame tensors whose first five columns are passed
        to ``nms``; columns 0..3 are stored as boxes, column 4 as scores
    :param max_per_image: maximum number of boxes kept per frame
    :param box_voting: not implemented; must be falsy
    :param tracks_cell: ``(boxes, scores, c)`` tracking data
    :return: whatever ``self._zero_jump_link`` returns
    :raises NotImplementedError: if ``box_voting`` is true
    '''
    tracks = {
        'boxes': tracks_cell[0],
        'scores': tracks_cell[1],
        'c': tracks_cell[2],
    }
    nms_thresh = 0.3
    n_frames = len(frameBoxes)

    # Builtin ``object`` instead of ``np.object``: the alias was removed from
    # NumPy, while ``object`` works on every version.
    object_frames_boxes = np.ndarray((n_frames, ), dtype=object)
    object_frames_scores = np.ndarray((n_frames, ), dtype=object)
    object_frames_boxes_idx = np.ndarray((n_frames, ), dtype=object)
    object_frames_trackedboxes = np.ndarray((n_frames, ), dtype=object)

    # The last frame is skipped; its entries keep their initial contents.
    for f in range(n_frames - 1):
        # boxes in frame f
        boxes = frameBoxes[f]
        if box_voting:
            # TODO
            raise NotImplementedError

        nms_idx = nms(boxes[:, :5].clone(), nms_thresh).long().view(-1)
        if nms_idx.numel() > max_per_image:
            nms_idx = nms_idx[:max_per_image]
        boxes = boxes[nms_idx]

        object_frames_boxes[f] = boxes[:, :4]
        object_frames_scores[f] = boxes[:, 4]
        object_frames_boxes_idx[f] = torch.arange(boxes.size(0)).cuda()
        if tracks['boxes'] is not None and tracks['boxes'][
                f, 0] is not None:
            object_frames_trackedboxes[f] = tracks['boxes'][f, :]

    paths = self._zero_jump_link(object_frames_boxes, object_frames_scores,
                                 object_frames_boxes_idx,
                                 object_frames_trackedboxes)
    return paths
def non_max_suppression(self, ssh_rois: List[torch.Tensor]):
    """
    Run NMS on each set of ROIs produced by the SSH network.

    Arguments:
        ssh_rois {List[torch.Tensor]} -- one ROI tensor per entry

    Returns:
        bounding_boxes {[numpy.ndarray]} -- for each entry, the ROI rows
        that ``nms`` kept
    """
    # NOTE: the ROIs are moved to the CPU and handled as numpy arrays here
    # rather than as CUDA tensors.  The original author reports that the GPU
    # variant (./model/nms/gpu_nms.pyx) did not work on their machine and that
    # NMS takes very little time as is.
    kept_per_entry = []
    for roi_tensor in ssh_rois:
        roi_np = roi_tensor.cpu().numpy()
        survivors = nms(roi_np, cfg.TEST.RPN_NMS_THRESH)
        kept_per_entry.append(roi_np[survivors, :])
    return kept_per_entry
def normalized_predictions(scores, boxes, img2, thresh=0.6, NMS=0.3):
    """Decode boxes, run per-class NMS and draw the detections on a copy of ``img2``.

    ``bbox_pred``, ``classes``, ``im_info`` and ``im_scales`` are read from
    the enclosing scope.

    :param scores: class scores, squeezed to one row per roi
    :param boxes: roi boxes
    :param img2: image to draw on (copied, not modified)
    :param thresh: per-class score threshold applied before NMS
    :param NMS: threshold forwarded to ``nms``
    :return: ``(im2show, dets)`` where ``dets`` holds the detections of the
        LAST class that had any, as a numpy array (4 box columns + score),
        or an empty ``(0, 5)`` array when no class had a detection
    """
    if cfg.TEST.BBOX_REG:
        box_deltas = bbox_pred.data
        if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                         + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
            box_deltas = box_deltas.view(1, -1, 4 * len(classes))
        pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
        pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
    else:
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    pred_boxes /= im_scales[0]
    scores = scores.squeeze()
    pred_boxes = pred_boxes.squeeze()
    im2show = np.copy(img2)

    # Previously the return statement read ``cls_dets`` even when the loop
    # never assigned it, which raised when nothing passed the threshold.
    last_dets = np.zeros((0, 5), dtype=np.float32)

    # Iterating over all classes
    for j in range(1, len(classes)):
        inds = torch.nonzero(scores[:, j] > thresh).view(-1)
        if inds.numel() == 0:
            continue
        cls_scores = scores[:, j][inds]
        _, order = torch.sort(cls_scores, 0, True)
        cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]
        cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
        cls_dets = cls_dets[order]
        keep = nms(cls_dets, NMS, force_cpu=not cfg.USE_GPU_NMS)
        cls_dets = cls_dets[keep.view(-1).long()]
        last_dets = cls_dets.cpu().numpy()
        im2show = vis_detections(im2show, classes[j], last_dets, 0.5)

    return im2show, last_dets
def bbox_proposal_fast(obj_prob, att_prob, rois):
    """Select up to ``MIN_BOXES`` rois per batch item by joint object/attribute score.

    Each roi is scored with the product of its best non-background object
    probability and its best non-background attribute probability.  Per batch
    item the rois are ranked by that score, passed through ``nms``
    (threshold 0.5), and the first ``MIN_BOXES`` survivors are copied out.

    :return: ``(rois_pop, rois_pop_id)`` - the selected rois and their indices
        into ``rois``; unused slots stay zero
    """
    batch_size = obj_prob.size(0)

    # Best class probability per roi, background (index 0) excluded.
    best_obj_prob, _ = obj_prob[:, :, 1:].max(2)
    best_att_prob, _ = att_prob[:, :, 1:].max(2)

    # Final score, B x N, and its descending ranking per batch item.
    obj_att_scores = best_obj_prob * best_att_prob
    _, order = torch.sort(obj_att_scores, 1, True)

    rois_pop = rois.new(batch_size, MIN_BOXES, rois.size(2)).zero_()
    rois_pop_id = rois.new(batch_size, MIN_BOXES).long().zero_()

    for i in range(batch_size):
        ranked = order[i]
        # Column 0 of each roi is dropped before NMS.
        proposals_i = rois[i][ranked][:, 1:]
        scores_i = obj_att_scores[i][ranked].view(-1, 1)
        kept = nms(torch.cat((proposals_i, scores_i), 1), 0.5)
        kept = kept.long().view(-1)

        n_kept = min(kept.size(0), MIN_BOXES)
        chosen = ranked[kept[:n_kept]]
        rois_pop[i][:n_kept] = rois[i][chosen]
        rois_pop_id[i][:n_kept] = chosen

    return rois_pop, rois_pop_id
def vis_dets(img, im_info, rois, bbox_pred, obj_cls_prob, imdb):
    """Draw the per-class NMS-filtered detections and write ``detections.jpg``.

    :param img: image tensor; permuted with (2, 3, 1, 0), squeezed and offset
        by ``cfg.PIXEL_MEANS`` before drawing
    :param im_info: image info used to clip the predicted boxes
    :param rois: roi tensor; columns 1..4 of the last axis are the boxes
    :param bbox_pred: regression deltas, used when ``cfg.TEST.BBOX_REG`` is set
    :param obj_cls_prob: class probabilities, squeezed to one row per roi
    :param imdb: dataset object; ``imdb._classes`` supplies the class names
    """
    # The leftover ``pdb.set_trace()`` was removed: it stopped every call in
    # the debugger.
    im2show = img.data.permute(2, 3, 1, 0).squeeze().cpu().numpy()
    im2show += cfg.PIXEL_MEANS
    thresh = 0.01

    boxes = rois[:, :, 1:5]
    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = bbox_pred
        if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            # Optionally normalize targets by a precomputed mean and stdev
            box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                         + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
            box_deltas = box_deltas.view(1, -1, 4)
        pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
        pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
    else:
        # Without regression fall back to the raw roi boxes; previously
        # ``pred_boxes`` was left unbound on this path.
        pred_boxes = boxes

    obj_scores = obj_cls_prob.squeeze()
    pred_boxes = pred_boxes.squeeze()

    # range() instead of xrange(): xrange does not exist on Python 3.
    for j in range(1, len(imdb._classes)):
        inds = torch.nonzero(obj_scores[:, j] > thresh).view(-1)
        if inds.numel() == 0:
            continue
        obj_cls_scores = obj_scores[:, j][inds]
        _, order = torch.sort(obj_cls_scores, 0, True)
        cls_boxes = pred_boxes[inds, :]
        # The scores are 1-D; torch.cat along dim 1 needs them as a column.
        cls_dets = torch.cat((cls_boxes, obj_cls_scores.unsqueeze(1)), 1)
        cls_dets = cls_dets[order]
        keep = nms(cls_dets, cfg.TEST.NMS)
        cls_dets = cls_dets[keep.view(-1).long()]
        im2show = vis_detections(im2show, imdb._classes[j],
                                 cls_dets.cpu().numpy(), 0.2)

    # save image to disk
    cv2.imwrite("detections.jpg", im2show)
# Per-class post-processing: threshold, sort, NMS, then write one CSV row per
# kept detection (image name, class name, four box coordinates).
# range() instead of xrange(): xrange does not exist on Python 3.
for j in range(1, len(pascal_classes)):
    inds = torch.nonzero(scores[:, j] > thresh).view(-1)
    # Skip classes without any detection above the threshold.
    if inds.numel() == 0:
        continue

    cls_scores = scores[:, j][inds]
    _, order = torch.sort(cls_scores, 0, True)
    if args.class_agnostic:
        cls_boxes = pred_boxes[inds, :]
    else:
        cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

    cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
    cls_dets = cls_dets[order]
    keep = nms(cls_dets, cfg.TEST.NMS, force_cpu=not cfg.USE_GPU_NMS)
    cls_dets = cls_dets[keep.view(-1).long()]

    # Converted once and reused for both the CSV rows and the drawing.
    dets = cls_dets.cpu().numpy()
    for i in range(dets.shape[0]):
        writer.writerow([
            imglist[num_images], pascal_classes[j], dets[i, 0],
            dets[i, 1], dets[i, 2], dets[i, 3]
        ])
    if vis:
        im2show = vis_detections(im2show, pascal_classes[j], dets, 0.5)

misc_toc = time.time()
nms_time = misc_toc - misc_tic
def forward(self, input):
    """Turn pyramid anchors and predicted deltas into a fixed-size set of RoIs.

    ``input`` is indexed as (class scores, bbox deltas, im_info, cfg key,
    feature shapes).  Steps:

      1. generate the anchors of all pyramid levels
      2. apply the predicted bbox deltas to the anchors
      3. clip the resulting boxes to the image
      4. sort all (proposal, score) pairs by score, highest first
      5. take the top pre_nms_topN proposals
      6. apply NMS to the remaining proposals
      7. take the top post_nms_topN proposals after NMS

    :return: tensor of shape (batch_size, post_nms_topN, 5); column 0 holds
        the batch index, columns 1..4 the proposal, zero-padded at the end
    """
    # Channel 1 of the last axis is taken as the score.
    scores = input[0][:, :, 1]
    bbox_deltas = input[1]  # batch_size x num_rois x 4
    im_info = input[2]
    cfg_key = input[3]
    feat_shapes = input[4]

    phase_cfg = cfg[cfg_key]
    pre_nms_topN = phase_cfg.RPN_PRE_NMS_TOP_N
    post_nms_topN = phase_cfg.RPN_POST_NMS_TOP_N
    nms_thresh = phase_cfg.RPN_NMS_THRESH
    min_size = phase_cfg.RPN_MIN_SIZE  # read but not applied below

    batch_size = bbox_deltas.size(0)

    pyramid_anchors = generate_anchors_all_pyramids(
        self._fpn_scales, self._anchor_ratios, feat_shapes,
        self._fpn_feature_strides, self._fpn_anchor_stride)
    anchors = torch.from_numpy(pyramid_anchors).type_as(scores)
    num_anchors = anchors.size(0)
    anchors = anchors.view(1, num_anchors, 4)
    anchors = anchors.expand(batch_size, num_anchors, 4)

    # Convert anchors into proposals via bbox transformations, then clip
    # the predicted boxes to the image.
    proposals = bbox_transform_inv(anchors, bbox_deltas, batch_size)
    proposals = clip_boxes(proposals, im_info, batch_size)

    # No min-size filtering is applied; every proposal is kept.
    scores_keep = scores
    proposals_keep = proposals

    _, order = torch.sort(scores_keep, 1, True)

    output = scores.new(batch_size, post_nms_topN, 5).zero_()
    for i in range(batch_size):
        # Ranking by score, cut to the top pre_nms_topN (e.g. 6000).
        order_single = order[i]
        if 0 < pre_nms_topN < scores_keep.numel():
            order_single = order_single[:pre_nms_topN]

        proposals_single = proposals_keep[i][order_single, :]
        scores_single = scores_keep[i][order_single].view(-1, 1)

        # NMS (e.g. threshold = 0.7), then cut to post_nms_topN (e.g. 300).
        keep_idx_i = nms(torch.cat((proposals_single, scores_single), 1),
                         nms_thresh)
        keep_idx_i = keep_idx_i.long().view(-1)
        if post_nms_topN > 0:
            keep_idx_i = keep_idx_i[:post_nms_topN]
        proposals_single = proposals_single[keep_idx_i, :]
        scores_single = scores_single[keep_idx_i, :]

        # Rows past num_proposal stay zero (padding).
        num_proposal = proposals_single.size(0)
        output[i, :, 0] = i
        output[i, :num_proposal, 1:] = proposals_single

    return output
def bbox_selection(self, relation, pred_boxes, scores, pooled_feat):
    """
    Delete boxes with low scores, run NMS and drop boxes that are too small,
    for the subject and object class of a relation.

    :param relation: sequence whose first item is a ``'subject-predicate-object'``
        string; subject and object must be names in ``self.classes``
    :param pred_boxes: predicted boxes, squeezed to one row per roi
    :param scores: class scores, squeezed to one row per roi
    :param pooled_feat: per-roi features, assumed row-aligned with ``scores``
    :return: ``(class_bboxes, class_pros, class_feats)`` - dicts keyed by 0
        (subject) and 1 (object) holding numpy arrays of the kept detections,
        their full score rows and their features; a key is absent when that
        class has no surviving box
    """
    scores = scores.squeeze()
    pred_boxes = pred_boxes.squeeze()
    class_bboxes = {}
    class_feats = {}
    class_pros = {}

    sub, pre, obj = relation[0].split('-')
    sind, oind = self.classes.index(sub), self.classes.index(obj)

    for c, j in enumerate([sind, oind]):
        inds = torch.nonzero(scores[:, j] > self.pthresh).view(-1)
        if inds.numel() == 0:
            continue

        cls_scores = scores[:, j][inds]
        _, order = torch.sort(cls_scores, 0, True)
        if self.class_agnostic:
            cls_boxes = pred_boxes[inds, :]
        else:
            cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]
        cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
        cls_dets = cls_dets[order]

        keep = nms(cls_dets, cfg.TEST.NMS, force_cpu=not cfg.USE_GPU_NMS)
        # Keep the index on the detections' device instead of forcing CUDA.
        keep = keep.view(-1).long().to(cls_dets.device)

        # Drop boxes narrower or shorter than 5 pixels.  The width is
        # x2 - x1 (columns 2 and 0); it used to be computed as x2 - y1.
        kept = cls_dets[keep]
        widths = kept[:, 2] - kept[:, 0]
        heights = kept[:, 3] - kept[:, 1]
        keep = keep[(widths >= 5.0) & (heights >= 5.0)]
        if keep.numel() == 0:
            continue

        # ``keep`` indexes the thresholded, sorted ``cls_dets``; features and
        # score rows live in roi order, so map the indices back first.
        roi_idx = inds[order][keep]

        cls_dets = cls_dets[keep]
        cls_feats = pooled_feat[roi_idx]
        cls_pros = scores[roi_idx]

        class_bboxes[c] = cls_dets.data.cpu().numpy()
        class_feats[c] = cls_feats.data.cpu().numpy()
        class_pros[c] = cls_pros.data.cpu().numpy()

        if c == 0 and sind == oind:
            # Subject and object share a class: reuse the result for both.
            class_bboxes[1] = cls_dets.data.cpu().numpy()
            class_feats[1] = cls_feats.data.cpu().numpy()
            class_pros[1] = cls_pros.data.cpu().numpy()
            break

    return class_bboxes, class_pros, class_feats
_, order = torch.sort(cls_scores, 0, True) if args.class_agnostic: cls_boxes = pred_boxes[inds, :] else: cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4] cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1) cls_dets = cls_dets[order] if args.soft_nms: np_dets = cls_dets.cpu().numpy().astype(np.float32) keep = soft_nms(np_dets, method=cfg.TEST.SOFT_NMS_METHOD ) # np_dets will be changed keep = torch.from_numpy(keep).type_as(cls_dets).int() cls_dets = torch.from_numpy(np_dets).type_as(cls_dets) else: keep = nms(cls_dets, 0.1) cls_dets = cls_dets[keep.view(-1).long()] cls_dets = cls_dets.cpu().numpy() else: cls_dets = np.array([]) if vis: im2show = vis_detections(final_result, imglist[i], im2show, classes[j], cls_dets, thresh=args.thresh) misc_toc = time.time() nms_time = misc_toc - misc_tic
def evaluator(model, args, evl_rec=False):
    """Run ``model`` over the validation imdb and score its detections.

    The dataset name in ``args.dataset`` selects the imdb names and anchor
    settings, the config file is derived from ``args.net`` and
    ``args.large_scale``, and every image is pushed through the network one
    at a time.

    :param model: the Faster R-CNN network to evaluate
    :param args: namespace providing ``dataset``, ``net``, ``large_scale``,
        ``cuda`` and ``class_agnostic`` (``set_cfgs`` must already exist
        for datasets other than the two handled here)
    :param evl_rec: when true, only the recall of the raw ROIs is measured
        and the running average is returned instead of mAP
    :return: ``recall.avg`` if ``evl_rec`` else the value returned by
        ``imdb.evaluate_detections``
    """
    fasterRCNN = model
    np.random.seed(cfg.RNG_SEED)
    if args.dataset == "pascal_voc":
        args.imdb_name = "voc_2007_trainval"
        args.imdbval_name = "voc_2007_test"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]'
        ]
    elif args.dataset == "pascal_voc_0712":
        args.imdb_name = "voc_2007_trainval+voc_2012_trainval"
        args.imdbval_name = "voc_2007_test"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]'
        ]

    args.cfg_file = "cfgs/{}_ls.yml".format(
        args.net) if args.large_scale else "cfgs/{}.yml".format(args.net)

    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    print('Using config:')
    pprint.pprint(cfg)

    cfg.TRAIN.USE_FLIPPED = False
    imdb, roidb, ratio_list, ratio_index = combined_roidb(
        args.imdbval_name, False)
    imdb.competition_mode(on=True)

    print('{:d} roidb entries'.format(len(roidb)))

    # Placeholder tensors that are resized and refilled for every image.
    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)

    if args.cuda:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()

    im_data = Variable(im_data)
    im_info = Variable(im_info)
    num_boxes = Variable(num_boxes)
    gt_boxes = Variable(gt_boxes)

    if args.cuda:
        cfg.CUDA = True
        fasterRCNN.cuda()

    start = time.time()
    max_per_image = 100

    vis = False
    thresh = 0.05 if vis else 0.0

    save_name = 'faster_rcnn_10'
    num_images = len(imdb.image_index)
    # all_boxes[class][image] -> (n, 5) array of x1, y1, x2, y2, score
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(imdb.num_classes)]

    output_dir = get_output_dir(imdb, save_name)

    # These models are pytorch pretrained with RGB channel
    rgb = args.net in ('res18', 'res34', 'inception')

    dataset = roibatchLoader(roidb, ratio_list, ratio_index, 1,
                             imdb.num_classes, training=False,
                             normalize=False, rgb=rgb)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=0,
                                             pin_memory=True)
    data_iter = iter(dataloader)

    det_file = os.path.join(output_dir, 'detections.pkl')

    fasterRCNN.eval()
    empty_array = np.transpose(np.array([[], [], [], [], []]), (1, 0))
    if evl_rec:
        recall = AverageMeter()

    for i in range(num_images):
        data = next(data_iter)
        im_data.data.resize_(data[0].size()).copy_(data[0])
        im_info.data.resize_(data[1].size()).copy_(data[1])
        gt_boxes.data.resize_(data[2].size()).copy_(data[2])
        num_boxes.data.resize_(data[3].size()).copy_(data[3])

        det_tic = time.time()
        rois, cls_prob, bbox_pred, \
        rpn_loss_cls, rpn_loss_box, \
        RCNN_loss_cls, RCNN_loss_bbox, \
        rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)

        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]

        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred.data
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Undo the precomputed mean/stdev normalisation. type_as()
                # keeps the constants on the same device as the deltas, so
                # this also works when args.cuda is false.
                stds = torch.FloatTensor(
                    cfg.TRAIN.BBOX_NORMALIZE_STDS).type_as(box_deltas)
                means = torch.FloatTensor(
                    cfg.TRAIN.BBOX_NORMALIZE_MEANS).type_as(box_deltas)
                box_deltas = box_deltas.view(-1, 4) * stds + means
                if args.class_agnostic:
                    box_deltas = box_deltas.view(1, -1, 4)
                else:
                    box_deltas = box_deltas.view(1, -1, 4 * len(imdb.classes))

            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
        else:
            # Simply repeat the boxes, once for each class (a single copy
            # in class-agnostic mode). Done with torch so the result stays
            # a tensor on the same device; repeat() copies, so the in-place
            # rescale below never touches `boxes`.
            n_rep = 1 if args.class_agnostic else scores.size(-1)
            pred_boxes = boxes.repeat(1, 1, n_rep)

        # Back to the coordinate frame of the original image.
        pred_boxes /= data[1][0][2].item()

        if evl_rec:
            # evaluate rpn recall only
            boxes_per_img = boxes.squeeze().cpu().numpy() / data[1][0][2].item()
            TP, GT = evaluate_recall(boxes_per_img, i, imdb, thr=0.5)
            recall.update(TP, GT)

            sys.stdout.write('TP/GT: {}/{} | Recall: {:.3f} \r'.format(
                TP, GT, recall.avg))
            sys.stdout.flush()
            continue

        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()
        det_toc = time.time()
        detect_time = det_toc - det_tic
        misc_tic = time.time()
        if vis:
            im = cv2.imread(imdb.image_path_at(i))
            im2show = np.copy(im)
        # Class 0 is skipped (background).
        for j in range(1, imdb.num_classes):
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                if args.class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                cls_dets = cls_dets[order]
                keep = nms(cls_dets, cfg.TEST.NMS)
                cls_dets = cls_dets[keep.view(-1).long()]
                if vis:
                    im2show = vis_detections(im2show, imdb.classes[j],
                                             cls_dets.cpu().numpy(), 0.3)
                all_boxes[j][i] = cls_dets.cpu().numpy()
            else:
                all_boxes[j][i] = empty_array

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in range(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in range(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]

        misc_toc = time.time()
        nms_time = misc_toc - misc_tic

        sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s   \r' \
            .format(i + 1, num_images, detect_time, nms_time))
        sys.stdout.flush()

        if vis:
            cv2.imwrite('result.png', im2show)
            # NOTE(review): interactive breakpoint; only reachable when the
            # hard-coded `vis` flag above is switched on.
            pdb.set_trace()

    if evl_rec:
        print('\r\nThe average rpn recall is: {:.4f}'.format(recall.avg))
        return recall.avg

    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')
    mAP = imdb.evaluate_detections(all_boxes, output_dir)

    end = time.time()
    print("test time: %0.4fs" % (end - start))
    return mAP
def test_net(tdcnn_demo, dataloader, args):
    """Run the temporal detector over ``dataloader`` and print detections.

    For every video in every batch the per-class scores are thresholded,
    sorted and passed through NMS; the surviving (start, end, score) rows
    are printed and stored in a local ``all_twins[class][video]`` table.
    Nothing is returned.

    :param tdcnn_demo: the network; called as ``tdcnn_demo(video_data, gt_twins)``
    :param dataloader: yields ``(video_data, gt_twins, num_gt, video_info)``
    :param args: namespace providing ``vis``, ``num_videos`` and ``num_classes``
    """
    start = time.time()
    # TODO: Add restriction for max_per_video
    max_per_video = 0

    thresh = 0.05 if args.vis else 0.005

    all_twins = [[[] for _ in range(args.num_videos)]
                 for _ in range(args.num_classes)]

    tdcnn_demo.eval()
    empty_array = np.transpose(np.array([[], [], []]), (1, 0))

    # Running count of videos already processed. Using i * batch_size as
    # the offset is wrong when the final batch is smaller than the others,
    # because batch_size shrinks and earlier slots get overwritten.
    videos_seen = 0

    data_tic = time.time()
    for video_data, gt_twins, num_gt, video_info in dataloader:
        video_data = video_data.cuda()
        gt_twins = gt_twins.cuda()
        batch_size = video_data.shape[0]
        data_toc = time.time()
        data_time = data_toc - data_tic

        det_tic = time.time()
        rois, cls_prob, twin_pred = tdcnn_demo(video_data, gt_twins)

        scores_all = cls_prob.data
        twins = rois.data[:, :, 1:3]

        if cfg.TEST.TWIN_REG:
            # Apply bounding-twin regression deltas
            twin_deltas = twin_pred.data
            if cfg.TRAIN.TWIN_NORMALIZE_TARGETS_PRECOMPUTED:
                # Optionally normalize targets by a precomputed mean and stdev
                twin_deltas = twin_deltas.view(-1, 2) * torch.FloatTensor(cfg.TRAIN.TWIN_NORMALIZE_STDS).type_as(twin_deltas) \
                       + torch.FloatTensor(cfg.TRAIN.TWIN_NORMALIZE_MEANS).type_as(twin_deltas)
                twin_deltas = twin_deltas.view(batch_size, -1, 2 * args.num_classes)

            pred_twins_all = twin_transform_inv(twins, twin_deltas)
            pred_twins_all = clip_twins(pred_twins_all, cfg.TRAIN.LENGTH[0])
        else:
            # Simply repeat the twins, once for each class. Kept as a torch
            # tensor on the same device so the indexing below still works.
            pred_twins_all = twins.repeat(1, 1, scores_all.size(-1))

        det_toc = time.time()
        detect_time = det_toc - det_tic

        for b in range(batch_size):
            video_idx = videos_seen + b
            misc_tic = time.time()
            print(video_info[b])
            scores = scores_all[b]
            pred_twins = pred_twins_all[b]

            # skip j = 0, because it's the background class
            for j in range(1, args.num_classes):
                inds = torch.nonzero(scores[:, j] > thresh).view(-1)
                # if there is det
                if inds.numel() > 0:
                    cls_scores = scores[:, j][inds]
                    _, order = torch.sort(cls_scores, 0, True)
                    cls_twins = pred_twins[inds][:, j * 2:(j + 1) * 2]

                    cls_dets = torch.cat((cls_twins, cls_scores.unsqueeze(1)), 1)
                    cls_dets = cls_dets[order]
                    keep = nms(cls_dets, cfg.TEST.NMS)
                    if len(keep) > 0:
                        cls_dets = cls_dets[keep.view(-1).long()]
                        print("activity: ", j)
                        print(cls_dets.cpu().numpy())

                    all_twins[j][video_idx] = cls_dets.cpu().numpy()
                else:
                    all_twins[j][video_idx] = empty_array

            # Limit to max_per_video detections *over all classes*
            if max_per_video > 0:
                video_scores = np.hstack([all_twins[j][video_idx][:, -1]
                                          for j in range(1, args.num_classes)])
                if len(video_scores) > max_per_video:
                    video_thresh = np.sort(video_scores)[-max_per_video]
                    for j in range(1, args.num_classes):
                        keep = np.where(all_twins[j][video_idx][:, -1] >= video_thresh)[0]
                        all_twins[j][video_idx] = all_twins[j][video_idx][keep, :]

            misc_toc = time.time()
            nms_time = misc_toc - misc_tic
            print('im_detect: {:d}/{:d} {:.3f}s {:.3f}s {:.3f}s' \
              .format(video_idx + 1, args.num_videos, data_time / batch_size, detect_time / batch_size, nms_time))

        if args.vis:
            pass

        videos_seen += batch_size
        data_tic = time.time()

    end = time.time()
    print("test time: %0.4fs" % (end - start))
def Predict(self, im_in, area):
    """Detect items in one image and report the three best, left to right.

    The image is run through ``self.fasterRCNN``; detections of every
    non-background class are thresholded, passed through NMS and drawn by
    ``vis_detections``. The three entries with the highest ``x[2]`` value
    are then ordered by ``x[1][0]`` and the annotated image is written to
    ``<self.image_dir>/<area>.jpg``.

    :param im_in: input image array (2-D grayscale or 3-channel)
    :param area: value used to build the output file name
    :return: dict with keys ``"Left"``, ``"Mid"``, ``"Right"`` holding the
        first field of each selected entry; positions for which fewer than
        three entries exist are ``None``.
    """
    # initilize the tensor holder here.
    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)

    # ship to cuda
    if self.cuda > 0:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()

    # make variable
    with torch.no_grad():
        im_data = Variable(im_data)
        im_info = Variable(im_info)
        num_boxes = Variable(num_boxes)
        gt_boxes = Variable(gt_boxes)

    if self.cuda > 0:
        cfg.CUDA = True
        self.fasterRCNN.cuda()

    self.fasterRCNN.eval()

    if len(im_in.shape) == 2:
        # Grayscale input: replicate the single channel three times.
        im_in = im_in[:, :, np.newaxis]
        im_in = np.concatenate((im_in, im_in, im_in), axis=2)
    # rgb -> bgr
    im_in = im_in[:, :, ::-1]
    im = cv2.cvtColor(im_in, cv2.COLOR_BGR2RGB)

    blobs, im_scales = self._get_image_blob(im)
    assert len(im_scales) == 1, "Only single-image batch implemented"
    im_blob = blobs
    im_info_np = np.array(
        [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
        dtype=np.float32)

    im_data_pt = torch.from_numpy(im_blob)
    im_data_pt = im_data_pt.permute(0, 3, 1, 2)
    im_info_pt = torch.from_numpy(im_info_np)

    im_data.data.resize_(im_data_pt.size()).copy_(im_data_pt)
    im_info.data.resize_(im_info_pt.size()).copy_(im_info_pt)
    gt_boxes.data.resize_(1, 1, 5).zero_()
    num_boxes.data.resize_(1).zero_()

    rois, cls_prob, bbox_pred, \
    rpn_loss_cls, rpn_loss_box, \
    RCNN_loss_cls, RCNN_loss_bbox, \
    rois_label = self.fasterRCNN(im_data, im_info, gt_boxes, num_boxes)

    scores = cls_prob.data
    boxes = rois.data[:, :, 1:5]

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = bbox_pred.data
        if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            # Optionally normalize targets by a precomputed mean and stdev
            stds = torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS)
            means = torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
            if self.cuda > 0:
                stds = stds.cuda()
                means = means.cuda()
            box_deltas = box_deltas.view(-1, 4) * stds + means
            if self.class_agnostic:
                box_deltas = box_deltas.view(1, -1, 4)
            else:
                box_deltas = box_deltas.view(1, -1, 4 * len(self.pascal_classes))

        pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
        pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
    else:
        # Simply repeat the boxes, once for each class (one copy in
        # class-agnostic mode). Tensor.repeat keeps device and dtype and,
        # unlike np.tile, also works for CUDA tensors.
        n_rep = 1 if self.class_agnostic else scores.size(-1)
        pred_boxes = boxes.repeat(1, 1, n_rep)

    pred_boxes /= im_scales[0]

    scores = scores.squeeze()
    pred_boxes = pred_boxes.squeeze()

    ItemAndBoxes_all = []
    im2show = np.copy(im)
    # Class 0 is skipped (background).
    for j in range(1, len(self.pascal_classes)):
        inds = torch.nonzero(scores[:, j] > self.thresh).view(-1)
        # if there is det
        if inds.numel() > 0:
            cls_scores = scores[:, j][inds]
            _, order = torch.sort(cls_scores, 0, True)
            if self.class_agnostic:
                cls_boxes = pred_boxes[inds, :]
            else:
                cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

            cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
            cls_dets = cls_dets[order]
            keep = nms(cls_dets, cfg.TEST.NMS,
                       force_cpu=not cfg.USE_GPU_NMS)
            cls_dets = cls_dets[keep.view(-1).long()]

            im2show, ItemAndBoxes = vis_detections(im2show,
                                                   self.pascal_classes[j],
                                                   cls_dets.cpu().numpy(),
                                                   self.visThresh)
            ItemAndBoxes_all.append(ItemAndBoxes)

    # Keep the three highest-ranked entries, then order them by x[1][0].
    ItemAndBoxes_all = sorted(ItemAndBoxes_all,
                              key=lambda x: x[2],
                              reverse=True)
    ItemAndBoxes_all = ItemAndBoxes_all[0:3]
    ItemAndBoxes_all = sorted(ItemAndBoxes_all, key=lambda x: x[1][0])

    if self.vis == 1:
        cv2.namedWindow("result", 0)
        cv2.resizeWindow("result", 1080, 720)
        cv2.imshow('result', im2show)
        cv2.waitKey(0)

    result_path = os.path.join(self.image_dir, str(area) + ".jpg")
    cv2.imwrite(result_path, im2show)

    # Pad with None so that fewer than three detections no longer raises
    # IndexError; with three or more the result is unchanged.
    items = [entry[0] for entry in ItemAndBoxes_all]
    items += [None] * (3 - len(items))
    return {
        "Left": items[0],
        "Mid": items[1],
        "Right": items[2]
    }
# Per-image post-processing: per-class thresholding, NMS and a global cap
# on the number of detections kept for image `i`.
im = cv2.imread(imdb.image_path_at(i))
im2show = np.copy(im)
# Class 0 is skipped.
for j in range(1, imdb.num_classes):
    inds = torch.nonzero(scores[:, j] > thresh).view(-1)
    # if there is det
    if inds.numel() > 0:
        cls_scores = scores[:, j][inds]
        _, order = torch.sort(cls_scores, 0, True)
        if args.class_agnostic:
            cls_boxes = pred_boxes[inds, :]
        else:
            cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

        # The scores must be a column before they can be concatenated with
        # the (n, 4) boxes; view(-1, 1) works for both (n,) and (n, 1).
        cls_dets = torch.cat((cls_boxes, cls_scores.view(-1, 1)), 1)
        cls_dets = cls_dets[order]
        keep = nms(cls_dets, cfg.TEST.NMS)
        cls_dets = cls_dets[keep.view(-1).long()]
        if vis:
            im2show = vis_detections(im2show, imdb.classes[j], cls_dets.cpu().numpy(), 0.3)
        all_boxes[j][i] = cls_dets.cpu().numpy()
    else:
        all_boxes[j][i] = empty_array

# Limit to max_per_image detections *over all classes*
if max_per_image > 0:
    image_scores = np.hstack([all_boxes[j][i][:, -1]
                              for j in range(1, imdb.num_classes)])
    if len(image_scores) > max_per_image:
        image_thresh = np.sort(image_scores)[-max_per_image]
        for j in range(1, imdb.num_classes):
            keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
def forward(self, im_data, im_info, gt_boxes, num_boxes, opt_frcn=None, vis_flag=False):
    """Detect a global and a local region and classify the pooled pair.

    ``self.FRCN`` is run first (with ground-truth labels remapped to the
    two detector classes), its class-1 detections become the "global"
    candidates and its class-2 detections the "local" ones. In training
    a mix of high- and low-scoring candidates is selected after NMS and
    labelled by IoU against the ground truth; at test time the single
    best candidate of each kind is used. Features of both regions are
    pooled from the shared base feature map, concatenated and classified.

    :param im_data: image batch fed to the detector
    :param im_info: image info tensor (``.data`` is used)
    :param gt_boxes: ground-truth boxes; the last column is the label
    :param num_boxes: number of ground-truth boxes
    :param opt_frcn: optional optimizer whose gradients are zeroed after
        the detector forward pass
    :param vis_flag: when true, plot the regions and ground truth
    :return: the eight detector outputs followed by ``glcc_out``,
        ``glcc_loss`` and ``glcc_gt`` (the last two are ``0.`` and
        ``None`` outside training)
    """
    def _pool(rois):
        # Pool features for `rois` from base_feat using the configured mode.
        if cfg.POOLING_MODE == 'crop':
            grid_xy = _affine_grid_gen(rois.view(-1, 5),
                                       base_feat.size()[2:],
                                       self.FRCN.grid_size)
            grid_yx = torch.stack([grid_xy.data[..., 1], grid_xy.data[..., 0]],
                                  3).contiguous()
            pooled = self.FRCN.RCNN_roi_crop(base_feat,
                                             Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled = F.max_pool2d(pooled, 2, 2)
            return pooled
        if cfg.POOLING_MODE == 'align':
            return self.FRCN.RCNN_roi_align(base_feat, rois.view(-1, 5))
        if cfg.POOLING_MODE == 'pool':
            return self.FRCN.RCNN_roi_pool(base_feat, rois.view(-1, 5))
        raise ValueError(
            'unsupported cfg.POOLING_MODE: {!r}'.format(cfg.POOLING_MODE))

    im_info = im_info.data
    gt_boxes = gt_boxes.data
    num_boxes = num_boxes.data

    # Remap labels for the detector: 1..3 -> 1, >= 4 -> 2. Work on a copy:
    # for a CPU tensor .numpy() shares memory, and the original labels are
    # still needed further down to split global/local ground truth.
    gt_boxes_frcn = gt_boxes.cpu().data.numpy()[0].copy()
    for gt in gt_boxes_frcn:
        if gt[..., -1] < 4 and gt[..., -1] > 0:
            gt[..., -1] = 1
        elif gt[..., -1] >= 4:
            gt[..., -1] = 2
    gt_boxes_frcn = gt_boxes_frcn[None, ...]
    gt_boxes_frcn = torch.Tensor(gt_boxes_frcn)
    if self.use_cuda:
        gt_boxes_frcn = gt_boxes_frcn.cuda()

    rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, \
        RCNN_loss_cls, RCNN_loss_bbox, rois_label \
        = self.FRCN(im_data, im_info, gt_boxes_frcn, num_boxes)
    if opt_frcn is not None:
        # Was `opt.zero_grad()`, which referenced an undefined name.
        opt_frcn.zero_grad()

    # get global and local region from Faster R-CNN
    base_feat = self.FRCN.RCNN_base(im_data)

    scores = cls_prob.data
    boxes = rois.data[:, :, 1:5]

    box_deltas = self.FRCN._bbox_pred.data
    if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
        # Undo target normalisation: deltas * std + mean. (The CPU
        # branches previously contained misspelled names and multiplied
        # by the means instead of adding them.)
        stds = torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS)
        means = torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
        if self.use_cuda > 0:
            stds = stds.cuda()
            means = means.cuda()
        box_deltas = box_deltas.view(-1, 4) * stds + means
        if self.class_agnostic:
            box_deltas = box_deltas.view(1, -1, 4)
        else:
            # Three detector classes: background, global, local.
            box_deltas = box_deltas.view(1, -1, 4 * 3)

    pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
    pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)

    scores = scores.squeeze()
    pred_boxes = pred_boxes.squeeze()

    thresh = 0.0

    region_g = np.ndarray((0, 5))
    region_l = np.ndarray((0, 5))

    # get global region
    inds = torch.nonzero(scores[:, 1] >= thresh).view(-1)
    if inds.numel() > 0:
        cls_scores = scores[:, 1][inds]
        _, order = torch.sort(cls_scores, 0, True)
        if self.class_agnostic:
            cls_boxes = pred_boxes[inds]
        else:
            cls_boxes = pred_boxes[inds][:, 4:8]
        cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
        cls_dets = cls_dets[order]
        region_g = cls_dets

    # get local region
    inds = torch.nonzero(scores[:, 2] >= thresh).view(-1)
    if inds.numel() > 0:
        cls_scores = scores[:, 2][inds]
        _, order = torch.sort(cls_scores, 0, True)
        if self.class_agnostic:
            cls_boxes = pred_boxes[inds]
        else:
            cls_boxes = pred_boxes[inds][:, 8:12]
        cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
        cls_dets = cls_dets[order]
        region_l = cls_dets

    # select region for recognition
    if not self.training:
        self.minibatch = 1

    if self.training:
        # Split the minibatch into high- and low-scoring proposals.
        if self.minibatch % 2 == 0:
            high_ind = self.minibatch // 2
            low_ind = self.minibatch // 2
        elif self.minibatch == 1:
            high_ind = 1
            low_ind = 0
        else:
            high_ind = self.minibatch // 2 + 1
            low_ind = self.minibatch // 2

        keep = nms(torch.tensor(region_g).cuda(),
                   self.nms_iou,
                   force_cpu=not cfg.USE_GPU_NMS)
        if type(keep) is not list:
            keep = keep.view(-1).long()
        region_g = region_g[keep]
        sort_ind = np.argsort(region_g[..., -1])
        high_ind_g = sort_ind[-high_ind:]
        low_ind_g = sort_ind[:low_ind]

        keep = nms(torch.tensor(region_l).cuda(),
                   self.nms_iou,
                   force_cpu=not cfg.USE_GPU_NMS)
        if type(keep) is not list:
            keep = keep.view(-1).long()
        region_l = region_l[keep]
        sort_ind = np.argsort(region_l[..., -1])
        high_ind_l = sort_ind[-high_ind:]
        low_ind_l = sort_ind[:low_ind]

        # Use the same number of global and local proposals.
        high_num = min(len(high_ind_g), len(high_ind_l))
        high_ind_g = high_ind_g[:high_num]
        high_ind_l = high_ind_l[:high_num]
        low_num = min(len(low_ind_g), len(low_ind_l))
        low_ind_g = low_ind_g[:low_num]
        low_ind_l = low_ind_l[:low_num]

        if len(high_ind_g.data) < 1:
            region_g_high = torch.tensor(np.ndarray((5))[None, ...])
        else:
            region_g_high = region_g[high_ind_g]
        if len(low_ind_g.data) < 1:
            region_g_low = torch.tensor(np.ndarray((5))[None, ...])
        else:
            region_g_low = region_g[low_ind_g]
        if len(high_ind_l.data) < 1:
            region_l_high = torch.tensor(np.ndarray((5))[None, ...])
        else:
            region_l_high = region_l[high_ind_l]
        if len(low_ind_l.data) < 1:
            region_l_low = torch.tensor(np.ndarray((5))[None, ...])
        else:
            region_l_low = region_l[low_ind_l]

        proposal_g = np.vstack((region_g_high, region_g_low))
        proposal_l = np.vstack((region_l_high, region_l_low))

        gt_boxes = gt_boxes.cpu().numpy()[0, :2]
        gt_g = gt_boxes[np.where(gt_boxes[..., -1] < 4)[0]]
        gt_l = gt_boxes[np.where(gt_boxes[..., -1] >= 4)[0]]

        # compute pair ground truth
        def compute_iou(ps, gt):
            """Return 1 for each proposal in ps with IoU >= self.gt_iou."""
            iou_x1 = np.maximum(ps[..., 0], gt[0])
            iou_y1 = np.maximum(ps[..., 1], gt[1])
            iou_x2 = np.minimum(ps[..., 2], gt[2])
            iou_y2 = np.minimum(ps[..., 3], gt[3])
            iou_w = np.maximum(iou_x2 - iou_x1, 0)
            iou_h = np.maximum(iou_y2 - iou_y1, 0)
            iou_area = iou_w * iou_h
            gt_area = (gt[2] - gt[0]) * (gt[3] - gt[1])
            p_area = (ps[..., 2] - ps[..., 0]) * (ps[..., 3] - ps[..., 1])
            overlap = iou_area / (gt_area + p_area - iou_area)
            count = np.zeros((ps.shape[0]), dtype=int)
            count[overlap >= self.gt_iou] += 1
            return count

        cou = compute_iou(proposal_g, gt_g[0]) + compute_iou(
            proposal_l, gt_l[0])

        # A pair is positive when at least one of its regions matches.
        glcc_gt = np.zeros((proposal_g.shape[0]), dtype=int)
        glcc_gt[cou >= 1] = gt_g[0, -1]
        glcc_gt = torch.tensor(glcc_gt, dtype=torch.long).cuda()
    else:
        # test phase
        proposal_g = region_g[np.argmax(region_g[..., -1])][None, ...]
        proposal_l = region_l[np.argmax(region_l[..., -1])][None, ...]

    # if true, then show detection global and local region
    if vis_flag:
        # Builtin int/float replace the np.int/np.float aliases, which
        # newer NumPy releases no longer provide.
        gt_boxes = gt_boxes.astype(int)
        im = im_data.cpu().numpy()[0]
        im = np.transpose(im, (1, 2, 0))[..., ::-1]
        im -= im.min()
        im /= im.max()
        plt.imshow(im.astype(float))
        ax = plt.axes()
        ax.add_patch(
            plt.Rectangle((region_g[0, 0], region_g[0, 1]),
                          region_g[0, 2] - region_g[0, 0],
                          region_g[0, 3] - region_g[0, 1],
                          fill=False,
                          edgecolor='red',
                          linewidth=1))
        ax.add_patch(
            plt.Rectangle((region_l[0, 0], region_l[0, 1]),
                          region_l[0, 2] - region_l[0, 0],
                          region_l[0, 3] - region_l[0, 1],
                          fill=False,
                          edgecolor='yellow',
                          linewidth=1))
        ax.add_patch(
            plt.Rectangle((gt_boxes[0, 0], gt_boxes[0, 1]),
                          gt_boxes[0, 2] - gt_boxes[0, 0],
                          gt_boxes[0, 3] - gt_boxes[0, 1],
                          fill=False,
                          edgecolor='green',
                          linewidth=1))
        ax.add_patch(
            plt.Rectangle((gt_boxes[1, 0], gt_boxes[1, 1]),
                          gt_boxes[1, 2] - gt_boxes[1, 0],
                          gt_boxes[1, 3] - gt_boxes[1, 1],
                          fill=False,
                          edgecolor='white',
                          linewidth=1))
        plt.show()

    # Build (1, n, 5) ROI tensors; column 0 is left at zero.
    rois_g = np.zeros((1, proposal_g.shape[0], 5), dtype=np.float32)
    rois_g[0, :, 1:5] = proposal_g[:, :4]
    rois_l = np.zeros((1, proposal_l.shape[0], 5), dtype=np.float32)
    rois_l[0, :, 1:5] = proposal_l[:, :4]
    rois_g = torch.tensor(rois_g, dtype=torch.float).cuda()
    rois_l = torch.tensor(rois_l, dtype=torch.float).cuda()
    self.rois_g.data.resize_(rois_g.size()).copy_(rois_g)
    self.rois_l.data.resize_(rois_l.size()).copy_(rois_l)

    # global region, then local region
    pooled_feat_g = _pool(self.rois_g)
    pooled_feat_l = _pool(self.rois_l)

    x = torch.cat((pooled_feat_g, pooled_feat_l), dim=1)
    x = self.glcc_conv1(x)
    x = F.relu(x)
    x = x.view(-1, self.roipool * self.roipool * 512)
    x = self.glcc_fc1(x)
    x = F.relu(x)
    # F.dropout honours self.training; a freshly built nn.Dropout() is
    # always in training mode and would also drop units during evaluation.
    x = F.dropout(x, training=self.training)
    x = self.glcc_fc2(x)
    x = F.relu(x)
    x = F.dropout(x, training=self.training)
    glcc_out = self.glcc_fc_out(x)

    if self.training:
        glcc_gt = torch.tensor(glcc_gt, dtype=torch.long).cuda()
        glcc_loss = F.cross_entropy(glcc_out, glcc_gt)
    else:
        glcc_out = F.softmax(glcc_out, dim=1)
        glcc_loss = 0.
        glcc_gt = None

    return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label, glcc_out, glcc_loss, glcc_gt
# Draw and log the detections of every non-background class on a copy of
# the current frame.
im2show = np.copy(im)
for j in xrange(1, len(pascal_classes)):
    inds = torch.nonzero(scores[:, j] > thresh).view(-1)
    if inds.numel() <= 0:
        # nothing above threshold for this class
        continue

    cls_scores = scores[:, j][inds]
    _, order = torch.sort(cls_scores, 0, True)
    cls_boxes = (pred_boxes[inds, :] if args.class_agnostic
                 else pred_boxes[inds][:, j * 4:(j + 1) * 4])

    # rows are x_min, y_min, x_max, y_max, score, sorted by score
    cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)[order]
    keep = nms(cls_dets, config.TEST.NMS)
    cls_dets = cls_dets[keep.view(-1).long()]

    if not vis:
        continue

    # One tab-separated line per detection: frame number, then the box
    # coordinates and the confidence score (last column).
    for detection in cls_dets.cpu().numpy():
        fields = [str(processed_images)] + [str(v) for v in detection]
        fid.write("\t".join(fields) + "\n")
    im2show = vis_detections(im2show, pascal_classes[j],
                             cls_dets.cpu().numpy(), 0.5)
def forward(self, input):
    """Turn RPN outputs into a fixed-size tensor of region proposals.

    Steps: build anchors for every feature-map cell, apply the predicted
    deltas, clip to the image, sort by foreground score, keep the top
    pre-NMS candidates, run NMS and keep the top post-NMS survivors.

    :param input: sequence of (class scores, bbox deltas, im_info, cfg key)
    :return: tensor of shape (batch, post_nms_topN, 5); column 0 is the
        batch index, columns 1-4 the proposal, unused rows stay zero
    """
    # the first set of _num_anchors channels are bg probs,
    # the second set are the fg probs
    scores = input[0][:, self._num_anchors:, :, :]
    bbox_deltas = input[1]
    im_info = input[2]
    cfg_key = input[3]

    phase_cfg = cfg[cfg_key]
    pre_nms_top_n = phase_cfg.RPN_PRE_NMS_TOP_N
    post_nms_top_n = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
    min_size = cfg[cfg_key].RPN_MIN_SIZE

    batch_size = bbox_deltas.size(0)

    # One shift (x, y, x, y) per feature-map cell, in image coordinates.
    feat_height, feat_width = scores.size(2), scores.size(3)
    xs = np.arange(0, feat_width) * self._feat_stride
    ys = np.arange(0, feat_height) * self._feat_stride
    xs, ys = np.meshgrid(xs, ys)
    shift_rows = np.vstack((xs.ravel(), ys.ravel(),
                            xs.ravel(), ys.ravel())).transpose()
    shifts = torch.from_numpy(shift_rows)
    shifts = shifts.contiguous().type_as(scores).float()

    A = self._num_anchors
    K = shifts.size(0)

    # Every base anchor at every cell, shared across the batch.
    self._anchors = self._anchors.type_as(scores)
    anchors = self._anchors.view(1, A, 4) + shifts.view(K, 1, 4)
    anchors = anchors.view(1, K * A, 4).expand(batch_size, K * A, 4)

    # Bring deltas and scores into the same ordering as the anchors.
    bbox_deltas = bbox_deltas.permute(0, 2, 3, 1).contiguous()
    bbox_deltas = bbox_deltas.view(batch_size, -1, 4)
    scores = scores.permute(0, 2, 3, 1).contiguous()
    scores = scores.view(batch_size, -1)

    # Anchors -> proposals, then clip to the image.
    proposals = bbox_transform_inv(anchors, bbox_deltas, batch_size)
    proposals = clip_boxes(proposals, im_info, batch_size)

    scores_keep = scores
    proposals_keep = proposals
    _, order = torch.sort(scores_keep, 1, True)

    output = scores.new(batch_size, post_nms_top_n, 5).zero_()
    for i in range(batch_size):
        ranked = order[i]
        # take top pre_nms_topN (e.g. 6000)
        if 0 < pre_nms_top_n < scores_keep.numel():
            ranked = ranked[:pre_nms_top_n]

        proposals_single = proposals_keep[i][ranked, :]
        scores_single = scores_keep[i][ranked].view(-1, 1)

        # apply nms (e.g. threshold = 0.7), then take after_nms_topN
        dets = torch.cat((proposals_single, scores_single), 1)
        keep_idx_i = nms(dets, nms_thresh).long().view(-1)
        if post_nms_top_n > 0:
            keep_idx_i = keep_idx_i[:post_nms_top_n]
        proposals_single = proposals_single[keep_idx_i, :]
        scores_single = scores_single[keep_idx_i, :]

        # rows beyond num_proposal remain zero padding
        num_proposal = proposals_single.size(0)
        output[i, :, 0] = i
        output[i, :num_proposal, 1:] = proposals_single

        if DEBUG:
            canvas_shape = im_info[0, :2].cpu().numpy().astype('int')
            debug_img = np.zeros(canvas_shape)
            for k in range(proposals_single.shape[0]):
                x0, y0, x1, y1 = proposals_single[k, :]
                debug_img = cv2.rectangle(debug_img,
                                          (int(x0), int(y0)),
                                          (int(x1), int(y1)),
                                          color=(255, 255, 255))
            cv2.imshow("region_proposals", debug_img)
            cv2.waitKey(1)

    return output
def forward(self, input):
    """Convert RPN outputs into a zero-padded batch of region proposals.

    Algorithm:
      1. For each (H, W) feature-map location, shift the A base anchors
         onto that cell and apply the predicted bbox deltas.
      2. Clip the resulting boxes to the image.
      3. Sort (proposal, score) pairs by score, highest first.
      4. Keep the top ``RPN_PRE_NMS_TOP_N`` proposals before NMS.
      5. Apply NMS with threshold ``RPN_NMS_THRESH``.
      6. Keep the top ``RPN_POST_NMS_TOP_N`` proposals after NMS.

    NOTE: the "remove boxes smaller than a threshold" step of the
    reference algorithm is not performed here, so ``RPN_MIN_SIZE`` is
    not consulted.

    Args:
        input: indexable with four entries:
            ``input[0]`` - class probabilities; the first
            ``self._num_anchors`` channels are background probabilities
            and the remaining channels are used as foreground scores.
            ``input[1]`` - predicted bbox deltas; after moving channels
            last they are viewed as ``(batch, -1, 4)``.
            ``input[2]`` - image info forwarded to ``clip_boxes``
            (presumably per-image height/width/scale - confirm against
            the caller).
            ``input[3]`` - key into ``cfg`` selecting the RPN settings.

    Returns:
        Tensor of shape ``(batch_size, R, 5)`` with the same type as the
        scores. Column 0 holds the batch index and columns 1-4 the
        proposal box; rows past the number of kept proposals stay zero.
        ``R`` is ``RPN_POST_NMS_TOP_N`` when that is positive; otherwise
        it is the largest number of proposals any image kept after NMS.
    """
    # Channels [0, A) are background probs, channels [A, ...) foreground.
    scores = input[0][:, self._num_anchors:, :, :]
    bbox_deltas = input[1]
    im_info = input[2]
    cfg_key = input[3]

    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH

    batch_size = bbox_deltas.size(0)

    # One (x, y, x, y) offset per feature-map cell, in input-image pixels.
    feat_height, feat_width = scores.size(2), scores.size(3)
    shift_x = np.arange(0, feat_width) * self._feat_stride
    shift_y = np.arange(0, feat_height) * self._feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = torch.from_numpy(
        np.vstack((shift_x.ravel(), shift_y.ravel(),
                   shift_x.ravel(), shift_y.ravel())).transpose())
    shifts = shifts.contiguous().type_as(scores).float()

    A = self._num_anchors
    K = shifts.size(0)

    # Broadcast (1, A, 4) + (K, 1, 4) -> (K, A, 4): every base anchor at
    # every cell, then share the same anchor set across the batch.
    self._anchors = self._anchors.type_as(scores)
    anchors = self._anchors.view(1, A, 4) + shifts.view(K, 1, 4)
    anchors = anchors.view(1, K * A, 4).expand(batch_size, K * A, 4)

    # Move channels last and flatten so deltas and scores follow the same
    # (cell-major, anchor-minor) order as the anchors.
    bbox_deltas = bbox_deltas.permute(0, 2, 3, 1).contiguous()
    bbox_deltas = bbox_deltas.view(batch_size, -1, 4)

    scores = scores.permute(0, 2, 3, 1).contiguous()
    scores = scores.view(batch_size, -1)

    # 1. Convert anchors into proposals via bbox transformations.
    proposals = bbox_transform_inv(anchors, bbox_deltas, batch_size)

    # 2. Clip predicted boxes to the image.
    proposals = clip_boxes(proposals, im_info, batch_size)

    # 3. Per-image ranking of all proposals by descending score.
    _, order = torch.sort(scores, 1, True)

    kept_per_image = []
    for i in range(batch_size):
        # 4. Take the top pre_nms_topN (e.g. 6000). Slicing past the end
        #    is a no-op, so no explicit length check is needed.
        order_single = order[i]
        if pre_nms_topN > 0:
            order_single = order_single[:pre_nms_topN]

        proposals_single = proposals[i][order_single, :]
        scores_single = scores[i][order_single].view(-1, 1)

        # 5. Apply NMS (e.g. threshold = 0.7) on (box, score) rows.
        #    The result is used as row indices into the sorted proposals.
        keep_idx_i = nms(torch.cat((proposals_single, scores_single), 1),
                         nms_thresh)
        keep_idx_i = keep_idx_i.long().view(-1)

        # 6. Take the top post_nms_topN (e.g. 300); non-positive keeps all.
        if post_nms_topN > 0:
            keep_idx_i = keep_idx_i[:post_nms_topN]

        kept_per_image.append(proposals_single[keep_idx_i, :])

    # Size the output only after NMS so that a non-positive post_nms_topN
    # ("no limit") yields a valid tensor instead of a shape error.
    if post_nms_topN > 0:
        num_rows = post_nms_topN
    else:
        num_rows = max([kept.size(0) for kept in kept_per_image] + [0])

    output = scores.new(batch_size, num_rows, 5).zero_()
    for i, kept in enumerate(kept_per_image):
        # Rows beyond the kept proposals remain zero (padding).
        output[i, :, 0] = i
        output[i, :kept.size(0), 1:] = kept

    return output
if vis: im2show = np.copy(im) for j in xrange(1, len(_classes)): inds = torch.nonzero(scores[:, j] > thresh).view(-1) # if there is det if inds.numel() > 0: cls_scores = scores[:, j][inds] _, order = torch.sort(cls_scores, 0, True) if args.class_agnostic: cls_boxes = pred_boxes[inds, :] else: cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4] cls_dets = torch.cat((cls_boxes, cls_scores[:, None]), 1) cls_dets = cls_dets[order] keep = nms(cls_dets, cfg.TEST.NMS) cls_dets = cls_dets[keep.view(-1).long()] if vis: im2show = vis_detections(im2show, _classes[j], cls_dets.cpu().numpy(), 0.5) misc_toc = time.time() nms_time = misc_toc - misc_tic sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s \r' \ .format(i + 1, num_images, detect_time, nms_time)) sys.stdout.flush() if vis: # cv2.imshow('test', im2show) # cv2.waitKey(0)
im2show = np.copy(im) for j in xrange(1, len(pascal_classes)): inds = torch.nonzero(scores[:,j]>thresh).view(-1) # if there is det if inds.numel() > 0: cls_scores = scores[:,j][inds] _, order = torch.sort(cls_scores, 0, True) if args.class_agnostic: cls_boxes = pred_boxes[inds, :] else: cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4] cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1) # cls_dets = torch.cat((cls_boxes, cls_scores), 1) cls_dets = cls_dets[order] keep = nms(cls_dets, cfg.TEST.NMS, force_cpu=not cfg.USE_GPU_NMS) cls_dets = cls_dets[keep.view(-1).long()] if vis: im2show = vis_detections(im2show, pascal_classes[j], cls_dets.cpu().numpy(), 0.5) misc_toc = time.time() nms_time = misc_toc - misc_tic if webcam_num == -1: sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s \r' \ .format(num_images + 1, len(imglist), detect_time, nms_time)) sys.stdout.flush() if vis and webcam_num == -1: # cv2.imshow('test', im2show) # cv2.waitKey(0)