def nms(dets, thresh, force_cpu=False):
    """Run non-maximum suppression on the GPU or the CPU backend.

    An empty ``dets`` (zero rows) short-circuits to an empty list. Otherwise
    ``gpu_nms`` is used when ``args.USE_GPU_NMS`` is set and ``force_cpu`` is
    false, and ``cpu_nms`` is used in every other case.
    """
    if dets.shape[0] == 0:
        return []

    use_gpu = args.USE_GPU_NMS and not force_cpu
    if not use_gpu:
        return cpu_nms(dets, thresh)
    return gpu_nms(dets, thresh, device_id=args.GPU_ID)
def nms(dets, thresh):
    """Dispatch to either the CPU or the GPU NMS implementation.

    Accepts ``dets`` as a tensor. The GPU implementation is selected whenever
    ``torch.cuda.is_available()`` reports a usable device.
    """
    # TODO: make sure the CPU fallback (cpu_nms) is implemented correctly
    backend = gpu_nms if torch.cuda.is_available() else cpu_nms
    return backend(dets, thresh)
def nms(dets, thresh, force_cpu=False):
    """Pick the NMS backend from ``cfg`` and apply it to ``dets``.

    Returns ``[]`` straight away when ``dets`` has no rows. The CPU
    implementation is used when ``cfg.USE_GPU_NMS`` is off or the caller
    passes ``force_cpu``; otherwise ``gpu_nms`` runs on device ``cfg.GPU_ID``.
    """
    if dets.shape[0] == 0:
        return []
    if not cfg.USE_GPU_NMS or force_cpu:
        return cpu_nms(dets, thresh)
    return gpu_nms(dets, thresh, device_id=cfg.GPU_ID)
def apply_nms(cls_dets, nms_method, nms_thresh):
    """Filter detections by score and box size, then suppress overlaps.

    Args:
        cls_dets: 2-D array of detections. Column 4 is compared against
            ``min_scores``; columns 0-3 are combined as
            ``(col3 - col1) * (col2 - col0)`` and compared against
            ``min_box_size`` (presumably ``[x1, y1, x2, y2, score]`` rows --
            confirm against the caller).
        nms_method: ``'nms'`` runs ``gpu_nms`` with ``nms_thresh``;
            ``'soft'`` runs ``cpu_soft_nms`` with ``method=2``.
        nms_thresh: overlap threshold forwarded to ``gpu_nms``. It is not
            passed to the ``'soft'`` branch.

    Returns:
        Tuple ``(cls_dets, keep)``. When at least one detection survives the
        score/size filter, ``keep`` is whatever the selected NMS routine
        returned (indices into the *filtered* array) and ``cls_dets`` holds
        those rows. When nothing survives the filter, ``cls_dets`` is empty
        and ``keep`` is the (empty) filter index array.

    Raises:
        ValueError: if detections survive the filter and ``nms_method`` is
            neither ``'nms'`` nor ``'soft'``.
    """
    # min_scores and min_box_size are not parameters; they are resolved from
    # the surrounding scope.
    box_sizes = (cls_dets[:, 3] - cls_dets[:, 1]) * (cls_dets[:, 2] - cls_dets[:, 0])
    passes_filter = (cls_dets[:, 4] >= min_scores) & (box_sizes >= min_box_size)
    keep = np.where(passes_filter)[0]
    cls_dets = cls_dets[keep]

    if len(cls_dets) > 0:
        if nms_method == 'nms':
            keep = gpu_nms(cls_dets, nms_thresh)
        elif nms_method == 'soft':
            keep = cpu_soft_nms(np.ascontiguousarray(cls_dets, dtype=np.float32), method=2)
        else:
            # An ``assert`` here would vanish under ``python -O`` and the
            # filter indices above would then be applied a second time to the
            # already filtered array, so fail explicitly instead.
            raise ValueError('unknown nms_method: {!r}'.format(nms_method))
        cls_dets = cls_dets[keep]
    return cls_dets, keep
def nms(dets, thresh):
    """Dispatch to the GPU NMS implementation.

    Accepts ``dets`` as a tensor; it is moved to host memory and converted to
    a NumPy array before being handed to ``gpu_nms``.
    """
    host_dets = dets.cpu()
    dets_np = host_dets.numpy()
    return gpu_nms(dets_np, thresh)
def clean_and_denorm(self, out, p2s, p2_invs, scales):
    """Denormalize network outputs, decode boxes against the anchors and run NMS.

    The 2D and 3D regressions are de-standardized with ``self.bbox_stds`` /
    ``self.bbox_means``, decoded relative to the anchor statistics
    (``self.rois_widths``, ``self.rois_heights``, ``self.rois_ctr_x``,
    ``self.rois_ctr_y``, ``self.rois_3d``), and then, for every batch element,
    foreground detections scoring at least ``self.score_thres`` are passed
    through ``gpu_nms`` and assembled into one box tensor.

    Args:
        out: indexable network output; ``out[0]`` .. ``out[3]`` are cloned as
            ``cls``, ``prob``, ``bbox_2d`` and ``bbox_3d``. The ``[:, :, k]``
            indexing suggests (batch, anchors, channels) tensors -- TODO confirm.
        p2s: per-image projection matrices; only entries ``[0, 0]``, ``[0, 2]``,
            ``[0, 3]``, ``[1, 1]``, ``[1, 2]``, ``[1, 3]`` and ``[2, 3]`` are read.
        p2_invs: per-image NumPy arrays, converted to CUDA float tensors. The
            result is only referenced by commented-out code below.
        scales: per-image scale factors; 2D boxes and the projected x/y
            centers are divided by ``scales[bind]``.

    Returns:
        Tuple ``(boxes_batch, cls_batch)`` of lists with one entry per batch
        element. An entry is ``None`` when no anchor reaches the score
        threshold; otherwise ``boxes`` is the concatenated detection tensor
        and ``cls_feat`` is ``cls[bind, fg_inds, :]`` for the kept anchors.

    Note:
        ``torch.cuda.FloatTensor`` is used unconditionally, so a CUDA device
        is required, and ``gpu_nms`` is always called with ``device_id=0``.
    """
    # Work on clones so the in-place updates below never touch the caller's tensors.
    cls = out[0].clone()
    prob = out[1].clone()
    bbox_2d = out[2].clone()
    bbox_3d = out[3].clone()
    batch_size = cls.shape[0]
    # denorm the 2D boxes
    bbox_2d[:, :, 0] = bbox_2d[:, :, 0] * self.bbox_stds[:, 0][0] + self.bbox_means[:, 0][0]
    bbox_2d[:, :, 1] = bbox_2d[:, :, 1] * self.bbox_stds[:, 1][0] + self.bbox_means[:, 1][0]
    bbox_2d[:, :, 2] = bbox_2d[:, :, 2] * self.bbox_stds[:, 2][0] + self.bbox_means[:, 2][0]
    bbox_2d[:, :, 3] = bbox_2d[:, :, 3] * self.bbox_stds[:, 3][0] + self.bbox_means[:, 3][0]
    # denorm the 3D boxes
    # NOTE: statistics column 10 is skipped; rsin/rcos use columns 11 and 12.
    bbox_x2d_raw = bbox_3d[:, :, 0] * self.bbox_stds[:, 4][0] + self.bbox_means[:, 4][0]
    bbox_y2d_raw = bbox_3d[:, :, 1] * self.bbox_stds[:, 5][0] + self.bbox_means[:, 5][0]
    bbox_z2d_raw = bbox_3d[:, :, 2] * self.bbox_stds[:, 6][0] + self.bbox_means[:, 6][0]
    bbox_w3d_raw = bbox_3d[:, :, 3] * self.bbox_stds[:, 7][0] + self.bbox_means[:, 7][0]
    bbox_h3d_raw = bbox_3d[:, :, 4] * self.bbox_stds[:, 8][0] + self.bbox_means[:, 8][0]
    bbox_l3d_raw = bbox_3d[:, :, 5] * self.bbox_stds[:, 9][0] + self.bbox_means[:, 9][0]
    bbox_rsin_raw = bbox_3d[:, :, 6] * self.bbox_stds[:, 11][0] + self.bbox_means[:, 11][0]
    bbox_rcos_raw = bbox_3d[:, :, 7] * self.bbox_stds[:, 12][0] + self.bbox_means[:, 12][0]
    # Channels 8-10 are used as-is (no de-standardization).
    bbox_axis_raw = bbox_3d[:, :, 8]
    bbox_head_raw = bbox_3d[:, :, 9]
    bbox_un = bbox_3d[:, :, 10]
    # anchor equations 2D
    pred_ctr_x = bbox_2d[:, :, 0] * self.rois_widths + self.rois_ctr_x
    pred_ctr_y = bbox_2d[:, :, 1] * self.rois_heights + self.rois_ctr_y
    pred_w = torch.exp(bbox_2d[:, :, 2]) * self.rois_widths
    pred_h = torch.exp(bbox_2d[:, :, 3]) * self.rois_heights
    # x1, y1, x2, y2
    bbox_2d[:, :, 0] = pred_ctr_x - 0.5 * pred_w
    bbox_2d[:, :, 1] = pred_ctr_y - 0.5 * pred_h
    bbox_2d[:, :, 2] = pred_ctr_x + 0.5 * pred_w
    bbox_2d[:, :, 3] = pred_ctr_y + 0.5 * pred_h
    # anchor equations 3D
    bbox_x2d_raw = bbox_x2d_raw * self.rois_widths + self.rois_ctr_x
    bbox_y2d_raw = bbox_y2d_raw * self.rois_heights + self.rois_ctr_y
    bbox_z2d_raw = self.rois_3d[:, 4] + bbox_z2d_raw
    bbox_w3d_raw = torch.exp(bbox_w3d_raw) * self.rois_3d[:, 5]
    bbox_h3d_raw = torch.exp(bbox_h3d_raw) * self.rois_3d[:, 6]
    bbox_l3d_raw = torch.exp(bbox_l3d_raw) * self.rois_3d[:, 7]
    # A 20-channel 3D head carries an extra velocity regression in channel 19.
    has_vel = bbox_3d.shape[2] == 20
    if has_vel:
        bbox_vel = bbox_3d[:, :, 19] * self.bbox_stds[:, 13][0] + self.bbox_means[:, 13][0]
        bbox_vel = self.rois_3d[:, 11] + bbox_vel
        # velocities are clamped to be non-negative
        bbox_vel = bbox_vel.clamp(min=0)
    bbox_rsin_raw = self.rois_3d[:, 9] + bbox_rsin_raw
    bbox_rcos_raw = self.rois_3d[:, 10] + bbox_rcos_raw
    # Pick the sin-based angle where the axis output is >= 0.5, else the cos-based one.
    bbox_axis_sin_mask = bbox_axis_raw >= 0.5
    #bbox_head_pos_mask = bbox_head_raw >= 0.5
    bbox_alp_raw = bbox_rcos_raw.clone()
    bbox_alp_raw[bbox_axis_sin_mask] = bbox_rsin_raw[bbox_axis_sin_mask]
    #bbox_alp_raw[bbox_head_pos_mask] = bbox_alp_raw[bbox_head_pos_mask] + math.pi
    boxes_batch = []
    cls_batch = []
    for bind in range(batch_size):
        # p2_inv is only consumed by the commented-out matrix back-projection below.
        p2_inv = torch.from_numpy(p2_invs[bind]).type(torch.cuda.FloatTensor)
        # Stay None when no detection passes the score threshold for this image.
        boxes = None
        cls_feat = None
        # Scalar entries of the projection matrix used by the closed-form back-projection.
        p2_a = p2s[bind][0, 0].item()
        p2_b = p2s[bind][0, 2].item()
        p2_c = p2s[bind][0, 3].item()
        p2_d = p2s[bind][1, 1].item()
        p2_e = p2s[bind][1, 2].item()
        p2_f = p2s[bind][1, 3].item()
        p2_h = p2s[bind][2, 3].item()
        thresh_s = self.score_thres
        # Best class per anchor, ignoring column 0; +1 restores the original class index.
        fg_scores, fg_cls = prob[bind, :, 1:].max(dim=1)
        fg_cls = fg_cls + 1
        fg_mask = fg_scores >= thresh_s
        fg_inds = torch.nonzero(fg_mask)
        if fg_inds.shape[0] > 0:
            fg_inds = fg_inds.squeeze(1)
            # scale down 2D boxes
            bbox_2d.data[bind, fg_inds] = bbox_2d.data[bind, fg_inds] / scales[bind]
            #bbox_2d[bind, fg_inds] = bbox_2d[bind, fg_inds].clone().detach() / scales[bind]
            # setup 2D boxes and scores
            bbox_2d_np = bbox_2d[bind, fg_inds].detach().cpu().numpy()
            aboxes = np.hstack((bbox_2d_np, fg_scores[fg_inds].detach().cpu().numpy()[:, np.newaxis]))
            # perform NMS in non-forecasted space
            keep_inds = gpu_nms(aboxes.astype(np.float32), self.nms_thres, device_id=0)
            keep_inds = torch.from_numpy(np.array(keep_inds))
            # update mask
            # (fg_mask is rewritten here but not read again inside this method)
            fg_inds = fg_inds[keep_inds]
            fg_mask[...] = 0
            fg_mask[fg_inds] = 1
            cls_feat = cls[bind, fg_inds, :]
            bbox_2d_fg = bbox_2d[bind, fg_inds]
            scores = fg_scores[fg_inds]
            cls_fg = fg_cls[fg_inds]
            bbox_x2d_dn_fg = bbox_x2d_raw[bind, fg_inds]
            bbox_y2d_dn_fg = bbox_y2d_raw[bind, fg_inds]
            bbox_z2d_dn_fg = bbox_z2d_raw[bind, fg_inds]
            bbox_w3d_dn_fg = bbox_w3d_raw[bind, fg_inds]
            bbox_h3d_dn_fg = bbox_h3d_raw[bind, fg_inds]
            bbox_l3d_dn_fg = bbox_l3d_raw[bind, fg_inds]
            bbox_alp_dn_fg = bbox_alp_raw[bind, fg_inds]
            bbox_head_dn_fg = bbox_head_raw[bind, fg_inds]
            bbox_un_fg = bbox_un[bind, fg_inds]
            # scale x2d and y2d back down
            bbox_x2d_dn_fg = bbox_x2d_dn_fg / scales[bind]
            bbox_y2d_dn_fg = bbox_y2d_dn_fg / scales[bind]
            # project back to 3D
            z3d = bbox_z2d_dn_fg - p2_h
            x3d = ((z3d + p2_h) * bbox_x2d_dn_fg - p2_b * (z3d) - p2_c) / p2_a
            y3d = ((z3d + p2_h) * bbox_y2d_dn_fg - p2_e * (z3d) - p2_f) / p2_d
            # gather 2D coords
            #coords_2d = torch.cat(
            #    (bbox_x2d_dn_fg[np.newaxis, :] * bbox_z2d_dn_fg[np.newaxis, :],
            #     bbox_y2d_dn_fg[np.newaxis, :] * bbox_z2d_dn_fg[np.newaxis, :],
            #     bbox_z2d_dn_fg[np.newaxis, :]), dim=0)
            # pad ones for a 4x1
            #coords_2d = torch.cat((coords_2d, torch.ones([1, coords_2d.shape[1]])), dim=0)
            #coords_3d = torch.mm(p2_inv, coords_2d)
            #x3d = coords_3d[0, :]
            #y3d = coords_3d[1, :]
            #z3d = coords_3d[2, :]
            ry3d = convertAlpha2Rot(bbox_alp_dn_fg, z3d, x3d)
            # [x y, x2, y2, score, cls, x, y, z, w, h, l, theta, head, vars, vel]
            boxes = torch.cat((bbox_2d_fg, scores.unsqueeze(1), cls_fg.unsqueeze(1).type(torch.cuda.FloatTensor), x3d.unsqueeze(1), y3d.unsqueeze(1), z3d.unsqueeze(1), bbox_w3d_dn_fg.unsqueeze(1), bbox_h3d_dn_fg.unsqueeze(1), bbox_l3d_dn_fg.unsqueeze(1), ry3d.unsqueeze(1), bbox_head_dn_fg.unsqueeze(1), bbox_un_fg.unsqueeze(1)), dim=1)
            # NOTE(review): the concatenation above adds 11 columns to bbox_2d_fg.
            # If bbox_2d only has the four columns indexed in this method, boxes
            # has 15 columns and column 23 does not exist -- confirm the
            # intended layout before relying on the velocity path.
            if has_vel:
                boxes[:, 23] = bbox_vel[bind, fg_inds]
        cls_batch.append(cls_feat)
        boxes_batch.append(boxes)
    return boxes_batch, cls_batch
def nms(dets, thresh):
    """Dispatch to the GPU NMS implementation.

    Accepts ``dets`` as a tensor and converts it to a host-side NumPy array
    for ``gpu_nms``.
    """
    # TODO: implement nms for cuda tensors directly, without the round trip
    # through cpu().numpy() and back to the GPU.
    return gpu_nms(dets.cpu().numpy(), thresh)
def forward(self, predictions):
    """Decode location predictions and run per-class GPU NMS.

    Args:
        predictions: pair ``(loc, conf)``.
            loc: (tensor) location predictions from the loc layers; the
                first dimension is the batch size. ``loc.data[i]`` is passed
                to ``decode`` together with the priors.
            conf: (tensor) confidence predictions from the conf layers.
                For a batch of one it is transposed directly; otherwise it
                is viewed as ``[batch, num_priors, num_classes]``.
        The priors are read from ``self.priors`` and ``num_priors`` is taken
        from their first dimension.

    Returns:
        Tensor of shape ``[batch, num_classes, top_k, 5]``. For each image
        and each class ``>= 1`` (class 0 is skipped), the first ``count``
        rows hold ``[score, box...]`` for the detections kept by NMS, with
        ``count`` capped at ``self.top_k``. All other rows stay zero.
    """
    loc, conf = predictions
    loc_data = loc.data
    conf_data = conf.data
    prior_data = self.priors.data
    num = loc_data.size(0)  # batch size
    num_priors = prior_data.size(0)

    if num == 1:
        # size batch x num_classes x num_priors
        conf_preds = conf_data.t().contiguous().unsqueeze(0)
    else:
        conf_preds = conf_data.view(num, num_priors, self.num_classes).transpose(2, 1)
    output = torch.zeros(num, self.num_classes, self.top_k, 5)

    # Per-stage timers; the accumulated totals are debugging aids only.
    _t = {
        'decode': Timer(),
        'misc': Timer(),
        'box_mask': Timer(),
        'score_mask': Timer(),
        'nms': Timer(),
        'cpu': Timer(),
        'sort': Timer()
    }
    gpunms_time = 0
    scores_time = 0
    box_time = 0
    cpu_time = 0
    decode_time = 0

    _t['misc'].tic()
    # Decode predictions into bboxes.
    for i in range(num):
        _t['decode'].tic()
        decoded_boxes = decode(loc_data[i], prior_data, self.variance)
        decode_time += _t['decode'].toc()

        # For each class, perform nms
        conf_scores = conf_preds[i].clone()
        for cl in range(1, self.num_classes):
            _t['cpu'].tic()
            c_mask = conf_scores[cl].gt(self.conf_thresh).nonzero().view(-1)
            cpu_time += _t['cpu'].toc()
            if c_mask.size(0) == 0:
                continue

            _t['score_mask'].tic()
            scores = conf_scores[cl][c_mask]
            scores_time += _t['score_mask'].toc()
            if scores.size(0) == 0:
                continue

            _t['box_mask'].tic()
            boxes = decoded_boxes[c_mask, :]
            box_time += _t['box_mask'].toc()

            # idx of highest scoring and non-overlapping boxes per class
            _t['nms'].tic()
            try:
                # Boxes are scaled by 300 before NMS only; the unscaled boxes
                # are what gets written to the output. (300 is presumably the
                # network input resolution -- confirm.)
                new_boxes = boxes * 300
                ids = gpu_nms(
                    torch.cat((new_boxes, scores.unsqueeze(1)), 1).cpu().numpy(),
                    self.nms_thresh)
                count = min(len(ids), self.top_k)
            except Exception:
                # Best effort: report the offending shapes and skip this class.
                # Falling through would hit undefined `ids`/`count` or,
                # worse, silently reuse the previous class's values.
                print(c_mask.size())
                print(boxes.size())
                print(scores.size())
                gpunms_time += _t['nms'].toc()
                continue
            gpunms_time += _t['nms'].toc()

            output[i, cl, :count] = \
                torch.cat((scores[ids[:count]].unsqueeze(1),
                           boxes[ids[:count]]), 1)
    nms_time = _t['misc'].toc()
    # print(nms_time, cpu_time, scores_time, box_time, gpunms_time)
    return output
def _nms(dets):
    """Apply ``gpu_nms`` to ``dets``.

    ``thresh`` and ``device_id`` are free variables resolved outside this
    function and are forwarded positionally.
    """
    keep = gpu_nms(dets, thresh, device_id)
    return keep
def nms(dets, thresh):
    """Dispatch to the GPU NMS implementation.

    Accepts ``dets`` as a tensor and forwards it, unchanged, to ``gpu_nms``
    together with ``thresh``.
    """
    keep = gpu_nms(dets, thresh)
    return keep
# Compare several NMS implementations on the same synthetic boxes.
# `w`, `num_boxes`, `scores`, `nms_overlap_threshold` and `shift` are defined
# earlier, outside this fragment.
w.requires_grad = True
# Each row of aboxes: columns 2 and 3 are set to w, column 4 to the score;
# columns 0 and 1 stay zero.
aboxes = torch.zeros((num_boxes, 5))
aboxes[:, 2] = w
aboxes[:, 3] = w
aboxes[:, 4] = scores
print(aboxes)
# Pairwise overlaps of the boxes against themselves.
iou_overlaps = iou(aboxes[:, :4], aboxes[:, :4], mode='combinations')  #, shift= shift)
print("IOU overlaps")
# Round to three decimals for display only.
print(torch.round(iou_overlaps.clone() * 1000) / 1000)
print(iou_overlaps.requires_grad)
# Remember to pass a clone of aboxes to every routine below; otherwise the
# IoU values were observed to change (one of the routines presumably modifies
# its input in place -- not confirmed here).
keep_inds = gpu_nms(aboxes.clone().detach().numpy().astype(np.float32), nms_overlap_threshold, device_id=0)
print("Indices from Garrick et al", keep_inds)
keep_inds = navneeth_soft_nms(aboxes.clone().detach().numpy(), Nt=nms_overlap_threshold, shift=shift)
print("Indices from Navneet et al", keep_inds.tolist())
keep_inds = girshick_nms(aboxes.clone().detach().numpy(), nms_overlap_threshold, shift=shift)
print("Indices from Girsick et al", keep_inds)
# keep_inds = dpp_nms(aboxes.clone().detach())
# print("Indices from DPP-NMS ", keep_inds.tolist())