def decode(self, loc_preds, cls_preds, input_size):
    '''Decode outputs back to bounding box locations and class labels.

    Args:
      loc_preds: (tensor) predicted locations, sized [#anchors, 4].
      cls_preds: (tensor) predicted class labels, sized [#anchors, #classes].
      input_size: (int/tuple) model input size of (w,h).

    Returns:
      boxes: (tensor) decoded box locations, sized [#obj,4].
      labels: (tensor) class labels for each box, sized [#obj,].
    '''
    CLS_THRESH = 0.5
    NMS_THRESH = 0.5

    input_size = torch.Tensor([input_size, input_size]) if isinstance(input_size, int) \
                 else torch.Tensor(input_size)
    anchor_boxes = self._get_anchor_boxes(input_size)

    loc_xy = loc_preds[:, :2]
    loc_wh = loc_preds[:, 2:]
    # Undo the anchor-relative encoding: offsets are scaled by anchor w/h,
    # sizes are log-space ratios.
    xy = loc_xy * anchor_boxes[:, 2:] + anchor_boxes[:, :2]
    wh = loc_wh.exp() * anchor_boxes[:, 2:]
    boxes = torch.cat([xy - wh / 2, xy + wh / 2], 1)  # [#anchors,4]

    score, labels = cls_preds.sigmoid().max(1)  # [#anchors,]
    ids = score > CLS_THRESH
    ids = ids.nonzero().squeeze()  # [#obj,]
    keep = box_nms(boxes[ids], score[ids], threshold=NMS_THRESH)
    return boxes[ids][keep], labels[ids][keep]
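# Nearly every decode variant in this section calls a box_nms helper that is
# not shown here, and its exact implementation differs between repos. Below is
# a minimal pure-PyTorch hard-NMS sketch, assuming [N,4] boxes in
# (x1,y1,x2,y2) order and a scalar IoU threshold; box_nms_sketch is a
# hypothetical stand-in, not the original helper.
import torch

def box_nms_sketch(boxes, scores, threshold=0.5):
    """Greedy hard NMS: keep the highest-scoring box, drop boxes whose IoU
    with it exceeds the threshold, and repeat on the remainder."""
    x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    areas = (x2 - x1) * (y2 - y1)
    order = scores.argsort(descending=True)
    keep = []
    while order.numel() > 0:
        i = order[0].item()
        keep.append(i)
        if order.numel() == 1:
            break
        # intersection of the kept box with all remaining boxes
        xx1 = x1[order[1:]].clamp(min=x1[i].item())
        yy1 = y1[order[1:]].clamp(min=y1[i].item())
        xx2 = x2[order[1:]].clamp(max=x2[i].item())
        yy2 = y2[order[1:]].clamp(max=y2[i].item())
        inter = (xx2 - xx1).clamp(min=0) * (yy2 - yy1).clamp(min=0)
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        # keep only boxes overlapping the kept one at or below the threshold
        order = order[1:][iou <= threshold]
    return torch.tensor(keep, dtype=torch.long)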
def decode(self, loc_preds, cls_preds, input_size):
    CLS_THRESH = 0.05
    NMS_THRESH = 0.4
    if isinstance(input_size, int):
        input_size = torch.Tensor([input_size, input_size])
    else:
        input_size = torch.Tensor(input_size)
    anchor_boxes = self.get_anchor_boxes(input_size)

    # Undo the normalization applied at encoding time (legacy Variable API).
    std = Variable(self.std).cuda()
    loc_preds = loc_preds * std

    loc_xy = loc_preds.data.cpu()[:, :2]
    loc_wh = loc_preds.data.cpu()[:, 2:]
    xy = loc_xy * anchor_boxes[:, 2:] + anchor_boxes[:, :2]
    wh = loc_wh.exp() * anchor_boxes[:, 2:]
    boxes = torch.cat([xy, wh], 1)
    boxes = change_box_order(boxes, 'xywh2xyxy')

    # Softmax over classes; label 0 is background.
    cls_preds = F.softmax(cls_preds, 1)
    score, labels = cls_preds.max(1)
    ids = (labels > 0) & (score > CLS_THRESH)
    ids = ids.nonzero().squeeze()
    if len(ids.size()) == 0:
        return None, None, None
    ids = ids.data.cpu()
    keep = box_nms(boxes.cpu()[ids], score.data.cpu()[ids], threshold=NMS_THRESH)
    return (boxes.cpu()[ids][keep],
            labels.data.cpu()[ids][keep],
            score.data.cpu()[ids][keep])
def scaled_window_object_detector(self, in_img, scale_factor=1.1, min_neighbors=3, min_size=(30, 30)):
    """This object detector is based on a scaled detector window. It scales
    the detector window instead of scaling the image: a detector window
    pyramid is constructed instead of an image pyramid.
    """
    v_stride = 1
    h_stride = 1
    objs = []
    # convert to gray scale if the image is color
    if len(in_img.shape) == 3:
        gray_img = cv2.cvtColor(in_img, cv2.COLOR_BGR2GRAY)
    else:
        gray_img = in_img

    img_height = gray_img.shape[0]
    img_width = gray_img.shape[1]
    cur_win_width = self.win_width
    cur_win_height = self.win_height

    # compute integral image. just a one-time process
    ii_img = cv2.integral(gray_img)
    print(ii_img.dtype)

    # initial scale 1, i.e. the original detector size is used
    scale = 1.0
    # upscale the detector window and detect objects until the window size
    # exceeds one of the image dimensions
    while cur_win_width < img_width and cur_win_height < img_height:
        # max possible window top-left corner positions
        x_max = img_width - cur_win_width + 1
        y_max = img_height - cur_win_height + 1
        print('current scale = {:f}'.format(scale))
        print('Detector height = {:d}, Detector width = {:d}'.format(cur_win_height, cur_win_width))
        for row in range(0, y_max, v_stride):
            for col in range(0, x_max, h_stride):
                # detect if the current window contains any objects
                win_pass = self._evaluate_window_scaled(col, row, scale, ii_img)
                # record the window if it passes
                if win_pass:
                    objs.append((int(col), int(row), int(cur_win_width), int(cur_win_height)))
        # upscale the detector window
        scale *= scale_factor
        cur_win_width = int(self.win_width * scale)
        cur_win_height = int(self.win_height * scale)

    # perform NMS
    print('No of boxes before NMS = {:d}'.format(len(objs)))
    objs = box_nms(objs, 0.2)
    print('No of boxes after NMS = {:d}'.format(len(objs)))
    return objs
def detect_objects(self, in_img, scale_factor=1.1, min_neighbors=3, min_size=(30, 30), max_size=()):
    """Detect objects using the LBP cascade classifier present in the given
    grayscale image. This has similar functionality to the
    cv2.detectMultiScale() method.
    """
    v_stride = 1
    h_stride = 1
    objs = []
    # convert to gray scale if the image is color
    if len(in_img.shape) == 3:
        gray_img = cv2.cvtColor(in_img, cv2.COLOR_BGR2GRAY)
    else:
        gray_img = in_img

    org_height = gray_img.shape[0]
    org_width = gray_img.shape[1]
    cur_width = org_width
    cur_height = org_height
    win_width = self.win_width
    win_height = self.win_height

    # initial scale 1 as we process the original image
    scale = 1.0
    # downscale the image and detect objects until one of the image
    # dimensions becomes less than the window size
    while cur_width > (win_width + 1) and cur_height > (win_height + 1):
        # max possible window top-left corner positions
        x_max = cur_width - win_width + 1
        y_max = cur_height - win_height + 1
        # compute integral image
        ii_img = cv2.integral(gray_img)
        print('current scale = {:f}'.format(scale))
        for row in range(0, y_max, v_stride):
            for col in range(0, x_max, h_stride):
                # detect if the current window contains any objects
                win_pass = self._evaluate_window(col, row, ii_img)
                # record the window if it passes (coordinates are mapped
                # back to the original image by the current scale)
                if win_pass:
                    objs.append((int(col * scale), int(row * scale),
                                 int(scale * win_width), int(scale * win_height)))
        # downscale the image; new detections run on the rescaled image
        cur_width = int(cur_width / scale_factor)
        cur_height = int(cur_height / scale_factor)
        scale *= scale_factor
        gray_img = cv2.resize(gray_img, dsize=(cur_width, cur_height),
                              interpolation=cv2.INTER_LINEAR)

    # perform NMS
    print('No of boxes before NMS = {:d}'.format(len(objs)))
    objs = box_nms(objs, 0.2)
    print('No of boxes after NMS = {:d}'.format(len(objs)))
    return objs
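# The _evaluate_window method used by these sliding-window detectors is not
# shown in this section. What the integral image buys it is O(1) rectangle
# sums. Below is a minimal sketch of that rectangle-sum primitive, assuming a
# cv2.integral()-style image (padded with a leading zero row and column);
# rect_sum is an illustrative helper, not the repo's actual code.
import cv2
import numpy as np

def rect_sum(ii_img, x, y, w, h):
    """Sum of pixels in the w x h rectangle whose top-left corner is (x, y),
    computed from a cv2.integral() image. The zero padding means the +w/+h
    indexing needs no off-by-one correction."""
    return (ii_img[y + h, x + w] - ii_img[y, x + w]
            - ii_img[y + h, x] + ii_img[y, x])

# Usage sketch: verify against a direct sum on a random image.
img = np.random.randint(0, 256, (60, 80), dtype=np.uint8)
ii = cv2.integral(img)
assert rect_sum(ii, 10, 5, 16, 8) == int(img[5:13, 10:26].sum())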
def decode(self, loc_preds, cls_preds, input_size):
    '''Decode outputs back to bounding box locations and class labels.

    Args:
      loc_preds: (tensor) predicted locations, sized [#anchors, 4].
      cls_preds: (tensor) predicted class labels, sized [#anchors, #classes].
      input_size: (int/tuple) model input size of (w,h).

    Returns:
      boxes: (tensor) decoded box locations, sized [#obj,4].
      labels: (tensor) class labels for each box, sized [#obj,].
    '''
    # CLS_THRESH = 0.08
    # NMS_THRESH = 0.5
    NMS_THRESH = 0.2
    N_BBOXES = 200

    input_size = torch.Tensor([input_size, input_size]) if isinstance(input_size, int) \
                 else torch.Tensor(input_size)
    anchor_boxes = self._get_anchor_boxes(input_size).cuda()
    # anchor_boxes = self._get_anchor_boxes(input_size)  # CPU variant

    loc_xy = loc_preds[:, :2]
    loc_wh = loc_preds[:, 2:]
    xy = loc_xy * anchor_boxes[:, 2:] + anchor_boxes[:, :2]
    wh = loc_wh.exp() * anchor_boxes[:, 2:]
    boxes = torch.cat([xy - wh / 2, xy + wh / 2], 1)  # [#anchors,4] (x1, y1, x2, y2)

    score, labels = cls_preds.sigmoid().max(1)  # [#anchors,]
    # Instead of thresholding the scores:
    # ids = score > CLS_THRESH
    # ids = ids.nonzero().squeeze()  # [#obj,]
    # ...take the N_BBOXES highest-scoring boxes.
    numpy_score = score.cpu().numpy().astype(np.float64)
    # numpy_score = score.numpy().astype(np.float64)  # CPU variant
    rank_ids = np.argsort(numpy_score)[::-1]
    if len(rank_ids) > N_BBOXES:
        choose_ids = rank_ids[:N_BBOXES].astype(np.int64)
    else:
        # keep everything when there are fewer anchors than N_BBOXES
        # (the original left `ids` undefined in this case)
        choose_ids = rank_ids.astype(np.int64)
    ids = torch.from_numpy(choose_ids).cuda()
    # ids = torch.from_numpy(choose_ids)  # CPU variant

    # keep = nms(torch.cat((boxes[ids].cuda(), score[ids].view(-1, 1).cuda()), 1), NMS_THRESH)
    # keep = keep.long().squeeze(1)
    keep = box_nms(boxes[ids], score[ids], threshold=NMS_THRESH)
    return boxes[ids][keep], labels[ids][keep]
def decode(self, loc_preds, cls_preds, input_size):
    '''Decode outputs back to bounding box locations and class labels.

    Args:
      loc_preds: (tensor) predicted locations, sized [#anchors, 4].
      cls_preds: (tensor) predicted class labels, sized [#anchors, #classes].
      input_size: (int/tuple) model input size of (w,h).

    Returns:
      boxes: (tensor) decoded box locations, sized [#obj,4].
      labels: (tensor) class labels for each box, sized [#obj,].
    '''
    CLS_THRESH = config.cls_threshold
    NMS_THRESH = config.nms_threshold

    input_size = torch.Tensor([input_size, input_size]) if isinstance(input_size, int) \
                 else torch.Tensor(input_size)
    anchor_boxes = self._get_anchor_boxes(input_size)

    loc_xy = loc_preds[:, :2]
    loc_wh = loc_preds[:, 2:]
    xy = loc_xy * anchor_boxes[:, 2:] + anchor_boxes[:, :2]
    wh = loc_wh.exp() * anchor_boxes[:, 2:]
    boxes = torch.cat([xy - wh / 2, xy + wh / 2], 1)  # [#anchors,4]

    """
    Earlier selection variants, kept for reference:
    cl = cls_preds.sigmoid()
    idd = cl[:, 1] > 0.7
    sum = idd.sum()
    ids = idd == 1
    idss = ids.nonzero().squeeze()
    score, labels = cls_preds.sigmoid().max(1)
    ids = score > CLS_THRESH
    ids = ids.nonzero().squeeze()
    sum = labels.sum()
    ids = labels == 1
    ids = ids.nonzero().squeeze()  # [#obj,]
    """
    # Select boxes by the sigmoid score of class 1 only.
    cl = cls_preds.sigmoid()
    # score, labels = cls_preds.sigmoid().max(1)
    ids = cl[:, 1] > CLS_THRESH
    ids = ids.nonzero().squeeze()
    pre_score = cl[ids, 1]
    pre_boxes = boxes[ids]
    if ids.dim() == 0:
        return None
    keep = box_nms(pre_boxes, pre_score, threshold=NMS_THRESH)
    return boxes[ids][keep]  # , labels[ids][keep]
def decode(self, outputs, input_size):
    '''Transform predicted loc/conf back to real bbox locations and class labels.

    Args:
      outputs: (tensor) model outputs, sized [1,125,13,13].
      input_size: (int) model input size.

    Returns:
      boxes: (tensor) bbox locations, normalized to [0,1], sized [#obj, 4].
    '''
    fmsize = outputs.size(2)
    outputs = outputs.view(5, 25, 13, 13)

    loc_xy = outputs[:, :2, :, :]  # [5,2,13,13]
    grid_xy = meshgrid(fmsize, swap_dims=True).view(fmsize, fmsize, 2).permute(2, 0, 1)  # [2,13,13]
    box_xy = loc_xy.sigmoid() + grid_xy.expand_as(loc_xy)  # [5,2,13,13]

    loc_wh = outputs[:, 2:4, :, :]  # [5,2,13,13]
    anchor_wh = torch.Tensor(self.anchors).view(5, 2, 1, 1).expand_as(loc_wh)  # [5,2,13,13]
    box_wh = anchor_wh * loc_wh.exp()  # [5,2,13,13]

    boxes = torch.cat([box_xy - box_wh / 2, box_xy + box_wh / 2], 1)  # [5,4,13,13]
    boxes = boxes.permute(0, 2, 3, 1).contiguous().view(-1, 4)  # [845,4]

    iou_preds = outputs[:, 4, :, :].sigmoid()  # [5,13,13]
    cls_preds = outputs[:, 5:, :, :]  # [5,20,13,13]
    cls_preds = cls_preds.permute(0, 2, 3, 1).contiguous().view(-1, 20)
    cls_preds = softmax(cls_preds)  # [5*13*13,20]

    # Class score is the class probability weighted by the objectness (IoU) score.
    score = cls_preds * iou_preds.view(-1).unsqueeze(1).expand_as(cls_preds)  # [5*13*13,20]
    score = score.max(1)[0].view(-1)  # [5*13*13,]
    print(iou_preds.max())
    print(cls_preds.max())
    print(score.max())

    ids = (score > 0.5).nonzero().squeeze()
    keep = box_nms(boxes[ids], score[ids])
    # Boxes are in feature-map units; divide by fmsize to normalize.
    return boxes[ids][keep] / fmsize
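# The meshgrid() helper above is repo-specific and not shown. A plausible
# minimal version is sketched below, assuming it returns the integer cell
# coordinates of an fmsize x fmsize grid as a [fmsize*fmsize, 2] tensor, with
# swap_dims selecting (x, y) column order; meshgrid_sketch and its exact
# semantics are assumptions, not the original helper.
import torch

def meshgrid_sketch(fmsize, swap_dims=False):
    """Hypothetical stand-in for meshgrid(): one row of grid coordinates per
    cell. With swap_dims=True the columns are (x, y) instead of (y, x)."""
    rows = torch.arange(fmsize).view(-1, 1).repeat(1, fmsize)  # y index
    cols = torch.arange(fmsize).view(1, -1).repeat(fmsize, 1)  # x index
    grid = torch.stack([cols, rows] if swap_dims else [rows, cols], dim=2)
    return grid.view(-1, 2).float()

# The decode above reshapes this back to [fmsize, fmsize, 2] and permutes it
# to [2, fmsize, fmsize] so it can be broadcast onto loc_xy.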
def decode(self, loc_preds, cls_preds, input_size):
    '''Decode outputs back to bounding box locations and class labels.

    Args:
      loc_preds: (tensor) predicted locations, sized [#anchors, 4].
      cls_preds: (tensor) predicted class labels, sized [#anchors, #classes].
      input_size: (int/tuple) model input size of (w,h).

    Returns:
      boxes: (tensor) decoded box locations, sized [#obj,4].
      labels: (tensor) class labels for each box, sized [#obj,].
    '''
    CLS_THRESH = 0.3
    NMS_THRESH = 0.4

    input_size = torch.Tensor([input_size, input_size]) if isinstance(input_size, int) \
                 else torch.Tensor(input_size)
    anchor_boxes = self._get_anchor_boxes(input_size)
    anchor_boxes = anchor_boxes.cuda()

    loc_xy = loc_preds[:, :, :2]
    loc_wh = loc_preds[:, :, 2:]
    xy = loc_xy * anchor_boxes[:, 2:] + anchor_boxes[:, :2]
    wh = loc_wh.exp() * anchor_boxes[:, 2:]
    boxes = torch.cat([xy - wh / 2, xy + wh / 2], -1)  # [#anchors,4]

    try:
        score, labels = cls_preds.sigmoid().max(-1)
    except Exception:
        # fall back when the class dimension was squeezed away
        score, labels = cls_preds.unsqueeze(1).sigmoid().max(-1)  # [#anchors,]
    ids = score > CLS_THRESH
    # ids = ids.nonzero()  # [#obj,]

    _t = {'im_detect': Timer()}
    _t['im_detect'].tic()
    keep = box_nms(boxes[ids], score[ids], threshold=NMS_THRESH)
    print(_t['im_detect'].toc() * 1000)  # NMS time in ms
    return boxes[ids][keep], labels[ids][keep], score[ids][keep]
def decode(self, loc_preds, cls_preds, center_preds, input_size, cls_threshold, nms_threshold):
    '''Decode outputs back to bounding box locations and class labels.

    Args:
      loc_preds: (tensor) predicted locations, sized [#batch, #samples, 4].
      cls_preds: (tensor) predicted class labels, sized [#batch, #samples, #classes].
      center_preds: (tensor) predicted centerness, sized [#batch, #samples, 1].
      input_size: (int/tuple) model input size of (h, w).

    Returns:
      boxes: (tensor) decoded box locations, sized [#obj,4].
      labels: (tensor) class labels for each box, sized [#obj,].
    '''
    scores, labels = cls_preds.sigmoid().max(1)
    pos_ind = scores > cls_threshold
    if pos_ind.to(dtype=torch.int8).nonzero().numel() == 0:
        return [], [], []

    # Weight class scores by the predicted centerness (FCOS-style).
    center_preds = center_preds.sigmoid()
    scores = scores[:, None] * center_preds
    # scores = scores[:, None]

    # locations = (#batch, #samples, 2(x-y coordinate))
    locations = self._get_pixel_locations(input_size, loc_preds.device)

    boxes = loc_preds[pos_ind]
    locations = locations[pos_ind]
    scores = scores[pos_ind]
    labels = labels[pos_ind]

    # loc_preds hold (left, top, right, bottom) distances from each location.
    boxes[:, 0] = locations[:, 0] - boxes[:, 0]
    boxes[:, 1] = locations[:, 1] - boxes[:, 1]
    boxes[:, 2] = locations[:, 0] + boxes[:, 2]
    boxes[:, 3] = locations[:, 1] + boxes[:, 3]

    # nms mode = 0: soft-nms(linear), 1: soft-nms(gaussian), 2: hard-nms
    keep = box_nms(boxes, scores, nms_threshold=nms_threshold, mode=2)
    return boxes[keep], scores[keep], labels[keep]
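# The mode flag above selects the score-decay rule inside box_nms. As a
# reference, here is a minimal sketch of the three decay functions a
# soft-NMS-capable box_nms typically applies (function name, signature, and
# the sigma default are assumptions for illustration, not the repo's helper).
import torch

def decay_weight(iou, mode, sigma=0.5, nms_threshold=0.5):
    """Score multiplier applied to boxes overlapping the kept box.
    mode 0: linear soft-NMS, 1: gaussian soft-NMS, 2: hard NMS."""
    if mode == 0:
        # linear: scale down scores only above the IoU threshold
        w = torch.ones_like(iou)
        over = iou > nms_threshold
        w[over] = 1.0 - iou[over]
        return w
    if mode == 1:
        # gaussian: smooth decay with overlap, no hard cutoff
        return torch.exp(-iou ** 2 / sigma)
    # hard NMS: suppress (zero out) boxes above the threshold
    return (iou <= nms_threshold).float()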
def decode(self, loc_preds, cls_preds, input_size):
    CLS_THRESH = 0.05
    NMS_THRESH = 0.3
    if isinstance(input_size, int):
        input_size = torch.Tensor([input_size, input_size])
    else:
        input_size = torch.Tensor(input_size)
    anchor_boxes = self.get_anchor_boxes(input_size)

    loc_xy = loc_preds[:, :2]
    loc_wh = loc_preds[:, 2:]
    xy = loc_xy * anchor_boxes[:, 2:] + anchor_boxes[:, :2]
    wh = loc_wh.exp() * anchor_boxes[:, 2:]
    boxes = torch.cat([xy, wh], 1)
    boxes = change_box_order(boxes, 'xywh2xyxy')

    # Keep confident, non-background predictions (label 0 is background).
    score, labels = cls_preds.max(1)
    ids = (score > CLS_THRESH) & (labels > 0)
    ids = ids.nonzero().squeeze()
    keep = box_nms(boxes[ids], score[ids], threshold=NMS_THRESH)
    return boxes[ids][keep], labels[ids][keep]
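# change_box_order is another helper shared by several variants but not shown
# here. A minimal sketch of the two conversions these snippets rely on,
# assuming (cx, cy, w, h) center-size and (x1, y1, x2, y2) corner formats;
# change_box_order_sketch is an illustrative stand-in.
import torch

def change_box_order_sketch(boxes, order):
    """Convert [N,4] boxes between center-size 'xywh' and corner 'xyxy'."""
    assert order in ('xywh2xyxy', 'xyxy2xywh')
    a = boxes[:, :2]
    b = boxes[:, 2:]
    if order == 'xywh2xyxy':
        # (cx,cy,w,h) -> (x1,y1,x2,y2)
        return torch.cat([a - b / 2, a + b / 2], 1)
    # (x1,y1,x2,y2) -> (cx,cy,w,h)
    return torch.cat([(a + b) / 2, b - a], 1)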
def decode(self, loc_preds, cls_preds, input_size):
    '''Decode outputs back to bounding box locations and class labels.

    Args:
      loc_preds: (tensor) predicted locations, sized [#anchors, 4].
      cls_preds: (tensor) predicted class labels, sized [#anchors, #classes].
      input_size: (int/tuple) model input size of (input_height, input_width).

    Returns:
      boxes: (tensor) decoded box locations, sized [#obj,4].
      labels: (tensor) class labels for each box, sized [#obj,].
    '''
    CLS_THRESH = 0.05
    NMS_THRESH = 0.5
    scale_factor = torch.Tensor([10, 10, 5, 5])  # scale [tx,ty,tw,th]

    input_size = torch.Tensor([input_size, input_size]) if isinstance(input_size, int) \
                 else torch.Tensor(input_size)
    anchor_boxes = self._get_anchor_boxes(input_size)

    # Undo the SSD-style variance scaling applied at encoding time.
    loc_preds /= scale_factor
    loc_xy = loc_preds[:, :2]
    loc_wh = loc_preds[:, 2:]
    xy = loc_xy * anchor_boxes[:, 2:] + anchor_boxes[:, :2]
    wh = loc_wh.exp() * anchor_boxes[:, 2:]
    boxes = torch.cat([xy - wh / 2, xy + wh / 2], 1)  # [#anchors,4]

    # Clip boxes to the image; input_size is (h, w).
    boxes[:, 0].clamp_(min=0)
    boxes[:, 1].clamp_(min=0)
    boxes[:, 2].clamp_(max=input_size[1])
    boxes[:, 3].clamp_(max=input_size[0])

    score, labels = cls_preds.max(1)  # [#anchors,]
    ids = (score > CLS_THRESH) & (labels > 0)
    ids = ids.nonzero().squeeze()  # [#obj,]
    keep = box_nms(boxes[ids], score[ids], threshold=NMS_THRESH)
    return boxes[ids][keep], labels[ids][keep]
num_batch = loc_preds.shape[0]
for iter_batch in range(num_batch):
    torch.cuda.synchronize()
    timer_post.tic()
    boxes, labels, scores = dataset.data_encoder.decode(
        loc_preds=loc_preds[iter_batch],
        cls_preds=cls_preds[iter_batch],
        input_size=(img_size[1], img_size[0]),
        cls_threshold=cls_th,
        top_k=top_k)

    if len(boxes) > 0:
        # nms mode = 0: soft-nms(linear), 1: soft-nms(gaussian), 2: hard-nms
        keep = utils.box_nms(boxes, scores, nms_threshold=nms_th, mode=2)
        boxes = boxes[keep]
        scores = scores[keep]
        labels = labels[keep]
    torch.cuda.synchronize()
    timer_post.toc()

    utils._write_results(result_dir, paths[iter_batch], boxes, scores, labels,
                         dataset.class_idx_map, img_size, bbox_colormap)

print()
print(f'device: {device}')
print(f'mean. elapsed time(inference): {timer_infer.average_time * 1000.:.4f}')
def iter_scan(scan, scan_array, patient_df, net, cube_size=64, stride=50, iou=0.01):
    scan_df = pd.DataFrame(columns=["scan_id", "z", "y", "x", "iou"])
    start_time = time.time()

    gt_boxes, gt_labels = annotation(patient_df)
    ais_gt_boxes, mia_gt_boxes = split_class(gt_boxes, gt_labels)

    # Accumulators start with one dummy row that is stripped before NMS.
    ais_locs = torch.FloatTensor(1, 6)
    ais_probs = torch.FloatTensor(1)
    mia_locs = torch.FloatTensor(1, 6)
    mia_probs = torch.FloatTensor(1)

    # Slide a cube over the scan and decode detections per cube.
    for z in range(0, scan_array.shape[0], stride):
        for y in range(0, scan_array.shape[1], stride):
            for x in range(0, scan_array.shape[2], stride):
                start_coord = torch.FloatTensor([z, y, x])
                zmax = min(z + cube_size, scan_array.shape[0])
                ymax = min(y + cube_size, scan_array.shape[1])
                xmax = min(x + cube_size, scan_array.shape[2])

                # Zero-pad cubes that overrun the scan boundary.
                cube_sample = np.zeros((cube_size, cube_size, cube_size), dtype=np.float32)
                cube_sample[:(zmax - z), :(ymax - y), :(xmax - x)] = scan_array[z:zmax, y:ymax, x:xmax]
                cube_sample = np.expand_dims(cube_sample, 0)
                cube_sample = np.expand_dims(cube_sample, 0)

                input_cube = Variable(torch.from_numpy(cube_sample).cuda())
                locs, clss = net(input_cube)
                locs = locs.data.cpu().squeeze()
                clss = clss.data.cpu().squeeze()
                ais_boxes, ais_scores, ais_labels, mia_boxes, mia_scores, mia_labels = \
                    DataEncoder().decode(locs, clss, [cube_size, cube_size, cube_size])

                # decode() returns 0 (an int) for a class with no detections.
                if not isinstance(ais_boxes, int):
                    ais_boxes = calc_scan_coord(ais_boxes, start_coord)
                    ais_locs = torch.cat([ais_locs, ais_boxes], 0)
                    ais_probs = torch.cat([ais_probs, ais_scores], 0)
                if not isinstance(mia_boxes, int):
                    mia_boxes = calc_scan_coord(mia_boxes, start_coord)
                    mia_locs = torch.cat([mia_locs, mia_boxes], 0)
                    mia_probs = torch.cat([mia_probs, mia_scores], 0)

    end_time = time.time()
    run_time = end_time - start_time
    print(run_time)

    if not isinstance(ais_gt_boxes, int):
        ais_locs = ais_locs[1:, :]  # drop the dummy row
        ais_probs = ais_probs[1:]
        ais_keep = box_nms(ais_locs, ais_probs)
        ais_locs = ais_locs[ais_keep]
        ais_probs = ais_probs[ais_keep]
        ais_count, best_ious = find_best_pred(ais_gt_boxes, ais_locs)
        ais_locs = change_box_order(ais_locs, "zyxzyx2zyxdhw")
        for i in range(ais_locs.size(0)):
            insert = {"scan_id": scan, "z": ais_locs[i, 0], "y": ais_locs[i, 1],
                      "x": ais_locs[i, 2], "iou": best_ious[i]}
            la_df = pd.DataFrame(data=insert, index=["0"])
            scan_df = pd.concat([scan_df, la_df], ignore_index=True)
    else:
        ais_count = np.zeros(3)

    if not isinstance(mia_gt_boxes, int):
        mia_locs = mia_locs[1:, :]  # drop the dummy row
        mia_probs = mia_probs[1:]
        mia_keep = box_nms(mia_locs, mia_probs)
        mia_locs = mia_locs[mia_keep]
        mia_probs = mia_probs[mia_keep]
        mia_count, best_ious = find_best_pred(mia_gt_boxes, mia_locs)
        for i in range(mia_locs.size(0)):
            insert = {"scan_id": scan, "z": mia_locs[i, 0], "y": mia_locs[i, 1],
                      "x": mia_locs[i, 2], "iou": best_ious[i]}
            la_df = pd.DataFrame(data=insert, index=["0"])
            scan_df = pd.concat([scan_df, la_df], ignore_index=True)
    else:
        mia_count = np.zeros(3)

    return ais_count, mia_count, scan_df
def decode(self, loc_preds, cls_preds, input_size):
    '''Decode outputs back to bounding box locations and class labels.

    Args:
      loc_preds: (tensor) predicted locations, sized [#anchors, 4].
      cls_preds: (tensor) predicted class labels, sized [#anchors, #classes].
      input_size: (int/tuple) model input size of (w,h).

    Returns:
      boxes: (tensor) decoded box locations, sized [#obj,4].
      labels: (tensor) class labels for each box, sized [#obj,].
    '''
    CLS_THRESH = 0.3
    NMS_THRESH = 0.3

    input_size = torch.Tensor([input_size, input_size]) if isinstance(
        input_size, int) else torch.Tensor(input_size)
    anchor_boxes = self._get_anchor_boxes(input_size)

    loc_xy = loc_preds[:, :2]
    loc_wh = loc_preds[:, 2:]
    xy = loc_xy * anchor_boxes[:, 2:] + anchor_boxes[:, :2]
    wh = loc_wh.exp() * anchor_boxes[:, 2:]
    boxes = torch.cat([xy - wh / 2, xy + wh / 2], 1)  # [#anchors,4]

    score, labels = cls_preds.sigmoid().max(1)  # [#anchors,]
    ids = score > CLS_THRESH
    ids = ids.nonzero().squeeze()  # [#obj,]

    if torch.numel(ids) > 1:
        keep = box_nms(boxes[ids], score[ids], threshold=NMS_THRESH)
    elif torch.numel(ids) == 1:
        # A single candidate: reshape so box_nms sees a [1,4] tensor.
        keep = box_nms(boxes[ids].view(1, 4), score[ids], threshold=NMS_THRESH)
        if torch.numel(keep) == 1:
            return boxes[ids].view(1, 4), labels[ids], score
        return boxes[ids][keep], labels[ids], score
    elif torch.numel(ids) == 0:
        # No candidate passed; progressively lower the threshold until at
        # least one box survives.
        while torch.numel(ids) == 0:
            if CLS_THRESH > 0.1:
                CLS_THRESH -= 0.05
            else:
                CLS_THRESH -= 0.01
            ids = score > CLS_THRESH
            ids = ids.nonzero().squeeze()
        if torch.numel(ids) > 1:
            keep = box_nms(boxes[ids], score[ids], threshold=NMS_THRESH)
        elif torch.numel(ids) == 1:
            keep = box_nms(boxes[ids].view(1, 4), score[ids], threshold=NMS_THRESH)
            if torch.numel(keep) == 1:
                return boxes[ids].view(1, 4), labels[ids], score
            return boxes[ids][keep], labels[ids], score
    return boxes[ids][keep], labels[ids][keep], score
def block_integral_object_detector(self, in_img, scale_factor=1.1, blk_height=60,
                                   blk_width=80, min_neighbors=3, min_size=(30, 30)):
    """This uses a block integral image instead of a full integral image."""
    v_stride = 1
    h_stride = 1
    objs = []
    # convert to gray scale if the image is color
    if len(in_img.shape) == 3:
        gray_img = cv2.cvtColor(in_img, cv2.COLOR_BGR2GRAY)
    else:
        gray_img = in_img

    org_height = gray_img.shape[0]
    org_width = gray_img.shape[1]
    cur_width = org_width
    cur_height = org_height
    win_width = self.win_width
    win_height = self.win_height
    # block strides; blk_width/blk_height must exceed the window size,
    # otherwise the block loops below would not advance
    blk_horz_stride = blk_width - win_width
    blk_vert_stride = blk_height - win_height

    # initial scale 1 as we process the original image
    scale = 1.0
    # downscale the image and detect objects until one of the image
    # dimensions becomes less than the window size
    while cur_width > (win_width + 1) and cur_height > (win_height + 1):
        # max possible window top-left corner positions
        x_max = cur_width - win_width + 1
        y_max = cur_height - win_height + 1
        # extract a sliding image block, compute the integral image on it,
        # and detect the objects in the current block
        print('Current scale = {:f}'.format(scale))
        blk_y = 0
        while blk_y < y_max:
            blk_x = 0
            while blk_x < x_max:
                print('Block position (y,x) = ({:d},{:d})'.format(blk_y, blk_x))
                # we cannot have a full block at the edge of the image
                max_blk_width = min(blk_width, cur_width - blk_x)
                max_blk_height = min(blk_height, cur_height - blk_y)
                # extract a block and compute its integral image
                img_blk = gray_img[blk_y:blk_y + max_blk_height,
                                   blk_x:blk_x + max_blk_width]
                ii_img = cv2.integral(img_blk)
                # now use a sliding-window detector to find objects in the block
                for row in range(0, max_blk_height - win_height + 1, v_stride):
                    for col in range(0, max_blk_width - win_width + 1, h_stride):
                        # detect if the current window contains any objects
                        win_pass = self._evaluate_window(col, row, ii_img)
                        # record the window if it passes
                        if win_pass:
                            objs.append((int((col + blk_x) * scale),
                                         int((row + blk_y) * scale),
                                         int(scale * win_width),
                                         int(scale * win_height)))
                # slide the block horizontally
                blk_x += blk_horz_stride
            # slide the block vertically
            blk_y += blk_vert_stride
        # downscale the image; new detections run on the rescaled image
        cur_width = int(cur_width / scale_factor)
        cur_height = int(cur_height / scale_factor)
        scale *= scale_factor
        gray_img = cv2.resize(gray_img, dsize=(cur_width, cur_height),
                              interpolation=cv2.INTER_LINEAR)

    # perform NMS
    print('No of boxes before NMS = {:d}'.format(len(objs)))
    objs = box_nms(objs, 0.2)
    print('No of boxes after NMS = {:d}'.format(len(objs)))
    return objs
def decode(self, cls_preds, loc_preds, input_size=None, device=torch.device('cuda:0')):
    '''Convert the network outputs into human-readable labels and boxes.

    Note that cls_preds and loc_preds are raw network outputs, so their first
    dimension is the batch; multiple images can be decoded in one call.

    Args:
      cls_preds: tensor, predicted label logits per anchor,
        sized [batch, #anchors, #classes].
      loc_preds: tensor, predicted box-regression offsets per anchor,
        sized [batch, #anchors, 4]; #anchors covers every anchor on all
        feature maps.
      input_size: int/tuple, the input image size. May be None, in which case
        the input_size given when the YEncoder object was instantiated is used.
      device: the device the anchor_boxes live on. Defaults to GPU; this may
        need changing for testing or CPU inference.

    Returns:
      scores: list of tensors, each sized [#boxes_i, #classes], the class
        scores of the predicted boxes in one image.
      boxes: list of tensors, each sized [#boxes_i, 4], the predicted box
        locations (xmin, ymin, xmax, ymax) in one image.
    '''
    # Compute all anchor information from the image size and anchor settings.
    if input_size is None:
        input_size = self.input_size
        anchor_boxes = self.anchor_boxes
        anchor_boxes = anchor_boxes.to(device)
    else:
        if len(input_size) != 2:
            raise ValueError('The TCT input_size is not 1920x1200, so it cannot be None')
        input_size = torch.tensor(input_size, dtype=torch.float)
        anchor_boxes = self._get_anchor_boxes(input_size)
        anchor_boxes = anchor_boxes.to(device)
    if cls_preds.dim() == 3:
        anchor_boxes = anchor_boxes.unsqueeze(0).expand_as(loc_preds)

    # Split off the predicted center offsets and width/height scalings.
    loc_xy = loc_preds[..., :2]
    loc_wh = loc_preds[..., 2:]
    # Combine the anchor information with the regression output (each anchor
    # is adjusted individually) to get the final predicted box locations,
    # then convert to xyxy mode.
    xy = loc_xy * anchor_boxes[..., 2:] + anchor_boxes[..., :2]
    wh = loc_wh.exp() * anchor_boxes[..., 2:]
    boxes = torch.cat([xy - wh / 2, xy + wh / 2], 2)  # xyxy format
    # Many predicted boxes fall outside the image; clamp them so they do not
    # distort the IoU computation.
    boxes[..., :2] = boxes[..., :2].clamp(min=0.)
    boxes[..., 2] = boxes[..., 2].clamp(max=input_size[0].item())
    boxes[..., 3] = boxes[..., 3].clamp(max=input_size[1].item())

    # Sigmoid the logits into probabilities and drop boxes whose best class
    # score is too low.
    cls_preds = cls_preds.sigmoid()
    score, labels = cls_preds.max(2)
    ids = score > self.cls_thre
    # ids = ids.nonzero().squeeze()  # [#obj, ]

    result_boxes = []
    result_score = []
    for i in range(cls_preds.size(0)):
        obj_boxes, obj_score = boxes[i][ids[i]], score[i][ids[i]]
        objs_score = cls_preds[i][ids[i]]
        # Run NMS on the surviving boxes; what remains is the final result.
        keep = box_nms(obj_boxes, obj_score, threshold=self.nms_thre)
        result_boxes.append(obj_boxes[keep])
        result_score.append(objs_score[keep])
    # After NMS each image yields a different number of boxes, so they cannot
    # all be stacked into one tensor (the box count would have to serve as
    # dim=1); a list is used to hold them instead.
    # NOTE: this was changed so that the scores of all classes are no longer
    # returned, only the predicted class and its score.
    return result_score, result_boxes
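# A minimal usage sketch for the batch decode above. `net` and `coder` stand
# for a trained detector and a YEncoder-style coder with cls_thre/nms_thre
# already configured; the (loc, cls) output order of the net, the image
# shape, and the run_batch_decode name are assumptions for illustration.
import torch

def run_batch_decode(net, coder, device=torch.device('cuda:0')):
    net.eval()
    with torch.no_grad():
        imgs = torch.randn(2, 3, 1200, 1920, device=device)  # [batch,C,H,W]
        loc_preds, cls_preds = net(imgs)                      # assumed order
        scores, boxes = coder.decode(cls_preds, loc_preds,
                                     input_size=(1920, 1200), device=device)
        # One entry per image; box counts differ after NMS.
        for i, b in enumerate(boxes):
            print('image {}: {} boxes after NMS'.format(i, b.size(0)))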
def decode(self, loc_preds, cls_preds, input_size):
    '''Decode outputs back to bounding box locations and class labels.

    Args:
      loc_preds: (tensor) predicted locations, sized [#anchors, 6]
      cls_preds: (tensor) predicted class labels, sized [#anchors, #classes]
      input_size: (int/tuple) model input size of (z, h, w)

    Returns:
      boxes: (tensor) decoded box locations, sized [#obj, 6]
      labels: (tensor) class labels for each box, sized [#obj,]
    '''
    CLS_THRESH = 0.75
    NMS_THRESH = 0.05
    if isinstance(input_size, int):
        input_size = torch.Tensor([input_size, input_size, input_size])
    else:
        input_size = torch.Tensor(input_size)
    anchor_boxes = self.get_anchor_boxes(input_size)

    # 3D decoding: (z,y,x) centers and (d,h,w) sizes.
    loc_zyx = loc_preds[:, :3]
    loc_dhw = loc_preds[:, 3:]
    zyx = loc_zyx * anchor_boxes[:, 3:] + anchor_boxes[:, :3]
    dhw = loc_dhw.exp() * anchor_boxes[:, 3:]
    boxes = torch.cat([zyx - dhw / 2, zyx + dhw / 2], 1)  # [#anchors, 6]

    # the best class for each anchor
    scores, labels = F.softmax(Variable(cls_preds), dim=1).data.max(1)  # [#anchors,]
    obj_idx = labels > 0
    obj_num = obj_idx.long().sum()
    if obj_num == 0:
        # no object found
        return 0, 0, 0, 0, 0, 0

    obj_mask = obj_idx.unsqueeze(1).expand_as(boxes)
    masked_scores = scores[obj_idx]
    masked_labels = labels[obj_idx]
    masked_boxes = boxes[obj_mask].view(-1, 6)

    ids = masked_scores > CLS_THRESH
    if ids.long().sum() == 0:
        return 0, 0, 0, 0, 0, 0

    box_ids = ids.unsqueeze(1).expand_as(masked_boxes)
    obj_boxes = masked_boxes[box_ids].view(-1, 6)
    obj_scores = masked_scores[ids]
    obj_labels = masked_labels[ids]

    # Per-class NMS: class 1 (AIS).
    ais_ids = obj_labels == 1
    if ais_ids.long().sum() == 0:
        ais_pred_boxes = 0
        ais_pred_scores = 0
        ais_pred_labels = 0
    else:
        ais_ids = ais_ids.nonzero().squeeze()
        ais_labels = obj_labels[ais_ids]
        ais_scores = obj_scores[ais_ids]
        ais_boxes = obj_boxes[ais_ids]
        ais_keep = box_nms(ais_boxes, ais_scores, threshold=NMS_THRESH)
        ais_pred_labels = ais_labels[ais_keep]
        ais_pred_scores = ais_scores[ais_keep]
        ais_pred_boxes = ais_boxes[ais_keep]

    # Per-class NMS: class 2 (MIA).
    mia_ids = obj_labels == 2
    if mia_ids.long().sum() == 0:
        mia_pred_boxes = 0
        mia_pred_scores = 0
        mia_pred_labels = 0
    else:
        mia_ids = mia_ids.nonzero().squeeze()
        mia_labels = obj_labels[mia_ids]
        mia_scores = obj_scores[mia_ids]
        mia_boxes = obj_boxes[mia_ids]
        mia_keep = box_nms(mia_boxes, mia_scores, threshold=NMS_THRESH)
        mia_pred_boxes = mia_boxes[mia_keep]
        mia_pred_scores = mia_scores[mia_keep]
        mia_pred_labels = mia_labels[mia_keep]

    return ais_pred_boxes, ais_pred_scores, ais_pred_labels, mia_pred_boxes, mia_pred_scores, mia_pred_labels
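# The box_nms used by the 3D decoders here must handle 6-coordinate boxes. A
# minimal sketch of the pairwise 3D IoU such an NMS would be built on,
# assuming (z1,y1,x1,z2,y2,x2) corner ordering (the ordering and the iou_3d
# name are assumptions, not the repo's helper).
import torch

def iou_3d(a, b):
    """Pairwise 3D IoU between [N,6] and [M,6] corner-format boxes."""
    lt = torch.max(a[:, None, :3], b[None, :, :3])  # intersection mins
    rb = torch.min(a[:, None, 3:], b[None, :, 3:])  # intersection maxs
    whd = (rb - lt).clamp(min=0)                    # [N,M,3] extents
    inter = whd.prod(dim=2)                         # [N,M] volumes
    vol_a = (a[:, 3:] - a[:, :3]).prod(dim=1)       # [N]
    vol_b = (b[:, 3:] - b[:, :3]).prod(dim=1)       # [M]
    return inter / (vol_a[:, None] + vol_b[None, :] - inter)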
def decode(self, loc_preds, cls_preds, pad_data, input_size, ori_img_shape, img_idx):
    '''Decode outputs back to bounding box locations and class labels.

    Args:
      loc_preds: (tensor) predicted locations, sized [#anchors, 4].
      cls_preds: (tensor) predicted class labels, sized [#anchors, #classes].
      input_size: (int/tuple) model input size of (w,h).

    Returns:
      boxes: (tensor) decoded box locations, sized [#obj, 4].
      labels: (tensor) class labels for each box, sized [#obj,].
    '''
    CONF_THRES = 0.05
    NMS_THRES = 0.5

    input_size = torch.Tensor([input_size[2], input_size[3]])
    pad_data = torch.Tensor([pad_data[3], pad_data[2]])
    anchor_boxes = self._get_anchor_boxes(pad_data)

    boxes_preds_obj = []
    score_obj = []
    labels_obj = []
    obj_idx = []
    # Decode per feature-pyramid level p.
    for p in range(len(anchor_boxes)):
        # Scale the regression outputs by fixed factors from the encoding scheme.
        loc_preds[p][img_idx][:, :2] *= 0.4
        loc_preds[p][img_idx][:, 2:] *= 0.8
        loc_xy_preds = loc_preds[p][img_idx][:, :2]
        loc_wh_preds = loc_preds[p][img_idx][:, 2:]
        xy_preds = loc_xy_preds * anchor_boxes[p][:, 2:].cuda() + \
            anchor_boxes[p][:, :2].cuda()
        wh_preds = torch.exp(loc_wh_preds) * anchor_boxes[p][:, 2:].cuda()

        # Map corners from padded-input coordinates back to the original image.
        x1y1_preds = xy_preds - wh_preds / 2
        x1y1_preds_ori = torch.zeros(x1y1_preds.shape)
        x1y1_preds_ori[:, 0] = x1y1_preds[:, 0] * torch.Tensor([ori_img_shape[2]]).cuda() \
            / torch.Tensor([input_size[0]]).cuda()
        x1y1_preds_ori[:, 1] = x1y1_preds[:, 1] * torch.Tensor([ori_img_shape[1]]).cuda() \
            / torch.Tensor([input_size[1]]).cuda()
        x2y2_preds = xy_preds + wh_preds / 2
        x2y2_preds_ori = torch.zeros(x2y2_preds.shape)
        x2y2_preds_ori[:, 0] = x2y2_preds[:, 0] * torch.Tensor([ori_img_shape[2]]).cuda() \
            / torch.Tensor([input_size[0]]).cuda()
        x2y2_preds_ori[:, 1] = x2y2_preds[:, 1] * torch.Tensor([ori_img_shape[1]]).cuda() \
            / torch.Tensor([input_size[1]]).cuda()
        boxes_preds = torch.cat([x1y1_preds_ori, x2y2_preds_ori], 1)

        score, labels = cls_preds[p][img_idx].sigmoid().max(1)
        if self.loss_fn == 'sigmoid':
            obj_idx_p = score > CONF_THRES
        elif self.loss_fn == 'softmax':
            obj_idx_p = torch.mul(score > CONF_THRES, labels > 0)

        # Optional per-level cap on candidates, kept for reference:
        # if boxes_preds[obj_idx_p].shape[0] > 1000:
        #     boxes_preds_obj.append(boxes_preds[obj_idx_p][:1000])
        #     score_obj.append(score[obj_idx_p][:1000])
        #     labels_obj.append(labels[obj_idx_p][:1000])
        # else:
        boxes_preds_obj.append(boxes_preds[obj_idx_p])
        score_obj.append(score[obj_idx_p])
        labels_obj.append(labels[obj_idx_p])
        obj_idx.append(obj_idx_p)

    boxes_preds_all = torch.cat(boxes_preds_obj, 0)
    score_all = torch.cat(score_obj, 0)
    labels_all = torch.cat(labels_obj, 0)
    obj_idx_all = torch.cat(obj_idx, 0)
    if obj_idx_all.nonzero().shape[0] != 0:
        nms_boxes = box_nms(boxes_preds_all, score_all, threshold=NMS_THRES)
        return boxes_preds_all[nms_boxes], labels_all[nms_boxes], score_all[nms_boxes]
    else:
        return torch.tensor([]), torch.tensor([]), torch.tensor([])