def encode(self, labels, boxes, input_size=None, test=False):
    '''Encode the ground-truth objects of ONE image into per-anchor targets.

    Bounding-box regression targets follow:
        tx = (x - anchor_x) / anchor_w
        ty = (y - anchor_y) / anchor_h
        tw = log(w / anchor_w)
        th = log(h / anchor_h)

    The method works on the objects of a single image, so it has to be
    called once per image.

    Args:
        labels: tensor, label of every ground-truth box, sized [#box,].
        boxes: tensor, ground-truth boxes as (xmin, ymin, xmax, ymax),
            sized [#box, 4].
        input_size: int/tuple, size of the input image. ``None`` uses
            ``self.input_size`` together with the pre-computed
            ``self.anchor_boxes``. An int is treated as a square size.
        test: test-time switch. When True, ``loc_targets`` holds the anchor
            boxes themselves (converted with 'xywh2xyxy') sorted by
            descending best IoU, and ``cls_targets`` is reordered the same
            way.

    Returns:
        cls_targets: tensor, label given to each anchor, sized [#anchors,].
            0 is background, 1..k are the k classes, -1 marks ignored
            anchors.
        loc_targets: tensor, regression target of each anchor, sized
            [#anchors, 4]; #anchors counts all anchors of all feature maps.

    Raises:
        ValueError: if ``input_size`` is a sequence whose length is not 2.
    '''
    if input_size is None:
        input_size = self.input_size
        anchor_boxes = self.anchor_boxes
    else:
        # The documented contract allows an int; len() on an int would
        # raise TypeError, so expand it to a square size first.
        if isinstance(input_size, int):
            input_size = (input_size, input_size)
        if len(input_size) != 2:
            raise ValueError('TCT的input_size不是1920x1200,所以不能是None')
        input_size = torch.tensor(input_size, dtype=torch.float)
        anchor_boxes = self._get_anchor_boxes(input_size)

    boxes = change_box_order(boxes, 'xyxy2xywh')
    # IoU between every anchor and every ground-truth box drives labelling.
    ious = box_iou(anchor_boxes, boxes, order='xywh')
    max_ious, max_ids = ious.max(1)
    boxes = boxes[max_ids]

    if test:
        # Second positional argument of sort() is `descending`.
        _, orders = max_ious.sort(0, True)
        loc_targets = change_box_order(anchor_boxes, 'xywh2xyxy')[orders]
    else:
        # Regression offsets, i.e. the bounding-box regression labels.
        loc_xy = (boxes[:, :2] - anchor_boxes[:, :2]) / anchor_boxes[:, 2:]
        loc_wh = torch.log(boxes[:, 2:] / anchor_boxes[:, 2:])
        loc_targets = torch.cat([loc_xy, loc_wh], 1)

    cls_targets = 1 + labels[max_ids]  # +1 frees label 0 for background
    cls_targets[max_ious < self.iou_thre] = 0
    ignore = (max_ious > self.ignore_thres[0]) & \
        (max_ious < self.ignore_thres[1])
    cls_targets[ignore] = -1  # these anchors are not used
    if test:
        cls_targets = cls_targets[orders]
    return cls_targets, loc_targets
def encode(self, gt_quad_boxes, labels, input_size):
    '''Encode quadrilateral ground-truth boxes and labels per anchor.

    TextBoxes++ quad_box encoder:
        tx_n = (x_n - anchor_x) / anchor_w
        ty_n = (y_n - anchor_y) / anchor_h

    Args:
        gt_quad_boxes: (tensor) bounding boxes of (xyxyxyxy), sized [#obj, 8].
        labels: (tensor) object class labels, sized [#obj, ].
        input_size: (int/tuple) model input size of (w,h).

    Returns:
        loc_targets: (tensor) encoded bounding boxes, sized [#anchors,8].
        cls_targets: (tensor) encoded class labels, sized [#anchors,].
            -1 marks ignored anchors (IoU in [0.4, 0.5)), 0 marks
            background (IoU < 0.4); other anchors keep the matched label.
    '''
    if isinstance(input_size, int):
        input_size = torch.Tensor([input_size, input_size])
    else:
        input_size = torch.Tensor(input_size)

    # Anchors come back as rectangles (used below as x, y, w, h columns);
    # the quad form is derived from them. Presumably [#anchors, 4] and
    # [#anchors, 8] respectively -- confirm against change_box_order.
    rect_anchors = self._get_anchor_boxes(input_size)
    quad_anchors = change_box_order(rect_anchors, "xywh2quad")

    # Matching is done on the axis-aligned hull of each ground-truth quad.
    gt_hulls = change_box_order(gt_quad_boxes, "quad2xyxy")
    overlaps = box_iou(rect_anchors, gt_hulls)
    best_iou, best_gt = overlaps.max(1)  # best ground truth for each anchor

    matched_quads = gt_quad_boxes[best_gt]

    # Every (x, y) corner offset is normalised by the anchor's columns 2:4,
    # repeated once per corner.
    corner_scale = rect_anchors[:, 2:4].repeat(1, 4)
    loc_targets = (matched_quads - quad_anchors) / corner_scale

    cls_targets = labels[best_gt]
    cls_targets[best_iou < 0.5] = -1  # ignore (0.4~0.5)
    cls_targets[best_iou < 0.4] = 0   # background (0.0~0.4)
    return loc_targets, cls_targets
def find_best_pred(gt_boxes, pred_boxes):
    '''
    Find whether there is a predicted box for each ground box

    A ground-truth box counts as detected when its nearest prediction
    (according to ``box_distance``) lies within half of the box diagonal
    plus a fixed margin of 10.

    Args:
        gt_boxes: (FloatTensor) [N, 6] zyxzyx
        pred_boxes: (FloatTensor) [M, 6] zyxzyx

    Returns:
        count: (ndarray) (tp, fn, fp), where fp = M - tp
        best_ious: (tensor) for every predicted box, its highest IoU over
            all ground-truth boxes
    '''
    distance = box_distance(gt_boxes, pred_boxes)
    iou = box_iou(gt_boxes, pred_boxes)

    # Nearest prediction for every ground-truth box.
    min_dists, _ = distance.min(1)
    # Best ground truth for every prediction is the one with the LARGEST
    # overlap; the previous min(0) reported the worst overlap instead.
    best_ious, _ = iou.max(0)

    gt_boxes = change_box_order(gt_boxes, order="zyxzyx2zyxdhw")
    tp = 0
    fn = 0
    for i in range(gt_boxes.size(0)):
        gt = gt_boxes[i, :]
        # Columns 3..5 are the box extents (d, h, w) after the conversion.
        diameter = math.sqrt(gt[3]**2 + gt[4]**2 + gt[5]**2)
        radius = diameter / 2 + 10.
        if min_dists[i] <= radius:
            tp += 1
        else:
            fn += 1
    fp = pred_boxes.size(0) - tp
    return np.array([tp, fn, fp]), best_ious
def encode(self, boxes, labels, input_size):
    """Encode ground-truth boxes with the Faster RCNN box coder.

        tx = (x - anchor_x) / anchor_w
        ty = (y - anchor_y) / anchor_h
        tw = log(w / anchor_w)
        th = log(h / anchor_h)

    args:
        boxes: Tensor (xmin, ymin, xmax, ymax), size (boxes_num, 4)
        labels: Tensor, size (boxes_num,)
        input_size: forwarded unchanged to ``self._get_anchor_boxes``
    return:
        target_loc: Tensor (anchor_num, 4)
        target_cls: Tensor (anchor_num,); 0 for anchors with IoU < 0.4,
            -1 for anchors with IoU in [0.4, 0.5), otherwise the label of
            the matched ground-truth box
    """
    anchors = self._get_anchor_boxes(input_size)  # [anchor_num, 4]
    gt_xywh = utils.change_box_order(boxes, 'xyxy2xywh')

    overlaps = utils.box_iou(anchors, gt_xywh, order='xywh')  # [anchor_num, boxes_num]
    best_iou, best_gt = overlaps.max(1)  # (anchor_num,)
    matched = gt_xywh[best_gt]  # ground truth assigned to each anchor

    anchor_xy = anchors[:, :2]
    anchor_wh = anchors[:, 2:]
    offset_xy = (matched[:, :2] - anchor_xy) / anchor_wh
    offset_wh = torch.log(matched[:, 2:] / anchor_wh)
    target_loc = torch.cat([offset_xy, offset_wh], 1)

    target_cls = labels[best_gt]
    below_positive = best_iou < 0.5
    target_cls[below_positive] = 0
    target_cls[below_positive & (best_iou >= 0.4)] = -1
    return target_loc, target_cls
def encode(self, boxes, labels, input_size):
    '''Encode target bounding boxes and class labels.

    Args:
        boxes: (tensor) bounding boxes of (xmin,ymin,xmax,ymax) in range
            [0,1], sized [#obj, 4].
        labels: (tensor) object class labels, sized [#obj,].
        input_size: (int) model input size.

    Returns:
        loc_targets: (tensor) encoded bounding boxes, sized
            [#total_anchors,4].
        cls_targets: (tensor) encoded class labels, sized [#total_anchors].
            0 is background (IoU < 0.4), -1 is ignored (IoU in
            [0.4, 0.5)), otherwise 1 + label of the matched object.
    '''
    anchor_boxes = self._get_anchor_boxes(input_size)
    boxes = change_box_order(boxes, 'xyxy2xywh')
    boxes = boxes * input_size  # scale to range [0,input_size]

    ious = box_iou(anchor_boxes, boxes, order='xywh')
    max_ious, max_ids = ious.max(1)
    boxes = boxes[max_ids]

    loc_xy = (boxes[:, :2] - anchor_boxes[:, :2]) / anchor_boxes[:, 2:]
    loc_wh = torch.log(boxes[:, 2:] / anchor_boxes[:, 2:])
    loc_targets = torch.cat([loc_xy, loc_wh], 1)

    cls_targets = 1 + labels[max_ids]
    cls_targets[max_ious < 0.4] = 0
    # Lower bound is inclusive: with a strict '>' an IoU of exactly 0.4
    # matched neither mask and was silently left as a positive anchor.
    ignore = (max_ious >= 0.4) & (max_ious < 0.5)
    cls_targets[ignore] = -1  # for now just mark ignored to -1
    return loc_targets, cls_targets
def encode(self, boxes, labels, input_size):
    '''Encode ground-truth boxes and labels into per-anchor targets.

    Args:
        boxes: ground-truth boxes as (xmin, ymin, xmax, ymax); presumably
            sized [#obj, 4] -- confirm with the caller.
        labels: class label per ground-truth box.
        input_size: int (square) or sequence, passed on as a tensor to
            ``self.get_anchor_boxes``.

    Returns:
        loc_targets: (tx, ty, tw, th) offsets of the matched box w.r.t.
            each anchor.
        cls_targets: 0 for IoU < 0.1, -1 for IoU in [0.1, 0.3), otherwise
            1 + label of the matched box.
    '''
    size = [input_size, input_size] if isinstance(input_size, int) else input_size
    input_size = torch.Tensor(size)
    anchors = self.get_anchor_boxes(input_size)

    gt = change_box_order(boxes, 'xyxy2xywh').float()

    # overlaps: one row per anchor, one column per ground-truth box.
    overlaps = box_iou(anchors, gt, order='xywh')
    # best_gt: index of the ground-truth box closest to each anchor;
    # best_iou: the IoU with that box.
    best_iou, best_gt = overlaps.max(1)
    # Ground-truth box assigned to every anchor.
    matched = gt[best_gt]

    anchor_xy = anchors[:, :2]
    anchor_wh = anchors[:, 2:]
    shift = (matched[:, :2] - anchor_xy) / anchor_wh
    log_scale = torch.log(matched[:, 2:] / anchor_wh)
    loc_targets = torch.cat([shift, log_scale], 1)

    cls_targets = 1 + labels[best_gt]
    cls_targets[best_iou < 0.1] = 0
    in_ignore_band = (best_iou >= 0.1) & (best_iou < 0.3)
    cls_targets[in_ignore_band] = -1
    return loc_targets, cls_targets
def encode(self, boxes, labels, input_size):
    '''Encode target bounding boxes and class labels.

    Args:
        boxes: (tensor) bounding boxes of (xmin,ymin,xmax,ymax), sized
            [#obj, 4].
        labels: (tensor) object class labels, sized [#obj,].
        input_size: (int/tuple) model input size of
            (input_height, input_width).

    Returns:
        loc_targets: (tensor) encoded bounding boxes, sized [#anchors,4].
        cls_targets: (tensor) encoded class labels, sized [#anchors,].
            0 is background (IoU < 0.4), -1 is ignored (IoU in
            [0.4, 0.5)), otherwise 1 + label of the matched object.
    '''
    if isinstance(input_size, int):
        input_size = torch.Tensor([input_size, input_size])
    else:
        input_size = torch.Tensor(input_size)
    anchor_boxes = self._get_anchor_boxes(input_size)
    boxes = change_box_order(boxes, 'xyxy2xywh')

    ious = box_iou(anchor_boxes, boxes, order='xywh')
    max_ious, max_ids = ious.max(1)
    boxes = boxes[max_ids]

    loc_xy = (boxes[:, :2] - anchor_boxes[:, :2]) / anchor_boxes[:, 2:]
    loc_wh = torch.log(boxes[:, 2:] / anchor_boxes[:, 2:])
    loc_targets = torch.cat([loc_xy, loc_wh], 1)

    cls_targets = 1 + labels[max_ids]
    cls_targets[max_ious < 0.4] = 0
    # Inclusive lower bound closes the gap at IoU == 0.4, which previously
    # escaped both masks and stayed labelled as a positive anchor.
    ignore = (max_ious >= 0.4) & (max_ious < 0.5)
    cls_targets[ignore] = -1  # for now just mark ignored to -1
    return loc_targets, cls_targets
def __getitem__(self, index):
    """Load one sample and return ``(image, bboxes, classes)``.

    Boxes are read from the image record, converted with
    ``self._from_pixels_to_pcnt`` (asserted to lie in [0, 1]), optionally
    passed through ``self.transforms`` and finally written into fixed-size
    arrays of ``self.num_anchors`` rows. Unused rows keep an all-zero box
    and ``self.background_cls``.

    Returns:
        image: the (possibly transformed) image.
        bboxes: float tensor of shape (num_anchors, 4), converted with
            "xyxy2xywh".
        classes: LongTensor of length num_anchors.

    NOTE(review): more boxes than ``self.num_anchors`` would index past
    the padded arrays -- confirm the dataset guarantees this cannot happen.
    """
    record = self.images[self.images_list[index]]

    path = record["file_name"]
    if self.img_dir is not None:
        path = os.path.join(self.img_dir, path)
    image = self._read_image(path)

    # Each element is [x1, y1, x2, y2, "class"].
    boxes = []
    for annotation in record["annotations"]:
        xyxy = self._from_pixels_to_pcnt(
            annotation["bbox"],
            record["width"],
            record["height"],
        )
        assert all(0 <= num <= 1 for num in xyxy), f"All numbers should be in range [0, 1], but got {xyxy}!"
        class_name = str(self.cid_to_class[annotation["category_id"]])
        boxes.append(xyxy + [class_name])

    if self.transforms is not None:
        transformed = self.transforms(image=image, bboxes=boxes)
        image = transformed["image"]
        boxes = transformed["bboxes"]

    padded_boxes = np.zeros((self.num_anchors, 4), dtype=np.float32)
    padded_classes = np.full(self.num_anchors, self.background_cls, dtype=np.int32)
    for row, (x1, y1, x2, y2, box_cls) in enumerate(boxes):
        padded_boxes[row, :] = [x1, y1, x2, y2]
        padded_classes[row] = int(box_cls)

    bboxes = change_box_order(torch.from_numpy(padded_boxes), "xyxy2xywh")
    classes = torch.LongTensor(padded_classes)
    return image, bboxes, classes
def decode(self, loc_preds, cls_preds, input_size):
    '''Decode network outputs into boxes, labels and scores.

    Args:
        loc_preds: predicted offsets per anchor, columns (tx, ty, tw, th);
            multiplied by ``self.std`` before decoding.
        cls_preds: per-anchor class logits; softmax is taken over dim 1.
        input_size: int (square) or sequence, forwarded as a tensor to
            ``self.get_anchor_boxes``.

    Returns:
        (boxes, labels, scores) of the detections kept by NMS, all on CPU,
        or (None, None, None) when no anchor is a non-background class
        with score above CLS_THRESH.
    '''
    CLS_THRESH = 0.05
    NMS_THRESH = 0.4

    if isinstance(input_size, int):
        input_size = torch.Tensor([input_size, input_size])
    else:
        input_size = torch.Tensor(input_size)
    anchor_boxes = self.get_anchor_boxes(input_size)

    std = Variable(self.std).cuda()
    loc_preds = loc_preds * std
    loc_cpu = loc_preds.data.cpu()
    loc_xy = loc_cpu[:, :2]
    loc_wh = loc_cpu[:, 2:]

    xy = loc_xy * anchor_boxes[:, 2:] + anchor_boxes[:, :2]
    wh = loc_wh.exp() * anchor_boxes[:, 2:]
    boxes = torch.cat([xy, wh], 1)
    boxes = change_box_order(boxes, 'xywh2xyxy')

    cls_preds = F.softmax(cls_preds, 1)
    score, labels = cls_preds.max(1)
    ids = (labels > 0) & (score > CLS_THRESH)
    ids = ids.nonzero()
    # Test emptiness on the element count. The former squeeze() followed by
    # len(ids.size()) == 0 is true for exactly ONE detection on current
    # PyTorch (0-dim index), so a lone detection was discarded while a
    # truly empty result slipped through to NMS.
    if ids.numel() == 0:
        return None, None, None
    ids = ids.view(-1).data.cpu()

    boxes_cpu = boxes.cpu()[ids]
    score_cpu = score.data.cpu()[ids]
    labels_cpu = labels.data.cpu()[ids]
    keep = box_nms(boxes_cpu, score_cpu, threshold=NMS_THRESH)
    return boxes_cpu[keep], labels_cpu[keep], score_cpu[keep]
def calc_scan_coord(boxes, start_coord):
    '''
    Translate cube-local boxes into scan coordinates.

    Args:
        boxes: (FloatTensor) object locations in cubes [N, 6] zyxzyx
        start_coord: (FloatTensor) cube start location in scans [3]

    Returns:
        scan_loc: (FloatTensor) object locations in scans [N, 6] zyxzyx
    '''
    centered = change_box_order(boxes, order="zyxzyx2zyxdhw")
    centers = centered[:, :3]
    extents = centered[:, 3:]

    # Only the centres move; the extents are unaffected by the translation.
    # The addition is in place on the converted tensor.
    offset = start_coord.unsqueeze(0).expand_as(centers)
    centers += offset

    shifted = torch.cat([centers, extents], 1)
    return change_box_order(shifted, order="zyxdhw2zyxzyx")
def encode(self, boxes, labels, input_size):
    '''Encode target bounding boxes and class labels.

    Faster RCNN box coder:
        tx = (x - anchor_x) / anchor_w
        ty = (y - anchor_y) / anchor_h
        tw = log(w / anchor_w)
        th = log(h / anchor_h)

    Args:
        boxes: (tensor) bounding boxes of (xmin,ymin,xmax,ymax), sized
            [#obj, 4].
        labels: (tensor) object class labels, sized [#obj,].
        input_size: (int/tuple) model input size of (w,h).

    Returns (note the order -- class targets first):
        cls_targets: (tensor) encoded class labels, sized [#anchors,].
            0 for IoU < 0.5, except -1 for IoU strictly between 0.4 and
            0.5; otherwise 1 + label of the matched object.
        loc_targets: (tensor) encoded bounding boxes, sized [#anchors,4].
    '''
    if isinstance(input_size, int):
        input_size = torch.Tensor([input_size, input_size])
    else:
        input_size = torch.Tensor(input_size)

    anchors = self._get_anchor_boxes(input_size)
    gt = change_box_order(boxes, 'xyxy2xywh')

    overlaps = box_iou(anchors, gt, order='xywh')
    best_iou, best_gt = overlaps.max(1)
    matched = gt[best_gt]

    anchor_xy, anchor_wh = anchors[:, :2], anchors[:, 2:]
    loc_targets = torch.cat(
        [
            (matched[:, :2] - anchor_xy) / anchor_wh,
            torch.log(matched[:, 2:] / anchor_wh),
        ],
        1,
    )

    cls_targets = 1 + labels[best_gt]
    not_positive = best_iou < 0.5
    cls_targets[not_positive] = 0
    # Anchors strictly between the two thresholds are marked ignored.
    cls_targets[(best_iou > 0.4) & not_positive] = -1
    return cls_targets, loc_targets
def encode(self, boxes, labels, input_size):
    '''Encode target bounding boxes and class labels.

    Faster RCNN box coder:
        tx = (x - anchor_x) / anchor_w
        ty = (y - anchor_y) / anchor_h
        tw = log(w / anchor_w)
        th = log(h / anchor_h)

    Args:
        boxes: (tensor) bounding boxes of (xmin,ymin,xmax,ymax), sized
            [#obj, 4]. May be empty.
        labels: (tensor) object class labels, sized [#obj,]. Used as-is
            for positive anchors (no +1 shift), so 0 is expected to be
            reserved for background.
        input_size: (int/tuple) model input size of (w,h).

    Returns:
        loc_targets: (tensor) encoded bounding boxes, sized [#anchors,4].
        cls_targets: (tensor) encoded class labels, sized [#anchors,].
            All zeros when there are no boxes.
    '''
    if isinstance(input_size, int):
        input_size = torch.tensor([input_size, input_size], dtype=torch.float32)
    else:
        input_size = torch.tensor(input_size, dtype=torch.float32)
    anchors = self._get_anchor_boxes(input_size)

    # No objects: every anchor is background with a zero regression target.
    if boxes.numel() == 0:
        cls_targets = torch.zeros(anchors.size(0), dtype=torch.int64)
        loc_targets = torch.zeros_like(anchors, dtype=torch.float32)
        return loc_targets, cls_targets

    gt = change_box_order(boxes, 'xyxy2xywh')
    overlaps = box_iou(anchors, gt, order='xywh')
    best_iou, best_gt = overlaps.max(1)
    matched = gt[best_gt]

    offset_xy = (matched[:, :2] - anchors[:, :2]) / anchors[:, 2:]
    offset_wh = torch.log(matched[:, 2:] / anchors[:, 2:])
    loc_targets = torch.cat([offset_xy, offset_wh], 1)

    cls_targets = labels[best_gt]
    not_positive = best_iou < 0.5
    cls_targets[not_positive] = 0  # 0 is background class
    # IoU strictly between 0.4 and 0.5 is marked ignored (-1).
    cls_targets[(best_iou > 0.4) & not_positive] = -1
    return loc_targets, cls_targets
def decode(self, loc_preds, cls_preds, input_size):
    '''Decode outputs back to bounding box locations and class labels.

    Args:
        loc_preds: (tensor) predicted locations, sized [#anchors, 8].
        cls_preds: (tensor) predicted class scores per anchor; reduced
            with ``max(1)``, so a class dimension is expected at dim 1.
        input_size: (int/tuple) model input size of (w,h).

    Returns:
        boxes: (ndarray) decoded quad corners, reshaped to [#obj, 4, 2].
        labels: class labels for each kept box.
        score: (ndarray) confidence for each kept box.

        When exactly one anchor passes the score threshold the index
        collapses to a scalar; that detection is returned without running
        NMS.
    '''
    if isinstance(input_size, int):
        input_size = torch.Tensor([input_size, input_size])
    else:
        input_size = torch.Tensor(input_size)

    anchor_rect_boxes = self._get_anchor_boxes(input_size).cuda()
    anchor_quad_boxes = change_box_order(anchor_rect_boxes, "xywh2quad")

    # Inverse of the encoder: corner = anchor_corner + anchor_size * offset.
    quad_boxes = anchor_quad_boxes + anchor_rect_boxes[:, 2:4].repeat(
        1, 4) * loc_preds  # [#anchor, 8]
    # NOTE(review): both x and y are clamped to input_size[0]; confirm this
    # is intended for non-square inputs.
    quad_boxes = torch.clamp(quad_boxes, 0, input_size[0])

    score, labels = cls_preds.sigmoid().max(1)  # focal loss

    # Classification score threshold
    ids = score > self.cls_thresh
    ids = ids.nonzero().squeeze()  # [#obj,]

    score = score[ids]
    labels = labels[ids]
    quad_boxes = quad_boxes[ids].view(-1, 4, 2)

    quad_boxes = quad_boxes.cpu().data.numpy()
    score = score.cpu().data.numpy()

    # Compare by value: `is 0` tests object identity against an int literal
    # and raises a SyntaxWarning on Python 3.8+.
    if score.ndim == 0:
        return quad_boxes, labels, score

    keep = non_max_suppression_poly(quad_boxes, score, self.nms_thresh)
    return quad_boxes[keep], labels[keep], score[keep]
def encode(self, boxes, labels, input_size):
    '''Encode target bounding boxes and class labels.

    We obey the Faster RCNN box coder:
        tx = (x - anchor_x) / anchor_w
        ty = (y - anchor_y) / anchor_h
        tw = log(w / anchor_w)
        th = log(h / anchor_h)
    Then we scale [tx,ty,tw,th] by [10,10,5,5] times to make loc_loss
    larger.

    Args:
        boxes: (tensor) bounding boxes of (xmin,ymin,xmax,ymax), sized
            [#obj, 4].
        labels: (tensor) object class labels, sized [#obj,].
        input_size: (int/tuple) model input size of
            (input_height, input_width).

    Returns:
        loc_targets: (tensor) encoded bounding boxes, sized [#anchors,4].
        cls_targets: (tensor) encoded class labels, sized [#anchors,].
            0 is background (IoU < 0.4), -1 is ignored (IoU in
            [0.4, 0.5)), otherwise 1 + label of the matched object.

    Reference:
        https://github.com/tensorflow/models/blob/master/object_detection/box_coders/faster_rcnn_box_coder.py
    '''
    scale_factor = torch.Tensor([10, 10, 5, 5])  # scale [tx,ty,tw,th]
    if isinstance(input_size, int):
        input_size = torch.Tensor([input_size, input_size])
    else:
        input_size = torch.Tensor(input_size)
    anchor_boxes = self._get_anchor_boxes(input_size)
    boxes = change_box_order(boxes, 'xyxy2xywh')

    ious = box_iou(anchor_boxes, boxes, order='xywh')
    max_ious, max_ids = ious.max(1)
    boxes = boxes[max_ids]

    loc_xy = (boxes[:, :2] - anchor_boxes[:, :2]) / anchor_boxes[:, 2:]
    loc_wh = torch.log(boxes[:, 2:] / anchor_boxes[:, 2:])
    loc_targets = torch.cat([loc_xy, loc_wh], 1) * scale_factor

    cls_targets = 1 + labels[max_ids]
    cls_targets[max_ious < 0.4] = 0
    # Inclusive lower bound: with a strict '>' an IoU of exactly 0.4 hit
    # neither mask and was left labelled as a positive anchor.
    ignore = (max_ious >= 0.4) & (max_ious < 0.5)
    cls_targets[ignore] = -1  # for now just mark ignored to -1
    return loc_targets, cls_targets
def encode(self, boxes, labels, input_size):
    '''Assign every anchor a regression target and a class target.

    Args:
        boxes: ground-truth boxes as (xmin, ymin, xmax, ymax); cast to
            float after conversion to centre/size form.
        labels: class label per ground-truth box.
        input_size: int (square) or sequence, forwarded as a tensor to
            ``self.get_anchor_boxes``.

    Returns:
        loc_targets: (tx, ty, tw, th) of the best-matching box per anchor.
        cls_targets: 0 for IoU < 0.4, -1 for IoU in [0.4, 0.5), otherwise
            1 + label of the best-matching box.
    '''
    input_size = (
        torch.Tensor([input_size, input_size])
        if isinstance(input_size, int)
        else torch.Tensor(input_size)
    )
    anchors = self.get_anchor_boxes(input_size)

    gt_boxes = change_box_order(boxes, 'xyxy2xywh')
    gt_boxes = gt_boxes.float()

    iou_matrix = box_iou(anchors, gt_boxes, order='xywh')
    top_iou, top_idx = iou_matrix.max(1)
    assigned = gt_boxes[top_idx]

    centre_delta = (assigned[:, :2] - anchors[:, :2]) / anchors[:, 2:]
    size_delta = torch.log(assigned[:, 2:] / anchors[:, 2:])
    loc_targets = torch.cat([centre_delta, size_delta], 1)

    cls_targets = 1 + labels[top_idx]
    background = top_iou < 0.4
    ignored = (top_iou >= 0.4) & (top_iou < 0.5)
    cls_targets[background] = 0
    cls_targets[ignored] = -1
    return loc_targets, cls_targets
def encode(self, boxes, labels, input_size):
    '''Encode target bounding boxes and class labels.

    Faster RCNN box coder:
        tx = (x - anchor_x) / anchor_w
        ty = (y - anchor_y) / anchor_h
        tw = log(w / anchor_w)
        th = log(h / anchor_h)

    Args:
        boxes: (tensor) bounding boxes of (xmin,ymin,xmax,ymax), sized
            [#obj, 4].
        labels: (tensor) object class labels, sized [#obj,].
        input_size: (int/tuple) model input size of (w,h).

    Returns:
        loc_targets: (tensor) encoded bounding boxes, sized [#anchors,4].
        cls_targets: (tensor) encoded class labels, sized [#anchors,].
            0 for IoU < 0.5, except -1 for IoU strictly between 0.4 and
            0.5; otherwise 1 + label of the matched object.
    '''
    if isinstance(input_size, int):
        input_size = torch.Tensor([input_size, input_size])
    else:
        input_size = torch.Tensor(input_size)

    anchors = self._get_anchor_boxes(input_size)
    targets_xywh = change_box_order(boxes, 'xyxy2xywh')

    # One row per anchor, one column per object.
    pairwise_iou = box_iou(anchors, targets_xywh, order='xywh')
    anchor_iou, anchor_match = pairwise_iou.max(1)
    targets_xywh = targets_xywh[anchor_match]

    a_centre = anchors[:, :2]
    a_size = anchors[:, 2:]
    d_centre = (targets_xywh[:, :2] - a_centre) / a_size
    d_size = torch.log(targets_xywh[:, 2:] / a_size)
    loc_targets = torch.cat([d_centre, d_size], 1)

    cls_targets = 1 + labels[anchor_match]
    low_overlap = anchor_iou < 0.5
    cls_targets[low_overlap] = 0
    # Anchors strictly between the thresholds are marked ignored (-1).
    near_miss = low_overlap & (anchor_iou > 0.4)
    cls_targets[near_miss] = -1
    return loc_targets, cls_targets
def decode(self, loc_preds, cls_preds, input_size):
    '''Decode per-anchor predictions into boxes and labels.

    Args:
        loc_preds: predicted offsets per anchor, columns (tx, ty, tw, th).
        cls_preds: per-anchor class scores; reduced with ``max(1)``.
        input_size: int (square) or sequence, forwarded as a tensor to
            ``self.get_anchor_boxes``.

    Returns:
        boxes: decoded boxes (after 'xywh2xyxy') kept by NMS.
        labels: class index of each kept box.
    '''
    CLS_THRESH = 0.05
    NMS_THRESH = 0.3

    if isinstance(input_size, int):
        input_size = torch.Tensor([input_size, input_size])
    else:
        input_size = torch.Tensor(input_size)
    anchor_boxes = self.get_anchor_boxes(input_size)

    loc_xy = loc_preds[:, :2]
    loc_wh = loc_preds[:, 2:]
    xy = loc_xy * anchor_boxes[:, 2:] + anchor_boxes[:, :2]
    wh = loc_wh.exp() * anchor_boxes[:, 2:]
    boxes = torch.cat([xy, wh], 1)
    boxes = change_box_order(boxes, 'xywh2xyxy')

    score, labels = cls_preds.max(1)
    ids = (score > CLS_THRESH) & (labels > 0)
    # Flatten explicitly. squeeze() collapses a single hit to a 0-dim
    # index, which makes boxes[ids] lose its row dimension before NMS.
    ids = ids.nonzero().view(-1)

    kept_boxes = boxes[ids]
    keep = box_nms(kept_boxes, score[ids], threshold=NMS_THRESH)
    return kept_boxes[keep], labels[ids][keep]
def encode(self, boxes, labels, input_size):
    '''Encode target bounding boxes and class labels.

    Faster RCNN box coder:
        tx = (x - anchor_x) / anchor_w
        ty = (y - anchor_y) / anchor_h
        tw = log(w / anchor_w)
        th = log(h / anchor_h)

    Args:
        boxes: (tensor) bounding boxes of (xmin,ymin,xmax,ymax), sized
            [#obj, 4].
        labels: (tensor) object class labels, sized [#obj,].
        input_size: (int/tuple) model input size of (w,h).

    Returns:
        loc_targets: (tensor) encoded bounding boxes, sized [#anchors,4].
        cls_targets: (tensor) encoded class labels, sized [#anchors,].
            0 for IoU < 0.5, except -1 for IoU strictly between 0.4 and
            0.5; otherwise 1 + label of the matched object.
    '''
    side = [input_size, input_size] if isinstance(input_size, int) else input_size
    input_size = torch.Tensor(side)

    anchors = self._get_anchor_boxes(input_size)
    objects = change_box_order(boxes, 'xyxy2xywh')

    iou_table = box_iou(anchors, objects, order='xywh')
    iou_per_anchor, obj_per_anchor = iou_table.max(1)
    objects = objects[obj_per_anchor]

    pieces = [
        (objects[:, :2] - anchors[:, :2]) / anchors[:, 2:],
        torch.log(objects[:, 2:] / anchors[:, 2:]),
    ]
    loc_targets = torch.cat(pieces, 1)

    cls_targets = 1 + labels[obj_per_anchor]
    under_half = iou_per_anchor < 0.5
    over_lower = iou_per_anchor > 0.4
    cls_targets[under_half] = 0
    cls_targets[over_lower & under_half] = -1  # ignored anchors
    return loc_targets, cls_targets
def encode(self, boxes, labels, input_size):
    """
    Encode target bounding boxes and class labels.

    Faster RCNN box coder:
        tx = (x - anchor_x) / anchor_w
        ty = (y - anchor_y) / anchor_h
        tw = log(w / anchor_w)
        th = log(h / anchor_h)
    The four components are then divided by (0.1, 0.1, 0.2, 0.2).

    :param boxes: (tensor) bounding boxes of (xmin, ymin, xmax, ymax), sized [#obj, 4].
    :param labels: (tensor) object class labels, sized [#obj,]. Used as-is
        for positive anchors (no +1 shift).
    :param input_size: (int/tuple) input size of the original image
    :return:
        loc_targets: (tensor) encoded bounding boxes, sized [#anchors, 4].
        cls_targets: (tensor) encoded class labels, sized [#anchors,].
            0 for IoU < 0.4, -1 for IoU in [0.4, 0.5).
    """
    if isinstance(input_size, int):
        input_size = torch.Tensor([input_size, input_size])
    else:
        input_size = torch.Tensor(input_size)

    anchors = self._get_anchor_boxes(input_size)
    gt = change_box_order(boxes, 'xyxy2xywh')

    overlaps = box_iou(anchors, gt, order='xywh')
    best_iou, best_gt = overlaps.max(1)
    matched = gt[best_gt]

    raw_xy = (matched[:, :2] - anchors[:, :2]) / anchors[:, 2:]
    raw_wh = torch.log(matched[:, 2:] / anchors[:, 2:])
    divisor = torch.Tensor([[0.1, 0.1, 0.2, 0.2]])
    loc_targets = torch.cat([raw_xy, raw_wh], 1) / divisor

    cls_targets = labels[best_gt]
    cls_targets[best_iou < 0.4] = 0
    # ignore ious between [0.4, 0.5)
    band = (best_iou >= 0.4) & (best_iou < 0.5)
    cls_targets[band] = -1
    return loc_targets, cls_targets
def encode(self, boxes, center_points, labels, colls_with, dimensions, bins,
           sines, coses, input_size):
    '''Encode target bounding boxes, class labels and auxiliary targets.

    We obey the Faster RCNN box coder:
        tx = (x - anchor_x) / anchor_w
        ty = (y - anchor_y) / anchor_h
        tw = log(w / anchor_w)
        th = log(h / anchor_h)

    Every per-object input is gathered with the index of the object that
    best overlaps each anchor. Anchors with IoU < 0.5 get their class,
    collision, dimension, bin, sine and cosine targets set to 0; anchors
    with IoU strictly between 0.4 and 0.5 additionally get class,
    collision, dimension and bin targets set to -1 (ignored).

    Args:
        boxes: (tensor) bounding boxes of (xmin,ymin,xmax,ymax), sized
            [#obj, 4].
        center_points: (tensor) per-object centre; columns 0:2 are encoded
            relative to the anchor, column 2 is passed through as depth.
        labels: (tensor) object class labels, sized [#obj,].
        colls_with: (tensor) whether the vehicle collides with the player
            agent, sized [#obj] (binary).
        dimensions: (tensor), sized [#obj, 3].
        bins, sines, coses: (tensor) per-object values carrying a
            singleton dim 1, which is squeezed away.
        input_size: (int/tuple) model input size of (w,h).

    Returns:
        loc_targets: (tensor) encoded bounding boxes, sized [#anchors,4].
        cls_targets: (tensor) encoded class labels, sized [#anchors,].
        center_targets: (tensor) encoded centre offsets plus depth, sized
            [#anchors,3].
        colls_with, dimensions, bins, sines, coses: per-anchor versions of
            the corresponding inputs.

    Raises:
        AssertionError: if ``boxes`` cannot be converted, e.g. for a
            vehicle-free frame, which a clean dataset should not contain.
    '''
    bins = bins.squeeze(1)
    sines = sines.squeeze(1)
    coses = coses.squeeze(1)

    if isinstance(input_size, int):
        input_size = torch.Tensor([input_size, input_size])
    else:
        input_size = torch.Tensor(input_size)
    anchor_boxes = self._get_anchor_boxes(input_size)

    try:
        boxes = change_box_order(boxes, 'xyxy2xywh')
    except Exception:
        # The old handler hit `assert (0)` first, so its zero-filled
        # fallback never ran. Raise the same exception type explicitly,
        # which also keeps the check alive under `python -O`.
        raise AssertionError(
            "a vehicle-free frame, which should be eliminated in a clean dataset"
        )

    colls_with = torch.Tensor(colls_with)

    ious = box_iou(anchor_boxes, boxes, order='xywh')
    max_ious, max_ids = ious.max(1)

    # Select the matching instance for every anchor.
    boxes = boxes[max_ids]
    center_points = center_points[max_ids]
    colls_with = colls_with[max_ids]
    dimensions = dimensions[max_ids]
    cls_targets = labels[max_ids]
    bins = bins[max_ids]
    sines = sines[max_ids]
    coses = coses[max_ids]

    # Offsets relative to the anchors.
    loc_xy = (boxes[:, :2] - anchor_boxes[:, :2]) / anchor_boxes[:, 2:]
    loc_wh = torch.log(boxes[:, 2:] / anchor_boxes[:, 2:])
    loc_targets = torch.cat([loc_xy, loc_wh], 1)  # sized [num_anchor, 4]

    center_xy = (center_points[:, :2] -
                 anchor_boxes[:, :2]) / anchor_boxes[:, 2:]
    center_depth = center_points[:, 2].unsqueeze(1)
    center_targets = torch.cat([center_xy, center_depth],
                               1)  # sized [num_anchor, 3]

    # Filter invalid or negative instances.
    negative = max_ious < 0.5
    sines[negative] = 0
    coses[negative] = 0
    bins[negative] = 0
    cls_targets[negative] = 0
    colls_with[negative] = 0
    dimensions[negative] = 0

    # Ignore instances that overlap, but not enough.
    ignore = (max_ious > 0.4) & (max_ious < 0.5)
    cls_targets[ignore] = -1  # for now just mark ignored to -1
    colls_with[ignore] = -1
    dimensions[ignore] = -1
    bins[ignore] = -1

    return (loc_targets, cls_targets, center_targets, colls_with,
            dimensions, bins, sines, coses)
def iter_scan(scan, scan_array, patient_df, net, cube_size=64, stride=50, iou=0.01):
    # Slide a cube over a 3D scan, run `net` on each cube, merge detections
    # per class ("ais" / "mia") and compare them with the ground truth.
    #
    # Args:
    #   scan: identifier written to the "scan_id" column of the result.
    #   scan_array: 3D array indexed as [z, y, x].
    #   patient_df: passed to `annotation` to obtain boxes and labels.
    #   net: callable returning (locs, clss) for one input cube.
    #   cube_size: edge length of the cube fed to the network.
    #   stride: step of the sliding window along every axis.
    #   iou: NOTE(review): never read in this function.
    #
    # Returns:
    #   ais_count, mia_count: (tp, fn, fp) arrays from `find_best_pred`, or
    #       zeros when that class has no ground truth.
    #   scan_df: one row per kept prediction with columns
    #       scan_id, z, y, x, iou.
    #
    # NOTE(review): the `isinstance(..., int)` checks presumably rely on
    # `split_class` / `DataEncoder.decode` returning an int as a "nothing
    # here" sentinel -- confirm against those helpers.
    # NOTE(review): `DataFrame.append` was removed in pandas 2.0; this
    # function needs an older pandas or a migration to `pd.concat`.
    scan_df = pd.DataFrame(columns=["scan_id", "z", "y", "x", "iou"])
    start_time = time.time()
    gt_boxes, gt_labels = annotation(patient_df)
    ais_gt_boxes, mia_gt_boxes = split_class(gt_boxes, gt_labels)
    # Placeholder first rows so torch.cat has something to extend; they are
    # sliced off again with [1:] after the sweep.
    ais_locs = torch.FloatTensor(1, 6)
    ais_probs = torch.FloatTensor(1)
    mia_locs = torch.FloatTensor(1, 6)
    mia_probs = torch.FloatTensor(1)
    for z in range(0, scan_array.shape[0], stride):
        for y in range(0, scan_array.shape[1], stride):
            for x in range(0, scan_array.shape[2], stride):
                start_coord = torch.FloatTensor([z, y, x])
                # NOTE(review): end_coord is computed but never used.
                end_coord = start_coord + torch.FloatTensor(
                    [cube_size, cube_size, cube_size])
                # Clip the cube at the scan border; the remainder of the
                # sample stays zero.
                zmax = min(z + cube_size, scan_array.shape[0])
                ymax = min(y + cube_size, scan_array.shape[1])
                xmax = min(x + cube_size, scan_array.shape[2])
                cube_sample = np.zeros((cube_size, cube_size, cube_size),
                                       dtype=np.float32)
                cube_sample[:(zmax - z), :(ymax - y), :(xmax - x)] = scan_array[z:zmax, y:ymax, x:xmax]
                # Two leading singleton axes are added before the forward pass.
                cube_sample = np.expand_dims(cube_sample, 0)
                cube_sample = np.expand_dims(cube_sample, 0)
                input_cube = Variable(torch.from_numpy(cube_sample).cuda())
                locs, clss = net(input_cube)
                locs = locs.data.cpu().squeeze()
                clss = clss.data.cpu().squeeze()
                ais_boxes, ais_scores, ais_labels, mia_boxes, mia_scores, mia_labels = DataEncoder(
                ).decode(locs, clss, [cube_size, cube_size, cube_size])
                if not isinstance(ais_boxes, int):
                    # Shift cube-local boxes into scan coordinates.
                    ais_boxes = calc_scan_coord(ais_boxes, start_coord)
                    ais_locs = torch.cat([ais_locs, ais_boxes], 0)
                    ais_probs = torch.cat([ais_probs, ais_scores], 0)
                if not isinstance(mia_boxes, int):
                    mia_boxes = calc_scan_coord(mia_boxes, start_coord)
                    mia_locs = torch.cat([mia_locs, mia_boxes], 0)
                    mia_probs = torch.cat([mia_probs, mia_scores], 0)
    end_time = time.time()
    run_time = end_time - start_time
    print(run_time)
    if not isinstance(ais_gt_boxes, int):
        # Drop the placeholder row, then suppress overlaps across cubes.
        ais_locs = ais_locs[1:, :]
        ais_probs = ais_probs[1:]
        ais_keep = box_nms(ais_locs,
                           ais_probs)
        ais_locs = ais_locs[ais_keep]
        ais_probs = ais_probs[ais_keep]
        ais_count, best_ious = find_best_pred(ais_gt_boxes, ais_locs)
        # After this conversion the first three columns are written out as
        # z, y, x.
        ais_locs = change_box_order(ais_locs, "zyxzyx2zyxdhw")
        for i in range(ais_locs.size(0)):
            insert = {
                "scan_id": scan,
                "z": ais_locs[i, 0],
                "y": ais_locs[i, 1],
                "x": ais_locs[i, 2],
                "iou": best_ious[i]
            }
            la_df = pd.DataFrame(data=insert, index=["0"])
            scan_df = scan_df.append(la_df, ignore_index=True)
    else:
        ais_count = np.zeros(3)
    if not isinstance(mia_gt_boxes, int):
        mia_locs = mia_locs[1:, :]
        mia_probs = mia_probs[1:]
        mia_keep = box_nms(mia_locs, mia_probs)
        mia_locs = mia_locs[mia_keep]
        mia_probs = mia_probs[mia_keep]
        mia_count, best_ious = find_best_pred(mia_gt_boxes, mia_locs)
        # NOTE(review): unlike the ais branch, mia_locs is NOT converted
        # with "zyxzyx2zyxdhw" before its first three columns are written
        # as z, y, x -- confirm whether this asymmetry is intended.
        for i in range(mia_locs.size(0)):
            insert = {
                "scan_id": scan,
                "z": mia_locs[i, 0],
                "y": mia_locs[i, 1],
                "x": mia_locs[i, 2],
                "iou": best_ious[i]
            }
            la_df = pd.DataFrame(data=insert, index=["0"])
            scan_df = scan_df.append(la_df, ignore_index=True)
    else:
        mia_count = np.zeros(3)
    return ais_count, mia_count, scan_df
def encode(self, boxes, labels, input_size):
    '''
    Encode target bounding boxes and class labels.

    Implement the Faster RCNN box coder in 3D image:
        tz = (z - anchor_z) / anchor_d
        ty = (y - anchor_y) / anchor_h
        tx = (x - anchor_x) / anchor_w
        td = log(d / anchor_d)
        th = log(h / anchor_h)
        tw = log(w / anchor_w)

    Labelling: anchors with best IoU < 0.4 are background (0); the best
    anchor of every object is forced positive; finally, randomly chosen
    background anchors are marked ignored (-1) so that at most 100 anchors
    stay un-ignored whenever enough background anchors exist. The random
    choice uses NumPy's global RNG.

    Args:
        boxes: (tensor) bounding boxes of
            (zmin, ymin, xmin, zmax, ymax, xmax), sized [#obj, 6]
        labels: (tensor) object class labels, sized [#obj,]
        input_size: (int/tuple) model input size of (d, h, w)

    Returns:
        loc_targets: (tensor) encoded bounding boxes, sized [#anchors, 6]
        cls_targets: (tensor) encoded class labels, sized [#anchors,]
    '''
    if isinstance(input_size, int):
        input_size = torch.Tensor([input_size, input_size, input_size])
    else:
        input_size = torch.Tensor(input_size)

    anchor_boxes = self.get_anchor_boxes(input_size)  # (z, y, x, d, h, w)
    boxes = change_box_order(boxes, 'zyxzyx2zyxdhw')

    ious = box_iou(anchor_boxes, boxes, order="zyxdhw")  # num_anchors x objects
    # Best object for each anchor: IoU value and object index.
    max_ious, max_ids = ious.max(1)
    # Best anchor for each object: only the anchor index is needed.
    _, best_ids = ious.max(0)

    boxes = boxes[max_ids]
    loc_zyx = (boxes[:, :3] - anchor_boxes[:, :3]) / anchor_boxes[:, 3:]
    # torch.log replaces the former NumPy round-trip.
    loc_dhw = torch.log(boxes[:, 3:] / anchor_boxes[:, 3:])
    loc_targets = torch.cat([loc_zyx, loc_dhw], 1)

    # The background class is 0, so object classes are shifted by +1.
    cls_targets = 1 + labels[max_ids]
    cls_targets[max_ious < 0.4] = 0
    # Kept as a sequential loop so that, when two objects share the same
    # best anchor, the later object wins exactly as before.
    for i in range(best_ids.size()[0]):
        cls_targets[best_ids[i]] = 1 + labels[i]

    # numpy() shares memory with cls_targets, so edits below write through.
    cls_targets_array = cls_targets.numpy()
    neg_idx = np.where(cls_targets_array == 0)[0]
    # Clamp to [0, #negatives]. With fewer than 100 anchors the raw value
    # is negative and np.random.choice raised ValueError.
    ig_num = max(0, min(cls_targets.size()[0] - 100, len(neg_idx)))
    if ig_num > 0:
        ig_idx = np.random.choice(neg_idx, ig_num, replace=False)
        cls_targets_array[ig_idx] = -1
    cls_targets = torch.from_numpy(cls_targets_array)

    return loc_targets, cls_targets
def encode(self, boxes, labels, input_size):
    '''Encode target bounding boxes and class labels.

    We obey the Faster RCNN box coder:
        tx = (x - anchor_x) / anchor_w
        ty = (y - anchor_y) / anchor_h
        tw = log(w / anchor_w)
        th = log(h / anchor_h)

    Args:
        boxes: (tensor) bounding boxes of (xmin,ymin,xmax,ymax), sized
            [#obj, 4].
        labels: (tensor) object class labels, sized [#obj,].
        input_size: (int/tuple) model input size of (w,h).

    Returns:
        loc_targets: (tensor) encoded bounding boxes, sized [#anchors,4].
        cls_targets: (tensor) encoded class labels, sized [#anchors,].
            0 for IoU < 0.1, except -1 for IoU strictly between 0.05 and
            0.1; otherwise 1 + label of the matched object.
    '''
    if isinstance(input_size, int):
        input_size = torch.Tensor([input_size, input_size])
    else:
        input_size = torch.Tensor(input_size)
    anchor_boxes = self._get_anchor_boxes(input_size)
    boxes = change_box_order(boxes, 'xyxy2xywh')

    ious = box_iou(anchor_boxes, boxes, order='xywh')
    max_ious, max_ids = ious.max(1)
    boxes = boxes[max_ids]

    loc_xy = (boxes[:, :2] - anchor_boxes[:, :2]) / anchor_boxes[:, 2:]
    loc_wh = torch.log(boxes[:, 2:] / anchor_boxes[:, 2:])
    loc_targets = torch.cat([loc_xy, loc_wh], 1)

    # Class is label + 1; every anchor starts out as positive.
    cls_targets = 1 + labels[max_ids]
    cls_targets[max_ious < 0.1] = 0
    ignore = (max_ious > 0.05) & (max_ious < 0.1)
    cls_targets[ignore] = -1  # for now just mark ignored to -1

    # A disabled experiment that randomly sub-sampled negatives (kept in a
    # string literal) and its unused NEG constant were removed; no
    # positive/negative sampling happens here.
    return loc_targets, cls_targets