def predict_boxes(self, images, boxes):
    device = list(self.parameters())[0].device
    images = images.to(device)
    boxes = boxes.to(device)
    targets = None
    original_image_sizes = [img.shape[-2:] for img in images]
    images, targets = self.transform(images, targets)
    features = self.backbone(images.tensors)
    if isinstance(features, torch.Tensor):
        features = collections.OrderedDict([('0', features)])
    # The RPN is bypassed; the boxes passed in act as the proposals.
    # proposals, proposal_losses = self.rpn(images, features, targets)
    from torchvision.models.detection.transform import resize_boxes
    boxes = resize_boxes(boxes, original_image_sizes[0], images.image_sizes[0])
    proposals = [boxes]
    box_features = self.roi_heads.box_roi_pool(features, proposals, images.image_sizes)
    box_features = self.roi_heads.box_head(box_features)
    class_logits, box_regression = self.roi_heads.box_predictor(box_features)
    pred_boxes = self.roi_heads.box_coder.decode(box_regression, proposals)
    pred_scores = F.softmax(class_logits, -1)
    # Drop the background column, then map boxes back to the original image size.
    pred_boxes = pred_boxes[:, 1:].squeeze(dim=1).detach()
    pred_boxes = resize_boxes(pred_boxes, images.image_sizes[0], original_image_sizes[0])
    pred_scores = pred_scores[:, 1:].squeeze(dim=1).detach()
    return pred_boxes, pred_scores
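# A minimal usage sketch (not from the source) for the variant above: regress
# previous-frame boxes on the current frame, Tracktor-style. FRCNN_FPN is an
# assumed name for a FasterRCNN subclass that defines predict_boxes as above;
# the checkpoint path and tensor shapes are illustrative.
import torch

model = FRCNN_FPN(num_classes=2)
model.load_state_dict(torch.load('checkpoint.pth'))  # placeholder path
model.eval()

frame = torch.rand(1, 3, 800, 1333)                    # current frame, (N, C, H, W)
prev_boxes = torch.tensor([[100., 150., 220., 400.]])  # (x1, y1, x2, y2)

with torch.no_grad():
    boxes, scores = model.predict_boxes(frame, prev_boxes)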
def predict_boxes(self, boxes, box_head=None, box_predictor=None):
    device = list(self.parameters())[0].device
    boxes = boxes.to(device)
    if isinstance(self.fpn_features, torch.Tensor):
        self.fpn_features = OrderedDict([('0', self.fpn_features)])
    from torchvision.models.detection.transform import resize_boxes
    boxes = resize_boxes(boxes, self.original_image_size[0], self.image_size[0])
    proposals = [boxes]
    box_features = self.roi_heads.box_roi_pool(self.fpn_features, proposals, self.image_size)
    # The stored box head/predictor can be overridden, e.g. with fine-tuned copies.
    if box_head is None:
        box_head = self.roi_heads.box_head
    box_features = box_head(box_features)
    if box_predictor is None:
        box_predictor = self.roi_heads.box_predictor
    class_logits, box_regression = box_predictor(box_features)
    pred_boxes = self.roi_heads.box_coder.decode(box_regression, proposals)
    pred_scores = F.softmax(class_logits, -1)
    pred_boxes = pred_boxes[:, 1:].squeeze(dim=1).detach()
    pred_boxes = resize_boxes(pred_boxes, self.image_size[0], self.original_image_size[0])
    pred_scores = pred_scores[:, 1:].squeeze(dim=1).detach()
    return pred_boxes, pred_scores
def predict_boxes(self, images, boxes):
    device = list(self.parameters())[0].device
    images = images.to(device)
    boxes = boxes.to(device)
    targets = None
    original_image_sizes = [img.shape[-2:] for img in images]
    images, targets = self.transform(images, targets)
    features = self.backbone(images.tensors)
    if isinstance(features, torch.Tensor):
        features = OrderedDict([('0', features)])
    # The RPN is bypassed; the boxes passed in act as the proposals.
    # proposals, proposal_losses = self.rpn(images, features, targets)
    from torchvision.models.detection.transform import resize_boxes
    boxes = resize_boxes(boxes, original_image_sizes[0], images.image_sizes[0])
    proposals = [boxes]
    box_features = self.roi_heads.box_roi_pool(features, proposals, images.image_sizes)
    box_features = self.roi_heads.box_head(box_features)
    class_logits, box_regression = self.roi_heads.box_predictor(box_features)
    pred_boxes = self.roi_heads.box_coder.decode(box_regression, proposals)
    pred_scores = F.softmax(class_logits, -1)
    # Alternative (disabled): temporarily relax the score/NMS thresholds, run
    # self.roi_heads(...) and self.transform.postprocess(...), and return
    # detections['boxes'] / detections['scores'] instead.
    # pred_masks = class_logits.argmax(1).eq(1)
    # Keep only the single foreground class (index 1) ...
    pred_boxes = pred_boxes[:, 1, :].detach()
    pred_boxes = resize_boxes(pred_boxes, images.image_sizes[0], original_image_sizes[0])
    # ... and the best foreground score per box.
    pred_scores = torch.max(pred_scores[:, 1:], 1)[0].detach()
    return pred_boxes, pred_scores  # optionally also pred_masks
def predict_boxes(self, boxes):
    device = list(self.parameters())[0].device
    boxes = boxes.to(device)
    try:
        boxes = resize_boxes(boxes, self.original_image_sizes[0],
                             self.preprocessed_images.image_sizes[0])
    except IndexError:
        print(boxes.size())
        raise
    proposals = [boxes]
    box_features = self.roi_heads.box_roi_pool(
        self.features, proposals, self.preprocessed_images.image_sizes)
    box_features = self.roi_heads.box_head(box_features)
    class_logits, box_regression = self.roi_heads.box_predictor(box_features)
    pred_boxes = self.roi_heads.box_coder.decode(box_regression, proposals)
    pred_scores = F.softmax(class_logits, -1)
    # Alternative (disabled): run self.roi_heads(...) with relaxed score/NMS
    # thresholds and self.transform.postprocess(...) instead.
    pred_boxes = pred_boxes[:, 1:].squeeze(dim=1).detach()
    pred_boxes = resize_boxes(pred_boxes, self.preprocessed_images.image_sizes[0],
                              self.original_image_sizes[0])
    pred_scores = pred_scores[:, 1:].squeeze(dim=1).detach()
    pred_boxes = box_ops.clip_boxes_to_image(pred_boxes, self.original_image_sizes[0])
    if self.version == 'v2':
        # Cache per-box RoI features, keyed by the rounded box coordinates.
        for box, box_feature in zip(pred_boxes, box_features):
            key = f"{int(box[0])},{int(box[1])},{int(box[2])},{int(box[3])}"
            self.box_features[key] = box_feature
    return pred_boxes, pred_scores
def predict_boxes(self, images, boxes):
    self.eval()
    device = list(self.parameters())[0].device
    images = images.to(device)
    boxes = boxes.to(device)
    targets = None
    original_image_sizes = [img.shape[-2:] for img in images]
    images, targets = self.transform(images, targets)
    features = self.backbone(images.tensors)
    if isinstance(features, torch.Tensor):
        features = OrderedDict([('0', features)])
    # The RPN is bypassed; the boxes passed in act as the proposals.
    # proposals, proposal_losses = self.rpn(images, features, targets)
    from torchvision.models.detection.transform import resize_boxes
    boxes = resize_boxes(boxes, original_image_sizes[0], images.image_sizes[0])
    proposals = [boxes]
    box_feats = self.roi_heads.box_roi_pool(features, proposals, images.image_sizes)
    box_features = self.roi_heads.box_head(box_feats)
    class_logits, box_regression = self.roi_heads.box_predictor(box_features)
    pred_boxes = self.roi_heads.box_coder.decode(box_regression, proposals)
    pred_scores = F.softmax(class_logits, -1)
    pred_boxes = pred_boxes[:, 1:].squeeze(dim=1).detach()
    pred_boxes = resize_boxes(pred_boxes, images.image_sizes[0], original_image_sizes[0])
    pred_scores = pred_scores[:, 1:].squeeze(dim=1).detach()
    mask_features = self.roi_heads.mask_roi_pool(features, proposals, images.image_sizes)
    cropped_features = self.roi_heads.mask_head(mask_features)
    mask_logits = self.roi_heads.mask_predictor(cropped_features)
    # Workaround that only works with 2 classes: move the foreground-class
    # logits into channel 0. Otherwise try to get maskrcnn_inference running,
    # or manually select the class with the highest score here.
    switch_channel_masks = torch.zeros_like(mask_logits)
    switch_channel_masks[:, 0, :, :] = mask_logits[:, 1, :, :]
    switch_channel_masks = torch.sigmoid(switch_channel_masks)
    pred_masks = paste_masks_in_image(switch_channel_masks, pred_boxes,
                                      original_image_sizes[0]).detach()
    return pred_boxes, pred_scores, pred_masks
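# A hedged follow-up to the mask-returning variant: binarize the pasted mask
# probabilities into per-instance masks. The 0.5 threshold is an assumed
# default, and `model`, `frame`, `prev_boxes` reuse the names from the usage
# sketch after the first predict_boxes above.
boxes, scores, masks = model.predict_boxes(frame, prev_boxes)
binary_masks = (masks > 0.5).squeeze(1)  # (N, H, W) boolean masks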
def ml_collate(elems):
    """Multi-level version of the `collate` function defined in this module."""
    (boxes_in, boxes_target, boxes_all, image_features, orig_image_sizes,
     image_sizes, lengths, feat_trans_all) = zip(*elems)
    boxes_in = default_collate(boxes_in)
    lengths = default_collate(lengths)
    boxes_target = default_collate(boxes_target)
    orig_image_sizes = torch.cat(orig_image_sizes)
    image_sizes = torch.cat(image_sizes)

    # Get resized bounding boxes for later RoI pooling.
    first_idc = [int(sum(lengths[:i])) for i in range(len(lengths))]
    boxes_resized = []
    feat_translation_resized = []
    for seq_start, boxes, trans in zip(first_idc, boxes_all, feat_trans_all):
        boxes_resized.append(resize_boxes(boxes, orig_image_sizes[seq_start],
                                          image_sizes[seq_start]))
        feat_translation_resized.append(
            resize_boxes(trans.repeat(1, 2), orig_image_sizes[seq_start],
                         image_sizes[seq_start])[:, :2]
        )
    boxes_resized = torch.cat(boxes_resized)

    # Calculate the feature translation at each feature-map scale.
    scales = [infer_scale(feat, image_sizes[0]) for feat in image_features]
    feat_trans = [(t_resized * scale).round()
                  for t_resized, scale in zip(feat_translation_resized, scales)]

    # Apply the translation to each feature level by padding and re-cropping.
    all_feat_out = []
    for lvl, feat in enumerate(image_features):
        pad_w = int(feat_trans[lvl][:, 0].abs().max())
        pad_h = int(feat_trans[lvl][:, 1].abs().max())
        if pad_w == 0 and pad_h == 0:
            feat_out = feat
        else:
            feat_padded = F.pad(feat, [pad_w, pad_w, pad_h, pad_h])
            origin = torch.tensor([pad_w, pad_h])
            new_coords = (origin - feat_trans[lvl]).long()
            h, w = feat.shape[-2:]
            feat_out = []
            for i in range(feat_padded.shape[0]):
                x, y = new_coords[i]
                feat_out.append(feat_padded[i, :, y:(y + h), x:(x + w)])
            feat_out = torch.stack(feat_out)
        all_feat_out.append(feat_out)
    levels = [roi_scales.index(s) for s in scales]
    return boxes_in, boxes_target, boxes_resized, all_feat_out, image_sizes, lengths, levels
def postprocess(self, results, image_shapes, original_image_sizes):
    if self.training:
        loss = results.pop()
    for pred, im_s, o_im_s in zip(results, image_shapes, original_image_sizes):
        boxes_h, boxes_o = pred['boxes_h'], pred['boxes_o']
        boxes_h = transform.resize_boxes(boxes_h, im_s, o_im_s)
        boxes_o = transform.resize_boxes(boxes_o, im_s, o_im_s)
        pred['boxes_h'], pred['boxes_o'] = boxes_h, boxes_o
    if self.training:
        results.append(loss)
    return results
def get_features(obj_detect, img_list, curr_frame_offset, curr_gt_app):
    """
    Input:
        img_list: list (len=clip_len) of (3, w, h) tensors; sizes may differ.
        curr_frame_offset: (batch,)
        curr_gt_app: (batch, 4)
    Output:
        box_features: (batch, 256, 7, 7), CUDA
        box_head_features: (batch, 1024), CUDA
    """
    box_features_list = []
    box_head_features_list = []
    with torch.no_grad():
        gts = curr_gt_app.cuda()
        for i, frame_idx in enumerate(curr_frame_offset):
            obj_detect.load_image(img_list[frame_idx].unsqueeze(0))
            gt = gts[i].unsqueeze(0)
            gt = clip_boxes_to_image(gt, img_list[frame_idx].shape[-2:])
            gt = resize_boxes(gt, obj_detect.original_image_sizes[0],
                              obj_detect.preprocessed_images.image_sizes[0])
            gt = [gt]
            box_features = obj_detect.roi_heads.box_roi_pool(
                obj_detect.features, gt, obj_detect.preprocessed_images.image_sizes)
            box_head_features = obj_detect.roi_heads.box_head(box_features)
            box_features_list.append(box_features.squeeze(0))
            box_head_features_list.append(box_head_features.squeeze(0))
    return torch.stack(box_features_list, 0), torch.stack(box_head_features_list, 0)
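# A hedged usage sketch for get_features (requires a CUDA device). `obj_detect`
# is assumed to be a detector wrapper exposing load_image / features /
# preprocessed_images as used above; clip length and shapes are illustrative.
clip = [torch.rand(3, 480, 640) for _ in range(8)]     # clip_len = 8
offsets = [0, 3, 7]                                    # frames to sample
gt_boxes = torch.tensor([[10., 20., 100., 200.]] * 3)  # (batch, 4)

roi_feats, head_feats = get_features(obj_detect, clip, offsets, gt_boxes)
# roi_feats: (3, 256, 7, 7); head_feats: (3, 1024), both on the GPU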
def resize(self, image, target=None):
    # type: (Tensor, Optional[Dict[str, Tensor]])
    h, w = image.shape[-2:]
    im_shape = torch.tensor(image.shape[-2:])
    min_size = float(torch.min(im_shape))
    max_size = float(torch.max(im_shape))
    # Commented out for now: random scale selection during training.
    # if self.training:
    #     size = float(self.torch_choice(self.min_size))
    # else:
    #     # FIXME assume for now that testing uses the largest scale
    size = float(self.model.cfg['INPUT']['MIN_SIZE_TEST'])
    scale_factor = size / min_size
    if max_size * scale_factor > self.model.cfg['INPUT']['MAX_SIZE_TEST']:
        scale_factor = self.model.cfg['INPUT']['MAX_SIZE_TEST'] / max_size
    # Note: resizes to a fixed size instead of passing scale_factor, and the
    # [None] batch dimension had to be removed.
    image = torch.nn.functional.interpolate(
        image, size=1024, mode='bilinear', align_corners=False)[0]
    if target is None:
        return image
    bbox = target["boxes"]
    bbox = resize_boxes(bbox, (h, w), image.shape[-2:])
    target["boxes"] = bbox
    if "masks" in target:
        mask = target["masks"]
        mask = misc_nn_ops.interpolate(mask[None].float(),
                                       scale_factor=scale_factor)[0].byte()
        target["masks"] = mask
    if "keypoints" in target:
        keypoints = target["keypoints"]
        keypoints = resize_keypoints(keypoints, (h, w), image.shape[-2:])
        target["keypoints"] = keypoints
    return image  # target is modified in place
def get_roi_features(self, obj_detect, img_list, gts):
    """
    Input:
        img_list: list of (1, 3, w, h) tensors; sizes may differ.
        gts: (batch, 4)
    Output:
        box_features: (batch, 256, 7, 7)
    """
    box_features_list = []
    with torch.no_grad():
        for i, img in enumerate(img_list):
            obj_detect.load_image(img)
            gt = gts[i].unsqueeze(0)
            gt = clip_boxes_to_image(gt, img.shape[-2:])
            gt = resize_boxes(
                gt, obj_detect.original_image_sizes[0],
                obj_detect.preprocessed_images.image_sizes[0])
            gt = [gt]
            box_features = obj_detect.roi_heads.box_roi_pool(
                obj_detect.features, gt,
                obj_detect.preprocessed_images.image_sizes)
            box_features_list.append(box_features.squeeze(0))
    return torch.stack(box_features_list, 0)
def detect_with_proposal(self, img, t_1_proposal):
    """
    See:
    https://github.com/pytorch/vision/blob/master/torchvision/models/detection/generalized_rcnn.py
    https://github.com/pytorch/vision/blob/master/torchvision/models/detection/roi_heads.py
    """
    images = img
    device = list(self.parameters())[0].device
    images = images.to(device)
    original_image_sizes = []
    for img in images:
        val = img.shape[-2:]
        assert len(val) == 2
        original_image_sizes.append((val[0], val[1]))
    images, _ = self.transform(images, None)
    features = self.backbone(images.tensors)
    if isinstance(features, torch.Tensor):
        features = OrderedDict([('0', features)])
    if not len(t_1_proposal):
        return torch.Tensor([]), torch.Tensor([])
    tt = resize_boxes(t_1_proposal.to(device), original_image_sizes[0],
                      images.image_sizes[0])
    # Alternative (disabled): full roi_heads forward plus postprocess.
    # detections, _ = self.roi_heads(features, tt, images.image_sizes, None)
    # detections = self.transform.postprocess(detections, images.image_sizes, original_image_sizes)
    # detections = detections[0]
    box_features = self.roi_heads.box_roi_pool(features, [tt], images.image_sizes)
    box_features = self.roi_heads.box_head(box_features)
    class_logits, box_regression = self.roi_heads.box_predictor(box_features)
    pred_boxes = self.roi_heads.box_coder.decode(box_regression, [tt])
    pred_scores = F.softmax(class_logits, -1)
    pred_boxes = pred_boxes[:, 1:].squeeze(dim=1).detach()
    pred_boxes = resize_boxes(pred_boxes, images.image_sizes[0],
                              original_image_sizes[0])
    pred_scores = pred_scores[:, 1:].squeeze(dim=1).detach()
    return pred_boxes, pred_scores
def bbox_regression(self, img, boxes):
    """
    Track objects from the previous frame with the bounding box regressor
    of the FRCNN_FPN.
    """
    # Move image and boxes to the device.
    device = list(self.parameters())[0].device
    img = img.to(device)
    boxes = boxes.to(device)
    # Perform the input transformation before feeding the image into a
    # GeneralizedRCNN model of torchvision.
    img_size = img.shape[-2:]
    img_transformed, targets = self.transform(img)
    img_transformed_size = img_transformed.image_sizes[0]
    # Compute the backbone features and put them in a format compatible with
    # the RoIHeads and RPN classes of torchvision.
    backbone_features = self.backbone(img_transformed.tensors)
    if isinstance(backbone_features, torch.Tensor):
        backbone_features = OrderedDict([('0', backbone_features)])
    # Resize boxes to the transformed image size.
    boxes = resize_boxes(boxes, img_size, img_transformed_size)
    # Forward pass of the RoIHeads class of torchvision.
    box_features = self.roi_heads.box_roi_pool(backbone_features, [boxes],
                                               [img_transformed_size])
    box_features = self.roi_heads.box_head(box_features)
    class_logits, box_regression = self.roi_heads.box_predictor(box_features)
    # Post-process the detections.
    boxes = self.roi_heads.box_coder.decode(box_regression, [boxes])
    scores = F.softmax(class_logits, -1)
    # Remove predictions with the background label.
    boxes = boxes[:, 1:]
    scores = scores[:, 1:]
    # Put the tensors in the correct shape for the Track class.
    boxes = boxes.squeeze(dim=1)
    scores = scores.squeeze(dim=1)
    # Resize back to the original image size.
    boxes = resize_boxes(boxes, img_transformed_size, img_size)
    return boxes.detach().cpu(), scores.detach().cpu()
def collate(elems):
    """
    Collate function for a PyTorch `DataLoader` that handles efficient
    batching of image features.
    """
    (boxes_in, boxes_target, boxes_all, image_features, orig_image_sizes,
     image_sizes, lengths, feat_trans_all) = zip(*elems)
    boxes_in = default_collate(boxes_in)
    lengths = default_collate(lengths)
    boxes_target = default_collate(boxes_target)
    image_features = torch.cat(image_features)
    orig_image_sizes = torch.cat(orig_image_sizes)
    image_sizes = torch.cat(image_sizes)

    # Get resized bounding boxes for later RoI pooling.
    first_idc = [int(sum(lengths[:i])) for i in range(len(lengths))]
    boxes_resized = []
    feat_translation_resized = []
    for seq_start, boxes, trans in zip(first_idc, boxes_all, feat_trans_all):
        boxes_resized.append(resize_boxes(boxes, orig_image_sizes[seq_start],
                                          image_sizes[seq_start]))
        feat_translation_resized.append(
            resize_boxes(trans.repeat(1, 2), orig_image_sizes[seq_start],
                         image_sizes[seq_start])[:, :2]
        )
    boxes_resized = torch.cat(boxes_resized)
    feat_translation_resized = torch.cat(feat_translation_resized)

    # Calculate the feature translation at feature-map scale.
    scale = infer_scale(image_features, image_sizes[0])
    feat_trans = (feat_translation_resized * scale).round()

    # Apply the translation to the feature map by padding and re-cropping.
    pad_w = int(feat_trans[:, 0].abs().max())
    pad_h = int(feat_trans[:, 1].abs().max())
    if pad_w == 0 and pad_h == 0:
        feat_out = image_features
    else:
        feat_padded = F.pad(image_features, [pad_w, pad_w, pad_h, pad_h])
        origin = torch.tensor([pad_w, pad_h])
        new_coords = (origin - feat_trans).long()
        h, w = image_features.shape[-2:]
        feat_out = []
        for i in range(feat_padded.shape[0]):
            x, y = new_coords[i]
            feat_out.append(feat_padded[i, :, y:(y + h), x:(x + w)])
        feat_out = torch.stack(feat_out)
    return boxes_in, boxes_target, boxes_resized, feat_out, image_sizes, lengths, None
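# A hedged usage sketch: plug `collate` into a DataLoader. The dataset is
# assumed to yield the 8-tuples this function unpacks (boxes_in, boxes_target,
# boxes_all, image_features, orig_image_sizes, image_sizes, lengths,
# feat_trans); `train_dataset` is a placeholder name.
from torch.utils.data import DataLoader

loader = DataLoader(train_dataset, batch_size=8, shuffle=True, collate_fn=collate)
for boxes_in, boxes_target, boxes_resized, feats, image_sizes, lengths, _ in loader:
    pass  # training step goes here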
def predict_boxes(self, boxes):
    device = list(self.parameters())[0].device
    boxes = boxes.to(device)
    boxes = resize_boxes(boxes, self.original_image_sizes[0],
                         self.preprocessed_images.image_sizes[0])
    proposals = [boxes]
    box_features = self.roi_heads.box_roi_pool(
        self.features, proposals, self.preprocessed_images.image_sizes)
    box_features = self.roi_heads.box_head(box_features)
    class_logits, box_regression = self.roi_heads.box_predictor(box_features)
    pred_boxes = self.roi_heads.box_coder.decode(box_regression, proposals)
    pred_scores = F.softmax(class_logits, -1)
    # Alternative (disabled): run self.roi_heads(...) with relaxed score/NMS
    # thresholds and self.transform.postprocess(...) instead.
    # Keep only class index 10 (hard-coded target class).
    pred_boxes = pred_boxes[:, 10, :].detach()
    pred_boxes = resize_boxes(pred_boxes, self.preprocessed_images.image_sizes[0],
                              self.original_image_sizes[0])
    pred_scores = pred_scores[:, 10].detach()
    return pred_boxes, pred_scores
def predict_with_correlation(self, prev_boxes, current_boxes, boxes_to_shift):
    prev_boxes_features, current_boxes_features = self.get_feature_patches(
        prev_boxes, current_boxes)
    boxes_deltas = self.correlation_head(prev_boxes_features, current_boxes_features)
    boxes_to_shift = resize_boxes(boxes_to_shift, self.original_image_sizes[0],
                                  self.preprocessed_images.image_sizes[0])
    pred_boxes = self.roi_heads.box_coder.decode(
        boxes_deltas, [boxes_to_shift]).squeeze(dim=1)
    pred_boxes = resize_boxes(pred_boxes, self.preprocessed_images.image_sizes[0],
                              self.original_image_sizes[0])
    return pred_boxes
def forward(self, images, targets=None):
    """
    Arguments:
        images (list[Tensor]): images to be processed
        targets (list[Dict[Tensor]]): ground-truth boxes present in the images (optional)

    Returns:
        result (list[BoxList] or dict[Tensor]): the output from the model.
            During training, it returns a dict[Tensor] which contains the losses.
            During testing, it returns a list[BoxList] that contains additional
            fields like `scores`, `labels` and `mask` (for Mask R-CNN models).
    """
    if self.training and targets is None:
        raise ValueError("In training mode, targets should be passed")
    original_image_sizes = [img.shape[-2:] for img in images]
    images, targets = self.transform(images, targets)
    features = self.backbone(images.tensors)
    # Restrict the feature dict to the configured number of levels (at most five).
    if self.n_channel_backbone < 5:
        in_channels = [(i, features[i]) for i in range(self.n_channel_backbone)]
        features = OrderedDict(in_channels)
    if self.n_channel_backbone > 5:
        in_channels = [(i, features[i]) for i in range(5)]
        features = OrderedDict(in_channels)
    if isinstance(features, torch.Tensor):
        features = OrderedDict([(0, features)])
    proposals, scores, proposal_losses = self.rpn(images, features, targets)
    detections, detector_losses = self.roi_heads(features, proposals,
                                                 images.image_sizes, targets)
    detections = self.transform.postprocess(detections, images.image_sizes,
                                            original_image_sizes)
    losses = {}
    losses.update(detector_losses)
    losses.update(proposal_losses)
    # Resize proposals back to the original image sizes.
    for i, (pred, im_s, o_im_s) in enumerate(
            zip(proposals, images.image_sizes, original_image_sizes)):
        boxes = resize_boxes(pred, im_s, o_im_s)
        proposals[i] = boxes
    if self.training:
        return losses
    # return detections, proposals
    return detections, features
def preprocess(self, images, detections, targets=None):
    original_image_sizes = [img.shape[-2:] for img in images]
    images, targets = self.transform(images, targets)
    for det, o_im_s, im_s in zip(detections, original_image_sizes, images.image_sizes):
        boxes = det['boxes']
        boxes = transform.resize_boxes(boxes, o_im_s, im_s)
        det['boxes'] = boxes
    return images, detections, targets, original_image_sizes
def get_feature_patches(self, prev_boxes, current_boxes):
    device = list(self.parameters())[0].device
    prev_boxes = prev_boxes.to(device)
    current_boxes = current_boxes.to(device)
    prev_boxes = resize_boxes(prev_boxes, self.prev_original_image_sizes[0],
                              self.prev_preprocessed_images.image_sizes[0])
    current_boxes = resize_boxes(current_boxes, self.original_image_sizes[0],
                                 self.preprocessed_images.image_sizes[0])
    prev_boxes_features = self.roi_heads.box_roi_pool(
        self.prev_features, [prev_boxes],
        self.prev_preprocessed_images.image_sizes)
    current_boxes_features = self.roi_heads.box_roi_pool(
        self.features, [current_boxes],
        self.preprocessed_images.image_sizes)
    return prev_boxes_features, current_boxes_features
def losses(self, batch, loss):
    (patch1, patch2, gt_boxes, prev_boxes, _, _, _,
     preprocessed_image_sizes, original_image_sizes) = batch
    # `Variable` is deprecated since PyTorch 0.4; moving to the GPU suffices.
    patch1 = patch1.cuda()
    patch2 = patch2.cuda()
    gt_boxes = gt_boxes.cuda()
    prev_boxes = prev_boxes.cuda()
    boxes_deltas = self.forward(patch1, patch2)
    prev_boxes = resize_boxes(prev_boxes, original_image_sizes[0],
                              preprocessed_image_sizes[0])
    pred_boxes = self.roi_heads.box_coder.decode(
        boxes_deltas, [prev_boxes]).squeeze(dim=1)
    pred_boxes = resize_boxes(pred_boxes, preprocessed_image_sizes[0],
                              original_image_sizes[0])
    if loss == "GIoU":
        total_loss = self.giou_loss(pred_boxes, gt_boxes)
    elif loss == "IoU":
        total_loss = box_iou(pred_boxes, gt_boxes).diag()
        total_loss = torch.mean(total_loss)
    elif loss == "MSE":
        total_loss = F.mse_loss(pred_boxes, gt_boxes)
    elif loss == "fasterRCNN":
        total_loss = self.smooth_l1_loss(pred_boxes, gt_boxes)
        total_loss /= len(gt_boxes)
    else:
        raise NotImplementedError("Loss: {}".format(loss))
    return total_loss
def postprocess(self, result, image_shapes, original_image_sizes):
    if self.training:
        return result
    for i, (pred, im_s, o_im_s) in enumerate(zip(result, image_shapes,
                                                 original_image_sizes)):
        boxes = pred["boxes"]
        boxes = resize_boxes(boxes, im_s, o_im_s)
        result[i]["boxes"] = boxes
        for k in ['pose2d', 'body_pose2d', 'hand_pose2d', 'face_pose2d']:
            if k in pred and pred[k] is not None:
                pose2d = pred[k]
                pose2d = resize_keypoints(pose2d, im_s, o_im_s)
                result[i][k] = pose2d
    return result
def resize(self, image, target):
    """Override method to resize box pairs."""
    h, w = image.shape[-2:]
    min_size = float(min(image.shape[-2:]))
    max_size = float(max(image.shape[-2:]))
    scale_factor = min(self.min_size[0] / min_size, self.max_size / max_size)
    image = nn.functional.interpolate(image[None], scale_factor=scale_factor,
                                      mode='bilinear', align_corners=False)[0]
    if target is None:
        return image, target
    target['boxes_h'] = transform.resize_boxes(target['boxes_h'], (h, w),
                                               image.shape[-2:])
    target['boxes_o'] = transform.resize_boxes(target['boxes_o'], (h, w),
                                               image.shape[-2:])
    return image, target
def bounding_box_regression(self, image, prev_boxes):
    original_image_sizes = torch.jit.annotate(List[Tuple[int, int]], [])
    original_image_sizes.append((image.size()[2], image.size()[3]))
    images, targets = self.obj_detect.transform(image.cuda(), None)
    prev_boxes = torch.Tensor(prev_boxes)
    # plot_boxes(image, prev_boxes)
    prev_boxes = resize_boxes(prev_boxes.squeeze(1), original_image_sizes[0],
                              images.image_sizes[0])
    feats = self.obj_detect.backbone(images.tensors)
    roi_heads = self.obj_detect.roi_heads
    box_features = roi_heads.box_roi_pool(feats, [prev_boxes.cuda()],
                                          images.image_sizes)
    box_features = roi_heads.box_head(box_features)
    class_logits, box_regression = roi_heads.box_predictor(box_features)
    pred_boxes = roi_heads.box_coder.decode(box_regression, [prev_boxes.cuda()])
    pred_boxes = pred_boxes[:, 1:].squeeze(dim=1).detach()  # new boxes
    pred_boxes = resize_boxes(pred_boxes, images.image_sizes[0],
                              original_image_sizes[0])
    pred_scores = F.softmax(class_logits, -1)  # classification scores for new boxes
    pred_scores = pred_scores[:, 1:].squeeze(dim=1).detach()
    return pred_boxes, pred_scores
def forward(self, images, targets=None):
    """
    Arguments:
        images (list[Tensor]): images to be processed
        targets (list[Dict[Tensor]]): ground-truth boxes present in the images (optional)

    Returns:
        result (list[BoxList] or dict[Tensor]): the output from the model.
            During training, it returns a dict[Tensor] which contains the losses.
            During testing, it returns a list[BoxList] that contains additional fields.
    """
    if self.training and targets is None:
        raise ValueError("In training mode, targets should be passed")
    original_image_sizes = [img.shape[-2:] for img in images]
    images, targets = self.transform(images, targets)
    features = self.backbone(images.tensors)
    if isinstance(features, torch.Tensor):
        features = OrderedDict([(0, features)])
    proposals, scores, proposal_losses = self.rpn(images, features, targets)
    boxes, scores = self.filter_proposals(proposals, scores)
    result = []
    for i in range(len(scores)):
        score = scores[i].cpu().numpy()
        # Min-max normalize the objectness scores, offset by eps.
        eps = 0.05
        score = (score - np.min(score)) / abs(np.max(score) - np.min(score)) + eps
        # score = np.ones(len(score))
        score = torch.tensor(score)
        result.append({
            "boxes": boxes[i],
            "scores": score,
            "labels": torch.tensor([1] * len(scores[i]))
        })
    detections = self.transform.postprocess(result, images.image_sizes,
                                            original_image_sizes)
    # Resize proposals back to the original image sizes.
    for i, (pred, im_s, o_im_s) in enumerate(
            zip(proposals, images.image_sizes, original_image_sizes)):
        boxes = resize_boxes(pred, im_s, o_im_s)
        proposals[i] = boxes
    losses = {}
    losses.update(proposal_losses)
    if self.training:
        return losses
    return detections, proposals
def predict_boxes(self, boxes):
    device = self.model.cfg['MODEL']['DEVICE']
    boxes = boxes.to(device)
    boxes = resize_boxes(boxes, self.original_image_sizes[0],
                         self.preprocessed_images.shape)
    # Wrap the boxes in the detectron2 box class to use them as proposals.
    boxes = box_class(boxes)
    proposals = [boxes]
    try:
        box_features = self.model.model.roi_heads.box_pooler(self.features,
                                                             proposals)
    except Exception:
        # Fall back to a plain list of feature maps (dropping the last level).
        feat_list = [self.features[k] for k in self.features][:-1]
        box_features = self.model.model.roi_heads.box_pooler(feat_list,
                                                             proposals)
    box_features = self.model.model.roi_heads.box_head(box_features)
    class_logits, box_regression = self.model.model.roi_heads.box_predictor(box_features)
    # box_coder.decode(box_regression, proposals) fails here, so return the
    # (resized) proposals themselves instead of the regressed boxes.
    pred_boxes = proposals[0].tensor
    pred_scores = F.softmax(class_logits, -1)
    pred_boxes = resize_boxes(pred_boxes, self.preprocessed_images.shape,
                              self.original_image_sizes[0])
    pred_scores = pred_scores[:, 1:].squeeze(dim=1).detach()
    return pred_boxes, pred_scores
def get_features(obj_detect, img, gts):
    with torch.no_grad():
        obj_detect.load_image(img)
        gts = gts.squeeze(0).cuda()
        gts = resize_boxes(gts, obj_detect.original_image_sizes[0],
                           obj_detect.preprocessed_images.image_sizes[0])
        gts = [gts]
        box_features = obj_detect.roi_heads.box_roi_pool(
            obj_detect.features, gts, obj_detect.preprocessed_images.image_sizes)
        box_head_features = obj_detect.roi_heads.box_head(box_features)
    return box_features.cpu(), box_head_features.cpu()
def preprocess(
    self,
    images: List[Tensor],
    detections: List[dict],
    targets: Optional[List[dict]] = None
) -> Tuple[List[Tensor], List[dict], List[dict], List[Tuple[int, int]]]:
    original_image_sizes = [img.shape[-2:] for img in images]
    images, targets = self.transform(images, targets)
    for det, o_im_s, im_s in zip(detections, original_image_sizes,
                                 images.image_sizes):
        boxes = det['boxes']
        boxes = transform.resize_boxes(boxes, o_im_s, im_s)
        det['boxes'] = boxes
    return images, detections, targets, original_image_sizes
def __call__(self, image, target):
    h, w = image.shape[-2:]
    min_size = float(min(image.shape[-2:]))
    max_size = float(max(image.shape[-2:]))
    size = self.min_side
    scale_factor = size / min_size
    if max_size * scale_factor > self.max_side:
        scale_factor = self.max_side / max_size
    image = torch.nn.functional.interpolate(
        image[None], scale_factor=scale_factor, mode='bilinear',
        align_corners=False)[0]
    if target is None or target["boxes"].nelement() == 0:
        return image, target
    bbox = target["boxes"]
    bbox = resize_boxes(bbox, (h, w), image.shape[-2:])
    target["boxes"] = bbox
    return image, target
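# A worked check of the scaling rule above (numbers are illustrative): for a
# 480x640 image with min_side=800 and max_side=1333, scale_factor = 800/480
# ≈ 1.667 and 640 * 1.667 ≈ 1067 <= 1333, so the cap is not hit; with
# max_side=1000 the factor would be recomputed as 1000/640 = 1.5625.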
def get_pooled_features(self, bboxs):
    """
    Get RoI-pooled features from the backbone of the object detector, if not
    using BackboneMotionModel.
    Input:
        bboxs: (N, 4)
    Output:
        box_features: (N, 256, 7, 7)
        box_head_features: (N, 1024)
    """
    bboxs = resize_boxes(
        bboxs, self.obj_detect.original_image_sizes[0],
        self.obj_detect.preprocessed_images.image_sizes[0])
    bboxs = [bboxs]
    box_features = self.obj_detect.roi_heads.box_roi_pool(
        self.obj_detect.features, bboxs,
        self.obj_detect.preprocessed_images.image_sizes)
    box_head_features = self.obj_detect.roi_heads.box_head(box_features)
    return box_features, box_head_features
def forward(self, images, features, targets=None):
    if self.training and targets is None:
        raise ValueError("In training mode, targets should be passed")
    num_images = len(images.tensors)
    device = images.tensors.device
    proposals = []
    for idx in range(num_images):
        # Look up the precomputed edge boxes by image id and rescale them to
        # the transformed image size.
        image_id = int(targets[idx]['image_id'].item())
        orig_size = targets[idx]["size"]
        new_size = images.image_sizes[idx]
        box = self.edgeboxes[image_id]
        box = torch.Tensor(box).float()
        box = resize_boxes(box, orig_size, new_size)
        box = box.to(device)
        proposals.append(box)
    boxes = proposals
    losses = {}
    return boxes, losses
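# A hedged sketch (names are assumptions): since this forward mirrors the RPN
# interface, (images, features, targets) -> (proposals, losses), a module built
# around it can be swapped in for the RPN of a torchvision detector. Note that
# `targets` must then carry 'image_id' and 'size' even at inference time.
# EdgeBoxProposalGenerator is an assumed wrapper class and `edgeboxes` an
# assumed dict mapping image_id -> (N, 4) box array.
import torchvision

model = torchvision.models.detection.fasterrcnn_resnet50_fpn(num_classes=2)
model.rpn = EdgeBoxProposalGenerator(edgeboxes)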
def resize(self, image, target, fixed_size=None):
    h, w = image.shape[-2:]
    im_shape = torch.tensor(image.shape[-2:])
    min_size = float(torch.min(im_shape))
    max_size = float(torch.max(im_shape))
    if fixed_size is not None:
        size = fixed_size
    elif self.training:
        size = random.choice(self.min_size)
    else:
        # FIXME assume for now that testing uses the largest scale
        size = self.min_size[-1]
    scale_factor = size / min_size
    if max_size * scale_factor > self.max_size:
        scale_factor = self.max_size / max_size
    image = torch.nn.functional.interpolate(image[None], scale_factor=scale_factor,
                                            mode='bilinear', align_corners=False)[0]
    if target is None:
        return image, target
    bbox = target["boxes"]
    bbox = resize_boxes(bbox, (h, w), image.shape[-2:])
    target["boxes"] = bbox
    if "masks" in target:
        mask = target["masks"]
        mask = misc_nn_ops.interpolate(
            mask[None].float(), scale_factor=scale_factor)[0].byte()
        target["masks"] = mask
    if "keypoints" in target:
        keypoints = target["keypoints"]
        keypoints = resize_keypoints(keypoints, (h, w), image.shape[-2:])
        target["keypoints"] = keypoints
    return image, target