def forward(self, images, targets=None):
    """
    Run backbone, RPN and (when present) the ROI heads on a batch of images.

    Arguments:
        images (list[Tensor] or ImageList): images to be processed
        targets (list[BoxList]): ground-truth boxes present in the image (optional)

    Returns:
        result (list[BoxList] or dict[Tensor]): in training mode, a dict that
            merges the detector losses with the proposal losses. Otherwise the
            ROI-head output, or the RPN proposals when the model has no ROI heads.

    Raises:
        ValueError: if the model is in training mode and ``targets`` is None.
    """
    if targets is None and self.training:
        raise ValueError("In training mode, targets should be passed")

    image_list = to_image_list(images)
    feature_maps = self.backbone(image_list.tensors)
    proposals, rpn_losses = self.rpn(image_list, feature_maps, targets)

    if self.roi_heads:
        # the first element returned by the ROI heads is not used here
        _, detections, head_losses = self.roi_heads(feature_maps, proposals, targets)
    else:
        # RPN-only models don't have roi_heads
        detections, head_losses = proposals, {}

    if not self.training:
        return detections

    # proposal losses are merged last, so they win on a key collision
    losses = dict(head_losses)
    losses.update(rpn_losses)
    return losses
def get_detectron_features(self, image_paths):
    """
    Extract detector features for a batch of image files.

    Every path is passed through ``self._image_transform``; the resulting
    images are batched, moved to CUDA and run through ``self.detection_model``
    with gradients disabled.

    Arguments:
        image_paths: iterable of paths accepted by ``self._image_transform``

    Returns:
        whatever ``self._process_feature_extraction`` returns for the model
        output, the per-image scales, ``self.args.feature_name`` and
        ``self.args.confidence_threshold``.
    """
    transformed = [self._image_transform(path) for path in image_paths]
    tensors = [image for image, _ in transformed]
    scales = [scale for _, scale in transformed]

    # Image dimensions should be divisible by 32, to allow convolutions
    # in detector to work
    batch = to_image_list(tensors, size_divisible=32).to("cuda")

    with torch.no_grad():
        output = self.detection_model(batch)

    return self._process_feature_extraction(
        output,
        scales,
        self.args.feature_name,
        self.args.confidence_threshold,
    )
def compute_prediction(self, img, img_h, img_w):
    """
    Run the model on ``img`` and return the selected top predictions.

    Arguments:
        img: image data accepted by ``to_image_list`` (as returned by the data loader)
        img_h, img_w: size the first prediction is resized to, passed to
            ``resize`` as ``(img_w, img_h)``

    Returns:
        top_prediction: the result of ``self.select_top_predictions`` applied
            to the resized first prediction. Additional information of the
            detection properties can be found in the fields of the BoxList
            via `prediction.fields()`
    """
    batch = to_image_list(img, self.cfg.DATALOADER.SIZE_DIVISIBILITY).to(self.device)

    # compute predictions
    with torch.no_grad():
        outputs = self.model(batch)
    outputs = [out.to(self.cpu_device) for out in outputs]

    # only the first prediction of the batch is used
    resized = outputs[0].resize((img_w, img_h))
    return self.select_top_predictions(resized, img_w, img_h)
def forward(self, images, targets=None, force_boxes=False):
    """
    Run the detector, optionally using the target boxes as proposals.

    Arguments:
        images (list[Tensor] or ImageList): images to be processed
        targets (list[BoxList]): ground-truth boxes present in the image (optional)
        force_boxes (bool): outside training, and when ``targets`` is truthy,
            skip the RPN and build the proposals from the boxes in ``targets``.
            The flag is also forwarded to the ROI heads.

    Returns:
        result (list[BoxList] or dict[Tensor]): in training mode, a dict that
            merges the detector losses with the proposal losses. Otherwise the
            ROI-head output, or the proposals when the model has no ROI heads.

    Raises:
        ValueError: if the model is in training mode and ``targets`` is None.
    """
    if targets is None and self.training:
        raise ValueError("In training mode, targets should be passed")

    image_list = to_image_list(images)
    feature_maps = self.backbone(image_list.tensors)

    use_target_boxes = (not self.training) and targets and force_boxes
    if not use_target_boxes:
        proposals, rpn_losses = self.rpn(image_list, feature_maps, targets)
    else:
        # if in the reference model, we want to force boxes given in the target
        proposals = [BoxList(t.bbox, t.size, t.mode) for t in targets]
        rpn_losses = {}

    if self.roi_heads:
        _, detections, head_losses = self.roi_heads(
            feature_maps, proposals, targets, force_boxes)
    else:
        # RPN-only models don't have roi_heads
        detections, head_losses = proposals, {}

    if not self.training:
        return detections

    losses = dict(head_losses)
    losses.update(rpn_losses)
    return losses
def __call__(self, batch):
    """
    Collate a batch whose image and target entries are themselves sequences.

    ``batch`` is transposed into columns. Column 0 (images) and column 1
    (targets) are each transposed once more and flattened, so the items are
    grouped by their position inside the per-sample sequence. Column 2 is
    returned untouched as the image ids.

    Returns:
        (images, targets, img_ids): ``images`` is the result of
        ``to_image_list`` on the flattened images with ``self.size_divisible``;
        ``targets`` is the flattened list of targets.
    """
    columns = list(zip(*batch))

    image_groups = list(zip(*columns[0]))
    target_groups = list(zip(*columns[1]))

    flat_images = [image for group in image_groups for image in group]
    flat_targets = [target for group in target_groups for target in group]

    images = to_image_list(flat_images, self.size_divisible)
    return images, flat_targets, columns[2]
def forward(self, images, targets=None):
    """
    Run the detector; training losses are returned with group-prefixed keys.

    Arguments:
        images (list[Tensor] or ImageList): images to be processed
        targets (list[BoxList]): ground-truth boxes present in the image (optional)

    Returns:
        result (list[BoxList] or dict[Tensor]): in training mode, a dict whose
            keys are the detector loss names prefixed with ``detector_losses/``
            and the proposal loss names prefixed with ``proposal_losses/``.
            Otherwise the ROI-head output, or the RPN proposals when the model
            has no ROI heads.

    Raises:
        ValueError: if the model is in training mode and ``targets`` is None.
    """
    if targets is None and self.training:
        raise ValueError("In training mode, targets should be passed")

    image_list = to_image_list(images)
    feature_maps = self.backbone(image_list.tensors)
    proposals, rpn_losses = self.rpn(image_list, feature_maps, targets)

    if self.roi_heads:
        _, detections, head_losses = self.roi_heads(
            feature_maps, proposals, targets)
    else:
        # RPN-only models don't have roi_heads
        detections, head_losses = proposals, {}

    if not self.training:
        return detections

    # Prepend the key name so as to allow better visualisation in tensorboard
    losses = {
        'detector_losses/' + name: value for name, value in head_losses.items()
    }
    losses.update({
        'proposal_losses/' + name: value for name, value in rpn_losses.items()
    })
    return losses
def forward(self, images, targets=None):
    """
    Run the detector with an extra multi-label classification loss in training.

    Arguments:
        images (list[Tensor] or ImageList): images to be processed
        targets (list[BoxList]): ground-truth boxes present in the image (optional)

    Returns:
        result (list[BoxList] or dict[Tensor]): in training mode, a dict that
            merges the detector losses, the proposal losses and the output of
            ``self.multilabel_cls``. Otherwise the ROI-head output, or the RPN
            proposals when the model has no ROI heads.

    Raises:
        ValueError: if the model is in training mode and ``targets`` is None.
    """
    if self.training and targets is None:
        raise ValueError("In training mode, targets should be passed")

    images = to_image_list(images)
    features = self.backbone(images.tensors)

    # conduct multi-label classification on the top of features
    # (the fallback used to be bound to the misspelled name `multilable_loss`,
    # which left `multilabel_loss` without a default)
    multilabel_loss = {}
    if self.training:
        labels_list = [target.get_field("labels") for target in targets]
        multilabel_loss = self.multilabel_cls(features[-1], labels_list)

    proposals, proposal_losses = self.rpn(images, features, targets)

    if self.roi_heads:
        _, result, detector_losses = self.roi_heads(
            features, proposals, targets)
    else:
        # RPN-only models don't have roi_heads
        result = proposals
        detector_losses = {}

    if self.training:
        losses = {}
        losses.update(detector_losses)
        losses.update(proposal_losses)
        losses.update(multilabel_loss)
        return losses

    return result
def forward(self, images, targets=None, centerness_pack=None, iter=None):
    """
    Run the detector; during warm-up only the centerness losses stay non-zero.

    Arguments:
        images (list[Tensor] or ImageList): images to be processed
        targets (list[BoxList]): ground-truth boxes present in the image (optional)
        centerness_pack: forwarded unchanged to ``self.rpn``
        iter (int, optional): current iteration, forwarded to ``self.rpn``.
            In training mode, while ``iter <= self.warmup`` every loss except
            ``loss_centerness`` and ``loss_rpn_center_box_reg`` is multiplied
            by 0.0. When ``iter`` is None no warm-up zeroing is applied.

    Returns:
        result (list[BoxList] or dict[Tensor]): in training mode, a dict that
            merges the detector losses with the proposal losses. Otherwise the
            ROI-head output, or the RPN proposals when the model has no ROI heads.

    Raises:
        ValueError: if the model is in training mode and ``targets`` is None.
    """
    if self.training and targets is None:
        raise ValueError("In training mode, targets should be passed")

    images = to_image_list(images)
    features = self.backbone(images.tensors)

    # the last backbone output is passed to the RPN separately from the others
    proposals, proposal_losses, pred_targets = self.rpn(
        images, features[:-1], targets, features[-1], centerness_pack, iter)

    if self.roi_heads:
        _, result, detector_losses = self.roi_heads(
            features[:-1], proposals, targets, pred_targets)
    else:
        # RPN-only models don't have roi_heads
        result = proposals
        detector_losses = {}

    if not self.training:
        return result

    losses = {}
    losses.update(detector_losses)
    losses.update(proposal_losses)

    # `iter` defaults to None, and `None <= int` raises TypeError, so guard it
    if iter is not None and iter <= self.warmup:
        kept = ('loss_centerness', 'loss_rpn_center_box_reg')
        # Write the zeroed value back explicitly instead of relying on an
        # in-place `*=` to mutate the object stored in the dict.
        losses = {
            key: (loss if key in kept else loss * 0.0)
            for key, loss in losses.items()
        }
    return losses
def forward(self, images, targets=None):
    """
    Run the detector, optionally feeding only the last feature map to the RPN.

    Arguments:
        images (list[Tensor] or ImageList): images to be processed
        targets (list[BoxList]): ground-truth boxes present in the image (optional)

    Returns:
        result (list[BoxList] or dict[Tensor]): in training mode, a dict that
            merges the detector losses with the proposal losses. Otherwise the
            ROI-head output, or the RPN proposals when the model has no ROI heads.

    Raises:
        ValueError: if the model is in training mode and ``targets`` is None.
    """
    if targets is None and self.training:
        raise ValueError("In training mode, targets should be passed")

    image_list = to_image_list(images)
    feature_maps = self.backbone(image_list.tensors)

    # With fp4p enabled the RPN only receives the last feature map ("C4" in
    # the original note); the ROI heads always receive every feature map.
    rpn_inputs = (feature_maps[-1], ) if self.fp4p_on else feature_maps
    proposals, rpn_losses = self.rpn(image_list, rpn_inputs, targets)

    if self.roi_heads:
        _, detections, head_losses = self.roi_heads(
            feature_maps, proposals, targets)
    else:
        # RPN-only models don't have roi_heads
        detections, head_losses = proposals, {}

    if not self.training:
        return detections

    losses = dict(head_losses)
    losses.update(rpn_losses)
    return losses
def compute_prediction(self, original_image):
    """
    Run the model on a batch of images and resize every prediction.

    Arguments:
        original_image: indexable sequence of images; each one is passed
            through ``self.transforms``

    Returns:
        prediction (list[BoxList]): one resized prediction per model output.
            Additional information of the detection properties can be found
            in the fields of the BoxList via `prediction.fields()`
    """
    # apply pre-processing to every image
    transformed = [self.transforms(single) for single in original_image]

    # convert to an ImageList, padded so that it is divisible by
    # cfg.DATALOADER.SIZE_DIVISIBILITY
    batch = to_image_list(transformed, self.cfg.DATALOADER.SIZE_DIVISIBILITY)
    batch = batch.to(self.device)

    # compute predictions
    # NOTE(review): the model is not called under torch.no_grad() here -- confirm
    # whether gradients are really needed by callers.
    outputs = [out.to(self.cpu_device) for out in self.model(batch)]

    # NOTE(review): a single value taken from the first image's shape[-2] is
    # used as both width and height for every prediction -- presumably all
    # inputs are square and equally sized; confirm.
    side = original_image[0].shape[-2]
    return [out.resize((side, side)) for out in outputs]
def compute_prediction(self, original_image):
    """
    Run the model on one image and capture the FPN global feature via a hook.

    Side effects: resets ``self.pool_feature`` and ``self.global_feature``;
    after the forward pass ``self.global_feature`` holds
    ``model.backbone.fpn.global_feature`` and its shape is printed.

    Arguments:
        original_image (np.ndarray): an image as returned by OpenCV

    Returns:
        prediction (BoxList): the detected objects. Additional information
            of the detection properties can be found in the fields of
            the BoxList via `prediction.fields()`
    """
    self.pool_feature = []
    self.global_feature = []

    def hook(module, inputs, output):
        self.global_feature = module.backbone.fpn.global_feature

    # apply pre-processing to image
    image = self.transforms(original_image)
    # convert to an ImageList, padded so that it is divisible by
    # cfg.DATALOADER.SIZE_DIVISIBILITY
    image_list = to_image_list(image, self.cfg.DATALOADER.SIZE_DIVISIBILITY)
    image_list = image_list.to(self.device)

    # compute predictions
    # The hook is removed once the forward pass is over; otherwise every call
    # would leave one more hook registered on the model.
    handle = self.model.register_forward_hook(hook)
    try:
        with torch.no_grad():
            predictions = self.model(image_list)
    finally:
        handle.remove()
    print(self.global_feature.shape)
    predictions = [o.to(self.cpu_device) for o in predictions]

    # always single image is passed at a time
    prediction = predictions[0]

    # reshape prediction (a BoxList) into the original image size
    height, width = original_image.shape[:-1]
    prediction = prediction.resize((width, height))

    if prediction.has_field("mask"):
        # if we have masks, paste the masks in the right position
        # in the image, as defined by the bounding boxes
        masks = prediction.get_field("mask")
        # always single image is passed at a time
        masks = self.masker([masks], [prediction])[0]
        prediction.add_field("mask", masks)
    return prediction
def compute_prediction(self, original_image):
    """
    Run the model on one image and post-process mask and "ke" fields.

    Arguments:
        original_image (np.ndarray): an image as returned by OpenCV

    Returns:
        prediction (BoxList): the detected objects, resized to the original
            image size. Additional information of the detection properties
            can be found in the fields of the BoxList via `prediction.fields()`
    """
    # pre-process, then pad into an ImageList whose size is divisible by
    # cfg.DATALOADER.SIZE_DIVISIBILITY
    preprocessed = self.transforms(original_image)
    batch = to_image_list(preprocessed, self.cfg.DATALOADER.SIZE_DIVISIBILITY)
    batch = batch.to(self.device)

    height, width = original_image.shape[:-1]

    with torch.no_grad():
        outputs = self.model(batch)
    outputs = [out.to(self.cpu_device) for out in outputs]

    # always single image is passed at a time; bring it back to the
    # original image size
    prediction = outputs[0].resize((width, height))

    if prediction.has_field("mask"):
        # paste the masks in the right position in the image,
        # as defined by the bounding boxes
        pasted = self.masker([prediction.get_field("mask")], [prediction])[0]
        prediction.add_field("mask", pasted)

    if prediction.has_field("ke") and self.cfg.MODEL.KE_ON:
        ke_field = prediction.get_field("ke")
        mty_field = prediction.get_field("mty")
        # the "ke" field is replaced by its `.kes` attribute
        prediction.add_field("ke", ke_field.kes)
        prediction.add_field("mty", mty_field)

    return prediction
def forward(self, images, targets=None, logits_only=False, adv_patch=None):
    """
    Run the detector, with an RPN-only loss path when ``logits_only`` is set.

    Arguments:
        images (list[Tensor] or ImageList): images to be processed
        targets (list[BoxList]): ground-truth boxes present in the image (optional)
        logits_only (bool): forwarded to ``self.rpn``. In training mode it
            makes this method return early with the proposal losses plus
            ``loss_tv``.
        adv_patch: input of ``SmoothTV`` for the ``loss_tv`` term

    Returns:
        result (list[BoxList] or dict[Tensor]): in training mode, a dict of
            losses (see ``logits_only``). Otherwise the ROI-head output, or
            the RPN proposals when the model has no ROI heads.

    Raises:
        ValueError: if the model is in training mode and ``targets`` is None.
    """
    if targets is None and self.training:
        raise ValueError("In training mode, targets should be passed")

    image_list = to_image_list(images)
    feature_maps = self.backbone(image_list.tensors)
    proposals, rpn_losses = self.rpn(image_list, feature_maps, targets, logits_only)

    if logits_only and self.training:
        # the ROI heads are skipped entirely on this path
        tv_term = 2.5 * SmoothTV(adv_patch)
        losses = dict(rpn_losses)
        losses["loss_tv"] = tv_term
        return losses

    if self.roi_heads:
        _, detections, head_losses = self.roi_heads(
            feature_maps, proposals, targets)
    else:
        # RPN-only models don't have roi_heads
        detections, head_losses = proposals, {}

    if not self.training:
        return detections

    losses = dict(head_losses)
    losses.update(rpn_losses)
    return losses
def compute_prediction(self, original_image):
    """
    Run the model on one image; masks also get a ``mask_score`` field.

    Arguments:
        original_image (np.ndarray): an image as returned by OpenCV

    Returns:
        prediction (BoxList): the detected objects, resized to the original
            image size. Additional information of the detection properties
            can be found in the fields of the BoxList via `prediction.fields()`
    """
    # pre-process, then pad into an ImageList whose size is divisible by
    # cfg.DATALOADER.SIZE_DIVISIBILITY
    preprocessed = self.transforms(original_image)
    batch = to_image_list(preprocessed, self.cfg.DATALOADER.SIZE_DIVISIBILITY)
    batch = batch.to(self.device)

    with torch.no_grad():
        outputs = self.model(batch)
    outputs = [out.to(self.cpu_device) for out in outputs]

    # always single image is passed at a time; bring it back to the
    # original image size
    height, width = original_image.shape[:-1]
    prediction = outputs[0].resize((width, height))

    if not prediction.has_field("mask"):
        return prediction

    # paste the masks in the right position in the image, as defined by
    # the bounding boxes; the masker returns three values here, of which
    # the middle one is not used
    pasted, _polys, mask_scores = self.masker(
        [prediction.get_field("mask")], [prediction])
    prediction.add_field("mask", pasted[0])
    prediction.add_field("mask_score", mask_scores[0])
    return prediction
def forward(self, images, targets=None, features=None):
    """
    Encode the images (unless features are supplied) and run the decoder.

    Arguments:
        images (list[Tensor] or ImageList): images to be processed
        targets (list[BoxList]): ground-truth boxes present in the image (optional)
        features (list[Tensor]): encoder output features (optional)

    Returns:
        whatever ``self.decoder(images, features, targets)`` returns.

    Raises:
        ValueError: if the model is in training mode and ``targets`` is None.
    """
    if targets is None and self.training:
        raise ValueError("In training mode, targets should be passed")

    if features is not None:
        # NOTE(review): on this path `images` reaches the decoder without
        # going through to_image_list -- presumably callers already pass an
        # ImageList; confirm.
        return self.decoder(images, features, targets)

    image_list = to_image_list(images)
    encoded = self.encoder(image_list.tensors)
    return self.decoder(image_list, encoded, targets)
def get_vinvl_features(self, image_paths):
    """
    Extract detector features for a batch of image files.

    Every path is passed through ``self._image_transform``; the resulting
    images are batched with a size divisibility of 32 and moved to ``DEVICE``.
    The global torch seed is reset to 0 before the model runs with gradients
    disabled.

    Returns:
        whatever ``self._process_feature_extraction`` returns for the model
        output and the per-image infos.
    """
    transformed = [self._image_transform(path) for path in image_paths]
    tensors = [image for image, _ in transformed]
    infos = [info for _, info in transformed]

    batch = to_image_list(tensors, size_divisible=32).to(DEVICE)

    torch.manual_seed(0)
    with torch.no_grad():
        output = self.detection_model(batch)

    return self._process_feature_extraction(output, infos)
def get_detectron_features(self, image):
    """
    Extract detector features for a single image and record the elapsed time.

    The elapsed wall-clock time is printed and stored in
    ``processing['Detectron']``.

    Returns:
        the first element of what ``self.feature_extract`` returns for the
        model output, called with feature name ``'fc6'`` and threshold 0.2.
    """
    started = time.time()

    tensor, scale = self.image_transform(image)
    batch = to_image_list([tensor], size_divisible=32).to(self.device.type)

    with torch.no_grad():
        output = self.detectron_model(batch)
    features = self.feature_extract(output, [scale], 'fc6', 0.2)

    finished = time.time()
    print(
        f'Tiki : Getting Features : Detectron - Finished in {finished-started:7.3f} Seconds'
    )
    processing['Detectron'] = finished - started

    return features[0]
def im_detect_bbox_hflip(model, images, target_scale, target_max_size, device):
    """
    Performs bbox detection on the horizontally flipped image.
    Function signature is the same as for im_detect_bbox.

    Every image is resized, flipped (probability 1.0), converted to a tensor
    and normalised with the ``cfg.INPUT`` settings before being batched. The
    resulting box lists are transposed with method 0 before being returned.
    """
    flip_pipeline = TT.Compose([
        T.Resize(target_scale, target_max_size),
        TT.RandomHorizontalFlip(1.0),
        TT.ToTensor(),
        T.Normalize(
            mean=cfg.INPUT.PIXEL_MEAN,
            std=cfg.INPUT.PIXEL_STD,
            to_bgr255=cfg.INPUT.TO_BGR255,
        ),
    ])

    flipped = [flip_pipeline(single) for single in images]
    batch = to_image_list(flipped, cfg.DATALOADER.SIZE_DIVISIBILITY)
    detections = model(batch.to(device))

    # Invert the detections computed on the flipped image
    return [boxes.transpose(0) for boxes in detections]
def forward(self, images, targets=None, vis=False):
    """
    Run backbone, neck, RPN and (when present) the ROI heads.

    Arguments:
        images (list[Tensor] or ImageList): images to be processed
        targets (list[BoxList]): ground-truth boxes present in the image
            (optional). In training mode with ``self.has_aux_heads`` set,
            this must be a pair whose first element holds the targets used
            here; the second element is ignored.
        vis (bool): forwarded to ``self.rpn``

    Returns:
        result (list[BoxList] or dict[Tensor]): in training mode, a dict that
            merges the detector losses with the proposal losses. Otherwise the
            ROI-head output, or the RPN proposals when the model has no ROI heads.

    Raises:
        ValueError: if the model is in training mode and ``targets`` is None.
    """
    if targets is None and self.training:
        raise ValueError("In training mode, targets should be passed")

    if self.training and self.has_aux_heads:
        # only the first half of the pair is used in this method
        targets, _aux_targets = targets

    image_list = to_image_list(images)
    backbone_out = self.backbone(image_list.tensors)
    feature_maps = self.neck(backbone_out)
    proposals, rpn_losses = self.rpn(image_list, feature_maps, targets, vis=vis)

    if self.roi_heads:
        _, detections, head_losses = self.roi_heads(
            feature_maps, proposals, targets)
    else:
        # RPN-only models don't have roi_heads
        detections, head_losses = proposals, {}

    if not self.training:
        return detections

    losses = dict(head_losses)
    losses.update(rpn_losses)
    return losses
def forward(self, images, targets=None):
    """
    Run the wrapped RetinaNet at several input sizes and select boxes jointly.

    For every resizer in ``self.resizers`` the batch is cropped to the first
    image's size, interpolated to the size the resizer asks for and passed
    through the RetinaNet backbone, head and anchor generator. Anchors, class
    outputs and box regressions of all sizes are concatenated and handed to
    ``box_selector_test`` in a single call.

    Arguments:
        images (ImageList): images to be processed; ``image_sizes`` and
            ``tensors`` are read directly
        targets: unused

    Returns:
        the output of ``self.retinanet.rpn.box_selector_test``.

    Raises:
        AssertionError: if the module is in training mode.
    """
    assert self.training is False

    anchor_pool, cls_pool, regression_pool = [], [], []
    for resizer in self.resizers:
        first_size = images.image_sizes[0]
        # the resizer receives the size in reversed order
        new_size = resizer.get_size(first_size[::-1])

        cropped = images.tensors[:, :, :first_size[0], :first_size[1]]
        # only the first element of the interpolated batch is kept
        resized = interpolate(
            cropped, new_size, mode='bilinear', align_corners=True)[0]
        aug_images = to_image_list(
            resized,
            size_divisible=self.retinanet.cfg.DATALOADER.SIZE_DIVISIBILITY)

        features = self.retinanet.backbone(aug_images.tensors)
        if self.retinanet.cfg.RETINANET.BACKBONE == "p2p7":
            # drop the first feature map for the "p2p7" backbone
            features = features[1:]

        box_cls, box_regression = self.retinanet.rpn.head(features)
        anchors = self.retinanet.rpn.anchor_generator(aug_images, features)[0]

        anchor_pool.extend(anchors)
        cls_pool.extend(box_cls)
        regression_pool.extend(box_regression)

    return self.retinanet.rpn.box_selector_test(
        [anchor_pool], cls_pool, regression_pool)
def forward(self, images, targets=None):
    """
    Run backbone, FPN, RPN and (when present) the ROI heads.

    Arguments:
        images (list[Tensor] or ImageList): images to be processed
        targets (list[BoxList]): ground-truth boxes present in the image (optional)

    Returns:
        result (list[BoxList] or dict[Tensor]): in training mode, a dict that
            merges the detector losses with the proposal losses. Otherwise the
            ROI-head output, or the RPN proposals when the model has no ROI heads.

    Raises:
        ValueError: if the model is in training mode and ``targets`` is None.
    """
    if self.training and targets is None:
        raise ValueError("In training mode, targets should be passed")

    images = to_image_list(images)
    # R-50-FPN-RETINANET FPN
    backbone_features = self.backbone(images.tensors)
    fpn_features = self.fpn(backbone_features)

    # the RPN gets the backbone maps without the first one, plus the FPN maps
    proposals, proposal_losses = self.rpn(
        images, backbone_features[1:], fpn_features, targets)

    if self.roi_heads:
        # This branch used to reference an undefined name `features`, which
        # raised NameError. The FPN maps are passed instead.
        # NOTE(review): confirm the ROI heads expect the FPN maps rather than
        # the raw backbone maps.
        _, result, detector_losses = self.roi_heads(
            fpn_features, proposals, targets)
    else:
        # RPN-only models don't have roi_heads
        # "FOCS" (presumably FCOS) takes the place of the RPN
        result = proposals
        detector_losses = {}

    if self.training:
        losses = {}
        losses.update(detector_losses)
        losses.update(proposal_losses)
        return losses

    return result
def forward(self, images, targets=None, query=False):
    """
    Run the detector, with a query mode that uses the targets as proposals.

    Arguments:
        images (list[Tensor] or ImageList): images to be processed
        targets (list[BoxList]): ground-truth boxes present in the image (optional)
        query (bool): outside training, pass ``targets`` to the ROI heads in
            place of the RPN proposals, together with the flag itself

    Returns:
        result (list[BoxList] or dict[Tensor]): in training mode, a dict with
            the detector losses, plus the proposal losses unless
            ``cfg.MODEL.REID.TRAIN_PART`` or ``cfg.MODEL.REID.TRAIN_PADREG``
            is set. Otherwise the ROI-head output, or the RPN proposals when
            the model has no ROI heads.

    Raises:
        ValueError: if the model is in training mode and ``targets`` is None.
    """
    if targets is None and self.training:
        raise ValueError("In training mode, targets should be passed")

    image_list = to_image_list(images)
    feature_maps = self.backbone(image_list.tensors)
    proposals, rpn_losses = self.rpn(image_list, feature_maps, targets)

    if not self.roi_heads:
        # RPN-only models don't have roi_heads
        detections, head_losses = proposals, {}
    elif (not self.training) and query:
        _, detections, head_losses = self.roi_heads(
            feature_maps, targets, targets, query)
    else:
        _, detections, head_losses = self.roi_heads(
            feature_maps, proposals, targets)

    if not self.training:
        return detections

    losses = dict(head_losses)
    reid_cfg = self.cfg.MODEL.REID
    if not reid_cfg.TRAIN_PART and not reid_cfg.TRAIN_PADREG:
        losses.update(rpn_losses)
    return losses
def forward(self, images, targets=None):
    """
    Run backbone, RPN and (when present) the ROI heads on a batch of images.

    Arguments:
        images (list[Tensor] or ImageList): images to be processed
        targets (list[BoxList]): ground-truth boxes present in the image (optional)

    Returns:
        result (list[BoxList] or dict[Tensor]): in training mode, a dict that
            merges the detector losses with the proposal losses. Otherwise the
            ROI-head output, or the RPN proposals when the model has no ROI heads.

    Raises:
        ValueError: if the model is in training mode and ``targets`` is None.
    """
    if self.training and targets is None:
        raise ValueError("In training mode, targets should be passed")

    # `images.tensors` used to be read before this conversion into a local
    # that was never used, which broke list[Tensor] input.
    images = to_image_list(images)
    features = self.backbone(images.tensors)
    proposals, proposal_losses = self.rpn(images, features, targets)

    if self.roi_heads:
        _, result, detector_losses = self.roi_heads(
            features, proposals, targets)
    else:
        # RPN-only models don't have roi_heads
        result = proposals
        detector_losses = {}

    if self.training:
        losses = {}
        losses.update(detector_losses)
        losses.update(proposal_losses)
        return losses

    return result
def forward(self, images, targets=None):
    """
    Run the generator network and, in training mode, add the GAN losses.

    Arguments:
        images: forwarded to ``self.Gnet``; in training mode also converted
            with ``to_image_list`` to obtain the batched tensors
        targets: forwarded to ``self.Gnet``

    Returns:
        in training mode, the pair ``(losses, d_losses)``: ``losses`` is the
        generator output dict updated in place with the generator GAN losses,
        and ``d_losses`` is the second value from ``self.gan_loss_evaluator``.
        Otherwise the generator output unchanged.
    """
    # first forward pass of the Gnet
    outputs = self.Gnet(images, targets)
    if not self.training:
        return outputs

    losses = outputs
    image_list = to_image_list(images)

    # values the generator stored on its heads during the forward pass
    mask_evaluator = self.Gnet.roi_heads.mask.loss_evaluator
    mask_inputs = (
        torch.sigmoid(mask_evaluator.mask_dict['masks'].unsqueeze(1)),
        mask_evaluator.mask_dict['targets'].unsqueeze(1),
        mask_evaluator.positive_proposals,
        self.Gnet.features,
    )
    box_state = self.Gnet.roi_heads.box.bbox_dict
    box_inputs = (box_state['bbox'], box_state['target'], image_list.tensors)

    # evaluate the generator and discriminator losses
    g_losses, d_losses = self.gan_loss_evaluator(mask_inputs, box_inputs)
    losses.update(g_losses)
    return losses, d_losses
def get_detectron_features(image_paths, detection_model, get_boxes, feat_name, device):
    """
    Extract detector features for a batch of image files.

    Every path is read with ``read_image`` and transformed with
    ``image_transform``; the images are batched with a size divisibility of
    32, moved to ``device`` and run through ``detection_model`` with
    gradients disabled.

    Returns:
        whatever ``process_feature_extraction`` returns for the model output
        and the per-image scales, called with ``get_boxes``, ``feat_name``
        and a confidence threshold of 0.2.
    """
    transformed = [image_transform(read_image(path)) for path in image_paths]
    tensors = [image for image, _ in transformed]
    scales = [scale for _, scale in transformed]

    batch = to_image_list(tensors, size_divisible=32).to(device)
    with torch.no_grad():
        output = detection_model(batch)

    return process_feature_extraction(
        output,
        scales,
        get_boxes=get_boxes,
        feat_name=feat_name,
        conf_thresh=0.2,
    )
def forward(self, images, targets=None, logger=None, ret_sg=None):
    """
    Run the detector; the ROI heads also produce a fourth value, ``sg``.

    Arguments:
        images (list[Tensor] or ImageList): images to be processed
        targets (list[BoxList]): ground-truth boxes present in the image (optional)
        logger: forwarded to ``self.roi_heads``
        ret_sg: when truthy outside training, return ``sg`` instead of the
            detections

    Returns:
        in training mode, the pair ``(losses, sg)``, where ``losses`` holds
        the detector losses, plus the proposal losses unless
        ``cfg.MODEL.RELATION_ON`` is set. Outside training, ``sg`` when
        ``ret_sg`` is truthy, else the ROI-head output (or the RPN proposals
        when the model has no ROI heads). ``sg`` is None for models without
        ROI heads.

    Raises:
        ValueError: if the model is in training mode and ``targets`` is None.
    """
    if self.training and targets is None:
        raise ValueError("In training mode, targets should be passed")

    images = to_image_list(images)
    features = self.backbone(images.tensors)
    proposals, proposal_losses = self.rpn(images, features, targets)

    if self.roi_heads:
        _, result, detector_losses, sg = self.roi_heads(
            features, proposals, targets, logger)
    else:
        # RPN-only models don't have roi_heads
        result = proposals
        detector_losses = {}
        # `sg` was previously unbound on this branch, so both uses below
        # raised NameError for RPN-only models.
        sg = None

    if self.training:
        losses = {}
        losses.update(detector_losses)
        if not self.cfg.MODEL.RELATION_ON:
            # During the relationship training stage, the rpn_head should be
            # fixed, and no loss.
            losses.update(proposal_losses)
        return losses, sg

    if ret_sg:
        return sg
    return result
def compute_prediction(self, original_image, id):
    """
    Run the model on one image and return its prediction at original size.

    Arguments:
        original_image (np.ndarray): an image as returned by OpenCV
        id: unused by the current code (only referenced by a depth-loading
            experiment that was disabled)

    Returns:
        prediction (BoxList): the detected objects. Additional information
            of the detection properties can be found in the fields of
            the BoxList via `prediction.fields()`
    """
    # pre-process, then pad into an ImageList whose size is divisible by
    # cfg.DATALOADER.SIZE_DIVISIBILITY
    preprocessed = self.transforms(original_image)
    batch = to_image_list(preprocessed, self.cfg.DATALOADER.SIZE_DIVISIBILITY)
    batch = batch.to(self.device)

    with torch.no_grad():
        outputs = self.model(batch)
    outputs = [out.to(self.cpu_device) for out in outputs]

    # always single image is passed at a time; bring it back to the
    # original image size
    height, width = original_image.shape[:-1]
    prediction = outputs[0].resize((width, height))

    if not prediction.has_field("mask"):
        return prediction

    # paste the masks in the right position in the image, as defined by
    # the bounding boxes
    pasted = self.masker([prediction.get_field("mask")], [prediction])[0]
    prediction.add_field("mask", pasted)
    return prediction
def forward(self, images, targets=None, visualizer=None):
    """
    Run the detector and, outside training, hand the result to a visualizer.

    Arguments:
        images (list[Tensor] or ImageList): images to be processed
        targets (list[BoxList]): ground-truth boxes present in the image (optional)
        visualizer: for visualization. Outside training, the first image of
            the batch and the result are passed to it; when None (the
            default) the visualization step is skipped.

    Returns:
        result (list[BoxList] or dict[Tensor]): in training mode, a dict that
            merges the detector losses with the proposal losses. Otherwise the
            ROI-head output, or the RPN proposals when the model has no ROI heads.

    Raises:
        ValueError: if the model is in training mode and ``targets`` is None.
    """
    if self.training and targets is None:
        raise ValueError("In training mode, targets should be passed")

    images = to_image_list(images)
    features = self.backbone(images.tensors)
    proposals, proposal_losses = self.rpn(images, features, targets)

    if self.roi_heads:
        _, result, detector_losses = self.roi_heads(features, proposals, targets)
    else:
        # RPN-only models don't have roi_heads
        result = proposals
        detector_losses = {}

    # `visualizer` defaults to None; without this guard every eval-mode call
    # that omitted it failed with AttributeError.
    if not self.training and visualizer is not None:
        # convert image format to opencv format
        image = visualizer.transformBackForShowing(
            images.tensors[0].to("cpu"), None)[0].numpy().astype(np.uint8)
        image = np.transpose(image, (1, 2, 0))
        visualizer.run_on_opencv_image_prediction(image, result)

    if self.training:
        losses = {}
        losses.update(detector_losses)
        losses.update(proposal_losses)
        return losses

    return result
def prepare_images(self, inputCOCO_Image_maskrcnnTransform_list):
    """
    Batch already-transformed images into a padded ImageList on ``self.device``.

    No transform is applied here; the inputs are used as given.

    Arguments:
        inputCOCO_Image_maskrcnnTransform_list: sequence of image tensors.
            For each one, ``(shape[2], shape[1])`` is recorded as its size
            (presumably (C, H, W) tensors, giving (width, height) -- TODO confirm).

    Returns:
        (image_list, image_sizes_after_transform): the ImageList, padded so
        that it is divisible by cfg.DATALOADER.SIZE_DIVISIBILITY and moved to
        ``self.device``, and the list of recorded sizes.
    """
    batch = inputCOCO_Image_maskrcnnTransform_list
    image_sizes_after_transform = [
        (tensor.shape[2], tensor.shape[1]) for tensor in batch
    ]

    # [Rui] PADDING
    padded = to_image_list(batch, self.cfg.DATALOADER.SIZE_DIVISIBILITY)

    if self.training:
        # log the padded tensor shape next to the sizes kept by the ImageList
        self.printer.print(
            'PADDED: image_list.tensors, image_list.image_sizes (before pad):',
            padded.tensors.shape, padded.image_sizes)

    return padded.to(self.device), image_sizes_after_transform
def forward(self, images, targets=None):
    """
    Run backbone, RPN and (when present) the ROI heads on a batch of images.

    Arguments:
        images (list[Tensor] or ImageList): images to be processed
        targets (list[BoxList]): ground-truth boxes present in the image (optional)

    Returns:
        result (list[BoxList] or dict[Tensor]): in training mode, a dict that
            merges the detector losses with the proposal losses. Otherwise the
            ROI-head output, or the RPN proposals when the model has no ROI heads.

    Raises:
        ValueError: if the model is in training mode and ``targets`` is None.
    """
    if targets is None and self.training:
        raise ValueError("In training mode, targets should be passed")

    # usually, it seems this is already an ImageList.
    batch = to_image_list(images)
    feature_maps = self.backbone(batch.tensors)

    # for panoptic FPN, it seems like we might need to disentangle this so we
    # can feed the features to the semantic head.
    proposals, rpn_losses = self.rpn(batch, feature_maps, targets)

    if self.roi_heads:
        _, detections, head_losses = self.roi_heads(
            feature_maps, proposals, targets)
    else:
        # RPN-only models don't have roi_heads
        detections, head_losses = proposals, {}

    if not self.training:
        return detections

    losses = dict(head_losses)
    losses.update(rpn_losses)
    return losses
def forward(self, images, targets=None):
    """
    Run backbone, RPN and (when present) the ROI heads on a batch of images.

    Arguments:
        images (list[Tensor] or ImageList): images to be processed
        targets (list[BoxList]): ground-truth boxes present in the image (optional)

    Returns:
        result (list[BoxList] or dict[Tensor]): in training mode, a dict that
            merges the detector losses with the proposal losses. Otherwise the
            ROI-head output, or the RPN proposals when the model has no ROI heads.

    Raises:
        ValueError: if the model is in training mode and ``targets`` is None.
    """
    if targets is None and self.training:
        raise ValueError("In training mode, targets should be passed")

    batch = to_image_list(images)
    feature_maps = self.backbone(batch.tensors)

    # Per the original author's notes: self.rpn is the RPNModule(cfg, in_channels)
    # created in build_rpn, and it returns (boxes, losses).
    proposals, rpn_losses = self.rpn(batch, feature_maps, targets)

    if self.roi_heads:
        # Per the original author's notes: self.roi_heads is the
        # CombinedROIHeads(cfg, roi_heads) created in build_roi_heads, and it
        # returns (x, detections, losses).
        _, detections, head_losses = self.roi_heads(
            feature_maps, proposals, targets)
    else:
        # RPN-only models don't have roi_heads
        detections, head_losses = proposals, {}

    if not self.training:
        return detections

    losses = dict(head_losses)
    losses.update(rpn_losses)
    return losses
def compute_prediction(self, original_image):
    """
    Run the model on one image and return its prediction at original size.

    Arguments:
        original_image (np.ndarray): an image as returned by OpenCV

    Returns:
        prediction (BoxList): the detected objects. Additional information
            of the detection properties can be found in the fields of
            the BoxList via `prediction.fields()`
    """
    # pre-process, then pad into an ImageList whose size is divisible by
    # cfg.DATALOADER.SIZE_DIVISIBILITY
    preprocessed = self.transforms(original_image)
    batch = to_image_list(preprocessed, self.cfg.DATALOADER.SIZE_DIVISIBILITY)
    batch = batch.to(self.device)

    with torch.no_grad():
        outputs = self.model(batch)
    outputs = [out.to(self.cpu_device) for out in outputs]

    # always single image is passed at a time; bring it back to the
    # original image size
    height, width = original_image.shape[:-1]
    prediction = outputs[0].resize((width, height))

    if prediction.has_field("mask"):
        # paste the masks in the right position in the image, as defined
        # by the bounding boxes
        raw_masks = prediction.get_field("mask")
        pasted = self.masker([raw_masks], [prediction])[0]
        prediction.add_field("mask", pasted)

    return prediction
def __call__(self, batch):
    """
    Collate a batch of ``(image, target, img_id, ...)`` samples.

    The batch is transposed into columns: the images are batched with
    ``to_image_list`` using ``self.size_divisible``, while the targets and
    image ids are returned as the transposed tuples.

    Returns:
        (images, targets, img_ids)
    """
    columns = list(zip(*batch))
    batched_images = to_image_list(columns[0], self.size_divisible)
    return batched_images, columns[1], columns[2]