def forward(self, images, targets=None):
    """
    Arguments:
        images (list[Tensor] or ImageList): images to be processed
        targets (list[BoxList]): ground-truth boxes present in the image (optional)

    Returns:
        result (list[BoxList] or dict[Tensor]): the output from the model.
            During training, it returns a dict[Tensor] which contains the losses.
            During testing, it returns a list[BoxList] that contains additional
            fields like `scores`, `labels` and `mask` (for Mask R-CNN models).
    """
    if self.training and targets is None:
        raise ValueError("In training mode, targets should be passed")
    images = to_image_list(images)
    features = self.backbone(images.tensors)
    proposals, proposal_losses = self.rpn(images, features, targets)
    if self.roi_heads:
        x, result, detector_losses = self.roi_heads(features, proposals, targets)
    else:
        # RPN-only models don't have roi_heads
        x = features
        result = proposals
        detector_losses = {}

    if self.training:
        losses = {}
        losses.update(detector_losses)
        losses.update(proposal_losses)
        return losses

    return result
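# A hedged usage sketch for the method above: it shows the training-mode
# loss dict versus the eval-mode list of BoxLists. `model`, `images`, and
# `targets` are assumed to be prepared by the surrounding repo (detector
# builder and data loader); the helper name `demo_forward_modes` is made
# up for illustration and is not repo code.
def demo_forward_modes(model, images, targets):
    # training: the model returns a dict of scalar loss tensors
    model.train()
    loss_dict = model(images, targets)
    total_loss = sum(loss for loss in loss_dict.values())

    # inference: the model returns one BoxList per input image
    model.eval()
    detections = model(images)
    return total_loss, detections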
def compute_prediction(self, original_image):
    """
    Arguments:
        original_image (np.ndarray): an image as returned by OpenCV

    Returns:
        prediction (BoxList): the detected objects. Additional information
            about the detection properties can be found in the fields of
            the BoxList via `prediction.fields()`
    """
    # apply pre-processing to image
    image = self.transforms(original_image)
    # convert to an ImageList, padded so that it is divisible by
    # cfg.DATALOADER.SIZE_DIVISIBILITY
    image_list = to_image_list(image, self.cfg.DATALOADER.SIZE_DIVISIBILITY)
    # compute predictions
    with jt.no_grad():
        predictions = self.model(image_list)
    # always a single image is passed at a time
    prediction = predictions[0]

    # reshape prediction (a BoxList) into the original image size
    height, width = original_image.shape[:-1]
    input_w, input_h = prediction.size
    prediction = prediction.resize((width, height))

    if prediction.has_field("mask"):
        # if we have masks, paste the masks in the right position
        # in the image, as defined by the bounding boxes
        masks = prediction.get_field("mask")
        if masks.ndim == 3:
            # upsample the per-image masks by the feature stride,
            # threshold them, and crop to the original image size
            stride_mask = float(prediction.get_field('stride').item())
            h = math.ceil(masks.shape[1] * stride_mask * height / input_h)
            w = math.ceil(masks.shape[2] * stride_mask * width / input_w)
            mask_th = prediction.get_field('mask_th')
            masks = nn.interpolate(X=masks.unsqueeze(1).float(), size=(h, w),
                                   mode="bilinear", align_corners=False) > mask_th
            masks = masks[:, :, :height, :width]
            prediction.add_field("mask", masks)
        else:
            # always a single image is passed at a time
            masks = self.masker([masks], [prediction])[0]
            prediction.add_field("mask", masks)
    return prediction
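# A hedged usage sketch for `compute_prediction`: it reads an image with
# OpenCV (BGR, HWC uint8, matching the docstring) and inspects the returned
# BoxList. `coco_demo` stands in for a predictor object exposing this
# method, and "demo.jpg" is a placeholder path, not a file from the repo.
def demo_compute_prediction(coco_demo):
    import cv2
    original_image = cv2.imread("demo.jpg")
    prediction = coco_demo.compute_prediction(original_image)
    print(prediction.fields())      # e.g. ['scores', 'labels', 'mask']
    print(prediction.bbox.shape)    # (num_detections, 4)
    return prediction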
def __call__(self, batch):
    # batch is a list of (image, target, img_id) samples; regroup by field
    transposed_batch = list(zip(*batch))
    # pad the images to a common size divisible by self.size_divisible
    images = to_image_list(transposed_batch[0], self.size_divisible)
    targets = transposed_batch[1]
    img_ids = transposed_batch[2]
    if isinstance(images, tuple):
        # if to_image_list already returned a tuple, unpack it directly
        return images[0], images[1], img_ids
    return images, targets, img_ids
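# A hedged sketch of how this collator is typically wired up: `samples` is
# a list of (image, target, img_id) tuples, and `BatchCollator` stands in
# for the class this `__call__` belongs to (the constructor signature is
# assumed). Padding every image to a multiple of `size_divisible` lets the
# backbone downsample the batch tensor cleanly.
def demo_collate(samples, size_divisible=32):
    collator = BatchCollator(size_divisible)
    images, targets, img_ids = collator(samples)
    # images.tensors is a single padded batch tensor; images.image_sizes
    # records each original (h, w) before padding
    return images, targets, img_ids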
def execute(self, images, targets=None):
    """
    Arguments:
        images (list[Tensor] or ImageList): images to be processed
        targets (list[BoxList]): ground-truth boxes present in the image (optional)

    Returns:
        result (list[BoxList] or dict[Tensor]): the output from the model.
            During training, it returns a dict[Tensor] which contains the losses.
            During testing, it returns a list[BoxList] that contains additional
            fields like `scores`, `labels` and `mask` (for Mask R-CNN models).
    """
    if self.is_training() and targets is None:
        raise ValueError("In training mode, targets should be passed")
    images = to_image_list(images)
    features = self.backbone(images.tensors)
    proposals, proposal_losses = self.rpn(images, features, targets)
    if self.roi_heads:
        x, result, detector_losses = self.roi_heads(features, proposals, targets)
    else:
        # RPN-only models don't have roi_heads
        x = features
        result = proposals
        detector_losses = {}

    if self.is_training():
        losses = {}
        losses.update(detector_losses)
        losses.update(proposal_losses)
        return losses

    return result
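# A hedged sketch of a Jittor training step driven by the loss dict that
# `execute` returns in training mode. The optimizer and its hyperparameters
# are illustrative stand-ins, not taken from the repo config.
def demo_train_step(model, optimizer, images, targets):
    model.train()
    loss_dict = model(images, targets)  # Jittor dispatches to execute()
    total_loss = sum(loss for loss in loss_dict.values())
    optimizer.step(total_loss)          # Jittor: backward + update in one call
    return {k: v.item() for k, v in loss_dict.items()}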
def im_detect_bbox(model, images, target_scale, target_max_size):
    """
    Performs bbox detection on the original image.
    """
    transform = TT.Compose([
        T.Resize(target_scale, target_max_size),
        TT.ToTensor(),
        T.Normalize(mean=cfg.INPUT.PIXEL_MEAN,
                    std=cfg.INPUT.PIXEL_STD,
                    to_bgr255=cfg.INPUT.TO_BGR255)
    ])
    images = [transform(image) for image in images]
    images = to_image_list(images, cfg.DATALOADER.SIZE_DIVISIBILITY)
    return model(images)
def im_detect_bbox_hflip(model, images, target_scale, target_max_size):
    """
    Performs bbox detection on the horizontally flipped image.
    Function signature is the same as for im_detect_bbox.
    """
    transform = TT.Compose([
        T.Resize(target_scale, target_max_size),
        TT.RandomHorizontalFlip(1.0),
        TT.ToTensor(),
        T.Normalize(mean=cfg.INPUT.PIXEL_MEAN,
                    std=cfg.INPUT.PIXEL_STD,
                    to_bgr255=cfg.INPUT.TO_BGR255)
    ])
    images = [transform(image) for image in images]
    images = to_image_list(images, cfg.DATALOADER.SIZE_DIVISIBILITY)
    boxlists = model(images)

    # Invert the detections computed on the flipped image
    boxlists_inv = [boxlist.transpose(0) for boxlist in boxlists]
    return boxlists_inv
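# A hedged sketch of combining the two passes above for test-time
# augmentation: detections from the original and flipped runs are merged
# per image before the usual post-processing. `cat_boxlist` is assumed to
# be this codebase's box-list concatenation helper from boxlist_ops, and
# the merge policy shown is illustrative.
def demo_hflip_tta(model, images, target_scale, target_max_size):
    boxlists = im_detect_bbox(model, images, target_scale, target_max_size)
    boxlists_hf = im_detect_bbox_hflip(model, images, target_scale, target_max_size)
    return [cat_boxlist([bl, bl_hf])
            for bl, bl_hf in zip(boxlists, boxlists_hf)]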
def run_model(config_file, img_f=None):
    original_image = load(img_f)

    from detectron.config import cfg
    from detectron.modeling.detector import build_detection_model
    from detectron.utils.checkpoint import DetectronCheckpointer
    from detectron.structures.image_list import to_image_list
    from detectron.modeling.roi_heads.mask_head.inference import Masker
    from jittor import transform as T
    from jittor import nn
    import jittor as jt
    jt.flags.use_cuda = 1

    confidence_threshold = 0.0
    cfg.merge_from_file(config_file)
    model = build_detection_model(cfg)
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=cfg.OUTPUT_DIR)
    _ = checkpointer.load(cfg.MODEL.WEIGHT)
    model.eval()

    class Resize(object):
        def __init__(self, min_size, max_size):
            self.min_size = min_size
            self.max_size = max_size

        # modified from torchvision to add support for max size
        def get_size(self, image_size):
            w, h = image_size
            size = self.min_size
            max_size = self.max_size
            if max_size is not None:
                min_original_size = float(min((w, h)))
                max_original_size = float(max((w, h)))
                if max_original_size / min_original_size * size > max_size:
                    size = int(round(max_size * min_original_size / max_original_size))
            if (w <= h and w == size) or (h <= w and h == size):
                return (h, w)
            if w < h:
                ow = size
                oh = int(size * h / w)
            else:
                oh = size
                ow = int(size * w / h)
            return (oh, ow)

        def __call__(self, image):
            size = self.get_size(image.size)
            image = T.resize(image, size)
            return image

    def build_transform():
        if cfg.INPUT.TO_BGR255:
            to_bgr_transform = T.Lambda(lambda x: x * 255)
        else:
            to_bgr_transform = T.Lambda(lambda x: x[[2, 1, 0]])
        normalize_transform = T.ImageNormalize(mean=cfg.INPUT.PIXEL_MEAN,
                                               std=cfg.INPUT.PIXEL_STD)
        min_size = cfg.INPUT.MIN_SIZE_TEST
        max_size = cfg.INPUT.MAX_SIZE_TEST
        transform = T.Compose([
            T.ToPILImage(),
            Resize(min_size, max_size),
            T.ToTensor(),
            to_bgr_transform,
            normalize_transform,
        ])
        return transform

    transforms = build_transform()
    image = transforms(original_image)
    image_list = to_image_list(image, cfg.DATALOADER.SIZE_DIVISIBILITY)
    predictions = model(image_list)
    predictions = predictions[0]

    if predictions.has_field("mask_scores"):
        scores = predictions.get_field("mask_scores")
    else:
        scores = predictions.get_field("scores")
    # keep only detections above the confidence threshold
    keep = jt.nonzero(scores > confidence_threshold).squeeze(1)
    predictions = predictions[keep]
    # sort the remaining detections by descending score
    scores = predictions.get_field("scores")
    idx, _ = jt.argsort(scores, 0, descending=True)
    predictions = predictions[idx]
    result_diff(predictions)
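# Hedged invocation example for `run_model`: the config and image paths
# below are placeholders, not files shipped with the repo.
if __name__ == '__main__':
    run_model("configs/e2e_mask_rcnn_R_50_FPN_1x.yaml", img_f="demo.jpg")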