def forward(self, batched_inputs): """ Args: batched_inputs: a list, batched outputs of :class:`DatasetMapper` . Each item in the list contains the inputs for one image. For now, each item in the list is a dict that contains: * image: Tensor, image in (C, H, W) format. * instances: Instances Other information that's included in the original dicts, such as: * "height", "width" (int): the output resolution of the model, used in inference. See :meth:`postprocess` for details. """ images, images_whwh = self.preprocess_image(batched_inputs) if isinstance(images, (list, torch.Tensor)): images = nested_tensor_from_tensor_list(images) # Feature Extraction. src = self.backbone(images.tensor) features = list() for f in self.in_features: feature = src[f] features.append(feature) # Cls & Reg Prediction. outputs_class, outputs_coord = self.head(features) output = {'pred_logits': outputs_class, 'pred_boxes': outputs_coord} if self.training: gt_instances = [ x["instances"].to(self.device) for x in batched_inputs ] targets = self.prepare_targets(gt_instances) loss_dict = self.criterion(output, targets) weight_dict = self.criterion.weight_dict for k in loss_dict.keys(): if k in weight_dict: loss_dict[k] *= weight_dict[k] return loss_dict else: box_cls = output["pred_logits"] box_pred = output["pred_boxes"] results = self.inference(box_cls, box_pred, images.image_sizes) processed_results = [] for results_per_image, input_per_image, image_size in zip( results, batched_inputs, images.image_sizes): height = input_per_image.get("height", image_size[0]) width = input_per_image.get("width", image_size[1]) r = detector_postprocess(results_per_image, height, width) processed_results.append({"instances": r}) return processed_results
def forward(self, batched_inputs, return_raw_results=False, is_mc_dropout=False):
    """
    Args:
        batched_inputs: a list, batched outputs of :class:`DatasetMapper`.
            Each item in the list contains the inputs for one image.
            For now, each item in the list is a dict that contains:

            * image: Tensor, image in (C, H, W) format.
            * instances: Instances

            Other information that's included in the original dicts, such as:

            * "height", "width" (int): the output resolution of the model,
              used in inference. See :meth:`postprocess` for details.
        return_raw_results (bool): if True, return unprocessed results for
            probabilistic inference.
        is_mc_dropout (bool): if True, return unprocessed results even if
            the `self.training` flag is on.

    Returns:
        dict[str: Tensor]: mapping from a named loss to a tensor storing the
            loss. Used during training only.
    """
    images = self.preprocess_image(batched_inputs)
    output = self.detr(images)

    if self.training and not is_mc_dropout:
        gt_instances = [
            x["instances"].to(self.device) for x in batched_inputs
        ]
        targets = self.prepare_targets(gt_instances)
        loss_dict = self.criterion(output, targets)
        weight_dict = self.criterion.weight_dict
        for k in loss_dict.keys():
            if k in weight_dict:
                loss_dict[k] *= weight_dict[k]
        self.current_step += 1
        return loss_dict
    elif return_raw_results:
        return output
    else:
        box_cls = output["pred_logits"]
        box_pred = output["pred_boxes"]
        mask_pred = output["pred_masks"] if self.mask_on else None
        results = self.inference(box_cls, box_pred, mask_pred,
                                 images.image_sizes)
        processed_results = []
        for results_per_image, input_per_image, image_size in zip(
                results, batched_inputs, images.image_sizes):
            height = input_per_image.get("height", image_size[0])
            width = input_per_image.get("width", image_size[1])
            r = detector_postprocess(results_per_image, height, width)
            processed_results.append({"instances": r})
        return processed_results
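# --- Usage sketch (assumption, not this repository's API) ---------------------
# One way the `return_raw_results` / `is_mc_dropout` flags above could be used
# for Monte-Carlo dropout: keep dropout layers active at eval time and run the
# model several times to collect raw outputs for uncertainty estimation.
# `mc_dropout_inference` is a hypothetical helper written for illustration.
import torch
import torch.nn as nn

def mc_dropout_inference(model, batched_inputs, num_runs=10):
    model.eval()
    # Re-enable only the dropout layers so their masks are resampled per run.
    for m in model.modules():
        if isinstance(m, nn.Dropout):
            m.train()
    with torch.no_grad():
        return [
            model(batched_inputs, return_raw_results=True, is_mc_dropout=True)
            for _ in range(num_runs)
        ]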
def forward(self, batched_inputs: Tuple[Dict[str, Tensor]]):
    """
    Args:
        batched_inputs: a list, batched outputs of :class:`DatasetMapper`.
            Each item in the list contains the inputs for one image.
            For now, each item in the list is a dict that contains:

            * image: Tensor, image in (C, H, W) format.
            * instances: Instances

            Other information that's included in the original dicts, such as:

            * "height", "width" (int): the output resolution of the model,
              used in inference. See :meth:`postprocess` for details.

    Returns:
        In training, dict[str: Tensor]: mapping from a named loss to a tensor
            storing the loss. Used during training only.
        In inference, the standard output format, described in
            :doc:`/tutorials/models`.
    """
    nums_images = len(batched_inputs)
    images = self.preprocess_image(batched_inputs)
    features = self.backbone(images.tensor)
    features = [features[self.backbone_level]]

    anchors_image = self.anchor_generator(features)
    anchors = [copy.deepcopy(anchors_image) for _ in range(nums_images)]

    pred_logits, pred_anchor_deltas = self.decoder(self.encoder(features[0]))
    # Transpose the Hi*Wi*A dimension to the middle
    # (see the permute_to_N_HWA_K sketch after this function):
    pred_logits = [permute_to_N_HWA_K(pred_logits, self.nums_classes)]
    pred_anchor_deltas = [permute_to_N_HWA_K(pred_anchor_deltas, 4)]

    if self.training:
        assert not torch.jit.is_scripting(), "Not supported"
        assert "instances" in batched_inputs[0], \
            "Instance annotations are missing in training"
        gt_instances = [
            x["instances"].to(self.device) for x in batched_inputs
        ]
        indices = self.get_ground_truth(anchors, pred_anchor_deltas,
                                        gt_instances)
        loss = self.losses(indices, gt_instances, anchors, pred_logits,
                           pred_anchor_deltas)
        return loss
    else:
        results = self.inference(anchors_image, pred_logits,
                                 pred_anchor_deltas, images.image_sizes)
        if torch.jit.is_scripting():
            return results
        processed_results = []
        for results_per_image, input_per_image, image_size in zip(
                results, batched_inputs, images.image_sizes):
            height = input_per_image.get("height", image_size[0])
            width = input_per_image.get("width", image_size[1])
            r = detector_postprocess(results_per_image, height, width)
            processed_results.append({"instances": r})
        return processed_results
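# --- Reference sketch ----------------------------------------------------------
# `permute_to_N_HWA_K` used above is assumed to behave like detectron2's helper
# of the same name: reshape dense predictions from (N, A*K, H, W) to
# (N, H*W*A, K) so that the per-anchor prediction dimension sits last.
def permute_to_N_HWA_K(tensor, K):
    assert tensor.dim() == 4, tensor.shape
    N, _, H, W = tensor.shape
    tensor = tensor.view(N, -1, K, H, W)    # (N, A, K, H, W)
    tensor = tensor.permute(0, 3, 4, 1, 2)  # (N, H, W, A, K)
    return tensor.reshape(N, -1, K)         # (N, H*W*A, K)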
def inference(self, batched_inputs, detected_instances=None, do_postprocess=True):
    """
    Run inference on the given inputs.

    Args:
        batched_inputs (list[dict]): same as in :meth:`forward`
        detected_instances (None or list[Instances]): if not None, it
            contains an `Instances` object per image. The `Instances`
            object contains "pred_boxes" and "pred_classes" which are
            known boxes in the image. The inference will then skip the
            detection of bounding boxes, and only predict other per-ROI
            outputs.
        do_postprocess (bool): whether to apply post-processing on the outputs.

    Returns:
        same as in :meth:`forward`.
    """
    assert not self.training
    images = self.preprocess_image(batched_inputs)
    features = self.backbone(images.tensor)

    if detected_instances is None:
        if self.proposal_generator:
            proposals, _ = self.proposal_generator(images, features, None)
        else:
            assert "proposals" in batched_inputs[0]
            proposals = [
                x["proposals"].to(self.device) for x in batched_inputs
            ]
        results, _ = self.roi_heads(images, features, proposals, None)
    else:
        detected_instances = [
            x.to(self.device) for x in detected_instances
        ]
        results = self.roi_heads.forward_with_given_boxes(
            features, detected_instances)

    if do_postprocess:
        processed_results = []
        for results_per_image, input_per_image, image_size in zip(
                results, batched_inputs, images.image_sizes):
            height = input_per_image.get("height", image_size[0])
            width = input_per_image.get("width", image_size[1])
            r = detector_postprocess(results_per_image, height, width)
            processed_results.append({"instances": r})
        return processed_results
    else:
        return results
def forward(self, batched_inputs): """ Args: batched_inputs: a list, batched outputs of :class:`DatasetMapper` . Each item in the list contains the inputs for one image. For now, each item in the list is a dict that contains: * image: Tensor, image in (C, H, W) format. * instances: Instances Other information that's included in the original dicts, such as: * "height", "width" (int): the output resolution of the model, used in inference. See :meth:`postprocess` for details. Returns: dict[str: Tensor]: mapping from a named loss to a tensor storing the loss. Used during training only. """ images_lists = self.preprocess_image(batched_inputs) # convert images_lists to Nested Tensor? nested_images = self.imagelist_to_nestedtensor(images_lists) output = self.detr(nested_images) if self.training: gt_instances = [ x["instances"].to(self.device) for x in batched_inputs ] # targets: List[Dict[str, torch.Tensor]]. Keys # "labels": [NUM_BOX,] # "boxes": [NUM_BOX, 4] targets = self.prepare_targets(gt_instances) loss_dict = self.criterion(output, targets) weight_dict = self.criterion.weight_dict for k in loss_dict.keys(): if k in weight_dict: loss_dict[k] *= weight_dict[k] return loss_dict else: box_cls = output["pred_logits"] box_pred = output["pred_boxes"] mask_pred = output["pred_masks"] if self.mask_on else None results = self.inference(box_cls, box_pred, mask_pred, images_lists.image_sizes) processed_results = [] for results_per_image, input_per_image, image_size in zip( results, batched_inputs, images_lists.image_sizes): height = input_per_image.get("height", image_size[0]) width = input_per_image.get("width", image_size[1]) r = detector_postprocess(results_per_image, height, width) processed_results.append({"instances": r}) return processed_results
def _postprocess(instances, batched_inputs, image_sizes):
    """
    Rescale the output instances to the target size.
    """
    # note: private function; subject to changes
    processed_results = []
    for results_per_image, input_per_image, image_size in zip(
            instances, batched_inputs, image_sizes):
        height = input_per_image.get("height", image_size[0])
        width = input_per_image.get("width", image_size[1])
        r = detector_postprocess(results_per_image, height, width)
        processed_results.append({"instances": r})
    return processed_results
def inference(self, batched_inputs, do_postprocess=True):
    images = self.preprocess_image(batched_inputs)
    inp = images.tensor
    features = self.backbone(inp)
    proposals, _ = self.proposal_generator(images, features, None)

    processed_results = []
    for results_per_image, input_per_image, image_size in zip(
            proposals, batched_inputs, images.image_sizes):
        if do_postprocess:
            height = input_per_image.get("height", image_size[0])
            width = input_per_image.get("width", image_size[1])
            r = detector_postprocess(results_per_image, height, width)
            processed_results.append({"instances": r})
        else:
            r = results_per_image
            processed_results.append(r)
    return processed_results
def forward(self, batched_inputs): """ Args: batched_inputs: a list, batched outputs of :class:`DatasetMapper` . Each item in the list contains the inputs for one image. For now, each item in the list is a dict that contains: * image: Tensor, image in (C, H, W) format. * instances: Instances Other information that's included in the original dicts, such as: * "height", "width" (int): the output resolution of the model, used in inference. See :meth:`postprocess` for details. """ images, images_whwh = self.preprocess_image(batched_inputs) if isinstance(images, (list, torch.Tensor)): images = nested_tensor_from_tensor_list(images) # Feature Extraction. src = self.backbone(images.tensor) features = list() for f in self.in_features: feature = src[f] features.append(feature) # Prepare Proposals. proposal_boxes = self.init_proposal_boxes.weight.clone() proposal_boxes = box_cxcywh_to_xyxy(proposal_boxes) proposal_boxes = proposal_boxes[None] * images_whwh[:, None, :] img_feats = self.IFE(features) bs = len(features[0]) pos_embeddings = self.pos_embeddings.weight[None].repeat(bs, 1, 1) proposal_feats = img_feats + pos_embeddings del img_feats if self.training: gt_instances = [ x["instances"].to(self.device) for x in batched_inputs ] targets = self.prepare_targets(gt_instances) outputs_class, outputs_coord, outputs_mask = self.head( features, proposal_boxes, proposal_feats, targets) output = { 'pred_logits': outputs_class[-1], 'pred_boxes': outputs_coord[-1], 'pred_masks': outputs_mask[-1] } if self.deep_supervision: output['aux_outputs'] = [{ 'pred_logits': a, 'pred_boxes': b, 'pred_masks': c } for a, b, c in zip(outputs_class[:-1], outputs_coord[:-1], outputs_mask[:-1])] loss_dict = self.criterion(output, targets, self.mask_encoding) weight_dict = self.criterion.weight_dict for k in loss_dict.keys(): if k in weight_dict: loss_dict[k] *= weight_dict[k] return loss_dict else: outputs_class, outputs_coord, outputs_mask = self.head( features, proposal_boxes, proposal_feats) output = { 'pred_logits': outputs_class[-1], 'pred_boxes': outputs_coord[-1], 'pred_masks': outputs_mask[-1] } box_cls = output["pred_logits"] box_pred = output["pred_boxes"] mask_pred = output["pred_masks"].unsqueeze(dim=2) results = self.inference(box_cls, box_pred, mask_pred, images.image_sizes) processed_results = [] for results_per_image, input_per_image, image_size in zip( results, batched_inputs, images.image_sizes): height = input_per_image.get("height", image_size[0]) width = input_per_image.get("width", image_size[1]) r = detector_postprocess(results_per_image, height, width) processed_results.append({"instances": r}) return processed_results
def forward(self, batched_inputs): """ Args: batched_inputs: a list, batched outputs of :class:`DatasetMapper` . Each item in the list contains the inputs for one image. For now, each item in the list is a dict that contains: * image: Tensor, image in (C, H, W) format. * instances: Instances Other information that's included in the original dicts, such as: * "height", "width" (int): the output resolution of the model, used in inference. See :meth:`postprocess` for details. """ images, images_whwh = self.preprocess_image(batched_inputs) if isinstance(images, (list, torch.Tensor)): images = nested_tensor_from_tensor_list(images) # Feature Extraction. src = self.backbone(images.tensor) features = list() for f in self.in_features: feature = src[f] features.append(feature) # print('!!! pin1\n', len(features), features[0].size(), '\n!!!pin1') # Prepare Proposals. proposal_boxes = self.init_proposal_boxes.weight.clone() proposal_boxes = box_cxcywh_to_xyxy(proposal_boxes) proposal_boxes = proposal_boxes[None] * images_whwh[:, None, :] # proposal size: absolute size, x1y1, x2y2 # Prediction. outputs_class, outputs_coord, bboxes = self.head(features, proposal_boxes, self.init_proposal_features.weight) #TODO #3 mask forward # print('!!! pin3\n', bboxes.size(), '\n!!!pin3') output = {'pred_logits': outputs_class[-1], 'pred_boxes': outputs_coord[-1]} #TODO #3 mask forward # print('!!! pin2\n', mask_features.size(), '\n!!!pin2') proposal_list_instances = self.boxes2list_instances(bboxes, images.image_sizes) # print('!!! pin4\n', len(proposal_list_instances), len(proposal_list_instances[0]), '\n!!!pin4') if self.training: #TODO #3 mask forward gt_instances = [x["instances"].to(self.device) for x in batched_inputs] proposals_gt = self.label_and_sample_proposals(proposal_list_instances, gt_instances) # print('!!! pin5\n', len(proposals_gt), len(proposals_gt[0]), '\n!!!pin5') instances_fg, _ = select_foreground_proposals(proposals_gt, self.num_classes) # print('\ninstances_fg\n', len(proposals_gt), len(instances_fg), '\ninstances_fg\n') boxes_fg = [x.proposal_boxes for x in instances_fg] mask_features = self.mask_pooler(features, boxes_fg) # print('!!! pin6\n', len(instances_fg), len(instances_fg[0]), '\n!!!pin6') targets = self.prepare_targets(gt_instances) if self.deep_supervision: output['aux_outputs'] = [{'pred_logits': a, 'pred_boxes': b} for a, b in zip(outputs_class[:-1], outputs_coord[:-1])] loss_dict, match_indices = self.criterion(output, targets) # print('!!!pin1\n', loss_dict.keys(), '\n!!!pin1') #TODO #4 mask loss update loss_dict.update(self.mask_head(mask_features, instances_fg)) # print('!!!pin2\n', loss_dict.keys(), '\n!!!pin2') weight_dict = self.criterion.weight_dict for k in loss_dict.keys(): if k in weight_dict: loss_dict[k] *= weight_dict[k] return loss_dict else: box_cls = output["pred_logits"] box_pred = output["pred_boxes"] results = self.inference(box_cls, box_pred, images.image_sizes) #TODO #5 mask inference boxes_list = [] for boxes_per_image in bboxes: boxes_list.append(Boxes(boxes_per_image)) mask_features = self.mask_pooler(features, boxes_list) self.mask_head(mask_features, results) processed_results = [] for results_per_image, input_per_image, image_size in zip(results, batched_inputs, images.image_sizes): height = input_per_image.get("height", image_size[0]) width = input_per_image.get("width", image_size[1]) r = detector_postprocess(results_per_image, height, width) processed_results.append({"instances": r}) return processed_results
def forward(self, batched_inputs): """ Args: batched_inputs: a list, batched outputs of :class:`DatasetMapper`. Each item in the list contains the inputs for one image. For now, each item in the list is a dict that contains: * "image": Tensor, image in (C, H, W) format. * "instances": Instances * "sem_seg": semantic segmentation ground truth. * Other information that's included in the original dicts, such as: "height", "width" (int): the output resolution of the model, used in inference. See :meth:`postprocess` for details. Returns: list[dict]: each dict is the results for one image. The dict contains the following keys: * "instances": see :meth:`GeneralizedRCNN.forward` for its format. * "sem_seg": see :meth:`SemanticSegmentor.forward` for its format. * "panoptic_seg": available when `PANOPTIC_FPN.COMBINE.ENABLED`. See the return value of :func:`combine_semantic_and_instance_outputs` for its format. """ image_path = [x['file_name'] for x in batched_inputs] if self.training: flips = [x['flip'] for x in batched_inputs] else: flips = None if self.training: exemplar_input = self.get_exemplar_input(image_path, sample_size=1) if exemplar_input is not None: l = len(batched_inputs) batched_inputs = batched_inputs + exemplar_input images, features, proposals, gt_instances, gt_integral_sem_seg, _, losses = self._forward( batched_inputs) exemplar_features = {} for k, v in features.items(): exemplar_features[k] = v[l:] exemplar_gt_instances = gt_instances[l:] image_path = [x['file_name'] for x in batched_inputs] if self.training: exemplar_flips = [x['flip'] for x in batched_inputs] else: exemplar_flips = None with torch.no_grad(): exemplar_info = self.roi_heads.get_box_features( exemplar_features, exemplar_gt_instances) detector_results, detector_losses = self.roi_heads( images, features, proposals, gt_instances, gt_integral_sem_seg, image_path=image_path, flips=exemplar_flips, exemplar_info=exemplar_info) del exemplar_info, exemplar_input else: exemplar_info = None images, features, proposals, gt_instances, gt_integral_sem_seg, _, losses = self._forward( batched_inputs) detector_results, detector_losses = self.roi_heads( images, features, proposals, gt_instances, gt_integral_sem_seg, image_path=image_path, flips=flips, exemplar_info=exemplar_info) else: exemplar_info = None images, features, proposals, gt_instances, gt_integral_sem_seg, sem_seg_results, losses = self._forward( batched_inputs) detector_results, detector_losses = self.roi_heads( images, features, proposals, gt_instances, gt_integral_sem_seg, image_path=image_path, flips=flips, exemplar_info=exemplar_info) if self.training: losses.update({ k: v * self.instance_loss_weight for k, v in detector_losses.items() }) return losses processed_results = [] for sem_seg_result, detector_result, input_per_image, image_size in zip( sem_seg_results, detector_results, batched_inputs, images.image_sizes): height = input_per_image.get("height", image_size[0]) width = input_per_image.get("width", image_size[1]) sem_seg_r = sem_seg_postprocess(sem_seg_result, image_size, height, width) detector_r = detector_postprocess(detector_result, height, width) processed_results.append({ "sem_seg": sem_seg_r, "instances": detector_r }) if self.combine_on: panoptic_r = combine_semantic_and_instance_outputs( detector_r, sem_seg_r.argmax(dim=0), self.combine_overlap_threshold, self.combine_stuff_area_limit, self.combine_instances_confidence_threshold, ) processed_results[-1]["panoptic_seg"] = panoptic_r return processed_results
def postprogress(self, results, batched_inputs, image_sizes):
    processed_results = []
    tumor_mask_root = './test_tumor_whole'
    wall_mask_root = './test_wall_whole'
    pred_tumor_path = './predictionTumor'
    pred_wall_path = './predictionWall'
    if not os.path.exists(pred_tumor_path):
        os.mkdir(pred_tumor_path)
    if not os.path.exists(pred_wall_path):
        os.mkdir(pred_wall_path)

    pred_tumor_masks = []
    pred_wall_masks = []
    gt_wall_masks = []
    gt_tumor_masks = []
    for results_per_image, input_per_image, image_size in zip(
            results, batched_inputs, image_sizes):
        height = input_per_image.get("height", image_size[0])
        width = input_per_image.get("width", image_size[1])
        r = detector_postprocess(results_per_image, height, width)
        processed_results.append({"instances": r})

        if self.mask_on:
            img_name = input_per_image['file_name'].split('/')[-1]
            dice_path = 'diceLog.csv'
            readstyle = 'a+'
            if img_name in pd.read_csv(dice_path, usecols=['SubjectID']):
                readstyle = "w+"

            # No objects detected and both ground-truth masks are empty.
            if r.pred_classes.shape == torch.Size([0]) and \
                    (np.max(cv2.imread(os.path.join(tumor_mask_root, img_name), flags=0)) == 0) and \
                    (np.max(cv2.imread(os.path.join(wall_mask_root, img_name), flags=0)) == 0):
                with open(dice_path, readstyle, newline='') as file:
                    csv_file = csv.writer(file)
                    datas = [img_name, 1., 1., 1.]
                    csv_file.writerow(datas)
                continue
            # No objects detected; only the wall ground truth is non-empty.
            elif r.pred_classes.shape == torch.Size([0]) and \
                    (np.max(cv2.imread(os.path.join(tumor_mask_root, img_name), flags=0)) == 0) and \
                    (np.max(cv2.imread(os.path.join(wall_mask_root, img_name), flags=0)) != 0):
                with open(dice_path, readstyle, newline='') as file:
                    csv_file = csv.writer(file)
                    datas = [img_name, 1., 0, 0.5]
                    csv_file.writerow(datas)
                continue
            # No objects detected; only the tumor ground truth is non-empty.
            elif r.pred_classes.shape == torch.Size([0]) and \
                    (np.max(cv2.imread(os.path.join(tumor_mask_root, img_name), flags=0)) != 0) and \
                    (np.max(cv2.imread(os.path.join(wall_mask_root, img_name), flags=0)) == 0):
                with open(dice_path, readstyle, newline='') as file:
                    csv_file = csv.writer(file)
                    datas = [img_name, 0, 1., 0.5]
                    csv_file.writerow(datas)
                continue

            # Both classes are present in the predictions.
            if r.pred_classes.min().cpu().numpy() == 0 and (
                    r.pred_classes.max().cpu().numpy() == self.num_classes - 1):
                for i in range(self.num_classes):
                    pred_sum = np.zeros(
                        (r.pred_masks.shape[1], r.pred_masks.shape[2]),
                        dtype=np.int32)
                    pred_classes = r.pred_classes
                    index = (pred_classes == i).nonzero()
                    index = index.reshape(len(index))
                    scores, idx = r.scores[index].sort(descending=True)
                    pred_masks = r.pred_masks[index][idx]  # (7, 310, 420)
                    if i == 0:
                        for j in range(pred_masks.shape[0]):
                            pred_sum += pred_masks[j].int().cpu().numpy()
                        pred_sum[pred_sum >= 2] = 1
                        assert pred_sum.max() <= 1
                        pred = (pred_sum * 255).astype(np.uint8)
                        cv2.imwrite(os.path.join(pred_tumor_path, img_name), pred)
                        pred_tumor_masks.append(
                            torch.tensor(pred_sum, dtype=torch.float32, device='cuda'))
                        mask_path = os.path.join(tumor_mask_root, img_name)
                        mask_arr = cv2.imread(mask_path, flags=0)  # (320, 410)
                        gt_tumor_masks.append(
                            torch.from_numpy(mask_arr / 255.).type(torch.float32).cuda())
                    else:
                        for j in range(pred_masks.shape[0]):
                            pred_sum += pred_masks[j].int().cpu().numpy()
                        pred_sum[pred_sum >= 2] = 1
                        assert pred_sum.max() <= 1
                        pred = (pred_sum * 255).astype(np.uint8)
                        cv2.imwrite(os.path.join(pred_wall_path, img_name), pred)
                        pred_wall_masks.append(
                            torch.tensor(pred_sum, dtype=torch.float32, device='cuda'))
                        mask_path = os.path.join(wall_mask_root, img_name)
                        mask_arr = cv2.imread(mask_path, flags=0)
                        gt_wall_masks.append(
                            torch.from_numpy(mask_arr / 255.).type(torch.float32).cuda())
            else:
                scores, idx = r.scores.sort(descending=True)
                pred_masks = r.pred_masks[idx]  # (7, 310, 4)
                pred_sum = np.zeros(
                    (r.pred_masks.shape[1], r.pred_masks.shape[2]),
                    dtype=np.int32)
                if r.pred_classes.max().item() == 0:
                    for i in range(pred_masks.shape[0]):
                        pred_sum += pred_masks[i].int().cpu().numpy()
                    pred_sum[pred_sum >= 2] = 1
                    assert pred_sum.max() <= 1
                    pred = (pred_sum * 255).astype(np.uint8)
                    cv2.imwrite(os.path.join(pred_tumor_path, img_name), pred)
                    pred_tumor_masks.append(
                        torch.tensor(pred_sum, dtype=torch.float32, device='cuda'))
                    mask_path = os.path.join(tumor_mask_root, img_name)
                    mask_arr = cv2.imread(mask_path, flags=0)
                    gt_tumor_masks.append(
                        torch.from_numpy(mask_arr / 255.).type(torch.float32).cuda())
                else:
                    for i in range(pred_masks.shape[0]):
                        pred_sum += pred_masks[i].int().cpu().numpy()
                    pred_sum[pred_sum >= 2] = 1
                    assert pred_sum.max() <= 1
                    pred = (pred_sum * 255).astype(np.uint8)
                    cv2.imwrite(os.path.join(pred_wall_path, img_name), pred)
                    pred_wall_masks.append(
                        torch.tensor(pred_sum, dtype=torch.float32, device='cuda'))
                    mask_path = os.path.join(wall_mask_root, img_name)
                    mask_arr = cv2.imread(mask_path, flags=0)
                    gt_wall_masks.append(
                        torch.from_numpy(mask_arr / 255.).type(torch.float32).cuda())

            if pred_tumor_masks:
                tumor_dice_mean = dice_coefficient(
                    torch.cat(pred_tumor_masks, dim=1).view(len(pred_tumor_masks), -1),
                    torch.cat(gt_tumor_masks, dim=1).view(len(pred_tumor_masks), -1))
            elif cv2.imread(os.path.join(tumor_mask_root, img_name), flags=0).max() == 0:
                tumor_dice_mean = torch.tensor(1.).to(self.device)
            else:
                tumor_dice_mean = torch.tensor(0.).to(self.device)

            if pred_wall_masks:
                wall_dice_mean = dice_coefficient(
                    torch.cat(pred_wall_masks, dim=1).view(len(pred_wall_masks), -1),
                    torch.cat(gt_wall_masks, dim=1).view(len(pred_wall_masks), -1))
            elif cv2.imread(os.path.join(wall_mask_root, img_name), flags=0).max() == 0:
                wall_dice_mean = torch.tensor(1.).to(self.device)
            else:
                wall_dice_mean = torch.tensor(0.).to(self.device)

            dice_mean = (tumor_dice_mean + wall_dice_mean) / 2
            with open(dice_path, readstyle, newline='') as file:
                csv_file = csv.writer(file)
                datas = [img_name, tumor_dice_mean.item(),
                         wall_dice_mean.item(), dice_mean.item()]
                csv_file.writerow(datas)
    return processed_results
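# --- Reference sketch ----------------------------------------------------------
# `dice_coefficient` used above is assumed to compute the standard Dice score,
# 2*|A∩B| / (|A| + |B|), over flattened binary masks, averaged across images.
# Written for illustration; the helper in this codebase may differ in details
# such as smoothing.
import torch

def dice_coefficient(pred, target, eps=1e-6):
    # pred, target: (N, H*W) float tensors containing 0/1 mask values.
    intersection = (pred * target).sum(dim=1)
    union = pred.sum(dim=1) + target.sum(dim=1)
    return ((2.0 * intersection + eps) / (union + eps)).mean()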
def forward(self, batched_inputs): """ Args: batched_inputs: a list, batched outputs of :class:`DatasetMapper` . Each item in the list contains the inputs for one image. For now, each item in the list is a dict that contains: * image: Tensor, image in (C, H, W) format. * instances: Instances Other information that's included in the original dicts, such as: * "height", "width" (int): the output resolution of the model, used in inference. See :meth:`postprocess` for details. Returns: dict[str: Tensor]: mapping from a named loss to a tensor storing the loss. Used during training only. """ if self.training: # prepare images & gt. images = self.preprocess_image(batched_inputs) if isinstance(batched_inputs[0], tuple): gt_instances = list() for paired_inputs in batched_inputs: paired_instances = [ x["instances"].to(self.device) for x in paired_inputs ] gt_instances.append(paired_instances) else: gt_instances = [ x["instances"].to(self.device) for x in batched_inputs ] # detection first. output = self.detr(copy.deepcopy(images)) output, pre_embed = output if self.track_on: # generate targets for tracking. targets = self.prepare_targets_for_tracking( gt_instances) # cur_targets / pre_targets. # compute loss for detection (pre frame) & generate indices. loss_det, indices_det = self.criterion(output, targets[1], track_on=False) # track. if self.track_aug: track_embed = list() track_indices = list() for i, indices in enumerate(indices_det): embedding = pre_embed[i] indices_pos = indices[0] size_embedding = len(embedding) size_pos = len(indices_pos) indices_cand = torch.ones(size_embedding) indices_cand[indices_pos] = 0 indices_neg = indices_cand.nonzero().squeeze(1) assert len(indices_pos) <= len(indices_neg) indices_select = torch.randperm( len(indices_neg))[:size_pos] indices_neg = indices_neg[indices_select] # make a copy/aug. embedding[indices_neg, :] = embedding[indices_pos, :] track_embed.append(embedding.unsqueeze(0)) track_indices.append( tuple(list(indices) + [indices_neg])) # pos / gt / neg. track_embed = torch.cat(track_embed, 0).permute(1, 0, 2) else: track_embed = pre_embed.permute(1, 0, 2) track_indices = indices_det output = self.detr(images, pre_embed=track_embed) output, _ = output # compute loss for tracking (cur frame). loss_track, _ = self.criterion(output, targets, indices_track=track_indices, track_on=True) # aggregate losses. losses_dict = dict() weight_dict = self.criterion.weight_dict loss_candidates = [loss_det, loss_track] if self.freeze_det: loss_candidates.pop(0) for loss_dict in loss_candidates: for k in loss_dict.keys(): if k in weight_dict: if k not in losses_dict.keys(): losses_dict[k] = loss_dict[k] * weight_dict[k] else: losses_dict[k] = ( losses_dict[k] + loss_dict[k] * weight_dict[k]) / 2 return losses_dict else: raise NotImplementedError targets = self.prepare_targets(gt_instances) loss_dict = self.criterion(output, targets) weight_dict = self.criterion.weight_dict for k in loss_dict.keys(): if k in weight_dict: loss_dict[k] *= weight_dict[k] return loss_dict else: assert len(batched_inputs) == 1, \ print("Only support ONE image each time during inference, " "while there are {} images now.".format(len(batched_inputs))) # prepare images. images = self.preprocess_image(batched_inputs) if isinstance(batched_inputs[0], tuple): cur_input = batched_inputs[0][0] pre_embed = cur_input.get("pre_embed", None) else: raise NotImplementedError # inference. 
output = self.detr(images, pre_embed=pre_embed) output, pre_embed = output box_cls = output["pred_logits"] box_pred = output["pred_boxes"] if self.track_on: box_track = output["pred_tracks"] results = self.inference(box_cls, box_pred, images.image_sizes, box_track=box_track) else: raise NotImplementedError processed_results = [] for results_per_image, input_per_image, image_size in zip( results, batched_inputs[0], images.image_sizes): height = input_per_image.get("height", image_size[0]) width = input_per_image.get("width", image_size[1]) r = detector_postprocess(results_per_image, height, width) processed_results.append({"instances": r}) return processed_results, pre_embed.permute(1, 0, 2)
def forward(self, batched_inputs): """ Args: batched_inputs: a list, batched outputs of :class:`DatasetMapper` . Each item in the list contains the inputs for one video. For now, each item in the list is a list of dict that contains: * image: Tensor, image in (C, H, W) format. * instances: Instances Other information that's included in the original dicts, such as: * "height", "width" (int): the output resolution of the model, used in inference. See :meth:`postprocess` for details. """ assert len(batched_inputs) == 1 dataset_dict = batched_inputs[0] images, images_xywh = self.preprocess_image(dataset_dict) # Feature Extraction. src = self.backbone(images.tensor) features = list() for f in self.in_features: feature = src[f] features.append(feature) # Prepare Proposals. proposal_boxes = images_xywh[:, None, :].repeat(1, self.num_proposals, 1) # Prediction. outputs_class, outputs_coord = self.head( features, proposal_boxes, self.init_proposal_features.weight) output = { 'pred_logits': outputs_class[-1], 'pred_boxes': outputs_coord[-1] } if self.training: gt_instances = [ x["instances"].to(self.device) for x in batched_inputs ] targets = self.prepare_targets(gt_instances) if self.deep_supervision: output['aux_outputs'] = [{ 'pred_logits': a, 'pred_boxes': b } for a, b in zip(outputs_class[:-1], outputs_coord[:-1])] loss_dict = self.criterion(output, targets) weight_dict = self.criterion.weight_dict for k in loss_dict.keys(): if k in weight_dict: loss_dict[k] *= weight_dict[k] return loss_dict else: box_cls = output["pred_logits"] box_pred = output["pred_boxes"] results = self.inference(box_cls, box_pred, images.image_sizes) processed_results = [] for results_per_image, input_per_image, image_size in zip( results, batched_inputs, images.image_sizes): height = input_per_image.get("height", image_size[0]) width = input_per_image.get("width", image_size[1]) r = detector_postprocess(results_per_image, height, width) processed_results.append({"instances": r}) return processed_results
def forward(self, batched_inputs): """ Args: batched_inputs: a list, batched outputs of :class:`DatasetMapper` . Each item in the list contains the inputs for one image. For now, each item in the list is a dict that contains: * image: Tensor, image in (C, H, W) format. * instances: Instances Other information that's included in the original dicts, such as: * "height", "width" (int): the output resolution of the model, used in inference. See :meth:`postprocess` for details. """ # images(ImageList), images_whwh(Tensor) with [w,h,w,h] for what? images, images_whwh = self.preprocess_image(batched_inputs) if isinstance(images, (list, torch.Tensor)): images = nested_tensor_from_tensor_list(images) # Feature Extraction. # return a dict = {"res2":fmap,..."res5":} src = self.backbone(images.tensor) features = list() # self.in_features: ["res2", "res3", "res4", "res5"] for f in self.in_features: feature = src[f] features.append(feature) # Cls & Reg Prediction. # outputs_class: with shape (N, num_class, H/4, W/4) # outputs_coord(Tensor): with shape (N, 4, H/4, W/4) # coord 是已经调整跟实际 outputs_class, outputs_coord = self.head(features) output = {'pred_logits': outputs_class, 'pred_boxes': outputs_coord} """ output:{ "pred_logits":with shape (N, num_class, H/4, W/4) "pred_boxes": with shape (N, 4, H/4, W/4) boxes 已经中心原图化 } """ if self.training: gt_instances = [x["instances"].to(self.device) for x in batched_inputs] targets = self.prepare_targets(gt_instances) """ targets = [dict] 每个dict 是一个图片的 dict = { label:(tensor) [num] 类别 boxes:(tensor) [num, 4] (, cx,cy,w,h) 归一化后的,boxes format boxes_xyxy: (tensor), [num, 4] # 原来的boxes image_size_xyxy:(tensor) [4] # [w,h,w,h] image_size_xyxy_tgt:(tensor) [num,4] item 同上 area: (tensor), [num] # 计算每个boxes 的面积 all is *.to(self.device) } """ # 2021.2.26 # loss_dict:{"loss_ce":, "loss_giou":, "loss_bbox":} loss_bbox:l1_loss(x1,y1,x2,y2) already normalized loss_dict = self.criterion(output, targets) # 对loss 加权 weight_dict = self.criterion.weight_dict for k in loss_dict.keys(): if k in weight_dict: loss_dict[k] *= weight_dict[k] return loss_dict else: box_cls = output["pred_logits"] box_pred = output["pred_boxes"] #ret: list[Instances] with shape [batch_size] """ if without nms, shape is [topk] for each attr Instances: .pred_boxes(Boxes): Boxes.tensor with shape[topk] .scores(Tensor): shape[topk] .pred_classes(Tensor): prediction of class id """ # images.image_sizes is the size after data tranformation results = self.inference(box_cls, box_pred, images.image_sizes) processed_results = [] for results_per_image, input_per_image, image_size in zip(results, batched_inputs, images.image_sizes): # 获取图片h,w 信息, 此处的尺寸是原始图片的尺寸 # image_size 是经过transform 后的 height = input_per_image.get("height", image_size[0]) width = input_per_image.get("width", image_size[1]) # 对 bbox 缩放对应于原始图片的尺寸 r = detector_postprocess(results_per_image, height, width) processed_results.append({"instances": r}) return processed_results
def forward(self, batched_inputs): """ Args: batched_inputs: a list, batched outputs of :class:`DatasetMapper`. Each item in the list contains the inputs for one image. For now, each item in the list is a dict that contains: * "image": Tensor, image in (C, H, W) format. * "instances": Instances * "sem_seg": semantic segmentation ground truth. * Other information that's included in the original dicts, such as: "height", "width" (int): the output resolution of the model, used in inference. See :meth:`postprocess` for details. Returns: list[dict]: each dict is the results for one image. The dict contains the following keys: * "instances": see :meth:`GeneralizedRCNN.forward` for its format. * "sem_seg": see :meth:`SemanticSegmentor.forward` for its format. * "panoptic_seg": available when `PANOPTIC_FPN.COMBINE.ENABLED`. See the return value of :func:`combine_semantic_and_instance_outputs` for its format. """ image_path = [x['file_name'] for x in batched_inputs] if self.training: flips = [x['flip'] for x in batched_inputs] else: flips = None images = [x["image"].to(self.device) for x in batched_inputs] images = [(x - self.pixel_mean) / self.pixel_std for x in images] images = ImageList.from_tensors(images, self.backbone.size_divisibility) features = self.backbone(images.tensor) if "proposals" in batched_inputs[0]: proposals = [ x["proposals"].to(self.device) for x in batched_inputs ] proposal_losses = {} if "sem_seg" in batched_inputs[0]: gt_sem_seg = [x["sem_seg"].to(self.device) for x in batched_inputs] gt_sem_seg = ImageList.from_tensors( gt_sem_seg, self.backbone.size_divisibility, self.sem_seg_head.ignore_value).tensor else: gt_sem_seg = None sem_seg_results, sem_seg_losses = self.sem_seg_head( features, gt_sem_seg) if "integral_sem_seg" in batched_inputs[0] and self.training: gt_integral_sem_seg = [ x["integral_sem_seg"].to(self.device) for x in batched_inputs ] else: gt_integral_sem_seg = None if "instances" in batched_inputs[0]: gt_instances = [ x["instances"].to(self.device) for x in batched_inputs ] if hasattr(self.roi_heads.box_predictor, 'add_pseudo_label'): gt_instances = self.roi_heads.box_predictor.add_pseudo_label( gt_instances, image_path, flips) else: gt_instances = None if self.proposal_generator: proposals, proposal_losses = self.proposal_generator( images, features, gt_instances, gt_integral_sem_seg) detector_results, detector_losses = self.roi_heads( images, features, proposals, gt_instances, gt_integral_sem_seg, image_path=image_path, flips=flips) if self.training: losses = {} losses.update(sem_seg_losses) losses.update({ k: v * self.instance_loss_weight for k, v in detector_losses.items() }) losses.update(proposal_losses) return losses processed_results = [] for sem_seg_result, detector_result, input_per_image, image_size in zip( sem_seg_results, detector_results, batched_inputs, images.image_sizes): height = input_per_image.get("height", image_size[0]) width = input_per_image.get("width", image_size[1]) sem_seg_r = sem_seg_postprocess(sem_seg_result, image_size, height, width) detector_r = detector_postprocess(detector_result, height, width) processed_results.append({ "sem_seg": sem_seg_r, "instances": detector_r }) if self.combine_on: panoptic_r = combine_semantic_and_instance_outputs( detector_r, sem_seg_r.argmax(dim=0), self.combine_overlap_threshold, self.combine_stuff_area_limit, self.combine_instances_confidence_threshold, ) processed_results[-1]["panoptic_seg"] = panoptic_r return processed_results