def __call__(
    self,
    instances: Instances,
    select=None,
) -> Tuple[Optional[DensePoseEmbeddingPredictorOutput], Optional[torch.Tensor], Optional[List[int]]]:
    if not (instances.has("pred_densepose") and instances.has("pred_boxes")):
        return None, None, None
    dpout = instances.pred_densepose
    boxes_xyxy = instances.pred_boxes
    boxes_xywh = extract_boxes_xywh_from_instances(instances)
    # Keep classes as a tensor until after selection so tensor/boolean
    # selectors work; convert to a plain list only at the end.
    classes = instances.pred_classes if instances.has("pred_classes") else None
    if select is not None:
        dpout = dpout[select]
        boxes_xyxy = boxes_xyxy[select]
        boxes_xywh = boxes_xywh[select]  # keep the returned boxes in sync with the selection
        if classes is not None:
            classes = classes[select]
    if classes is not None:
        classes = classes.tolist()
    return dpout, boxes_xywh, classes
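# Hedged usage sketch for the extractor above: assumes this __call__ lives on
# a DensePoseOutputsExtractor-style class and that `outputs` came from a
# DensePose-enabled detectron2 predictor; the names below are illustrative.
#
#   extractor = DensePoseOutputsExtractor()
#   outputs = predictor(img)["instances"]
#   dpout, boxes_xywh, classes = extractor(outputs, select=outputs.scores > 0.8)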
def setUp(self):
    self.evaluator = PredictionCountEvaluator()
    image_size = (224, 224)
    self.mock_outputs = [
        {"instances": Instances(image_size, scores=torch.Tensor([0.9, 0.8, 0.7]))},
        {"instances": Instances(image_size, scores=torch.Tensor([0.9, 0.8, 0.7]))},
        {"instances": Instances(image_size, scores=torch.Tensor([0.9, 0.8]))},
        {"instances": Instances(image_size, scores=torch.Tensor([0.9, 0.8]))},
        {"instances": Instances(image_size, scores=torch.Tensor([0.9]))},
    ]
    # PredictionCountEvaluator does not depend on inputs
    self.mock_inputs = [None] * len(self.mock_outputs)
def threshold_bbox(self, proposal_bbox_inst, thres=0.7, proposal_type="roih"):
    if proposal_type == "rpn":
        valid_map = proposal_bbox_inst.objectness_logits > thres
        # create instances containing boxes and gt_classes
        image_shape = proposal_bbox_inst.image_size
        new_proposal_inst = Instances(image_shape)
        # create box
        new_bbox_loc = proposal_bbox_inst.proposal_boxes.tensor[valid_map, :]
        new_boxes = Boxes(new_bbox_loc)
        # add boxes to instances
        new_proposal_inst.gt_boxes = new_boxes
        new_proposal_inst.objectness_logits = proposal_bbox_inst.objectness_logits[valid_map]
    elif proposal_type == "roih":
        valid_map = proposal_bbox_inst.scores > thres
        # create instances containing boxes and gt_classes
        image_shape = proposal_bbox_inst.image_size
        new_proposal_inst = Instances(image_shape)
        # create box
        new_bbox_loc = proposal_bbox_inst.pred_boxes.tensor[valid_map, :]
        new_boxes = Boxes(new_bbox_loc)
        # add boxes to instances
        new_proposal_inst.gt_boxes = new_boxes
        new_proposal_inst.gt_classes = proposal_bbox_inst.pred_classes[valid_map]
        new_proposal_inst.scores = proposal_bbox_inst.scores[valid_map]
    return new_proposal_inst
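# Hedged usage sketch: in a teacher-student setup (Unbiased Teacher style),
# threshold_bbox turns confident teacher predictions into pseudo ground truth.
# `teacher_roih_outputs` is an illustrative name, not from the source.
#
#   pseudo_gt = [
#       self.threshold_bbox(inst, thres=0.7, proposal_type="roih")
#       for inst in teacher_roih_outputs
#   ]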
def draw_instance_groundtruth(self, img_rgb, groundtruth):
    visual = Visualizer(img_rgb, self.metadata, self.scale, self.instance_mode)
    instance = Instances(
        image_size=groundtruth.image_size,
        pred_boxes=groundtruth.gt_boxes,
        pred_classes=groundtruth.gt_classes,
    )
    if groundtruth.has('gt_keypoints'):
        instance.pred_keypoints = groundtruth.gt_keypoints
    vis_img = visual.draw_instance_predictions(instance)
    return vis_img.get_image()
def execute_on_outputs(cls: type, context: Dict[str, Any],
                       entry: Dict[str, Any], outputs: Instances):
    image_fpath = entry["file_name"]
    logger.info(f"Processing {image_fpath}")
    result = {"file_name": image_fpath}
    if outputs.has("scores"):
        result["scores"] = outputs.get("scores").cpu()
    if outputs.has("pred_boxes"):
        result["pred_boxes_XYXY"] = outputs.get("pred_boxes").tensor.cpu()
    if outputs.has("pred_densepose"):
        result["pred_densepose"], _ = DensePoseResultExtractor()(outputs)
    context["results"].append(result)
def _forward_mask_heads_test(self, proposals, mask_feats, iuv_feats, imgsize):
    # prepare the inputs for mask heads
    for im_id, per_im in enumerate(proposals):
        per_im.im_inds = per_im.locations.new_ones(len(per_im), dtype=torch.long) * im_id
    pred_instances = Instances.cat(proposals)
    pred_instances.mask_head_params = pred_instances.top_feat

    # iuv_logits = self.iuv_head(iuv_feats, self.mask_branch.out_stride, pred_instances)
    # densepose_instances, densepose_outputs = self.mask_head(
    #     mask_feats, iuv_logits, self.mask_branch.out_stride, pred_instances
    # )
    densepose_instances, densepose_outputs = self.mask_head(
        self.iuv_head, iuv_feats, mask_feats, self.mask_branch.out_stride, pred_instances
    )

    # rescale predicted boxes from the network input size to the output image size
    boxes = densepose_instances.pred_boxes.tensor
    boxes = boxes / densepose_instances.image_size[0] * imgsize[0]
    densepose_instances.set('pred_boxes', Boxes(boxes))
    return [{'instances': densepose_instances}], densepose_outputs
def __call__(self, instances: Instances, select=None):
    if instances.has("pred_densepose") and instances.has("pred_boxes"):
        dpout = instances.pred_densepose
        boxes_xyxy = instances.pred_boxes
        boxes_xywh = extract_boxes_xywh_from_instances(instances)
        if select is not None:
            dpout = dpout[select]
            boxes_xyxy = boxes_xyxy[select]
            boxes_xywh = boxes_xywh[select]  # keep the returned boxes in sync with the selection
        converter = ToChartResultConverterWithConfidences()
        results = [
            converter.convert(dpout[i], boxes_xyxy[[i]]) for i in range(len(dpout))
        ]
        return results, boxes_xywh
    else:
        # return a pair so callers can always unpack two values
        return None, None
def extract_boxes_xywh_from_instances(instances: Instances, select=None):
    if instances.has("pred_boxes"):
        boxes_xywh = instances.pred_boxes.tensor.clone()
        boxes_xywh[:, 2] -= boxes_xywh[:, 0]
        boxes_xywh[:, 3] -= boxes_xywh[:, 1]
        return boxes_xywh if select is None else boxes_xywh[select]
    return None
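# Quick self-contained check of the XYXY -> XYWH conversion above; assumes
# only detectron2's Boxes/Instances, and the values are illustrative.
import torch
from detectron2.structures import Boxes, Instances

_inst = Instances((480, 640))
_inst.pred_boxes = Boxes(torch.tensor([[10., 20., 50., 80.]]))
print(extract_boxes_xywh_from_instances(_inst))  # tensor([[10., 20., 40., 60.]])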
def execute_on_outputs(
    cls: type, context: Dict[str, Any], entry: Dict[str, Any], outputs: Instances
):
    image_fpath = entry["file_name"]
    logger.info(f"Processing {image_fpath}")
    result = {"file_name": image_fpath}
    if outputs.has("scores"):
        result["scores"] = outputs.get("scores").cpu()
    if outputs.has("pred_boxes"):
        result["pred_boxes_XYXY"] = outputs.get("pred_boxes").tensor.cpu()
    if outputs.has("pred_densepose"):
        if isinstance(outputs.pred_densepose, DensePoseChartPredictorOutput):
            extractor = DensePoseResultExtractor()
        elif isinstance(outputs.pred_densepose, DensePoseEmbeddingPredictorOutput):
            extractor = DensePoseOutputsExtractor()
        result["pred_densepose"] = extractor(outputs)[0]
    context["results"].append(result)
def __call__(self, instances: Instances, select=None):
    boxes_xywh = extract_boxes_xywh_from_instances(instances)
    if instances.has("pred_keypoints") and (boxes_xywh is not None):
        kpout = instances.pred_keypoints
        if select is not None:
            kpout = kpout[select]  # honor the selector instead of silently ignoring it
        return kpout.cpu().numpy()
    else:
        return None
def offset_instances(instances, offset, im_size):
    instance_dict = {
        'pred_boxes': offset_boxes(instances.pred_boxes, offset),
        'scores': instances.scores,
        'pred_classes': instances.pred_classes,
        'pred_masks': offset_masks(instances.pred_masks, offset),
    }
    return Instances(im_size, **instance_dict)
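# Hedged usage sketch: offset_instances shifts tile-local detections into the
# coordinate frame of the full image, so per-tile results can later be merged
# with Instances.cat. `offset_boxes`/`offset_masks` are the helpers assumed by
# the function above; `(x0, y0)` is the tile's origin in the full image.
#
#   tile_inst = predictor(tile)["instances"]
#   global_inst = offset_instances(tile_inst, (x0, y0), full_image_hw)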
def doit(raw_image, raw_boxes, predictor):
    # Process boxes
    raw_boxes = Boxes(torch.from_numpy(raw_boxes).cuda())
    with torch.no_grad():
        raw_height, raw_width = raw_image.shape[:2]
        # Preprocessing
        image = predictor.transform_gen.get_transform(raw_image).apply_image(raw_image)
        # Scale the boxes to the transformed image size
        new_height, new_width = image.shape[:2]
        scale_x = 1. * new_width / raw_width
        scale_y = 1. * new_height / raw_height
        boxes = raw_boxes.clone()
        boxes.scale(scale_x=scale_x, scale_y=scale_y)
        # ----
        image = torch.as_tensor(image.astype("float32").transpose(2, 0, 1))
        inputs = [{"image": image, "height": raw_height, "width": raw_width}]
        images = predictor.model.preprocess_image(inputs)
        # Run backbone (Res1-Res4)
        features = predictor.model.backbone(images.tensor)
        # Run RoI head for each proposal (RoI pooling + Res5)
        proposal_boxes = [boxes]
        features = [features[f] for f in predictor.model.roi_heads.in_features]
        box_features = predictor.model.roi_heads._shared_roi_transform(
            features, proposal_boxes
        )
        feature_pooled = box_features.mean(dim=[2, 3])  # pooled to 1x1
        # Predict classes, attributes, and box deltas for each proposal.
        pred_class_logits, pred_attr_logits, pred_proposal_deltas = \
            predictor.model.roi_heads.box_predictor(feature_pooled)
        pred_class_prob = nn.functional.softmax(pred_class_logits, -1)
        pred_scores, pred_classes = pred_class_prob[..., :-1].max(-1)
        attr_prob = pred_attr_logits[..., :-1].softmax(-1)
        max_attr_prob, max_attr_label = attr_prob.max(-1)
        # Detectron2 formatting (for visualization only)
        roi_features = feature_pooled
        instances = Instances(
            image_size=(raw_height, raw_width),
            pred_boxes=raw_boxes,
            scores=pred_scores,
            pred_classes=pred_classes,
            attr_scores=max_attr_prob,
            attr_classes=max_attr_label,
        )
    return instances, roi_features
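# Hedged usage sketch for doit: extract RoI features for externally supplied
# boxes (bottom-up-attention style). `predictor` is assumed to be a detectron2
# DefaultPredictor-like object; the image read and box values are illustrative.
#
#   raw_image = cv2.imread("example.jpg")                         # HxWx3
#   raw_boxes = np.array([[10, 20, 200, 240]], dtype=np.float32)  # XYXY
#   instances, roi_features = doit(raw_image, raw_boxes, predictor)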
def __call__(self, instances: Instances, select=None):
    boxes_xywh = extract_boxes_xywh_from_instances(instances)
    if instances.has("pred_densepose") and (boxes_xywh is not None):
        dpout = instances.pred_densepose
        if select is not None:
            dpout = dpout[select]
            boxes_xywh = boxes_xywh[select]
        return dpout.to_result(boxes_xywh)
    else:
        return None
def _forward_mask_heads_train(self, proposals, pixel_embed, gt_instances):
    # prepare the inputs for mask heads
    pred_instances = proposals["instances"]
    assert (self.max_proposals == -1) or (self.topk_proposals_per_im == -1), \
        "MAX_PROPOSALS and TOPK_PROPOSALS_PER_IM cannot be used at the same time."
    if self.max_proposals != -1:
        if self.max_proposals < len(pred_instances):
            inds = torch.randperm(len(pred_instances), device=pixel_embed.device).long()
            logger.info("clipping proposals from {} to {}".format(
                len(pred_instances), self.max_proposals))
            pred_instances = pred_instances[inds[:self.max_proposals]]
    elif self.topk_proposals_per_im != -1:
        num_images = len(gt_instances)
        kept_instances = []
        for im_id in range(num_images):
            instances_per_im = pred_instances[pred_instances.im_inds == im_id]
            if len(instances_per_im) == 0:
                kept_instances.append(instances_per_im)
                continue
            unique_gt_inds = instances_per_im.gt_inds.unique()
            num_instances_per_gt = max(
                int(self.topk_proposals_per_im / len(unique_gt_inds)), 1)
            for gt_ind in unique_gt_inds:
                instances_per_gt = instances_per_im[instances_per_im.gt_inds == gt_ind]
                if len(instances_per_gt) > num_instances_per_gt:
                    # keep the proposals with the highest score * centerness
                    scores = instances_per_gt.logits_pred.sigmoid().max(dim=1)[0]
                    ctrness_pred = instances_per_gt.ctrness_pred.sigmoid()
                    inds = (scores * ctrness_pred).topk(k=num_instances_per_gt, dim=0)[1]
                    instances_per_gt = instances_per_gt[inds]
                kept_instances.append(instances_per_gt)
        pred_instances = Instances.cat(kept_instances)

    # split top_feats into the embedding and margin parts
    pred_instances.proposal_embed = pred_instances.top_feats[:, :pixel_embed.size(1)]
    pred_instances.proposal_margin = pred_instances.top_feats[:, pixel_embed.size(1):]
    loss_mask = self.mask_pred(pixel_embed, self.mask_branch.out_stride,
                               pred_instances, gt_instances)
    return loss_mask
def postprocess(self, results, output_height, output_width,
                padded_im_h, padded_im_w, mask_threshold=0.5):
    """
    Resize the output instances.

    The input images are often resized when entering an object detector, so we
    often need the outputs of the detector at a different resolution from its
    inputs. This function resizes the raw outputs of an R-CNN detector to
    produce outputs at the desired output resolution.

    Args:
        results (Instances): the raw outputs from the detector.
            `results.image_size` contains the input image resolution the
            detector sees. This object might be modified in-place.
        output_height, output_width: the desired output resolution.

    Returns:
        Instances: the resized output from the model, based on the output resolution
    """
    scale_x, scale_y = (output_width / results.image_size[1],
                        output_height / results.image_size[0])
    resized_im_h, resized_im_w = results.image_size
    results = Instances((output_height, output_width), **results.get_fields())

    if results.has("pred_boxes"):
        output_boxes = results.pred_boxes
    elif results.has("proposal_boxes"):
        output_boxes = results.proposal_boxes
    output_boxes.scale(scale_x, scale_y)
    output_boxes.clip(results.image_size)
    results = results[output_boxes.nonempty()]

    if results.has("pred_global_masks"):
        mask_h, mask_w = results.pred_global_masks.size()[-2:]
        factor_h = padded_im_h // mask_h
        factor_w = padded_im_w // mask_w
        assert factor_h == factor_w
        factor = factor_h
        # upsample masks to the padded image, crop to the resized image,
        # then interpolate to the desired output resolution
        pred_global_masks = aligned_bilinear(results.pred_global_masks, factor)
        pred_global_masks = pred_global_masks[:, :, :resized_im_h, :resized_im_w]
        pred_global_masks = F.interpolate(pred_global_masks,
                                          size=(output_height, output_width),
                                          mode="bilinear", align_corners=False)
        pred_global_masks = pred_global_masks[:, 0, :, :]
        results.pred_masks = (pred_global_masks > mask_threshold).float()
    return results
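# Hedged usage sketch: map network-resolution outputs back to the original
# image size. `input_per_image` follows detectron2's batched-inputs
# convention; padded_im_h/padded_im_w would come from the padded ImageList
# tensor. All names here are illustrative assumptions.
#
#   height = input_per_image.get("height", image_size[0])
#   width = input_per_image.get("width", image_size[1])
#   r = self.postprocess(results_per_image, height, width, padded_im_h, padded_im_w)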
def generate_instance(self, instance: Instances, class_names: List[str],
                      total_classes: List[str]):
    instance = instance.to('cpu')
    boxes = instance.pred_boxes.tensor.numpy()
    masks = None
    scores = instance.scores.numpy()
    if instance.has("pred_masks"):
        masks = instance.pred_masks.numpy()
    # zero out detections whose class is not in total_classes
    for index, name in enumerate(class_names):
        if name not in total_classes:
            boxes[index:index + 1] = 0
            scores[index] = 0
            if masks is not None:
                masks[index:index + 1] = False
    instance.pred_boxes = Boxes(torch.from_numpy(boxes))
    if masks is not None:
        instance.pred_masks = torch.from_numpy(masks)
    instance.scores = torch.from_numpy(scores)
    return instance
def draw_instance_predictions(self, img_rgb, predictions, num_instance=20):
    processed_results = self.postprocess(predictions, img_rgb.shape[0], img_rgb.shape[1])
    visual = Visualizer(img_rgb, self.metadata, self.scale, self.instance_mode)
    instance = Instances(
        image_size=processed_results.image_size,
        pred_boxes=processed_results.pred_boxes.tensor.detach().cpu().numpy()[:num_instance],
        scores=processed_results.scores.detach().cpu().numpy()[:num_instance],
        pred_classes=processed_results.pred_classes.detach().cpu().int().numpy()[:num_instance],
    )
    if processed_results.has('pred_keypoints'):
        instance.pred_keypoints = \
            processed_results.pred_keypoints.detach().cpu().numpy()[:num_instance]
    vis_img = visual.draw_instance_predictions(instance)
    return vis_img.get_image()
def select_pred_inst(self, proposals):
    pred_instances = proposals["instances"]
    N = len(pred_instances.image_size)
    try:
        num_instance = pred_instances.gt_inds.max() + 1
    except Exception:
        # TODO: fix the upstream bug that leaves gt_inds empty
        print('error')
        print(pred_instances)
        num_instance = 0
    # guard against division by zero when no instances were matched
    max_num_instances_per_gt = self.max_proposals_per_im // max(num_instance, 1)
    max_num_instances_per_gt = max(max_num_instances_per_gt, 1)
    kept_instances = []
    num_loss = []
    for im_id in range(N):
        instances_per_im = pred_instances[pred_instances.im_inds == im_id]
        if len(instances_per_im) == 0:
            continue
        unique_gt_inds = instances_per_im.gt_inds.unique()
        for gt_ind in unique_gt_inds:
            instances_per_gt = instances_per_im[instances_per_im.gt_inds == gt_ind]
            if len(instances_per_gt) > max_num_instances_per_gt:
                # keep the proposals with the highest score * centerness
                scores = instances_per_gt.logits_pred.sigmoid().max(dim=1)[0]
                ctrness_pred = instances_per_gt.ctrness_pred.sigmoid()
                inds = (scores * ctrness_pred).topk(k=max_num_instances_per_gt, dim=0)[1]
                instances_per_gt = instances_per_gt[inds]
                num_loss_per_inst = unique_gt_inds.new_full(
                    [max_num_instances_per_gt], max_num_instances_per_gt)
            else:
                num_loss_per_inst = unique_gt_inds.new_full(
                    [len(instances_per_gt)], len(instances_per_gt))
            kept_instances.append(instances_per_gt)
            num_loss.append(num_loss_per_inst)
    pred_instances = Instances.cat(kept_instances)
    num_loss = torch.cat(num_loss, dim=0)
    attns = pred_instances.top_feats
    im_inds = pred_instances.im_inds
    locations = pred_instances.locations
    levels = pred_instances.fpn_levels
    gt_inds = pred_instances.gt_inds
    return attns, im_inds, locations, levels, gt_inds, num_instance, num_loss
def _forward_mask_heads_test(self, proposals, mask_feats, iuv_logits):
    # prepare the inputs for mask heads
    for im_id, per_im in enumerate(proposals):
        per_im.im_inds = per_im.locations.new_ones(len(per_im), dtype=torch.long) * im_id
    pred_instances = Instances.cat(proposals)
    pred_instances.mask_head_params = pred_instances.top_feat
    pred_instances_w_masks = self.mask_head(
        mask_feats, iuv_logits, self.mask_branch.out_stride, pred_instances
    )
    return pred_instances_w_masks
def pp_predictions(self, inspred, carmask, pedmask, cyclistmask):
    selector = torch.ones_like(inspred.scores)
    if inspred.scores.shape[0] == 0:
        return inspred
    # erase/shrink predictions against the per-category masks
    inspred = self.erase_srhink(inspred, carmask, cat=2)
    inspred = self.erase_srhink(inspred, cyclistmask, cat=1)
    inspred = self.erase_srhink(inspred, pedmask, cat=0)
    # drop predictions whose mask is smaller than the per-category minimum
    for k in range(inspred.scores.shape[0]):
        cat = inspred.pred_classes[k].item()
        numpixel = torch.sum(inspred.pred_masks[k]).item()
        if numpixel < self.minpixel[cat]:
            selector[k] = 0
    pp_inspred = Instances(image_size=inspred.image_size)
    selector = selector == 1
    pp_inspred.scores = inspred.scores[selector]
    pp_inspred.pred_classes = inspred.pred_classes[selector]
    pp_inspred.pred_boxes = inspred.pred_boxes[selector]
    pp_inspred.pred_masks = inspred.pred_masks[selector]
    return pp_inspred
def execute_on_outputs(cls: type, context: Dict[str, Any],
                       entry: Dict[str, Any], outputs: Instances):
    image_fpath = entry["file_name"]
    logger.info(f"Processing {image_fpath}")
    result = {"file_name": image_fpath}
    if outputs.has("scores"):
        result["scores"] = outputs.get("scores").cpu()
    if outputs.has("pred_classes"):
        result["pred_classes"] = outputs.get("pred_classes").cpu()
    if outputs.has("pred_boxes"):
        result["pred_boxes_XYXY"] = outputs.get("pred_boxes").tensor.cpu()
        if outputs.has("pred_densepose"):
            boxes_XYWH = BoxMode.convert(result["pred_boxes_XYXY"],
                                         BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
            result["pred_densepose"] = outputs.get("pred_densepose").to_result(boxes_XYWH)
    context["results"].append(result)
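# Self-contained illustration of the BoxMode conversion used above: XYXY
# corners become XYWH (same origin, width/height instead of the far corner).
import torch
from detectron2.structures import BoxMode

_xyxy = torch.tensor([[10., 20., 50., 80.]])
print(BoxMode.convert(_xyxy, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS))
# tensor([[10., 20., 40., 60.]])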
def _forward_mask_heads_test(self, proposals, pixel_embed):
    # prepare the inputs for mask heads
    for im_id, per_im in enumerate(proposals):
        per_im.im_inds = per_im.locations.new_ones(len(per_im), dtype=torch.long) * im_id
    pred_instances = Instances.cat(proposals)
    # split top_feat into the embedding and margin parts
    pred_instances.proposal_embed = pred_instances.top_feat[:, :pixel_embed.size(1)]
    pred_instances.proposal_margin = pred_instances.top_feat[:, pixel_embed.size(1):]
    pred_instances_w_masks = self.mask_pred(pixel_embed, self.mask_branch.out_stride,
                                            pred_instances)
    return pred_instances_w_masks
def highest_only(self, predict):
    instance = predict["instances"].to(self.cpu_device)
    image_size = instance.image_size
    get_scores = instance.get("scores")
    pred_classes_index = []
    if len(get_scores.tolist()) != 0:
        # keep only the single highest-scoring detection
        _, highest_index = torch.max(get_scores, 0)
        pred_classes_index.append(highest_index)
    pred_classes = self.tensor_transform(instance.get("pred_classes"), pred_classes_index)
    scores = self.tensor_transform(instance.get("scores"), pred_classes_index)
    pred_boxes = Boxes(
        self.tensor_transform(instance.get("pred_boxes").tensor, pred_classes_index))
    return Instances(image_size=image_size,
                     pred_boxes=pred_boxes,
                     scores=scores,
                     pred_classes=pred_classes)
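# Hedged usage sketch: reduce a prediction dict to its single most confident
# detection. `predict` is the usual {"instances": Instances} output of a
# detectron2 predictor; `self.tensor_transform` is assumed to gather rows by
# the given indices.
#
#   best = self.highest_only(predictor(img))
#   assert len(best) <= 1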
def deserialize_output(self, instances_dict=None, filename=None):
    if filename is not None:
        with Path(filename).open("r") as f:
            instances_dict = json.load(f)
    # handle image case: wrap a single image as a one-frame video
    is_image = False
    if "frames" not in instances_dict:
        instances_dict = {"frames": [instances_dict]}
        is_image = True
    frames = []
    for frame in instances_dict["frames"]:
        heights, widths, boxes, scores, labels, embeddings, segmentations = zip(*[
            (
                i["img_height"],
                i["img_width"],
                i["box"],
                i["score"],
                i["label"]["value"],
                i["embedding"] if i["embedding"] is not None else [np.nan],
                i["segmentation"] if i["segmentation"] is not None else [np.nan],
            )
            for i in frame["instances"]
        ])
        frames.append(
            Instances(
                (heights[0], widths[0]),
                pred_boxes=boxes,
                scores=scores,
                pred_classes=labels,
                pred_densepose=DensePoseEmbeddingPredictorOutput(
                    embedding=torch.tensor(embeddings),
                    coarse_segm=torch.tensor(segmentations),
                ),
            ))
    # if image or single frame, just return the instances
    if is_image:
        return frames[0]
    else:
        return frames
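# A minimal single-image payload matching the schema deserialize_output reads
# (field names taken from the zip above; the values are illustrative).
payload = {
    "instances": [{
        "img_height": 480,
        "img_width": 640,
        "box": [10.0, 20.0, 50.0, 80.0],
        "score": 0.9,
        "label": {"value": 0},
        "embedding": None,       # becomes [nan] when absent
        "segmentation": None,    # becomes [nan] when absent
    }]
}
# instances = self.deserialize_output(instances_dict=payload)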
def visualize_sequence(img, bboxes, labels, cfg):
    mapping = {'verb': 'action', 'noun': 'object', 'hoi': 'hoi'}
    MetadataCatalog.get('vis').set(
        thing_classes=getattr(Metadata, mapping[cfg.EXP.LABEL_TYPE]))
    metadata = MetadataCatalog.get('vis')
    classes = list()
    boxes = list()
    # expand multi-hot labels: one visualized box per active class
    for box, label in zip(bboxes, labels):
        for idx, x in enumerate(label):
            if x == 1:
                classes.append(idx)
                boxes.append(box)
    outputs = {"instances": Instances((img.shape[0], img.shape[1]),
                                      pred_boxes=boxes,
                                      pred_classes=classes)}
    v = Visualizer(img, metadata=metadata, scale=0.8, instance_mode=ColorMode.IMAGE)
    vis = v.draw_instance_predictions(outputs["instances"].to("cpu")).get_image()
    plt.imshow(vis)
    plt.show()
def flaw_only(self, predict):
    instance = predict["instances"].to(self.cpu_device)
    image_size = instance.image_size
    get_pred_classes = instance.get("pred_classes").numpy()
    pred_classes_index = []
    pred_classes = []
    # classes 0 and 1 are normal components / watermarks; keep only flaws
    for c in range(len(get_pred_classes)):
        if get_pred_classes[c] != 0 and get_pred_classes[c] != 1:
            pred_classes_index.append(c)
            pred_classes.append(get_pred_classes[c])
    pred_classes = torch.from_numpy(np.asarray(pred_classes))
    scores = self.tensor_transform(instance.get("scores"), pred_classes_index)
    pred_masks = self.tensor_transform(instance.get("pred_masks"), pred_classes_index)
    pred_boxes = Boxes(
        self.tensor_transform(instance.get("pred_boxes").tensor, pred_classes_index))
    return Instances(image_size=image_size,
                     pred_boxes=pred_boxes,
                     scores=scores,
                     pred_classes=pred_classes,
                     pred_masks=pred_masks)
def flaw_only(predict):
    '''
    The predictions contain normal components, watermarks, and flaws; this
    filters out everything except the flaw detections.

    :param predict: the model's raw prediction output: a set of boxes (given
        by their top-left and bottom-right corners), each with a predicted
        class, a score, a pixel-level boolean mask, and so on
    :return: the filtered predictions
    '''
    cpu_device = torch.device("cpu")
    instance = predict["instances"].to(cpu_device)
    image_size = instance.image_size
    get_pred_classes = instance.get("pred_classes").numpy()
    pred_classes_index = []
    pred_classes = []
    for c in range(len(get_pred_classes)):
        if get_pred_classes[c] != 0 and get_pred_classes[c] != 1:
            pred_classes_index.append(c)
            pred_classes.append(get_pred_classes[c])
    pred_classes = torch.from_numpy(np.asarray(pred_classes))
    scores = tensor_transform(instance.get("scores"), pred_classes_index)
    pred_masks = tensor_transform(instance.get("pred_masks"), pred_classes_index)
    pred_boxes = Boxes(tensor_transform(instance.get("pred_boxes").tensor,
                                        pred_classes_index))
    return Instances(image_size=image_size,
                     pred_boxes=pred_boxes,
                     scores=scores,
                     pred_classes=pred_classes,
                     pred_masks=pred_masks)
def review(self):
    for image_info in self:
        filepath = image_info["filepath"]
        image = plt.imread(filepath).copy()
        annotations = image_info["annotations"]
        boxes = np.array([annotation["bbox"] for annotation in annotations])
        ids = np.array([annotation["category_id"] for annotation in annotations])
        _, quantities = np.unique(ids, return_counts=True)
        text_info = ", ".join(
            f"{class_}: {quantity}"
            for class_, quantity in zip(MAP_NAMES, quantities)
        )
        scores = [annotation["score"] for annotation in annotations]
        plt.title(text_info)
        image_size = (image_info["height"], image_info["width"])
        instances = Instances(
            image_size=image_size,
            pred_boxes=boxes,
            pred_classes=ids,
            scores=scores,
        )
        visualize(image, instances, self.metadata)
def predict_large(self, im, span=256, stride=96, nmsalg='poly'):
    print('run model')
    # add padding around the image so border tiles see full context
    padding = 60
    im = np.pad(im, ((padding, padding), (padding, padding), (0, 0)),
                mode='constant', constant_values=0)
    im_height, im_width, _ = im.shape
    all_instances = []
    # slide a span x span window over the padded image
    for i in range(0, im_height, stride):
        for j in range(0, im_width, stride):
            sub_img = im[i:i + span, j:j + span, :]
            predictions = self.prediction_model(sub_img)
            sub_instances = predictions['instances']
            sub_instances = exclude_boundary(sub_instances, padding=60)  # 30
            sub_instances = offset_instances(sub_instances, (j, i),
                                             (im_height, im_width))
            all_instances.append(sub_instances)  # appended once (was duplicated)
    all_instances = Instances.cat(all_instances)
    all_instances.pred_masks = np.asarray(all_instances.pred_masks, dtype=object)
    if nmsalg == 'poly':
        all_instances = polygon_nms(all_instances)
    elif nmsalg == 'bbox':
        all_instances = bbox_nms(all_instances, overlap=0.6)
    else:
        assert False, 'nms algorithm must be polygon or bbox'
    # strip padding
    all_instances.pred_boxes.tensor -= padding
    all_instances.pred_masks = [[comp - padding for comp in mask]
                                for mask in all_instances.pred_masks]
    return all_instances
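# Hedged usage sketch: tiled inference over an image larger than the model's
# input size. The overlap (span=256 vs stride=96) means each object is seen by
# several tiles; per-tile duplicates are then merged by polygon/bbox NMS.
# The image path is illustrative.
#
#   large_im = plt.imread("mosaic.png")
#   instances = self.predict_large(large_im, span=256, stride=96, nmsalg='poly')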
def pp_predictions_simple(self, inspred):
    selector = torch.ones_like(inspred.scores)
    if inspred.scores.shape[0] == 0:
        return inspred
    for k in range(inspred.scores.shape[0]):
        cat = inspred.pred_classes[k].item()
        conf = inspred.scores[k].item()
        # count the pixels of this instance's mask (index by k; the original
        # summed over all masks at once)
        numpixel = torch.sum(inspred.pred_masks[k]).item()
        if conf < self.catconfbar[cat]:
            selector[k] = 0
        if numpixel < self.minpixel[cat]:
            selector[k] = 0
    pp_inspred = Instances(image_size=inspred.image_size)
    selector = selector == 1
    pp_inspred.scores = inspred.scores[selector]
    pp_inspred.pred_classes = inspred.pred_classes[selector]
    pp_inspred.pred_boxes = inspred.pred_boxes[selector]
    pp_inspred.pred_masks = inspred.pred_masks[selector]
    return pp_inspred