def forward(self, batched_inputs): """ Args: batched_inputs: a list, batched outputs of :class:`DatasetMapper` . Each item in the list contains the inputs for one image. For now, each item in the list is a dict that contains: * image: Tensor, image in (H, W, C) format. * instances: Instances Other information that's included in the original dicts, such as: * "height", "width" (int): the output resolution of the model, used in inference. See :meth:`postprocess` for details. Returns: dict[str: Tensor]: mapping from a named loss to a tensor storing the loss. Used during training only. """ batched_inputs = self.preprocess_image(batched_inputs) features = self.backbone(batched_inputs) if len(self.in_features) == 0: print( f"Error no input features for retinanet, use all features {features.keys()}" ) features = list(features.values()) else: features = [features[f] for f in self.in_features] pred_maps = self.head(features) gt_boxes = batched_inputs.get(GT_BOXES, None) gt_length = batched_inputs.get(GT_LENGTH, None) gt_labels = batched_inputs.get(GT_LABELS, None) gt_keypoints = batched_inputs.get(GT_KEYPOINTS, None) outputs = build_outputs( name=self.cfg.MODEL.KEYPOINTS.OUTPUTS, cfg=self.cfg.MODEL.KEYPOINTS, parent=self, pred_maps=pred_maps, gt_boxes=gt_boxes, gt_labels=gt_labels, gt_length=gt_length, gt_keypoints=gt_keypoints, max_detections_per_image=self.cfg.TEST.DETECTIONS_PER_IMAGE, ) outputs.batched_inputs = batched_inputs if self.is_training: if self.cfg.GLOBAL.SUMMARY_LEVEL <= SummaryLevel.DEBUG: results = outputs.inference(inputs=batched_inputs, pred_maps=pred_maps) else: results = {} return results, outputs.losses() else: results = outputs.inference(inputs=batched_inputs, pred_maps=pred_maps) return results, {}
def forward(self, batched_inputs):
    outputs = self.snpe.tf_forward(batched_inputs[IMAGE])
    head_outputs = {}
    head_outputs['offset'] = outputs[0]
    head_outputs['heatmaps_ct'] = outputs[1]
    head_outputs['hw'] = outputs[2]
    head_outputs['id_embedding'] = outputs[3]
    head_outputs = [head_outputs]

    gt_boxes = batched_inputs.get(GT_BOXES, None)
    gt_length = batched_inputs.get(GT_LENGTH, None)
    gt_labels = batched_inputs.get(GT_LABELS, None)
    outputs = build_outputs(
        name=self.cfg.MODEL.SNPE.OUTPUTS,
        cfg=self.cfg.MODEL.SNPE,
        parent=self,
        box2box_transform=self.box2box_transform,
        head_outputs=head_outputs,
        gt_boxes=gt_boxes,
        gt_labels=gt_labels,
        gt_length=gt_length,
        max_detections_per_image=self.cfg.TEST.DETECTIONS_PER_IMAGE,
    )

    if self.is_training:
        if self.cfg.GLOBAL.SUMMARY_LEVEL <= SummaryLevel.DEBUG:
            results = outputs.inference(inputs=batched_inputs,
                                        head_outputs=head_outputs)
        else:
            results = {}
        return results, outputs.losses()
    else:
        results = outputs.inference(inputs=batched_inputs,
                                    head_outputs=head_outputs)
        return results, {}
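# Hedged sketch (assumption, not repository code): the SNPE wrapper above relies
# on the exported graph returning its tensors in a fixed order. A small name
# table makes that ordering explicit in one place and fails fast if the exported
# model changes.
SNPE_OUTPUT_NAMES = ('offset', 'heatmaps_ct', 'hw', 'id_embedding')

def name_snpe_outputs(raw_outputs, names=SNPE_OUTPUT_NAMES):
    # raw_outputs: sequence of tensors returned by snpe.tf_forward, in graph order.
    assert len(raw_outputs) == len(names), "unexpected SNPE output count"
    # Wrapped in a single-element list to match the per-level head_outputs layout
    # used above.
    return [dict(zip(names, raw_outputs))]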
def forward(self, inputs, features):
    features = [features[f] for f in self.in_features]
    gt_boxes = inputs.get(GT_BOXES, None)
    gt_length = inputs.get(GT_LENGTH, None)
    pred_objectness_logits, pred_anchor_deltas = self.rpn_head(inputs, features)
    anchors = self.rpn_head.anchor_generator(inputs, features)
    self.anchors_num_per_level = [
        wmlt.combined_static_and_dynamic_shape(x)[0] for x in anchors
    ]
    outputs = build_outputs(self.cfg.MODEL.RPN.OUTPUTS,
                            self.box2box_transform,
                            self.anchor_matcher,
                            self.batch_size_per_image,
                            self.positive_fraction,
                            pred_objectness_logits,
                            pred_anchor_deltas,
                            anchors,
                            gt_boxes,
                            gt_length=gt_length)
    if self.cfg.GLOBAL.SUMMARY_LEVEL <= SummaryLevel.DEBUG:
        outputs.inputs = inputs

    if self.is_training:
        losses = {
            k: v * self.loss_weight
            for k, v in outputs.losses().items()
        }
        rpn_threshold = 0.0
    else:
        rpn_threshold = self.cfg.MODEL.PROPOSAL_GENERATOR.SCORE_THRESH_TEST
        losses = {}

    # Find the top proposals by applying NMS and removing boxes that are too
    # small. The proposals are treated as fixed for approximate joint training
    # with the ROI heads: the derivative w.r.t. the proposal boxes' coordinates
    # (which are also network outputs) is ignored, so the training is approximate.
    pre_nms_topk_max_per_layer = self.cfg.MODEL.RPN.PRE_NMS_TOPK_MAX_PER_LAYER
    proposals, logits = find_top_rpn_proposals(
        outputs.predict_proposals(),
        outputs.predict_objectness_logits(),
        self.nms_thresh,
        self.pre_nms_topk[self.is_training],
        self.post_nms_topk[self.is_training],
        self.anchors_num_per_level,
        score_threshold=rpn_threshold,
        is_training=self.is_training,
        pre_nms_topk_max_per_layer=pre_nms_topk_max_per_layer)

    if self.cfg.MODEL.RPN.SORT_RESULTS:
        with tf.name_scope("sort_rpn_results"):
            def fn(bboxes, keys):
                N = wmlt.combined_static_and_dynamic_shape(keys)
                new_keys, indices = tf.nn.top_k(keys, k=N[0])
                bboxes = tf.gather(bboxes, indices)
                # Return the sorted scores so they stay paired with the
                # reordered boxes.
                return [bboxes, new_keys]

            proposals, logits = tf.map_fn(lambda x: fn(x[0], x[1]),
                                          elems=[proposals, logits],
                                          back_prop=False)

    outdata = {PD_BOXES: proposals, PD_PROBABILITY: tf.nn.sigmoid(logits)}
    wsummary.detection_image_summary(images=inputs[IMAGE],
                                     boxes=outdata[PD_BOXES],
                                     name="rpn/proposals")
    return outdata, losses
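# Hedged illustration (not repository code) of the sort_rpn_results step above:
# tf.nn.top_k yields the sorted values together with the permutation, and
# tf.gather applies that permutation to the boxes, which is why the sorted
# scores must be returned alongside the gathered boxes to keep them paired.
import tensorflow as tf

scores = tf.constant([0.2, 0.9, 0.5])
boxes = tf.constant([[0., 0., 1., 1.],
                     [0., 0., 2., 2.],
                     [0., 0., 3., 3.]])
sorted_scores, idx = tf.nn.top_k(scores, k=3)  # values [0.9, 0.5, 0.2], idx [1, 2, 0]
sorted_boxes = tf.gather(boxes, idx)           # rows reordered to stay paired with scores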
def forward(self, batched_inputs): """ Args: batched_inputs: a list, batched outputs of :class:`DatasetMapper` . Each item in the list contains the inputs for one image. For now, each item in the list is a dict that contains: * image: Tensor, image in (H, W, C) format. * instances: Instances Other information that's included in the original dicts, such as: * "height", "width" (int): the output resolution of the model, used in inference. See :meth:`postprocess` for details. Returns: dict[str: Tensor]: mapping from a named loss to a tensor storing the loss. Used during training only. """ batched_inputs = self.preprocess_image(batched_inputs) features = self.backbone(batched_inputs) features = [features[f] for f in self.in_features] head_outputs = self.head(features) anchors = self.anchor_generator(batched_inputs, features) gt_boxes = batched_inputs[GT_BOXES] gt_length = batched_inputs[GT_LENGTH] gt_labels = batched_inputs[GT_LABELS] outputs = build_outputs( name=self.cfg.MODEL.YOLACT.OUTPUTS, cfg=self.cfg.MODEL.YOLACT, parent=self, box2box_transform=self.box2box_transform, anchor_matcher=self.anchor_matcher, pred_logits=head_outputs[LOGITS], pred_anchor_deltas=head_outputs[BOXES_REGS], anchors=anchors, gt_boxes=gt_boxes, gt_labels=gt_labels, gt_length=gt_length, max_detections_per_image=self.cfg.TEST.DETECTIONS_PER_IMAGE, head_outputs=head_outputs, batched_inputs=batched_inputs, coefficient_nr=self.cfg.MODEL.YOLACT.PROTONET_NR, ) if self.is_training: if self.cfg.GLOBAL.SUMMARY_LEVEL <= SummaryLevel.DEBUG: results = outputs.inference(inputs=batched_inputs, box_cls=head_outputs[LOGITS], box_delta=head_outputs[BOXES_REGS], anchors=anchors) else: results = {} return results, outputs.losses() else: results = outputs.inference(inputs=batched_inputs, box_cls=head_outputs[LOGITS], box_delta=head_outputs[BOXES_REGS], anchors=anchors) return results, {}
def forward(self, batched_inputs, features): """ Args: batched_inputs: a list, batched outputs of :class:`DatasetMapper` . Each item in the list contains the inputs for one image. For now, each item in the list is a dict that contains: * image: Tensor, image in (H, W, C) format. * instances: Instances Other information that's included in the original dicts, such as: * "height", "width" (int): the output resolution of the model, used in inference. See :meth:`postprocess` for details. Returns: dict[str: Tensor]: mapping from a named loss to a tensor storing the loss. Used during training only. """ if len(self.in_features) == 0: print( f"Error no input features for retinanet, use all features {features.keys()}" ) features = list(features.values()) else: features = [features[f] for f in self.in_features] pred_logits, pred_regression, pred_center_ness = self.head(features) gt_boxes = batched_inputs[GT_BOXES] gt_length = batched_inputs[GT_LENGTH] gt_labels = batched_inputs[GT_LABELS] outputs = build_outputs( name=self.cfg.MODEL.FCOSPG.OUTPUTS, cfg=self.cfg.MODEL.FCOSPG, parent=self, box2box_transform=self.box2box_transform, pred_logits=pred_logits, pred_regression=pred_regression, pred_center_ness=pred_center_ness, gt_boxes=gt_boxes, gt_labels=gt_labels, gt_length=gt_length, batched_inputs=batched_inputs, max_detections_per_image=self.cfg.TEST.DETECTIONS_PER_IMAGE, ) results = outputs.inference(inputs=batched_inputs, box_cls=pred_logits, box_regression=pred_regression, center_ness=pred_center_ness) losses = {} if self.is_training: _losses = outputs.losses() for k, v in _losses.items(): losses["pg_" + k] = v outdata = { PD_BOXES: results[RD_BOXES], PD_PROBABILITY: results[RD_PROBABILITY] } wsummary.detection_image_summary(images=batched_inputs[IMAGE], boxes=outdata[PD_BOXES], name="fcospg/proposals") return outdata, losses
def forward(self, batched_inputs): """ Args: batched_inputs: a list, batched outputs of :class:`DatasetMapper` . Each item in the list contains the inputs for one image. For now, each item in the list is a dict that contains: * image: Tensor, image in (H, W, C) format. * instances: Instances Other information that's included in the original dicts, such as: * "height", "width" (int): the output resolution of the model, used in inference. See :meth:`postprocess` for details. Returns: dict[str: Tensor]: mapping from a named loss to a tensor storing the loss. Used during training only. """ batched_inputs = self.preprocess_image(batched_inputs) features = self.backbone(batched_inputs) if len(self.in_features) == 0: print( f"Error no input features for deeplab, use all features {features.keys()}" ) features = list(features.values()) else: features = [features[f] for f in self.in_features] pred_logits = self.head(features) gt_labels = batched_inputs.get(GT_SEMANTIC_LABELS, None) outputs = build_outputs( name=self.cfg.MODEL.DEEPLAB.OUTPUTS, cfg=self.cfg.MODEL.DEEPLAB, parent=self, pred_logits=pred_logits, labels=gt_labels, ) outputs.batched_inputs = batched_inputs max_outputs = 3 wsummary.batch_semantic_summary(batched_inputs[IMAGE], masks=gt_labels[..., 1:], max_outputs=max_outputs, name="gt") if self.is_training: if self.cfg.GLOBAL.SUMMARY_LEVEL <= SummaryLevel.DEBUG: results = outputs.inference(inputs=batched_inputs, logits=pred_logits) wsummary.batch_semantic_summary(batched_inputs[IMAGE], masks=results[RD_SEMANTIC][..., 1:], max_outputs=max_outputs, name="pred") wsummary.feature_map_summary(gt_labels, name="gt_semantic", max_outputs=10) wsummary.feature_map_summary(results[RD_SEMANTIC], name="pred_semantic", max_outputs=10) else: results = {} return results, outputs.losses() else: results = outputs.inference(inputs=batched_inputs, logits=pred_logits) wsummary.batch_semantic_summary(batched_inputs[IMAGE], masks=results[RD_SEMANTIC][..., 1:], max_outputs=max_outputs, name="pred") wsummary.feature_map_summary(gt_labels, name="gt_semantic", max_outputs=10) wsummary.feature_map_summary(results[RD_SEMANTIC], name="pred_semantic", max_outputs=10) return results, {}