def forward(self, features, image_width, image_height):
    """Run the RPN head: score anchors, regress box deltas, prune with NMS.

    Returns (anchor_bboxes, objectnesses, transformers, proposal_bboxes).
    """
    # One anchor set per spatial cell of the incoming feature map.
    anchor_bboxes = RegionProposalNetwork._generate_anchors(
        image_width, image_height,
        num_x_anchors=features.shape[3],
        num_y_anchors=features.shape[2]).to(device)

    shared = self._features(features)
    objectnesses = self._objectness(shared)
    transformers = self._transformer(shared)

    # (N, C, H, W) -> flat per-anchor rows: 2 objectness logits, 4 box deltas.
    objectnesses = objectnesses.permute(0, 2, 3, 1).contiguous().view(-1, 2)
    transformers = transformers.permute(0, 2, 3, 1).contiguous().view(-1, 4)

    proposal_bboxes = RegionProposalNetwork._generate_proposals(
        anchor_bboxes, objectnesses, transformers, image_width, image_height)

    # Pre-/post-NMS keep counts differ between training and evaluation.
    pre_nms_top_n = 12000 if self.training else 6000
    post_nms_top_n = 2000 if self.training else 300

    proposal_bboxes = proposal_bboxes[:pre_nms_top_n]
    keep_indices = NMS.suppress(proposal_bboxes, threshold=0.7)
    proposal_bboxes = proposal_bboxes[keep_indices]
    proposal_bboxes = proposal_bboxes[:post_nms_top_n]

    return anchor_bboxes, objectnesses, transformers, proposal_bboxes
class InferenceProcedure(object):
    """Turn raw SSD network outputs into final NMS-filtered detections."""

    def __init__(self, model):
        self.model = model
        # +1 accounts for the background class.
        self.num_classes = NUM_CLASSES + 1
        self.image_size = np.array([IMAGE_HEIGHT, IMAGE_WIDTH], dtype=np.float32)
        self.nms_op = NMS()

    def __get_ssd_prediction(self, image):
        """Forward the image through the model and assemble the SSD prediction tensor."""
        feature_maps = self.model(image, training=False)
        return ssd_prediction(feature_maps=feature_maps,
                              num_classes=self.num_classes)

    @staticmethod
    def __resize_boxes(boxes, image_height, image_width):
        """Convert normalized (cx, cy, w, h) boxes to pixel (xmin, ymin, xmax, ymax)."""
        center_x = boxes[..., 0] * image_width
        center_y = boxes[..., 1] * image_height
        width = boxes[..., 2] * image_width
        height = boxes[..., 3] * image_height
        return tf.stack(values=[center_x - width / 2,
                                center_y - height / 2,
                                center_x + width / 2,
                                center_y + height / 2],
                        axis=-1)

    def __filter_background_boxes(self, ssd_predict_boxes):
        """Keep only predictions whose most probable class is not background (0).

        NOTE(review): the ``predicted_classes[:, i] != 0`` test only acts as a
        scalar condition when the batch size is 1 — confirm against callers.
        """
        predicted_classes = tf.argmax(
            input=tf.nn.softmax(ssd_predict_boxes[..., :self.num_classes]),
            axis=-1)
        kept = [ssd_predict_boxes[:, i, :]
                for i in range(ssd_predict_boxes.shape[1])
                if predicted_classes[:, i] != 0]
        return tf.stack(values=kept, axis=1)

    def get_final_boxes(self, image):
        """Return (boxes, scores, classes) after background filtering and NMS."""
        predictions = self.__get_ssd_prediction(image)
        predictions = self.__filter_background_boxes(predictions)

        class_probs = tf.nn.softmax(logits=predictions[..., :self.num_classes])
        class_probs = tf.reshape(tensor=class_probs,
                                 shape=(-1, self.num_classes))
        coords = tf.reshape(tensor=predictions[..., self.num_classes:],
                            shape=(-1, 4))

        pixel_boxes = self.__resize_boxes(boxes=coords,
                                          image_height=image.shape[1],
                                          image_width=image.shape[2])
        box_tensor, score_tensor, class_tensor = self.nms_op.nms(
            boxes=pixel_boxes, box_scores=class_probs)
        return box_tensor, score_tensor, class_tensor
def get_final_boxes(self):
    """Post-process every YOLO output scale, merge the candidates, apply NMS."""
    all_boxes = []
    all_scores = []
    for scale_idx in range(len(SCALE_SIZE)):
        scale_boxes, scale_scores = self.__yolo_post_processing(
            feature=self.yolo_output[scale_idx], scale_type=scale_idx)
        all_boxes.append(scale_boxes)
        all_scores.append(scale_scores)
    return NMS().nms(boxes=tf.concat(all_boxes, axis=0),
                     box_scores=tf.concat(all_scores, axis=0))
def _generate_detections(
        self, proposal_bboxes: Tensor, proposal_classes: Tensor,
        proposal_transformers: Tensor, image_width: int,
        image_height: int) -> Tuple[Tensor, Tensor, Tensor]:
    """Decode per-class box regressions into final, NMS-pruned detections.

    Args:
        proposal_bboxes: (N, 4) RoI boxes produced by the RPN.
        proposal_classes: (N, NUM_CLASSES) classification logits per proposal.
        proposal_transformers: (N, NUM_CLASSES * 4) normalized box-regression deltas.
        image_width / image_height: bounds used to clip decoded boxes.

    Returns:
        (bboxes, labels, probs), each a 1-D/2-D tensor concatenated over all
        foreground classes; each class is score-sorted and NMS'd independently.
    """
    proposal_transformers = proposal_transformers.view(-1, Model.NUM_CLASSES, 4)
    mean = self._transformer_normalize_mean.repeat(1, Model.NUM_CLASSES, 1).cpu()
    std = self._transformer_normalize_std.repeat(1, Model.NUM_CLASSES, 1).cpu()
    # BUGFIX: de-normalization is the inverse of (t - mean) / std, i.e.
    # t * std + mean; the previous "* std - mean" shifted every decoded box.
    proposal_transformers = proposal_transformers * std + mean

    proposal_bboxes = proposal_bboxes.view(-1, 1, 4).repeat(
        1, Model.NUM_CLASSES, 1)
    detection_bboxes = BBox.apply_transformer(
        proposal_bboxes.view(-1, 4), proposal_transformers.view(-1, 4))
    detection_bboxes = detection_bboxes.view(-1, Model.NUM_CLASSES, 4)

    # Clip x coordinates to [0, width] and y coordinates to [0, height].
    detection_bboxes[:, :, [0, 2]] = detection_bboxes[:, :, [0, 2]].clamp(
        min=0, max=image_width)
    detection_bboxes[:, :, [1, 3]] = detection_bboxes[:, :, [1, 3]].clamp(
        min=0, max=image_height)

    proposal_probs = F.softmax(proposal_classes, dim=1)

    detection_bboxes = detection_bboxes.cpu()
    proposal_probs = proposal_probs.cpu()

    generated_bboxes = []
    generated_labels = []
    generated_probs = []

    # Class 0 is background; run score-sorted NMS per foreground class.
    for c in range(1, Model.NUM_CLASSES):
        detection_class_bboxes = detection_bboxes[:, c, :]
        proposal_class_probs = proposal_probs[:, c]

        _, sorted_indices = proposal_class_probs.sort(descending=True)
        detection_class_bboxes = detection_class_bboxes[sorted_indices]
        proposal_class_probs = proposal_class_probs[sorted_indices]

        keep_indices = NMS.suppress(detection_class_bboxes.cpu(),
                                    threshold=0.3)
        detection_class_bboxes = detection_class_bboxes[keep_indices]
        proposal_class_probs = proposal_class_probs[keep_indices]

        generated_bboxes.append(detection_class_bboxes)
        generated_labels.append(torch.ones(len(keep_indices)) * c)
        generated_probs.append(proposal_class_probs)

    generated_bboxes = torch.cat(generated_bboxes, dim=0)
    generated_labels = torch.cat(generated_labels, dim=0)
    generated_probs = torch.cat(generated_probs, dim=0)

    return generated_bboxes, generated_labels, generated_probs
def testing_procedure(self, efficientdet_ouputs, input_image_size):
    """Decode raw EfficientDet outputs into final (boxes, scores, classes) numpy arrays."""
    nms = NMS()
    anchors = self.anchors(image_size=Config.get_image_size())

    # Split the network output: first 4 channels are box regressions,
    # the rest are per-class scores.
    reg_results = efficientdet_ouputs[..., :4]
    cls_results = efficientdet_ouputs[..., 4:]

    decoded = BoxTransform()(anchors, reg_results)
    decoded = ClipBoxes()(decoded)
    decoded = MapToInputImage(input_image_size)(decoded)

    scores = tf.math.reduce_max(cls_results, axis=2).numpy()
    classes = tf.math.argmax(cls_results, axis=2).numpy()

    final_boxes, final_scores, final_classes = nms(
        boxes=decoded[0, :, :],
        box_scores=np.squeeze(scores),
        box_classes=np.squeeze(classes))
    return final_boxes.numpy(), final_scores.numpy(), final_classes.numpy()
def __init__(self, model):
    """Hold the model plus the constants needed at inference time."""
    self.model = model
    self.num_classes = NUM_CLASSES
    self.nms_op = NMS()
    # (height, width) as float32, matching the network's expected input size.
    self.image_size = np.array([IMAGE_HEIGHT, IMAGE_WIDTH], dtype=np.float32)
class InferenceProcedure(object):
    """Decode SSD outputs into image-space detections, with a no-object fallback."""

    def __init__(self, model):
        self.model = model
        self.num_classes = NUM_CLASSES
        self.image_size = np.array([IMAGE_HEIGHT, IMAGE_WIDTH], dtype=np.float32)
        self.nms_op = NMS()

    def __get_ssd_prediction(self, image):
        """Run the model; return (assembled prediction tensor, raw feature maps)."""
        feature_maps = self.model(image, training=False)
        prediction = ssd_prediction(feature_maps=feature_maps,
                                    num_classes=self.num_classes)
        return prediction, feature_maps

    @staticmethod
    def __resize_boxes(boxes, image_height, image_width):
        """Convert normalized (cx, cy, w, h) boxes to pixel (xmin, ymin, xmax, ymax)."""
        center_x = boxes[..., 0] * image_width
        center_y = boxes[..., 1] * image_height
        width = boxes[..., 2] * image_width
        height = boxes[..., 3] * image_height
        return tf.stack(values=[center_x - width / 2,
                                center_y - height / 2,
                                center_x + width / 2,
                                center_y + height / 2],
                        axis=-1)

    def __filter_background_boxes(self, ssd_predict_boxes):
        """Keep predictions whose argmax class is not background (index 0).

        Returns (True, kept_boxes) when anything survives, otherwise
        (False, the untouched input) so the caller can bail out early.

        NOTE(review): the ``classes[:, i] != 0`` test only acts as a scalar
        condition when the batch size is 1 — confirm against callers.
        """
        class_logits = ssd_predict_boxes[..., :self.num_classes]
        classes = tf.math.argmax(input=class_logits, axis=-1)
        kept = [ssd_predict_boxes[:, i, :]
                for i in range(ssd_predict_boxes.shape[1])
                if classes[:, i] != 0]
        if not kept:
            return False, ssd_predict_boxes
        return True, tf.stack(values=kept, axis=1)

    def __offsets_to_true_coordinates(self, pred_boxes, ssd_output):
        """Decode predicted (cx, cy, w, h) offsets against the default boxes."""
        pred_classes = tf.reshape(tensor=pred_boxes[..., :self.num_classes],
                                  shape=(-1, self.num_classes))
        pred_coords = tf.reshape(tensor=pred_boxes[..., self.num_classes:],
                                 shape=(-1, 4))
        default_boxes = DefaultBoxes(
            feature_map_list=ssd_output).generate_default_boxes()

        d_cx = default_boxes[:, 0: 1]
        d_cy = default_boxes[:, 1: 2]
        d_w = default_boxes[:, 2: 3]
        d_h = default_boxes[:, 3: 4]
        offset_cx = pred_coords[:, 0: 1]
        offset_cy = pred_coords[:, 1: 2]
        offset_w = pred_coords[:, 2: 3]
        offset_h = pred_coords[:, 3: 4]

        # Standard SSD decoding: centers shift by offset * default size,
        # sizes scale exponentially.
        true_coords = tf.concat(values=[offset_cx * d_w + d_cx,
                                        offset_cy * d_h + d_cy,
                                        tf.math.exp(offset_w) * d_w,
                                        tf.math.exp(offset_h) * d_h],
                                axis=-1)
        decoded = tf.concat(values=[pred_classes, true_coords], axis=-1)
        return tf.expand_dims(input=decoded, axis=0)

    def get_final_boxes(self, image):
        """Return (found, boxes, scores, classes); zero tensors when nothing is found."""
        pred_boxes, ssd_output = self.__get_ssd_prediction(image)
        pred_boxes = self.__offsets_to_true_coordinates(pred_boxes=pred_boxes,
                                                        ssd_output=ssd_output)
        is_object_exist, filtered = self.__filter_background_boxes(pred_boxes)
        if not is_object_exist:
            return (is_object_exist, tf.zeros(shape=(1, 4)),
                    tf.zeros(shape=(1, )), tf.zeros(shape=(1, )))

        # Raw class logits are passed straight to NMS as scores — presumably
        # only their relative ordering matters here; confirm before reuse.
        box_scores = tf.reshape(tensor=filtered[..., :self.num_classes],
                                shape=(-1, self.num_classes))
        coords = tf.reshape(tensor=filtered[..., self.num_classes:],
                            shape=(-1, 4))
        pixel_boxes = self.__resize_boxes(boxes=coords,
                                          image_height=image.shape[1],
                                          image_width=image.shape[2])
        box_tensor, score_tensor, class_tensor = self.nms_op.nms(
            boxes=pixel_boxes, box_scores=box_scores)
        return is_object_exist, box_tensor, score_tensor, class_tensor