def predict_bounding_boxes(net, image, min_c, overlap_thres, topk, ctx=mx.gpu()):
    '''
    Run the network on a single image and return the predicted bounding boxes.

    NOTE(review): a second function with the same name (taking ``net, image, bb``)
    also appears in this source; if both live in one module the later definition
    shadows this one -- confirm which is intended.

    Parameters
    ----------
    net: SSD
        The trained SSD network. Calling it must return
        ``(default_anchors, class_predictions, box_predictions)``.

    image: np.array
        A grayscale image of the handwriting passages (assumed 2-D, H x W).

    min_c:
        Forwarded to ``box_nms`` as ``valid_thresh``.

    overlap_thres:
        Forwarded to ``box_nms`` as ``overlap_thresh``.

    topk:
        Forwarded to ``box_nms`` as ``topk``.

    ctx:
        Context the image is moved to before the forward pass.

    Returns
    -------
    predicted_bb: [(x, y, w, h)]
        The predicted bounding boxes, one row per box kept after NMS.
    '''
    # Pre-processing: add a trailing channel axis, resize, move channels first,
    # scale by 1/255 and add a leading batch axis.
    image = mx.nd.array(image).expand_dims(axis=2)
    image = mx.image.resize_short(image, 350)
    image = image.transpose([2, 0, 1]) / 255.
    image = image.as_in_context(ctx)
    image = image.expand_dims(0)

    # Only the forward pass is needed for inference; no training targets
    # (and therefore no dummy label tensor) are computed here.
    default_anchors, class_predictions, box_predictions = net(image)

    cls_probs = mx.nd.SoftmaxActivation(
        mx.nd.transpose(class_predictions, (0, 2, 1)), mode='channel')

    predicted_bb = MultiBoxDetection(
        cls_probs, box_predictions, default_anchors,
        force_suppress=True, clip=False)
    predicted_bb = box_nms(predicted_bb, overlap_thresh=overlap_thres,
                           valid_thresh=min_c, topk=topk)
    predicted_bb = predicted_bb.asnumpy()

    # Keep the rows of the first (only) batch entry whose first column is not -1,
    # then drop the first two columns so only the four box coordinates remain.
    predicted_bb = predicted_bb[0, predicted_bb[0, :, 0] != -1]
    predicted_bb = predicted_bb[:, 2:]

    # Convert (x1, y1, x2, y2) corners into (x, y, w, h).
    predicted_bb[:, 2] = predicted_bb[:, 2] - predicted_bb[:, 0]
    predicted_bb[:, 3] = predicted_bb[:, 3] - predicted_bb[:, 1]
    return predicted_bb
def predict_bounding_boxes(net, image, bb):
    '''
    Run the network on one dataset sample and return the predicted and the
    labelled bounding boxes, both as (x, y, w, h).

    The function reads ``transform``, ``ctx``, ``min_c``, ``overlap_thres`` and
    ``topk`` from the enclosing (module) scope; they are not parameters.

    NOTE(review): another function with the same name (taking
    ``net, image, min_c, overlap_thres, topk, ctx``) also appears in this source;
    if both live in one module this definition shadows the earlier one -- confirm
    which is intended.

    Parameters
    ----------
    net: SSD
        The trained SSD network. Calling it must return
        ``(default_anchors, class_predictions, box_predictions)``.

    image: np.array
        A grayscale image of the handwriting passages.

    bb: [(x1, y1, x2, y2)]
        The labelled bounding boxes. After ``transform`` the first column of
        each row is skipped here (presumably a class id -- TODO confirm against
        ``transform``).

    Returns
    -------
    predicted_bb: [(x, y, w, h)]
        The predicted bounding boxes, one row per box kept after NMS.

    actual_bb: [(x, y, w, h)]
        The actual bounding boxes.
    '''
    image, bb = transform(image, bb)

    image = image.as_in_context(ctx[0])
    image = image.expand_dims(axis=0)

    # Only the forward pass is needed for inference; no training targets are
    # computed, so the labels never have to be moved to the device.
    default_anchors, class_predictions, box_predictions = net(image)

    cls_probs = nd.SoftmaxActivation(
        nd.transpose(class_predictions, (0, 2, 1)), mode='channel')

    predicted_bb = MultiBoxDetection(
        cls_probs, box_predictions, default_anchors,
        force_suppress=True, clip=False)
    predicted_bb = box_nms(predicted_bb, overlap_thresh=overlap_thres,
                           valid_thresh=min_c, topk=topk)
    predicted_bb = predicted_bb.asnumpy()

    # Keep the rows of the first (only) batch entry whose first column is not -1,
    # then drop the first two columns so only the four box coordinates remain.
    predicted_bb = predicted_bb[0, predicted_bb[0, :, 0] != -1]
    predicted_bb = predicted_bb[:, 2:]

    # Convert (x1, y1, x2, y2) corners into (x, y, w, h).
    predicted_bb[:, 2] = predicted_bb[:, 2] - predicted_bb[:, 0]
    predicted_bb[:, 3] = predicted_bb[:, 3] - predicted_bb[:, 1]

    # Same corner -> width/height conversion for the labels, skipping column 0.
    labeled_bb = bb.expand_dims(axis=0)[:, :, 1:].asnumpy()
    labeled_bb[:, :, 2] = labeled_bb[:, :, 2] - labeled_bb[:, :, 0]
    labeled_bb[:, :, 3] = labeled_bb[:, :, 3] - labeled_bb[:, :, 1]
    labeled_bb = labeled_bb[0]
    return predicted_bb, labeled_bb
def generate_output_image(box_predictions, default_anchors, cls_probs,
                          box_target, box_mask, cls_target, x, y):
    '''
    Draw the predicted and the labelled bounding boxes on a batch of images.

    The function reads ``min_c``, ``overlap_thres`` and ``topk`` from the
    enclosing (module) scope and delegates the drawing to
    ``draw_boxes_on_image``.

    Parameters
    ----------
    box_predictions: nd.array
        Bounding box predictions relative to the anchor boxes, output of the network.

    default_anchors: nd.array
        Anchors used, output of the network.

    cls_probs: nd.array
        Output of nd.SoftmaxActivation(nd.transpose(class_predictions, (0, 2, 1)), mode='channel')
        where class_predictions is the output of the network.

    box_target: nd.array
        Not used by this function.

    box_mask: nd.array
        Not used by this function.

    cls_target: nd.array
        Not used by this function.

    x: nd.array
        The input images.

    y: nd.array
        The actual labels; the first column of every label row is skipped.

    Returns
    -------
    output_image: np.array
        The images with the predicted and actual bounding boxes drawn on,
        with values limited to the range [0, 1].

    number_of_bbs: int
        The total number of predicted bounding boxes over the whole batch.
    '''
    detections = MultiBoxDetection(
        cls_probs, box_predictions, default_anchors,
        force_suppress=True, clip=False)
    detections = box_nms(detections, overlap_thresh=overlap_thres,
                         valid_thresh=min_c, topk=topk)
    detections = detections.asnumpy()

    predicted_bb = []
    for sample in detections:
        # Rows whose first column is -1 are discarded; the remaining rows keep
        # only the coordinate columns (index 2 onwards).
        boxes = sample[sample[:, 0] != -1][:, 2:]
        # Corners (x1, y1, x2, y2) -> (x, y, w, h).
        boxes[:, 2] -= boxes[:, 0]
        boxes[:, 3] -= boxes[:, 1]
        predicted_bb.append(boxes)
    number_of_bbs = sum(boxes.shape[0] for boxes in predicted_bb)

    # Same corner -> width/height conversion for the labels.
    labels = y[:, :, 1:].asnumpy()
    labels[:, :, 2] -= labels[:, :, 0]
    labels[:, :, 3] -= labels[:, :, 1]

    output_image = draw_boxes_on_image(predicted_bb, labels, x.asnumpy())
    # Limit the drawn image to the [0, 1] range.
    output_image[output_image < 0] = 0
    output_image[output_image > 1] = 1

    return output_image, number_of_bbs