def detect_image(img_file, thresh=0.69):
    """Run the SSD detector on one image file and plot the detections.

    The image is loaded, resized/cropped to ``data_shape`` x ``data_shape``,
    has the per-channel constants ``[123, 117, 104]`` subtracted and is fed
    through a freshly built ``ToySSD`` whose weights are read from
    ``ssd_pretrained.params``.  Every detection with a valid class id and a
    score of at least ``thresh`` is drawn on the resized image; the figure is
    saved to ``result.png`` and then shown.

    Relies on the module-level names ``data_shape``, ``num_class`` and
    ``class_names``.

    Parameters
    ----------
    img_file: str
        Path of the image to run detection on.
    thresh: float
        Minimum score a detection needs in order to be drawn.

    Returns
    -------
    None
        When ``img_file`` does not exist a message is printed and the
        function returns without running the network.
    """
    if not os.path.exists(img_file):
        print('can not find image: ', img_file)
        # Stop here: previously execution fell through and crashed in
        # Image.open() right after reporting the problem.
        return

    # Force three channels so the per-channel subtraction below also works
    # for grayscale or RGBA files.
    img = Image.open(img_file).convert('RGB')
    # Image.LANCZOS is the same filter the removed Image.ANTIALIAS alias
    # pointed to.
    img = ImageOps.fit(img, [data_shape, data_shape], Image.LANCZOS)
    print(img)
    origin_img = np.array(img)
    img = origin_img - np.array([123, 117, 104])
    # organize as [batch-channel-height-width]
    img = np.transpose(img, (2, 0, 1))
    img = img[np.newaxis, :]
    # convert to ndarray
    img = nd.array(img)
    print('input image shape: ', img.shape)

    net = ToySSD(num_class)
    ctx = mx.cpu()
    net.initialize(mx.init.Xavier(magnitude=2), ctx=ctx)
    net.collect_params().reset_ctx(ctx)
    params = 'ssd_pretrained.params'
    net.load_params(params, ctx=ctx)

    anchors, cls_preds, box_preds = net(img.as_in_context(ctx))
    print('anchors', anchors)
    print('class predictions', cls_preds)
    print('box delta predictions', box_preds)

    # convert predictions to probabilities using softmax
    cls_probs = nd.SoftmaxActivation(nd.transpose(cls_preds, (0, 2, 1)), mode='channel')
    # apply shifts to anchors boxes, non-maximum-suppression, etc...
    output = MultiBoxDetection(*[cls_probs, box_preds, anchors], force_suppress=True, clip=False)
    output = output.asnumpy()
    print(output)
    print(output.shape)

    pens = dict()
    plt.imshow(origin_img)
    # Box coordinates are multiplied by (width, height, width, height) of the
    # displayed image; this does not change per detection.
    scales = [origin_img.shape[1], origin_img.shape[0]] * 2
    for det in output[0]:
        cid = int(det[0])
        score = det[1]
        if cid < 0 or score < thresh:
            continue
        if cid not in pens:
            # One random colour per class id, reused for all its boxes.
            pens[cid] = (random.random(), random.random(), random.random())
        xmin, ymin, xmax, ymax = [int(p * s) for p, s in zip(det[2:6].tolist(), scales)]
        rect = plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, fill=False,
                             edgecolor=pens[cid], linewidth=3)
        plt.gca().add_patch(rect)
        text = class_names[cid]
        plt.gca().text(xmin, ymin - 2, '{:s} {:.3f}'.format(text, score),
                       bbox=dict(facecolor=pens[cid], alpha=0.5),
                       fontsize=12, color='white')
    plt.axis('off')
    plt.savefig('result.png', dpi=100)
    plt.show()
def predict_bounding_boxes(net, image, min_c, overlap_thres, topk, ctx=mx.gpu()):
    '''
    Run the network on a single image and return the detections that survive
    non-maximum suppression.

    Parameters
    ----------
    net: SSD
        The trained SSD network.

    image: np.array
        A grayscale image of the handwriting passages.

    min_c
        Forwarded to box_nms as ``valid_thresh``.

    overlap_thres
        Forwarded to box_nms as ``overlap_thresh``.

    topk
        Forwarded to box_nms as ``topk``.

    ctx
        Context the image and the placeholder labels are moved to.

    Returns
    -------
    predicted_bb: [(x, y, w, h)]
        The predicted bounding boxes.
    '''
    # Add a trailing channel axis, resize the short side to 350, move the
    # channel axis to the front, divide by 255 and prepend a batch axis.
    batch = mx.nd.array(image).expand_dims(axis=2)
    batch = mx.image.resize_short(batch, 350)
    batch = (batch.transpose([2, 0, 1]) / 255.).as_in_context(ctx).expand_dims(0)

    # All-zero placeholder labels: there is no ground truth at prediction time.
    placeholder = mx.nd.array(np.zeros(shape=(13, 5)))
    placeholder = placeholder.as_in_context(ctx).expand_dims(axis=0)

    anchors, cls_preds, box_preds = net(batch)
    # The three targets are not used below; the call is kept as in the
    # original flow.
    _box_target, _box_mask, _cls_target = net.training_targets(
        anchors, cls_preds, placeholder)

    probs = mx.nd.SoftmaxActivation(
        mx.nd.transpose(cls_preds, (0, 2, 1)), mode='channel')
    detections = MultiBoxDetection(probs, box_preds, anchors,
                                   force_suppress=True, clip=False)
    detections = box_nms(detections, overlap_thresh=overlap_thres,
                         valid_thresh=min_c, topk=topk).asnumpy()

    # Keep the rows of the first sample whose first column is not -1, then
    # drop the two leading columns.
    first = detections[0]
    predicted_bb = first[first[:, 0] != -1][:, 2:]
    # Turn the two far-corner columns into width and height.
    predicted_bb[:, 2] -= predicted_bb[:, 0]
    predicted_bb[:, 3] -= predicted_bb[:, 1]
    return predicted_bb
def predict_bounding_boxes(net, image, bb):
    '''
    Run the network on one dataset sample and return both the predicted and
    the labelled bounding boxes in (x, y, w, h) form.

    Uses the module-level ``ctx``, ``overlap_thres``, ``min_c`` and ``topk``.

    Parameters
    ----------
    net: SSD
        The trained SSD network.

    image: np.array
        A grayscale image of the handwriting passages.

    bb: [(x1, y1, x2, y2)]
        A tuple that contains the bounding box.

    Returns
    -------
    predicted_bb: [(x, y, w, h)]
        The predicted bounding boxes.

    labeled_bb: [(x, y, w, h)]
        The actual bounding boxes.
    '''
    image, bb = transform(image, bb)
    device = ctx[0]
    batch = image.as_in_context(device).expand_dims(axis=0)
    bb = bb.as_in_context(device).expand_dims(axis=0)

    anchors, cls_preds, box_preds = net(batch)
    # The three targets are not used below; the call is kept as in the
    # original flow.
    _box_target, _box_mask, _cls_target = net.training_targets(
        anchors, cls_preds, bb)

    probs = nd.SoftmaxActivation(nd.transpose(cls_preds, (0, 2, 1)),
                                 mode='channel')
    detections = MultiBoxDetection(probs, box_preds, anchors,
                                   force_suppress=True, clip=False)
    detections = box_nms(detections, overlap_thresh=overlap_thres,
                         valid_thresh=min_c, topk=topk).asnumpy()

    # Keep the rows of the first sample whose first column is not -1, then
    # drop the two leading columns.
    first = detections[0]
    predicted_bb = first[first[:, 0] != -1][:, 2:]
    predicted_bb[:, 2] -= predicted_bb[:, 0]
    predicted_bb[:, 3] -= predicted_bb[:, 1]

    # Labels: drop the first column, convert corners to width/height and
    # return the only sample in the batch.
    labeled_bb = bb[:, :, 1:].asnumpy()
    labeled_bb[:, :, 2] -= labeled_bb[:, :, 0]
    labeled_bb[:, :, 3] -= labeled_bb[:, :, 1]
    return predicted_bb, labeled_bb[0]
def predict(x, net, ctx):
    """Run ``net`` on ``x`` and decode its outputs into detections.

    Parameters
    ----------
    x
        Input batch; moved to ``ctx`` before the forward pass.
    net
        Callable returning ``(anchors, cls_preds, box_preds)``.
    ctx
        Context to run the forward pass on.

    Returns
    -------
    The result of ``MultiBoxDetection`` applied to the softmaxed class
    predictions, the box predictions and the anchors.
    """
    anchors, cls_preds, box_preds = net(x.as_in_context(ctx))
    # Move the class axis to position 1 so the channel-wise softmax
    # normalises over classes.
    cls_probs = nd.SoftmaxActivation(
        cls_preds.transpose((0, 2, 1)), mode='channel')
    return MultiBoxDetection(cls_probs, box_preds, anchors,
                             force_suppress=True, clip=False)
def test(net, ctx):
    """Detect objects in a fixed sample image and display the result.

    Parameters
    ----------
    net
        Callable returning ``(anchors, cls_preds, box_preds)``.
    ctx
        Context the preprocessed image is moved to.
    """
    picture = cv2.imread('/BELLO/dog/data/dog3.jpg')
    batch = preprocess(picture).as_in_context(ctx)

    anchors, cls_preds, box_preds = net(batch)
    scores = nd.SoftmaxActivation(nd.transpose(cls_preds, (0, 2, 1)),
                                  mode='channel')
    detections = MultiBoxDetection(scores, box_preds, anchors,
                                   force_suppress=True, clip=False)

    # Reverse the channel order of the loaded image before showing it.
    flipped = picture[:, :, (2, 1, 0)]
    display(flipped, detections[0].asnumpy(), thresh=0.7)
def inference(x, epochs=295):
    """Load a saved ``ToySSD`` checkpoint, run it on ``x`` and time the run.

    Parameters
    ----------
    x
        Input batch; moved to ``mx.cpu(1)`` before the forward pass.
    epochs: int
        Selects the checkpoint file ``models/ssd_<epochs>.params``.

    Returns
    -------
    The result of ``MultiBoxDetection`` (no forced suppression, no clipping,
    NMS threshold 0.001).  The elapsed wall-clock time in seconds, covering
    parameter loading and inference, is printed.
    """
    ctx = mx.cpu(1)
    net = ToySSD(1)
    start_time = time.time()
    net.load_params('models/ssd_%d.params' % epochs, ctx)
    anchors, cls_preds, box_preds = net(x.as_in_context(ctx))
    cls_probs = nd.SoftmaxActivation(nd.transpose(cls_preds, (0, 2, 1)), mode='channel')
    output = MultiBoxDetection(*[cls_probs, box_preds, anchors],
                               force_suppress=False, clip=False,
                               nms_threshold=0.001)
    # MXNet queues operations asynchronously; block until the result is
    # actually computed so the printed duration covers the real work.
    output.wait_to_read()
    end_time = time.time()
    print(end_time - start_time)
    return output
def forward(img_path, net):
    """Read an image from disk and run ``net`` on it on ``mx.gpu(1)``.

    Parameters
    ----------
    img_path
        Path handed to ``cv2.imread``.
    net
        Callable returning ``(anchors, cls_preds, box_preds)``.

    Returns
    -------
    (img_original, output)
        The image as loaded by OpenCV and the ``MultiBoxDetection`` result
        (forced suppression, clipping enabled, NMS threshold 0.01).
    """
    device = mx.gpu(1)
    img_original = cv2.imread(img_path)
    batch = preprocess(img_original).as_in_context(device)

    anchors, cls_preds, box_preds = net(batch)
    scores = nd.SoftmaxActivation(nd.transpose(cls_preds, (0, 2, 1)),
                                  mode='channel')
    detections = MultiBoxDetection(scores, box_preds, anchors,
                                   force_suppress=True, clip=True,
                                   nms_threshold=0.01)
    return img_original, detections
def inference(x, epochs=295):
    """Load a saved ``ToySSD`` checkpoint and run it on ``x``.

    Parameters
    ----------
    x
        Input batch; moved to ``mx.cpu(1)`` before the forward pass.
    epochs: int
        Selects the checkpoint file ``models/ssd_<epochs>.params``.

    Returns
    -------
    The result of ``MultiBoxDetection`` (forced suppression, no clipping).
    """
    device = mx.cpu(1)
    model = ToySSD(1)
    model.load_params('models/ssd_%d.params' % epochs, device)
    print("load sucecuss")

    anchors, cls_preds, box_preds = model(x.as_in_context(device))
    scores = nd.SoftmaxActivation(nd.transpose(cls_preds, (0, 2, 1)),
                                  mode='channel')
    return MultiBoxDetection(scores, box_preds, anchors,
                             force_suppress=True, clip=False)
# Script section: run the already-built `net` on one sample image and print
# the raw network outputs and the decoded detections.
image = cv2.imread('img/pikachu.jpg')
x = preprocess(image)
print('x', x.shape)
# if pre-trained model is provided, we can load it
# net.load_params('ssd_%d.params' % epochs, ctx)
anchors, cls_preds, box_preds = net(x.as_in_context(ctx))
print('anchors', anchors)
print('class predictions', cls_preds)
print('box delta predictions', box_preds)
from mxnet.contrib.ndarray import MultiBoxDetection
# convert predictions to probabilities using softmax
cls_probs = nd.SoftmaxActivation(nd.transpose(cls_preds, (0, 2, 1)), mode='channel')
# apply shifts to anchors boxes, non-maximum-suppression, etc...
output = MultiBoxDetection(*[cls_probs, box_preds, anchors], force_suppress=True, clip=False)
print(output)
def display(img, out, thresh=0.5):
    # Shows `img` on a cleared 10x10 figure and walks over the rows of `out`,
    # skipping rows whose class id (first column) is negative.
    # NOTE(review): only the beginning of this function is visible here; how
    # `thresh`, `score` and `pens` are used afterwards cannot be confirmed.
    import random
    import matplotlib as mpl
    mpl.rcParams['figure.figsize'] = (10,10)
    pens = dict()
    plt.clf()
    plt.imshow(img)
    for det in out:
        cid = int(det[0])
        if cid < 0:
            continue
        score = det[1]
def generate_output_image(box_predictions, default_anchors, cls_probs, box_target, box_mask, cls_target, x, y):
    '''
    Draw the predicted and the labelled bounding boxes on a batch of images.

    Uses the module-level ``overlap_thres``, ``min_c`` and ``topk`` for the
    non-maximum-suppression step.

    Parameters
    ----------
    box_predictions: nd.array
        Bounding box predictions relative to the anchor boxes, output of the network

    default_anchors: nd.array
        Anchors used, output of the network

    cls_probs: nd.array
        Output of nd.SoftmaxActivation(nd.transpose(class_predictions, (0, 2, 1)), mode='channel')
        where class_predictions is the output of the network.

    box_target: nd.array
        Not used by this function.

    box_mask: nd.array
        Not used by this function.

    cls_target: nd.array
        Not used by this function.

    x: nd.array
        The input images

    y: nd.array
        The actual labels

    Returns
    -------
    output_image: np.array
        The images with the predicted and actual bounding boxes drawn on,
        with values limited to the range [0, 1]

    number_of_bbs: int
        The total number of predicted bounding boxes over the batch
    '''
    detections = MultiBoxDetection(cls_probs, box_predictions, default_anchors,
                                   force_suppress=True, clip=False)
    detections = box_nms(detections, overlap_thresh=overlap_thres,
                         valid_thresh=min_c, topk=topk).asnumpy()

    predicted_bb = []
    for sample in detections:
        # Keep rows whose first column is not -1 and drop the two leading
        # columns, then turn the far corner into width and height.
        kept = sample[sample[:, 0] != -1][:, 2:]
        kept[:, 2] -= kept[:, 0]
        kept[:, 3] -= kept[:, 1]
        predicted_bb.append(kept)
    number_of_bbs = sum(boxes.shape[0] for boxes in predicted_bb)

    # Same corner-to-size conversion for the labels, after dropping their
    # first column.
    labels = y[:, :, 1:].asnumpy()
    labels[:, :, 2] -= labels[:, :, 0]
    labels[:, :, 3] -= labels[:, :, 1]

    output_image = draw_boxes_on_image(predicted_bb, labels, x.asnumpy())
    output_image[output_image < 0] = 0
    output_image[output_image > 1] = 1
    return output_image, number_of_bbs
# print("train_cls_loss: %5f, train_box_loss %5f," # "time %.1f sec" # % (nd.mean(loss_cls).asscalar(), nd.mean(loss_bbox).asscalar(), time.time() - tic)) if epoch % checkpoint_period == 0: net.save_params(filename='output/exp1/ssd_{}_{}_epoch{}.params'.format(net.network, net.data_shape, epoch)) if val_iter is not None: for i, batch in enumerate(val_iter): x = batch.data[0].as_in_context(ctx) # 32 x 3 x 300 x 300 y = batch.label[0].as_in_context(ctx) # 32 x 43 x 8 anchors, class_preds, box_preds = net(x) cls_probs = mx.nd.SoftmaxActivation(data=class_preds.transpose((0,2,1)), mode='channel') det = MultiBoxDetection(*[cls_probs, box_preds, anchors], \ name="detection", nms_threshold=0.45, force_suppress=False, variances=(0.1, 0.1, 0.2, 0.2), nms_topk=400) val_metric.update(labels=[y], preds=[det]) names, values = val_metric.get() # epoch_str = ("Epoch %d. Loss: %f, Train acc %f, Valid acc %f, " # % (epoch, train_loss / len(train_data), # train_acc / len(train_data), valid_acc)) for name, value in zip(names, values): logger.info('Epoch[{}] Validation-{}={}'.format(epoch, name, value)) else: logger.info ("Epoch %2d. train_cls_loss: %5f, train_box_loss %5f," " time %.1f sec" % (epoch, train_cls_loss / i, train_bbox_loss / i, time.time()-tic)) logger.info ('Epoch %2d, train %s %.2f, %s %.5f, time %.1f sec' % (
def predict(net, data):
    """Run ``net`` on ``data`` and decode its outputs into detections.

    Parameters
    ----------
    net
        Callable returning three outputs, unpacked here as
        ``(anchors, box_preds, cls_preds)``.
    data
        Input batch passed to ``net`` unchanged.

    Returns
    -------
    The result of ``MultiBoxDetection`` (forced suppression, no clipping,
    NMS threshold 0.45).
    """
    # NOTE(review): the outputs are unpacked as anchors, box predictions,
    # class predictions - confirm this matches the order `net` returns.
    anchors, box_preds, cls_preds = net(data)
    scores = nd.SoftmaxActivation(cls_preds.transpose((0, 2, 1)),
                                  mode='channel')
    return MultiBoxDetection(scores, box_preds, anchors,
                             force_suppress=True, clip=False,
                             nms_threshold=0.45)