Esempio n. 1
0
def detect_image(img_file):
    '''
    Run the SSD detector on a single image file and plot the detections.

    The image is cropped/resized to a ``data_shape`` x ``data_shape`` square,
    passed through a ``ToySSD`` network restored from
    ``ssd_pretrained.params``, and every detection whose score reaches the
    threshold is drawn on top of the resized image. The figure is written to
    ``result.png`` and then displayed.

    Relies on the module-level names ``data_shape``, ``num_class`` and
    ``class_names``.

    Parameters
    ----------
    img_file: str
        Path of the image to run detection on.

    Returns
    -------
    None
        If ``img_file`` does not exist a message is printed and the function
        returns without doing anything else.
    '''
    if not os.path.exists(img_file):
        print('can not find image: ', img_file)
        # Stop here: carrying on would only fail inside Image.open().
        return

    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
    # Fall back to the old name only for installs that predate LANCZOS.
    resample = getattr(Image, 'LANCZOS', None)
    if resample is None:
        resample = Image.ANTIALIAS

    # The context manager closes the underlying file once the resized copy
    # has been produced.
    with Image.open(img_file) as raw_img:
        # Force three channels so the per-channel offset subtraction and the
        # channel transpose below also work for grayscale/RGBA/palette files.
        img = ImageOps.fit(raw_img.convert('RGB'),
                           [data_shape, data_shape],
                           resample)
    print(img)
    origin_img = np.array(img)
    # Per-channel offsets; presumably the RGB means used at training time.
    img = origin_img - np.array([123, 117, 104])
    # organize as [batch-channel-height-width]
    img = np.transpose(img, (2, 0, 1))
    img = img[np.newaxis, :]
    # convert to ndarray
    img = nd.array(img)
    print('input image shape: ', img.shape)

    net = ToySSD(num_class)
    ctx = mx.cpu()
    net.initialize(mx.init.Xavier(magnitude=2), ctx=ctx)
    net.collect_params().reset_ctx(ctx)
    params = 'ssd_pretrained.params'
    net.load_params(params, ctx=ctx)

    anchors, cls_preds, box_preds = net(img.as_in_context(ctx))
    print('anchors', anchors)
    print('class predictions', cls_preds)
    print('box delta predictions', box_preds)

    # convert predictions to probabilities using softmax
    cls_probs = nd.SoftmaxActivation(nd.transpose(cls_preds, (0, 2, 1)), mode='channel')
    # apply shifts to anchors boxes, non-maximum-suppression, etc...
    output = MultiBoxDetection(*[cls_probs, box_preds, anchors], force_suppress=True, clip=False)
    output = output.asnumpy()
    print(output)
    print(output.shape)
    pens = dict()

    plt.imshow(origin_img)

    thresh = 0.69
    # Box coordinates are multiplied by (width, height, width, height) of the
    # resized image; the factors do not change between detections.
    scales = [origin_img.shape[1], origin_img.shape[0]] * 2
    for det in output[0]:
        # Each row is read as: class id, score, then four box coordinates.
        cid = int(det[0])
        if cid < 0:
            # Negative class ids are skipped.
            continue
        score = det[1]
        if score < thresh:
            continue
        if cid not in pens:
            # One random colour per class, reused for all its boxes.
            pens[cid] = (random.random(), random.random(), random.random())
        xmin, ymin, xmax, ymax = [int(p * s) for p, s in zip(det[2:6].tolist(), scales)]
        rect = plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, fill=False, edgecolor=pens[cid], linewidth=3)
        plt.gca().add_patch(rect)
        text = class_names[cid]
        plt.gca().text(xmin, ymin - 2, '{:s} {:.3f}'.format(text, score),
                       bbox=dict(facecolor=pens[cid], alpha=0.5),
                       fontsize=12, color='white')
    plt.axis('off')
    plt.savefig('result.png', dpi=100)
    plt.show()
Esempio n. 2
0
def predict_bounding_boxes(net,
                           image,
                           min_c,
                           overlap_thres,
                           topk,
                           ctx=mx.gpu()):
    '''
    Run the network on one image and return its predicted bounding boxes.

    Parameters
    ----------
    net: SSD
    The trained SSD network.

    image: np.array
    A grayscale image of the handwriting passages.

    min_c: float
    Forwarded to box_nms as valid_thresh.

    overlap_thres: float
    Forwarded to box_nms as overlap_thresh.

    topk: int
    Forwarded to box_nms as topk.

    ctx: mx.Context
    Context the image and the placeholder labels are moved to.

    Returns
    -------
    predicted_bb: [(x, y, w, h)]
    The predicted bounding boxes.
    '''
    # Add a trailing channel axis, resize the short side to 350, then move
    # the channel axis first and scale the values by 1/255.
    net_input = mx.nd.array(image).expand_dims(axis=2)
    net_input = mx.image.resize_short(net_input, 350)
    net_input = net_input.transpose([2, 0, 1]) / 255.
    net_input = net_input.as_in_context(ctx).expand_dims(0)

    # All-zero placeholder labels, used only for the training_targets call.
    dummy_labels = mx.nd.array(np.zeros(shape=(13, 5)))
    dummy_labels = dummy_labels.as_in_context(ctx).expand_dims(axis=0)

    default_anchors, class_predictions, box_predictions = net(net_input)

    # NOTE(review): none of these three results is read afterwards; the call
    # is kept as in the original in case training_targets has side effects.
    box_target, box_mask, cls_target = net.training_targets(
        default_anchors, class_predictions, dummy_labels)

    transposed_scores = mx.nd.transpose(class_predictions, (0, 2, 1))
    cls_probs = mx.nd.SoftmaxActivation(transposed_scores, mode='channel')

    detections = MultiBoxDetection(cls_probs,
                                   box_predictions,
                                   default_anchors,
                                   force_suppress=True,
                                   clip=False)
    detections = box_nms(detections,
                         overlap_thresh=overlap_thres,
                         valid_thresh=min_c,
                         topk=topk)

    # Only the first batch element is used; rows whose first column is -1
    # are dropped, then the first two columns are discarded.
    first_sample = detections.asnumpy()[0]
    boxes = first_sample[first_sample[:, 0] != -1][:, 2:]

    # Turn the third and fourth remaining columns into differences from the
    # first and second ones (corner coordinates -> width and height).
    boxes[:, 2] -= boxes[:, 0]
    boxes[:, 3] -= boxes[:, 1]

    return boxes
Esempio n. 3
0
def predict_bounding_boxes(net, image, bb):
    '''
    Run the network on one dataset sample and return the predicted boxes
    together with the labelled ones.

    Relies on the module-level names ctx, overlap_thres, min_c, topk and
    transform.

    Parameters
    ----------
    net: SSD
    The trained SSD network.

    image: np.array
    A grayscale image of the handwriting passages.

    bb: [(x1, y1, x2, y2)]
    A tuple that contains the bounding box.

    Returns
    -------
    predicted_bb: [(x, y, w, h)]
    The predicted bounding boxes.

    actual_bb: [(x, y, w, h)]
    The actual bounding bounding boxes.
    '''
    sample, labels = transform(image, bb)

    # Move both tensors to the first configured context and add a batch axis.
    device = ctx[0]
    sample = sample.as_in_context(device).expand_dims(axis=0)
    labels = labels.as_in_context(device).expand_dims(axis=0)

    default_anchors, class_predictions, box_predictions = net(sample)

    # NOTE(review): none of these three results is read afterwards; the call
    # is kept as in the original in case training_targets has side effects.
    box_target, box_mask, cls_target = net.training_targets(
        default_anchors, class_predictions, labels)

    transposed_scores = nd.transpose(class_predictions, (0, 2, 1))
    cls_probs = nd.SoftmaxActivation(transposed_scores, mode='channel')

    detections = MultiBoxDetection(cls_probs,
                                   box_predictions,
                                   default_anchors,
                                   force_suppress=True,
                                   clip=False)
    detections = box_nms(detections,
                         overlap_thresh=overlap_thres,
                         valid_thresh=min_c,
                         topk=topk)

    # Only the first batch element is used; rows whose first column is -1
    # are dropped, then the first two columns are discarded.
    first_sample = detections.asnumpy()[0]
    predicted = first_sample[first_sample[:, 0] != -1][:, 2:]
    # Corner coordinates -> width and height.
    predicted[:, 2] -= predicted[:, 0]
    predicted[:, 3] -= predicted[:, 1]

    # The label tensor's first column is skipped (presumably a class id --
    # confirm against transform); the rest gets the same width/height
    # conversion as the predictions.
    labelled = labels[:, :, 1:].asnumpy()[0]
    labelled[:, 2] -= labelled[:, 0]
    labelled[:, 3] -= labelled[:, 1]

    return predicted, labelled
Esempio n. 4
0
def generate_output_image(box_predictions, default_anchors, cls_probs,
                          box_target, box_mask, cls_target, x, y):
    '''
    Draw the predicted and the labelled bounding boxes on a batch of images.

    Relies on the module-level names overlap_thres, min_c and topk for the
    non-maximum suppression step.

    Parameters
    ----------
    box_predictions: nd.array
        Bounding box predictions relative to the anchor boxes, output of the network

    default_anchors: nd.array
        Anchors used, output of the network

    cls_probs: nd.array
        Output of nd.SoftmaxActivation(nd.transpose(class_predictions, (0, 2, 1)), mode='channel')
        where class_predictions is the output of the network.

    box_target: nd.array
        Result of network.training_targets(default_anchors, class_predictions, y).
        Not read by this function.

    box_mask: nd.array
        Result of network.training_targets(default_anchors, class_predictions, y).
        Not read by this function.

    cls_target: nd.array
        Result of network.training_targets(default_anchors, class_predictions, y).
        Not read by this function.

    x: nd.array
       The input images

    y: nd.array
        The actual labels

    Returns
    -------
    output_image: np.array
        The images with the predicted and actual bounding boxes drawn on,
        with values limited to the range [0, 1]

    number_of_bbs: int
        The number of predicted bounding boxes, summed over the batch
    '''
    detections = MultiBoxDetection(cls_probs,
                                   box_predictions,
                                   default_anchors,
                                   force_suppress=True,
                                   clip=False)
    detections = box_nms(detections,
                         overlap_thresh=overlap_thres,
                         valid_thresh=min_c,
                         topk=topk).asnumpy()

    predicted_bb = []
    number_of_bbs = 0
    for sample in detections:
        # Drop rows whose first column is -1, then discard the first two
        # columns of the remaining rows.
        boxes = sample[sample[:, 0] != -1][:, 2:]
        number_of_bbs += boxes.shape[0]
        # Corner coordinates -> width and height.
        boxes[:, 2] -= boxes[:, 0]
        boxes[:, 3] -= boxes[:, 1]
        predicted_bb.append(boxes)

    # Same width/height conversion for the labels; the first label column is
    # skipped.
    labels = y[:, :, 1:].asnumpy()
    labels[:, :, 2] -= labels[:, :, 0]
    labels[:, :, 3] -= labels[:, :, 1]

    output_image = draw_boxes_on_image(predicted_bb, labels, x.asnumpy())
    # Limit the drawn image to [0, 1].
    output_image[output_image > 1] = 1
    output_image[output_image < 0] = 0

    return output_image, number_of_bbs