Example 1
def detect_image(img_file):
    if not os.path.exists(img_file):
        print('cannot find image: ', img_file)
        return
    img = Image.open(img_file)
    img = ImageOps.fit(img, [data_shape, data_shape], Image.ANTIALIAS)
    print(img)
    origin_img = np.array(img)
    img = origin_img - np.array([123, 117, 104])
    # organize as [batch-channel-height-width]
    img = np.transpose(img, (2, 0, 1))
    img = img[np.newaxis, :]
    # convert to ndarray
    img = nd.array(img)
    print('input image shape: ', img.shape)

    net = ToySSD(num_class)
    ctx = mx.cpu()
    net.initialize(mx.init.Xavier(magnitude=2), ctx=ctx)
    net.collect_params().reset_ctx(ctx)
    params = 'ssd_pretrained.params'
    net.load_params(params, ctx=ctx)

    anchors, cls_preds, box_preds = net(img.as_in_context(ctx))
    print('anchors', anchors)
    print('class predictions', cls_preds)
    print('box delta predictions', box_preds)

    # convert predictions to probabilities using softmax
    cls_probs = nd.SoftmaxActivation(nd.transpose(cls_preds, (0, 2, 1)), mode='channel')
    # apply shifts to anchors boxes, non-maximum-suppression, etc...
    output = MultiBoxDetection(*[cls_probs, box_preds, anchors], force_suppress=True, clip=False)
    output = output.asnumpy()
    print(output)
    print(output.shape)
    pens = dict()

    plt.imshow(origin_img)

    thresh = 0.69
    for det in output[0]:
        cid = int(det[0])
        if cid < 0:
            continue
        score = det[1]
        if score < thresh:
            continue
        if cid not in pens:
            pens[cid] = (random.random(), random.random(), random.random())
        scales = [origin_img.shape[1], origin_img.shape[0]] * 2
        xmin, ymin, xmax, ymax = [int(p * s) for p, s in zip(det[2:6].tolist(), scales)]
        rect = plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, fill=False, edgecolor=pens[cid], linewidth=3)
        plt.gca().add_patch(rect)
        text = class_names[cid]
        plt.gca().text(xmin, ymin - 2, '{:s} {:.3f}'.format(text, score),
                       bbox=dict(facecolor=pens[cid], alpha=0.5),
                       fontsize=12, color='white')
    plt.axis('off')
    plt.savefig('result.png', dpi=100)
    plt.show()
Example 2
def predict_bounding_boxes(net,
                           image,
                           min_c,
                           overlap_thres,
                           topk,
                           ctx=mx.gpu()):
    '''
    Given a grayscale image and the trained network, return the predicted
    bounding boxes.

    Parameters
    ----------
    net: SSD
    The trained SSD network.

    image: np.array
    A grayscale image of the handwriting passages.

    min_c: float
    Minimum score (valid_thresh) a detection must have to survive box_nms.

    overlap_thres: float
    Overlap (IoU) threshold used by box_nms.

    topk: int
    Keep at most the top-k detections after non-maximum suppression.

    ctx: mx.Context
    Context the computation runs on (default mx.gpu()).

    Returns
    -------
    predicted_bb: [(x, y, w, h)]
    The predicted bounding boxes.
    '''
    image = mx.nd.array(image).expand_dims(axis=2)
    image = mx.image.resize_short(image, 350)
    image = image.transpose([2, 0, 1]) / 255.

    image = image.as_in_context(ctx)
    image = image.expand_dims(0)

    # Dummy label (13 zero boxes) so net.training_targets can be called;
    # its outputs are not used further in this function.
    bb = np.zeros(shape=(13, 5))
    bb = mx.nd.array(bb)
    bb = bb.as_in_context(ctx)
    bb = bb.expand_dims(axis=0)

    default_anchors, class_predictions, box_predictions = net(image)

    box_target, box_mask, cls_target = net.training_targets(
        default_anchors, class_predictions, bb)

    cls_probs = mx.nd.SoftmaxActivation(mx.nd.transpose(
        class_predictions, (0, 2, 1)),
                                        mode='channel')

    predicted_bb = MultiBoxDetection(
        *[cls_probs, box_predictions, default_anchors],
        force_suppress=True,
        clip=False)
    predicted_bb = box_nms(predicted_bb,
                           overlap_thresh=overlap_thres,
                           valid_thresh=min_c,
                           topk=topk)
    predicted_bb = predicted_bb.asnumpy()
    predicted_bb = predicted_bb[0, predicted_bb[0, :, 0] != -1]
    predicted_bb = predicted_bb[:, 2:]
    predicted_bb[:, 2] = predicted_bb[:, 2] - predicted_bb[:, 0]
    predicted_bb[:, 3] = predicted_bb[:, 3] - predicted_bb[:, 1]

    return predicted_bb
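A minimal, hypothetical call sketch for predict_bounding_boxes above; the SSD constructor, parameter file name, image path, and threshold values are placeholders, not part of the original snippet.

import cv2
import mxnet as mx

net = SSD(2)                                               # hypothetical constructor for the trained SSD
net.load_params('ssd_handwriting.params', ctx=mx.gpu())    # hypothetical parameter file
page = cv2.imread('passage.png', cv2.IMREAD_GRAYSCALE)     # hypothetical input image

# Returned boxes are (x, y, w, h) fractions of the resized image.
boxes = predict_bounding_boxes(net, page, min_c=0.01, overlap_thres=0.1, topk=150)
print(boxes.shape)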
Example 3
def predict_bounding_boxes(net, image, bb):
    '''
    Given the outputs of the dataset (image and bounding box) and the network, 
    the predicted bounding boxes are provided.
    
    Parameters
    ----------
    net: SSD
    The trained SSD network.
    
    image: np.array
    A grayscale image of the handwriting passages.
    
    bb: [(x1, y1, x2, y2)]
    The ground-truth bounding boxes for the image.
    
    Returns
    -------
    predicted_bb: [(x, y, w, h)]
    The predicted bounding boxes.
    
    actual_bb: [(x, y, w, h)]
    The actual bounding boxes.
    '''
    image, bb = transform(image, bb)

    image = image.as_in_context(ctx[0])
    image = image.expand_dims(axis=0)

    bb = bb.as_in_context(ctx[0])
    bb = bb.expand_dims(axis=0)

    default_anchors, class_predictions, box_predictions = net(image)
    box_target, box_mask, cls_target = net.training_targets(
        default_anchors, class_predictions, bb)
    cls_probs = nd.SoftmaxActivation(nd.transpose(class_predictions,
                                                  (0, 2, 1)),
                                     mode='channel')

    predicted_bb = MultiBoxDetection(
        *[cls_probs, box_predictions, default_anchors],
        force_suppress=True,
        clip=False)
    # overlap_thres, min_c and topk are assumed to be defined in the enclosing scope.
    predicted_bb = box_nms(predicted_bb,
                           overlap_thresh=overlap_thres,
                           valid_thresh=min_c,
                           topk=topk)
    predicted_bb = predicted_bb.asnumpy()
    predicted_bb = predicted_bb[0, predicted_bb[0, :, 0] != -1]
    predicted_bb = predicted_bb[:, 2:]
    predicted_bb[:, 2] = predicted_bb[:, 2] - predicted_bb[:, 0]
    predicted_bb[:, 3] = predicted_bb[:, 3] - predicted_bb[:, 1]

    labeled_bb = bb[:, :, 1:].asnumpy()
    labeled_bb[:, :, 2] = labeled_bb[:, :, 2] - labeled_bb[:, :, 0]
    labeled_bb[:, :, 3] = labeled_bb[:, :, 3] - labeled_bb[:, :, 1]
    labeled_bb = labeled_bb[0]
    return predicted_bb, labeled_bb
Example 4
def predict(x, net, ctx):
    anchors, cls_preds, box_preds = net(x.as_in_context(ctx))
    cls_probs = nd.SoftmaxActivation(
        cls_preds.transpose((0, 2, 1)), mode='channel')

    return MultiBoxDetection(cls_probs, box_preds, anchors, force_suppress=True, clip=False)
Example 5
def test(net, ctx):  # ,image):
    #image = cv2.imread('/BELLO/dog/data/normalsize/n02091244_3075.jpg')
    image = cv2.imread('/BELLO/dog/data/dog3.jpg')
    x = preprocess(image)
    anchors, cls_preds, box_preds = net(x.as_in_context(ctx))
    cls_probs = nd.SoftmaxActivation(nd.transpose(cls_preds, (0, 2, 1)), mode='channel')
    output = MultiBoxDetection(*[cls_probs, box_preds, anchors], force_suppress=True, clip=False)
    #print(output[0].asnumpy())
    display(image[:, :, (2, 1, 0)], output[0].asnumpy(), thresh=0.7)
Example 6
def inference(x, epochs=295):
    ctx = mx.cpu(1)
    net = ToySSD(1)
    start_time = time.time()
    net.load_params('models/ssd_%d.params' % epochs, ctx)
    anchors, cls_preds, box_preds = net(x.as_in_context(ctx))
    cls_probs = nd.SoftmaxActivation(nd.transpose(cls_preds, (0, 2, 1)), mode='channel')
    output = MultiBoxDetection(*[cls_probs, box_preds, anchors], force_suppress=False, clip=False, nms_threshold=0.001)
    end_time = time.time()
    print(end_time-start_time)
    return output
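The inference helpers in these examples return the raw MultiBoxDetection output, where each row is [class_id, score, xmin, ymin, xmax, ymax], class_id is -1 for suppressed anchors, and the coordinates are relative values in [0, 1]. Below is a minimal, hypothetical post-processing sketch; the filter_detections name and the default 256x256 image size are assumptions, not part of the original code.

import numpy as np

def filter_detections(output, thresh=0.5, width=256, height=256):
    # Keep the first image of the batch; drop suppressed rows and low scores.
    dets = output[0].asnumpy()
    keep = (dets[:, 0] >= 0) & (dets[:, 1] >= thresh)
    boxes = dets[keep]
    # Scale the relative corners to pixel coordinates (assumed image size).
    boxes[:, 2:6] *= np.array([width, height, width, height], dtype=np.float32)
    return boxes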
Example 7
def forward(img_path, net):
    ctx = mx.gpu(1)
    img_original = cv2.imread(img_path)
    img = preprocess(img_original)
    anchors, cls_preds, box_preds = net(img.as_in_context(ctx))
    cls_probs = nd.SoftmaxActivation(nd.transpose(cls_preds, (0, 2, 1)),
                                     mode='channel')
    output = MultiBoxDetection(*[cls_probs, box_preds, anchors],
                               force_suppress=True,
                               clip=True,
                               nms_threshold=0.01)
    return img_original, output
Example 8
def inference(x, epochs=295):
    ctx = mx.cpu(1)
    net = ToySSD(1)
    net.load_params('models/ssd_%d.params' % epochs, ctx)
    print("load sucecuss")
    anchors, cls_preds, box_preds = net(x.as_in_context(ctx))
    cls_probs = nd.SoftmaxActivation(nd.transpose(cls_preds, (0, 2, 1)),
                                     mode='channel')
    output = MultiBoxDetection(*[cls_probs, box_preds, anchors],
                               force_suppress=True,
                               clip=False)
    return output
Example 9
image = cv2.imread('img/pikachu.jpg')
x = preprocess(image)
print('x', x.shape)

# if pre-trained model is provided, we can load it
# net.load_params('ssd_%d.params' % epochs, ctx)
anchors, cls_preds, box_preds = net(x.as_in_context(ctx))
print('anchors', anchors)
print('class predictions', cls_preds)
print('box delta predictions', box_preds)

from mxnet.contrib.ndarray import MultiBoxDetection
# convert predictions to probabilities using softmax
cls_probs = nd.SoftmaxActivation(nd.transpose(cls_preds, (0, 2, 1)), mode='channel')
# apply shifts to anchors boxes, non-maximum-suppression, etc...
output = MultiBoxDetection(*[cls_probs, box_preds, anchors], force_suppress=True, clip=False)
print(output)


def display(img, out, thresh=0.5):
    import random
    import matplotlib as mpl
    mpl.rcParams['figure.figsize'] = (10,10)
    pens = dict()
    plt.clf()
    plt.imshow(img)
    for det in out:
        cid = int(det[0])
        if cid < 0:
            continue
        score = det[1]
Example 10
def generate_output_image(box_predictions, default_anchors, cls_probs,
                          box_target, box_mask, cls_target, x, y):
    '''
    Generate the image with the predicted and actual bounding boxes.
    Parameters
    ----------
    box_predictions: nd.array
        Bounding box predictions relative to the anchor boxes, output of the network

    default_anchors: nd.array
        Anchors used, output of the network
    
    cls_probs: nd.array
        Output of nd.SoftmaxActivation(nd.transpose(class_predictions, (0, 2, 1)), mode='channel')
        where class_predictions is the output of the network.

    box_target: nd.array
        Bounding box regression targets from network.training_targets(default_anchors, class_predictions, y)

    box_mask: nd.array
        Mask selecting which box targets contribute to the loss, from network.training_targets(default_anchors, class_predictions, y)

    cls_target: nd.array
        Classification targets from network.training_targets(default_anchors, class_predictions, y)
    
    x: nd.array
       The input images

    y: nd.array
        The actual labels

    Returns
    -------
    output_image: np.array
        The images with the predicted and actual bounding boxes drawn on

    number_of_bbs: int
        The number of predicting bounding boxes
    '''
    output = MultiBoxDetection(*[cls_probs, box_predictions, default_anchors],
                               force_suppress=True,
                               clip=False)
    # overlap_thres, min_c and topk are assumed to be defined in the enclosing scope.
    output = box_nms(output,
                     overlap_thresh=overlap_thres,
                     valid_thresh=min_c,
                     topk=topk)
    output = output.asnumpy()

    number_of_bbs = 0
    predicted_bb = []
    for b in range(output.shape[0]):
        predicted_bb_ = output[b, output[b, :, 0] != -1]
        predicted_bb_ = predicted_bb_[:, 2:]
        number_of_bbs += predicted_bb_.shape[0]
        predicted_bb_[:, 2] = predicted_bb_[:, 2] - predicted_bb_[:, 0]
        predicted_bb_[:, 3] = predicted_bb_[:, 3] - predicted_bb_[:, 1]
        predicted_bb.append(predicted_bb_)

    labels = y[:, :, 1:].asnumpy()
    labels[:, :, 2] = labels[:, :, 2] - labels[:, :, 0]
    labels[:, :, 3] = labels[:, :, 3] - labels[:, :, 1]

    output_image = draw_boxes_on_image(predicted_bb, labels, x.asnumpy())
    output_image[output_image < 0] = 0
    output_image[output_image > 1] = 1

    return output_image, number_of_bbs
Example 11
            # print("train_cls_loss: %5f, train_box_loss %5f,"
            #       "time %.1f sec"
            #       % (nd.mean(loss_cls).asscalar(), nd.mean(loss_bbox).asscalar(), time.time() - tic))

        if epoch % checkpoint_period == 0:
            net.save_params(filename='output/exp1/ssd_{}_{}_epoch{}.params'.format(
                net.network, net.data_shape, epoch))

        if val_iter is not None:
            for i, batch in enumerate(val_iter):
                x = batch.data[0].as_in_context(ctx)  # 32 x 3 x 300 x 300
                y = batch.label[0].as_in_context(ctx)  # 32 x 43 x 8
                anchors, class_preds, box_preds = net(x)
                cls_probs = mx.nd.SoftmaxActivation(data=class_preds.transpose((0,2,1)), mode='channel')
                det = MultiBoxDetection(*[cls_probs, box_preds, anchors], \
                    name="detection", nms_threshold=0.45, force_suppress=False,
                    variances=(0.1, 0.1, 0.2, 0.2), nms_topk=400)

                val_metric.update(labels=[y], preds=[det])
            names, values = val_metric.get()
            # epoch_str = ("Epoch %d. Loss: %f, Train acc %f, Valid acc %f, "
            #              % (epoch, train_loss / len(train_data),
            #                 train_acc / len(train_data), valid_acc))
            for name, value in zip(names, values):
                logger.info('Epoch[{}] Validation-{}={}'.format(epoch, name, value))
        else:
            logger.info ("Epoch %2d. train_cls_loss: %5f, train_box_loss %5f,"
                         " time %.1f sec"
                         % (epoch, train_cls_loss / i, train_bbox_loss / i,
                             time.time()-tic))
            logger.info ('Epoch %2d, train %s %.2f, %s %.5f, time %.1f sec' % (
Example 12
def predict(net, data):
    anchors, box_preds, cls_preds = net(data)
    cls_probs = nd.SoftmaxActivation(cls_preds.transpose((0, 2, 1)), mode='channel')
    out = MultiBoxDetection(cls_probs, box_preds, anchors, force_suppress=True, clip=False, nms_threshold=0.45)
    return out
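All of the snippets above funnel into the same MultiBoxDetection call. The self-contained toy sketch below uses synthetic anchors and predictions (not taken from any of the examples) just to make the expected input shapes and the output layout explicit.

import mxnet as mx
from mxnet import nd
from mxnet.contrib.ndarray import MultiBoxDetection

num_classes, num_anchors = 2, 4
cls_preds = nd.random.uniform(shape=(1, num_anchors, num_classes + 1))   # background + 2 classes
cls_probs = nd.SoftmaxActivation(nd.transpose(cls_preds, (0, 2, 1)), mode='channel')
box_preds = nd.zeros((1, num_anchors * 4))                               # zero box offsets
anchors = nd.array([[0.1, 0.1, 0.4, 0.4],
                    [0.2, 0.2, 0.6, 0.6],
                    [0.5, 0.5, 0.9, 0.9],
                    [0.0, 0.0, 1.0, 1.0]]).reshape((1, num_anchors, 4))

out = MultiBoxDetection(cls_probs, box_preds, anchors, force_suppress=True, clip=False)
print(out.shape)  # (1, 4, 6): each row is [class_id, score, xmin, ymin, xmax, ymax]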