Esempio n. 1
0
def detect_box(image, scale=600, maxScale=900):
    """Run the text-detection network on ``image`` and return clipped boxes.

    The image is first resized with ``resize_img(image, scale,
    maxScale=maxScale)``; all returned coordinates refer to the resized
    image, and ``rate`` is whatever ``resize_img`` reports for that resize.

    Args:
        image: Array-like image exposing ``.shape`` (rows, cols, ...).
        scale: Forwarded to ``resize_img`` as the target scale.
        maxScale: Forwarded to ``resize_img`` as the upper size bound.

    Returns:
        Tuple ``(scores, boxes, rate, w, h)`` where ``scores`` is
        ``soft_max`` of the class output, ``boxes`` is the output of
        ``get_origin_box`` with its first four columns clipped into the
        resized image, and ``w``/``h`` are the resized width and height.
    """
    image, rate = resize_img(image, scale, maxScale=maxScale)
    h, w = image.shape[:2]

    if GPU:
        im = array_to_image(image)
        res = predict_image(textNet, im)
        # Output feature map is 16x smaller than the network input.  Kept in
        # its own name so the ``scale`` argument is not silently clobbered.
        stride = 16
        iw = int(np.ceil(im.w / stride))
        ih = int(np.ceil(im.h / stride))
        # ``res`` is indexed element by element; presumably a flat C float
        # buffer holding 40 channels of ih * iw values -- TODO confirm.
        flat = [res[i] for i in range(40 * ih * iw)]
        out = np.array(flat).reshape((1, 40, ih, iw))
    else:
        inputBlob = cv2.dnn.blobFromImage(image,
                                          scalefactor=1.0,
                                          size=(w, h),
                                          swapRB=False,
                                          crop=False)
        outputName = textNet.getUnconnectedOutLayersNames()
        textNet.setInput(inputBlob)
        out = textNet.forward(outputName)[0]

    # First 20 channels feed the class scores, the rest the box regression.
    clsOut = reshape(out[:, :20, ...])
    boxOut = reshape(out[:, 20:, ...])
    boxes = get_origin_box((w, h), anchors, boxOut[0])
    scores = soft_max(clsOut[0])

    # Clamp the four coordinate columns into the resized image.  Columns
    # 0 and 2 are bounded by the width, columns 1 and 3 by the height.
    coords = boxes[:, 0:4]
    coords[coords < 0] = 0
    for col, limit in enumerate((w, h, w, h)):
        column = boxes[:, col]
        column[column >= limit] = limit - 1

    return scores, boxes, rate, w, h
Esempio n. 2
0
def predict_darknet(image):
    """Recognise the text in a single line image with the darknet OCR net.

    The image is resized to a height of 32 pixels (width scaled by the same
    factor), converted to greyscale, normalised to ``[-1, 1]`` and passed to
    ``ocrNet``.  The raw output is arranged as ``(outW, nchars)`` and handed
    to ``decode``.

    Args:
        image: PIL-style image exposing ``size`` as ``(width, height)`` plus
            ``resize`` and ``convert``.

    Returns:
        The value of ``decode`` for the network output, or ``''`` when the
        image is empty or too narrow to produce any output column.
    """
    src_w, src_h = image.size[0], image.size[1]
    if src_h <= 0:
        # Degenerate image: avoid dividing by a zero scale factor.
        return ''

    scale = src_h * 1.0 / 32
    w = int(src_w / scale)
    # Checked before resizing: the resized width is exactly ``w``, and a
    # zero-width target must not reach ``image.resize``.
    if w < 8:
        return ''

    outW = int(np.ceil(w / 4) - 3)
    if outW <= 0:
        # Widths 8..12 yield no output columns; nothing to decode.
        return ''

    image = image.resize((w, 32), Image.BILINEAR)
    image = (np.array(image.convert('L')) / 255.0 - 0.5) / 0.5

    im = array_to_image(image)
    res = predict_image(ocrNet, im)
    nchars = len(charactersPred)
    # ``res`` is read element by element; presumably a flat C float buffer
    # laid out as nchars rows of outW values -- TODO confirm.
    out = np.array([res[i] for i in range(outW * nchars)])
    out = out.reshape((nchars, outW)).transpose((1, 0))
    return decode(out)
Esempio n. 3
0
def predict_darknet(image):
    """Recognise the text in a single line image with the darknet OCR net.

    The image is resized to a height of 32 pixels (width scaled by the same
    factor), converted to greyscale, normalised to ``[-1, 1]`` and passed to
    ``ocrNet``.  The raw output is copied into a NumPy array, arranged as
    ``(outW, nchars)``, passed through ``softmax`` and handed to ``decode``.

    Args:
        image: PIL-style image exposing ``size`` as ``(width, height)`` plus
            ``resize`` and ``convert``.

    Returns:
        The value of ``decode`` for the network output, or
        ``{'chars': [], 'text': '', 'prob': 0}`` when the image is empty or
        too narrow to produce any output column.
    """
    empty_result = {'chars': [], 'text': '', 'prob': 0}

    src_w, src_h = image.size[0], image.size[1]
    if src_h <= 0:
        # Degenerate image: avoid dividing by a zero scale factor.
        return empty_result

    scale = src_h * 1.0 / 32
    w = int(src_w / scale)
    # Checked before resizing: the resized width is exactly ``w``, and a
    # zero-width target must not reach ``image.resize``.
    if w < 8:
        return empty_result

    outW = int(np.ceil(w / 4) - 3)
    if outW <= 0:
        # w == 8 gives outW == -1, which would make np.zeros raise on a
        # negative size; widths up to 12 give no output columns at all.
        return empty_result

    image = image.resize((w, 32), Image.BILINEAR)
    image = (np.array(image.convert('L')) / 255.0 - 0.5) / 0.5

    im = array_to_image(image)
    res = predict_image(ocrNet, im)
    nchars = len(charactersPred)
    # Bulk-copy the network output out of the C buffer behind ``res``.
    out = np.zeros(outW * nchars, dtype=res._type_)
    memmove(out.ctypes.data, res, out.nbytes)
    out = out.reshape((nchars, outW)).transpose((1, 0))
    out = softmax(out)

    return decode(out)