Ejemplo n.º 1
0
def scan(file):
    """Run OCR over every segment of an uploaded image.

    The bytes read from ``file.stream`` are decoded with ``cv2.imdecode``
    (flag ``0``, i.e. grayscale) and cut into pieces by ``utils.splitImg``.
    Each piece is preprocessed, written to ``<curr_dir>/test/<index>.png``
    and fed through ``session`` / ``decoded``, which (like ``inputs``,
    ``seq_len`` and ``input_keep_prob``) are resolved from the enclosing
    module scope.

    Args:
        file: Object exposing a readable ``stream`` attribute (presumably a
            web-framework upload object -- confirm against the caller).

    Returns:
        list: One ``ocr.list_to_chars`` result per decoded sequence, in
        segment order.
    """
    raw_bytes = np.asarray(bytearray(file.stream.read()), dtype=np.uint8)
    segments = utils.splitImg(cv2.imdecode(raw_bytes, 0))

    ocr_texts = []
    for index, segment in enumerate(segments):
        # Preprocess the segment and scale it to the height used by ``ocr``.
        glyphs = utils.dropZeroEdges(utils.img2bwinv(segment))
        glyphs = utils.resize(glyphs, ocr.image_height)
        utils.save(glyphs, os.path.join(curr_dir, "test", "%s.png" % index))

        # Five extra columns beyond the resized width -- presumably
        # right-hand padding for the network; TODO confirm.
        padded_width = glyphs.shape[1] + 5
        flat = utils.img2vec(glyphs, ocr.image_height, padded_width)

        # Batch of one, laid out as (batch, width, height).
        batch = np.zeros([1, padded_width, ocr.image_height])
        batch[0, :] = np.transpose(
            flat.reshape((ocr.image_height, padded_width)))
        lengths = np.ones(batch.shape[0]) * padded_width
        feed = {inputs: batch, seq_len: lengths, input_keep_prob: 1.0}

        # Only the graph execution itself is timed.
        started = time.time()
        sparse_result = session.run(decoded[0], feed)
        seconds = round(time.time() - started, 2)
        print("filished ocr %s , paid %s seconds" % (index, seconds))

        ocr_texts.extend(
            ocr.list_to_chars(sequence)
            for sequence in ocr.decode_sparse_tensor(sparse_result)
        )

    return ocr_texts
Ejemplo n.º 2
0
def scan(file):
    """Run OCR over every segment of an uploaded image.

    The upload is opened with ``Image.open``, converted to an array, passed
    through ``utils.img2gray`` and ``utils.clearImgGray``, saved as
    ``<curr_dir>/test/src.png`` and split by ``utils.splitImg``.  Each
    segment is then preprocessed, saved as ``<curr_dir>/test/<index>.png``
    and fed through ``session`` / ``decoded``, which (like ``inputs``,
    ``seq_len`` and ``input_keep_prob``) come from the enclosing module
    scope.

    Args:
        file: Object exposing a ``stream`` attribute that ``Image.open``
            can read (presumably a web-framework upload -- confirm against
            the caller).

    Returns:
        list: One ``ocr.list_to_chars`` result per decoded sequence, in
        segment order.
    """
    page = utils.img2gray(np.array(Image.open(file.stream)))
    page = utils.clearImgGray(page)
    utils.save(page, os.path.join(curr_dir, "test", "src.png"))

    ocr_texts = []
    for index, segment in enumerate(utils.splitImg(page)):
        # Clean the segment, invert it, crop and scale to the OCR height.
        glyphs = 255. - utils.clearImgGray(segment)
        glyphs = utils.dropZeroEdges(glyphs)
        glyphs = utils.resize(glyphs, ocr.image_height)
        utils.save(glyphs, os.path.join(curr_dir, "test", "%s.png" % index))
        glyphs = glyphs / 255.

        # Pad the width up to the next multiple of POOL_SIZE.  A width that
        # is already a multiple still gains one full POOL_SIZE --
        # NOTE(review): confirm that this is intended.
        width = glyphs.shape[1]
        padded_width = width + (ocr.POOL_SIZE - width % ocr.POOL_SIZE)
        flat = utils.img2vec(glyphs, ocr.image_height, padded_width)

        # Batch of one, laid out as (batch, width, height).
        batch = np.zeros([1, padded_width, ocr.image_height])
        batch[0, :] = np.transpose(
            flat.reshape((ocr.image_height, padded_width)))

        # Pixel count floor-divided by POOL_SIZE squared, one value per
        # batch item (multiplication first, then floor division).
        pixel_counts = np.ones(batch.shape[0]) * (
            padded_width * ocr.image_height)
        lengths = pixel_counts // (ocr.POOL_SIZE * ocr.POOL_SIZE)
        feed = {inputs: batch, seq_len: lengths, input_keep_prob: 1.0}

        # Only the graph execution itself is timed.
        started = time.time()
        sparse_result = session.run(decoded[0], feed)
        seconds = round(time.time() - started, 2)
        print("filished ocr %s , paid %s seconds" % (index, seconds))

        ocr_texts.extend(
            ocr.list_to_chars(sequence)
            for sequence in ocr.decode_sparse_tensor(sparse_result)
        )

    return ocr_texts
Ejemplo n.º 3
0
def scan(file):
    """Run the two-stage OCR pipeline over every segment of an upload.

    The upload is opened with ``Image.open``, converted to an array and
    passed through ``utils.img2gray``; that result (times 255) is saved as
    ``<curr_dir>/test/p0.png`` and split by ``utils.splitImg``.  For each
    segment the function:

    1. builds a square network input via ``utils.square_img``;
    2. evaluates ``net_g`` on it and re-crops / re-squares the output;
    3. saves input, raw ``net_g`` output and post-processed output stacked
       vertically as ``<curr_dir>/test/<index>.png``;
    4. evaluates ``res_decoded[0]`` on the post-processed output and
       converts the result with ``utils.decode_sparse_tensor`` and
       ``ocr.list_to_chars``.

    ``session``, ``net_g``, ``res_decoded``, ``inputs`` and ``seq_len`` are
    resolved from the enclosing module scope.

    Args:
        file: Object exposing a ``stream`` attribute that ``Image.open``
            can read (presumably a web-framework upload -- confirm against
            the caller).

    Returns:
        list: One ``ocr.list_to_chars`` result per decoded sequence, in
        segment order.
    """
    page = utils.img2gray(np.array(Image.open(file.stream)))
    utils.save(page * 255, os.path.join(curr_dir, "test", "p0.png"))

    ocr_texts = []
    for index, segment in enumerate(utils.splitImg(page)):
        # The cleaned inverse image is handed to dropZeroEdges together
        # with the inverted segment (presumably as the cropping mask --
        # confirm against utils.dropZeroEdges).
        mask = utils.clearImg(utils.img2bwinv(segment))
        glyphs = utils.dropZeroEdges(mask, 255. - segment)
        glyphs = utils.resize(glyphs, ocr.image_height)
        glyphs = glyphs / 255.

        # Batch of one square image.
        batch = np.zeros([1, ocr.image_size, ocr.image_size])
        batch[0, :] = utils.square_img(
            glyphs, np.zeros([ocr.image_size, ocr.image_size]))

        # Pixel count floor-divided by POOL_SIZE squared (multiplication
        # first, then floor division).
        pixel_counts = np.ones(1) * (ocr.image_size * ocr.image_size)
        lengths = pixel_counts // (ocr.POOL_SIZE * ocr.POOL_SIZE)

        # The timer covers both graph evaluations and the work in between.
        started = time.time()
        generated = np.squeeze(session.run(net_g, {inputs: batch}), axis=3)

        # Keep the untouched network output for the debug image below.
        raw_generated = np.copy(generated)

        # Only batch item 0 is post-processed.
        strip = utils.unsquare_img(generated[0], ocr.image_height)
        strip_mask = np.copy(strip)
        strip_mask[strip_mask <= 0.2] = 0
        strip = utils.dropZeroEdges(strip_mask, strip, min_rate=0.1)
        strip = utils.resize(strip, ocr.image_height)
        # Re-square only when the strip has no more pixels than the square
        # canvas; otherwise the network output is kept as is.
        strip_area = strip.shape[0] * strip.shape[1]
        if strip_area <= ocr.image_size * ocr.image_size:
            generated[0] = utils.square_img(
                strip, np.zeros([ocr.image_size, ocr.image_size]),
                ocr.image_height)

        # Debug image: input, raw output and post-processed output stacked.
        stacked = np.vstack((batch[0], raw_generated[0], generated[0]))
        utils.save(
            stacked * 255, os.path.join(curr_dir, "test", "%s.png" % index))

        sparse_result = session.run(
            res_decoded[0], {inputs: generated, seq_len: lengths})
        seconds = round(time.time() - started, 2)
        print("filished ocr %s , paid %s seconds" % (index, seconds))

        ocr_texts.extend(
            ocr.list_to_chars(sequence)
            for sequence in utils.decode_sparse_tensor(sparse_result)
        )

    return ocr_texts