def scan(file):
    """Run OCR over an uploaded image and return the recognised pieces.

    The upload is decoded with OpenCV using flag ``0`` (grayscale in
    OpenCV), cut into sub-images by ``utils.splitImg``, and every sub-image
    is preprocessed and pushed through the module-level ``session`` using
    the ``decoded[0]`` fetch.

    Args:
        file: Upload object exposing a readable ``stream`` attribute.

    Returns:
        list: one ``ocr.list_to_chars`` result per decoded sequence, in the
        order the sub-images were processed.

    Side effects:
        Saves each preprocessed sub-image to ``<curr_dir>/test/<index>.png``
        and prints the inference time for each sub-image.
    """
    raw_bytes = np.asarray(bytearray(file.stream.read()), dtype=np.uint8)
    source = cv2.imdecode(raw_bytes, 0)

    ocr_texts = []
    for i, piece in enumerate(utils.splitImg(source)):
        # Preprocess: presumably binarise/invert, crop empty borders and
        # scale to the model's height -- confirm against utils.
        prepared = utils.img2bwinv(piece)
        prepared = utils.dropZeroEdges(prepared)
        prepared = utils.resize(prepared, ocr.image_height)
        utils.save(prepared, os.path.join(curr_dir, "test", "%s.png" % i))

        # The width handed to the vectoriser is 5 columns larger than the
        # resized image's own width.
        width = prepared.shape[1] + 5
        flat = utils.img2vec(prepared, ocr.image_height, width)

        # Single-item batch laid out as (1, width, image_height).
        batch = np.zeros([1, width, ocr.image_height])
        batch[0, :] = np.transpose(flat.reshape((ocr.image_height, width)))
        lengths = np.ones(batch.shape[0]) * width

        feed = {inputs: batch, seq_len: lengths, input_keep_prob: 1.0}

        # Only the session call itself is timed.
        start = time.time()
        sparse_result = session.run(decoded[0], feed)
        seconds = round(time.time() - start, 2)
        print("filished ocr %s , paid %s seconds" % (i, seconds))

        ocr_texts.extend(
            ocr.list_to_chars(sequence)
            for sequence in ocr.decode_sparse_tensor(sparse_result)
        )
    return ocr_texts
def scan(file):
    """Run OCR over an uploaded image and return the recognised pieces.

    The upload is opened with PIL, converted via ``utils.img2gray`` and
    ``utils.clearImgGray``, cut into sub-images by ``utils.splitImg``, and
    every sub-image is preprocessed and pushed through the module-level
    ``session`` using the ``decoded[0]`` fetch.

    Args:
        file: Upload object exposing a ``stream`` attribute that
            ``Image.open`` can read.

    Returns:
        list: one ``ocr.list_to_chars`` result per decoded sequence, in the
        order the sub-images were processed.

    Side effects:
        Saves the cleaned source image to ``<curr_dir>/test/src.png`` and
        each preprocessed sub-image to ``<curr_dir>/test/<index>.png``;
        prints the inference time for each sub-image.
    """
    source = np.array(Image.open(file.stream))
    source = utils.img2gray(source)
    source = utils.clearImgGray(source)
    utils.save(source, os.path.join(curr_dir, "test", "src.png"))

    ocr_texts = []
    for i, piece in enumerate(utils.splitImg(source)):
        # Disabled alternative: image = utils.img2bwinv(split_image)
        prepared = utils.clearImgGray(piece)
        # Disabled alternative: image = utils.clearBackgroundColor(image, 255)
        prepared = 255. - prepared  # invert intensities
        prepared = utils.dropZeroEdges(prepared)
        prepared = utils.resize(prepared, ocr.image_height)
        # The debug image is written before scaling by 1/255.
        utils.save(prepared, os.path.join(curr_dir, "test", "%s.png" % i))
        prepared = prepared / 255.

        # Grow the width to the next multiple of POOL_SIZE. A width that is
        # already an exact multiple still gains one full POOL_SIZE.
        width = prepared.shape[1]
        width = width + (ocr.POOL_SIZE - width % ocr.POOL_SIZE)
        flat = utils.img2vec(prepared, ocr.image_height, width)

        # Single-item batch laid out as (1, width, image_height).
        batch = np.zeros([1, width, ocr.image_height])
        batch[0, :] = np.transpose(flat.reshape((ocr.image_height, width)))

        # Evaluated left to right: (ones * cell_count) // pool_area.
        cell_count = width * ocr.image_height
        pool_area = ocr.POOL_SIZE * ocr.POOL_SIZE
        lengths = np.ones(batch.shape[0]) * cell_count // pool_area

        feed = {inputs: batch, seq_len: lengths, input_keep_prob: 1.0}

        # Only the session call itself is timed.
        start = time.time()
        sparse_result = session.run(decoded[0], feed)
        seconds = round(time.time() - start, 2)
        print("filished ocr %s , paid %s seconds" % (i, seconds))

        ocr_texts.extend(
            ocr.list_to_chars(sequence)
            for sequence in ocr.decode_sparse_tensor(sparse_result)
        )
    return ocr_texts
def scan(file):
    """Run two-stage OCR over an uploaded image and return the pieces.

    The upload is opened with PIL, converted via ``utils.img2gray`` and cut
    into sub-images by ``utils.splitImg``. Each sub-image is placed on an
    ``ocr.image_size`` square canvas, run through the ``net_g`` fetch, and
    the (possibly re-cropped) output is fed to the ``res_decoded[0]`` fetch.

    Args:
        file: Upload object exposing a ``stream`` attribute that
            ``Image.open`` can read.

    Returns:
        list: one ``ocr.list_to_chars`` result per decoded sequence, in the
        order the sub-images were processed.

    Side effects:
        Saves the grayscale source (times 255) to ``<curr_dir>/test/p0.png``
        and, per sub-image, a vertical stack of network input, raw ``net_g``
        output and re-cropped output to ``<curr_dir>/test/<index>.png``;
        prints the elapsed time for each sub-image.
    """
    source = np.array(Image.open(file.stream))
    source = utils.img2gray(source)
    utils.save(source * 255, os.path.join(curr_dir, "test", "p0.png"))
    # Disabled step: image = utils.clearImgGray(image)
    # Disabled step: utils.save(image * 255, os.path.join(curr_dir,"test","p1.png"))

    ocr_texts = []
    for i, piece in enumerate(utils.splitImg(source)):
        # The cleaned inverse image is passed as the first argument to
        # dropZeroEdges, the inverted original as the second; presumably
        # the former drives the crop -- confirm against utils.
        mask = utils.img2bwinv(piece)
        mask = utils.clearImg(mask)
        prepared = 255. - piece
        prepared = utils.dropZeroEdges(mask, prepared)
        prepared = utils.resize(prepared, ocr.image_height)
        prepared = prepared / 255.

        # Single-item batch on a square canvas.
        batch = np.zeros([1, ocr.image_size, ocr.image_size])
        batch[0, :] = utils.square_img(
            prepared, np.zeros([ocr.image_size, ocr.image_size]))

        # Evaluated left to right: (ones * canvas_area) // pool_area.
        canvas_area = ocr.image_size * ocr.image_size
        pool_area = ocr.POOL_SIZE * ocr.POOL_SIZE
        lengths = np.ones(1) * canvas_area // pool_area

        # Timing spans both session runs and the debug image write.
        start = time.time()
        generated = session.run(net_g, {inputs: batch})
        generated = np.squeeze(generated, axis=3)  # drop size-1 axis 3
        raw_generated = np.copy(generated)  # untouched copy for debug image

        # The batch holds one item, so only index 0 is post-processed.
        strip = utils.unsquare_img(generated[0], ocr.image_height)
        thresholded = np.copy(strip)
        thresholded[thresholded <= 0.2] = 0
        strip = utils.dropZeroEdges(thresholded, strip, min_rate=0.1)
        strip = utils.resize(strip, ocr.image_height)
        # Replace the network output only when the re-cropped strip has no
        # more pixels than the square canvas.
        if strip.shape[0] * strip.shape[1] <= ocr.image_size * ocr.image_size:
            generated[0] = utils.square_img(
                strip,
                np.zeros([ocr.image_size, ocr.image_size]),
                ocr.image_height)

        debug_stack = np.vstack((batch[0], raw_generated[0], generated[0]))
        utils.save(debug_stack * 255,
                   os.path.join(curr_dir, "test", "%s.png" % i))

        sparse_result = session.run(res_decoded[0], {
            inputs: generated,
            seq_len: lengths
        })
        seconds = round(time.time() - start, 2)
        print("filished ocr %s , paid %s seconds" % (i, seconds))

        ocr_texts.extend(
            ocr.list_to_chars(sequence)
            for sequence in utils.decode_sparse_tensor(sparse_result)
        )
    return ocr_texts