def scan(file):
    """Recognise the text contained in an uploaded image.

    The upload is decoded with OpenCV using flag ``0`` (grayscale), cut into
    pieces by ``utils.splitImg`` and every piece is fed through the
    recognition graph held by the module-level ``session``.

    Args:
        file: object exposing ``file.stream.read()`` that yields the encoded
            image bytes (presumably a web-framework upload - confirm with
            the caller).

    Returns:
        list: one ``ocr.list_to_chars`` result per decoded sequence, in the
        order the pieces were processed.

    Side effects:
        Saves each preprocessed piece to ``<curr_dir>/test/<index>.png`` and
        prints the time spent in ``session.run`` for each piece.
    """
    raw_bytes = bytearray(file.stream.read())
    page = cv2.imdecode(np.asarray(raw_bytes, dtype=np.uint8), 0)

    ocr_texts = []
    for index, piece in enumerate(utils.splitImg(page)):
        # Preprocess: invert/binarise, trim empty borders, scale to the
        # height the network was built for.
        glyphs = utils.img2bwinv(piece)
        glyphs = utils.dropZeroEdges(glyphs)
        glyphs = utils.resize(glyphs, ocr.image_height)
        utils.save(glyphs, os.path.join(curr_dir, "test", "%s.png" % index))

        # The network input is five columns wider than the resized piece.
        padded_width = glyphs.shape[1] + 5
        flat = utils.img2vec(glyphs, ocr.image_height, padded_width)

        # Batch of one, laid out as (batch, width, height).
        batch = np.zeros([1, padded_width, ocr.image_height])
        batch[0, :] = np.transpose(
            flat.reshape((ocr.image_height, padded_width)))
        lengths = np.ones(batch.shape[0]) * padded_width

        started = time.time()
        sparse_result = session.run(
            decoded[0],
            {inputs: batch, seq_len: lengths, input_keep_prob: 1.0})
        elapsed = round(time.time() - started, 2)
        print("filished ocr %s , paid %s seconds" % (index, elapsed))

        ocr_texts.extend(
            ocr.list_to_chars(sequence)
            for sequence in ocr.decode_sparse_tensor(sparse_result))
    return ocr_texts
def get_next_batch(batch_size=128):
    """Draw a random training batch from the entries in ``train_files``.

    Each entry is a text line of the form ``"<image name> <label text>"``.
    The label text is right-padded with spaces to ``label_size`` characters
    and mapped to indices into ``CHARS``; characters missing from ``CHARS``
    are mapped to the index of ``UNKOWN_CHAR``.

    Args:
        batch_size: number of samples to draw (without replacement).

    Returns:
        tuple: ``(inputs, labels)`` where ``inputs`` has shape
        ``(batch_size, image_size[1] * image_size[0])`` and ``labels`` is an
        integer array of shape ``(batch_size, label_size)``.
    """
    inputs = np.zeros([batch_size, image_size[1] * image_size[0]])
    labels = np.zeros([batch_size, label_size], dtype=int)

    chosen = random.sample(train_files, batch_size)
    for row, entry in enumerate(chosen):
        # Everything before the first space is the image name, the rest is
        # the label text.
        first_space = entry.index(' ')
        imageFileName = entry[:first_space] + ".png"
        text = entry[first_space:].strip()
        # NOTE: in SimSun 9pt the glyphs 'O' and '0' are indistinguishable;
        # mapping 'O' to '0' was considered here but is not enabled.
        # The label text has to be padded with spaces to a fixed length.
        text = text.ljust(label_size)

        if imageFileName in images:
            imgvec = images[imageFileName]
        else:
            # Load from disk; the result is an inverted black/white image.
            picture = readImgFile(
                os.path.join(curr_dir, DATA_DIR, imageFileName))
            picture = img2bwinv(picture)
            picture = dropZeroEdges(picture)
            picture = resize(picture, image_size[0])
            imgvec = img2vec(picture, image_size[0], image_size[1])
            # The vector is intentionally not written back to ``images``.
        inputs[row, :] = imgvec

        # UNKOWN_CHAR is only looked up when an unknown character occurs.
        labels[row, :] = [
            CHARS.index(ch if ch in CHARS else UNKOWN_CHAR) for ch in text
        ]
    return inputs, labels
def get_next_batch(batch_size=128):
    """Synthesise a batch of (noisy image, clean image) training pairs.

    For every sample a random text is rendered with a random font setup via
    ``utils_font.get_font_image_from_url``. The noisy input is produced by
    adding noise, converting to gray and rescaling; the clean target is the
    untouched rendering, converted to gray and passed through
    ``utils.img2bwinv``. Both are scaled to ``image_height`` rows.

    Args:
        batch_size: number of pairs to generate.

    Returns:
        tuple: ``(inputs, labels)``, two arrays of shape
        ``(batch_size, width, image_height)`` where ``width`` is the widest
        sample of the batch plus 1..POOL_SIZE padding columns, making it a
        multiple of ``POOL_SIZE``.
    """
    noisy_samples = []
    clean_samples = []
    widest = 0
    # Never read afterwards; the draw is kept because it advances the state
    # of the shared random generator.
    font_min_length = random.randint(10, 20)

    for _ in range(batch_size):
        font_name = random.choice(AllFontNames)
        font_length = random.randint(3, 5)
        font_size = random.randint(image_height, 64)
        font_mode = random.choice([0, 1, 2, 4])
        font_hint = random.choice([0, 1, 2, 3, 4, 5])
        text = utils_font.get_random_text(CHARS, eng_world_list, font_length)
        rendered = utils_font.get_font_image_from_url(
            text, font_name, font_size,
            fontmode=font_mode, fonthint=font_hint)
        # Snapshot of the rendering before any noise is applied.
        pristine = rendered.copy()

        # Noisy network input.
        noisy = utils_font.add_noise(rendered)
        noisy = utils_pil.convert_to_gray(noisy)
        rate = random.randint(8, 17) / font_size
        noisy = np.asarray(utils_pil.resize(noisy, rate))
        noisy = utils.resize(noisy, height=image_height)
        noisy = (255. - noisy) / 255.
        noisy_samples.append(noisy)

        # Clean training target.
        clean = np.asarray(utils_pil.convert_to_gray(pristine))
        clean = utils.resize(clean, height=image_height)
        clean = utils.img2bwinv(clean)
        clean = clean / 255.
        clean_samples.append(clean)

        widest = max(widest, noisy.shape[1], clean.shape[1])

    # Pad up to the next multiple of POOL_SIZE (a full POOL_SIZE is added
    # when the width already is a multiple).
    widest = widest + (POOL_SIZE - widest % POOL_SIZE)

    def _pack(samples):
        # Place every sample, transposed, into one zero-padded array.
        packed = np.zeros([batch_size, widest, image_height])
        for row, sample in enumerate(samples):
            vec = utils.img2vec(sample, height=image_height, width=widest,
                                flatten=False)
            packed[row, :] = np.transpose(vec)
        return packed

    inputs = _pack(noisy_samples)
    labels = _pack(clean_samples)
    return inputs, labels
def scan(file):
    """Recognise the text in an uploaded image using a two-stage pipeline.

    Every piece returned by ``utils.splitImg`` is first run through the
    ``net_g`` tensor, whose output is re-cropped and re-squared, and then
    through ``res_decoded[0]`` to obtain the character sequences.

    Args:
        file: object whose ``stream`` attribute can be opened by
            ``PIL.Image.open`` (presumably a web-framework upload - confirm
            with the caller).

    Returns:
        list: one ``ocr.list_to_chars`` result per decoded sequence.

    Side effects:
        Writes ``<curr_dir>/test/p0.png`` (gray page) and one
        ``<curr_dir>/test/<index>.png`` per piece (network input, raw
        ``net_g`` output and post-processed output stacked vertically), and
        prints the time spent on each piece.
    """
    gray_page = utils.img2gray(np.array(Image.open(file.stream)))
    utils.save(gray_page * 255, os.path.join(curr_dir, "test", "p0.png"))

    side = ocr.image_size
    ocr_texts = []
    for index, piece in enumerate(utils.splitImg(gray_page)):
        # The cleaned inverted image is passed to dropZeroEdges together
        # with the plain inversion of the piece.
        mask = utils.img2bwinv(piece)
        mask = utils.clearImg(mask)
        glyphs = 255. - piece
        glyphs = utils.dropZeroEdges(mask, glyphs)
        glyphs = utils.resize(glyphs, ocr.image_height)
        glyphs = glyphs / 255.

        # Batch of one square canvas.
        canvas = np.zeros([1, side, side])
        canvas[0, :] = utils.square_img(glyphs, np.zeros([side, side]))
        lengths = np.ones(1) * (side * side) // (
            ocr.POOL_SIZE * ocr.POOL_SIZE)

        started = time.time()
        cleaned = session.run(net_g, {inputs: canvas})
        cleaned = np.squeeze(cleaned, axis=3)
        raw_cleaned = np.copy(cleaned)  # kept only for the debug image

        # Post-process the single batch entry: unsquare, crop using a copy
        # thresholded at 0.2, rescale, and square again if it still fits.
        strip = utils.unsquare_img(cleaned[0], ocr.image_height)
        thresholded = np.copy(strip)
        thresholded[thresholded <= 0.2] = 0
        strip = utils.dropZeroEdges(thresholded, strip, min_rate=0.1)
        strip = utils.resize(strip, ocr.image_height)
        if strip.shape[0] * strip.shape[1] <= side * side:
            cleaned[0] = utils.square_img(
                strip, np.zeros([side, side]), ocr.image_height)

        preview = np.vstack((canvas[0], raw_cleaned[0], cleaned[0]))
        utils.save(preview * 255,
                   os.path.join(curr_dir, "test", "%s.png" % index))

        sparse_result = session.run(
            res_decoded[0], {inputs: cleaned, seq_len: lengths})
        elapsed = round(time.time() - started, 2)
        print("filished ocr %s , paid %s seconds" % (index, elapsed))

        ocr_texts.extend(
            ocr.list_to_chars(sequence)
            for sequence in utils.decode_sparse_tensor(sparse_result))
    return ocr_texts
def getImage(text, font_name, font_length, font_size, noise=False,
             fontmode=None, fonthint=None):
    """Fetch a rendering of ``text`` from the font service and normalise it.

    The image is requested from the hard-coded rendering service, pasted
    onto a white RGB background using itself as the paste mask, trimmed
    with ``utils.trim`` and converted to a float array.

    Args:
        text: text to render.
        font_name: font name forwarded to the service as ``fontname``.
        font_length: accepted for interface compatibility; not used here.
        font_size: font size forwarded to the service as ``fontsize``.
        noise: when true, the image is rescaled to a random height between
            9 and ``image_height``, sparse random noise is added and the
            result is min-max normalised; otherwise the image is passed
            through ``utils.img2bwinv`` and divided by 255.
        fontmode: value for the ``fontmode`` request parameter; a random
            choice from ``[0, 1, 2, 4]`` when ``None``.
        fonthint: value for the ``fonthint`` request parameter; a random
            choice from ``[0, 1, 2, 3, 4, 5]`` when ``None``.

    Returns:
        The processed image array, cropped with ``utils.dropZeroEdges``.
    """
    params = {
        'text': text,
        'fontname': font_name,
        'fontsize': font_size,
        # Identity comparison: only a missing argument triggers the random
        # pick.
        'fontmode': random.choice([0, 1, 2, 4]) if fontmode is None else fontmode,
        'fonthint': random.choice([0, 1, 2, 3, 4, 5]) if fonthint is None else fonthint,
    }

    r = http('http://192.168.2.113:8888/', params)
    _img = Image.open(io.BytesIO(r))
    # Flatten onto white; the image itself is used as the paste mask.
    img = Image.new("RGB", _img.size, (255, 255, 255))
    img.paste(_img, (0, 0), _img)
    img = utils.trim(img)

    if noise:
        w, h = img.size
        _h = random.randint(9, image_height)
        # Keep the aspect ratio, but never ask Pillow for a zero width.
        _w = max(1, round(w * _h / h))
        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same
        # filter under its current name.
        img = img.resize((_w, _h), Image.LANCZOS)
        img = np.asarray(img)
        img = 1 - utils.img2gray(img) / 255.
        img = utils.dropZeroEdges(img)
        # About 10% of the pixels receive additive noise in (0, 0.5).
        speckle = np.maximum(np.random.random(img.shape) - 0.9, 0)
        img = img + speckle * 5
        imin, imax = img.min(), img.max()
        span = imax - imin
        # A constant image has zero span; skip the division instead of
        # filling the result with NaN.
        img = (img - imin) / span if span > 0 else img - imin
    else:
        img = np.asarray(img)
        img = utils.img2gray(img)
        img = utils.img2bwinv(img)
        img = img / 255.
        img = utils.dropZeroEdges(img)
    return img
def get_next_batch(batch_size=128):
    """Synthesise a training batch with images, clean targets and labels.

    For every sample a random text is rendered at font size 36 through
    ``utils_font.get_font_image_from_url`` and scaled to ``image_height``.
    The noisy input is degraded by adding noise and by shrinking to a
    random height and scaling back; the clean target is the undegraded
    rendering passed through ``utils.img2bwinv``.

    Args:
        batch_size: number of samples to generate.

    Returns:
        tuple: ``(inputs, targets, sparse_labels, seq_len)`` where

        * ``inputs`` / ``targets`` have shape
          ``(batch_size, width, image_height)``; ``width`` is the widest
          sample plus 1..POOL_SIZE padding columns, i.e. a multiple of
          ``POOL_SIZE``,
        * ``sparse_labels`` is ``utils.sparse_tuple_from`` applied to the
          per-sample ``CHARS`` indices of the rendered texts,
        * ``seq_len`` is a length-``batch_size`` array filled with
          ``width * image_height // POOL_SIZE**2``.

    Raises:
        ValueError: if a rendered text contains a character that is not in
            ``CHARS`` (raised by ``CHARS.index``).
    """
    images = []
    to_images = []
    codes = []
    max_width_image = 0

    for _ in range(batch_size):
        font_name = random.choice(AllFontNames)
        font_length = random.randint(25, 30)
        font_size = 36
        font_mode = random.choice([0, 1, 2, 4])
        font_hint = random.choice([0, 1, 2, 3, 4, 5])
        text = utils_font.get_random_text(CHARS, eng_world_list, font_length)
        codes.append([CHARS.index(char) for char in text])

        image = utils_font.get_font_image_from_url(
            text, font_name, font_size,
            fontmode=font_mode, fonthint=font_hint)
        image = utils_pil.resize_by_height(image, image_height)
        # Snapshot of the rendering before it is degraded.
        to_image = image.copy()

        image = utils_font.add_noise(image)
        image = utils_pil.convert_to_gray(image)
        # Shrink to a random height and scale back to lose detail. The
        # divisor may be a float, so the bound is cast to int:
        # random.randint rejects float arguments on Python 3.12+.
        shrink = random.choice([1, 1.5, 2, 2.5])
        _h = random.randint(9, int(image_height // shrink))
        image = utils_pil.resize_by_height(image, _h, random.random() > 0.5)
        image = utils_pil.resize_by_height(image, image_height,
                                           random.random() > 0.5)
        image = np.asarray(image)
        image = utils.resize(image, height=image_height)
        image = (255. - image) / 255.
        images.append(image)

        to_image = utils_pil.convert_to_gray(to_image)
        to_image = np.asarray(to_image)
        to_image = utils.resize(to_image, height=image_height)
        to_image = utils.img2bwinv(to_image)
        to_image = to_image / 255.
        to_images.append(to_image)

        max_width_image = max(max_width_image, image.shape[1],
                              to_image.shape[1])

    # Pad up to the next multiple of POOL_SIZE (a full POOL_SIZE is added
    # when the width already is a multiple).
    max_width_image = max_width_image + (
        POOL_SIZE - max_width_image % POOL_SIZE)

    inputs = np.zeros([batch_size, max_width_image, image_height])
    for row, sample in enumerate(images):
        image_vec = utils.img2vec(sample, height=image_height,
                                  width=max_width_image, flatten=False)
        inputs[row, :] = np.transpose(image_vec)

    targets = np.zeros([batch_size, max_width_image, image_height])
    for row, sample in enumerate(to_images):
        image_vec = utils.img2vec(sample, height=image_height,
                                  width=max_width_image, flatten=False)
        targets[row, :] = np.transpose(image_vec)

    labels = [np.asarray(code) for code in codes]
    sparse_labels = utils.sparse_tuple_from(labels)
    seq_len = np.ones(batch_size) * (max_width_image * image_height) // (
        POOL_SIZE * POOL_SIZE)
    return inputs, targets, sparse_labels, seq_len
os.mkdir(os.path.join(curr_dir, "data" ,"dataset")) with open(os.path.join(curr_dir, "data", "index.txt")) as index_file: for i, line in enumerate(index_file.readlines()): if i%10000==0: print("resizing image no: ",i) lines = line.split(" ") image_name = lines[0]+".png" dst_image_name = os.path.join(curr_dir, "data" ,"dataset", image_name) if os.path.exists(dst_image_name): train_files.append(line) continue if not os.path.exists(os.path.dirname(dst_image_name)): os.mkdir(os.path.dirname(dst_image_name)) src_image_name = os.path.join(curr_dir,"data",image_name) try: image = readImgFile(src_image_name) image = img2bwinv(image) image = dropZeroEdges(image) except: print(dst_image_name,"error") continue resized_image = resize(image,image_height) save(resized_image,dst_image_name) train_files.append(line) def neural_networks(): # 输入:训练的数量,一张图片的宽度,一张图片的高度 [-1,-1,16] inputs = tf.placeholder(tf.float32, [None, None, image_height], name="inputs") # 定义 ctc_loss 是稀疏矩阵 labels = tf.sparse_placeholder(tf.int32, name="labels") # 1维向量 size [batch_size] 等于 np.ones(batch_size)* image_width