def get_next_batch(batch_size=128):
    """Build one batch of paired rendered-text images.

    For every sample the same random text is rendered twice with
    ``getImage``: once with ``noise=True`` at a random font size (the
    input) and once with ``noise=False``, ``fonthint=0`` at size
    ``image_height`` (the label).  Both are resized to ``image_height``.

    Returns:
        ``(inputs, labels)`` -- two arrays of shape
        ``[batch_size, width, image_height]`` where ``width`` is the widest
        image of the batch plus ``POOL_SIZE - width % POOL_SIZE``.
    """
    noisy_images = []
    clean_images = []
    widest = 0
    font_min_length = random.randint(10, 20)

    for _ in range(batch_size):
        font_name = random.choice(AllFontNames)
        font_length = random.randint(font_min_length - 5, font_min_length + 5)
        font_size = random.randint(9, 64)
        font_mode = random.choice([0, 1, 2, 4])
        text = getRedomText(CHARS, eng_world_list, font_length)

        noisy = getImage(text, font_name, font_length, font_size,
                         noise=True, fontmode=font_mode)
        noisy = utils.resize(noisy, height=image_height)
        noisy_images.append(noisy)

        clean = getImage(text, font_name, font_length, image_height,
                         noise=False, fontmode=font_mode, fonthint=0)
        clean = utils.resize(clean, height=image_height)
        clean_images.append(clean)

        widest = max(widest, noisy.shape[1], clean.shape[1])

    # Always grows the width, even when it is already a multiple of POOL_SIZE.
    widest += POOL_SIZE - widest % POOL_SIZE

    def _stack(batch_images):
        # Lay every image out as [width, height] inside a zero-padded tensor.
        stacked = np.zeros([batch_size, widest, image_height])
        for row, picture in enumerate(batch_images):
            vec = utils.img2vec(picture, height=image_height, width=widest,
                                flatten=False)
            stacked[row, :] = np.transpose(vec)
        return stacked

    inputs = _stack(noisy_images)
    labels = _stack(clean_images)
    return inputs, labels
def get_next_batch(batch_size=128):
    """Build one batch of (degraded, clean) rendered-text image pairs.

    Each sample renders a short random text through
    ``utils_font.get_font_image_from_url``.  A copy of the render becomes the
    label (grayscale, resized, passed through ``utils.img2bwinv``, scaled by
    1/255).  The input is the same render after ``add_noise``, grayscale
    conversion, a random down-scale, a resize back to ``image_height`` and
    inversion into ``(255 - pixel) / 255``.

    Returns:
        ``(inputs, labels)`` -- arrays of shape
        ``[batch_size, width, image_height]``; ``width`` is the widest image
        plus ``POOL_SIZE - width % POOL_SIZE``.
    """
    noisy_images = []
    clean_images = []
    widest = 0
    # The result is no longer used (font_length is drawn from 3..5 below), but
    # the draw is kept so the sequence of random numbers consumed is unchanged.
    random.randint(10, 20)

    for _ in range(batch_size):
        font_name = random.choice(AllFontNames)
        font_length = random.randint(3, 5)
        font_size = random.randint(image_height, 64)
        font_mode = random.choice([0, 1, 2, 4])
        font_hint = random.choice([0, 1, 2, 3, 4, 5])
        text = utils_font.get_random_text(CHARS, eng_world_list, font_length)
        rendered = utils_font.get_font_image_from_url(
            text, font_name, font_size, fontmode=font_mode, fonthint=font_hint)
        clean = rendered.copy()

        # Degraded input: noise, grayscale, random shrink, back to target height.
        noisy = utils_pil.convert_to_gray(utils_font.add_noise(rendered))
        rate = random.randint(8, 17) / font_size
        noisy = np.asarray(utils_pil.resize(noisy, rate))
        noisy = utils.resize(noisy, height=image_height)
        noisy = (255. - noisy) / 255.
        noisy_images.append(noisy)

        # Clean label derived from the untouched copy of the render.
        clean = np.asarray(utils_pil.convert_to_gray(clean))
        clean = utils.resize(clean, height=image_height)
        clean = utils.img2bwinv(clean) / 255.
        clean_images.append(clean)

        widest = max(widest, noisy.shape[1], clean.shape[1])

    # Always grows the width, even when it is already a multiple of POOL_SIZE.
    widest += POOL_SIZE - widest % POOL_SIZE

    def _stack(batch_images):
        # Lay every image out as [width, height] inside a zero-padded tensor.
        stacked = np.zeros([batch_size, widest, image_height])
        for row, picture in enumerate(batch_images):
            vec = utils.img2vec(picture, height=image_height, width=widest,
                                flatten=False)
            stacked[row, :] = np.transpose(vec)
        return stacked

    inputs = _stack(noisy_images)
    labels = _stack(clean_images)
    return inputs, labels
def get_next_batch_for_srgan(batch_size=128):
    """Build one batch of (low-quality, high-quality) text images.

    Each sample renders a random text at font size 36, resizes it to
    ``image_height`` and converts it to grayscale.  A copy of that image is
    the target (stored as ``(255 - pixel) / 255``).  The input is the same
    image shrunk to a random smaller height, enlarged back to
    ``image_height``, passed through ``utils_font.add_noise``, dimmed by a
    random factor in [0.3, 1] and randomly inverted before scaling to 0..1.

    Returns:
        ``(inputs, targets)`` -- arrays of shape
        ``[batch_size, width, image_height]`` where ``width`` is the widest
        image in the batch (no extra padding is applied here).
    """
    inputs_images = []
    targets_images = []
    max_width_image = 0

    for _ in range(batch_size):
        font_name = random.choice(AllFontNames)
        font_length = random.randint(4, 5)
        font_size = 36
        font_mode = random.choice([0, 1, 2, 4])
        font_hint = random.choice([0, 1, 2, 3, 4, 5])
        text = utils_font.get_random_text(CHARS, eng_world_list, font_length)

        image = utils_font.get_font_image_from_url(
            text, font_name, font_size, font_mode, font_hint)
        image = utils_pil.resize_by_height(image, image_height)
        image = utils_pil.convert_to_gray(image)
        targets_image = image.copy()

        # `image_height // 1.5` is a float; random.randint() rejects float
        # bounds on Python 3.12+, so convert explicitly.  max(9, ...) keeps
        # the range non-empty when image_height is small.
        divisor = random.choice([1, 1.5, 2, 2.5])
        _h = random.randint(9, max(9, int(image_height // divisor)))
        image = utils_pil.resize_by_height(image, _h)
        image = utils_pil.resize_by_height(image, image_height,
                                           random.random() > 0.5)

        targets_image = np.asarray(targets_image)
        targets_images.append((255. - targets_image) / 255.)

        image = utils_font.add_noise(image)
        image = np.asarray(image)
        image = image * random.uniform(0.3, 1)
        if random.random() > 0.5:
            image = (255. - image) / 255.
        else:
            image = image / 255.
        inputs_images.append(image)

        max_width_image = max(max_width_image, image.shape[1],
                              targets_image.shape[1])

    inputs = np.zeros([batch_size, max_width_image, image_height])
    for i, picture in enumerate(inputs_images):
        image_vec = utils.img2vec(picture, height=image_height,
                                  width=max_width_image, flatten=False)
        inputs[i, :] = np.transpose(image_vec)

    targets = np.zeros([batch_size, max_width_image, image_height])
    for i, picture in enumerate(targets_images):
        image_vec = utils.img2vec(picture, height=image_height,
                                  width=max_width_image, flatten=False)
        targets[i, :] = np.transpose(image_vec)

    return inputs, targets
def scan(file):
    """Run OCR over an uploaded image and return the recognised texts.

    The upload (``file.stream``) is converted to grayscale, cleaned with
    ``utils.clearImgGray`` and split by ``utils.splitImg``.  Every piece is
    cleaned again, inverted, trimmed, resized to ``ocr.image_height`` and fed
    to the session one at a time.  Intermediate images are written to the
    ``test`` directory under ``curr_dir``.

    Returns:
        A list with one entry per decoded sequence, in processing order.
    """
    source = np.array(Image.open(file.stream))
    source = utils.clearImgGray(utils.img2gray(source))
    utils.save(source, os.path.join(curr_dir, "test", "src.png"))

    ocr_texts = []
    for idx, piece in enumerate(utils.splitImg(source)):
        piece = utils.clearImgGray(piece)
        piece = 255. - piece
        piece = utils.dropZeroEdges(piece)
        piece = utils.resize(piece, ocr.image_height)
        utils.save(piece, os.path.join(curr_dir, "test", "%s.png" % idx))
        piece = piece / 255.

        # Width is always grown to the next multiple of POOL_SIZE.
        padded_width = piece.shape[1]
        padded_width += ocr.POOL_SIZE - padded_width % ocr.POOL_SIZE

        piece_vec = utils.img2vec(piece, ocr.image_height, padded_width)
        ocr_inputs = np.zeros([1, padded_width, ocr.image_height])
        ocr_inputs[0, :] = np.transpose(
            piece_vec.reshape((ocr.image_height, padded_width)))
        ocr_seq_len = np.ones(ocr_inputs.shape[0]) * (padded_width * ocr.image_height) // (ocr.POOL_SIZE * ocr.POOL_SIZE)

        feed = {inputs: ocr_inputs, seq_len: ocr_seq_len, input_keep_prob: 1.0}
        started = time.time()
        decoded_list = session.run(decoded[0], feed)
        seconds = round(time.time() - started, 2)
        print("filished ocr %s , paid %s seconds" % (idx, seconds))

        ocr_texts.extend(
            ocr.list_to_chars(detected)
            for detected in ocr.decode_sparse_tensor(decoded_list))

    return ocr_texts
def crack(file):
    """Predict the text of an uploaded captcha image.

    The upload (``file.stream``) is opened with PIL, converted through
    ``img2gray`` and ``img2vec`` and fed to the ``prediction`` tensor of the
    module-level session.

    Returns:
        The elements of the first prediction joined into one string.
    """
    pixels = np.array(Image.open(file.stream))
    features = img2vec(img2gray(pixels))
    outputs = sess.run([prediction], feed_dict={x: [features]})
    return "".join(str(symbol) for symbol in outputs[0][0])
def scan(file):
    """Run OCR over an uploaded image decoded with OpenCV.

    The raw bytes of ``file.stream`` are decoded with ``cv2.imdecode`` using
    flag ``0`` (grayscale), split with ``utils.splitImg``, and every piece is
    passed through ``utils.img2bwinv``, trimmed, resized to
    ``ocr.image_height`` and recognised on its own.  Each piece is also saved
    to the ``test`` directory under ``curr_dir``.

    Returns:
        A list with one entry per decoded sequence, in processing order.
    """
    raw = np.asarray(bytearray(file.stream.read()), dtype=np.uint8)
    source = cv2.imdecode(raw, 0)

    ocr_texts = []
    for idx, piece in enumerate(utils.splitImg(source)):
        piece = utils.dropZeroEdges(utils.img2bwinv(piece))
        piece = utils.resize(piece, ocr.image_height)
        utils.save(piece, os.path.join(curr_dir, "test", "%s.png" % idx))

        # Five extra columns of padding on top of the piece's own width.
        padded_width = piece.shape[1] + 5
        piece_vec = utils.img2vec(piece, ocr.image_height, padded_width)
        ocr_inputs = np.zeros([1, padded_width, ocr.image_height])
        ocr_inputs[0, :] = np.transpose(
            piece_vec.reshape((ocr.image_height, padded_width)))
        ocr_seq_len = np.ones(ocr_inputs.shape[0]) * padded_width

        feed = {inputs: ocr_inputs, seq_len: ocr_seq_len, input_keep_prob: 1.0}
        started = time.time()
        decoded_list = session.run(decoded[0], feed)
        seconds = round(time.time() - started, 2)
        print("filished ocr %s , paid %s seconds" % (idx, seconds))

        ocr_texts.extend(
            ocr.list_to_chars(detected)
            for detected in ocr.decode_sparse_tensor(decoded_list))

    return ocr_texts
def get_next_batch(batch_size=128):
    """Sample ``batch_size`` training lines and build dense inputs/labels.

    Every line of ``train_files`` is ``"<image stem> <text>"``.  The image
    vector is taken from the module-level ``images`` mapping when present,
    otherwise ``<stem>.png`` is read from ``DATA_DIR`` and preprocessed.  The
    text is right-padded with spaces to ``label_size`` and encoded as indices
    into ``CHARS``; characters not in ``CHARS`` map to ``UNKOWN_CHAR``.

    Returns:
        ``(inputs, labels)`` with shapes
        ``[batch_size, image_size[1] * image_size[0]]`` and
        ``[batch_size, label_size]`` (integer dtype).
    """
    inputs = np.zeros([batch_size, image_size[1] * image_size[0]])
    labels = np.zeros([batch_size, label_size], dtype=int)

    for row, line in enumerate(random.sample(train_files, batch_size)):
        first_space = line.index(' ')
        image_file_name = line[:first_space] + ".png"
        # Pad the text with spaces up to the fixed label length.
        text = line[first_space:].strip().ljust(label_size)

        if image_file_name in images:
            imgvec = images[image_file_name]
        else:
            # Preprocessing yields an inverted black-and-white picture.
            picture = readImgFile(os.path.join(curr_dir, DATA_DIR, image_file_name))
            picture = dropZeroEdges(img2bwinv(picture))
            picture = resize(picture, image_size[0])
            imgvec = img2vec(picture, image_size[0], image_size[1])
        inputs[row, :] = imgvec

        labels[row, :] = [
            CHARS.index(c) if c in CHARS else CHARS.index(UNKOWN_CHAR)
            for c in text
        ]

    return inputs, labels
def get_next_batch(batch_size=128):
    """Sample training lines and build inputs with sparse labels.

    Every sampled line of ``train_files`` is ``"<image stem> <text>"``; the
    image ``<stem>.png`` is read from ``data/dataset`` under ``curr_dir``
    (the original note says these pictures are inverted black-and-white) and
    the text is encoded as indices into ``CHARS``.

    Returns:
        ``(inputs, sparse_labels, seq_len)`` where ``inputs`` has shape
        ``[batch_size, widest image width, image_height]``, ``sparse_labels``
        is the result of ``sparse_tuple_from`` and ``seq_len`` is filled with
        the widest image width.
    """
    sampled_lines = random.sample(train_files, batch_size)
    pictures = []
    codes = []
    widest = 0

    for line in sampled_lines:
        first_space = line.index(' ')
        image_file_name = line[:first_space] + ".png"
        text = line[first_space:].strip()

        picture = readImgFile(
            os.path.join(curr_dir, "data", "dataset", image_file_name))
        pictures.append(picture)
        widest = max(widest, picture.shape[1])
        codes.append([CHARS.index(char) for char in text])

    inputs = np.zeros([batch_size, widest, image_height])
    for row, picture in enumerate(pictures):
        vec = img2vec(picture, height=image_height, width=widest, flatten=False)
        inputs[row, :] = np.transpose(vec)

    # Convert the per-sample index arrays into a sparse representation.
    sparse_labels = sparse_tuple_from([np.asarray(code) for code in codes])
    seq_len = np.ones(batch_size) * widest
    return inputs, sparse_labels, seq_len
def get_next_batch(batch_size=128):
    """Generate a batch of rendered text images with sparse labels.

    A base text length is drawn once per batch (10..80); every sample then
    uses a length within +/-5 of it and a font size in 14..64.  ``getImage``
    returns the text and its rendered image.

    Returns:
        ``(inputs, sparse_labels, seq_len)`` where ``inputs`` has shape
        ``[batch_size, width, image_height]`` (``width`` grown by
        ``POOL_SIZE - width % POOL_SIZE``) and ``seq_len`` is filled with
        ``width * image_height // POOL_SIZE ** 2``.
    """
    pictures = []
    codes = []
    widest = 0
    font_min_length = random.randint(10, 80)

    for _ in range(batch_size):
        font_name = random.choice(FontNames)
        font_length = random.randint(font_min_length - 5, font_min_length + 5)
        font_size = random.randint(14, 64)
        text, picture = getImage(CHARS, font_name, image_height, font_length,
                                 font_size, eng_world_list)
        pictures.append(picture)
        widest = max(widest, picture.shape[1])
        codes.append([CHARS.index(char) for char in text])

    # Original note: round the width up to a multiple of 4.  The width always
    # grows, even when it is already a multiple of POOL_SIZE.
    widest += POOL_SIZE - widest % POOL_SIZE

    inputs = np.zeros([batch_size, widest, image_height])
    for row, picture in enumerate(pictures):
        vec = img2vec(picture, height=image_height, width=widest, flatten=False)
        inputs[row, :] = np.transpose(vec)

    # Convert the per-sample index arrays into a sparse representation.
    sparse_labels = sparse_tuple_from([np.asarray(code) for code in codes])

    # Original note: the model pools twice, so seq_len is divided accordingly.
    seq_len = np.ones(batch_size) * (widest * image_height) // (POOL_SIZE * POOL_SIZE)
    return inputs, sparse_labels, seq_len
def get_next_batch(batch_size=128):
    """Generate a batch of long rendered text lines with sparse labels.

    Every sample uses a random font from ``FontNames``, a text length in
    50..100 and a font size in 9..20; ``getImage`` returns the text and its
    rendered image.

    Returns:
        ``(inputs, sparse_labels, seq_len)`` where ``inputs`` has shape
        ``[batch_size, widest image width, image_height]`` and ``seq_len`` is
        filled with that width.
    """
    pictures = []
    codes = []
    widest = 0

    for _ in range(batch_size):
        font_name = random.choice(FontNames)
        font_length = random.randint(50, 100)
        font_size = random.randint(9, 20)
        text, picture = getImage(CHARS, font_name, image_height, font_length,
                                 font_size)
        pictures.append(picture)
        widest = max(widest, picture.shape[1])
        codes.append([CHARS.index(char) for char in text])

    inputs = np.zeros([batch_size, widest, image_height])
    for row, picture in enumerate(pictures):
        vec = img2vec(picture, height=image_height, width=widest, flatten=False)
        inputs[row, :] = np.transpose(vec)

    # Convert the per-sample index arrays into a sparse representation.
    sparse_labels = sparse_tuple_from([np.asarray(code) for code in codes])
    seq_len = np.ones(batch_size) * widest
    return inputs, sparse_labels, seq_len
def get_next_batch(batch_size=128):
    """Generate a batch of random-font text images paired with a reference render.

    Each input comes from ``utils.getImage`` with a random font, length and
    size.  Its label is the same text rendered with ``ConsolasFont`` at size
    64 via ``utils.renderNormalFontByPIL``, trimmed and scaled to
    ``image_height``; if that would be wider than the widest input seen so
    far in the batch, it is scaled to that width instead.  The label is then
    converted with ``utils.img2gray`` and divided by 255.

    Returns:
        ``(inputs, labels)`` -- arrays of shape
        ``[batch_size, widest input width, image_height]``.
    """
    images = []
    to_images = []
    max_width_image = 0
    font_min_length = random.randint(10, 20)

    for _ in range(batch_size):
        font_name = random.choice(FontNames)
        font_length = random.randint(font_min_length - 5, font_min_length + 5)
        font_size = random.randint(9, 64)
        text, image = utils.getImage(CHARS, font_name, image_height,
                                     font_length, font_size, eng_world_list)
        images.append(image)
        max_width_image = max(max_width_image, image.shape[1])

        to_image = utils.renderNormalFontByPIL(ConsolasFont, 64, text)
        to_image = utils.trim(to_image)
        w, h = to_image.size
        _w = round(w * image_height / h)
        _h = image_height
        if _w > max_width_image:
            # Too wide: fit to the running maximum width and keep the aspect ratio.
            _w = max_width_image
            _h = round(h * max_width_image / w)
        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
        to_image = to_image.resize((_w, _h), Image.LANCZOS)
        to_image = np.asarray(to_image)
        to_image = utils.img2gray(to_image)
        to_image = to_image / 255
        to_images.append(to_image)

    inputs = np.zeros([batch_size, max_width_image, image_height])
    for i, picture in enumerate(images):
        image_vec = utils.img2vec(picture, height=image_height,
                                  width=max_width_image, flatten=False)
        inputs[i, :] = np.transpose(image_vec)

    labels = np.zeros([batch_size, max_width_image, image_height])
    for i, picture in enumerate(to_images):
        image_vec = utils.img2vec(picture, height=image_height,
                                  width=max_width_image, flatten=False)
        labels[i, :] = np.transpose(image_vec)

    return inputs, labels
def get_batch(batch_size=128):
    """Generate a batch of captcha images with their raw character labels.

    Returns:
        ``(batch_x, batch_y)`` with shapes ``[batch_size, image_size]`` and
        ``[batch_size, captcha_size]``.  Each label row holds the characters
        of the captcha text written into a float array.
    """
    batch_x = np.zeros([batch_size, image_size])
    batch_y = np.zeros([batch_size, captcha_size])
    for row in range(batch_size):
        text, picture = captcha(char_set=char_set, captcha_size=captcha_size,
                                width=image_w, height=image_h)
        batch_x[row, :] = img2vec(img2gray(picture))
        # Original note: the label here must NOT be one-hot encoded.
        batch_y[row, :] = list(text)
    return batch_x, batch_y
def get_batch(batch_size=128):
    """Generate a batch of single-character captcha images.

    Returns:
        ``(batch_x, batch_y)`` with shapes ``[batch_size, image_size]`` and
        ``[batch_size, char_size]``; each label row is
        ``text2vec(char_set, text)`` for the generated character.
    """
    batch_x = np.zeros([batch_size, image_size])
    batch_y = np.zeros([batch_size, char_size])
    for row in range(batch_size):
        text, picture = captcha(char_set=char_set, captcha_size=1,
                                width=image_w, height=image_h)
        batch_x[row, :] = img2vec(img2gray(picture))
        batch_y[row, :] = text2vec(char_set, text)
    return batch_x, batch_y
def get_batch(batch_size=128):
    """Load a random batch of captcha images from ``fonts_dir``.

    ``batch_size`` names are sampled from ``files``; the first four
    characters of each file name are used as the label text.

    Returns:
        ``(batch_x, batch_y)`` with shapes ``[batch_size, image_size]`` and
        ``[batch_size, captcha_size * char_size]``.
    """
    chosen_files = random.sample(files, batch_size)
    batch_x = np.zeros([batch_size, image_size])
    batch_y = np.zeros([batch_size, captcha_size * char_size])
    for row, file_name in enumerate(chosen_files):
        picture = loadimg(os.path.join(fonts_dir, file_name))
        text = file_name[:4]
        batch_x[row, :] = img2vec(img2gray(picture))
        batch_y[row, :] = text2vec(char_set, text)
    return batch_x, batch_y
def get_next_batch_for_res(batch_size=128):
    """Generate a batch of noisy rendered text images with sparse labels.

    Each text is built from 12 characters sampled from ``CHARS``, doubled,
    mixed with two spaces, shuffled and stripped.  It is rendered through
    ``utils_font.get_font_image_from_url``, resized to ``image_height``,
    passed through ``add_noise``, converted to grayscale and randomly
    inverted while scaling to 0..1.

    Returns:
        ``(inputs, sparse_labels, seq_len, info)`` where ``inputs`` has shape
        ``[batch_size, width, image_height]`` (``width`` grown by
        ``POOL_SIZE - width % POOL_SIZE``), ``seq_len`` is filled with
        ``width * image_height // POOL_SIZE ** 2`` and ``info`` is the
        concatenation of every sample's font URL followed by ``"\\n\\r"``.
    """
    pictures = []
    codes = []
    url_lines = []
    widest = 0

    for _ in range(batch_size):
        font_name = random.choice(AllFontNames)
        # Unused result; the draw is kept so the random sequence is unchanged.
        random.randint(25, 30)
        if random.random() > 0.5:
            font_size = random.randint(8, 49)
        else:
            font_size = random.randint(8, 15)
        font_mode = random.choice([0, 1, 2, 4])
        font_hint = random.choice([0, 1, 2, 3, 4, 5])

        chars = random.sample(CHARS, 12)
        chars = chars + chars + [" ", " "]
        random.shuffle(chars)
        text = "".join(chars).strip()
        codes.append([CHARS.index(char) for char in text])

        picture = utils_font.get_font_image_from_url(
            text, font_name, font_size, font_mode, font_hint)
        picture = utils_pil.resize_by_height(picture, image_height,
                                             random.random() > 0.5)
        picture = utils_font.add_noise(picture)
        picture = np.asarray(utils_pil.convert_to_gray(picture))
        picture = utils.resize(picture, height=image_height)
        if random.random() > 0.5:
            picture = (255. - picture) / 255.
        else:
            picture = picture / 255.
        pictures.append(picture)

        widest = max(widest, picture.shape[1])
        url_lines.append("%s\n\r" % utils_font.get_font_url(
            text, font_name, font_size, font_mode, font_hint))

    # Always grows the width, even when it is already a multiple of POOL_SIZE.
    widest += POOL_SIZE - widest % POOL_SIZE

    inputs = np.zeros([batch_size, widest, image_height])
    for row, picture in enumerate(pictures):
        vec = utils.img2vec(picture, height=image_height, width=widest,
                            flatten=False)
        inputs[row, :] = np.transpose(vec)

    sparse_labels = utils.sparse_tuple_from([np.asarray(code) for code in codes])
    seq_len = np.ones(batch_size) * (widest * image_height) // (POOL_SIZE * POOL_SIZE)
    info = "".join(url_lines)
    return inputs, sparse_labels, seq_len, info
def scan():
    """OCR the bundled ``test/1.jpg`` sample and return texts per image group.

    ``init()`` supplies the session and graph tensors, and
    ``utils.loadImage`` yields groups of images.  Each group is packed into
    one tensor padded to the group's widest image plus five columns and
    decoded in a single session run.  Every image is also saved to the
    ``test`` directory as ``<group>-<index>.png``.

    Returns:
        A list with one list of recognised texts per image group.
    """
    session, inputs, seq_len, input_keep_prob, decoded, log_prob = init()
    image_groups = utils.loadImage(os.path.join(curr_dir, 'test', '1.jpg'), 0)

    ocr_text_groups = []
    for group_no, group in enumerate(image_groups):
        # The widest image of the group (plus padding) sets the input length.
        group_width = max([picture.shape[1] for picture in group]) + 5
        ocr_inputs = np.zeros([len(group), group_width, ocr.image_height])

        for pos, picture in enumerate(group):
            utils.save(picture, os.path.join(curr_dir, "test",
                                             "%s-%s.png" % (group_no, pos)))
            vec = utils.img2vec(picture, ocr.image_height, group_width)
            ocr_inputs[pos, :] = np.transpose(
                vec.reshape((ocr.image_height, group_width)))

        ocr_seq_len = np.ones(ocr_inputs.shape[0]) * group_width
        feed = {inputs: ocr_inputs, seq_len: ocr_seq_len, input_keep_prob: 1.0}

        print("starting ocr inputs %s ..." % group_no)
        started = time.time()
        decoded_list = session.run(decoded[0], feed)
        seconds = round(time.time() - started, 2)
        print("filished ocr inputs %s, paid %s seconds" % (group_no, seconds))

        ocr_text_groups.append([
            ocr.list_to_chars(detected)
            for detected in ocr.decode_sparse_tensor(decoded_list)
        ])

    return ocr_text_groups
def _to_array_allow_ragged(items):
    """Return ``np.array(items)``, or a 1-D object array when shapes differ."""
    try:
        return np.array(items)
    except ValueError:
        # NumPy >= 1.24 refuses to build ragged arrays implicitly; older
        # versions returned an object array, which is reproduced here.
        ragged = np.empty(len(items), dtype=object)
        for idx, item in enumerate(items):
            ragged[idx] = item
        return ragged


def get_next_batch_for_res(batch_size=128, has_sparse=True, has_onehot=True,
                           max_width=4096, height=32,
                           need_pad_width_to_max_width=False):
    """Read ``batch_size`` serialized examples and build a training batch.

    Each example is pulled from the module-level ``dataset`` iterator and
    parsed into ``dataset_example``.  The image is converted to grayscale,
    shrunk to ``height`` if taller, shrunk again by 1..5 pixels, placed with
    ``utils_pil.random_space2``, passed through ``add_noise``, resized with
    ``utils.resize(image, height, max_width)`` and randomly inverted while
    scaling to 0..1.

    Args:
        batch_size: Number of examples to read.
        has_sparse: Build ``sparse_labels`` with ``utils.sparse_tuple_from``.
        has_onehot: Build one one-hot matrix per label.
        max_width: Largest batch width accepted.
        height: Target height used while resizing each image.
        need_pad_width_to_max_width: Use ``max_width`` as the batch width.

    Returns:
        ``(inputs, labels, sparse_labels, onehot_labels, info)``.  ``inputs``
        has shape ``[batch_size, image_height, width, 1]``.
        ``sparse_labels`` / ``onehot_labels`` are ``None`` when disabled.
        ``info`` holds ``[font_name, font_size, font_mode, font_hint,
        text length]`` as strings for every sample.

    Raises:
        Exception: When ``dataset`` is exhausted, or when the padded batch
            width exceeds ``max_width``.
    """
    inputs_images = []
    codes = []
    # Largest image width seen in this batch.
    max_width_image = 0
    info = []

    for _ in range(batch_size):
        serialized_example = next(dataset, None)
        if serialized_example is None:
            raise Exception("has finished train one data file, stop")
        dataset_example.ParseFromString(serialized_example)
        feature = dataset_example.features.feature

        font_name = str(feature['font_name'].bytes_list.value[0],
                        encoding="utf-8")
        font_size = feature['font_size'].int64_list.value[0]
        font_mode = feature['font_mode'].int64_list.value[0]
        font_hint = feature['font_hint'].int64_list.value[0]
        text = str(feature['label'].bytes_list.value[0], encoding="utf-8")
        size = feature['size'].int64_list.value
        image = feature['image'].bytes_list.value[0]
        image = utils_pil.frombytes(tuple(size), image)

        image = utils_pil.convert_to_gray(image)
        w, h = size
        if h > height:
            image = utils_pil.resize_by_height(image, height)

        # Randomly shift the picture: shrink slightly, then re-place it.
        image = utils_pil.resize_by_height(image, height - random.randint(1, 5))
        image, _ = utils_pil.random_space2(image, image, height)

        image = utils_font.add_noise(image)
        image = np.asarray(image)
        # Original note: scales by height, or by width if max_width is exceeded.
        image = utils.resize(image, height, max_width)

        # Randomly invert, normalising to 0..1 either way.
        if random.random() > 0.5:
            image = image / 255.
        else:
            image = (255. - image) / 255.

        max_width_image = max(max_width_image, image.shape[1])
        inputs_images.append(image)
        codes.append([CHARS.index(char) for char in text])
        info.append([
            font_name,
            str(font_size),
            str(font_mode),
            str(font_hint),
            str(len(text)),
        ])

    # Round the batch width up to a multiple of 4.
    if max_width_image % 4 > 0:
        max_width_image = max_width_image + 4 - max_width_image % 4

    # Too-wide batches are rejected rather than rescaled.
    if max_width_image > max_width:
        raise Exception("img width must %s <= %s " % (max_width_image, max_width))

    if need_pad_width_to_max_width:
        max_width_image = max_width

    # NOTE(review): the tensor below uses the module-level `image_height`,
    # while resizing above uses the `height` argument -- confirm callers
    # always pass height == image_height.
    inputs = np.zeros([batch_size, image_height, max_width_image, 1])
    for i in range(batch_size):
        image_vec = utils.img2vec(inputs_images[i], height=image_height,
                                  width=max_width_image, flatten=False)
        inputs[i, :] = np.reshape(image_vec, (image_height, max_width_image, 1))

    labels = [np.asarray(code) for code in codes]

    sparse_labels = None
    onehot_labels = None
    if has_sparse:
        sparse_labels = utils.sparse_tuple_from(labels)
        sparse_labels = _to_array_allow_ragged(sparse_labels)
    if has_onehot:
        identity = np.eye(CLASSES_NUMBER)
        onehot_labels = _to_array_allow_ragged(
            [identity[label] for label in labels])

    return (inputs, _to_array_allow_ragged(labels), sparse_labels,
            onehot_labels, info)
def get_next_batch(batch_size=128):
    """Generate a batch of degraded inputs, clean targets and sparse labels.

    Each sample renders a random text at font size 36 and resizes it to
    ``image_height``.  A copy becomes the target (grayscale,
    ``utils.img2bwinv``, scaled by 1/255).  The input is passed through
    ``add_noise``, converted to grayscale, shrunk to a random smaller height,
    enlarged back to ``image_height`` and stored as ``(255 - pixel) / 255``.

    Returns:
        ``(inputs, targets, sparse_labels, seq_len)``.  ``inputs`` and
        ``targets`` have shape ``[batch_size, width, image_height]``
        (``width`` grown by ``POOL_SIZE - width % POOL_SIZE``);
        ``sparse_labels`` comes from ``utils.sparse_tuple_from``; ``seq_len``
        is filled with ``width * image_height // POOL_SIZE ** 2``.
    """
    images = []
    to_images = []
    codes = []
    max_width_image = 0

    for _ in range(batch_size):
        font_name = random.choice(AllFontNames)
        font_length = random.randint(25, 30)
        font_size = 36
        font_mode = random.choice([0, 1, 2, 4])
        font_hint = random.choice([0, 1, 2, 3, 4, 5])
        text = utils_font.get_random_text(CHARS, eng_world_list, font_length)
        codes.append([CHARS.index(char) for char in text])

        image = utils_font.get_font_image_from_url(
            text, font_name, font_size, fontmode=font_mode, fonthint=font_hint)
        image = utils_pil.resize_by_height(image, image_height)
        to_image = image.copy()

        image = utils_font.add_noise(image)
        image = utils_pil.convert_to_gray(image)
        # `image_height // 1.5` is a float; random.randint() rejects float
        # bounds on Python 3.12+, so convert explicitly.  max(9, ...) keeps
        # the range non-empty when image_height is small.
        divisor = random.choice([1, 1.5, 2, 2.5])
        _h = random.randint(9, max(9, int(image_height // divisor)))
        image = utils_pil.resize_by_height(image, _h, random.random() > 0.5)
        image = utils_pil.resize_by_height(image, image_height,
                                           random.random() > 0.5)
        image = np.asarray(image)
        image = utils.resize(image, height=image_height)
        image = (255. - image) / 255.
        images.append(image)

        to_image = utils_pil.convert_to_gray(to_image)
        to_image = np.asarray(to_image)
        to_image = utils.resize(to_image, height=image_height)
        to_image = utils.img2bwinv(to_image)
        to_image = to_image / 255.
        to_images.append(to_image)

        max_width_image = max(max_width_image, image.shape[1],
                              to_image.shape[1])

    # Always grows the width, even when it is already a multiple of POOL_SIZE.
    max_width_image = max_width_image + (POOL_SIZE - max_width_image % POOL_SIZE)

    inputs = np.zeros([batch_size, max_width_image, image_height])
    for i, picture in enumerate(images):
        image_vec = utils.img2vec(picture, height=image_height,
                                  width=max_width_image, flatten=False)
        inputs[i, :] = np.transpose(image_vec)

    targets = np.zeros([batch_size, max_width_image, image_height])
    for i, picture in enumerate(to_images):
        image_vec = utils.img2vec(picture, height=image_height,
                                  width=max_width_image, flatten=False)
        targets[i, :] = np.transpose(image_vec)

    labels = [np.asarray(code) for code in codes]
    sparse_labels = utils.sparse_tuple_from(labels)
    seq_len = np.ones(batch_size) * (max_width_image * image_height) // (POOL_SIZE * POOL_SIZE)
    return inputs, targets, sparse_labels, seq_len
def get_next_batch_for_res(batch_size=128):
    """Read ``batch_size`` serialized examples and build a fixed-width batch.

    Each example is pulled from the module-level ``dataset`` iterator and
    parsed into ``dataset_example``.  The image is converted to grayscale,
    shrunk to ``image_height`` if taller, shrunk again by 1..5 pixels, placed
    with ``utils_pil.random_space2``, passed through ``add_noise``, resized
    with ``utils.resize(image, image_height, MAX_IMAGE_WIDTH)`` and randomly
    inverted while scaling to 0..1.

    Returns:
        ``(inputs, sparse_labels, seq_len, info)``.  ``inputs`` has shape
        ``[batch_size, image_height, width, 1]`` with ``width`` at least
        ``MAX_IMAGE_WIDTH``; ``seq_len`` is filled with ``SEQ_LENGTH``;
        ``info`` holds ``[font_name, font_size, font_mode, font_hint,
        text length]`` as strings for every sample.

    Raises:
        Exception: When ``dataset`` is exhausted.
    """
    inputs_images = []
    codes = []
    max_width_image = 0
    info = []

    for _ in range(batch_size):
        serialized_example = next(dataset, None)
        if serialized_example is None:
            raise Exception("has finished train one data file, stop")
        dataset_example.ParseFromString(serialized_example)
        feature = dataset_example.features.feature

        font_name = str(feature['font_name'].bytes_list.value[0],
                        encoding="utf-8")
        font_size = feature['font_size'].int64_list.value[0]
        font_mode = feature['font_mode'].int64_list.value[0]
        # Previously read from 'font_mode' by mistake, so info reported the
        # mode twice instead of the hint.
        font_hint = feature['font_hint'].int64_list.value[0]
        text = str(feature['label'].bytes_list.value[0], encoding="utf-8")
        size = feature['size'].int64_list.value
        image = feature['image'].bytes_list.value[0]
        image = utils_pil.frombytes(tuple(size), image)

        image = utils_pil.convert_to_gray(image)
        w, h = size
        if h > image_height:
            image = utils_pil.resize_by_height(image, image_height)
        image = utils_pil.resize_by_height(
            image, image_height - random.randint(1, 5))
        image, _ = utils_pil.random_space2(image, image, image_height)
        image = utils_font.add_noise(image)
        image = np.asarray(image)
        image = utils.resize(image, image_height, MAX_IMAGE_WIDTH)

        # Randomly invert, normalising to 0..1 either way.
        if random.random() > 0.5:
            image = image / 255.
        else:
            image = (255. - image) / 255.

        max_width_image = max(max_width_image, image.shape[1])
        inputs_images.append(image)
        codes.append([CHARS.index(char) for char in text])
        info.append([font_name, str(font_size), str(font_mode),
                     str(font_hint), str(len(text))])

    # Narrow batches are padded up to the fixed MAX_IMAGE_WIDTH.
    max_width_image = max(max_width_image, MAX_IMAGE_WIDTH)

    inputs = np.zeros([batch_size, image_height, max_width_image, 1])
    for i in range(batch_size):
        image_vec = utils.img2vec(inputs_images[i], height=image_height,
                                  width=max_width_image, flatten=False)
        inputs[i, :] = np.reshape(image_vec, (image_height, max_width_image, 1))

    labels = [np.asarray(code) for code in codes]
    sparse_labels = utils.sparse_tuple_from(labels)

    # Every sample reports the same SEQ_LENGTH; the per-sample value that
    # used to be computed in the loop was always overwritten here.
    seq_len = np.ones(batch_size) * SEQ_LENGTH
    return inputs, sparse_labels, seq_len, info
checkpoint_prefix = os.path.join(log_dir, "model.ckpt")

# Pick the most recently created .meta graph file in log_dir.
_meta_candidates = [
    (name, os.path.getctime(os.path.join(log_dir, name)))
    for name in os.listdir(log_dir)
    if name.endswith('.meta')
]
_meta_candidates.sort(key=lambda entry: entry[1])
metaFile = _meta_candidates[-1][0]

# Rebuild the graph from the .meta file and restore the latest checkpoint.
sess = tf.Session()
saver = tf.train.import_meta_graph(os.path.join(log_dir, metaFile))
ckpt = tf.train.get_checkpoint_state(log_dir)
if not (ckpt and ckpt.model_checkpoint_path):
    print("error: can't load checkpoint data")
    exit()
saver.restore(sess, ckpt.model_checkpoint_path)

# Smoke test: generate one 4-digit captcha and print what the model predicts.
text, image = captcha(char_set="0123456789", captcha_size=4, width=200, height=80)
image = img2vec(img2gray(image))

_graph = tf.get_default_graph()
x = _graph.get_tensor_by_name('x:0')
prediction = _graph.get_tensor_by_name('prediction:0')

y_ = sess.run([prediction], feed_dict={x: [image]})
result = "".join(str(digit) for digit in y_[0][0])
print("input: %s , get: %s" % (text, result))