def infer(img_path, mode='infer'):
    """Decode the images found via ``img_path`` and append them to a file.

    The latest checkpoint in ``FLAGS.checkpoint_dir`` is restored when one
    exists; otherwise the freshly initialised variables are used. Images are
    read in colour, scaled to ``[0, 1]`` and decoded in batches of
    ``FLAGS.batch_size``. Each decoded string is printed and appended as one
    line to ``./result.txt``.

    Only whole batches are processed: images left over after the last full
    batch are not decoded.

    Args:
        img_path: location handed to ``helper.load_img_path``.
        mode: mode string passed to ``cnn_lstm_otc_ocr.LSTMOCR``.
    """
    image_paths = helper.load_img_path(img_path)
    print(image_paths[:5])

    model = cnn_lstm_otc_ocr.LSTMOCR(mode)
    model.build_graph()

    batch_ratio = len(image_paths) / FLAGS.batch_size

    # Restrict CUDA to device index 2 before the session is created.
    os.environ["CUDA_VISIBLE_DEVICES"] = '2'
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())

        saver = tf.train.Saver(tf.global_variables(), max_to_keep=100)
        latest = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
        if not latest:
            print('cannot restore')
        else:
            saver.restore(sess, latest)
            print('restore from ckpt{}'.format(latest))

        predictions = []
        for step in range(int(batch_ratio)):
            first = step * FLAGS.batch_size
            last = (step + 1) * FLAGS.batch_size
            batch_paths = image_paths[first:last]

            frames = []
            for path in batch_paths:
                pixels = cv2.imread(path, cv2.IMREAD_COLOR).astype(
                    np.float32) / 255.
                frames.append(np.reshape(pixels, [
                    FLAGS.image_height, FLAGS.image_width,
                    FLAGS.image_channel
                ]))
            batch = np.asarray(frames)

            # One FLAGS.max_stepsize entry per image; assembled here although
            # the feed below only supplies the images.
            seq_len_input = np.full(
                len(batch_paths), FLAGS.max_stepsize, dtype=np.int64)

            decoded_batch = sess.run(model.dense_decoded,
                                     {model.inputs: batch})

            # -1 entries in the dense output carry no character.
            for row in decoded_batch:
                predictions.append(
                    ''.join(utils.decode_maps[idx] for idx in row
                            if idx != -1))

        with open('./result.txt', 'a') as out:
            for text in predictions:
                print(text)
                out.write(text + '\n')
def infer(img_path, mode='infer'):
    """Write ground-truth labels and decoded predictions for a set of images.

    Two files are produced in the working directory:

    * ``./actual.txt`` - one label per image, taken from the file name (the
      second ``_``-separated field, up to its first ``.``).
    * ``./result.txt`` - one decoded string per processed image.

    Images are read as grayscale, scaled to ``[0, 1]``, resized to
    ``FLAGS.image_width`` x ``FLAGS.image_height`` and decoded in batches of
    ``FLAGS.batch_size``. Images left over after the last full batch are not
    decoded, so ``result.txt`` can be shorter than ``actual.txt``.

    Args:
        img_path: location handed to ``helper.load_img_path``.
        mode: mode string passed to ``cnn_lstm_otc_ocr.LSTMOCR``.
    """
    image_paths = helper.load_img_path(img_path)
    print(image_paths[:5])

    with open('./actual.txt', 'w') as truth_file:
        for path in image_paths:
            base_name = path.split('/')[-1]
            label = base_name.split('_')[1].split('.')[0]
            truth_file.write(label + '\n')

    model = cnn_lstm_otc_ocr.LSTMOCR(mode)
    model.build_graph()

    num_batches = len(image_paths) // FLAGS.batch_size

    config = tf.ConfigProto(allow_soft_placement=True)
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())

        saver = tf.train.Saver(tf.global_variables(), max_to_keep=100)
        latest = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
        if not latest:
            print('cannot restore')
        else:
            saver.restore(sess, latest)
            print('restore from ckpt{}'.format(latest))

        predictions = []
        for step in range(num_batches):
            first = step * FLAGS.batch_size
            last = (step + 1) * FLAGS.batch_size
            batch_paths = image_paths[first:last]

            frames = []
            for path in batch_paths:
                gray = cv2.imread(path, cv2.IMREAD_GRAYSCALE).astype(
                    np.float32) / 255.
                gray = cv2.resize(
                    gray, (FLAGS.image_width, FLAGS.image_height))
                frames.append(np.reshape(gray, [
                    FLAGS.image_height, FLAGS.image_width,
                    FLAGS.image_channel
                ]))
            batch = np.asarray(frames)

            # One FLAGS.max_stepsize entry per image; assembled here although
            # the feed below only supplies the images.
            seq_len_input = np.full(
                len(batch_paths), FLAGS.max_stepsize, dtype=np.int64)

            decoded_batch = sess.run(model.dense_decoded,
                                     {model.inputs: batch})

            # -1 entries in the dense output carry no character.
            for row in decoded_batch:
                predictions.append(
                    ''.join(utils.decode_maps[idx] for idx in row
                            if idx != -1))

        with open('./result.txt', 'w') as out:
            for text in predictions:
                out.write(text + '\n')
def infer(img_path, mode='infer'):
    """Decode images and write a prediction-versus-label table.

    Every image returned by ``helper.load_img_path`` is converted to
    grayscale, scaled to ``[0, 1]`` and decoded in batches of at most
    ``FLAGS.batch_size`` (the last batch may be smaller). The expected text
    of an image is the last ``_``-separated field of its file name, up to the
    first ``.``.

    ``./result.txt`` is overwritten with a header row followed by one row per
    image: predicted text, expected text, and 1/0 for whether they match.

    Args:
        img_path: location handed to ``helper.load_img_path``.
        mode: mode string passed to ``orcmodel.LSTMOCR``.
    """
    image_paths = helper.load_img_path(img_path)
    print(image_paths[:5])

    model = orcmodel.LSTMOCR(mode)  # instantiate the model class
    model.build_graph()  # build the model graph

    # Total number of mini-batches, counting a final partial one.
    num_batches = math.ceil(len(image_paths) / FLAGS.batch_size)

    config = tf.ConfigProto(allow_soft_placement=True)
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())  # initialise variables

        # The saver covers all global variables.
        saver = tf.train.Saver(tf.global_variables(), max_to_keep=100)
        latest = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
        if not latest:
            print('cannot restore')
        else:
            saver.restore(sess, latest)  # restore the variables
            print('restore from ckpt{}'.format(latest))

        predictions = []
        labels = []
        for step in range(num_batches):
            first = step * FLAGS.batch_size
            last = (step + 1) * FLAGS.batch_size
            batch_paths = image_paths[first:last]

            # Prepare the prediction inputs.
            frames = []
            for path in batch_paths:
                labels.append(
                    path.split('/')[-1].split('_')[-1].split('.')[0])
                gray = Image.open(path).convert("L")
                pixels = np.array(gray).astype(np.float32) / 255.
                frames.append(np.reshape(pixels, [
                    FLAGS.image_height, FLAGS.image_width,
                    FLAGS.image_channel
                ]))
            batch = np.asarray(frames)

            # Every image is given the same length, FLAGS.max_stepsize.
            seq_lens = np.full(
                len(batch_paths), FLAGS.max_stepsize, dtype=np.int64)

            # Run the prediction and fetch the decoded result.
            dense_decoded = sess.run(
                model.dense_decoded,
                {model.inputs: batch, model.seq_len: seq_lens})

            # Map the decoded indices back to captcha text; -1 entries carry
            # no character.
            for row in dense_decoded:
                predictions.append(
                    ''.join(utils.decode_maps[idx] for idx in row
                            if idx != -1))

        # Record the prediction results.
        with open('./result.txt', 'w') as out:
            out.write("%-6s %-6s %-6s" % ("pred", "true", "is_right") + '\n')
            for predicted, expected in zip(predictions, labels):
                row_text = "%-6s %-6s %-d" % (predicted, expected,
                                              predicted == expected)
                out.write(row_text + '\n')
def infer(img_path, mode='infer'):
    """Decode the images found via ``img_path`` and append them to a file.

    The latest checkpoint in ``FLAGS.checkpoint_dir`` is restored when one
    exists; otherwise the freshly initialised variables are used. Images are
    read as grayscale, scaled to ``[0, 1]`` and decoded in batches of
    ``FLAGS.batch_size``. One decoded string per image is appended to
    ``./result.txt``.

    Only whole batches are processed: images left over after the last full
    batch are not decoded.

    Args:
        img_path: location handed to ``helper.load_img_path``.
        mode: mode string passed to ``cnn_lstm_otc_ocr.LSTMOCR``.

    Raises:
        IOError: if an image file cannot be read by OpenCV.
    """
    imgList = helper.load_img_path(img_path)
    print(imgList[:5])

    model = cnn_lstm_otc_ocr.LSTMOCR(mode)
    model.build_graph()

    # Floor division keeps the step count an int on Python 3 as well; a float
    # here makes range() raise TypeError.
    total_steps = len(imgList) // FLAGS.batch_size

    config = tf.ConfigProto(allow_soft_placement=True)
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())

        saver = tf.train.Saver(tf.global_variables(), max_to_keep=100)
        ckpt = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
        if ckpt:
            saver.restore(sess, ckpt)
            print('restore from ckpt{}'.format(ckpt))
        else:
            print('cannot restore')

        decoded_expression = []
        for curr_step in range(total_steps):
            start = curr_step * FLAGS.batch_size
            imgs_input = []
            for img in imgList[start:start + FLAGS.batch_size]:
                im = cv2.imread(img, 0)
                if im is None:
                    # cv2.imread reports an unreadable file by returning None
                    # rather than raising.
                    raise IOError('cannot read image: {}'.format(img))
                im = im.astype(np.float32) / 255.
                imgs_input.append(np.reshape(im, [
                    FLAGS.image_height, FLAGS.image_width,
                    FLAGS.image_channel
                ]))

            feed = {model.inputs: np.asarray(imgs_input)}
            dense_decoded_code = sess.run(model.dense_decoded, feed)

            # -1 entries in the dense output carry no character.
            for item in dense_decoded_code:
                decoded_expression.append(
                    ''.join(utils.decode_maps[i] for i in item if i != -1))

        with open('./result.txt', 'a') as f:
            for code in decoded_expression:
                f.write(code + '\n')
# Loads every image under ./infer/ into one batch and builds the inference
# graph of the LSTM OCR model.
import os

import cv2
import numpy as np
import tensorflow as tf

import cnn_lstm_otc_ocr
import helper
import utils

# `os` must be imported for this assignment to work; it restricts CUDA to
# device index 2.
os.environ["CUDA_VISIBLE_DEVICES"] = '2'
config = tf.ConfigProto()
config.gpu_options.allow_growth = True

FLAGS = utils.FLAGS

imgs_input = []
# Directory that holds the images to run inference on.
imgList = helper.load_img_path('./infer/')
# All images are fed as a single batch.
FLAGS.batch_size = len(imgList)
print(imgList[:5])
for img in imgList:
    im = cv2.imread(img, cv2.IMREAD_COLOR).astype(np.float32) / 255.
    im = np.reshape(
        im, [FLAGS.image_height, FLAGS.image_width, FLAGS.image_channel])
    imgs_input.append(im)
imgs_input = np.asarray(imgs_input)

model = cnn_lstm_otc_ocr.LSTMOCR('infer')
model.build_graph()
# Define the output node.
logit = model.get_logist()
def infer(img_path, mode='infer'):
    """Decode images, save labels/predictions and print an accuracy report.

    The expected text of an image is its file name with the last
    ``-``-separated field removed. Labels go to ``./actual.txt`` and decoded
    strings to ``./result.txt`` (both overwritten). Images are read as
    grayscale, scaled to ``[0, 1]``, resized to ``FLAGS.image_width`` x
    ``FLAGS.image_height`` and decoded in batches of ``FLAGS.batch_size``.

    Only whole batches are decoded, so the report covers just the images
    that were actually processed. It prints up to six correctly and six
    wrongly decoded file paths, up to six wrong prediction/label pairs, and
    the overall accuracy (0.0 when nothing was decoded).

    Args:
        img_path: location handed to ``helper.load_img_path``.
        mode: mode string passed to ``cnn_lstm_otc_ocr.LSTMOCR``.

    Raises:
        IOError: if an image file cannot be read by OpenCV.
    """
    imgList = helper.load_img_path(img_path)
    print(imgList[:5])

    actual = []
    with open('./actual.txt', 'w') as f:
        for name in imgList:
            code = '-'.join(name.split('/')[-1].split('-')[:-1])
            actual.append(code)
            f.write(code + '\n')
    actual = np.asarray(actual)

    model = cnn_lstm_otc_ocr.LSTMOCR(mode)
    model.build_graph()

    total_steps = len(imgList) // FLAGS.batch_size

    config = tf.ConfigProto(allow_soft_placement=True)
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())

        saver = tf.train.Saver(tf.global_variables(), max_to_keep=100)
        ckpt = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
        if ckpt:
            saver.restore(sess, ckpt)
            print('restore from ckpt{}'.format(ckpt))
        else:
            print('cannot restore')

        decoded_expression = []
        for curr_step in range(total_steps):
            start = curr_step * FLAGS.batch_size
            imgs_input = []
            for img in imgList[start:start + FLAGS.batch_size]:
                im = cv2.imread(img, cv2.IMREAD_GRAYSCALE)
                if im is None:
                    # cv2.imread reports an unreadable file by returning None
                    # rather than raising.
                    raise IOError('cannot read image: {}'.format(img))
                im = im.astype(np.float32) / 255.
                im = cv2.resize(im, (FLAGS.image_width, FLAGS.image_height))
                imgs_input.append(np.reshape(im, [
                    FLAGS.image_height, FLAGS.image_width,
                    FLAGS.image_channel
                ]))

            feed = {model.inputs: np.asarray(imgs_input)}
            dense_decoded_code = sess.run(model.dense_decoded, feed)

            # -1 entries in the dense output carry no character.
            for item in dense_decoded_code:
                decoded_expression.append(
                    ''.join(utils.decode_maps[i] for i in item if i != -1))

    with open('./result.txt', 'w') as f:
        for code in decoded_expression:
            f.write(code + '\n')

    decoded_expression = np.asarray(decoded_expression)
    # Images beyond the last full batch have no prediction. Trim labels and
    # paths so the element-wise comparison below sees equal-length arrays.
    processed = len(decoded_expression)
    actual = actual[:processed]
    imgList = np.asarray(imgList)[:processed]

    c = decoded_expression == actual
    w = decoded_expression != actual

    print("correct predictions:")
    print(imgList[c][:6])
    print("********")
    print("wrong predictions:")
    print(imgList[w][:6])
    print("********")
    # Slicing (not fixed indices) copes with fewer than six wrong predictions.
    for predicted, expected in zip(decoded_expression[w][:6], actual[w][:6]):
        print("prediction = {}".format(predicted))
        print("actual = {}".format(expected))
        print("********")

    acc = float(c.sum()) / processed if processed else 0.0
    print("accuracy = {}".format(acc))
def infer(img_path, mode='infer'):
    """Decode images and log each prediction next to its file-name label.

    Images are read as grayscale, scaled to ``[0, 1]`` and decoded in batches
    of ``FLAGS.batch_size``; images left over after the last full batch are
    not decoded. The expected text of an image is the last ``_``-separated
    field of its path with ``.jpg`` removed.

    ``./result.txt`` is overwritten with one ``<path> <label> <prediction>``
    line per decoded image. Finally ``matches/total = ratio`` is printed,
    where ``total`` is the number of images found (skipped images therefore
    count as misses) and the ratio is 0.0 when no images were found.

    Args:
        img_path: location handed to ``helper.load_img_path``.
        mode: mode string passed to ``cnn_lstm_otc_ocr.LSTMOCR``.

    Raises:
        IOError: if an image file cannot be read by OpenCV.
    """
    imgList = helper.load_img_path(img_path)
    print(imgList[:5])

    model = cnn_lstm_otc_ocr.LSTMOCR(mode)
    model.build_graph()

    total_steps = len(imgList) // FLAGS.batch_size

    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())

        saver = tf.train.Saver(tf.global_variables(), max_to_keep=100)
        ckpt = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
        if ckpt:
            saver.restore(sess, ckpt)
            print('restore from ckpt{}'.format(ckpt))
        else:
            print('cannot restore')

        decoded_expression = []
        for curr_step in range(total_steps):
            start = curr_step * FLAGS.batch_size
            imgs_input = []
            for img in imgList[start:start + FLAGS.batch_size]:
                im = cv2.imread(img, 0)
                if im is None:
                    # cv2.imread reports an unreadable file by returning None
                    # rather than raising.
                    raise IOError('cannot read image: {}'.format(img))
                im = im.astype(np.float32) / 255.
                imgs_input.append(np.reshape(im, [
                    FLAGS.image_height, FLAGS.image_width,
                    FLAGS.image_channel
                ]))

            feed = {model.inputs: np.asarray(imgs_input)}
            dense_decoded_code = sess.run(model.dense_decoded, feed)

            # -1 entries in the dense output carry no character.
            for item in dense_decoded_code:
                decoded_expression.append(
                    ''.join(utils.decode_maps[i] for i in item if i != -1))

        print(decoded_expression)

        true_count = 0
        with open('./result.txt', 'w') as f:
            # Predictions are in image order, so zip pairs each with its path.
            for img_name, code in zip(imgList, decoded_expression):
                img_label = img_name.split('_')[-1].replace('.jpg', '')
                if code == img_label:
                    true_count += 1
                f.write('{} {} {}\n'.format(img_name, img_label, code))

        # Guard the ratio so an empty image list does not divide by zero.
        total = len(imgList)
        ratio = float(true_count) / total if total else 0.0
        print('{}/{} = {}'.format(true_count, total, ratio))
def infer(root, mode='infer'):
    """Evaluate decoding accuracy separately for every entry under ``root``.

    The model is built and the latest checkpoint in ``FLAGS.checkpoint_dir``
    restored once. Then, for each entry of ``os.listdir(root)``, the images
    returned by ``helper.load_img_path`` are read as grayscale, scaled to
    ``[0, 1]``, resized and decoded in batches of ``FLAGS.batch_size``
    (images left over after the last full batch are not decoded). The
    expected text of an image is the upper-cased last ``_``-separated field
    of its path, up to the first ``.``.

    For each entry one line is appended to ``./result.txt``:
    ``<name>,<mean batch accuracy>,<sample_num>,<seconds per sample>``,
    with both numbers rounded to two decimals. Entries that do not contain
    at least one full batch are reported and skipped.

    Args:
        root: directory whose entries are evaluated one by one.
        mode: mode string passed to ``cnn_lstm_otc_ocr.LSTMOCR``.

    Raises:
        IOError: if an image file cannot be read by OpenCV.
    """
    model = cnn_lstm_otc_ocr.LSTMOCR(mode)
    model.build_graph()

    config = tf.ConfigProto(allow_soft_placement=True)
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())

        saver = tf.train.Saver(tf.global_variables(), max_to_keep=100)
        ckpt = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
        if ckpt:
            saver.restore(sess, ckpt)
            print('restore from ckpt{}'.format(ckpt))
        else:
            print('cannot restore')

        for img_file in os.listdir(root):
            start_time = time.time()
            img_path = os.path.join(root, img_file)
            print(img_path)

            file_name = img_path.split('/')[-1].split('_')[0]
            imgList = helper.load_img_path(img_path)

            # Floor division with range() works on Python 2 and 3 alike.
            total_steps = len(imgList) // FLAGS.batch_size
            # NOTE(review): the meaning of the factor 3 is not evident from
            # this function - confirm against the data layout.
            sample_num = len(imgList) * 3

            if total_steps == 0:
                # No full batch: skip rather than divide by zero below.
                print('skip {}: fewer than {} images'.format(
                    img_path, FLAGS.batch_size))
                continue

            total_acc = 0
            for curr_step in range(total_steps):
                start = curr_step * FLAGS.batch_size
                batch_paths = imgList[start:start + FLAGS.batch_size]

                imgs_input = []
                imgs_label = []
                for img in batch_paths:
                    label = img.split('_')[-1].split('.')[0]
                    imgs_label.append(label.upper())

                    im = cv2.imread(img, cv2.IMREAD_GRAYSCALE)
                    if im is None:
                        # cv2.imread reports an unreadable file by returning
                        # None rather than raising.
                        raise IOError('cannot read image: {}'.format(img))
                    im = im.astype(np.float32) / 255.
                    im = cv2.resize(
                        im, (FLAGS.image_width, FLAGS.image_height))
                    imgs_input.append(np.reshape(im, [
                        FLAGS.image_height, FLAGS.image_width,
                        FLAGS.image_channel
                    ]))

                # Every image is given the same length, FLAGS.max_stepsize.
                seq_len_input = np.full(
                    len(batch_paths), FLAGS.max_stepsize, dtype=np.int64)

                feed = {
                    model.inputs: np.asarray(imgs_input),
                    model.seq_len: seq_len_input,
                }
                dense_decoded_code = sess.run(model.dense_decoded, feed)

                # -1 entries in the dense output carry no character.
                decoded_expression = [
                    ''.join(utils.decode_maps[i] for i in item if i != -1)
                    for item in dense_decoded_code
                ]

                acc = utils.test_accuracy_calculation(
                    imgs_label, decoded_expression, True)
                total_acc += acc

            mean_acc = total_acc / float(total_steps)
            print(mean_acc)
            print(file_name)
            print(sample_num)

            seconds_per_sample = (time.time() - start_time) / sample_num
            with open('./result.txt', 'a') as f:
                f.write(','.join([
                    file_name,
                    str(round(mean_acc, 2)),
                    str(sample_num),
                    str(round(seconds_per_sample, 2)),
                ]) + '\n')