def recognize_char_p():
    """Answer single-character recognition requests read from stdin.

    Loads the label map and the network, restores the checkpoint located by
    ``find_model_ckpt`` and then serves one request per input line: every
    line is the path of an image, and the character predicted for it is
    written to stdout followed by a newline.

    The line ``$exit`` (or end of input) ends the loop so that the session
    is closed.  An image that cannot be read, or that cannot be reshaped to
    ``IMAGE_SIZE``, is answered with an empty line instead of terminating
    the loop.
    """
    label_map = load_label_map()
    model = load_model_nn()
    x = model['x']
    keep_prob = model['keep_prob']
    saver = model['saver']
    prediction = model['prediction']
    graph = model['graph']
    model_ckpt_path, _ = find_model_ckpt()

    with tf.Session(graph=graph) as session:
        tf.global_variables_initializer().run()
        saver.restore(session, model_ckpt_path)
        while True:
            # Push the previous answer out before blocking on the next request.
            sys.stdout.flush()
            try:
                captcha_path = input().strip()
            except EOFError:
                # stdin was closed without `$exit`; leave so the session closes.
                break
            if captcha_path == '$exit':  # for close session
                break

            pixels = cv2.imread(captcha_path, cv2.IMREAD_GRAYSCALE)
            if pixels is None:
                # cv2.imread reports an unreadable image by returning None.
                sys.stdout.write('\n')
                continue
            try:
                im = np.reshape(pixels, IMAGE_SIZE)
            except ValueError:
                # The image does not hold the number of pixels IMAGE_SIZE needs.
                sys.stdout.write('\n')
                continue

            label = prediction.eval(
                feed_dict={x: [im], keep_prob: 1.0},
                session=session,
            )[0]
            sys.stdout.write(label_map[label])
            sys.stdout.write('\n')
def recognize_p(nn_type=NNType.cnn):
    """Answer whole-captcha recognition requests read from stdin.

    Protocol, one request per input line:
        captcha_path
        $exit to exit

    For every path the captcha is split into characters with
    ``split_letters``, each character is classified, and the recognised
    characters are written to stdout as one line.  After each request a
    status line is written to stderr: ``All was well`` on success, or the
    error text with newlines replaced by ``@@@`` on failure (in which case
    the stdout answer is an empty line).

    :param nn_type: ``NNType.cnn`` loads the CNN model; any other value
        loads the RNN model.
    """
    label_map = load_label_map()
    if nn_type == NNType.cnn:
        model = load_nn.load_model_cnn()
    else:
        model = load_nn.load_model_rnn()

    x = model['x']
    keep_prob = model['keep_prob']
    saver = model['saver']
    prediction = model['prediction']

    # NOTE(review): the third value returned by find_model_ckpt is ignored
    # here; confirm that a restore is always possible at this point.
    model_ckpt_path, _, _ = find_model_ckpt(nn_type=nn_type)

    init = tf.global_variables_initializer()
    with tf.Session() as session:
        session.run(init)
        saver.restore(session, model_ckpt_path)
        while True:
            # Push the previous answer out before blocking on the next request.
            sys.stdout.flush()
            try:
                captcha_path = input().strip()
            except EOFError:
                # stdin was closed without `$exit`; leave so the session closes.
                break
            if captcha_path == '$exit':  # for close session
                break

            try:
                # Split the complete captcha image into a list of single
                # characters; for the CNN each one is reshaped to image_size.
                formatted_letters = split_letters(captcha_path)
                if nn_type == NNType.cnn:
                    formatted_letters = [
                        letter.reshape(image_size)
                        for letter in formatted_letters
                    ]
            except Exception as ex:
                # An empty stdout line is the answer for a failed request.
                sys.stdout.write('\n')
                # NOTE(review): traceback.print_stack reports the current call
                # stack, not the traceback of `ex` - confirm this is intended.
                err_msg = _fetch_stream(traceback.print_stack)
                err_msg = err_msg.replace('\n', '@@@')
                print('@@@'.join([str(ex), err_msg]), file=sys.stderr)
            else:
                print('All was well', file=sys.stderr)  # for recv_err
                # The recognised characters, in order, form the answer for
                # the whole captcha.
                for letter in formatted_letters:
                    label = prediction.eval(
                        feed_dict={x: [letter], keep_prob: 1.0},
                        session=session,
                    )[0]
                    sys.stdout.write(label_map[label])
                sys.stdout.write('\n')
def recognize_p():
    """Answer whole-captcha recognition requests read from stdin.

    Protocol, one request per input line:
        captcha_path
        $exit to exit

    The checkpoint located by ``find_model_ckpt`` is restored once; its
    path is reported on stderr.  For every request the captcha is split
    into characters by a ``Spliter``, each character is classified, and the
    recognised characters are written to stdout as one line.  If splitting
    or formatting fails, the answer is an empty line.
    """
    label_map = load_label_map()
    model = load_model_nn()
    x = model['x']
    keep_prob = model['keep_prob']
    saver = model['saver']
    prediction = model['prediction']
    graph = model['graph']
    model_ckpt_path, _ = find_model_ckpt()
    print('load check-point %s' % model_ckpt_path, file=sys.stderr)

    with tf.Session(graph=graph) as session:
        tf.global_variables_initializer().run()
        saver.restore(session, model_ckpt_path)
        while True:
            # Push the previous answer out before blocking on the next request.
            sys.stdout.flush()
            try:
                captcha_path = input().strip()
            except EOFError:
                # stdin was closed without `$exit`; leave so the session closes.
                break
            if captcha_path == '$exit':  # for close session
                break

            spliter = Spliter(os.curdir)
            try:
                formatted_letters = [
                    spliter.format_splited_image(letter).reshape(image_size)
                    for letter in spliter.split_letters(captcha_path)
                ]
            except Exception:
                # Best effort: a captcha that cannot be processed is answered
                # with an empty line so the caller is never left waiting.
                sys.stdout.write('\n')
                continue

            for letter in formatted_letters:
                label = prediction.eval(
                    feed_dict={x: [letter], keep_prob: 1.0},
                    session=session,
                )[0]
                sys.stdout.write(label_map[label])
            sys.stdout.write('\n')
def recognize_char_p(nn_type=NNType.cnn):
    """Answer single-character recognition requests read from stdin.

    Every input line is the path of an image.  The predicted character is
    written to stdout as one line, and a status line is written to stderr:
    ``All was well`` on success, or the error text with newlines replaced by
    ``@@@`` on failure (in which case the stdout answer is an empty line).
    ``All was well`` is also written once to stderr before the session is
    opened.  The line ``$exit`` (or end of input) ends the loop so that the
    session is closed.

    :param nn_type: ``NNType.cnn`` loads the CNN model; any other value
        loads the RNN model.
    """
    # Maps the numeric label produced by the network to its character,
    # e.g. 0 -> 0, 10 -> `a`.
    label_map = load_label_map()

    # Load the neural network model.
    if nn_type == NNType.cnn:
        model = load_nn.load_model_cnn()
    else:
        model = load_nn.load_model_rnn()

    x = model['x']
    keep_prob = model['keep_prob']
    saver = model['saver']
    prediction = model['prediction']
    graph = model['graph']

    # Locate the checkpoint path.
    # NOTE(review): the third value returned by find_model_ckpt is ignored
    # here; confirm that a restore is always possible at this point.
    model_ckpt_path, _, _ = find_model_ckpt(nn_type=nn_type)
    print('All was well', file=sys.stderr)

    with tf.Session(graph=graph) as session:
        tf.global_variables_initializer().run()  # initialise model variables
        # Replace the initial values with the trained ones from the checkpoint.
        saver.restore(session, model_ckpt_path)
        while True:
            # Push the previous answer out before blocking on the next request.
            sys.stdout.flush()
            try:
                # One line from stdin is the path of the image to recognise.
                captcha_path = input().strip()
            except EOFError:
                # stdin was closed without `$exit`; leave so the session closes.
                break
            if captcha_path == '$exit':  # for close session
                break

            try:
                # Read the image as a grey-scale pixel matrix ...
                pixels = cv2.imread(captcha_path, cv2.IMREAD_GRAYSCALE)
                if pixels is None:
                    # cv2.imread reports an unreadable image by returning
                    # None; fail with a message that names the real cause.
                    raise IOError('cannot read image %s' % captcha_path)
                # ... and reshape it to the layout the network expects.
                im = np.reshape(pixels, IMAGE_SIZE)
            except Exception as ex:
                # An empty stdout line is the answer for a failed request.
                sys.stdout.write('\n')
                # NOTE(review): traceback.print_stack reports the current call
                # stack, not the traceback of `ex` - confirm this is intended.
                err_msg = _fetch_stream(traceback.print_stack)
                err_msg = err_msg.replace('\n', '@@@')
                # Report the flattened error on stderr.
                print('@@@'.join([str(ex), err_msg]), file=sys.stderr)
            else:
                print('All was well', file=sys.stderr)  # for recv_err
                # Predict the numeric label for the image.
                label = prediction.eval(
                    feed_dict={x: [im], keep_prob: 1.0},
                    session=session,
                )[0]
                # label_map turns the numeric label into its digit or letter,
                # which is the answer written to stdout.
                sys.stdout.write(label_map[label])
                sys.stdout.write('\n')
def recognize_p():
    """Answer whole-captcha recognition requests read from stdin.

    Protocol, one request per input line:
        captcha_path
        $exit to exit

    The checkpoint located by ``find_model_ckpt`` is restored once; its
    path is reported on stderr.  For every request the captcha is split
    into characters by a ``Spliter``, each character is classified, and the
    recognised characters are written to stdout as one line.  If splitting
    or formatting fails, the answer is an empty line.
    """
    label_map = load_label_map()
    model = load_model_nn()
    x = model['x']
    keep_prob = model['keep_prob']
    saver = model['saver']
    prediction = model['prediction']
    graph = model['graph']
    model_ckpt_path, _ = find_model_ckpt()
    print('load check-point %s' % model_ckpt_path, file=sys.stderr)

    with tf.Session(graph=graph) as session:
        tf.global_variables_initializer().run()
        saver.restore(session, model_ckpt_path)
        while True:
            # Push the previous answer out before blocking on the next request.
            sys.stdout.flush()
            try:
                captcha_path = input().strip()
            except EOFError:
                # stdin was closed without `$exit`; leave so the session closes.
                break
            if captcha_path == '$exit':  # for close session
                break

            spliter = Spliter(os.curdir)
            try:
                formatted_letters = [
                    spliter.format_splited_image(letter).reshape(image_size)
                    for letter in spliter.split_letters(captcha_path)
                ]
            except Exception:
                # Best effort: a captcha that cannot be processed is answered
                # with an empty line so the caller is never left waiting.
                sys.stdout.write('\n')
                continue

            for letter in formatted_letters:
                label = prediction.eval(
                    feed_dict={x: [letter], keep_prob: 1.0},
                    session=session,
                )[0]
                sys.stdout.write(label_map[label])
            sys.stdout.write('\n')
def start_recognize_char_daemon(nn_type=NNType.cnn):
    """Spawn the single-character recogniser as a child process.

    Only one recogniser may run at a time (singleton include recognize_char
    because of saver.restore): if the previously started child is still
    alive an ``OSError`` is raised.  Otherwise this file is re-run with the
    ``recognize_char`` sub-command, the resulting process is wrapped by
    ``enhance_popen``, remembered in the module-level ``__p_recognize`` and
    returned.

    :param nn_type: ``NNType.cnn`` passes ``-nn cnn`` to the child; any
        other value passes ``-nn rnn``.
    :raises OSError: when a previously started recogniser is still running.
    """
    global __p_recognize

    still_running = (
        __p_recognize is not None and __p_recognize.poll() is None
    )
    if still_running:
        raise OSError('the checkpoint is used by another reconize process')

    ckpt_path, _step = find_model_ckpt(nn_type=nn_type)[:2]
    print('load check-point %s' % ckpt_path)

    nn_type_s = 'cnn' if nn_type == NNType.cnn else 'rnn'
    command = [sys.executable, __file__, 'recognize_char', '-nn', nn_type_s]
    child = Popen(
        command,
        bufsize=102400,
        stdin=PIPE,
        stdout=PIPE,
        stderr=PIPE,
    )
    # presumably enhance_popen makes the pipes exchange `str` rather than
    # `bytes` - TODO confirm against its definition
    daemon = enhance_popen(child)
    __p_recognize = daemon
    return daemon
def recognize_char_p():
    """Answer single-character recognition requests read from stdin.

    Loads the label map and the network, restores the checkpoint located by
    ``find_model_ckpt`` and then serves one request per input line: every
    line is the path of an image, and the character predicted for it is
    written to stdout followed by a newline.

    The line ``$exit`` (or end of input) ends the loop so that the session
    is closed.  An image that cannot be read, or that cannot be reshaped to
    ``IMAGE_SIZE``, is answered with an empty line instead of terminating
    the loop.
    """
    label_map = load_label_map()
    model = load_model_nn()
    x = model['x']
    keep_prob = model['keep_prob']
    saver = model['saver']
    prediction = model['prediction']
    graph = model['graph']
    model_ckpt_path, _ = find_model_ckpt()

    with tf.Session(graph=graph) as session:
        tf.global_variables_initializer().run()
        saver.restore(session, model_ckpt_path)
        while True:
            # Push the previous answer out before blocking on the next request.
            sys.stdout.flush()
            try:
                captcha_path = input().strip()
            except EOFError:
                # stdin was closed without `$exit`; leave so the session closes.
                break
            if captcha_path == '$exit':  # for close session
                break

            pixels = cv2.imread(captcha_path, cv2.IMREAD_GRAYSCALE)
            if pixels is None:
                # cv2.imread reports an unreadable image by returning None.
                sys.stdout.write('\n')
                continue
            try:
                im = np.reshape(pixels, IMAGE_SIZE)
            except ValueError:
                # The image does not hold the number of pixels IMAGE_SIZE needs.
                sys.stdout.write('\n')
                continue

            label = prediction.eval(
                feed_dict={x: [im], keep_prob: 1.0},
                session=session,
            )[0]
            sys.stdout.write(label_map[label])
            sys.stdout.write('\n')
def train(alpha=5e-5):
    """Train the network on the pickled dataset and checkpoint the result.

    The dataset at ``formatted_dataset_path`` is loaded, the model is built
    with ``load_model_nn(alpha)`` and, when ``find_model_ckpt`` finds an
    earlier checkpoint, training continues from it.  Minibatches of 64 rows
    are fed with ``keep_prob`` 0.5.  Every 50 steps the accuracy on the
    current batch and on the test set is printed, every 100 steps a
    checkpoint is written, and training stops once the test accuracy
    exceeds 0.999 or more than 2000 steps were run in this call.

    :param alpha: forwarded to ``load_model_nn`` (presumably the learning
        rate - confirm against that function).
    :raises ValueError: if the training set does not hold more rows than
        one minibatch, because no batch offset could be computed.
    """
    print("loading %s..." % formatted_dataset_path)
    # NOTE(security): unpickling can execute arbitrary code; only load
    # dataset files that come from a trusted source.
    with open(formatted_dataset_path, 'rb') as f:
        import sys
        if sys.version_info.major == 3:
            save = pickle.load(f, encoding='latin1')
        else:
            save = pickle.load(f)

    train_dataset = save['train_dataset']
    train_labels = save['train_labels']
    test_dataset = save['test_dataset']
    test_labels = save['test_labels']
    label_map = save['label_map']
    num_labels = len(label_map)

    print("train_dataset:", train_dataset.shape)
    print("train_labels:", train_labels.shape)
    print("test_dataset:", test_dataset.shape)
    print("test_labels:", test_labels.shape)
    print("num_labels:", num_labels)

    batch_size = 64
    num_samples = train_labels.shape[0]
    if num_samples <= batch_size:
        # The batch offset below is taken modulo (num_samples - batch_size),
        # which must be positive.
        raise ValueError(
            "need more than %d training samples, got %d"
            % (batch_size, num_samples)
        )

    model = load_model_nn(alpha)
    x = model['x']
    y = model['y']
    loss = model['loss']
    optimizer = model['optimizer']
    accuracy = model['accuracy']
    keep_prob = model['keep_prob']
    saver = model['saver']
    graph = model['graph']

    save_dir = os.path.join(trainer_dir, '.checkpoint')
    print("Model saved path: ", save_dir)

    def save_model(_step):
        # `session` is bound by the `with` statement below before any call.
        saver.save(
            session,
            os.path.join(save_dir, 'weibo.cn-model.ckpt'),
            global_step=_step,
        )

    # Evaluation on the test set always runs with dropout disabled.
    test_feed = {x: test_dataset, y: test_labels, keep_prob: 1.0}

    with tf.Session(graph=graph) as session:
        tf.summary.scalar('loss', loss)
        tf.summary.scalar('accuracy', accuracy)
        # NOTE(review): the merged summary op is created but never evaluated,
        # so only the graph is written to graph_log_dir.
        tf.summary.merge_all()
        writer = tf.summary.FileWriter(graph_log_dir, session.graph)
        try:
            tf.global_variables_initializer().run()

            step = 0
            try:
                # try to continue ....
                model_ckpt_path, global_step = find_model_ckpt()
            except FileNotFoundError:
                print("Initialized")
            else:  # try continue to train
                saver.restore(session, model_ckpt_path)
                step = global_step
                print('found %s, step from %d' % (model_ckpt_path, step))

            origin_step = step
            while True:
                # Generate a minibatch: a window that slides over the
                # training set and wraps around before running off the end.
                offset = (step * batch_size) % (num_samples - batch_size)
                batch_data = train_dataset[offset:(offset + batch_size), :]
                batch_labels = train_labels[offset:(offset + batch_size), :]

                session.run(
                    [optimizer, loss],
                    feed_dict={x: batch_data, y: batch_labels, keep_prob: 0.5},
                )
                step += 1

                if step % 50 != 0:
                    continue

                train_accuracy = session.run(
                    accuracy,
                    feed_dict={x: batch_data, y: batch_labels, keep_prob: 1.0},
                )
                test_accuracy = session.run(accuracy, feed_dict=test_feed)
                print(("Step %5d, Training accuracy: %4f, Test accuracy: %4f"
                       % (step, train_accuracy, test_accuracy)))

                if step % 100 == 0:  # save the model every 100 step
                    save_model(step)

                if test_accuracy > 0.999 or step - origin_step > 2000:
                    print('you can re-format dataset and give a smaller alpha '
                          'to continue training')
                    save_model(step)
                    break

            print("Test accuracy: %g"
                  % session.run(accuracy, feed_dict=test_feed))
        finally:
            # Flush and release the event file even when training is aborted.
            writer.close()
def train(alpha=5e-5, nn_type=NNType.cnn, target_accuracy=0.9955):
    """Train the CNN or RNN model and checkpoint the result.

    The label map and the train/test datasets are unpickled from
    ``formatted_dataset_dir``.  When ``find_model_ckpt`` reports an existing
    checkpoint, training continues from its step.  Batches of 128 samples
    are fed with ``keep_prob`` 0.5.  Every 10th step the summary is written
    to the checkpoint directory and the accuracies are printed; training
    stops when the test accuracy exceeds ``target_accuracy`` or the loss
    drops below 0.002.  A checkpoint is written on every 100th step and on
    exit, and each checkpoint also re-pickles the training dataset.

    :param alpha: forwarded to the model loader (presumably the learning
        rate - confirm against ``load_nn``).
    :param nn_type: ``NNType.cnn`` trains the CNN; any other value trains
        the RNN, for which images are reshaped to
        ``(n, IMAGE_HEIGHT, IMAGE_WIDTH)``.
    :param target_accuracy: test accuracy above which training stops.
    """
    label_map_path = os.path.join(formatted_dataset_dir, 'label_map.pickle')
    formatted_train_dataset_path = os.path.join(
        formatted_dataset_dir, 'train_dataset.pickle')
    formatted_test_dataset_path = os.path.join(
        formatted_dataset_dir, 'test_dataset.pickle')

    def _compat_pickle_load(path):
        # NOTE(security): unpickling can execute arbitrary code; only load
        # dataset files that come from a trusted source.
        with open(path, 'rb') as f:
            import sys
            if sys.version_info.major == 3:
                obj = pickle.load(f, encoding='latin1')
            else:
                obj = pickle.load(f)
        return obj

    print("loading %s" % label_map_path)
    label_map = _compat_pickle_load(label_map_path)

    print("load %s" % formatted_train_dataset_path)
    train_dataset = _compat_pickle_load(formatted_train_dataset_path)
    train_labels = train_dataset.labels

    print("load %s" % formatted_test_dataset_path)
    test_dataset = _compat_pickle_load(formatted_test_dataset_path)
    test_data, test_labels = test_dataset.images, test_dataset.labels
    if nn_type == NNType.rnn:
        test_data = test_data.reshape(
            (len(test_data), IMAGE_HEIGHT, IMAGE_WIDTH))

    num_labels = len(label_map)
    print("train_data:", train_dataset.images.shape)
    print("train_labels:", train_labels.shape)
    print("test_data:", test_data.shape)
    print("test_labels:", test_labels.shape)
    print("num_labels:", num_labels)

    if nn_type == NNType.cnn:
        model = load_nn.load_model_cnn(alpha=alpha)
    else:
        model = load_nn.load_model_rnn(alpha=alpha)

    x = model['x']
    y = model['y']
    cost = model['loss']
    optimizer = model['optimizer']
    accuracy = model['accuracy']
    keep_prob = model['keep_prob']
    merged = model['merged']
    saver = model['saver']

    batch_size = 128
    # Evaluation on the test set always runs with dropout disabled.
    test_feed = {x: test_data, y: test_labels, keep_prob: 1.0}

    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)

        # try to continue ....
        model_ckpt_path, origin_step, ok = find_model_ckpt(nn_type=nn_type)
        model_ckpt_dir = os.path.dirname(model_ckpt_path)
        step = origin_step
        if not ok:
            print("Initialized")
        else:  # try continue to train
            saver.restore(sess, model_ckpt_path)
            print('found %s, step from %d' % (model_ckpt_path, step))

        def save_model(_step):
            saver.save(
                sess,
                os.path.join(model_ckpt_dir, 'model'),
                global_step=_step,
            )
            # presumably this keeps the dataset's batch position in sync
            # with the checkpoint - TODO confirm against the dataset class
            with open(formatted_train_dataset_path, 'wb') as f:
                pickle.dump(train_dataset, f, protocol=2)

        graph_log_dir = model_ckpt_dir
        writer = tf.summary.FileWriter(graph_log_dir)
        try:
            while True:
                batch_data, batch_labels = train_dataset.next_batch(batch_size)
                if nn_type == NNType.rnn:
                    batch_data = batch_data.reshape(
                        (len(batch_data), IMAGE_HEIGHT, IMAGE_WIDTH))
                train_feed = {x: batch_data, y: batch_labels, keep_prob: 0.5}

                if step % 10 == 0:
                    # Display, Test and Save.  The training accuracy comes
                    # from the same run as the optimisation step, i.e. it is
                    # measured with keep_prob 0.5.
                    summary, acc_train, loss, _ = sess.run(
                        [merged, accuracy, cost, optimizer],
                        feed_dict=train_feed,
                    )
                    acc_test = sess.run(accuracy, feed_dict=test_feed)
                    writer.add_summary(summary, step)
                    print(
                        "step %4d, train_accuracy: %.4f, loss: %.4f test_accuracy: %.4f"
                        % (step, acc_train, loss, acc_test))

                    # Test Whether you can exit
                    if acc_test > target_accuracy or loss < 0.002:
                        print('training done.')
                        save_model(step)
                        break

                    if step % 100 == 0:  # save the model every 100 step
                        save_model(step)
                else:
                    loss, _ = sess.run([cost, optimizer], feed_dict=train_feed)

                step += 1
        finally:
            # Flush and release the event file even when training is aborted.
            writer.close()
def train(alpha=5e-5):
    """Train the network on the pickled dataset and checkpoint the result.

    The dataset at ``formatted_dataset_path`` is loaded, the model is built
    with ``load_model_nn(alpha)`` and, when ``find_model_ckpt`` finds an
    earlier checkpoint, training continues from it.  Minibatches of 64 rows
    are fed with ``keep_prob`` 0.5.  Every 50 steps the accuracy on the
    current batch and on the test set is printed, every 100 steps a
    checkpoint is written, and training stops once the test accuracy
    exceeds 0.999 or more than 2000 steps were run in this call.

    :param alpha: forwarded to ``load_model_nn`` (presumably the learning
        rate - confirm against that function).
    :raises ValueError: if the training set does not hold more rows than
        one minibatch, because no batch offset could be computed.
    """
    print("loading %s..." % formatted_dataset_path)
    # NOTE(security): unpickling can execute arbitrary code; only load
    # dataset files that come from a trusted source.
    with open(formatted_dataset_path, 'rb') as f:
        import sys
        if sys.version_info.major == 3:
            save = pickle.load(f, encoding='latin1')
        else:
            save = pickle.load(f)

    train_dataset = save['train_dataset']
    train_labels = save['train_labels']
    test_dataset = save['test_dataset']
    test_labels = save['test_labels']
    label_map = save['label_map']
    num_labels = len(label_map)

    print("train_dataset:", train_dataset.shape)
    print("train_labels:", train_labels.shape)
    print("test_dataset:", test_dataset.shape)
    print("test_labels:", test_labels.shape)
    print("num_labels:", num_labels)

    batch_size = 64
    num_samples = train_labels.shape[0]
    if num_samples <= batch_size:
        # The batch offset below is taken modulo (num_samples - batch_size),
        # which must be positive.
        raise ValueError(
            "need more than %d training samples, got %d"
            % (batch_size, num_samples)
        )

    model = load_model_nn(alpha)
    x = model['x']
    y = model['y']
    loss = model['loss']
    optimizer = model['optimizer']
    accuracy = model['accuracy']
    keep_prob = model['keep_prob']
    saver = model['saver']
    graph = model['graph']

    save_dir = os.path.join(trainer_dir, '.checkpoint')
    print("Model saved path: ", save_dir)

    def save_model(_step):
        # `session` is bound by the `with` statement below before any call.
        saver.save(
            session,
            os.path.join(save_dir, 'weibo.cn-model.ckpt'),
            global_step=_step,
        )

    # Evaluation on the test set always runs with dropout disabled.
    test_feed = {x: test_dataset, y: test_labels, keep_prob: 1.0}

    with tf.Session(graph=graph) as session:
        tf.summary.scalar('loss', loss)
        tf.summary.scalar('accuracy', accuracy)
        # NOTE(review): the merged summary op is created but never evaluated,
        # so only the graph is written to graph_log_dir.
        tf.summary.merge_all()
        writer = tf.summary.FileWriter(graph_log_dir, session.graph)
        try:
            tf.global_variables_initializer().run()

            step = 0
            try:
                # try to continue ....
                model_ckpt_path, global_step = find_model_ckpt()
            except FileNotFoundError:
                print("Initialized")
            else:  # try continue to train
                saver.restore(session, model_ckpt_path)
                step = global_step
                print('found %s, step from %d' % (model_ckpt_path, step))

            origin_step = step
            while True:
                # Generate a minibatch: a window that slides over the
                # training set and wraps around before running off the end.
                offset = (step * batch_size) % (num_samples - batch_size)
                batch_data = train_dataset[offset:(offset + batch_size), :]
                batch_labels = train_labels[offset:(offset + batch_size), :]

                session.run(
                    [optimizer, loss],
                    feed_dict={x: batch_data, y: batch_labels, keep_prob: 0.5},
                )
                step += 1

                if step % 50 != 0:
                    continue

                train_accuracy = session.run(
                    accuracy,
                    feed_dict={x: batch_data, y: batch_labels, keep_prob: 1.0},
                )
                test_accuracy = session.run(accuracy, feed_dict=test_feed)
                print(("Step %5d, Training accuracy: %4f, Test accuracy: %4f"
                       % (step, train_accuracy, test_accuracy)))

                if step % 100 == 0:  # save the model every 100 step
                    save_model(step)

                if test_accuracy > 0.999 or step - origin_step > 2000:
                    print('you can re-format dataset and give a smaller alpha '
                          'to continue training')
                    save_model(step)
                    break

            print("Test accuracy: %g"
                  % session.run(accuracy, feed_dict=test_feed))
        finally:
            # Flush and release the event file even when training is aborted.
            writer.close()