def recognize_jmz(image_path, weights_path, char_dict_path, txt_file_path):
    """
    识别函数
    :param image_path: 图片所在路径
    :param weights_path: 模型保存路径
    :param char_dict_path: 字典文件存放位置
    :param txt_file_path: 包含图片名的txt文件
    :return: None
    """
    files = os.listdir(txt_file_path)
    txt_files = [txt for txt in files if txt.endswith(".txt") and txt.split(".")[0] + ".json" not in files]

    inputdata = tf.placeholder(dtype=tf.float32, shape=[1, CFG.ARCH.INPUT_SIZE[1], None, CFG.ARCH.INPUT_CHANNELS],  # variable width
                               name='input')
    input_sequence_length = tf.placeholder(tf.int32, shape=[1], name='input_sequence_length')

    net = crnn_model.ShadowNet(phase='test', hidden_nums=CFG.ARCH.HIDDEN_UNITS,
                               layers_nums=CFG.ARCH.HIDDEN_LAYERS, num_classes=CFG.ARCH.NUM_CLASSES)

    inference_ret = net.inference(inputdata=inputdata, name='shadow_net', reuse=False)

    decodes, _ = tf.nn.ctc_beam_search_decoder(inputs=inference_ret, sequence_length=input_sequence_length,  # variable sequence length
                                               merge_repeated=False, beam_width=1)

    # config tf saver
    saver = tf.train.Saver()

    # config tf session
    sess_config = tf.ConfigProto(allow_soft_placement=True)
    # sess_config.gpu_options.per_process_gpu_memory_fraction = CFG.TRAIN.GPU_MEMORY_FRACTION
    # sess_config.gpu_options.allow_growth = CFG.TRAIN.TF_ALLOW_GROWTH

    sess_config.gpu_options.allow_growth = True
    sess = tf.Session(config=sess_config)
    with sess.as_default():
        saver.restore(sess=sess, save_path=weights_path)
        for idx, txt_file in enumerate(txt_files):
            reg_result = {}
            txt_path = os.path.join(txt_file_path, txt_file)
            with open(txt_path, 'r') as fd:
                image_names = [line.strip() for line in fd.readlines()]
            for image_name in image_names:
                image_paths = os.path.join(image_path, image_name)
                image = cv2.imread(image_paths, cv2.IMREAD_COLOR)
                if image is None:
                    print(image_paths + ' does not exist')
                    continue
                image = _resize_image(image)
                image = np.array(image, np.float32) / 127.5 - 1.0
                seq_len = np.array([image.shape[1] / 4], dtype=np.int32)
                preds = sess.run(decodes, feed_dict={inputdata: [image], input_sequence_length: seq_len})

                preds = _sparse_matrix_to_list(preds[0], char_dict_path)
                reg_result[image_name] = preds[0]
                print('Predict image {:s} result: {:s}'.format(image_name, preds[0]))
            with open(txt_path[:-4] + ".json", "w") as fw:  # 建议改为.split('.')
                json.dump(reg_result, fw)
    sess.close()

    return
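
# A minimal usage sketch for recognize_jmz; the paths below are hypothetical
# placeholders, not part of the original repository.
if __name__ == '__main__':
    recognize_jmz(image_path='./data/test_images',
                  weights_path='./model/shadownet/shadownet.ckpt',
                  char_dict_path='./data/char_dict/char_dict.json',
                  txt_file_path='./data/test_images/annotations')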
Example #2
def recognize(image_path, weights_path, char_dict_path, txt_path):
    """
    识别函数
    :param image_path: 图片所在路径
    :param weights_path: 模型保存路径
    :param char_dict_path: 字典文件存放位置
    :param txt_path: 包含图片名的txt文件
    :return: None
    """
    
    NUM_CLASSES = get_num_class(char_dict_path)
    
    with open(txt_path, 'r', encoding='UTF-8') as fd:
        # image_names = [line.split(' ')[0] for line in fd.readlines()]  # labelled case
        image_names = [line.strip() for line in fd.readlines()]  # unlabelled case
    # with tf.device('/gpu:0'):
    inputdata = tf.placeholder(dtype=tf.float32, shape=[1, CFG.ARCH.INPUT_SIZE[1], None, CFG.ARCH.INPUT_CHANNELS],  # variable width
                               name='input')

    input_sequence_length = tf.placeholder(tf.int32, shape=[1], name='input_sequence_length')

    net = crnn_model.ShadowNet(phase='test', hidden_nums=CFG.ARCH.HIDDEN_UNITS,
            layers_nums=CFG.ARCH.HIDDEN_LAYERS, num_classes=NUM_CLASSES)

    inference_ret = net.inference(inputdata=inputdata, name='shadow_net', reuse=False)

    # decodes = inference_ret
    decodes, _ = tf.nn.ctc_beam_search_decoder(inputs=inference_ret, sequence_length=input_sequence_length,  # variable sequence length
                                               merge_repeated=False, beam_width=10)
    # preds = _sparse_matrix_to_list(decodes[0], char_dict_path)
    # end of edits; the tf.device block above was commented out on 2019-11-20

    # config tf saver
    saver = tf.train.Saver()

    # config tf session
    sess_config = tf.ConfigProto(allow_soft_placement=True)  # , log_device_placement=True)
    # allow_soft_placement=True: ops that cannot run on the GPU fall back to the CPU
    # log_device_placement=True: print which device each op is placed on
    sess_config.gpu_options.per_process_gpu_memory_fraction = CFG.TRAIN.GPU_MEMORY_FRACTION
    sess_config.gpu_options.allow_growth = CFG.TRAIN.TF_ALLOW_GROWTH

    sess = tf.Session(config=sess_config)

    with sess.as_default():
        saver.restore(sess=sess, save_path=weights_path)

        for image_name in image_names:
            # time_start = time.time()
            image_paths = os.path.join(image_path, image_name)
            # print(image_paths)
            image = cv2.imread(image_paths, cv2.IMREAD_COLOR)
            if image is None:
                print(image_paths + ' does not exist')
                continue
            image = np.array(image, np.float32) / 127.5 - 1.0
            seq_len = np.array([image.shape[1] / 4], dtype=np.int32)
            # time_end_1 = time.time()
            preds = sess.run(decodes, feed_dict={inputdata: [image], input_sequence_length: seq_len})
            # time_end_2 = time.time()
            preds = _sparse_matrix_to_list(preds[0], char_dict_path)
            # time_end_3 = time.time()
            # print('Predict image {:s} result: {:s}  cost time:{:f}'.format(image_name, preds[0], time_end-time_start))
            # print('Predict image {:s}  total time:{:f}  pre_process time:{:f}, run time:{:f},  convert_time:{:f}'.format(preds[0], time_end_3 - time_start, time_end_1 - time_start, time_end_2 - time_end_1, time_end_3 - time_end_2))
            print('Predict image {:s} result: {:s}'.format(image_name, preds[0]))

    sess.close()

    return
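
# The helper _sparse_matrix_to_list used above is not shown in this listing. Below is a
# minimal sketch of what such a helper might look like, assuming the char dict is a JSON
# file mapping class indices (as strings) to characters; the actual repository may store
# the dictionary differently, so treat this only as an illustration.
def _sparse_matrix_to_list_sketch(sparse_matrix, char_dict_path):
    import json

    with open(char_dict_path, 'r', encoding='UTF-8') as fd:
        char_map = json.load(fd)  # assumed format: {"0": "a", "1": "b", ...}

    # group the decoded label indices by sample (the batch size here is 1)
    dense_lists = [[] for _ in range(int(sparse_matrix.dense_shape[0]))]
    for (sample_idx, _), value in zip(sparse_matrix.indices, sparse_matrix.values):
        dense_lists[int(sample_idx)].append(int(value))

    # map every index sequence to its character string
    return [''.join(char_map[str(i)] for i in sample) for sample in dense_lists]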
Example #3
def recognize_jmz(image_path, weights_path, char_dict_path, txt_file_path, test_count):
    """
    识别函数
    :param image_path: 图片所在路径
    :param weights_path: 模型保存路径
    :param char_dict_path: 字典文件存放位置
    :param txt_file_path: 包含图片名的txt文件
    :return: None
    """
    global reg_result
    tf.reset_default_graph()
    
    NUM_CLASSES = get_num_class(char_dict_path)

    inputdata = tf.placeholder(dtype=tf.float32, shape=[1, CFG.ARCH.INPUT_SIZE[1], None, CFG.ARCH.INPUT_CHANNELS],  # variable width
                               name='input')
    input_sequence_length = tf.placeholder(tf.int32, shape=[1], name='input_sequence_length')

    net = crnn_model.ShadowNet(phase='test', hidden_nums=CFG.ARCH.HIDDEN_UNITS,
                               layers_nums=CFG.ARCH.HIDDEN_LAYERS, num_classes=NUM_CLASSES)

    inference_ret = net.inference(inputdata=inputdata, name='shadow_net', reuse=False)

    decodes, _ = tf.nn.ctc_beam_search_decoder(inputs=inference_ret, sequence_length=input_sequence_length,  # variable sequence length
                                               merge_repeated=False, beam_width=1)

    # config tf saver
    saver = tf.train.Saver()

    # config tf session
    sess_config = tf.ConfigProto(allow_soft_placement=True)
    # sess_config.gpu_options.per_process_gpu_memory_fraction = CFG.TRAIN.GPU_MEMORY_FRACTION
    # sess_config.gpu_options.allow_growth = CFG.TRAIN.TF_ALLOW_GROWTH

    sess_config.gpu_options.allow_growth = True
    sess = tf.Session(config=sess_config)
    
    weights_path = tf.train.latest_checkpoint(weights_path)
    
    with sess.as_default():
        saver.restore(sess=sess, save_path=weights_path)

        with open(txt_file_path, 'r') as fd:
            lines = [line.strip() for line in fd.readlines()]
            
            for i in range(test_count):
                line = lines[i]
                image_name = line.split(' ')[0]
                label = line.split(' ')[1]
                image_paths = os.path.join(image_path, image_name)
                image = cv2.imread(image_paths, cv2.IMREAD_COLOR)
                if image is None:
                    print(image_paths + ' does not exist')
                    continue
                image = _resize_image(image)
                image = np.array(image, np.float32) / 127.5 - 1.0
                seq_len = np.array([image.shape[1] / 4], dtype=np.int32)
                preds = sess.run(decodes, feed_dict={inputdata: [image], input_sequence_length: seq_len})

                preds = _sparse_matrix_to_list(preds[0], char_dict_path)
                print('Label: [{:20s}]'.format(label))
                print('Pred : [{:20s}]\n'.format(preds[0]))
    sess.close()

    return
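
# The helper _resize_image used above is also not shown in this listing. A minimal sketch,
# assuming it scales the image to the fixed network input height (CFG.ARCH.INPUT_SIZE[1])
# while keeping the aspect ratio; the original helper may behave differently.
def _resize_image_sketch(image):
    target_height = CFG.ARCH.INPUT_SIZE[1]
    scale = target_height / image.shape[0]
    target_width = max(int(image.shape[1] * scale), 1)
    return cv2.resize(image, (target_width, target_height))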
Example #4
def train_shadownet(dataset_dir, weights_path, char_dict_path, save_path):
    """
    训练网络,参考:
    https://github.com/MaybeShewill-CV/CRNN_Tensorflow
    :param dataset_dir: tfrecord文件路径
    :param weights_path: 要加载的预训练模型路径
    :param char_dict_path: 字典文件路径
    :param save_path: 模型保存路径
    :return: None
    """
    # prepare dataset
    train_dataset = read_tfrecord.CrnnDataFeeder(
        dataset_dir=dataset_dir, char_dict_path=char_dict_path, flags='train')

    train_images, train_labels, train_images_paths = train_dataset.inputs(
        batch_size=CFG.TRAIN.BATCH_SIZE)
#################### data augmentation ##############################
    # train_images = tf.multiply(tf.add(train_images, 1.0), 128.0)   # removed since read_tfrecord.py was changed
    tf.summary.image('original_image', train_images)   # log the original images for inspection
    images = apply_with_random_selector(
        train_images,
        lambda x, ordering: distort_color(x, ordering),
        num_cases=2)
    images = tf.subtract(tf.divide(images, 127.5), 1.0)  # normalize to [-1, 1]; changed 128.0 to 127.5
    train_images = tf.clip_by_value(images, -1.0, 1.0)
    tf.summary.image('distord_turned_image', train_images)
################################################################

    NUM_CLASSES = get_num_class(char_dict_path)

    # declare crnn net
    shadownet = crnn_model.ShadowNet(phase='train',hidden_nums=CFG.ARCH.HIDDEN_UNITS,
        layers_nums=CFG.ARCH.HIDDEN_LAYERS, num_classes=NUM_CLASSES)
    # set up training graph
    with tf.device('/gpu:0'):
        # compute loss and seq distance
        train_inference_ret, train_ctc_loss = shadownet.compute_loss(inputdata=train_images,
            labels=train_labels, name='shadow_net', reuse=False)

        # set learning rate
        global_step = tf.Variable(0, name='global_step', trainable=False)
        learning_rate = tf.train.exponential_decay(learning_rate=CFG.TRAIN.LEARNING_RATE,
            global_step=global_step, decay_steps=CFG.TRAIN.LR_DECAY_STEPS,
            decay_rate=CFG.TRAIN.LR_DECAY_RATE, staircase=True)

        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            # optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,
            #     momentum=0.9).minimize(loss=train_ctc_loss, global_step=global_step)
            optimizer = tf.train.AdadeltaOptimizer(
                learning_rate=learning_rate).minimize(loss=train_ctc_loss, global_step=global_step)
            # the original optimizer was Momentum; changed to Adadelta to match the CRNN paper


    # Set tf summary
    os.makedirs(save_path, exist_ok=True)
    tf.summary.scalar(name='train_ctc_loss', tensor=train_ctc_loss)
    tf.summary.scalar(name='learning_rate', tensor=learning_rate)
    merge_summary_op = tf.summary.merge_all()

    # Set saver configuration
    saver = tf.train.Saver()
    train_start_time = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime(time.time()))
    model_name = 'shadownet_{:s}.ckpt'.format(str(train_start_time))
    model_save_path = ops.join(save_path, model_name)

    # Set sess configuration
    sess_config = tf.ConfigProto(allow_soft_placement=True)
    sess_config.gpu_options.per_process_gpu_memory_fraction = CFG.TRAIN.GPU_MEMORY_FRACTION
    sess_config.gpu_options.allow_growth = CFG.TRAIN.TF_ALLOW_GROWTH
    sess = tf.Session(config=sess_config)

    summary_writer = tf.summary.FileWriter(save_path)
    summary_writer.add_graph(sess.graph)

    # Set the training parameters
    train_epochs = CFG.TRAIN.EPOCHS

    with sess.as_default():
        epoch = 0
        if weights_path is None or not os.path.exists(weights_path) or len(os.listdir(weights_path)) < 5:
            print('Training from scratch')
            init = tf.global_variables_initializer()
            sess.run(init)
        else:
            weights_path = tf.train.latest_checkpoint(weights_path)
            print('Restore model from last model checkpoint {:s}'.format(weights_path))
            saver.restore(sess=sess, save_path=weights_path)
            epoch = sess.run(tf.train.get_global_step())

        cost_history = [np.inf]
        while epoch < train_epochs:
            epoch += 1
            _, train_ctc_loss_value, merge_summary_value, learning_rate_value = sess.run(
                [optimizer, train_ctc_loss, merge_summary_op, learning_rate])

            if (epoch+1) % CFG.TRAIN.DISPLAY_STEP == 0:
                print('lr = {:9f}   epoch : {:d}     cost= {:9f}'.format(\
                    learning_rate_value, epoch+1, train_ctc_loss_value))
                # record history train ctc loss
                cost_history.append(train_ctc_loss_value)
                # add training summary
                summary_writer.add_summary(summary=merge_summary_value, global_step=epoch)

            if (epoch+1) % CFG.TRAIN.SAVE_STEPS == 0:
                saver.save(sess=sess, save_path=model_save_path, global_step=epoch)

    return np.array(cost_history[1:])  # Don't return the first np.inf
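
# A minimal usage sketch for train_shadownet; all paths are hypothetical placeholders.
# Passing weights_path=None (or an empty checkpoint directory) trains from scratch;
# otherwise the latest checkpoint found in that directory is restored first.
if __name__ == '__main__':
    cost_history = train_shadownet(dataset_dir='./data/tfrecords',
                                   weights_path=None,
                                   char_dict_path='./data/char_dict/char_dict.json',
                                   save_path='./model/shadownet')
    print('Recorded {:d} training ctc loss values'.format(len(cost_history)))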