Code Example #1
File: train.py Project: alexzeng1206/Projects_in_NUS
def validate(svm, dir_name):
    img_list = []
    HoG_list = []
    labels = []
    # load positive validation samples
    dir_name = dir_name + '/validation'
    load_img(dir_name + '/p', img_list)
    for i in range(len(img_list)):
        labels.append(1)
    # load negative validation samples
    tmp = len(img_list)
    load_img(dir_name + '/n', img_list)
    for i in range(len(img_list) - tmp):
        labels.append(-1)
    # compute HoG features
    get_HoG(img_list, HoG_list)
    # run the SVM on the validation features
    _, pred = svm.predict(np.array(HoG_list))
    pred = [int(i) for i in pred]
    cur_acc = metrics.accuracy_score(labels, pred)
    print("on validation set,the current accuracy is ", cur_acc)
    return pred, cur_acc
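
These snippets all call a load_img helper that is not shown on this page. A minimal sketch of what it presumably does for examples #1-#3, assuming plain OpenCV reads into a caller-supplied list (the original's resizing and color handling may differ):

import os
import cv2

def load_img(dir_name, img_list):
    # hypothetical sketch: append every readable image found under dir_name
    for name in sorted(os.listdir(dir_name)):
        img = cv2.imread(os.path.join(dir_name, name))
        if img is not None:
            img_list.append(img)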
Code Example #2
File: img_aug.py Project: yixiaoer/summercamp_NUS
def create_img_data(input_file, output_file):
    # apply between 1 and 4 of the following augmenters to each image
    seq = iaa.SomeOf((1, 4), [
        iaa.Fliplr(0.5),
        iaa.Flipud(1.0),
        iaa.GaussianBlur(1.0),
        iaa.AdditiveGaussianNoise()  # note: scale defaults to 0, i.e. no noise unless a scale is passed
    ])
    img_list = []
    # the input images are loaded twice, so every source image yields two augmented outputs
    load_img(input_file, img_list)
    load_img(input_file, img_list)
    img_aug = seq.augment_images(img_list)
    for i in range(len(img_aug)):
        cv2.imwrite(output_file + '/i_imgaug_' + str(i) + '.jpg', img_aug[i])
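
A hypothetical invocation, assuming the augmented images should land in a sibling directory (both paths are made-up examples, not taken from the project):

create_img_data('train_data/train/p', 'train_data/train/p_aug')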
Code Example #3
File: train.py Project: alexzeng1206/Projects_in_NUS
def train(o_dir_name):
    dir_name = o_dir_name + '/train'
    labels = []
    img_list = []
    # load positive images
    load_img(dir_name + '/p', img_list)
    for i in range(len(img_list)):
        labels.append(1)
    # load negative images
    tmp = len(img_list)
    load_img(dir_name + '/n', img_list)
    for i in range(len(img_list) - tmp):
        labels.append(-1)
    #get HoG feature list
    HoG_list = []
    get_HoG(img_list, HoG_list)
    # print info
    print('received', tmp, 'positive sample(s)')
    print('received', len(img_list) - tmp, 'negative sample(s)')
    print('start training')
    # train the SVM; consider a second training pass on hard examples: https://www.xuebuyuan.com/2083806.html
    best_c = 0
    best_gamma = 0
    best_acc = 0
    for C in [0.01, 0.1, 1, 5, 10, 50, 100]:
        for gamma in [0.1, 0.5, 0.7, 1, 1.5, 2, 2.5, 5, 10]:
            svm = cv2.ml.SVM_create()
            svm.setC(C)
            svm.setGamma(gamma)
            svm.setType(cv2.ml.SVM_C_SVC)
            svm.setKernel(cv2.ml.SVM_LINEAR)  # note: gamma is ignored by the linear kernel
            svm.train(np.array(HoG_list), cv2.ml.ROW_SAMPLE, np.array(labels))
            _, cur_acc = validate(svm, o_dir_name)
            if (cur_acc > best_acc):
                best_c = C
                best_gamma = gamma
                best_acc = cur_acc
    svm = cv2.ml.SVM_create()
    svm.setC(best_c)
    svm.setGamma(best_gamma)
    svm.setType(cv2.ml.SVM_C_SVC)
    svm.setKernel(cv2.ml.SVM_LINEAR)
    svm.train(np.array(HoG_list), cv2.ml.ROW_SAMPLE, np.array(labels))
    svm.save('first_train.xml')
    print('svm data has been saved')
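
The grid search feeds np.array(HoG_list) to svm.train, so get_HoG must produce one fixed-length float32 vector per image. A sketch of such a helper built on OpenCV's HOGDescriptor, assuming default descriptor parameters (the project's actual window and block sizes are unknown):

import cv2

def get_HoG(img_list, HoG_list):
    # hypothetical sketch: one flattened HoG descriptor per image
    hog = cv2.HOGDescriptor()  # default 64x128 detection window
    for img in img_list:
        img = cv2.resize(img, (64, 128))  # match the default window size
        HoG_list.append(hog.compute(img).flatten())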
Code Example #4
File: test.py Project: R06942112/OCR
def test():
    tf.reset_default_graph()
    infer_graph = tf.Graph()

    with infer_graph.as_default():
        encoder_outputs_t, inputs_t = build_cnn(False, flags.batch_size,
                                                flags.height, flags.width,
                                                flags.channels)
        _, _, pred_ids, logits_t, decoder_inputs_t, \
            _, _, keep_prob_t = build_network(encoder_outputs_t,
                                              True,
                                              flags.batch_size,
                                              flags.decoder_length,
                                              flags.tgt_vocab_size,
                                              flags.attn_num_hidden,
                                              flags.encoder_length,
                                              flags.max_gradient_norm)
        infer_saver = tf.train.Saver()
    infer_sess = tf.Session(graph=infer_graph)
    model_file = tf.train.latest_checkpoint(flags.load_dir)
    infer_saver.restore(infer_sess, model_file)

    with open(flags.test_txt) as f:
        test = [line.rstrip() for line in f]
    test_len = len(test)
    test = np.array(test)
    data_test = Dataset(test)
    if flags.lex_txt is not None:
        with open(flags.lex_txt) as f:
            lex = [line.rstrip().lower() for line in f]

    ti = int(test_len / flags.batch_size)
    rest = test_len % flags.batch_size

    gt = []
    predict = []

    for t in range(ti):
        batch_test = data_test.next_batch(flags.batch_size)
        path = []
        texts = []
        for line in batch_test:
            path.append(line.split(' ', 1)[0])
            texts.append(line.split(' ', 1)[1])

        images = load_img(path, flags.height, flags.width)

        testing_decoder_inputs = np.zeros(
            (flags.decoder_length, flags.batch_size), dtype=float)
        feed_dict_t = {
            inputs_t: images[:, :, :, np.newaxis],
            decoder_inputs_t: testing_decoder_inputs,
            keep_prob_t: 1
        }
        q = infer_sess.run(pred_ids, feed_dict=feed_dict_t)

        for j in range(flags.batch_size):
            gt.append(texts[j])
            ans = np.array(q).T[j]
            pd = []
            for c in ans:
                if c != -1:
                    character = tools.idx_to_word[c]
                    if character != '<EOS>':
                        pd.append(character)
            predict.append(''.join(pd))

    batch_test = data_test.next_batch(flags.batch_size)
    path = []
    texts = []
    for line in batch_test:
        path.append(line.split(' ', 1)[0])
        texts.append(line.split(' ', 1)[1])
    images = load_img(path, flags.height, flags.width)

    testing_decoder_inputs = np.zeros(
        (flags.decoder_length, flags.batch_size), dtype=float)
    feed_dict_t = {
        inputs_t: images[:, :, :, np.newaxis],
        decoder_inputs_t: testing_decoder_inputs,
        keep_prob_t: 1
    }
    q = infer_sess.run(pred_ids, feed_dict=feed_dict_t)

    for k in range(rest):
        gt.append(texts[k])
        ans = np.array(q).T[k]
        pd = []
        for c in ans:
            if c != -1:
                character = tools.idx_to_word[c]
                if character != '<EOS>':
                    pd.append(character)
        predict.append(''.join(pd))

    correct = float(0)
    cnt = 0
    acc_s = 0

    for l in range(len(gt)):
        cnt = cnt + 1
        if gt[l] == predict[l]:
            correct = correct + 1

    acc_s = correct / cnt
    if flags.lex_txt is not None:
        correct_l = float(0)
        cnt = 0
        for l in range(len(gt)):
            cnt = cnt + 1
            lexicon = lex[l].split(',')
            dt = editdistance.eval(predict[l], lexicon[0])
            pl = lexicon[0]
            for ll in lexicon[1:]:
                dt_temp = editdistance.eval(predict[l], ll)

                if dt_temp < dt:
                    dt = dt_temp
                    pl = ll
            if pl == gt[l]:
                correct_l = correct_l + 1

        acc_l = correct_l / cnt

    print('accuracy: ', acc_s)
    if flags.lex_txt is not None:
        print('accuracy with lexicon: ', acc_l)
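
The id-to-string decoding loop above reappears almost verbatim in the remaining examples. Factored into a helper it would read as follows (a sketch; tools.idx_to_word is the vocabulary mapping the examples already assume):

def decode_ids(ids, idx_to_word):
    # map predicted ids to text, skipping the -1 padding id and <EOS>
    chars = []
    for c in ids:
        if c != -1:
            character = idx_to_word[c]
            if character != '<EOS>':
                chars.append(character)
    return ''.join(chars)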
Code Example #5
File: train.py Project: R06942112/OCR
def train():
    f_size = int(flags.img_size / 8)
    encoder_length = f_size * f_size

    with open(flags.train_txt) as f:
        sample = [line.rstrip() for line in f]
    sample = np.array(sample)
    iteration = len(sample) // flags.batch_size
    data = Dataset(sample)

    tf.reset_default_graph()
    train_graph = tf.Graph()
    infer_graph = tf.Graph()
    start = time.time()
    with train_graph.as_default():

        c, inputs = build_cnn(is_training=True,
                              batch_size=flags.batch_size,
                              img_size=flags.img_size,
                              channels=flags.channels)
        deconv_outputs = build_deconv(True, c, flags.batch_size)
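        # build a 2-channel coordinate grid over the feature map, a simple
        # positional encoding that is concatenated onto the CNN features below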
        x = np.linspace(-0.5, 0.5, f_size)
        x = np.tile(x, (f_size, 1))
        y = np.transpose(x)
        x = np.expand_dims(x, axis=2)
        y = np.expand_dims(y, axis=2)
        m = np.concatenate((x, y), axis=2)
        m = np.expand_dims(m, axis=0)
        m = np.repeat(m, flags.batch_size, axis=0)
        m = tf.convert_to_tensor(m, np.float32)
        encoder_outputs = tf.concat([c, m], -1)
        encoder_outputs = tf.reshape(encoder_outputs,
                                     shape=(-1, f_size * f_size, 258))
        encoder_outputs = tf.transpose(encoder_outputs, [1, 0, 2])

        train_op, loss, sample_ids, logits, decoder_inputs, \
            target_labels, learning_rate, attention_weights_history, \
            att_label, lamda, att_mask, input_seg = build_network(
                encoder_outputs,
                False,
                flags.batch_size,
                flags.decoder_length,
                flags.tgt_vocab_size,
                flags.attn_num_hidden,
                encoder_length,
                flags.max_gradient_norm,
                f_size,
                flags.att_loss,
                flags.img_size,
                deconv_outputs)
        initializer = tf.global_variables_initializer()
        train_saver = tf.train.Saver()

    train_sess = tf.Session(graph=train_graph)
    train_sess.run(initializer)

    with infer_graph.as_default():
        c_t, inputs_t = build_cnn(is_training=False,
                                  batch_size=flags.batch_size,
                                  img_size=flags.img_size,
                                  channels=flags.channels)
        deconv_outputs_t = build_deconv(False, c_t, flags.batch_size)

        x_t = np.linspace(-0.5, 0.5, f_size)
        x_t = np.tile(x_t, (f_size, 1))
        y_t = np.transpose(x_t)
        x_t = np.expand_dims(x_t, axis=2)
        y_t = np.expand_dims(y_t, axis=2)
        m_t = np.concatenate((x_t, y_t), axis=2)
        m_t = np.expand_dims(m_t, axis=0)
        m_t = np.repeat(m_t, flags.batch_size, axis=0)
        m_t = tf.convert_to_tensor(m_t, np.float32)
        encoder_outputs_t = tf.concat([c_t, m_t], -1)
        encoder_outputs_t = tf.reshape(encoder_outputs_t,
                                       shape=(-1, f_size * f_size, 258))
        encoder_outputs_t = tf.transpose(encoder_outputs_t, [1, 0, 2])

        _, _, pred_ids, logits_t, decoder_inputs_t, \
            _, _, _, _, _, _, _ = build_network(
                encoder_outputs_t,
                True,
                flags.batch_size,
                flags.decoder_length,
                flags.tgt_vocab_size,
                flags.attn_num_hidden,
                encoder_length,
                flags.max_gradient_norm,
                f_size,
                flags.att_loss,
                flags.img_size,
                deconv_outputs_t)
        infer_saver = tf.train.Saver()
    infer_sess = tf.Session(graph=infer_graph)

    # Training

    la = 10  # value fed to the lamda placeholder when att_loss is enabled
    acc_log = 0
    count = 0
    lr = flags.learning_rate
    for h in range(flags.epoch):
        for i in range(iteration):
            batch_train = data.next_batch(flags.batch_size)
            path = []
            texts = []
            for line in batch_train:
                path.append(line.split(' ')[0])
                texts.append(line.split(' ')[1])

            if flags.att_loss:
                images, npy, mask, seg = load_img_label(
                    path, flags.img_size, flags.decoder_length)
            else:
                images = load_img(path, flags.img_size)

            training_target_labels = get_label(texts, flags.decoder_length)
            training_decoder_inputs = np.delete(training_target_labels,
                                                -1,
                                                axis=1)
            training_decoder_inputs = np.c_[
                np.zeros(training_decoder_inputs.shape[0]),
                training_decoder_inputs].T
            feed_dict = {
                inputs: images,
                decoder_inputs: training_decoder_inputs,
                target_labels: training_target_labels,
                learning_rate: lr
            }
            if flags.att_loss:
                feed_dict[att_label] = npy
                feed_dict[att_mask] = mask
                feed_dict[input_seg] = seg[:, :, :, np.newaxis]
                feed_dict[lamda] = la
            _, loss_value, att = train_sess.run(
                [train_op, loss, attention_weights_history],
                feed_dict=feed_dict)

            step = float(i + 1)
            if step % flags.display_step == 0:
                now = time.time()
                print(step, now - start, loss_value)
                start = now

            if step % flags.eval_step == 0:
                train_saver.save(train_sess, flags.save_dir)
                model_file = tf.train.latest_checkpoint(
                    flags.save_dir.rsplit('/', 1)[0])
                infer_saver.restore(infer_sess, model_file)

                gt = []
                predict = []

                images = load_img(path, flags.img_size)

                testing_decoder_inputs = np.zeros(
                    (flags.decoder_length, flags.batch_size), dtype=float)
                feed_dict_t = {
                    inputs_t: images,
                    decoder_inputs_t: testing_decoder_inputs
                }
                q = infer_sess.run(pred_ids, feed_dict=feed_dict_t)

                for j in range(flags.batch_size):
                    gt.append(texts[j])
                    ans = np.array(q).T[j]
                    pd = []
                    for c in ans:
                        if c != -1:
                            character = tools.idx_to_word[c]
                            if character != '<EOS>':
                                pd.append(character)
                    predict.append(''.join(pd))

                correct = float(0)
                cnt = 0
                acc_s = 0

                for l in range(len(gt)):
                    cnt = cnt + 1
                    if gt[l] == predict[l]:
                        correct = correct + 1
                count = count + 1
                acc_s = correct / cnt
                if acc_s > acc_log:
                    acc_log = acc_s
                    count = 0
                if count == (iteration // flags.eval_step):
                    lr = lr / 5
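
The reshape to 258 channels implies the CNN feature map c has 256 channels, to which the two coordinate channels are appended; that channel count is an inference from this code, not something the project states. The grid construction checks out in isolation:

import numpy as np

f_size = 4  # toy value; the code uses f_size = img_size // 8
x = np.linspace(-0.5, 0.5, f_size)
x = np.tile(x, (f_size, 1))
y = np.transpose(x)
m = np.concatenate((x[:, :, None], y[:, :, None]), axis=2)
print(m.shape)  # (4, 4, 2): one (x, y) coordinate pair per spatial position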
Code Example #6
File: test.py Project: R06942112/OCR
def test():

    tf.reset_default_graph()
    infer_graph = tf.Graph()
    
    
    with infer_graph.as_default():
        _, _, pred_ids, pred_logits, inputs_t, decoder_inputs_t, decoder_lengths_t, \
            _ = build_network(is_training=False,
                              batch_size=FLAGS.batch_size,
                              height=FLAGS.height,
                              width=FLAGS.width,
                              channels=FLAGS.channels,
                              decoder_length=FLAGS.decoder_length,
                              tgt_vocab_size=FLAGS.tgt_vocab_size,
                              num_units=FLAGS.num_units,
                              beam_width=FLAGS.beam_width,
                              encoder_length=FLAGS.encoder_length,
                              max_gradient_norm=FLAGS.max_gradient_norm,
                              embedding_size=FLAGS.embedding_size)
        infer_saver = tf.train.Saver()
    infer_sess = tf.Session(graph=infer_graph)

    with open(FLAGS.test_txt) as f:
        test = [line.rstrip() for line in f]
    test_len = len(test)
    test = np.array(test)
    data_test = Dataset_test(test)

    if FLAGS.lex_txt is not None:
        with open(FLAGS.lex_txt) as f:
            lex = [line.rstrip().lower() for line in f]

    ti = int(test_len / FLAGS.batch_size)
    rest = test_len % FLAGS.batch_size
    gt = []
    predict = []
    model_file = tf.train.latest_checkpoint(FLAGS.load_dir)
    infer_saver.restore(infer_sess, model_file)
    for t in range(ti):
        batch_test = data_test.next_batch(FLAGS.batch_size)
        path = []
        texts = []
        for line in batch_test:
            path.append(line.split(' ', 1)[0])
            texts.append(line.split(' ', 1)[1])
        images = load_img(path, FLAGS.height, FLAGS.width)

        feed_dict_t = {inputs_t: images[:, :, :, np.newaxis],
                       decoder_lengths_t: np.ones((FLAGS.batch_size),
                                                  dtype=int) * FLAGS.decoder_length}
        q = infer_sess.run(pred_ids, feed_dict=feed_dict_t)

        for j in range(len(texts)):
            gt.append(texts[j])
            ans = q[j].T[0]
            pd = []
            for c in ans:
                if c != -1:
                    character = tools.idx_to_word[c]
                    if character != '<EOS>':
                        pd.append(character)
            predict.append(''.join(pd))

    # handle the final partial batch
    batch_test = data_test.next_batch(FLAGS.batch_size)
    path = []
    texts = []
    for line in batch_test:
        path.append(line.split(' ', 1)[0])
        texts.append(line.split(' ', 1)[1])
    images = load_img(path, FLAGS.height, FLAGS.width)

    feed_dict_t = {inputs_t: images[:, :, :, np.newaxis],
                   decoder_lengths_t: np.ones((FLAGS.batch_size),
                                              dtype=int) * FLAGS.decoder_length}
    q = infer_sess.run(pred_ids, feed_dict=feed_dict_t)

    for k in range(rest):
        gt.append(texts[k])
        ans = q[k].T[0]
        pd = []
        for c in ans:
            if c != -1:
                character = tools.idx_to_word[c]
                if character != '<EOS>':
                    pd.append(character)
        predict.append(''.join(pd))

    correct = float(0)
    cnt = 0
    acc_s = 0
    for l in range(len(gt)):
        cnt = cnt + 1
        if gt[l] == predict[l]:
            correct = correct + 1

    acc_s = correct / cnt
    if FLAGS.lex_txt is not None:
        correct_l = float(0)
        cnt = 0
        for l in range(len(gt)):
            cnt = cnt + 1
            lexicon = lex[l].split(',')
            dt = distance.levenshtein(predict[l], lexicon[0])
            pl = lexicon[0]
            for ll in lexicon[1:]:
                dt_temp = distance.levenshtein(predict[l], ll)
                if dt_temp < dt:
                    dt = dt_temp
                    pl = ll
            if pl == gt[l]:
                correct_l = correct_l + 1
        acc_l = correct_l / cnt

    print('accuracy: ', acc_s)
    if FLAGS.lex_txt is not None:
        print('accuracy with lexicon: ', acc_l)
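
Examples #4, #6, and #8 all rescore predictions against a lexicon by picking the entry with the smallest edit distance (#6 via distance.levenshtein, the others via editdistance.eval). The shared logic as a helper, sketched here with the editdistance package:

import editdistance

def closest_lexicon_word(prediction, lexicon):
    # return the lexicon entry closest to the prediction by edit distance
    best = lexicon[0]
    best_dist = editdistance.eval(prediction, best)
    for word in lexicon[1:]:
        d = editdistance.eval(prediction, word)
        if d < best_dist:
            best_dist = d
            best = word
    return best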
Code Example #7
def train():
    with open(flags.train_txt) as f:
        sample = [line.rstrip() for line in f]
    sample = np.array(sample)
    iteration = len(sample) // flags.batch_size
    data = Dataset(sample)

    tf.reset_default_graph()
    train_graph = tf.Graph()
    infer_graph = tf.Graph()

    with train_graph.as_default():
        encoder_outputs, inputs = build_cnn(True, flags.batch_size,
                                            flags.height, flags.width,
                                            flags.channels)
        train_op, loss, sample_ids, logits, decoder_inputs, \
            target_labels, learning_rate, keep_prob = build_network(
                encoder_outputs,
                False,
                flags.batch_size,
                flags.decoder_length,
                flags.tgt_vocab_size,
                flags.attn_num_hidden,
                flags.encoder_length,
                flags.max_gradient_norm)
        initializer = tf.global_variables_initializer()
        train_saver = tf.train.Saver()

    train_sess = tf.Session(graph=train_graph)
    train_sess.run(initializer)

    with infer_graph.as_default():
        encoder_outputs_t, inputs_t = build_cnn(False, flags.batch_size,
                                                flags.height, flags.width,
                                                flags.channels)
        _, _, pred_ids, logits_t, decoder_inputs_t, \
            _, _, keep_prob_t = build_network(
                encoder_outputs_t,
                True,
                flags.batch_size,
                flags.decoder_length,
                flags.tgt_vocab_size,
                flags.attn_num_hidden,
                flags.encoder_length,
                flags.max_gradient_norm)
        infer_saver = tf.train.Saver()
    infer_sess = tf.Session(graph=infer_graph)

    # Training

    start = time.time()
    acc_log = 0
    count = 0
    lr = flags.learning_rate
    for h in range(flags.epoch):
        for i in range(iteration):
            batch_train = data.next_batch(flags.batch_size)
            path = []
            texts = []
            for line in batch_train:
                path.append(line.split(' ')[0])
                texts.append(line.split(' ')[1])

            images = load_img(path, flags.height, flags.width)

            training_target_labels = get_label(texts, flags.decoder_length)
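            # build decoder inputs by dropping the last label column and
            # prepending a zero (GO) token: the labels shifted right one step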
            training_decoder_inputs = np.delete(training_target_labels,
                                                -1,
                                                axis=1)
            training_decoder_inputs = np.c_[
                np.zeros(training_decoder_inputs.shape[0]),
                training_decoder_inputs].T

            feed_dict = {
                inputs: images[:, :, :, np.newaxis],
                decoder_inputs: training_decoder_inputs,
                target_labels: training_target_labels,
                learning_rate: lr,
                keep_prob: 0.5
            }
            _, loss_value = train_sess.run([train_op, loss],
                                           feed_dict=feed_dict)

            step = float(i + 1)
            if step % flags.display_step == 0:

                now = time.time()
                print(step, now - start, loss_value)
                start = now

            if step % flags.eval_step == 0:
                train_saver.save(train_sess, flags.save_dir)
                model_file = tf.train.latest_checkpoint(
                    flags.save_dir.rsplit('/', 1)[0])
                infer_saver.restore(infer_sess, model_file)

                gt = []
                predict = []

                images = load_img(path, flags.height, flags.width)

                testing_decoder_inputs = np.zeros(
                    (flags.decoder_length, flags.batch_size), dtype=float)
                feed_dict_t = {
                    inputs_t: images[:, :, :, np.newaxis],
                    decoder_inputs_t: testing_decoder_inputs,
                    keep_prob_t: 1
                }
                q = infer_sess.run(pred_ids, feed_dict=feed_dict_t)

                for j in range(flags.batch_size):
                    gt.append(texts[j])
                    ans = np.array(q).T[j]
                    pd = []
                    for c in ans:
                        if c != -1:
                            character = tools.idx_to_word[c]
                            if character != '<EOS>':
                                pd.append(character)
                    predict.append(''.join(pd))

                correct = float(0)
                cnt = 0
                acc_s = 0

                for l in range(len(gt)):
                    cnt = cnt + 1
                    if gt[l] == predict[l]:
                        correct = correct + 1
                count = count + 1
                acc_s = correct / cnt
                if acc_s > acc_log:
                    acc_log = acc_s
                    count = 0
                if count == (iteration // flags.eval_step):
                    lr = lr / 5
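
A pattern shared by every training example here is holding separate training and inference graphs and handing weights across through checkpoints: the training session saves, the inference session restores the latest checkpoint. The mechanism in isolation, as a runnable TF1-style sketch (names and the /tmp path are illustrative):

import os
import tensorflow as tf

g_train, g_infer = tf.Graph(), tf.Graph()
with g_train.as_default():
    v = tf.get_variable('v', initializer=1.0)
    train_saver = tf.train.Saver()
with g_infer.as_default():
    v_t = tf.get_variable('v', initializer=0.0)
    infer_saver = tf.train.Saver()

os.makedirs('/tmp/demo', exist_ok=True)
with tf.Session(graph=g_train) as s:
    s.run(tf.global_variables_initializer())
    train_saver.save(s, '/tmp/demo/model')      # write checkpoint
with tf.Session(graph=g_infer) as s:
    infer_saver.restore(s, tf.train.latest_checkpoint('/tmp/demo'))
    print(s.run(v_t))  # 1.0, restored from the training graph's checkpoint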
Code Example #8
def test():
    from dataset import Dataset_test
    from tools import load_img
    import editdistance

    tf.reset_default_graph()
    infer_graph = tf.Graph()

    with infer_graph.as_default():
        encoder_inputs_t, x2_t, inputs_t = build_cnn(training=False,
                                                     batch_size=flags.batch_size,
                                                     height=flags.height,
                                                     width=flags.width,
                                                     channels=flags.channels)
        _, _, pred_ids, pred_logits, decoder_inputs_t, decoder_lengths_t, \
            _, keep_prob_t, prob_t = build_network(encoder_inputs_t,
                                                   is_training=False,
                                                   batch_size=flags.batch_size,
                                                   decoder_length=flags.decoder_length,
                                                   tgt_vocab_size=flags.tgt_vocab_size,
                                                   num_units=flags.num_units,
                                                   beam_width=flags.beam_width,
                                                   encoder_length=flags.encoder_length,
                                                   max_gradient_norm=None,
                                                   embedding_size=flags.embedding_size,
                                                   initial_learning_rate=None)

        infer_saver = tf.train.Saver()
    infer_sess = tf.Session(graph=infer_graph)
    model_file = tf.train.latest_checkpoint(flags.r_path)
    print(flags.r_path)
    infer_saver.restore(infer_sess, model_file)

    class_graph = tf.Graph()
    class_graph = tf.Graph() if False else class_graph  # placeholder removed below
    with class_graph.as_default():
        prob, inputs_f, inputs_l, loss, train_op = build_classifier(
            training=False,
            batch_size=flags.batch_size,
            c_learning_rate=None)
        class_saver = tf.train.Saver()

    class_sess = tf.Session(graph=class_graph)
    model_file = tf.train.latest_checkpoint(flags.c_path)
    class_saver.restore(class_sess, model_file)

    with open(flags.test_txt) as f:
        test = [line.rstrip() for line in f]
    test_len = len(test)
    test = np.array(test)
    data_test = Dataset_test(test)
    
    if flags.lex_txt is not None:
        with open(flags.lex_txt) as f:
            lex = [line.rstrip().lower() for line in f]

    steps = int(test_len / flags.batch_size)
    rest = test_len % flags.batch_size

    predict_c = []
    path_log = []
    labelc = []
    
    for t in range(steps):
        batch_test = data_test.next_batch(flags.batch_size)
        path = []
        label = np.tile([1, 0], (256, 1))  # note: hard-codes a batch size of 256
        for line in batch_test:
            path.append(line.split(' ', 1)[0])
            path_log.append(line.split(' ', 1)[0])
        images = load_img(path, flags.height, flags.width)
        feed_dict_t = {inputs_t: images[:, :, :, np.newaxis],
                       decoder_lengths_t: np.ones((flags.batch_size),
                                                  dtype=int) * flags.decoder_length,
                       keep_prob_t: 1, prob_t: np.zeros(flags.batch_size)}
        feature = infer_sess.run(x2_t, feed_dict=feed_dict_t)

        feed_dict = {inputs_f: feature, inputs_l: label}
        o = class_sess.run(prob, feed_dict=feed_dict)

        for j in range(len(label)):
            predict_c.append(o[j])
            labelc.append(np.argmax(o[j]))

    # final partial batch for the classifier
    batch_test = data_test.next_batch(flags.batch_size)
    path = []
    label = np.tile([1, 0], (256, 1))
    for line in batch_test:
        path.append(line.split(' ', 1)[0])
    images = load_img(path, flags.height, flags.width)

    feed_dict_t = {inputs_t: images[:, :, :, np.newaxis],
                   decoder_lengths_t: np.ones((flags.batch_size),
                                              dtype=int) * flags.decoder_length,
                   keep_prob_t: 1, prob_t: np.zeros(flags.batch_size)}
    feature = infer_sess.run(x2_t, feed_dict=feed_dict_t)

    feed_dict = {inputs_f: feature, inputs_l: label}
    o = class_sess.run(prob, feed_dict=feed_dict)
    for j in range(len(label)):
        labelc.append(np.argmax(o[j]))

    for k in range(rest):
        predict_c.append(o[k])

    correct = float(0)
    cnt = 0
    acc_c = 0

    for l in range(len(predict_c)):
        cnt = cnt + 1
        if np.argmax(predict_c[l]) == 0:
            correct = correct + 1

    acc_c = correct / cnt
    # print('acc_c:', acc_c)

    # reload the test list so the dataset iterates from the beginning again
    with open(flags.test_txt) as f:
        test = [line.rstrip() for line in f]
    test_len = len(test)
    test = np.array(test)
    data_test = Dataset_test(test)

    with open(flags.lex_txt) as f:
        lex = [line.rstrip().lower() for line in f]

    steps = int(test_len / flags.batch_size)
    rest = test_len % flags.batch_size

    gt = []
    predict = []

    for t in range(steps):
        batch_test = data_test.next_batch(flags.batch_size)
        path = []
        texts = []
        label = labelc[256 * t:256 * (t + 1)]
        for line in batch_test:
            path.append(line.split(' ', 1)[0])
            texts.append(line.split(' ', 1)[1])

        images = load_img(path, flags.height, flags.width)

        feed_dict_t = {inputs_t: images[:, :, :, np.newaxis],
                       decoder_lengths_t: np.ones((flags.batch_size),
                                                  dtype=int) * flags.decoder_length,
                       keep_prob_t: 1, prob_t: label}
        q = infer_sess.run(pred_ids, feed_dict=feed_dict_t)

        for j in range(len(texts)):
            gt.append(texts[j])
            ans = q[j].T[0]
            pd = []
            for c in ans:
                if c != -1:
                    character = tools.idx_to_word[c]
                    if character != '<EOS>':
                        pd.append(character)
            predict.append(''.join(pd))

    batch_test = data_test.next_batch(flags.batch_size)
    path = []
    texts = []
    label = labelc[-256:]
    for line in batch_test:
        path.append(line.split(' ', 1)[0])
        texts.append(line.split(' ', 1)[1])

    images = load_img(path, flags.height, flags.width)

    feed_dict_t = {inputs_t: images[:, :, :, np.newaxis],
                   decoder_lengths_t: np.ones((flags.batch_size),
                                              dtype=int) * flags.decoder_length,
                   keep_prob_t: 1, prob_t: label}
    q = infer_sess.run(pred_ids, feed_dict=feed_dict_t)

    for k in range(rest):
        gt.append(texts[k])
        path_log.append(path[k])
        ans = q[k].T[0]
        pd = []
        for c in ans:
            if c != -1:
                character = tools.idx_to_word[c]
                if character != '<EOS>':
                    pd.append(character)
        predict.append(''.join(pd))

    correct = float(0)
    cnt = 0
    acc_s = 0

    for l in range(len(gt)):
        cnt = cnt + 1
        if gt[l] == predict[l]:
            correct = correct + 1

    acc_s = correct / cnt
    if flags.lex_txt is not None:
        correct_l = float(0)
        cnt = 0
        for l in range(len(gt)):
            cnt = cnt + 1
            lexicon = lex[l].split(',')
            dt = editdistance.eval(predict[l], lexicon[0])
            pl = lexicon[0]
            for ll in lexicon[1:]:
                dt_temp = editdistance.eval(predict[l], ll)
                if dt_temp < dt:
                    dt = dt_temp
                    pl = ll
            if pl == gt[l]:
                correct_l = correct_l + 1
        acc_l = correct_l / cnt
    print('accuracy: ', acc_s)
    if flags.lex_txt is not None:
        print('accuracy with lexicon: ', acc_l)
Code Example #9
File: test.py Project: R06942112/OCR
def test():
    f_size = int(flags.img_size / 8)
    encoder_length = f_size * f_size

    tf.reset_default_graph()
    infer_graph = tf.Graph()

    with infer_graph.as_default():
        c_t, inputs_t = build_cnn(is_training=False,
                                  batch_size=flags.batch_size,
                                  img_size=flags.img_size,
                                  channels=flags.channels)
        deconv_outputs_t = build_deconv(False, c_t, flags.batch_size)

        x_t = np.linspace(-0.5, 0.5, f_size)
        x_t = np.tile(x_t, (f_size, 1))
        y_t = np.transpose(x_t)
        x_t = np.expand_dims(x_t, axis=2)
        y_t = np.expand_dims(y_t, axis=2)
        m_t = np.concatenate((x_t, y_t), axis=2)
        m_t = np.expand_dims(m_t, axis=0)
        m_t = np.repeat(m_t, flags.batch_size, axis=0)
        m_t = tf.convert_to_tensor(m_t, np.float32)
        encoder_outputs_t = tf.concat([c_t, m_t], -1)
        encoder_outputs_t = tf.reshape(encoder_outputs_t,
                                       shape=(-1, f_size * f_size, 258))
        encoder_outputs_t = tf.transpose(encoder_outputs_t, [1, 0, 2])

        _, _, pred_ids, logits_t, decoder_inputs_t, \
            _, _, _, _, _, _, _ = build_network(
                encoder_outputs_t,
                True,
                flags.batch_size,
                flags.decoder_length,
                flags.tgt_vocab_size,
                flags.attn_num_hidden,
                encoder_length,
                flags.max_gradient_norm,
                f_size,
                flags.att_loss,
                flags.img_size,
                deconv_outputs_t)
        infer_saver = tf.train.Saver()
    infer_sess = tf.Session(graph=infer_graph)

    model_file = tf.train.latest_checkpoint(flags.load_dir)
    infer_saver.restore(infer_sess, model_file)

    with open(flags.test_txt) as f:
        test = [line.rstrip() for line in f]
    test_len = len(test)
    test = np.array(test)
    data_test = Dataset(test)

    if flags.lex_txt is not None:
        with open(flags.lex_txt) as f:
            lex = [line.rstrip().lower() for line in f]

    ti = int(test_len / flags.batch_size)
    rest = test_len % flags.batch_size

    gt = []
    predict = []

    for t in range(ti):
        batch_test = data_test.next_batch(flags.batch_size)
        path = []
        texts = []
        for line in batch_test:
            path.append(line.split(' ', 1)[0])
            texts.append(line.split(' ', 1)[1])

        images = load_img(path, flags.img_size)

        testing_decoder_inputs = np.zeros(
            (flags.decoder_length, flags.batch_size), dtype=float)
        feed_dict_t = {
            inputs_t: images,
            decoder_inputs_t: testing_decoder_inputs
        }
        q = infer_sess.run(pred_ids, feed_dict=feed_dict_t)

        for j in range(flags.batch_size):
            gt.append(texts[j])
            ans = np.array(q).T[j]
            pd = []
            for c in ans:
                if c != -1:
                    character = tools.idx_to_word[c]
                    if character != '<EOS>':
                        pd.append(character)
            predict.append(''.join(pd))

    batch_test = data_test.next_batch(flags.batch_size)
    path = []
    texts = []
    for line in batch_test:
        path.append(line.split(' ', 1)[0])
        texts.append(line.split(' ', 1)[1])
    images = load_img(path, flags.img_size)

    testing_decoder_inputs = np.zeros(
        (flags.decoder_length, flags.batch_size), dtype=float)
    feed_dict_t = {inputs_t: images, decoder_inputs_t: testing_decoder_inputs}
    q = infer_sess.run(pred_ids, feed_dict=feed_dict_t)

    for k in range(rest):
        gt.append(texts[k])
        ans = np.array(q).T[k]
        pd = []
        for c in ans:
            if c != -1:
                character = tools.idx_to_word[c]
                if character != '<EOS>':
                    pd.append(character)
        predict.append(''.join(pd))

    correct = float(0)
    cnt = 0
    acc_s = 0

    for l in range(len(gt)):
        cnt = cnt + 1
        if gt[l] == predict[l]:
            correct = correct + 1

    acc_s = correct / cnt

    if flags.lex_txt is not None:
        correct_l = float(0)
        cnt = 0
        for l in range(len(gt)):
            cnt = cnt + 1
            lexicon = lex[l].split(',')
            dt = editdistance.eval(predict[l], lexicon[0])
            pl = lexicon[0]
            for ll in lexicon[1:]:
                dt_temp = editdistance.eval(predict[l], ll)

                if dt_temp < dt:
                    dt = dt_temp
                    pl = ll
            if pl == gt[l]:
                correct_l = correct_l + 1

        acc_l = correct_l / cnt

    print('accuracy: ', acc_s)
    if flags.lex_txt is not None:
        print('accuracy with lexicon: ', acc_l)
Code Example #10
File: train.py Project: R06942112/OCR
def train():

    with open(FLAGS.train_txt) as f:
        sample = [line.rstrip() for line in f]

    sample = np.array(sample)
    iteration = len(sample) // FLAGS.batch_size
    data = Dataset(sample)

    tf.reset_default_graph()
    train_graph = tf.Graph()
    infer_graph = tf.Graph()

    with train_graph.as_default():

        train_op, loss, sample_ids, logits, inputs, decoder_inputs, decoder_lengths, \
        target_labels, keep_prob = build_network(is_training=True,
                                                 batch_size=FLAGS.batch_size,
                                                 height=FLAGS.height,
                                                 width=FLAGS.width,
                                                 channels=FLAGS.channels,
                                                 decoder_length=FLAGS.decoder_length,
                                                 tgt_vocab_size=FLAGS.tgt_vocab_size,
                                                 num_units=FLAGS.num_units,
                                                 beam_width=FLAGS.beam_width,
                                                 encoder_length=FLAGS.encoder_length,
                                                 max_gradient_norm=FLAGS.max_gradient_norm,
                                                 embedding_size=FLAGS.embedding_size,
                                                 initial_learning_rate=FLAGS.learning_rate)

        initializer = tf.global_variables_initializer()
        train_saver = tf.train.Saver()

    train_sess = tf.Session(graph=train_graph)
    train_sess.run(initializer)

    with infer_graph.as_default():
        _, _, pred_ids, pred_logits, inputs_t, decoder_inputs_t, decoder_lengths_t, \
        _, keep_prob_t = build_network(is_training=False,
                                                     batch_size=FLAGS.batch_size,
                                                     height=FLAGS.height,
                                                     width=FLAGS.width,
                                                     channels=FLAGS.channels,
                                                     decoder_length=FLAGS.decoder_length,
                                                     tgt_vocab_size=FLAGS.tgt_vocab_size,
                                                     num_units=FLAGS.num_units,
                                                     beam_width=FLAGS.beam_width,
                                                     encoder_length=FLAGS.encoder_length,
                                                     max_gradient_norm=FLAGS.max_gradient_norm,
                                                     embedding_size=FLAGS.embedding_size,
                                                     initial_learning_rate=None)
        infer_saver = tf.train.Saver()
    infer_sess = tf.Session(graph=infer_graph)

    start = time.time()
    acc_log = 0
    count = 0
    lr = FLAGS.learning_rate
    for h in range(FLAGS.epoch):
        for i in range(iteration):
            batch_train = data.next_batch(FLAGS.batch_size)
            np.random.shuffle(batch_train)
            path = []
            texts = []
            for line in batch_train:
                path.append(line.split(' ')[0])
                texts.append(line.split(' ')[1])

            images = load_train_img(path, FLAGS.height, FLAGS.width)

            training_target_labels = get_label(texts, FLAGS.decoder_length)
            training_decoder_inputs = np.delete(training_target_labels,
                                                -1,
                                                axis=1)
            training_decoder_inputs = np.c_[
                np.zeros(training_decoder_inputs.shape[0]),
                training_decoder_inputs].T
            feed_dict = {
                inputs: images[:, :, :, np.newaxis],
                decoder_inputs: training_decoder_inputs,
                decoder_lengths: np.ones(
                    (FLAGS.batch_size), dtype=int) * FLAGS.decoder_length,
                target_labels: training_target_labels,
                keep_prob: 0.8
            }
            _, loss_value = train_sess.run([train_op, loss],
                                           feed_dict=feed_dict)

            step = float(i)
            if step % FLAGS.display_step == 0:
                now = time.time()
                print(step, now - start, loss_value)
                start = now

            if step % FLAGS.eval_step == 0:
                train_saver.save(train_sess, FLAGS.save_dir)
                model_file = tf.train.latest_checkpoint(
                    FLAGS.save_dir.rsplit('/', 1)[0])
                infer_saver.restore(infer_sess, model_file)

                gt = []
                predict = []

                images = load_img(path, FLAGS.height, FLAGS.width)

                feed_dict_t = {inputs_t: images[:, :, :, np.newaxis],
                               decoder_lengths_t: np.ones((FLAGS.batch_size),
                                                          dtype=int) * FLAGS.decoder_length,
                               keep_prob_t: 1}
                q = infer_sess.run(pred_ids, feed_dict=feed_dict_t)

                for j in range(len(texts)):
                    gt.append(texts[j])
                    ans = q[j].T[0]

                    pd = []
                    for c in ans:
                        if c != -1:
                            character = tools.idx_to_word[c]
                            if character != '<EOS>':
                                pd.append(character)
                    predict.append(''.join(pd))

                correct = float(0)
                cnt = 0
                acc_s = 0

                for l in range(len(gt)):
                    cnt = cnt + 1
                    if gt[l] == predict[l]:
                        correct = correct + 1

                acc_s = correct / cnt
                if acc_s > acc_log:
                    acc_log = acc_s
                    count = 0
                # note: unlike examples #5 and #7, count is never incremented in
                # this snippet, and lr is never fed back into the graph, so this
                # decay branch has no effect as written
                if count == (iteration // FLAGS.eval_step):
                    lr = lr / 5
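
Examples #5 and #7 implement a small reduce-on-plateau schedule around this same code: count evaluations since the best accuracy, and divide the learning rate by 5 once a full epoch's worth of evaluations passes without improvement. Restated as a standalone helper (a sketch; the originals inline this with loop-level state):

def plateau_decay(lr, acc_s, state, evals_per_epoch):
    # state holds 'acc_log' (best accuracy so far) and 'count' (evals since best)
    state['count'] += 1
    if acc_s > state['acc_log']:
        state['acc_log'] = acc_s
        state['count'] = 0
    if state['count'] == evals_per_epoch:
        lr = lr / 5
    return lr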
Code Example #11
def read_input(self, img_path):
    self.img, self.img_name = load_img(img_path)