def test_manual_inspection(self):
        tf.reset_default_graph()
        with tf.Session() as sess:
            batch_size = 6
            max_word_len = 1024
            max_line_len = 1024
            basedir = './example_data/'
            data = data_pipe.Data(basedir, batch_size, max_word_len,
                                  max_line_len)
            sess.run(tf.tables_initializer())
            data.initialize(sess, data.datadir + '*')

            (trg, trg_sentence_len, trg_word_len) = sess.run([
                data.trg.to_tensor(-1), data.trg_sentence_len,
                data.trg_word_len.to_tensor(0)
            ])
            trg = data.array_to_strings(trg)

            print "***** BEGIN MANUAL INSPECTION ******"
            for trg, trg_sentence_len, trg_word_len in zip(
                    trg, trg_sentence_len, trg_word_len):
                print data_pipe.replace_pad_chrs(trg)
                print trg_sentence_len
                print trg_word_len
                print
            print "***** END MANUAL INSPECTION ******"
    def test_compiles(self):
        tf.reset_default_graph()
        with tf.Session() as sess:
            batch_size = 2
            max_word_len = 20
            max_line_len = 64
            basedir = './example_data/'
            data = data_pipe.Data(basedir, batch_size, max_word_len,
                                  max_line_len)
            sess.run(tf.tables_initializer())
            data.initialize(sess, data.datadir + '*')
            trg = sess.run(data.trg)
    def test_inference_vs_train(self):
        self.assertTrue(False)  # Disabled: auto-fail this test for now
        tf.reset_default_graph()
        with tf.Session() as sess:
            conf = config.generate_config(keep_prob=1.0)
            conf['batch_size'] = 1
            data = data_pipe.Data('./example_data/', conf['batch_size'],
                                  conf['max_word_len'], conf['max_line_len'])
            model, free_model = train.build_model(data, conf)
            data.initialize(sess, data.datadir + '*')
            sess.run(tf.tables_initializer())
            sess.run(tf.global_variables_initializer())
            (out_logits_4,
             src_sentence_3,
             src_sent_len_1,
             trg_sentence_3,
             trg_sent_len_1) = sess.run([model.out_logits_4,
                                         data.src.to_tensor(-1),
                                         data.src_sentence_len,
                                         data.trg.to_tensor(-1),
                                         data.trg_sentence_len])
            src = data.array_to_strings(src_sentence_3)[0].replace(data.go_stop_token, '')
            trg = data.array_to_strings(trg_sentence_3)[0].replace(data.go_stop_token, '')
            # The pipeline's trg is src concatenated with the target; strip the
            # src prefix, then restore the stop token that delimits them.
            trg = trg[len(src):]
            trg = src + ' ' + data.go_stop_token + ' ' + trg.strip()
            print(src)
            print(trg)
            feed = {data.src_place:src, data.trg_place:trg}
            (free_logits_4,
             src_sentence_inference,
             trg_sentence_inference) = sess.run([free_model.out_logits_4,
                                                 data.src_inference.to_tensor(-1),
                                                 data.trg_inference.to_tensor(-1)], feed_dict=feed)
            # Get the first batch line and trim potential batch padding from the model's logits
            out_logits_3 = out_logits_4[0, :free_logits_4.shape[1], :free_logits_4.shape[2], :]
            # Check that the model's outputs are the same regardless of what data pipeline is used
            self.assertTrue((np.abs(out_logits_3 - free_logits_4[0]) < 1e-5).all())
            # Run the inference model as though generating one char at time, and check the outputs
            feed = {data.src_place:src, data.trg_place:''} # Start with no input
            free_logits_4 = sess.run(free_model.out_logits_4, feed_dict=feed)
            self.assertTrue((np.abs(free_logits_4[0,0,0,:] - out_logits_3[0,0,:]) <= 1e-5).all()) 
            trg = trg.split()
            trg_so_far = ''
            for word_idx, trg_word in enumerate(trg):
                for chr_num in range(len(trg_word)):
                    trg_so_far += trg_word[chr_num]
                    feed = {data.src_place:src, data.trg_place:trg_so_far}
                    free_logits_4 = sess.run(free_model.out_logits_4, feed_dict=feed)
                    self.assertTrue((np.abs(free_logits_4[0, word_idx, chr_num + 1, :]
                                            - out_logits_3[word_idx, chr_num + 1, :]) <= 1e-5).all())
                trg_so_far += ' '
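
    # The character-by-character loop above mirrors greedy decoding at
    # inference time: feed the characters generated so far and extend with the
    # argmax of the next-position logits. A minimal sketch, assuming a
    # hypothetical id-to-character lookup `data.id_to_chr` and last-position
    # indexing (neither is confirmed by the pipeline above):
    #
    #   generated = ''
    #   for _ in range(max_chars):
    #       logits = sess.run(free_model.out_logits_4,
    #                         feed_dict={data.src_place: src,
    #                                    data.trg_place: generated})
    #       generated += data.id_to_chr[logits[0, -1, -1, :].argmax()]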
def sess_setup(datadir, restore_dir, batch_size=1):
    """Build the model graph, open a session, and restore trained weights from restore_dir."""
    conf = config.generate_config(keep_prob=1.0, noise_level=0)
    data = data_pipe.Data(datadir,
                          batch_size,
                          conf['max_word_len'],
                          conf['max_line_len'],
                          eval_mode=True)
    model, free_model = train.build_model(data, conf)
    sess = tf.Session()
    saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=1)
    sess.run(tf.tables_initializer())
    sess.run(tf.global_variables_initializer())
    data.initialize(sess, data.datadir + '*')
    saver.restore(sess, restore_dir)
    return model, free_model, data, sess
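
# Example usage (a sketch; './saves/model.ckpt' is a hypothetical checkpoint
# path, and the saved weights must match the graph built above):
#
#   model, free_model, data, sess = sess_setup('./example_data/', './saves/model.ckpt')
#   logits = sess.run(free_model.out_logits_4,
#                     feed_dict={data.src_place: 'some source text',
#                                data.trg_place: ''})
#   sess.close()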
    def test_seq_lens(self):
        tf.reset_default_graph()
        with tf.Session() as sess:
            batch_size = 3
            max_word_len = 16
            max_line_len = 32
            basedir = './example_data/'
            data = data_pipe.Data(basedir, batch_size, max_word_len,
                                  max_line_len)
            sess.run(tf.tables_initializer())
            data.initialize(sess, data.datadir + '*')
            (trg, trg_sentence_len, trg_word_len) = sess.run([
                data.trg.to_tensor(default_value=-1), data.trg_sentence_len,
                data.trg_word_len.to_tensor()
            ])
            trg = data.array_to_strings(trg)
            for trg_sentence, trg_sent_len, trg_wrd_len in zip(
                    trg, trg_sentence_len, trg_word_len):
                for word, length in zip(trg_sentence.split(' '), trg_wrd_len):
                    self.assertEqual(len(word), length)
                self.assertEqual(len(trg_sentence.strip().split(' ')),
                                 trg_sent_len)