def model(self):
        """Build a small, hard-coded inference graph and attach the loss graph.

        Returns the object produced by ``model.inference_graph`` after it has
        been updated with the result of ``model.loss_graph``.
        """
        graph = model.inference_graph(
            char_vocab_size=51,
            word_vocab_size=10000,
            char_embed_size=3,
            batch_size=4,
            num_highway_layers=0,
            num_rnn_layers=1,
            rnn_size=5,
            max_word_length=11,
            kernels=[2],
            kernel_features=[2],
            num_unroll_steps=3,
            dropout=0.0)

        # batch_size / num_unroll_steps repeat the values used for the graph above.
        loss_part = model.loss_graph(graph.logits, batch_size=4, num_unroll_steps=3)
        graph.update(loss_part)
        return graph
    def xest(self):
        """Run the graph with hand-written parameter values and check the logits.

        Every fed value is addressed by its tensor name and the resulting
        logits are compared against a fixed expected array.

        NOTE(review): the name is ``xest`` rather than ``test...`` — presumably
        renamed to keep the test runner from collecting it; confirm.
        """

        with self.test_session() as sess:

            # NOTE(review): ``m`` is built here but never referenced again; the
            # fetches below go through ``self.model`` instead. The sibling
            # ``model`` in this class is a plain method, so ``self.model.logits``
            # looks like it would fail unless ``model`` is exposed as a
            # property elsewhere — confirm which object was intended.
            m = model.inference_graph(char_vocab_size=5, word_vocab_size=5,
                        char_embed_size=3, batch_size=2, num_highway_layers=0,
                        num_rnn_layers=1, rnn_size=5, max_word_length=5,
                        kernels= [2], kernel_features=[2], num_unroll_steps=2,
                        dropout=0.0)

            logits, input_embedded = sess.run([
                    self.model.logits,
                    self.model.input_embedded,
                ], {
                # All-zero 7x20 weight matrix for the LSTM cell (presumably
                # 7 = 2 kernel features + rnn_size 5 and 20 = 4 * rnn_size — TODO confirm).
                'LSTM/RNN/BasicLSTMCell/Linear/Matrix:0': np.array([
                    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                ]),
                # 20-element bias: zero everywhere except the first and last entries.
                'LSTM/RNN/BasicLSTMCell/Linear/Bias:0': np.array(
                    [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]
                ),

                # Convolution kernel of all ones with a zero bias.
                'TDNN/kernel_2/w:0': np.array([[
                    [[1,1],[1,1],[1,1]],
                    [[1,1],[1,1],[1,1]]
                ]]),
                'TDNN/kernel_2/b:0': np.array([0, 0]),

                # One 3-dimensional embedding row per character id (5 ids).
                'Embedding/char_embedding:0': np.array([
                    [0, 0, 0],
                    [1, 0, 0],
                    [0, 1, 0],
                    [0, 0, 1],
                    [-1, 0, 1],
                ]),

                # Integer input of shape (2, 2, 5); presumably
                # (batch, unroll step, character position) — TODO confirm.
                'input:0': np.array([
                    [[1,3,2,0,0],[1,4,2,0,0]],
                    [[1,3,3,2,0],[1,4,4,2,0]]
                ]),

            })

            # Debug output of the fetched values.
            print(logits)
            print(input_embedded)
            # Expected logits: a single 1 at position [0][0][1], zeros elsewhere.
            self.assertAllClose(logits, np.array([
                [[0,1,0,0,0],[0,0,0,0,0]],
                [[0,0,0,0,0],[0,0,0,0,0]]
            ]))
예제 #3
0
 def model(self):
     """Return the inference graph for a small, hard-coded configuration."""
     settings = dict(
         char_vocab_size=51,
         word_vocab_size=5,
         char_embed_size=3,
         batch_size=4,
         num_highway_layers=0,
         num_rnn_layers=1,
         rnn_size=5,
         max_word_length=11,
         kernels=[2],
         kernel_features=[2],
         num_unroll_steps=3,
         dropout=0.0,
     )
     return model.inference_graph(**settings)
예제 #4
0
def run():
    """Restore a trained model from ``FLAGS.load_model`` and hand it back.

    All configuration is read from ``FLAGS``.

    Returns:
        ``-1`` (after printing a message) when no checkpoint was specified or
        its ``.meta`` file does not exist; otherwise the tuple
        ``(session, m, fasttext_model, max_word_length, char_vocab, word_vocab)``.
        The session is not closed here; it is returned to the caller.
    """
    if FLAGS.load_model is None:
        print('Please specify checkpoint file to load model from')
        return -1

    meta_path = FLAGS.load_model + '.meta'
    if not os.path.exists(meta_path):
        print('Checkpoint file not found', FLAGS.load_model)
        return -1

    # Only the vocabularies and the maximum word length are used below.
    (word_vocab, char_vocab, _word_tensors, _char_tensors,
     max_word_length, _words_list) = load_data(
        FLAGS.data_dir,
        FLAGS.max_word_length,
        FLAGS.num_unroll_steps,
        eos=FLAGS.EOS)

    fasttext_wrapper = FasttextModel(fasttext_path=FLAGS.fasttext_model_path)
    fasttext_model = fasttext_wrapper.get_fasttext_model()

    print('initialized test dataset reader')
    session = tf.Session()

    # tensorflow seed must be inside graph
    tf.set_random_seed(FLAGS.seed)
    np.random.seed(seed=FLAGS.seed)

    # Build the inference graph (dropout disabled).
    with tf.variable_scope("Model"):
        inference = model.inference_graph(
            char_vocab_size=char_vocab.size,
            word_vocab_size=word_vocab.size,
            char_embed_size=FLAGS.char_embed_size,
            batch_size=FLAGS.batch_size,
            num_highway_layers=FLAGS.highway_layers,
            num_rnn_layers=FLAGS.rnn_layers,
            rnn_size=FLAGS.rnn_size,
            max_word_length=max_word_length,
            # NOTE: eval() executes whatever expression the flag contains.
            kernels=eval(FLAGS.kernels),
            kernel_features=eval(FLAGS.kernel_features),
            num_unroll_steps=FLAGS.num_unroll_steps,
            dropout=0,
            embedding=FLAGS.embedding,
            fasttext_word_dim=300,
            acoustic_features_dim=4)
        # we need global step only because we want to read it from the model
        _global_step = tf.Variable(0, dtype=tf.int32, name='global_step')

    checkpoint_saver = tf.train.Saver()
    checkpoint_saver.restore(session, FLAGS.load_model)
    print('Loaded model from', FLAGS.load_model)

    return (session, inference, fasttext_model, max_word_length,
            char_vocab, word_vocab)
def _append_log_row(tag, step, loss, arousal, valence, liking):
    """Append one comma-separated metrics row to the file at ``LOGGING_PATH``."""
    # ``with`` guarantees the handle is closed even if the write raises.
    with open(LOGGING_PATH, 'a') as log:
        log.write('%s, %6d, %.5f, %.5f, %.5f, %.5f, \n'
                  % (tag, step, loss, arousal, valence, liking))


def train():
    """Train the model and periodically evaluate it, logging metrics to disk.

    All configuration is read from ``FLAGS`` and all data from the ``dl``
    module. The optimised loss is the sum of the arousal, liking and valence
    losses; each reported training metric is ``1 - loss`` for that dimension.

    During training:
      * every batch prints the summed loss;
      * every 3rd batch prints the three training metrics and appends them to
        the log file;
      * every 14th batch runs the evaluation reader, averages the three
        evaluation metrics over its batches, prints them and appends them to
        the log file.

    NOTE: no checkpoint is written and no early stopping is applied in this
    function; the ``saver`` is created but not used, and training always runs
    for ``FLAGS.max_epochs`` epochs.
    """
    dataset_tensors, labels_tensors = dl.make_batches()

    input_tensor_tr, label_tensor_tr, seq_tensor_tr = dl.sequence_init(
        dataset_tensors, labels_tensors, FLAGS.num_unroll_steps, 'Train',
        allow_short_seq=False)
    input_tensor_te, label_tensor_te, seq_tensor_te = dl.sequence_init(
        dataset_tensors, labels_tensors, FLAGS.num_unroll_steps, 'Test',
        allow_short_seq=True)

    train_reader = dl.TrainDataReader(
        input_tensor_tr, label_tensor_tr, seq_tensor_tr,
        FLAGS.batch_size, FLAGS.num_unroll_steps, False)
    eval_reader = dl.EvalDataReader(
        input_tensor_te, label_tensor_te, seq_tensor_te,
        FLAGS.batch_size_eval, FLAGS.num_unroll_steps, False)

    # Three label values per unrolled step (arousal, valence, liking).
    labels = tf.placeholder(tf.float32, [None, FLAGS.num_unroll_steps, 3],
                            name='labels')

    train_model = model.inference_graph(
        word_vocab_size=FLAGS.word_vocab_size,
        # NOTE: eval() executes whatever expression the flag contains.
        kernels=eval(FLAGS.kernels),
        kernel_features=eval(FLAGS.kernel_features),
        rnn_size=FLAGS.rnn_size,
        dropout=FLAGS.dropout,
        num_rnn_layers=FLAGS.rnn_layers,
        num_highway_layers=FLAGS.highway_layers,
        num_unroll_steps=FLAGS.num_unroll_steps,
        max_sent_length=FLAGS.max_sent_length,
        embed_size=FLAGS.word_embed_size)

    predictions = train_model.predictions

    losses = model.loss_graph(predictions, labels)
    eval_model = model.eval_metric_graph()

    loss_arousal = losses.loss_arousal
    loss_valence = losses.loss_valence
    loss_liking = losses.loss_liking

    metric_arousal = 1. - loss_arousal
    metric_valence = 1. - loss_valence
    metric_liking = 1. - loss_liking

    eval_arousal = eval_model.eval_metric_arousal
    eval_valence = eval_model.eval_metric_valence
    eval_liking = eval_model.eval_metric_liking

    loss_op = loss_arousal + loss_liking + loss_valence

    optimizer = tf.train.AdamOptimizer(
        learning_rate=FLAGS.learning_rate).minimize(loss_op)

    # Created after the full graph exists; currently no checkpoint is saved.
    saver = tf.train.Saver()

    with tf.Session() as sess:

        sess.run(tf.initialize_all_variables())

        for epoch in range(1, FLAGS.max_epochs + 1):

            batch = 1

            for minibatch in train_reader.iter():

                x, y = minibatch

                _, l, m_arousal, m_valence, m_liking = sess.run(
                    [optimizer, loss_op, metric_arousal, metric_valence, metric_liking],
                    feed_dict={
                        train_model.input: x,
                        labels: y,
                        # NOTE(review): sequence length is hard-coded to 120
                        # for every training sample — confirm it matches the data.
                        train_model.sequence_length: [120] * FLAGS.batch_size,
                        train_model.batch_size: FLAGS.batch_size
                    })

                print('Epoch: %5d/%5d -- batch: %5d -- loss: %.4f' % (epoch, FLAGS.max_epochs, batch, l))

                if batch % 3 == 0:
                    print('arousal: %.4f -- valence: %.4f, liking: %.4f'
                          % (m_arousal, m_valence, m_liking))
                    _append_log_row('train', epoch * batch,
                                    l, m_arousal, m_valence, m_liking)

                if batch % 14 == 0:
                    print('evaluation process------------------------------------------')

                    eval_metric = []
                    for mb in eval_reader.iter():

                        eval_x_list, eval_y_list, eval_z_list = mb

                        # Predictions are stacked across the sub-batches of this
                        # evaluation batch, then cut back to the number of
                        # valid steps counted from the sequence lengths.
                        cnt = 0
                        prev = None
                        for eval_x, eval_z in zip(eval_x_list, eval_z_list):
                            cnt += np.sum(eval_z)
                            eval_tmp_preds = sess.run([predictions], feed_dict={
                                train_model.input: eval_x,
                                train_model.sequence_length: eval_z,
                                train_model.batch_size: FLAGS.batch_size_eval
                            })

                            if prev is None:
                                prev = eval_tmp_preds[0]
                            else:
                                prev = np.vstack((prev, eval_tmp_preds[0]))
                        prev = prev[:cnt]
                        eval_y_list = np.array(eval_y_list).reshape([-1, 3])[:cnt]

                        # The fetch order must match the unpacking order; it
                        # previously fetched liking before valence, which
                        # swapped those two metrics in every report.
                        e_arousal, e_valence, e_liking = sess.run(
                            [eval_arousal, eval_valence, eval_liking],
                            feed_dict={
                                eval_model.eval_predictions: prev,
                                eval_model.eval_labels: eval_y_list
                            })

                        eval_metric.append([e_arousal, e_valence, e_liking])

                    # Average each metric over all evaluation batches.
                    eval_res = np.mean(np.array(eval_metric), axis=0)
                    eval_loss = np.sum(1. - eval_res)
                    print('Epoch: %5d/%5d -- batch: %5d -- loss: %.4f -- arousal: %.4f -- valence: %.4f -- liking: %.4f'
                          % (epoch, FLAGS.max_epochs, batch, eval_loss, eval_res[0], eval_res[1], eval_res[2]))

                    # NOTE(review): evaluation rows carry the same 'train' tag
                    # as training rows — confirm whether a distinct tag was intended.
                    _append_log_row('train', epoch * batch, eval_loss,
                                    eval_res[0], eval_res[1], eval_res[2])
                    print('done evaluation------------------------------------------\n')

                batch += 1
def main(file,
         batch_size=20,
         num_unroll_steps=35,
         char_embed_size=15,
         rnn_size=650,
         kernels="[1,2,3,4,5,6,7]",
         kernel_features="[50,100,150,200,200,200,200]",
         max_grad_norm=5.0,
         learning_rate=1.0,
         learning_rate_decay=0.5,
         decay_when=1.0,
         seed=3435,
         param_init=0.05,
         max_epochs=25,
         print_every=5):
    """Train the model, validating and checkpointing after every epoch.

    Args:
        file: not used by this function.
        batch_size: batch size passed to the readers and both graphs.
        num_unroll_steps: number of unrolled steps; also scales the training loss.
        char_embed_size: passed to ``model.inference_graph``.
        rnn_size: passed to ``model.inference_graph``.
        kernels: string holding a Python list literal; evaluated with ``eval``.
        kernel_features: string holding a Python list literal; evaluated with ``eval``.
        max_grad_norm: passed to ``model.training_graph``.
        learning_rate: initial learning rate assigned to the training graph.
        learning_rate_decay: factor applied to the learning rate on decay.
        decay_when: validation perplexity must drop by more than this amount
            below the best seen so far, otherwise the learning rate is decayed.
        seed: seed for TensorFlow and NumPy random generators.
        param_init: parameters are initialised uniformly in
            ``[-param_init, param_init]``.
        max_epochs: maximum number of epochs.
        print_every: print progress every this many batches.

    Training stops early once the decayed learning rate falls below 1e-5.
    """
    if not os.path.exists(TRAINING_DIR):
        os.mkdir(TRAINING_DIR)
        print('Created training directory', TRAINING_DIR)

    word_vocab, char_vocab, word_tensors, char_tensors, max_word_length = \
        load_dataset()

    print('initialized all dataset readers')

    def build_inference_graph():
        # Shared by the training and validation graphs so that both are always
        # built from identical arguments.
        # NOTE: eval() executes whatever expression the option string contains.
        return model.inference_graph(
            char_vocab_size=char_vocab.size(),
            word_vocab_size=word_vocab.size(),
            char_embed_size=char_embed_size,
            batch_size=batch_size,
            rnn_size=rnn_size,
            max_word_length=max_word_length,
            kernels=eval(kernels),
            kernel_features=eval(kernel_features),
            num_unroll_steps=num_unroll_steps)

    with tf.Graph().as_default(), tf.Session() as session:

        train_reader = DataReader(word_tensors['train'], char_tensors['train'],
                                  batch_size, num_unroll_steps, char_vocab)

        valid_reader = DataReader(word_tensors['valid'], char_tensors['valid'],
                                  batch_size, num_unroll_steps, char_vocab)

        # Built for parity with the other splits; not consumed in this function.
        test_reader = DataReader(word_tensors['test'], char_tensors['test'],
                                 batch_size, num_unroll_steps, char_vocab)

        # tensorflow seed must be inside graph
        tf.set_random_seed(seed)
        np.random.seed(seed=seed)

        # Build the training graph.
        # The range must be symmetric around zero: with minval == maxval every
        # parameter would start at the same constant value.
        initializer = tf.random_uniform_initializer(-param_init, param_init)
        with tf.variable_scope("Model", initializer=initializer):
            train_model = build_inference_graph()
            train_model.update(
                model.loss_graph(train_model.logits, batch_size,
                                 num_unroll_steps))

            # scaling loss by num_unroll_steps effectively scales gradients by the same factor.
            # we need it to reproduce how the original Torch code optimizes. Without this, our gradients will be
            # much smaller (i.e. 35 times smaller) and to get system to learn we'd have to scale learning rate and max_grad_norm appropriately.
            # Thus, scaling gradients so that this trainer is exactly compatible with the original
            train_model.update(
                model.training_graph(train_model.loss * num_unroll_steps,
                                     learning_rate, max_grad_norm))

        # create saver before creating more graph nodes, so that we do not save any vars defined below
        saver = tf.train.Saver(max_to_keep=50)

        # Build the validation graph (shares parameters with the training graph).
        with tf.variable_scope("Model", reuse=True):
            valid_model = build_inference_graph()
            valid_model.update(
                model.loss_graph(valid_model.logits, batch_size,
                                 num_unroll_steps))

        # Always start from a freshly initialised model.
        tf.global_variables_initializer().run()
        session.run(train_model.clear_char_embedding_padding)
        print('Created and initialized fresh model. Size:', model.model_size())

        summary_writer = tf.summary.FileWriter(TRAINING_DIR,
                                               graph=session.graph)

        # Use the learning rate passed to this function, not the graph default.
        session.run(tf.assign(train_model.learning_rate, learning_rate))

        best_valid_loss = None
        rnn_state = session.run(train_model.initial_rnn_state)
        for epoch in range(max_epochs):
            epoch_start_time = time.time()
            avg_train_loss = 0.0
            count = 0
            for x, y in train_reader.iter():
                count += 1
                start_time = time.time()

                # The final RNN state of one batch is fed as the initial state
                # of the next one.
                loss, _, rnn_state, gradient_norm, step, _ = session.run(
                    [
                        train_model.loss, train_model.train_op,
                        train_model.final_rnn_state, train_model.global_norm,
                        train_model.global_step,
                        train_model.clear_char_embedding_padding
                    ], {
                        train_model.input: x,
                        train_model.targets: y,
                        train_model.initial_rnn_state: rnn_state
                    })

                # Exponential moving average of the training loss.
                avg_train_loss += 0.05 * (loss - avg_train_loss)

                time_elapsed = time.time() - start_time

                if count % print_every == 0:
                    print(
                        '%6d: %d [%5d/%5d], train_loss/perplexity = %6.8f/%6.7f secs/batch = %.4fs, grad.norm=%6.8f'
                        % (step, epoch, count, train_reader.length, loss,
                           np.exp(loss), time_elapsed, gradient_norm))

            print('Epoch training time:', time.time() - epoch_start_time)

            # epoch done: time to evaluate
            avg_valid_loss = 0.0
            count = 0
            rnn_state = session.run(valid_model.initial_rnn_state)
            for x, y in valid_reader.iter():
                count += 1
                start_time = time.time()

                loss, rnn_state = session.run(
                    [valid_model.loss, valid_model.final_rnn_state], {
                        valid_model.input: x,
                        valid_model.targets: y,
                        valid_model.initial_rnn_state: rnn_state,
                    })

                if count % print_every == 0:
                    print("\t> validation loss = %6.8f, perplexity = %6.8f" %
                          (loss, np.exp(loss)))
                avg_valid_loss += loss / valid_reader.length

            print("at the end of epoch:", epoch)
            print("train loss = %6.8f, perplexity = %6.8f" %
                  (avg_train_loss, np.exp(avg_train_loss)))
            print("validation loss = %6.8f, perplexity = %6.8f" %
                  (avg_valid_loss, np.exp(avg_valid_loss)))

            save_as = '%s/epoch%03d_%.4f.model' % (TRAINING_DIR, epoch,
                                                   avg_valid_loss)
            saver.save(session, save_as)
            print('Saved model', save_as)

            # Write out summary events.
            summary = tf.Summary(value=[
                tf.Summary.Value(tag="train_loss",
                                 simple_value=avg_train_loss),
                tf.Summary.Value(tag="valid_loss", simple_value=avg_valid_loss)
            ])
            summary_writer.add_summary(summary, step)

            # Decay the learning rate when validation perplexity has not
            # improved by more than ``decay_when`` over the best value so far.
            if best_valid_loss is not None and np.exp(
                    avg_valid_loss) > np.exp(best_valid_loss) - decay_when:
                print(
                    'validation perplexity did not improve enough, decay learning rate'
                )
                current_learning_rate = session.run(train_model.learning_rate)
                print('learning rate was:', current_learning_rate)
                current_learning_rate *= learning_rate_decay
                if current_learning_rate < 1.e-5:
                    print('learning rate too small - stopping now')
                    break

                session.run(
                    train_model.learning_rate.assign(current_learning_rate))
                print('new learning rate is:', current_learning_rate)
            else:
                best_valid_loss = avg_valid_loss
예제 #7
0
def main(print):
    """Restore a trained model, run it over the test split and pickle the results.

    Args:
        print: callable used for all progress and error messages (it replaces
            the built-in ``print`` inside this function).

    Returns:
        ``-1`` when no checkpoint is specified, the checkpoint index file is
        missing, or the test readers yield no batches; otherwise ``None``.

    Side effects:
        Writes ``test_results.pkl`` into ``FLAGS.train_dir`` with one row per
        label / prediction / file name / kaldi sentence index.
    """
    if FLAGS.load_model_for_test is None:
        print('Please specify checkpoint file to load model from')
        return -1

    if not os.path.exists(FLAGS.load_model_for_test + ".index"):
        print('Checkpoint file not found', FLAGS.load_model_for_test)
        return -1

    word_vocab, char_vocab, word_tensors, char_tensors, max_word_length, words_list, wers, acoustics, files_name, kaldi_sents_index = \
        load_test_data(FLAGS.data_dir, FLAGS.max_word_length, num_unroll_steps=FLAGS.num_unroll_steps, eos=FLAGS.EOS, datas=['test'])

    test_reader = TestDataReader(word_tensors['test'], char_tensors['test'],
                                 FLAGS.batch_size, FLAGS.num_unroll_steps,
                                 wers['test'], files_name['test'],
                                 kaldi_sents_index['test'])

    # An unset / empty flag is normalised to None.
    fasttext_model_path = FLAGS.fasttext_model_path or None

    # NOTE(review): ``test_ft_reader`` only exists when the embedding includes
    # 'fasttext', yet the batch loop below always iterates it — any other
    # embedding setting would fail with a NameError. Confirm the intended
    # behaviour for non-fasttext embeddings.
    if 'fasttext' in FLAGS.embedding:
        fasttext_model = FasttextModel(
            fasttext_path=fasttext_model_path).get_fasttext_model()
        test_ft_reader = DataReaderFastText(
            words_list=words_list,
            batch_size=FLAGS.batch_size,
            num_unroll_steps=FLAGS.num_unroll_steps,
            model=fasttext_model,
            data='test',
            acoustics=acoustics)

    print('initialized test dataset reader')

    with tf.Graph().as_default(), tf.Session() as session:

        # tensorflow seed must be inside graph
        tf.set_random_seed(FLAGS.seed)
        np.random.seed(seed=FLAGS.seed)

        # Build the inference graph (dropout disabled) plus its loss.
        with tf.variable_scope("Model"):
            m = model.inference_graph(char_vocab_size=char_vocab.size,
                                      word_vocab_size=word_vocab.size,
                                      char_embed_size=FLAGS.char_embed_size,
                                      batch_size=FLAGS.batch_size,
                                      num_highway_layers=FLAGS.highway_layers,
                                      num_rnn_layers=FLAGS.rnn_layers,
                                      rnn_size=FLAGS.rnn_size,
                                      max_word_length=max_word_length,
                                      # NOTE: eval() executes whatever
                                      # expression the flag contains.
                                      kernels=eval(FLAGS.kernels),
                                      kernel_features=eval(
                                          FLAGS.kernel_features),
                                      num_unroll_steps=FLAGS.num_unroll_steps,
                                      dropout=0,
                                      embedding=FLAGS.embedding,
                                      fasttext_word_dim=300,
                                      acoustic_features_dim=4)
            m.update(model.loss_graph(m.logits, FLAGS.batch_size))

            # Declared so that its value can be restored from the checkpoint.
            global_step = tf.Variable(0, dtype=tf.int32, name='global_step')

        saver = tf.train.Saver()
        saver.restore(session, FLAGS.load_model_for_test)
        print('Loaded model from' + str(FLAGS.load_model_for_test) +
              'saved at global step' + str(global_step.eval()))

        # Evaluation over the test split starts here.
        # NOTE(review): the same initial RNN state is fed to every batch (the
        # final state is never fetched) — presumably intentional; confirm.
        rnn_state = session.run(m.initial_rnn_state)
        count = 0
        avg_loss = 0
        labels = []
        predictions = []
        files_name_list = []
        kaldi_sents_index_list = []
        start_time = time.time()
        for batch_kim, batch_ft in zip(test_reader.iter(),
                                       test_ft_reader.iter()):
            count += 1
            x, y, files_name_batch, kaldi_sents_index_batch = batch_kim
            loss, logits = session.run(
                [m.loss, m.logits], {
                    m.input2: batch_ft,
                    m.input: x,
                    m.targets: y,
                    m.initial_rnn_state: rnn_state
                })

            # Accumulate the per-batch loss; without this the reported test
            # loss is always 0 (and the perplexity always 1).
            avg_loss += loss

            labels.append(y)
            predictions.append(logits)
            files_name_list.append(files_name_batch)
            kaldi_sents_index_list.append(kaldi_sents_index_batch)

        if count == 0:
            # Avoid a ZeroDivisionError below when the readers are empty.
            print('No test batches were produced; nothing to evaluate')
            return -1

        avg_loss /= count
        time_elapsed = time.time() - start_time

        print("test loss = %6.8f, perplexity = %6.8f" %
              (avg_loss, np.exp(avg_loss)))
        print("test samples:" + str(count * FLAGS.batch_size) +
              "time elapsed:" + str(time_elapsed) + "time per one batch:" +
              str(time_elapsed / count))

        # One row per batch at first; each column holds that batch's values.
        df = pd.DataFrame({
            "labels": labels,
            "predictions": predictions,
            "files_name": files_name_list,
            "kaldi_sents_index": kaldi_sents_index_list
        })

        # Keep only the first element of each batch's logits.
        df['predictions'] = df['predictions'].apply(lambda x: x[0])
        # Explode every column separately so that each row holds one sample.
        final_df = pd.DataFrame()
        final_df['labels'] = df.explode('labels')['labels']
        final_df['predictions'] = df.explode('predictions')['predictions']
        final_df['files_name'] = df.explode('files_name')['files_name']
        final_df['kaldi_sents_index'] = df.explode(
            'kaldi_sents_index')['kaldi_sents_index']
        final_df.reset_index(drop=True, inplace=True)
        # Each cell is still a sequence at this point; keep its first element.
        for col in final_df.columns:
            final_df[col] = final_df[col].apply(lambda column: column[0])

        final_df.to_pickle(FLAGS.train_dir + '/test_results.pkl')

    # NOTE(review): this helper is defined but not called anywhere in the
    # visible part of this function — confirm where it is meant to be applied.
    def get_wers_results(group):
        """Compare the label of our lowest-prediction row with Kaldi's row.

        ``group`` is indexed like a pandas group of the results frame: it must
        provide ``name``, ``predictions``, ``labels`` and ``kaldi_sents_index``.
        """
        file_name = group.name

        # Row with the smallest prediction value in this group.
        our_best_prediction_index = group['predictions'].values.argmin()
        our_wer_label = group.iloc[our_best_prediction_index]['labels']

        # Rows whose kaldi sentence index equals 1.
        kaldis_best_prediction_row = group[group['kaldi_sents_index'] == 1]
        kaldis_wer_label = kaldis_best_prediction_row['labels']

        # NOTE(review): compares a single label with an array of values;
        # presumably exactly one row has kaldi_sents_index == 1 — confirm.
        min_wer = min(our_wer_label, kaldis_wer_label.values)
        return pd.DataFrame({
            'file_name': file_name,
            'our_wer_label': our_wer_label,
            'kaldis_wer_label': kaldis_wer_label,
            'min': min_wer
        })
예제 #8
0
def main(print):
    """Train the model, validating and checkpointing after every epoch.

    Writes the flag values to ``train_parameters.csv`` and the per-epoch
    results to ``train_results.csv`` inside ``FLAGS.train_dir``, saves one
    checkpoint per epoch, and decays the learning rate when the validation
    perplexity stops improving by at least ``FLAGS.decay_when``.

    Args:
        print: callable invoked as ``print(message)`` for all progress
            output.  NOTE(review): the name shadows the builtin, so callers
            must pass a print-like function -- confirm how this entry point
            is invoked.
    """
    if not os.path.exists(FLAGS.train_dir):
        os.mkdir(FLAGS.train_dir)
        print('Created training directory' + FLAGS.train_dir)

    # CSV initialize: snapshot of every flag value used for this run.
    pd.DataFrame(FLAGS.flag_values_dict(),
                 index=range(1)).to_csv(FLAGS.train_dir +
                                        '/train_parameters.csv')
    epochs_results = initialize_epoch_data_dict()

    fasttext_model_path = None
    if FLAGS.fasttext_model_path:
        fasttext_model_path = FLAGS.fasttext_model_path

    word_vocab, char_vocab, word_tensors, char_tensors, max_word_length, words_list = \
        load_data(FLAGS.data_dir, FLAGS.max_word_length, eos=FLAGS.EOS)

    fasttext_model = None
    train_ft_reader = None
    valid_ft_reader = None
    if 'fasttext' in FLAGS.embedding:
        fasttext_model = FasttextModel(
            fasttext_path=fasttext_model_path).get_fasttext_model()

        train_ft_reader = DataReaderFastText(
            words_list=words_list,
            batch_size=FLAGS.batch_size,
            num_unroll_steps=FLAGS.num_unroll_steps,
            model=fasttext_model,
            data='train')

        valid_ft_reader = DataReaderFastText(
            words_list=words_list,
            batch_size=FLAGS.batch_size,
            num_unroll_steps=FLAGS.num_unroll_steps,
            model=fasttext_model,
            data='valid')

    # Same truthiness test the training loop always used; evaluated once so
    # that training and validation agree on which inputs get fed.
    use_fasttext = bool(fasttext_model)

    train_reader = DataReader(word_tensors['train'], char_tensors['train'],
                              FLAGS.batch_size, FLAGS.num_unroll_steps)

    valid_reader = DataReader(word_tensors['valid'], char_tensors['valid'],
                              FLAGS.batch_size, FLAGS.num_unroll_steps)

    # NOTE(review): the test reader is built but never used in this function.
    test_reader = DataReader(word_tensors['test'], char_tensors['test'],
                             FLAGS.batch_size, FLAGS.num_unroll_steps)

    print('initialized all dataset readers')

    def iter_batches(reader, ft_reader):
        """Yield (x, y, ft_batch); ft_batch is None when fastText is off."""
        if use_fasttext:
            for (x, y), ft_batch in zip(reader.iter(), ft_reader.iter()):
                yield x, y, ft_batch
        else:
            for x, y in reader.iter():
                yield x, y, None

    with tf.Graph().as_default(), tf.Session() as session:

        # tensorflow seed must be inside graph
        tf.set_random_seed(FLAGS.seed)
        np.random.seed(seed=FLAGS.seed)

        # build training graph
        initializer = tf.random_uniform_initializer(-FLAGS.param_init,
                                                    FLAGS.param_init)
        with tf.variable_scope("Model", initializer=initializer):
            # NOTE(security): eval() runs whatever expression the flag
            # strings contain; only pass trusted values for these flags.
            train_model = model.inference_graph(
                char_vocab_size=char_vocab.size,
                word_vocab_size=word_vocab.size,
                char_embed_size=FLAGS.char_embed_size,
                batch_size=FLAGS.batch_size,
                num_highway_layers=FLAGS.highway_layers,
                num_rnn_layers=FLAGS.rnn_layers,
                rnn_size=FLAGS.rnn_size,
                max_word_length=max_word_length,
                kernels=eval(FLAGS.kernels),
                kernel_features=eval(FLAGS.kernel_features),
                num_unroll_steps=FLAGS.num_unroll_steps,
                dropout=FLAGS.dropout,
                embedding=FLAGS.embedding,
                fasttext_word_dim=300,
                acoustic_features_dim=4)
            train_model.update(
                model.loss_graph(train_model.logits, FLAGS.batch_size,
                                 FLAGS.num_unroll_steps))

            train_model.update(
                model.training_graph(train_model.loss * FLAGS.num_unroll_steps,
                                     FLAGS.learning_rate, FLAGS.max_grad_norm))

        # create saver before creating more graph nodes, so that we do not
        # save any vars defined below
        saver = tf.train.Saver(max_to_keep=50)

        # build graph for validation and testing (shares parameters with the
        # training graph!)
        with tf.variable_scope("Model", reuse=True):
            valid_model = model.inference_graph(
                char_vocab_size=char_vocab.size,
                word_vocab_size=word_vocab.size,
                char_embed_size=FLAGS.char_embed_size,
                batch_size=FLAGS.batch_size,
                num_highway_layers=FLAGS.highway_layers,
                num_rnn_layers=FLAGS.rnn_layers,
                rnn_size=FLAGS.rnn_size,
                max_word_length=max_word_length,
                kernels=eval(FLAGS.kernels),
                kernel_features=eval(FLAGS.kernel_features),
                num_unroll_steps=FLAGS.num_unroll_steps,
                dropout=0.0,
                embedding=FLAGS.embedding,
                fasttext_word_dim=300,
                acoustic_features_dim=4)
            valid_model.update(
                model.loss_graph(valid_model.logits, FLAGS.batch_size,
                                 FLAGS.num_unroll_steps))

        if FLAGS.load_model_for_training:
            saver.restore(session, FLAGS.load_model_for_training)
            string = str('Loaded model from' +
                         str(FLAGS.load_model_for_training) +
                         'saved at global step' +
                         str(train_model.global_step.eval()))
            print(string)
        else:
            tf.global_variables_initializer().run()
            session.run(train_model.clear_char_embedding_padding)
            string = str('Created and initialized fresh model. Size:' +
                         str(model.model_size()))
            print(string)
        summary_writer = tf.summary.FileWriter(FLAGS.train_dir,
                                               graph=session.graph)

        # take learning rate from CLI, not from saved graph
        session.run(tf.assign(train_model.learning_rate,
                              FLAGS.learning_rate))

        # The fetch list is identical with and without fastText; only the
        # feed differs.
        train_fetches = [
            train_model.loss, train_model.train_op,
            train_model.final_rnn_state, train_model.global_norm,
            train_model.global_step,
            train_model.clear_char_embedding_padding
        ]

        # training starts here
        best_valid_loss = None
        rnn_state = session.run(train_model.initial_rnn_state)
        for epoch in range(FLAGS.max_epochs):

            epoch_start_time = time.time()
            avg_train_loss = 0.0
            count = 0
            for x, y, batch_ft in iter_batches(train_reader, train_ft_reader):
                count += 1
                start_time = time.time()

                feed = {
                    train_model.input: x,
                    train_model.targets: y,
                    train_model.initial_rnn_state: rnn_state
                }
                if use_fasttext:
                    feed[train_model.input2] = batch_ft

                # Six fetches -> six results (the fastText branch used to
                # unpack seven and raised ValueError on the first batch).
                loss, _, rnn_state, gradient_norm, step, _ = session.run(
                    train_fetches, feed)

                # Exponential moving average of the training loss.
                avg_train_loss += 0.05 * (loss - avg_train_loss)

                time_elapsed = time.time() - start_time

                if count % FLAGS.print_every == 0:
                    string = str(
                        '%6d: %d [%5d/%5d], train_loss/perplexity = %6.8f/%6.7f secs/batch = %.4fs, grad.norm=%6.8f'
                        % (step, epoch, count, train_reader.length, loss,
                           np.exp(loss), time_elapsed, gradient_norm))
                    print(string)
            epoch_training_time = str(time.time() - epoch_start_time)
            print('Epoch training time:' + epoch_training_time)
            epochs_results['epoch_training_time'].append(epoch_training_time)

            # epoch done: time to evaluate
            avg_valid_loss = 0.0
            count = 0
            rnn_state = session.run(valid_model.initial_rnn_state)
            for x, y, batch_ft in iter_batches(valid_reader, valid_ft_reader):
                count += 1
                start_time = time.time()

                feed = {
                    valid_model.input: x,
                    valid_model.targets: y,
                    valid_model.initial_rnn_state: rnn_state,
                }
                # Only feed the fastText input when it is in use; without
                # fastText there is no valid_ft_reader to draw from.
                if use_fasttext:
                    feed[valid_model.input2] = batch_ft

                loss, rnn_state = session.run(
                    [valid_model.loss, valid_model.final_rnn_state], feed)

                if count % FLAGS.print_every == 0:
                    string = str(
                        "\t> validation loss = %6.8f, perplexity = %6.8f" %
                        (loss, np.exp(loss)))
                    print(string)
                avg_valid_loss += loss / valid_reader.length

            print("at the end of epoch:" + str(epoch))
            epochs_results['epoch_number'].append(str(epoch))
            print("train loss = %6.8f, perplexity = %6.8f" %
                  (avg_train_loss, np.exp(avg_train_loss)))
            epochs_results['train_loss'].append(avg_train_loss)
            epochs_results['train_perplexity'].append(np.exp(avg_train_loss))
            print("validation loss = %6.8f, perplexity = %6.8f" %
                  (avg_valid_loss, np.exp(avg_valid_loss)))
            epochs_results['validation_loss'].append(avg_valid_loss)
            epochs_results['valid_perplexity'].append(np.exp(avg_valid_loss))

            save_as = '%s/epoch%03d_%.4f.model' % (FLAGS.train_dir, epoch,
                                                   avg_valid_loss)
            saver.save(session, save_as)
            print('Saved model' + str(save_as))
            epochs_results['model_name'].append(str(save_as))
            epochs_results['learning_rate'].append(
                str(session.run(train_model.learning_rate)))

            # write out summary events
            summary = tf.Summary(value=[
                tf.Summary.Value(tag="train_loss",
                                 simple_value=avg_train_loss),
                tf.Summary.Value(tag="train_perplexity",
                                 simple_value=np.exp(avg_train_loss)),
                tf.Summary.Value(tag="valid_loss",
                                 simple_value=avg_valid_loss),
                tf.Summary.Value(tag="valid_perplexity",
                                 simple_value=np.exp(avg_valid_loss)),
            ])
            summary_writer.add_summary(summary, step)

            # decide if need to decay learning rate
            if best_valid_loss is not None and np.exp(avg_valid_loss) > np.exp(
                    best_valid_loss) - FLAGS.decay_when:
                print(
                    'validation perplexity did not improve enough, decay learning rate'
                )
                current_learning_rate = session.run(train_model.learning_rate)
                string = str('learning rate was:' + str(current_learning_rate))
                print(string)
                current_learning_rate *= FLAGS.learning_rate_decay
                if current_learning_rate < 1.e-3:
                    print('learning rate too small - stopping now')
                    break
                session.run(
                    train_model.learning_rate.assign(current_learning_rate))
                string = str('new learning rate is:' +
                             str(current_learning_rate))
                print(string)
            else:
                best_valid_loss = avg_valid_loss

    # Save model performance data
    pd.DataFrame(epochs_results).to_csv(FLAGS.train_dir + '/train_results.csv')
예제 #9
0
def main(_):
    """Load a trained checkpoint and print words sampled from the model.

    Starting from uniform logits, repeatedly samples a word id from the
    temperature-scaled softmax, prints the word, and feeds its characters
    (wrapped in '{' and '}') back into the network to get the next logits.

    Args:
        _: unused positional argument.

    Returns:
        -1 when ``FLAGS.load_model`` is unset or the path does not exist;
        otherwise None after ``FLAGS.num_samples`` words were printed.
    """

    if FLAGS.load_model is None:
        print('Please specify checkpoint file to load model from')
        return -1

    if not os.path.exists(FLAGS.load_model):
        print('Checkpoint file not found', FLAGS.load_model)
        return -1

    word_vocab, char_vocab, word_tensors, char_tensors, max_word_length = \
        load_data(FLAGS.data_dir, FLAGS.max_word_length, eos=FLAGS.EOS)

    print('initialized test dataset reader')

    with tf.Graph().as_default(), tf.Session() as session:

        # tensorflow seed must be inside graph
        tf.set_random_seed(FLAGS.seed)
        np.random.seed(seed=FLAGS.seed)

        # build inference graph (one word per step, batch of one)
        with tf.variable_scope("Model"):
            # NOTE(security): eval() runs whatever expression the flag
            # strings contain; only pass trusted values for these flags.
            m = model.inference_graph(
                    char_vocab_size=char_vocab.size,
                    word_vocab_size=word_vocab.size,
                    char_embed_size=FLAGS.char_embed_size,
                    batch_size=1,
                    num_highway_layers=FLAGS.highway_layers,
                    num_rnn_layers=FLAGS.rnn_layers,
                    rnn_size=FLAGS.rnn_size,
                    max_word_length=max_word_length,
                    kernels=eval(FLAGS.kernels),
                    kernel_features=eval(FLAGS.kernel_features),
                    num_unroll_steps=1,
                    dropout=0)

            # we need global step only because we want to read it from the model
            global_step = tf.Variable(0, dtype=tf.int32, name='global_step')

        saver = tf.train.Saver()
        saver.restore(session, FLAGS.load_model)
        print('Loaded model from', FLAGS.load_model, 'saved at global step', global_step.eval())

        # sampling starts here
        logits = np.ones((word_vocab.size,))
        rnn_state = session.run(m.initial_rnn_state)
        for _sample in range(FLAGS.num_samples):
            # Temperature-scaled softmax over the vocabulary.
            logits = logits / FLAGS.temperature
            prob = np.exp(logits)
            prob /= np.sum(prob)
            prob = prob.ravel()
            ix = np.random.choice(len(prob), p=prob)

            word = word_vocab.token(ix)
            if word == '|':  # EOS
                print('<unk>', end=' ')
            elif word == '+':
                print('\n')
            else:
                print(word, end=' ')

            # Keep '{' + word + '}' inside the fixed-width character buffer;
            # a longer word used to raise IndexError.  Presumably this
            # mirrors how load_data truncates long words -- TODO confirm.
            chars = '{' + word[:max(max_word_length - 2, 0)] + '}'
            char_input = np.zeros((1, 1, max_word_length))
            for pos, c in enumerate(chars):
                char_input[0, 0, pos] = char_vocab[c]

            # Carry the recurrent state forward; it used to be stored in an
            # unused variable, so every step restarted from the initial state.
            logits, rnn_state = session.run([m.logits, m.final_rnn_state],
                                            {m.input: char_input,
                                             m.initial_rnn_state: rnn_state})
            logits = np.array(logits)
def evaluation():
    """Restore the checkpoint at ``SAVE_PATH`` and print mean test metrics.

    Builds the inference and loss graphs, runs every batch of the 'Test'
    reader through them and prints the batch-averaged ``1 - loss`` values
    for arousal, valence and liking.
    """

    assert FLAGS.load_model is not None

    inputs_by_split, labels_by_split, seqs_by_split = dl.make_batches()

    split = 'Test'
    test_reader = dl.DataReader(inputs_by_split[split],
                                labels_by_split[split],
                                seqs_by_split[split],
                                FLAGS.batch_size,
                                FLAGS.num_unroll_steps)

    labels = tf.placeholder(tf.float32,
                            [None, FLAGS.num_unroll_steps, 3],
                            name='labels')

    graph_options = dict(
        word_vocab_size=FLAGS.word_vocab_size,
        kernels=eval(FLAGS.kernels),
        kernel_features=eval(FLAGS.kernel_features),
        rnn_size=FLAGS.rnn_size,
        dropout=FLAGS.dropout,
        num_rnn_layers=FLAGS.rnn_layers,
        num_highway_layers=FLAGS.highway_layers,
        num_unroll_steps=FLAGS.num_unroll_steps,
        max_sent_length=FLAGS.max_sent_length,
        batch_size=FLAGS.batch_size,
        embed_size=FLAGS.word_embed_size,
    )
    test_model = model.inference_graph(**graph_options)

    predictions = test_model.predictions

    print(predictions)

    losses = model.loss_graph(predictions, labels)

    # Each reported metric is one minus the corresponding loss; the ops are
    # created in the order arousal, valence, liking.
    metric_ops = [
        1. - losses.loss_arousal,
        1. - losses.loss_valence,
        1. - losses.loss_liking,
    ]

    saver = tf.train.Saver()

    with tf.Session() as sess:

        print('load model %s ...' % SAVE_PATH)
        saver.restore(sess, SAVE_PATH)
        print('done!')

        per_batch = []
        for x, y in test_reader.iter():
            arousal, valence, liking = sess.run(
                metric_ops, feed_dict={test_model.input: x, labels: y})
            per_batch.append([arousal, valence, liking])

        # Average each metric over all batches.
        averaged = np.mean(np.array(per_batch), axis=0)

        print('Test Reuslt: arousal: %.4f -- valence: %.4f -- liking: %.4f' %
              (averaged[0], averaged[1], averaged[2]))
예제 #11
0
def _collect_devel_predictions(sess, predictions, train_model, eval_reader,
                               embedding_matrix):
    """Run the model over every devel batch; return stacked (preds, labels).

    For each item yielded by ``eval_reader.iter()`` the per-chunk predictions
    are stacked and, together with the labels reshaped to ``[-1, 3]``, cut to
    the summed sequence-length entries of that item.
    """
    stacked_predictions = []
    stacked_labels = []
    for eval_x_list, eval_y_list, eval_z_list in eval_reader.iter():
        valid_rows = 0
        chunk_predictions = []
        for eval_x, eval_z in zip(eval_x_list, eval_z_list):
            valid_rows += np.sum(eval_z)
            chunk_predictions.append(
                sess.run(predictions,
                         feed_dict={
                             train_model.input: eval_x,
                             train_model.sequence_length: eval_z,
                             train_model.batch_size: eval_x.shape[0],
                             train_model.training: False,
                             train_model.word_embedding: embedding_matrix,
                             # all dropout disabled for evaluation
                             train_model.dropout_LSTM: 0.0,
                             train_model.dropout_text: 0.0,
                             train_model.dropout_atdnn: 0.0,
                             train_model.dropout_trnn: 0.0,
                             train_model.dropout_mlattention: 0.0
                         }))

        stacked_predictions.append(np.vstack(chunk_predictions)[:valid_rows])
        stacked_labels.append(
            np.array(eval_y_list).reshape([-1, 3])[:valid_rows])

    return np.vstack(stacked_predictions), np.vstack(stacked_labels)


def train():
    """Train on the 'Train' split, evaluating on 'Devel' every 9th batch.

    The optimised loss is ``AV_CCC`` from
    ``model.loss_graph_ccc_arousal_valence``.  Training and devel losses are
    appended to ``ArchivePathTrain`` / ``ArchivePathEval`` and written as
    summaries; whenever the devel loss (``2 - arousal - valence`` metric) is
    below the threshold, a checkpoint is saved and logged to
    ``LOGGING_PATH``.
    """
    dataset_tensors, labels_tensors = dl.make_batches()

    input_tensor_tr, label_tensor_tr, seq_tensor_tr = dl.sequence_init(
        dataset_tensors,
        labels_tensors,
        FLAGS.num_unroll_steps,
        'Train',
        allow_short_seq=False)
    input_tensor_te, label_tensor_te, seq_tensor_te = dl.sequence_init(
        dataset_tensors,
        labels_tensors,
        FLAGS.num_unroll_steps,
        'Devel',
        allow_short_seq=True)

    train_reader = dl.TrainDataReader(input_tensor_tr, label_tensor_tr,
                                      seq_tensor_tr, FLAGS.batch_size,
                                      FLAGS.num_unroll_steps, False)

    eval_reader = dl.EvalDataReader(input_tensor_te, label_tensor_te,
                                    seq_tensor_te, FLAGS.batch_size_eval,
                                    FLAGS.num_unroll_steps, False)

    labels = tf.placeholder(tf.float32, [None, FLAGS.num_unroll_steps, 3],
                            name='labels')

    # NOTE(security): eval() runs whatever expression the flag strings
    # contain; only pass trusted values for these flags.
    train_model = model.inference_graph(
        word_vocab_size=FLAGS.word_vocab_size,
        kernels=eval(FLAGS.kernels),
        kernel_features=eval(FLAGS.kernel_features),
        rnn_size=FLAGS.rnn_size,
        dropout=FLAGS.dropout,
        num_rnn_layers=FLAGS.rnn_layers,
        num_highway_layers=FLAGS.highway_layers,
        num_unroll_steps=FLAGS.num_unroll_steps,
        max_sent_length=FLAGS.max_sent_length,
        embed_size=FLAGS.word_embed_size,
        trnn_size=eval(FLAGS.trnn_size),
        num_trnn_layers=eval(FLAGS.trnn_layers),
        num_heads=FLAGS.head_attention_layers)

    predictions_arousal = train_model.predictions_arousal
    predictions_valence = train_model.predictions_valence
    predictions_liking = train_model.predictions_liking

    predictions_AV = tf.concat([predictions_arousal, predictions_valence], 1)
    predictions = tf.concat(
        [predictions_arousal, predictions_valence, predictions_liking], 1)

    embedding_matrix = dl.loadPickle(Embedding_PATH,
                                     'Embedding_300_fastText_training.pkl')

    AV_losses = model.loss_graph_ccc_arousal_valence(predictions_AV, labels)
    eval_model = model.metric_graph()

    loss_av = AV_losses.AV_CCC

    eval_arousal = eval_model.eval_metric_arousal
    eval_valence = eval_model.eval_metric_valence
    eval_liking = eval_model.eval_metric_liking

    optimize_graph = model.training_graph(loss_av, FLAGS.learning_rate,
                                          FLAGS.max_grad_norm)
    train_op = optimize_graph.train_op

    saver = tf.train.Saver(max_to_keep=100)

    with tf.Session() as sess:

        sess.run(tf.global_variables_initializer())
        # Backslashes escaped explicitly; the resulting paths are unchanged
        # ('.\logs\train' and '.\logs\eval').
        train_writer = tf.summary.FileWriter('.\\logs\\train',
                                             graph=sess.graph)
        eval_writer = tf.summary.FileWriter('.\\logs\\eval', graph=sess.graph)

        # NOTE(review): `best` is never updated, so it acts as a fixed
        # threshold: every devel loss below it triggers a checkpoint.
        best, inx = 0.92, 1

        epoch = 0
        global_step = 0

        while epoch < FLAGS.max_epochs:

            epoch += 1
            train_reader.make_batches()
            for batch, (x, y) in enumerate(train_reader.iter(), start=1):

                _, loss_value = sess.run(
                    [train_op, loss_av],
                    feed_dict={
                        train_model.input: x,
                        labels: y,
                        # NOTE(review): fixed sequence length of 96 for every
                        # training example -- confirm it matches the data.
                        train_model.sequence_length: [96] * x.shape[0],
                        train_model.batch_size: x.shape[0],
                        train_model.training: True,
                        train_model.word_embedding: embedding_matrix,
                        train_model.dropout_LSTM: 0.0,
                        train_model.dropout_text: 0.1,
                        train_model.dropout_atdnn: 0.3,
                        train_model.dropout_trnn: 0.3,
                        train_model.dropout_mlattention: 0.2
                    })

                with open(ArchivePathTrain, 'a') as apt:
                    apt.write(str(loss_value) + ';' + str(global_step))
                    apt.write('\n')
                print('Epoch: %5d/%5d -- batch: %5d -- loss: %.4f' %
                      (epoch, FLAGS.max_epochs, batch, loss_value))

                summary = tf.Summary(value=[
                    tf.Summary.Value(tag="TRAIN_LOSS",
                                     simple_value=loss_value)
                ])

                train_writer.add_summary(summary, global_step)

                if batch % 9 == 0:  # 7, change print from 7 to 9 20180725
                    print(
                        '-------------------Devel Set Start------------------------------'
                    )

                    eval_x_total, eval_y = _collect_devel_predictions(
                        sess, predictions, train_model, eval_reader,
                        embedding_matrix)

                    e_arousal, e_valence, e_liking = sess.run(
                        [eval_arousal, eval_valence, eval_liking],
                        feed_dict={
                            eval_model.eval_predictions: eval_x_total,
                            eval_model.eval_labels: eval_y
                        })
                    eval_res = np.array([e_arousal, e_valence, e_liking])
                    eval_loss = 2. - eval_res[0] - eval_res[1]

                    with open(ArchivePathEval, 'a') as ape:
                        ape.write(str(eval_loss) + ';' + str(global_step))
                        ape.write('\n')

                    summary_eval = tf.Summary(value=[
                        tf.Summary.Value(tag="Eval_LOSS",
                                         simple_value=eval_loss)
                    ])

                    eval_writer.add_summary(summary_eval, global_step)

                    if eval_loss < best:
                        checkpoint_path = SAVE_PATH + '-{}'.format(inx)
                        saver.save(sess, checkpoint_path)

                        # Log the checkpoint that was just written; the
                        # index used to be bumped first, so the log named a
                        # file that did not exist yet.
                        with open(LOGGING_PATH, 'a') as log:
                            log.write('Model, ' + checkpoint_path + '\n')
                            log.write(
                                '%s, Epoch: %d, Batch: %d, Loss: %.4f, Arousal: %.4f, Valence: %.4f\n'
                                % ('Devel', epoch, batch, eval_loss,
                                   eval_res[0], eval_res[1]))
                            log.write(
                                '======================================================\n'
                            )
                        inx += 1

                    print(
                        'Devel Set, Epoch: %5d/%5d -- batch: %5d -- loss: _%.4f -- arousal: %.4f -- valence: %.4f -- liking: %.4f'
                        % (epoch, FLAGS.max_epochs, batch, eval_loss,
                           eval_res[0], eval_res[1], eval_res[2]))

                    print(
                        '---------------------Devel Finished----------------------'
                    )

                global_step += 1
예제 #12
0
    def xest(self):
        """Disabled corner-case check of the inference graph with fixed weights.

        The name does not start with ``test``, so a test runner that collects
        ``test*`` methods will not pick it up.  A tiny graph is built, every
        weight is overridden through the feed dict, and the resulting logits
        are compared with the expected values.
        """

        with self.test_session() as sess:

            m = model.inference_graph(char_vocab_size=5,
                                      word_vocab_size=5,
                                      char_embed_size=3,
                                      batch_size=2,
                                      num_highway_layers=0,
                                      num_rnn_layers=1,
                                      rnn_size=5,
                                      max_word_length=5,
                                      kernels=[2],
                                      kernel_features=[2],
                                      num_unroll_steps=2,
                                      dropout=0.0)

            # All-zero 7x20 LSTM weight matrix.
            lstm_matrix = np.zeros((7, 20), dtype=int)
            # Bias is zero except for the first and last of its 20 entries.
            lstm_bias = np.zeros(20, dtype=int)
            lstm_bias[[0, 19]] = 1

            # Fetch from the graph built above: the fed shapes match `m`,
            # whereas the fetches used to go through `self.model`.
            logits, input_embedded = sess.run(
                [
                    m.logits,
                    m.input_embedded,
                ], {
                    'LSTM/RNN/BasicLSTMCell/Linear/Matrix:0':
                    lstm_matrix,
                    'LSTM/RNN/BasicLSTMCell/Linear/Bias:0':
                    lstm_bias,
                    'TDNN/kernel_2/w:0':
                    np.array([[[[1, 1], [1, 1], [1, 1]],
                               [[1, 1], [1, 1], [1, 1]]]]),
                    'TDNN/kernel_2/b:0':
                    np.array([0, 0]),
                    'Embedding/char_embedding:0':
                    np.array([
                        [0, 0, 0],
                        [1, 0, 0],
                        [0, 1, 0],
                        [0, 0, 1],
                        [-1, 0, 1],
                    ]),
                    'input:0':
                    np.array([[[1, 3, 2, 0, 0], [1, 4, 2, 0, 0]],
                              [[1, 3, 3, 2, 0], [1, 4, 4, 2, 0]]]),
                })

            print(logits)
            print(input_embedded)
            self.assertAllClose(
                logits,
                np.array([[[0, 1, 0, 0, 0], [0, 0, 0, 0, 0]],
                          [[0, 0, 0, 0, 0], [0, 0, 0, 0, 0]]]))
예제 #13
0
def evaluation(model_path):
    """Restore a checkpoint and compute the metrics on the 'Test' sequences.

    A fresh graph is built (inference graph plus metric graph), the variables
    stored at ``model_path`` are restored, predictions are collected for every
    mini-batch produced by the evaluation reader, and the arousal / valence /
    liking metrics are computed once over all collected predictions.

    Args:
        model_path: checkpoint path passed to ``tf.train.Saver.restore``.

    Returns:
        Tuple ``(eval_loss, arousal, valence)`` where ``eval_loss`` is
        ``2 - arousal - valence``.

    Raises:
        AssertionError: if ``FLAGS.load_model`` is not set.
        ValueError: if the evaluation reader yields no mini-batches.
    """

    g = tf.Graph()
    with g.as_default():
        assert FLAGS.load_model is not None, 'FLAGS.load_model must be set'

        dataset_tensors, labels_tensors = dl.make_batches()

        input_tensor_te, label_tensor_te, seq_tensor_te = dl.sequence_init(dataset_tensors, labels_tensors,
                                                                           FLAGS.num_unroll_steps, 'Test',
                                                                           allow_short_seq=True)

        eval_reader = dl.EvalDataReader(input_tensor_te, label_tensor_te, seq_tensor_te, FLAGS.batch_size_eval,
                                        FLAGS.num_unroll_steps, False)

        # NOTE(review): several flags are parsed with eval(); this executes
        # arbitrary code if a flag value comes from an untrusted source.
        test_model = model.inference_graph(word_vocab_size=FLAGS.word_vocab_size,
                                           kernels=eval(FLAGS.kernels),
                                           kernel_features=eval(FLAGS.kernel_features),
                                           rnn_size=FLAGS.rnn_size,
                                           dropout=FLAGS.dropout,
                                           num_rnn_layers=FLAGS.rnn_layers,
                                           num_highway_layers=FLAGS.highway_layers,
                                           num_unroll_steps=FLAGS.num_unroll_steps,
                                           max_sent_length=FLAGS.max_sent_length,
                                           embed_size=FLAGS.word_embed_size,
                                           trnn_size=eval(FLAGS.trnn_size),
                                           num_trnn_layers=eval(FLAGS.trnn_layers),
                                           num_heads=FLAGS.head_attention_layers)

        embedding_matrix = dl.loadPickle(Embedding_PATH, 'Embedding_300_fastText_training.pkl')

        # One prediction matrix with the three targets concatenated on axis 1.
        predictions = tf.concat([test_model.predictions_arousal,
                                 test_model.predictions_valence,
                                 test_model.predictions_liking], 1)

        eval_model = model.metric_graph()

        eval_arousal = eval_model.eval_metric_arousal
        eval_valence = eval_model.eval_metric_valence
        eval_liking = eval_model.eval_metric_liking

        saver = tf.train.Saver()

        with tf.Session() as sess:

            print('load model %s ...' % model_path)
            saver.restore(sess, model_path)
            print('done!')

            # Per-mini-batch results are collected in lists and stacked once
            # at the end instead of re-copying the running total every step.
            all_preds = []
            all_labels = []
            for eval_x_list, eval_y_list, eval_z_list in eval_reader.iter():

                # cnt accumulates np.sum of the sequence-length arrays and is
                # used below to trim predictions and labels to the same
                # number of rows (presumably dropping padding -- TODO confirm).
                cnt = 0
                mb_preds = []
                for eval_x, eval_z in zip(eval_x_list, eval_z_list):
                    cnt += np.sum(eval_z)
                    eval_tmp_preds = sess.run([predictions], feed_dict={
                        test_model.input: eval_x,
                        test_model.sequence_length: eval_z,
                        test_model.batch_size: eval_x.shape[0],
                        test_model.training: False,
                        test_model.word_embedding: embedding_matrix,
                        test_model.dropout_LSTM: 0.0,
                        test_model.dropout_text: 0.0,
                        test_model.dropout_atdnn: 0.0,
                        test_model.dropout_trnn: 0.0,
                        test_model.dropout_mlattention: 0.0
                    })
                    mb_preds.append(eval_tmp_preds[0])

                all_preds.append(np.vstack(mb_preds)[:cnt])
                all_labels.append(np.array(eval_y_list).reshape([-1, 3])[:cnt])

            if not all_preds:
                raise ValueError('evaluation reader produced no mini-batches')

            eval_x_total = np.vstack(all_preds)
            eval_y = np.vstack(all_labels)

            e_arousal, e_valence, e_liking = sess.run([eval_arousal, eval_valence, eval_liking],
                                                      feed_dict={
                                                          eval_model.eval_predictions: eval_x_total,
                                                          eval_model.eval_labels: eval_y
                                                      })
            eval_res = np.array([e_arousal, e_valence, e_liking])
            # The liking metric is reported but not part of the loss.
            eval_loss = 2. - eval_res[0] - eval_res[1]

            print('loss: %.4f -- arousal: %.4f -- valence: %.4f -- liking: %.4f'
                  % (eval_loss, eval_res[0], eval_res[1], eval_res[2]))

            print('done evaluation------------------------------------------\n')
    return eval_loss, eval_res[0], eval_res[1]
예제 #14
0
def main(_):
    ''' Loads trained model and samples FLAGS.num_samples words from it

    Returns -1 when no checkpoint is specified or the checkpoint's ".meta"
    file does not exist; otherwise prints the sampled text and returns None.
    '''

    if FLAGS.load_model is None:
        print('Please specify checkpoint file to load model from')
        return -1

    if not os.path.exists(FLAGS.load_model + '.meta'):
        print('Checkpoint file not found', FLAGS.load_model)
        return -1

    word_vocab, char_vocab, word_tensors, char_tensors, max_word_length = \
        load_data(FLAGS.data_dir, FLAGS.max_word_length, eos=FLAGS.EOS)

    print('initialized test dataset reader')

    with tf.Graph().as_default(), tf.Session() as session:

        # tensorflow seed must be inside graph
        tf.set_random_seed(FLAGS.seed)
        np.random.seed(seed=FLAGS.seed)
        ''' build inference graph '''
        with tf.variable_scope("Model"):
            # NOTE(review): eval() on flag strings executes arbitrary code if
            # the flags come from an untrusted source.
            # batch_size=1 / num_unroll_steps=1: one word is fed per step.
            m = model.inference_graph(char_vocab_size=char_vocab.size,
                                      word_vocab_size=word_vocab.size,
                                      char_embed_size=FLAGS.char_embed_size,
                                      batch_size=1,
                                      num_highway_layers=FLAGS.highway_layers,
                                      num_rnn_layers=FLAGS.rnn_layers,
                                      rnn_size=FLAGS.rnn_size,
                                      max_word_length=max_word_length,
                                      kernels=eval(FLAGS.kernels),
                                      kernel_features=eval(
                                          FLAGS.kernel_features),
                                      num_unroll_steps=1,
                                      dropout=0)

            # we need global step only because we want to read it from the model
            global_step = tf.Variable(0, dtype=tf.int32, name='global_step')

        saver = tf.train.Saver()
        saver.restore(session, FLAGS.load_model)
        print('Loaded model from', FLAGS.load_model, 'saved at global step',
              global_step.eval())
        ''' sampling starts here '''
        # The first word is drawn from a uniform distribution (all-ones logits).
        logits = np.ones((word_vocab.size, ))
        rnn_state = session.run(m.initial_rnn_state)
        for _sample in range(FLAGS.num_samples):
            # Temperature-scaled softmax. Subtracting the maximum leaves the
            # distribution unchanged but keeps np.exp from overflowing to inf
            # (and the probabilities from becoming NaN) at low temperatures.
            scaled = logits / FLAGS.temperature
            prob = np.exp(scaled - np.max(scaled))
            prob /= np.sum(prob)
            prob = prob.ravel()
            ix = np.random.choice(len(prob), p=prob)

            word = word_vocab.token(ix)
            # '|' is printed as '<unk>' and '+' as a line break (presumably the
            # unknown-word and end-of-sentence markers -- TODO confirm against
            # load_data).
            if word == '|':
                print('<unk>', end=' ')
            elif word == '+':
                print('\n')
            else:
                print(word, end=' ')

            # Encode the sampled word, wrapped in '{' and '}', as the next
            # character-level input; unused positions stay 0.
            char_input = np.zeros((1, 1, max_word_length))
            for pos, c in enumerate('{' + word + '}'):
                char_input[0, 0, pos] = char_vocab[c]

            logits, rnn_state = session.run([m.logits, m.final_rnn_state], {
                m.input: char_input,
                m.initial_rnn_state: rnn_state
            })
            logits = np.array(logits)
 def model(self):
     ''' Builds the small inference graph (batch of 1, one unroll step) '''
     graph_config = dict(
         char_vocab_size=5,
         word_vocab_size=5,
         char_embed_size=3,
         batch_size=1,
         num_highway_layers=0,
         num_rnn_layers=1,
         rnn_size=5,
         max_word_length=5,
         kernels=[2],
         kernel_features=[1],
         num_unroll_steps=1,
         dropout=0.0,
     )
     return model.inference_graph(**graph_config)
예제 #16
0
def main(_):
    ''' Loads trained model and evaluates it on test split

    Returns -1 when no checkpoint is specified, the checkpoint path does not
    exist, or the test reader yields no batches; otherwise prints the average
    test loss / perplexity and returns None.
    '''

    if FLAGS.load_model is None:
        print('Please specify checkpoint file to load model from')
        return -1

    if not os.path.exists(FLAGS.load_model):
        print('Checkpoint file not found', FLAGS.load_model)
        return -1

    word_vocab, char_vocab, word_tensors, char_tensors, max_word_length = load_data(FLAGS.data_dir, FLAGS.max_word_length, eos=FLAGS.EOS)

    test_reader = DataReader(word_tensors['test'], char_tensors['test'], FLAGS.batch_size, FLAGS.num_unroll_steps)

    print('initialized test dataset reader')

    with tf.Graph().as_default(), tf.Session() as session:

        # tensorflow seed must be inside graph
        tf.set_random_seed(FLAGS.seed)
        np.random.seed(seed=FLAGS.seed)

        ''' build inference graph '''
        with tf.variable_scope("Model"):
            # NOTE(review): eval() on flag strings executes arbitrary code if
            # the flags come from an untrusted source.
            m = model.inference_graph(
                    char_vocab_size=char_vocab.size,
                    word_vocab_size=word_vocab.size,
                    char_embed_size=FLAGS.char_embed_size,
                    batch_size=FLAGS.batch_size,
                    num_highway_layers=FLAGS.highway_layers,
                    num_rnn_layers=FLAGS.rnn_layers,
                    rnn_size=FLAGS.rnn_size,
                    max_word_length=max_word_length,
                    kernels=eval(FLAGS.kernels),
                    kernel_features=eval(FLAGS.kernel_features),
                    num_unroll_steps=FLAGS.num_unroll_steps,
                    dropout=0)
            m.update(model.loss_graph(m.logits, FLAGS.batch_size, FLAGS.num_unroll_steps))

            # created so that the saved global step can be restored and printed
            global_step = tf.Variable(0, dtype=tf.int32, name='global_step')

        saver = tf.train.Saver()
        saver.restore(session, FLAGS.load_model)
        print('Loaded model from', FLAGS.load_model, 'saved at global step', global_step.eval())

        ''' evaluation starts here '''
        # The RNN state is carried over from one batch to the next.
        rnn_state = session.run(m.initial_rnn_state)
        count = 0
        avg_loss = 0
        start_time = time.time()
        for x, y in test_reader.iter():
            count += 1
            loss, rnn_state = session.run([
                m.loss,
                m.final_rnn_state
            ], {
                m.input  : x,
                m.targets: y,
                m.initial_rnn_state: rnn_state
            })

            avg_loss += loss

        # Without this guard an empty test split fails with ZeroDivisionError.
        if count == 0:
            print('Test reader produced no batches, nothing to evaluate')
            return -1

        avg_loss /= count
        time_elapsed = time.time() - start_time

        print("test loss = %6.8f, perplexity = %6.8f" % (avg_loss, np.exp(avg_loss)))
        print("test samples:", count*FLAGS.batch_size, "time elapsed:", time_elapsed, "time per one batch:", time_elapsed/count)
예제 #17
0
def main(print):
    ''' Trains model from data

    `print` is the callable used for every progress message; it is always
    invoked with exactly one string argument. It shadows the builtin inside
    this function; the parameter name is kept so existing callers keep working.

    Writes train_parameters.csv and train_results.csv into FLAGS.train_dir and
    saves one checkpoint per epoch.
    '''
    # Local import: only needed to pad the optional fastText batch stream.
    import itertools

    if not os.path.exists(FLAGS.train_dir):
        os.mkdir(FLAGS.train_dir)
        print('Created training directory' + FLAGS.train_dir)

    # CSV initialize
    df_train_params = pd.DataFrame(FLAGS.flag_values_dict(), index=range(1))
    df_train_params['comment'] = ''
    df_train_params.to_csv(FLAGS.train_dir + '/train_parameters.csv')
    epochs_results = initialize_epoch_data_dict()

    fasttext_model_path = None
    if FLAGS.fasttext_model_path:
        fasttext_model_path = FLAGS.fasttext_model_path

    word_vocab, char_vocab, word_tensors, char_tensors, max_word_length, words_list, wers, acoustics = \
        load_data(FLAGS.data_dir, FLAGS.max_word_length, num_unroll_steps=FLAGS.num_unroll_steps, eos=FLAGS.EOS, batch_size=FLAGS.batch_size)

    word_vocab_valid, char_vocab_valid, word_tensors_valid, char_tensors_valid, max_word_length_valid, words_list_valid, wers_valid,\
    acoustics_valid, files_name_valid, kaldi_sents_index_valid = \
        load_test_data(FLAGS.data_dir, FLAGS.max_word_length, num_unroll_steps=FLAGS.num_unroll_steps, eos=FLAGS.EOS,
                       datas=['valid'])

    # The fastText readers exist only when the embedding uses fastText; they
    # stay None otherwise so the loops below can run without them.
    fasttext_model = None
    train_ft_reader = None
    valid_ft_reader = None
    if 'fasttext' in FLAGS.embedding:
        fasttext_model = FasttextModel(
            fasttext_path=fasttext_model_path).get_fasttext_model()

        train_ft_reader = DataReaderFastText(
            words_list=words_list,
            batch_size=FLAGS.batch_size,
            num_unroll_steps=FLAGS.num_unroll_steps,
            model=fasttext_model,
            data='train',
            acoustics=acoustics)

        # NOTE(review): this reader is built from the training words_list /
        # acoustics with data='valid'. Confirm those containers also hold the
        # validation split; otherwise words_list_valid / acoustics_valid were
        # probably intended.
        valid_ft_reader = DataReaderFastText(
            words_list=words_list,
            batch_size=FLAGS.batch_size,
            num_unroll_steps=FLAGS.num_unroll_steps,
            model=fasttext_model,
            data='valid',
            acoustics=acoustics)

    train_reader = DataReader(word_tensors['train'], char_tensors['train'],
                              FLAGS.batch_size, FLAGS.num_unroll_steps,
                              wers['train'])

    valid_reader = TestDataReader(word_tensors_valid['valid'],
                                  char_tensors_valid['valid'],
                                  FLAGS.batch_size, FLAGS.num_unroll_steps,
                                  wers_valid['valid'],
                                  files_name_valid['valid'],
                                  kaldi_sents_index_valid['valid'])

    print('initialized all dataset readers')

    with tf.Graph().as_default(), tf.Session() as session:

        # tensorflow seed must be inside graph
        tf.set_random_seed(FLAGS.seed)
        np.random.seed(seed=FLAGS.seed)
        ''' build training graph '''
        initializer = tf.random_uniform_initializer(-FLAGS.param_init,
                                                    FLAGS.param_init)
        # NOTE(review): eval() on flag strings executes arbitrary code if the
        # flags come from an untrusted source.
        with tf.variable_scope("Model", initializer=initializer):
            train_model = model.inference_graph(
                char_vocab_size=char_vocab.size,
                word_vocab_size=word_vocab.size,
                char_embed_size=FLAGS.char_embed_size,
                batch_size=FLAGS.batch_size,
                num_highway_layers=FLAGS.highway_layers,
                num_rnn_layers=FLAGS.rnn_layers,
                rnn_size=FLAGS.rnn_size,
                max_word_length=max_word_length,
                kernels=eval(FLAGS.kernels),
                kernel_features=eval(FLAGS.kernel_features),
                num_unroll_steps=FLAGS.num_unroll_steps,
                dropout=FLAGS.dropout,
                embedding=FLAGS.embedding,
                fasttext_word_dim=300,
                acoustic_features_dim=4)
            train_model.update(
                model.loss_graph(train_model.logits, FLAGS.batch_size))

            # scaling loss by FLAGS.num_unroll_steps effectively scales gradients by the same factor.
            # we need it to reproduce how the original Torch code optimizes. Without this, our gradients will be
            # much smaller (i.e. 35 times smaller) and to get system to learn we'd have to scale learning rate and max_grad_norm appropriately.
            # Thus, scaling gradients so that this trainer is exactly compatible with the original
            train_model.update(
                model.training_graph(train_model.loss * FLAGS.num_unroll_steps,
                                     FLAGS.learning_rate, FLAGS.max_grad_norm))
        ''' build graph for validation and testing (shares parameters with the training graph!) '''
        with tf.variable_scope("Model", reuse=True):
            valid_model = model.inference_graph(
                char_vocab_size=char_vocab_valid.size,
                word_vocab_size=word_vocab_valid.size,
                char_embed_size=FLAGS.char_embed_size,
                batch_size=FLAGS.batch_size,
                num_highway_layers=FLAGS.highway_layers,
                num_rnn_layers=FLAGS.rnn_layers,
                rnn_size=FLAGS.rnn_size,
                max_word_length=max_word_length,
                kernels=eval(FLAGS.kernels),
                kernel_features=eval(FLAGS.kernel_features),
                num_unroll_steps=FLAGS.num_unroll_steps,
                dropout=0.0,
                embedding=FLAGS.embedding,
                fasttext_word_dim=300,
                acoustic_features_dim=4)
            valid_model.update(
                model.loss_graph(valid_model.logits, FLAGS.batch_size))

        # create savers before creating more graph nodes, so that we do not save any vars defined below
        loader = None
        if FLAGS.load_model_for_training:
            # delete last layers (softmax) - SimpleLinear/Matrix + Bias
            # (positions 29 and 30 of the global variable list are skipped
            # when loading and initialized fresh below)
            variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
            subset_graph_for_loading = variables[:29] + variables[31:]
            loader = tf.train.Saver(max_to_keep=50,
                                    var_list=subset_graph_for_loading)
        # The checkpoint saver is needed in every run, not only when a
        # pre-trained model is loaded; it used to be created inside the `if`
        # above, so training from scratch failed at the first saver.save().
        saver = tf.train.Saver(max_to_keep=50)

        if FLAGS.load_model_for_training:
            loader.restore(session, FLAGS.load_model_for_training)
            string = str('Loaded model from' +
                         str(FLAGS.load_model_for_training) +
                         'saved at global step' +
                         str(train_model.global_step.eval()))
            print(string)
            session.run(tf.variables_initializer(var_list=variables[29:31]))
            string = str('initialized specific scope for fresh model. Size:' +
                         str(model.model_size()))
            print(string)
        else:
            tf.global_variables_initializer().run()
            session.run(train_model.clear_char_embedding_padding)
            string = str('Created and initialized fresh model. Size:' +
                         str(model.model_size()))
            print(string)
        summary_writer = tf.summary.FileWriter(FLAGS.train_dir,
                                               graph=session.graph)
        ''' take learning rate from CLI, not from saved graph '''
        session.run(tf.assign(train_model.learning_rate,
                              FLAGS.learning_rate), )
        ''' training starts here '''
        best_valid_loss = None
        rnn_state = session.run(train_model.initial_rnn_state)
        for epoch in range(FLAGS.max_epochs):

            epoch_start_time = time.time()
            avg_train_loss = 0.0
            count = 0
            # Pair every regular batch with a fastText batch, or with None
            # when fastText is not in use.
            if train_ft_reader is not None:
                train_ft_batches = train_ft_reader.iter()
            else:
                train_ft_batches = itertools.repeat(None)
            for batch_kim, batch_ft in zip(train_reader.iter(),
                                           train_ft_batches):
                x, y = batch_kim
                count += 1
                start_time = time.time()

                feed = {
                    train_model.input: x,
                    train_model.targets: y,
                    train_model.initial_rnn_state: rnn_state
                }
                if fasttext_model:
                    feed[train_model.input2] = batch_ft

                loss, _, rnn_state, gradient_norm, step, _, logits = session.run(
                    [
                        train_model.loss, train_model.train_op,
                        train_model.final_rnn_state,
                        train_model.global_norm, train_model.global_step,
                        train_model.clear_char_embedding_padding,
                        train_model.logits
                    ], feed)

                # exponential moving average of the training loss
                avg_train_loss += 0.05 * (loss - avg_train_loss)

                time_elapsed = time.time() - start_time

                if count % FLAGS.print_every == 0:
                    string = str(
                        '%6d: %d [%5d/%5d], train_loss = %6.8f secs/batch = %.4fs'
                        % (step, epoch, count, train_reader.length, loss,
                           time_elapsed))
                    print(string)

            # Measured once so the logged and the recorded value agree.
            epoch_training_time = time.time() - epoch_start_time
            string = str('Epoch training time:' + str(epoch_training_time))
            print(string)
            epochs_results['epoch_training_time'].append(
                str(epoch_training_time))

            # epoch done: time to evaluate
            labels = []
            predictions = []
            files_name_list = []
            kaldi_sents_index_list = []

            count = 0
            # The validation state is not advanced between batches: every
            # batch is fed this same initial state.
            rnn_state = session.run(valid_model.initial_rnn_state)
            if valid_ft_reader is not None:
                valid_ft_batches = valid_ft_reader.iter()
            else:
                valid_ft_batches = itertools.repeat(None)
            for batch_kim, batch_ft in zip(valid_reader.iter(),
                                           valid_ft_batches):

                x, y, files_name_batch, kaldi_sents_index_batch = batch_kim
                count += 1

                feed = {
                    valid_model.input: x,
                    valid_model.targets: y,
                    valid_model.initial_rnn_state: rnn_state,
                }
                if valid_ft_reader is not None:
                    feed[valid_model.input2] = batch_ft

                loss, logits = session.run(
                    [valid_model.loss, valid_model.logits], feed)
                labels.append(y)
                predictions.append(logits)
                files_name_list.append(files_name_batch)
                kaldi_sents_index_list.append(kaldi_sents_index_batch)

                if count % FLAGS.print_every == 0:
                    string = str("\t> validation loss = %6.8f" % (loss))
                    print(string)

            # The validation score comes from the rescoring helper, not from
            # the per-batch losses printed above.
            avg_valid_loss = get_valid_rescore_loss(labels, predictions,
                                                    files_name_list,
                                                    kaldi_sents_index_list)

            print("at the end of epoch:" + str(epoch))
            epochs_results['epoch_number'].append(str(epoch))
            print("train loss = %6.8f" % (avg_train_loss))
            epochs_results['train_loss'].append(avg_train_loss)
            print("validation loss = %6.8f" % (avg_valid_loss))
            epochs_results['validation_loss'].append(avg_valid_loss)

            save_as = '%s/epoch%03d_%.4f.model' % (FLAGS.train_dir, epoch,
                                                   avg_valid_loss)
            saver.save(session, save_as)
            print('Saved model' + str(save_as))
            epochs_results['model_name'].append(str(save_as))

            current_learning_rate = session.run(train_model.learning_rate)
            epochs_results['learning_rate'].append(str(current_learning_rate))

            ''' decide if need to decay learning rate '''
            if best_valid_loss is not None and avg_valid_loss > best_valid_loss - FLAGS.decay_when:
                print(
                    'validation perplexity did not improve enough, decay learning rate'
                )
                string = str('learning rate was:' + str(current_learning_rate))
                print(string)
                current_learning_rate *= FLAGS.learning_rate_decay
                if current_learning_rate < 1.e-6:
                    print('learning rate too small - stopping now')
                    break

                session.run(
                    train_model.learning_rate.assign(current_learning_rate))
                string = str('new learning rate is:' +
                             str(current_learning_rate))
                print(string)
            else:
                best_valid_loss = avg_valid_loss
            ''' write out summary events '''
            summary = tf.Summary(value=[
                tf.Summary.Value(tag="train_loss",
                                 simple_value=avg_train_loss),
                tf.Summary.Value(tag="valid_loss",
                                 simple_value=avg_valid_loss),
                tf.Summary.Value(tag="learning_rate",
                                 simple_value=current_learning_rate)
            ])
            summary_writer.add_summary(summary, step)

    # Save model performance data
    pd.DataFrame(epochs_results).to_csv(FLAGS.train_dir + '/train_results.csv')
예제 #18
0
def main(_):
    ''' Trains model from data, then reports the loss on the test split '''
    # Record of the epoch with the lowest validation loss so far:
    # [train_loss, train_ppl, valid_loss, valid_ppl]
    best_record = [1000, 1000, 1000, 1000]
    total_time = 0.

    if not os.path.exists(FLAGS.train_dir):
        os.mkdir(FLAGS.train_dir)
        print('Created training directory', FLAGS.train_dir)

    loaded = load_data(FLAGS.data_dir, FLAGS.max_word_length,
                       flist=FILE_NAME_LIST, eos=FLAGS.EOS)
    word_vocab, char_vocab, word_tensors, char_tensors, max_word_length = loaded

    # FILE_NAME_LIST holds the train, validation and test file names, in that order.
    train_reader, valid_reader, test_reader = (
        DataReader(word_tensors[FILE_NAME_LIST[split]], FLAGS.batch_size,
                   FLAGS.num_unroll_steps)
        for split in range(3))

    print('initialized all dataset readers')

    # Everything the training and the validation graph have in common;
    # only the dropout value differs between the two.
    shared_graph_args = dict(
        word_vocab_size=word_vocab.size,
        word_embed_size=FLAGS.word_embed_size,
        batch_size=FLAGS.batch_size,
        num_highway_layers=FLAGS.highway_layers,
        num_rnn_layers=FLAGS.rnn_layers,
        rnn_size=FLAGS.rnn_size,
        num_unroll_steps=FLAGS.num_unroll_steps)

    with tf.Graph().as_default(), tf.Session() as session:

        # tensorflow seed must be inside graph
        tf.set_random_seed(FLAGS.seed)
        np.random.seed(seed=FLAGS.seed)

        ''' build training graph '''
        uniform_init = tf.random_uniform_initializer(-FLAGS.param_init,
                                                     FLAGS.param_init)
        with tf.variable_scope("Model", initializer=uniform_init):
            train_model = model.inference_graph(dropout=FLAGS.dropout,
                                                **shared_graph_args)
            train_loss_graph = model.loss_graph(train_model.logits,
                                                FLAGS.batch_size,
                                                FLAGS.num_unroll_steps)
            train_model.update(train_loss_graph)

            # The loss is multiplied by FLAGS.num_unroll_steps, which scales the
            # gradients by the same factor. This reproduces how the original
            # Torch code optimizes; without it the gradients would be much
            # smaller (i.e. 35 times smaller) and learning rate and
            # max_grad_norm would have to be rescaled to compensate.
            scaled_loss = train_model.loss * FLAGS.num_unroll_steps
            train_model.update(
                model.training_graph(scaled_loss, FLAGS.learning_rate,
                                     FLAGS.max_grad_norm))

        # The saver is created before any further graph nodes so that it does
        # not pick up variables defined below.
        saver = tf.train.Saver(max_to_keep=5)

        ''' build graph for validation and testing (shares parameters with the training graph!) '''
        with tf.variable_scope("Model", reuse=True):
            valid_model = model.inference_graph(dropout=0.0,
                                                **shared_graph_args)
            valid_loss_graph = model.loss_graph(valid_model.logits,
                                                FLAGS.batch_size,
                                                FLAGS.num_unroll_steps)
            valid_model.update(valid_loss_graph)

        if not FLAGS.load_model:
            tf.global_variables_initializer().run()
            session.run(train_model.clear_char_embedding_padding)
            print('Created and initialized fresh model. Size:',
                  model.model_size())
        else:
            saver.restore(session, FLAGS.load_model)
            print('Loaded model from', FLAGS.load_model,
                  'saved at global step', train_model.global_step.eval())

        summary_writer = tf.summary.FileWriter(FLAGS.train_dir,
                                               graph=session.graph)

        ''' take learning rate from CLI, not from saved graph '''
        session.run(tf.assign(train_model.learning_rate, FLAGS.learning_rate))

        # Table of all trainable variables with their shapes and sizes.
        rule = "=" * 89
        print(rule)
        print(rule)
        all_weights = {v.name: v for v in tf.trainable_variables()}
        total_size = 0
        for index, weight in enumerate(all_weights.values(), 1):
            weight_size = int(np.prod(np.array(weight.shape.as_list())))
            print("%02d-Weight   %s\tshape   %s\ttsize    %d" %
                  (index, weight.name[:-2].ljust(80),
                   str(weight.shape).ljust(20), weight_size))
            total_size += weight_size
        # the MiB figure counts 4 bytes per parameter
        print("Total size %d, %.3fMiB" % (total_size,
                                          (total_size * 4) / (1024 * 1024)))
        print("-" * 89)

        ''' training starts here '''
        train_fetches = [
            train_model.loss, train_model.train_op,
            train_model.final_rnn_state, train_model.global_norm,
            train_model.global_step,
            train_model.clear_char_embedding_padding
        ]
        valid_fetches = [valid_model.loss, valid_model.final_rnn_state]

        best_valid_loss = None
        rnn_state = session.run(train_model.initial_rnn_state)
        for epoch in range(1, FLAGS.max_epochs + 1):

            epoch_start_time = time.time()
            avg_train_loss = 0.0
            count = 0
            for x, y in train_reader.iter():
                count += 1
                start_time = time.time()

                train_feed = {
                    train_model.input: x,
                    train_model.targets: y,
                    train_model.initial_rnn_state: rnn_state
                }
                loss, _, rnn_state, gradient_norm, step, _ = session.run(
                    train_fetches, train_feed)

                # exponential moving average of the training loss
                avg_train_loss += 0.05 * (loss - avg_train_loss)

                time_elapsed = time.time() - start_time

                if count % FLAGS.print_every != 0:
                    continue
                cur_lr = session.run(train_model.learning_rate)
                print(
                    '%6d: -%d- [%5d/%5d], train_loss/ppl = %6.8f/%6.7f batch/secs = %.1fb/s, cur_lr = %2.5f, grad.norm=%6.8f'
                    % (step, epoch, count, train_reader.length, loss,
                       np.exp(loss), FLAGS.print_every / time_elapsed,
                       cur_lr, gradient_norm))

            print('Epoch training time:', time.time() - epoch_start_time)
            total_time += (time.time() - epoch_start_time)

            # epoch done: time to evaluate
            avg_valid_loss = 0.0
            count = 0
            rnn_state = session.run(valid_model.initial_rnn_state)
            for x, y in valid_reader.iter():
                count += 1

                valid_feed = {
                    valid_model.input: x,
                    valid_model.targets: y,
                    valid_model.initial_rnn_state: rnn_state,
                }
                loss, rnn_state = session.run(valid_fetches, valid_feed)

                if count % FLAGS.print_every == 0:
                    print("\t> validation loss = %6.8f, perplexity = %6.8f" %
                          (loss, np.exp(loss)))
                avg_valid_loss += loss / valid_reader.length

            print("at the end of epoch:", epoch)
            print("train loss = %6.8f, perplexity = %6.8f" %
                  (avg_train_loss, np.exp(avg_train_loss)))
            print("validation loss = %6.8f, perplexity = %6.8f" %
                  (avg_valid_loss, np.exp(avg_valid_loss)))
            if avg_valid_loss < best_record[2]:
                best_record[:] = [
                    avg_train_loss, np.exp(avg_train_loss),
                    avg_valid_loss, np.exp(avg_valid_loss)
                ]

            save_as = '%s/epoch%03d_%.4f.model' % (FLAGS.train_dir, epoch,
                                                   avg_valid_loss)
            saver.save(session, save_as)
            print('Saved model', save_as)

            ''' write out summary events '''
            summary_values = [
                tf.Summary.Value(tag="train_loss",
                                 simple_value=avg_train_loss),
                tf.Summary.Value(tag="valid_loss",
                                 simple_value=avg_valid_loss)
            ]
            summary_writer.add_summary(tf.Summary(value=summary_values), step)

            ''' decide if need to decay learning rate '''
            # Compared on perplexities: the epoch counts as an improvement
            # unless its perplexity exceeds the best one minus
            # FLAGS.decay_when.
            improved = best_valid_loss is None or not (
                np.exp(avg_valid_loss) >
                np.exp(best_valid_loss) - FLAGS.decay_when)
            if improved:
                best_valid_loss = avg_valid_loss
                continue

            print(
                'validation perplexity did not improve enough, decay learning rate'
            )
            current_learning_rate = session.run(train_model.learning_rate)
            print('learning rate was:', current_learning_rate)
            current_learning_rate *= FLAGS.learning_rate_decay
            if current_learning_rate < 1.e-5:
                print('learning rate too small - stopping now')
                break

            session.run(
                train_model.learning_rate.assign(current_learning_rate))
            print('new learning rate is:', current_learning_rate)

        ''' test on the test set '''
        # The validation graph is reused for the test split.
        ave_test_loss = 0.
        test_state = session.run(valid_model.initial_rnn_state)
        for x, y in test_reader.iter():
            test_feed = {
                valid_model.input: x,
                valid_model.targets: y,
                valid_model.initial_rnn_state: test_state
            }
            loss, test_state = session.run(valid_fetches, test_feed)
            ave_test_loss += loss / test_reader.length

        print(rule)
        print(rule)
        print("Total training time(not included the valid time): %f" %
              total_time)
        print("The best result:")
        print("train loss = %.3f, ppl = %.4f" % (best_record[0],
                                                 best_record[1]))
        print("valid loss = %.3f, ppl = %.4f" % (best_record[2],
                                                 best_record[3]))
        print("test  loss = %.3f, ppl = %.4f" %
              (ave_test_loss, np.exp(ave_test_loss)))
        print(rule)
예제 #19
0
def main(print):
    ''' Loads trained model and evaluates it on test split.

    Args:
        print: print-like callable that receives every progress message.
            NOTE(review): this parameter shadows the builtin ``print``; the
            name is kept for interface compatibility. A non-callable argument
            would fail on the first message - confirm how callers invoke this.

    Returns:
        -1 when no checkpoint is configured, when the checkpoint index file
        is missing, or when the test reader yields no batches; None otherwise.
    '''
    if FLAGS.load_model_for_test is None:
        print('Please specify checkpoint file to load model from')
        return -1

    if not os.path.exists(FLAGS.load_model_for_test + ".index"):
        print('Checkpoint file not found', FLAGS.load_model_for_test)
        return -1

    word_vocab, char_vocab, word_tensors, char_tensors, max_word_length, words_list = \
        load_data(FLAGS.data_dir, FLAGS.max_word_length, eos=FLAGS.EOS)

    test_reader = DataReader(word_tensors['test'], char_tensors['test'],
                             FLAGS.batch_size, FLAGS.num_unroll_steps)

    # an empty/unset flag is normalised to None
    fasttext_model_path = FLAGS.fasttext_model_path or None

    # The fastText reader exists only for fastText embeddings. It used to be
    # consumed unconditionally below, which raised UnboundLocalError for any
    # other embedding setting.
    use_fasttext = 'fasttext' in FLAGS.embedding
    test_ft_reader = None
    if use_fasttext:
        fasttext_model = FasttextModel(
            fasttext_path=fasttext_model_path).get_fasttext_model()
        test_ft_reader = DataReaderFastText(
            words_list=words_list,
            batch_size=FLAGS.batch_size,
            num_unroll_steps=FLAGS.num_unroll_steps,
            model=fasttext_model,
            data='test')

    print('initialized test dataset reader')

    with tf.Graph().as_default(), tf.Session() as session:

        # tensorflow seed must be inside graph
        tf.set_random_seed(FLAGS.seed)
        np.random.seed(seed=FLAGS.seed)
        ''' build inference graph '''
        with tf.variable_scope("Model"):
            # NOTE(review): eval() executes whatever text the kernels /
            # kernel_features flags contain; only safe for trusted input.
            m = model.inference_graph(char_vocab_size=char_vocab.size,
                                      word_vocab_size=word_vocab.size,
                                      char_embed_size=FLAGS.char_embed_size,
                                      batch_size=FLAGS.batch_size,
                                      num_highway_layers=FLAGS.highway_layers,
                                      num_rnn_layers=FLAGS.rnn_layers,
                                      rnn_size=FLAGS.rnn_size,
                                      max_word_length=max_word_length,
                                      kernels=eval(FLAGS.kernels),
                                      kernel_features=eval(
                                          FLAGS.kernel_features),
                                      num_unroll_steps=FLAGS.num_unroll_steps,
                                      dropout=0,
                                      embedding=FLAGS.embedding,
                                      fasttext_word_dim=300,
                                      acoustic_features_dim=4)
            m.update(
                model.loss_graph(m.logits, FLAGS.batch_size,
                                 FLAGS.num_unroll_steps))

            # declared inside the "Model" scope so the saver below restores it
            global_step = tf.Variable(0, dtype=tf.int32, name='global_step')

        saver = tf.train.Saver()
        saver.restore(session, FLAGS.load_model_for_test)
        print('Loaded model from' + str(FLAGS.load_model_for_test) +
              'saved at global step' + str(global_step.eval()))
        ''' evaluation starts here '''
        rnn_state = session.run(m.initial_rnn_state)
        count = 0
        avg_loss = 0
        start_time = time.time()

        # pair every word/char batch with its fastText batch when one exists
        if use_fasttext:
            batches = zip(test_reader.iter(), test_ft_reader.iter())
        else:
            batches = ((batch, None) for batch in test_reader.iter())

        for batch_kim, batch_ft in batches:
            count += 1
            x, y = batch_kim
            feed = {
                m.input: x,
                m.targets: y,
                m.initial_rnn_state: rnn_state
            }
            if batch_ft is not None:
                feed[m.input2] = batch_ft

            # the recurrent state is carried over from batch to batch
            loss, rnn_state = session.run([m.loss, m.final_rnn_state], feed)

            avg_loss += loss

        if count == 0:
            # avoid ZeroDivisionError in the averages below
            print('No test batches were produced; nothing to evaluate')
            return -1

        avg_loss /= count
        time_elapsed = time.time() - start_time

        print("test loss = %6.8f, perplexity = %6.8f" %
              (avg_loss, np.exp(avg_loss)))
        print("test samples:" + str(count * FLAGS.batch_size) +
              "time elapsed:" + str(time_elapsed) + "time per one batch:" +
              str(time_elapsed / count))

        save_data_to_csv(avg_loss, count, time_elapsed)
예제 #20
0
def main(_):
    ''' Trains model from data.

    Builds a training graph plus parameter-sharing validation and
    single-step test graphs, then runs up to FLAGS.max_epochs epochs:
    train, validate, checkpoint, write summaries, and decay the learning
    rate when validation perplexity stops improving by FLAGS.decay_when.
    `run_test2` is invoked before training and twice afterwards.

    Args:
        _: unused positional argument.
    '''
    print("we in main")
    # debug output; guarded because fewer than three argv entries used to
    # raise IndexError before anything else ran
    if len(sys.argv) > 2:
        print(sys.argv[2])
    print(FLAGS)
    if not os.path.exists(FLAGS.train_dir):
        os.mkdir(FLAGS.train_dir)
        print('Created training directory', FLAGS.train_dir)

    word_vocab, char_vocab, word_tensors, char_tensors, max_word_length = \
        load_data(FLAGS.data_dir, FLAGS.max_word_length, eos=FLAGS.EOS)

    train_reader = DataReader(word_tensors['train'], char_tensors['train'],
                              FLAGS.batch_size, FLAGS.num_unroll_steps)

    valid_reader = DataReader(word_tensors['valid'], char_tensors['valid'],
                              FLAGS.batch_size, FLAGS.num_unroll_steps)

    test_reader = DataReader(word_tensors['test'], char_tensors['test'],
                              FLAGS.batch_size, FLAGS.num_unroll_steps)

    print('initialized all dataset readers')

    with tf.Graph().as_default(), tf.Session() as session:

        # tensorflow seed must be inside graph
        tf.set_random_seed(FLAGS.seed)
        np.random.seed(seed=FLAGS.seed)

        ''' build training graph '''
        # NOTE(review): eval() executes whatever text the kernels /
        # kernel_features flags contain; only safe for trusted input.
        initializer = tf.random_uniform_initializer(-FLAGS.param_init, FLAGS.param_init)
        with tf.variable_scope("Model", initializer=initializer):
            train_model = model.inference_graph(
                    char_vocab_size=char_vocab.size,
                    word_vocab_size=word_vocab.size,
                    char_embed_size=FLAGS.char_embed_size,
                    batch_size=FLAGS.batch_size,
                    num_highway_layers=FLAGS.highway_layers,
                    num_rnn_layers=FLAGS.rnn_layers,
                    rnn_size=FLAGS.rnn_size,
                    max_word_length=max_word_length,
                    kernels=eval(FLAGS.kernels),
                    kernel_features=eval(FLAGS.kernel_features),
                    num_unroll_steps=FLAGS.num_unroll_steps,
                    dropout=FLAGS.dropout)
            train_model.update(model.loss_graph(train_model.logits, FLAGS.batch_size, FLAGS.num_unroll_steps))

            # scaling loss by FLAGS.num_unroll_steps effectively scales gradients by the same factor.
            # we need it to reproduce how the original Torch code optimizes. Without this, our gradients will be
            # much smaller (i.e. 35 times smaller) and to get system to learn we'd have to scale learning rate and max_grad_norm appropriately.
            # Thus, scaling gradients so that this trainer is exactly compatible with the original
            train_model.update(model.training_graph(train_model.loss * FLAGS.num_unroll_steps, FLAGS.learning_rate, FLAGS.max_grad_norm))

        # create saver before creating more graph nodes, so that we do not save any vars defined below
        saver = tf.train.Saver(max_to_keep=50)

        ''' build graph for validation and testing (shares parameters with the training graph!) '''
        with tf.variable_scope("Model", reuse=True):
            valid_model = model.inference_graph(
                    char_vocab_size=char_vocab.size,
                    word_vocab_size=word_vocab.size,
                    char_embed_size=FLAGS.char_embed_size,
                    batch_size=FLAGS.batch_size,
                    num_highway_layers=FLAGS.highway_layers,
                    num_rnn_layers=FLAGS.rnn_layers,
                    rnn_size=FLAGS.rnn_size,
                    max_word_length=max_word_length,
                    kernels=eval(FLAGS.kernels),
                    kernel_features=eval(FLAGS.kernel_features),
                    num_unroll_steps=FLAGS.num_unroll_steps,
                    dropout=0.0)
            valid_model.update(model.loss_graph(valid_model.logits, FLAGS.batch_size, FLAGS.num_unroll_steps))

        # single-example, single-step copy of the model handed to run_test2
        with tf.variable_scope("Model", reuse=True):
            test_model = model.inference_graph(
                    char_vocab_size=char_vocab.size,
                    word_vocab_size=word_vocab.size,
                    char_embed_size=FLAGS.char_embed_size,
                    batch_size=1,
                    num_highway_layers=FLAGS.highway_layers,
                    num_rnn_layers=FLAGS.rnn_layers,
                    rnn_size=FLAGS.rnn_size,
                    max_word_length=max_word_length,
                    kernels=eval(FLAGS.kernels),
                    kernel_features=eval(FLAGS.kernel_features),
                    num_unroll_steps=1,
                    dropout=0.0)
            test_model.update(model.loss_graph(test_model.logits, 1, 1))

        if FLAGS.load_model:
            saver.restore(session, FLAGS.load_model)
            print('Loaded model from', FLAGS.load_model, 'saved at global step', train_model.global_step.eval())
        else:
            tf.initialize_all_variables().run()
            print('Created and initialized fresh model. Size:', model.model_size())

        summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, graph=session.graph)

        ''' take learning rate from CLI, not from saved graph '''
        session.run(
            tf.assign(train_model.learning_rate, FLAGS.learning_rate),
        )

        # Build the write-back op once. Calling tf.assign inside the helper
        # added a new op (and a constant holding the whole embedding) to the
        # graph on every training step.
        char_embedding_feed = tf.placeholder(
            train_model.char_embedding.dtype.base_dtype,
            train_model.char_embedding.get_shape())
        assign_char_embedding = tf.assign(train_model.char_embedding, char_embedding_feed)

        def clear_char_embedding_padding():
            ''' Overwrites row 0 of the character embedding with zeros. '''
            char_embedding = session.run(train_model.char_embedding)
            char_embedding[0,:] = 0.0
            session.run(assign_char_embedding, {char_embedding_feed: char_embedding})

        clear_char_embedding_padding()

        run_test2(session, test_model, train_reader)

        ''' training starts here '''
        best_valid_loss = None
        rnn_state = session.run(train_model.initial_rnn_state)
        for epoch in range(FLAGS.max_epochs):

            avg_train_loss = 0.0
            count = 0
            for x, y in train_reader.iter():
                count += 1
                start_time = time.time()
                # (a leftover debug `print (x); exit(1)` used to sit here and
                # terminated the process before the first training step)
                loss, _, rnn_state, gradient_norm, step = session.run([
                    train_model.loss,
                    train_model.train_op,
                    train_model.final_rnn_state,
                    train_model.global_norm,
                    train_model.global_step,
                ], {
                    train_model.input  : x,
                    train_model.targets: y,
                    train_model.initial_rnn_state: rnn_state
                })

                # the optimizer step may have moved row 0; reset it
                clear_char_embedding_padding()

                # exponential moving average of the batch loss (weight 0.05)
                avg_train_loss += 0.05 * (loss - avg_train_loss)

                time_elapsed = time.time() - start_time

                if count % FLAGS.print_every == 0:
                    print('%6d: %d [%5d/%5d], train_loss/perplexity = %6.8f/%6.7f secs/batch = %.4fs, grad.norm=%6.8f' % (step,
                                                            epoch, count,
                                                            train_reader.length,
                                                            loss, np.exp(loss),
                                                            time_elapsed,
                                                            gradient_norm))

            # epoch done: time to evaluate
            # NOTE(review): the validation loop reuses `rnn_state`, so the next
            # training epoch starts from the final validation state - confirm
            # this is intended.
            avg_valid_loss = 0.0
            count = 0
            rnn_state = session.run(valid_model.initial_rnn_state)
            for x, y in valid_reader.iter():
                count += 1
                start_time = time.time()

                loss, rnn_state = session.run([
                    valid_model.loss,
                    valid_model.final_rnn_state
                ], {
                    valid_model.input  : x,
                    valid_model.targets: y,
                    valid_model.initial_rnn_state: rnn_state,
                })

                if count % FLAGS.print_every == 0:
                    print("\t> validation loss = %6.8f, perplexity = %6.8f" % (loss, np.exp(loss)))
                avg_valid_loss += loss / valid_reader.length

            print("at the end of epoch:", epoch)
            print("train loss = %6.8f, perplexity = %6.8f" % (avg_train_loss, np.exp(avg_train_loss)))
            print("validation loss = %6.8f, perplexity = %6.8f" % (avg_valid_loss, np.exp(avg_valid_loss)))

            save_as = '%s/epoch%03d_%.4f.model' % (FLAGS.train_dir, epoch, avg_valid_loss)
            saver.save(session, save_as)
            print('Saved model', save_as)

            ''' write out summary events '''
            summary = tf.Summary(value=[
                tf.Summary.Value(tag="train_loss", simple_value=avg_train_loss),
                tf.Summary.Value(tag="valid_loss", simple_value=avg_valid_loss)
            ])
            summary_writer.add_summary(summary, step)

            ''' decide if need to decay learning rate '''
            if best_valid_loss is not None and np.exp(avg_valid_loss) > np.exp(best_valid_loss) - FLAGS.decay_when:
                print('** validation perplexity did not improve enough, decay learning rate')
                current_learning_rate = session.run(train_model.learning_rate)
                print('learning rate was:', current_learning_rate)
                current_learning_rate *= FLAGS.learning_rate_decay
                if current_learning_rate < 1.e-5:
                    print('learning rate too small - stopping now')
                    break

                session.run(train_model.learning_rate.assign(current_learning_rate))
                print('new learning rate is:', current_learning_rate)
            else:
                best_valid_loss = avg_valid_loss

        run_test2(session, test_model, train_reader)
        print ("AGAIN")
        run_test2(session, test_model, train_reader)
예제 #21
0
def main(_):
    ''' Loads trained model and evaluates it on test split.

    Args:
        _: unused positional argument.

    Returns:
        -1 when no checkpoint is configured, when the checkpoint index file
        is missing, or when the test reader yields no batches; None otherwise.
    '''

    if FLAGS.load_model is None:
        print('Please specify checkpoint file to load model from')
        return -1

    if not os.path.exists(FLAGS.load_model + ".index"):
        print('Checkpoint file not found', FLAGS.load_model)
        return -1

    word_vocab, char_vocab, word_tensors, char_tensors, max_word_length = \
        load_data(FLAGS.data_dir, FLAGS.max_word_length, flist = FILE_NAME_LIST[2], eos=FLAGS.EOS)

    test_reader = DataReader(word_tensors[0], FLAGS.batch_size,
                             FLAGS.num_unroll_steps)

    print('initialized test dataset reader')

    with tf.Graph().as_default(), tf.Session() as session:

        # tensorflow seed must be inside graph
        tf.set_random_seed(FLAGS.seed)
        np.random.seed(seed=FLAGS.seed)
        ''' build inference graph '''
        with tf.variable_scope("Model"):
            # note: the word embedding width is taken from FLAGS.char_embed_size
            m = model.inference_graph(word_vocab_size=word_vocab.size,
                                      word_embed_size=FLAGS.char_embed_size,
                                      batch_size=FLAGS.batch_size,
                                      num_highway_layers=FLAGS.highway_layers,
                                      num_rnn_layers=FLAGS.rnn_layers,
                                      rnn_size=FLAGS.rnn_size,
                                      num_unroll_steps=FLAGS.num_unroll_steps,
                                      dropout=0)
            m.update(
                model.score_graph(m.logits, FLAGS.batch_size,
                                  FLAGS.num_unroll_steps, FLAGS.alpha))

            # declared inside the "Model" scope so the saver below restores it
            global_step = tf.Variable(0, dtype=tf.int32, name='global_step')

        saver = tf.train.Saver()
        saver.restore(session, FLAGS.load_model)
        print('Loaded model from', FLAGS.load_model, 'saved at global step',
              global_step.eval())
        ''' evaluation starts here '''
        count = 0
        avg_loss = 0
        start_time = time.time()
        # NOTE(review): every batch is fed this same initial state; the final
        # state is never fetched or carried over - confirm this is intended.
        rnn_state = session.run(m.initial_rnn_state)
        for x, y in test_reader.iter():
            count += 1
            loss = session.run(m.loss, {
                m.input: x,
                m.targets: y,
                m.initial_rnn_state: rnn_state
            })

            avg_loss += loss

        if count == 0:
            # avoid ZeroDivisionError in the averages below
            print('No test batches were produced; nothing to evaluate')
            return -1

        avg_loss /= count
        time_elapsed = time.time() - start_time

        print("test loss = %6.8f, perplexity = %6.8f" %
              (avg_loss, np.exp(avg_loss)))
        print("test samples:", count * FLAGS.batch_size, "time elapsed:",
              time_elapsed, "time per one batch:", time_elapsed / count)
예제 #22
0
def main(_):
    ''' Trains model from data.

    Runs up to FLAGS.max_epochs epochs of training followed by validation,
    mirrors the progress output into "train_log.txt", checkpoints whenever
    validation perplexity reaches a new minimum (and on every fourth epoch),
    and decays the learning rate when validation perplexity stops improving
    by FLAGS.decay_when.

    Args:
        _: unused positional argument.
    '''

    if not os.path.exists(FLAGS.train_dir):
        os.mkdir(FLAGS.train_dir)
        print('Created training directory', FLAGS.train_dir)

    word_vocab, char_vocab, word_tensors, char_tensors, max_word_length = \
        load_data(FLAGS.data_dir, FLAGS.max_word_length, eos=FLAGS.EOS)

    train_reader = DataReader(word_tensors['train'], char_tensors['train'],
                              FLAGS.batch_size, FLAGS.num_unroll_steps)

    valid_reader = DataReader(word_tensors['valid'], char_tensors['valid'],
                              FLAGS.batch_size, FLAGS.num_unroll_steps)

    test_reader = DataReader(word_tensors['test'], char_tensors['test'],
                             FLAGS.batch_size, FLAGS.num_unroll_steps)

    print('initialized all dataset readers')
    minimum_valid_ppl = 1000000
    minimum_vl_epoch = 0

    # The log file shares the `with` statement with the graph and session so
    # that it is flushed and closed on every exit path; it used to be opened
    # here and never closed.
    with open("train_log.txt", "w") as text_file, \
            tf.Graph().as_default(), tf.Session() as session:

        # tensorflow seed must be inside graph
        tf.set_random_seed(FLAGS.seed)
        np.random.seed(seed=FLAGS.seed)
        ''' build training graph '''
        # NOTE(review): eval() executes whatever text the kernels /
        # kernel_features flags contain; only safe for trusted input.
        initializer = tf.random_uniform_initializer(-FLAGS.param_init,
                                                    FLAGS.param_init)
        with tf.variable_scope("Model", initializer=initializer):
            train_model = model.inference_graph(
                char_vocab_size=char_vocab.size,
                word_vocab_size=word_vocab.size,
                char_embed_size=FLAGS.char_embed_size,
                batch_size=FLAGS.batch_size,
                num_highway_layers=FLAGS.highway_layers,
                num_rnn_layers=FLAGS.rnn_layers,
                rnn_size=FLAGS.rnn_size,
                max_word_length=max_word_length,
                kernels=eval(FLAGS.kernels),
                kernel_features=eval(FLAGS.kernel_features),
                num_unroll_steps=FLAGS.num_unroll_steps,
                dropout=FLAGS.dropout)
            train_model.update(
                model.loss_graph(train_model.logits, FLAGS.batch_size,
                                 FLAGS.num_unroll_steps))

            # scaling loss by FLAGS.num_unroll_steps effectively scales gradients by the same factor.
            # we need it to reproduce how the original Torch code optimizes. Without this, our gradients will be
            # much smaller (i.e. 35 times smaller) and to get system to learn we'd have to scale learning rate and max_grad_norm appropriately.
            # Thus, scaling gradients so that this trainer is exactly compatible with the original
            train_model.update(
                model.training_graph(train_model.loss * FLAGS.num_unroll_steps,
                                     FLAGS.learning_rate, FLAGS.max_grad_norm))

        # create saver before creating more graph nodes, so that we do not save any vars defined below
        saver = tf.train.Saver(max_to_keep=10)
        ''' build graph for validation and testing (shares parameters with the training graph!) '''
        with tf.variable_scope("Model", reuse=True):
            valid_model = model.inference_graph(
                char_vocab_size=char_vocab.size,
                word_vocab_size=word_vocab.size,
                char_embed_size=FLAGS.char_embed_size,
                batch_size=FLAGS.batch_size,
                num_highway_layers=FLAGS.highway_layers,
                num_rnn_layers=FLAGS.rnn_layers,
                rnn_size=FLAGS.rnn_size,
                max_word_length=max_word_length,
                kernels=eval(FLAGS.kernels),
                kernel_features=eval(FLAGS.kernel_features),
                num_unroll_steps=FLAGS.num_unroll_steps,
                dropout=0.0)
            valid_model.update(
                model.loss_graph(valid_model.logits, FLAGS.batch_size,
                                 FLAGS.num_unroll_steps))

        if FLAGS.load_model:
            saver.restore(session, FLAGS.load_model)
            print('Loaded model from', FLAGS.load_model,
                  'saved at global step', train_model.global_step.eval())
        else:
            tf.global_variables_initializer().run()
            session.run(train_model.clear_char_embedding_padding)
            print('Created and initialized fresh model. Size:',
                  model.model_size())

        summary_writer = tf.summary.FileWriter(FLAGS.train_dir,
                                               graph=session.graph)
        ''' take learning rate from CLI, not from saved graph '''
        session.run(tf.assign(train_model.learning_rate,
                              FLAGS.learning_rate), )
        ''' training starts here '''
        best_valid_loss = None
        rnn_state = session.run(train_model.initial_rnn_state)
        for epoch in range(FLAGS.max_epochs):

            epoch_start_time = time.time()
            avg_train_loss = 0.0
            count = 0
            for x, y in train_reader.iter():
                count += 1
                start_time = time.time()

                loss, _, rnn_state, gradient_norm, step, _ = session.run(
                    [
                        train_model.loss, train_model.train_op,
                        train_model.final_rnn_state, train_model.global_norm,
                        train_model.global_step,
                        train_model.clear_char_embedding_padding
                    ], {
                        train_model.input: x,
                        train_model.targets: y,
                        train_model.initial_rnn_state: rnn_state
                    })

                # exponential moving average of the batch loss (weight 0.05)
                avg_train_loss += 0.05 * (loss - avg_train_loss)

                time_elapsed = time.time() - start_time

                if count % FLAGS.print_every == 0:
                    # format once, emit to both the console and the log file
                    progress = (
                        '%6d: %d [%5d/%5d], train_loss/perplexity = %6.8f/%6.7f secs/batch = %.4fs, grad.norm=%6.8f'
                        % (step, epoch, count, train_reader.length, loss,
                           np.exp(loss), time_elapsed, gradient_norm))
                    print(progress)
                    text_file.write(progress + ' \n')

            print('Epoch training time:', time.time() - epoch_start_time)

            # epoch done: time to evaluate
            # NOTE(review): the validation loop reuses `rnn_state`, so the next
            # training epoch starts from the final validation state - confirm
            # this is intended.
            avg_valid_loss = 0.0
            count = 0
            rnn_state = session.run(valid_model.initial_rnn_state)
            for x, y in valid_reader.iter():
                count += 1
                start_time = time.time()

                loss, rnn_state = session.run(
                    [valid_model.loss, valid_model.final_rnn_state], {
                        valid_model.input: x,
                        valid_model.targets: y,
                        valid_model.initial_rnn_state: rnn_state,
                    })

                if count % FLAGS.print_every == 0:
                    print("\t> validation loss = %6.8f, perplexity = %6.8f" %
                          (loss, np.exp(loss)))
                avg_valid_loss += loss / valid_reader.length

            train_summary = ("train loss = %6.8f, perplexity = %6.8f" %
                             (avg_train_loss, np.exp(avg_train_loss)))
            valid_summary = ("validation loss = %6.8f, perplexity = %6.8f" %
                             (avg_valid_loss, np.exp(avg_valid_loss)))

            print("at the end of epoch:", epoch)
            print(train_summary)
            print(valid_summary)

            text_file.write("at the end of epoch:" + str(epoch) + '\n')
            text_file.write(train_summary + " \n")
            text_file.write(valid_summary + " \n")

            # checkpoint on a new best validation perplexity, and otherwise
            # on every fourth epoch
            valid_ppl = np.exp(avg_valid_loss)
            improved = valid_ppl < minimum_valid_ppl
            if improved:
                minimum_valid_ppl = valid_ppl
                minimum_vl_epoch = epoch

            if improved or epoch % 4 == 0:
                save_as = '%s/epoch%03d_%.4f.model' % (FLAGS.train_dir, epoch,
                                                       avg_valid_loss)
                saver.save(session, save_as)
                print('Saved model', save_as)
            ''' write out summary events '''
            summary = tf.Summary(value=[
                tf.Summary.Value(tag="train_loss",
                                 simple_value=avg_train_loss),
                tf.Summary.Value(tag="valid_loss", simple_value=avg_valid_loss)
            ])
            summary_writer.add_summary(summary, step)
            ''' decide if need to decay learning rate '''
            if best_valid_loss is not None and np.exp(avg_valid_loss) > np.exp(
                    best_valid_loss) - FLAGS.decay_when:
                print(
                    'validation perplexity did not improve enough, decay learning rate'
                )
                current_learning_rate = session.run(train_model.learning_rate)
                print('learning rate was:', current_learning_rate)
                current_learning_rate *= FLAGS.learning_rate_decay
                if current_learning_rate < 1.e-5:
                    print('learning rate too small - stopping now')
                    break

                session.run(
                    train_model.learning_rate.assign(current_learning_rate))
                print('new learning rate is:', current_learning_rate)
            else:
                best_valid_loss = avg_valid_loss

        # always checkpoint the state reached when training stops
        save_as = '%s/epoch%03d_%.4f.model' % (FLAGS.train_dir, epoch,
                                               avg_valid_loss)
        saver.save(session, save_as)
        print('Saved model', save_as)
        print("----------------------------------------------")
        print(
            "Minimum Valid PPL is attained in epoch:%d and Validation PPL is %6.8f"
            % (minimum_vl_epoch, minimum_valid_ppl))
예제 #23
0
def main():
    """Train the tagger, validating every epoch and checkpointing on improvement.

    Steps, in order:
      1. Load the pretrained embeddings and build the train / validation /
         test ``reader.DataSet`` objects.
      2. Build three graphs under the shared ``"Model"`` variable scope: a
         training graph (with dropout and an optimizer) and validation / test
         graphs that reuse the same variables with dropout set to 0.
      3. For each epoch: run one training epoch, score the validation set,
         and when the validation F-score beats the best seen so far, score
         the test set and save a checkpoint to ``FLAGS.checkpoint_path``.
      4. After each epoch set the learning rate to
         ``FLAGS.learning_rate / (1 + FLAGS.decay_rate * (epoch + 1))``.

    All configuration is read from the module-level ``FLAGS``.
    """
    pretrain_word2id, pretrain_id2word, pretrain_emb = reader.load_pretrain(
        FLAGS.pretrain_path,
        [FLAGS.train_path, FLAGS.validate_path, FLAGS.test_path])
    vocabs = reader.build_vocab(FLAGS.train_path)

    def load_split(path):
        # Every split shares the pretrained tables and the training vocab.
        dataset = reader.DataSet(path, FLAGS.max_word_len,
                                 pretrain_word2id, pretrain_id2word,
                                 pretrain_emb, vocabs)
        dataset.load_data()
        return dataset

    traindata = load_split(FLAGS.train_path)
    validate = load_split(FLAGS.validate_path)
    test = load_split(FLAGS.test_path)

    # Every sequence in a batch is given the full length num_steps.
    seq_lens = FLAGS.num_steps * np.ones(FLAGS.batch_size)

    def build_graph(dataset, dropout):
        # Inference + loss graph for one data split. Must be called inside
        # the "Model" variable scope so the three graphs share variables.
        graph = model.inference_graph(
            char_vocab_size=len(dataset.char2id),
            pretrain_embedding=dataset.pretrain_emb,
            max_word_len=FLAGS.max_word_len,
            ntags=len(dataset.tag2id),
            batch_size=FLAGS.batch_size,
            num_steps=FLAGS.num_steps,
            char_emb_size=FLAGS.char_emb_size,
            lstm_state_size=FLAGS.lstm_state_size,
            num_rnn_layers=FLAGS.num_rnn_layers,
            dropout=dropout,
            filter_sizes=[FLAGS.filter_size],
            nfilters=[FLAGS.nfilter])
        graph.update(model.loss_graph(graph.logits, FLAGS.batch_size,
                                      FLAGS.num_steps, FLAGS.crf, seq_lens))
        return graph

    def score(session, dataset, graph):
        # Use the evaluator that matches how the loss graph was built, for
        # validation and test alike.
        if FLAGS.crf:
            return crf_eval(session, dataset, graph, FLAGS.batch_size,
                            FLAGS.num_steps, FLAGS.eval_path,
                            FLAGS.eval_script_path)
        return evaluate(session, dataset, graph, FLAGS.batch_size,
                        FLAGS.num_steps, FLAGS.eval_path)

    with tf.Graph().as_default(), tf.Session() as sess:
        with tf.variable_scope("Model"):
            train_model = build_graph(traindata, FLAGS.dropout)
            train_model.update(model.training_graph(
                train_model.loss * FLAGS.num_steps,
                FLAGS.learning_rate,
                FLAGS.max_grad_norm))
        saver = tf.train.Saver()

        # Validation graph: same variables, no dropout.
        with tf.variable_scope("Model", reuse=True):
            validate_model = build_graph(validate, 0)
            validate_model.update(model.adict(name="validation"))

        # Test graph: same variables, no dropout.
        with tf.variable_scope("Model", reuse=True):
            test_model = build_graph(test, 0)
            test_model.update(model.adict(name="test"))

        sess.run(tf.global_variables_initializer())
        lstm_state_fw = sess.run(train_model.initial_lstm_state_fw)
        lstm_state_bw = sess.run(train_model.initial_lstm_state_bw)
        # Single-argument print(...) emits the same text on Python 2 and 3.
        print("Start Training...")

        current_best_Fscore = 0.0
        for epoch in range(FLAGS.total_epoch):
            print("epoch %s" % (epoch,))
            start_time = time.time()
            # The returned training loss is not used by this loop.
            run_epoch(sess, traindata, train_model, lstm_state_fw,
                      lstm_state_bw, FLAGS.batch_size, FLAGS.num_steps)

            Fscore = score(sess, validate, validate_model)
            if Fscore > current_best_Fscore:
                current_best_Fscore = Fscore
                print("**Results on test set with current best F: %s"
                      % (current_best_Fscore,))
                # Previously this always called crf_eval, even for models
                # built with FLAGS.crf disabled.
                score(sess, test, test_model)
                saver.save(sess, FLAGS.checkpoint_path)
                print("Model saved!")

            # Inverse-time decay computed from the initial learning rate.
            # NOTE(review): .assign() builds a new op on every epoch; hoist
            # it out of the loop if the epoch count becomes large.
            new_learning_rate = FLAGS.learning_rate / (
                1 + FLAGS.decay_rate * (epoch + 1))
            sess.run(train_model.learning_rate.assign(new_learning_rate))
            end_time = time.time()
            print("Epoch training time: %s" % (end_time - start_time,))