Example #1
import time
from datetime import datetime

import numpy as np
import tensorflow as tf

# TextDataset, TextGenerationModel, one_hot and get_batch are assumed to be
# provided by the accompanying project modules.


def train(config):

    # Initialize the text dataset
    dataset = TextDataset(config.txt_file)

    # Initialize the model
    model = TextGenerationModel(batch_size=config.batch_size,
                                seq_length=config.seq_length,
                                vocabulary_size=dataset.vocab_size,
                                lstm_num_hidden=config.lstm_num_hidden,
                                lstm_num_layers=config.lstm_num_layers)

    ###########################################################################
    # Implement code here.
    ###########################################################################
    phrase = 'in the year'

    input_placeholder = tf.placeholder(
        tf.float32, [config.seq_length, config.batch_size, dataset.vocab_size])
    label_placeholder = tf.placeholder(tf.int32,
                                       [config.batch_size, config.seq_length])
    char_placeholder = tf.placeholder(tf.float32, [1, 1, dataset.vocab_size])
    phrase_placeholder = tf.placeholder(tf.float32,
                                        [len(phrase), 1, dataset.vocab_size])

    # Encode the seed phrase as one-hot vectors; keep the original string
    # around so it can be printed next to the generated text later on
    phrase_ids = dataset.convert_to_numbers(phrase)
    phrase_ = np.zeros((len(phrase_ids), dataset.vocab_size))
    for i, char in enumerate(phrase_ids):
        phrase_[i, :] = one_hot([char], dataset.vocab_size)
    # Feed dict used when sampling; the training placeholders get dummy zeros
    feed_dict_bonus = {
        input_placeholder:
        np.zeros((config.seq_length, config.batch_size, dataset.vocab_size)),
        label_placeholder: np.zeros((config.batch_size, config.seq_length)),
        char_placeholder: np.zeros((1, 1, dataset.vocab_size)),
        phrase_placeholder: np.expand_dims(phrase_, 1)
    }

    # Compute logits
    logits_per_step = model._build_model(input_placeholder)

    # Define the optimizer
    optimizer = tf.train.RMSPropOptimizer(config.learning_rate,
                                          decay=config.learning_rate_decay)

    # Compute the gradients, clip them by global norm and apply the update
    loss = model._compute_loss(logits_per_step, label_placeholder)
    grads_and_vars = optimizer.compute_gradients(loss)
    grads, variables = zip(*grads_and_vars)
    grads_clipped, _ = tf.clip_by_global_norm(
        grads, clip_norm=config.max_norm_gradient)
    apply_gradients_op = optimizer.apply_gradients(
        zip(grads_clipped, variables))


    # Sample 100 characters, seeded either with the phrase or with a single character
    predictions = model.get_predictions(phrase_placeholder, 100)
    predictions_char = model.get_predictions(char_placeholder, 100)

    summary = tf.summary.merge_all()
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()
    sess = tf.Session()
    # saver.restore(sess, "./checkpoints/model.ckpt")  # uncomment to resume from a checkpoint
    summary_writer = tf.summary.FileWriter(config.summary_path, sess.graph)
    sess.run(init)
    ###########################################################################
    # Implement code here.
    ###########################################################################

    for train_step in range(int(config.train_steps)):

        # Only for time measurement of step through network
        t1 = time.time()

        #######################################################################
        # Implement code here.
        #######################################################################

        # Load next sequence
        inputs, targets = get_batch(config.batch_size, config.seq_length,
                                    dataset)
        feed_dict = {
            input_placeholder: inputs,
            label_placeholder: targets,
            # The sampling placeholders are unused during the training step
            char_placeholder: np.zeros((1, 1, dataset.vocab_size)),
            phrase_placeholder: np.zeros((len(phrase), 1, dataset.vocab_size))
        }
        _, loss_value = sess.run([apply_gradients_op, loss],
                                 feed_dict=feed_dict)

        # Only for time measurement of step through network
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        # Output the training progress
        if train_step % config.print_every == 0:
            print(
                "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, Loss = {:.2f}"
                .format(datetime.now().strftime("%Y-%m-%d %H:%M"),
                        train_step + 1, int(config.train_steps),
                        config.batch_size, examples_per_second, loss_value))

        if train_step % config.sample_every == 0:
            # Sample from the model: once seeded with the fixed phrase and
            # once seeded with a random character
            random_char = np.random.randint(0, dataset.vocab_size)
            feed_dict_bonus[char_placeholder] = np.expand_dims(
                one_hot([random_char], dataset.vocab_size), 0)
            predic, predic_char = sess.run([predictions, predictions_char],
                                           feed_dict=feed_dict_bonus)
            final_string = dataset.convert_to_string(predic)
            print(phrase, final_string)
            final_string = dataset.convert_to_string(predic_char)
            random_char = dataset.convert_to_string([random_char])
            print(random_char, final_string, '\n')

        if train_step % config.checkpoint_every == 0:
            saver.save(sess, save_path='./checkpoints/model4.ckpt')
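This example relies on two helpers, one_hot and get_batch, that are not shown. A minimal sketch of what they are assumed to do follows; the exact names and signatures are assumptions, not part of the original code (dataset.batch is the same method Example #2 uses):

import numpy as np

def one_hot(indices, vocab_size):
    # Hypothetical helper: map a sequence of character ids to one-hot rows.
    encoded = np.zeros((len(indices), vocab_size))
    encoded[np.arange(len(indices)), indices] = 1.0
    return encoded

def get_batch(batch_size, seq_length, dataset):
    # Hypothetical helper: draw a (batch, time) batch of character ids from
    # the dataset and one-hot encode the inputs into the time-major
    # (seq_length, batch_size, vocab_size) layout the input placeholder expects.
    inputs, targets = dataset.batch(batch_size, seq_length)
    encoded = np.zeros((seq_length, batch_size, dataset.vocab_size))
    for t in range(seq_length):
        encoded[t] = one_hot(inputs[:, t], dataset.vocab_size)
    return encoded, targets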
Example #2
import time
from datetime import datetime

import numpy as np
import tensorflow as tf

# TextDataset and TextGenerationModel are assumed to be provided by the
# accompanying project modules.


def train(config):

    # Initialize the text dataset
    dataset = TextDataset(config.txt_file)

    # Initialize the model
    model = TextGenerationModel(batch_size=config.batch_size,
                                seq_length=config.seq_length,
                                vocabulary_size=dataset.vocab_size,
                                lstm_num_hidden=config.lstm_num_hidden,
                                lstm_num_layers=config.lstm_num_layers,
                                dropout_keep_prob=config.dropout_keep_prob)

    ###########################################################################
    # Implement code here.
    ###########################################################################

    with tf.name_scope('input'):
        inputs = tf.placeholder(tf.int32,
                                shape=[config.batch_size, config.seq_length],
                                name='inputs')
        labels = tf.placeholder(tf.int32,
                                shape=[config.batch_size, config.seq_length],
                                name='labels')
        input_sample = tf.placeholder(tf.int32,
                                      shape=[config.batch_size, 1],
                                      name='input_sample')
        state = tf.placeholder(tf.float32, [
            config.lstm_num_layers, 2, config.batch_size,
            config.lstm_num_hidden
        ])

    # Unpack the state placeholder into one LSTMStateTuple per layer
    layer = tf.unstack(state, axis=0)
    rnn_tuple_state = tuple([
        tf.nn.rnn_cell.LSTMStateTuple(layer[i][0], layer[i][1])
        for i in range(config.lstm_num_layers)
    ])

    # Logits for the full training sequence
    with tf.name_scope('logits'):
        logits, _ = model._build_model(inputs, rnn_tuple_state)

    # Loss
    with tf.name_scope('loss'):
        loss = model._compute_loss(logits, labels)
    tf.summary.scalar('loss', loss)

    # Single-step model reused for text generation
    with tf.name_scope('sample_logits'):
        sample_logits, final_state = model._build_model(
            input_sample, rnn_tuple_state)

    # Predicted next characters from the sampling logits
    with tf.name_scope('predictions'):
        predictions = model.predictions(sample_logits)
    global_step = tf.Variable(0, trainable=False, name='global_step')

    # Exponentially decaying learning rate
    decaying_learning_rate = tf.train.exponential_decay(
        config.learning_rate,
        global_step,
        config.learning_rate_step,
        config.learning_rate_decay,
        name='decaying_eta')
    # tf.GraphKeys itself is not a collection name; use an explicit key
    tf.add_to_collection('decaying_learning_rate', decaying_learning_rate)

    # Define the optimizer
    optimizer = tf.train.RMSPropOptimizer(decaying_learning_rate)

    # Compute the gradients for each variable
    grads_and_vars = optimizer.compute_gradients(loss)
    grads, variables = zip(*grads_and_vars)
    grads_clipped, _ = tf.clip_by_global_norm(
        grads, clip_norm=config.max_norm_gradient)
    apply_gradients_op = optimizer.apply_gradients(zip(grads_clipped,
                                                       variables),
                                                   global_step=global_step)

    merged = tf.summary.merge_all()
    summary_writer = tf.summary.FileWriter(config.summary_path + '/test',
                                           graph=tf.get_default_graph())

    # Initial all-zero LSTM state
    init_state = np.zeros(
        (config.lstm_num_layers, 2, config.batch_size, config.lstm_num_hidden))

    init = tf.global_variables_initializer()
    sess = tf.Session()
    sess.run(init)
    ###########################################################################
    # Implement code here.
    ###########################################################################

    for train_step in range(int(config.train_steps)):

        # Only for time measurement of step through network
        t1 = time.time()

        #######################################################################
        # Implement code here.
        #######################################################################

        x_train, y_train = dataset.batch(config.batch_size, config.seq_length)
        # Run one parameter update
        sess.run(apply_gradients_op,
                 feed_dict={
                     inputs: x_train,
                     labels: y_train,
                     state: init_state
                 })

        # Only for time measurement of step through network
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        # Output the training progress
        if train_step % config.print_every == 0:

            loss_value, summary = sess.run([loss, merged],
                                           feed_dict={
                                               inputs: x_train,
                                               labels: y_train,
                                               state: init_state
                                           })
            summary_writer.add_summary(summary, train_step)

            print(
                "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, Loss = {:.2f}"
                .format(datetime.now().strftime("%Y-%m-%d %H:%M"),
                        train_step + 1, int(config.train_steps),
                        config.batch_size, examples_per_second, loss_value))

        if train_step % config.sample_every == 0:
            # Seed every sentence in the batch with a random character
            sample_inputs = np.random.randint(0,
                                              dataset.vocab_size,
                                              size=config.batch_size)
            new_sample = np.reshape(sample_inputs, (sample_inputs.shape[0], 1))
            # Store character ids as integers so convert_to_string works
            new_sentence = np.empty((config.batch_size, config.seq_length),
                                    dtype=np.int64)
            # Generate a sentence of length seq_length one character at a
            # time, feeding the final LSTM state of each step back in as the
            # next initial state; the zero state is only used for step 0.
            # (The original branched again on 30 <= i < 60 with a broken
            # bitwise condition, but all non-zero steps did the same thing.)
            for i in range(config.seq_length):
                feed_state = init_state if i == 0 else final
                pred, final = sess.run([predictions, final_state],
                                       feed_dict={
                                           input_sample: new_sample,
                                           state: feed_state
                                       })
                new_sample = pred.T
                new_sentence[:, i] = new_sample[:, 0]

            for idx, elem in enumerate(new_sentence):
                # .encode('utf-8') can be dropped for nicer output; it is kept
                # here to avoid encoding errors when running on SURFsara
                print('Sentence {}:{} {}'.format(
                    idx,
                    dataset.convert_to_string(sample_inputs)[idx].encode(
                        'utf-8'),
                    dataset.convert_to_string(elem).encode('utf-8')))

    summary_writer.close()
    sess.close()
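Both examples read their hyperparameters from a config object. A minimal argparse-based sketch of the fields they use follows; the field names are taken from the code above, but the default values are illustrative assumptions:

import argparse

def parse_config():
    # Hypothetical config parser covering the fields train() reads
    parser = argparse.ArgumentParser()
    parser.add_argument('--txt_file', type=str, required=True)
    parser.add_argument('--batch_size', type=int, default=64)
    parser.add_argument('--seq_length', type=int, default=30)
    parser.add_argument('--lstm_num_hidden', type=int, default=128)
    parser.add_argument('--lstm_num_layers', type=int, default=2)
    parser.add_argument('--dropout_keep_prob', type=float, default=1.0)
    parser.add_argument('--learning_rate', type=float, default=2e-3)
    parser.add_argument('--learning_rate_decay', type=float, default=0.96)
    parser.add_argument('--learning_rate_step', type=int, default=5000)
    parser.add_argument('--max_norm_gradient', type=float, default=5.0)
    parser.add_argument('--train_steps', type=int, default=int(1e6))
    parser.add_argument('--print_every', type=int, default=100)
    parser.add_argument('--sample_every', type=int, default=1000)
    parser.add_argument('--checkpoint_every', type=int, default=1000)
    parser.add_argument('--summary_path', type=str, default='./summaries')
    return parser.parse_args()

if __name__ == '__main__':
    train(parse_config())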