Code example #1
def run_baseline():
  """
  Main function which loads in data, runs the model, and prints out statistics

  """

  # Get train dataset for task 6
  train_total = get_task_6_train()

  train, validation = split_training_data(train_total)

  # Get test dataset for task 6
  test = get_task_6_test()

  # Get word to glove vectors dictionary
  glove_dict = load_glove_vectors()

  # Split data into batches
  validation_batches = batch_data(validation, BATCH_SIZE)
  test_batches = batch_data(test, BATCH_SIZE)

  # Convert batches into vectors
  val_batched_input_vecs, val_batched_input_lengths, val_batched_end_of_sentences, val_batched_num_sentences, val_batched_question_vecs, \
  val_batched_question_lengths, val_batched_answer_vecs = convert_to_vectors_with_sentences(validation_batches,
                                                                                            glove_dict,
                                                                                            MAX_INPUT_LENGTH,
                                                                                            MAX_INPUT_SENTENCES,
                                                                                            MAX_QUESTION_LENGTH)

  test_batched_input_vecs, test_batched_input_lengths, test_batched_end_of_sentences, test_batched_num_sentences, test_batched_question_vecs, \
  test_batched_question_lengths, test_batched_answer_vecs = convert_to_vectors_with_sentences(
    test_batches, glove_dict, MAX_INPUT_LENGTH, MAX_INPUT_SENTENCES, MAX_QUESTION_LENGTH)

  # Print summary statistics
  print "Training samples: {}".format(len(train))
  print "Validation samples: {}".format(len(validation))
  print "Testing samples: {}".format(len(test))
  print "Batch size: {}".format(BATCH_SIZE)
  print "Validation number of batches: {}".format(len(validation_batches))
  print "Test number of batches: {}".format(len(test_batches))

  # Add placeholders
  input_placeholder, input_length_placeholder, end_of_sentences_placeholder, num_sentences_placeholder, question_placeholder, \
  question_length_placeholder, labels_placeholder = add_placeholders()

  # Input module
  with tf.variable_scope("input"):
    sentence_states, all_outputs = input_module(input_placeholder, input_length_placeholder,
                                                end_of_sentences_placeholder)

  # Question module
  with tf.variable_scope("question"):
    question_state = question_module(question_placeholder, question_length_placeholder)

  # Episodic memory module
  with tf.variable_scope("episode"):
    episodic_memory_state = episodic_memory_module(sentence_states, num_sentences_placeholder, question_state)

  # Answer module
  with tf.variable_scope("answer"):
    projections = answer_module(episodic_memory_state)

  prediction_probs = tf.nn.softmax(projections)

  # Compute loss
  cross_entropy_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(projections, labels_placeholder))

  l2_loss = compute_regularization_penalty()

  cost = cross_entropy_loss + REG * l2_loss

  # Add optimizer
  optimizer = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE).minimize(cost)

  # Initialize all variables
  init = tf.initialize_all_variables()
  saver = tf.train.Saver()

  # Train over multiple epochs
  with tf.Session() as sess:
    best_validation_accuracy = 0.0
    best_val_epoch = 0

    sess.run(init)
    # train until we reach the maximum number of epochs
    for epoch in range(MAX_EPOCHS):

      print 'Epoch {}'.format(epoch)
      start = time.time()
      ###

      total_training_loss = 0
      sum_training_accuracy = 0

      train_batches = batch_data(train, BATCH_SIZE)
      train_batched_input_vecs, train_batched_input_lengths, train_batched_end_of_sentences, train_batched_num_sentences, train_batched_question_vecs, \
      train_batched_question_lengths, train_batched_answer_vecs = convert_to_vectors_with_sentences(
        train_batches, glove_dict, MAX_INPUT_LENGTH, MAX_INPUT_SENTENCES, MAX_QUESTION_LENGTH)

      # Compute average loss on training data
      for i in range(len(train_batches)):

        # print "Train batch ", train_batches[i]
        # print "End of sentences ", train_batched_end_of_sentences[i]

        loss, _, batch_prediction_probs, input_outputs, sentence_states_out = sess.run(
          [cost, optimizer, prediction_probs, all_outputs, sentence_states],
          feed_dict={input_placeholder: train_batched_input_vecs[i],
                     input_length_placeholder: train_batched_input_lengths[i],
                     end_of_sentences_placeholder: train_batched_end_of_sentences[i],
                     num_sentences_placeholder: train_batched_num_sentences[i],
                     question_placeholder: train_batched_question_vecs[i],
                     question_length_placeholder: train_batched_question_lengths[i],
                     labels_placeholder: train_batched_answer_vecs[i]})

        # end_of_first_sentence_first_batch = train_batched_end_of_sentences[i][0,0]
        #
        # print "Index end of first sentence:", end_of_first_sentence_first_batch
        #
        # print "Shape input outputs", np.shape(input_outputs)
        # print "States at end of first sentence for first element of batch", input_outputs[end_of_first_sentence_first_batch, 0, :]
        # print "States at end of first sentence for first element of batch {}".format(sentence_states[0,0:])
        # print "Train batch number of sentences:", train_batched_num_sentences[i]

        total_training_loss += loss

        batch_accuracy = np.equal(np.argmax(batch_prediction_probs, axis=1),
                                  np.argmax(train_batched_answer_vecs[i], axis=1)).mean()

        sum_training_accuracy += batch_accuracy

        # Print a training update
        if i % UPDATE_LENGTH == 0:
          print "Current average training loss: {}".format(total_training_loss / (i + 1))
          print "Current training accuracy: {}".format(sum_training_accuracy / (i + 1))

      average_training_loss = total_training_loss / len(train_batches)
      training_accuracy = sum_training_accuracy / len(train_batches)

      total_validation_loss = 0
      sum_validation_accuracy = 0

      # Compute average loss on validation data
      for i in range(len(validation_batches)):
        loss, batch_prediction_probs = sess.run(
          [cost, prediction_probs],
          feed_dict={input_placeholder: val_batched_input_vecs[i],
                     input_length_placeholder: val_batched_input_lengths[i],
                     end_of_sentences_placeholder: val_batched_end_of_sentences[i],
                     num_sentences_placeholder: val_batched_num_sentences[i],
                     question_placeholder: val_batched_question_vecs[i],
                     question_length_placeholder: val_batched_question_lengths[i],
                     labels_placeholder: val_batched_answer_vecs[i]})

        total_validation_loss += loss

        batch_accuracy = np.equal(np.argmax(batch_prediction_probs, axis=1),
                                  np.argmax(val_batched_answer_vecs[i], axis=1)).mean()

        sum_validation_accuracy += batch_accuracy

      average_validation_loss = total_validation_loss / len(validation_batches)
      validation_accuracy = sum_validation_accuracy / len(validation_batches)

      print 'Training loss: {}'.format(average_training_loss)
      print 'Training accuracy: {}'.format(training_accuracy)
      print 'Validation loss: {}'.format(average_validation_loss)
      print 'Validation accuracy: {}'.format(validation_accuracy)
      if validation_accuracy > best_validation_accuracy:
        best_validation_accuracy = validation_accuracy
        best_val_epoch = epoch
        saver.save(sess, '../data/weights/dmn_' + OUTFILE_STRING + '.weights')
        print "Weights saved"

      print 'Total time: {}'.format(time.time() - start)

      outfile = './outputs/dmn/' + OUTFILE_STRING + '.txt'
      f = open(outfile, "a")
      f.write('train_acc, ' + str(training_accuracy) + '\n')
      f.write('train_loss, ' + str(average_training_loss) + '\n')
      f.write('val_acc, ' + str(validation_accuracy) + '\n')
      f.write('val_loss, ' + str(average_validation_loss) + '\n')
      f.close()

    # Compute average loss on testing data with best weights
    saver.restore(sess, '../data/weights/dmn_' + OUTFILE_STRING + '.weights')

    total_test_loss = 0
    sum_test_accuracy = 0

    # Compute average loss on test data
    for i in range(len(test_batches)):
      loss, batch_prediction_probs = sess.run(
        [cost, prediction_probs],
        feed_dict={input_placeholder: test_batched_input_vecs[i],
                   input_length_placeholder: test_batched_input_lengths[i],
                   end_of_sentences_placeholder: test_batched_end_of_sentences[i],
                   num_sentences_placeholder: test_batched_num_sentences[i],
                   question_placeholder: test_batched_question_vecs[i],
                   question_length_placeholder: test_batched_question_lengths[i],
                   labels_placeholder: test_batched_answer_vecs[i]})

      total_test_loss += loss

      batch_accuracy = np.equal(np.argmax(batch_prediction_probs, axis=1),
                                np.argmax(test_batched_answer_vecs[i], axis=1)).mean()

      sum_test_accuracy += batch_accuracy

    average_test_loss = total_test_loss / len(test_batches)
    test_accuracy = sum_test_accuracy / len(test_batches)

    print '=-=' * 5
    print 'Test accuracy: {}'.format(test_accuracy)
    print '=-=' * 5
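
The listing above targets an early TensorFlow release: Python 2 print statements, tf.initialize_all_variables(), and positional arguments to tf.nn.softmax_cross_entropy_with_logits. If the loss and optimizer need to be rebuilt under TensorFlow 1.x, the deprecated calls map onto the sketch below. The stand-in tensors and constants (features, W_answer, NUM_CLASSES, HIDDEN_SIZE, LEARNING_RATE, REG) are assumptions added only to make the snippet self-contained; in the example they come from answer_module, compute_regularization_penalty, and the project configuration.

import tensorflow as tf  # TensorFlow 1.x assumed

NUM_CLASSES = 20       # assumption: width of the one-hot answer vectors
HIDDEN_SIZE = 50       # assumption: size of the episodic memory state
LEARNING_RATE = 0.001  # stands in for the example's LEARNING_RATE constant
REG = 0.001            # stands in for the example's REG constant

# Stand-ins for the tensors the example builds with its modules.
features = tf.placeholder(tf.float32, [None, HIDDEN_SIZE])
labels_placeholder = tf.placeholder(tf.float32, [None, NUM_CLASSES])
W = tf.get_variable("W_answer", [HIDDEN_SIZE, NUM_CLASSES])
projections = tf.matmul(features, W)
l2_loss = tf.nn.l2_loss(W)  # stands in for compute_regularization_penalty()

# Keyword arguments replace the old positional (logits, labels) call.
cross_entropy_loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=projections,
                                            labels=labels_placeholder))
cost = cross_entropy_loss + REG * l2_loss
train_op = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE).minimize(cost)

# tf.global_variables_initializer() replaces tf.initialize_all_variables().
init = tf.global_variables_initializer()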
Code example #2
def run_dmn(input, question):

    if input is None or question is None:
        return None, None, None, None, None

    input_split = re.findall(r"[\w']+|[.,!?;]", input)
    print input_split
    question_split = re.findall(r"[\w']+|[.,!?;]", question)
    print question_split

    # Dummy answer must be in the dataset
    input_question_answer = (input_split, question_split, unicode('hallway'))

    print input_question_answer

    dummy_data = []

    for i in range(2 * BATCH_SIZE):
        dummy_data.append(input_question_answer)

    # Convert data into a batch
    dummy_batch = batch_data(dummy_data, BATCH_SIZE)

    # Convert words into indices
    val_batched_input_vecs, val_batched_input_lengths, val_batched_end_of_sentences, val_batched_num_sentences, val_batched_question_vecs, \
    val_batched_question_lengths, val_batched_answers = convert_to_indices(dummy_batch,
                                                                           word_to_index,
                                                                           answer_to_index,
                                                                           MAX_INPUT_LENGTH,
                                                                           MAX_INPUT_SENTENCES,
                                                                           MAX_QUESTION_LENGTH)

    print "Running DMN"

    # Run dmn
    probs, episode_1_gates, episode_2_gates, episode_3_gates = session.run(
        [
            prediction_probs, gates_for_episodes[0], gates_for_episodes[1],
            gates_for_episodes[2]
        ],
        feed_dict={
            input_placeholder: val_batched_input_vecs[0],
            input_length_placeholder: val_batched_input_lengths[0],
            end_of_sentences_placeholder: val_batched_end_of_sentences[0],
            num_sentences_placeholder: val_batched_num_sentences[0],
            question_placeholder: val_batched_question_vecs[0],
            question_length_placeholder: val_batched_question_lengths[0],
            dropout_placeholder: 1.0
        })

    print "DMN finished"

    answer_probs = probs[0]

    index_answer = np.argmax(answer_probs)

    # Convert answer into a word
    answer = index_to_answer[index_answer]

    print "Answer is", answer

    # Get sentences
    sentences = re.split(r"[.]+", input)
    sentences.pop()
    print "Sentences", sentences

    num_sentences = len(sentences)
    print num_sentences

    print episode_1_gates[0]
    print episode_2_gates[0]
    print episode_3_gates[0]

    # Get gates
    gates_1 = episode_1_gates[0][:num_sentences]
    gates_2 = episode_2_gates[0][:num_sentences]
    gates_3 = episode_3_gates[0][:num_sentences]

    print gates_1
    print gates_2
    print gates_3

    return answer, sentences, gates_1, gates_2, gates_3
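
A minimal sketch of how run_dmn above might be called, assuming the module-level graph, session, and placeholders it references have already been built and a trained checkpoint restored; the story and question strings are made up.

story = "Mary went to the hallway. John moved to the garden."
query = "Where is Mary?"

answer, sentences, gates_1, gates_2, gates_3 = run_dmn(story, query)

if answer is not None:
    print "Predicted answer:", answer
    # One attention gate per story sentence for each of the three episodes.
    for sentence, g1, g2, g3 in zip(sentences, gates_1, gates_2, gates_3):
        print sentence.strip(), g1, g2, g3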
Code example #3
def run_baseline():
    """
  Main function which loads in data, runs the model, and prints out statistics

  """

    # Get train dataset for task 6
    train_total = get_task_6_train()

    train, validation = split_training_data(train_total)

    # Get test dataset for task 6
    test = get_task_6_test()

    # Get word to glove vectors dictionary
    glove_dict = load_glove_vectors()

    # Split data into batches
    validation_batches = batch_data(validation, BATCH_SIZE)
    test_batches = batch_data(test, BATCH_SIZE)

    # Convert batches into vectors
    val_batched_input_vecs, val_batched_input_lengths, val_batched_end_of_sentences, val_batched_num_sentences, val_batched_question_vecs, \
    val_batched_question_lengths, val_batched_answer_vecs = convert_to_vectors_with_sentences(validation_batches,
                                                                                              glove_dict,
                                                                                              MAX_INPUT_LENGTH,
                                                                                              MAX_INPUT_SENTENCES,
                                                                                              MAX_QUESTION_LENGTH)

    test_batched_input_vecs, test_batched_input_lengths, test_batched_end_of_sentences, test_batched_num_sentences, test_batched_question_vecs, \
    test_batched_question_lengths, test_batched_answer_vecs = convert_to_vectors_with_sentences(
      test_batches, glove_dict, MAX_INPUT_LENGTH, MAX_INPUT_SENTENCES, MAX_QUESTION_LENGTH)

    # Print summary statistics
    print "Training samples: {}".format(len(train))
    print "Validation samples: {}".format(len(validation))
    print "Testing samples: {}".format(len(test))
    print "Batch size: {}".format(BATCH_SIZE)
    print "Validation number of batches: {}".format(len(validation_batches))
    print "Test number of batches: {}".format(len(test_batches))

    # Add placeholders
    input_placeholder, input_length_placeholder, end_of_sentences_placeholder, num_sentences_placeholder, question_placeholder, \
    question_length_placeholder, labels_placeholder = add_placeholders()

    # Input module
    with tf.variable_scope("input"):
        sentence_states, all_outputs = input_module(
            input_placeholder, input_length_placeholder,
            end_of_sentences_placeholder)

    # Question module
    with tf.variable_scope("question"):
        question_state = question_module(question_placeholder,
                                         question_length_placeholder)

    # Episodic memory module
    with tf.variable_scope("episode"):
        episodic_memory_state = episodic_memory_module(
            sentence_states, num_sentences_placeholder, question_state)

    # Answer module
    with tf.variable_scope("answer"):
        projections = answer_module(episodic_memory_state)

    prediction_probs = tf.nn.softmax(projections)

    # Compute loss
    cross_entropy_loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(projections,
                                                labels_placeholder))

    l2_loss = compute_regularization_penalty()

    cost = cross_entropy_loss + REG * l2_loss

    # Add optimizer
    optimizer = tf.train.AdamOptimizer(
        learning_rate=LEARNING_RATE).minimize(cost)

    # Initialize all variables
    init = tf.initialize_all_variables()
    saver = tf.train.Saver()

    # Train over multiple epochs
    with tf.Session() as sess:
        best_validation_accuracy = 0.0
        best_val_epoch = 0

        sess.run(init)
        # train until we reach the maximum number of epochs
        for epoch in range(MAX_EPOCHS):

            print 'Epoch {}'.format(epoch)
            start = time.time()
            ###

            total_training_loss = 0
            sum_training_accuracy = 0

            train_batches = batch_data(train, BATCH_SIZE)
            train_batched_input_vecs, train_batched_input_lengths, train_batched_end_of_sentences, train_batched_num_sentences, train_batched_question_vecs, \
            train_batched_question_lengths, train_batched_answer_vecs = convert_to_vectors_with_sentences(
              train_batches, glove_dict, MAX_INPUT_LENGTH, MAX_INPUT_SENTENCES, MAX_QUESTION_LENGTH)

            # Compute average loss on training data
            for i in range(len(train_batches)):

                # print "Train batch ", train_batches[i]
                # print "End of sentences ", train_batched_end_of_sentences[i]

                loss, _, batch_prediction_probs, input_outputs, sentence_states_out = sess.run(
                    [
                        cost, optimizer, prediction_probs, all_outputs,
                        sentence_states
                    ],
                    feed_dict={
                        input_placeholder:
                        train_batched_input_vecs[i],
                        input_length_placeholder:
                        train_batched_input_lengths[i],
                        end_of_sentences_placeholder:
                        train_batched_end_of_sentences[i],
                        num_sentences_placeholder:
                        train_batched_num_sentences[i],
                        question_placeholder:
                        train_batched_question_vecs[i],
                        question_length_placeholder:
                        train_batched_question_lengths[i],
                        labels_placeholder:
                        train_batched_answer_vecs[i]
                    })

                # end_of_first_sentence_first_batch = train_batched_end_of_sentences[i][0,0]
                #
                # print "Index end of first sentence:", end_of_first_sentence_first_batch
                #
                # print "Shape input outputs", np.shape(input_outputs)
                # print "States at end of first sentence for first element of batch", input_outputs[end_of_first_sentence_first_batch, 0, :]
                # print "States at end of first sentence for first element of batch {}".format(sentence_states[0,0:])
                # print "Train batch number of sentences:", train_batched_num_sentences[i]

                total_training_loss += loss

                batch_accuracy = np.equal(
                    np.argmax(batch_prediction_probs, axis=1),
                    np.argmax(train_batched_answer_vecs[i], axis=1)).mean()

                sum_training_accuracy += batch_accuracy

                # Print a training update
                if i % UPDATE_LENGTH == 0:
                    print "Current average training loss: {}".format(
                        total_training_loss / (i + 1))
                    print "Current training accuracy: {}".format(
                        sum_training_accuracy / (i + 1))

            average_training_loss = total_training_loss / len(train_batches)
            training_accuracy = sum_training_accuracy / len(train_batches)

            total_validation_loss = 0
            sum_validation_accuracy = 0

            # Compute average loss on validation data
            for i in range(len(validation_batches)):
                loss, batch_prediction_probs = sess.run(
                    [cost, prediction_probs],
                    feed_dict={
                        input_placeholder:
                        val_batched_input_vecs[i],
                        input_length_placeholder:
                        val_batched_input_lengths[i],
                        end_of_sentences_placeholder:
                        val_batched_end_of_sentences[i],
                        num_sentences_placeholder:
                        val_batched_num_sentences[i],
                        question_placeholder:
                        val_batched_question_vecs[i],
                        question_length_placeholder:
                        val_batched_question_lengths[i],
                        labels_placeholder:
                        val_batched_answer_vecs[i]
                    })

                total_validation_loss += loss

                batch_accuracy = np.equal(
                    np.argmax(batch_prediction_probs, axis=1),
                    np.argmax(val_batched_answer_vecs[i], axis=1)).mean()

                sum_validation_accuracy += batch_accuracy

            average_validation_loss = total_validation_loss / len(
                validation_batches)
            validation_accuracy = sum_validation_accuracy / len(
                validation_batches)

            print 'Training loss: {}'.format(average_training_loss)
            print 'Training accuracy: {}'.format(training_accuracy)
            print 'Validation loss: {}'.format(average_validation_loss)
            print 'Validation accuracy: {}'.format(validation_accuracy)
            if validation_accuracy > best_validation_accuracy:
                best_validation_accuracy = validation_accuracy
                best_val_epoch = epoch
                saver.save(
                    sess, '../data/weights/dmn_' + OUTFILE_STRING + '.weights')
                print "Weights saved"

            print 'Total time: {}'.format(time.time() - start)

            outfile = './outputs/dmn/' + OUTFILE_STRING + '.txt'
            f = open(outfile, "a")
            f.write('train_acc, ' + str(training_accuracy) + '\n')
            f.write('train_loss, ' + str(average_training_loss) + '\n')
            f.write('val_acc, ' + str(validation_accuracy) + '\n')
            f.write('val_loss, ' + str(average_validation_loss) + '\n')
            f.close()

        # Compute average loss on testing data with best weights
        saver.restore(sess,
                      '../data/weights/dmn_' + OUTFILE_STRING + '.weights')

        total_test_loss = 0
        sum_test_accuracy = 0

        # Compute average loss on test data
        for i in range(len(test_batches)):
            loss, batch_prediction_probs = sess.run(
                [cost, prediction_probs],
                feed_dict={
                    input_placeholder: test_batched_input_vecs[i],
                    input_length_placeholder: test_batched_input_lengths[i],
                    end_of_sentences_placeholder:
                    test_batched_end_of_sentences[i],
                    num_sentences_placeholder: test_batched_num_sentences[i],
                    question_placeholder: test_batched_question_vecs[i],
                    question_length_placeholder:
                    test_batched_question_lengths[i],
                    labels_placeholder: test_batched_answer_vecs[i]
                })

            total_test_loss += loss

            batch_accuracy = np.equal(
                np.argmax(batch_prediction_probs, axis=1),
                np.argmax(test_batched_answer_vecs[i], axis=1)).mean()

            sum_test_accuracy += batch_accuracy

        average_test_loss = total_test_loss / len(test_batches)
        test_accuracy = sum_test_accuracy / len(test_batches)

        print '=-=' * 5
        print 'Test accuracy: {}'.format(test_accuracy)
        print '=-=' * 5
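
Both run_baseline listings rely on a batching helper that is not shown. Based only on how it is called above (a sample list in, a list of BATCH_SIZE-sized batches out), a minimal sketch could look like the following; the project's actual batch_data may shuffle, pad, or handle a trailing partial batch differently.

def batch_data(samples, batch_size):
    """Sketch: split a list of samples into consecutive fixed-size batches,
    dropping any partial batch at the end (an assumption, not the real code)."""
    return [samples[i:i + batch_size]
            for i in range(0, len(samples) - batch_size + 1, batch_size)]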
Code example #4
def run_dmn(input, question):

  if input is None or question is None:
    return None, None, None, None, None

  input_split = re.findall(r"[\w']+|[.,!?;]", input)
  print input_split
  question_split = re.findall(r"[\w']+|[.,!?;]", question)
  print question_split

  # Dummy answer must be in the dataset
  input_question_answer = (input_split, question_split, unicode('hallway'))

  print input_question_answer

  dummy_data = []

  for i in range(2*BATCH_SIZE):
    dummy_data.append(input_question_answer)

  # Convert data into a batch
  dummy_batch = batch_data(dummy_data, BATCH_SIZE)

  # Convert words into indices
  val_batched_input_vecs, val_batched_input_lengths, val_batched_end_of_sentences, val_batched_num_sentences, val_batched_question_vecs, \
  val_batched_question_lengths, val_batched_answers = convert_to_indices(dummy_batch,
                                                                         word_to_index,
                                                                         answer_to_index,
                                                                         MAX_INPUT_LENGTH,
                                                                         MAX_INPUT_SENTENCES,
                                                                         MAX_QUESTION_LENGTH)

  print "Running DMN"

  # Run dmn
  probs, episode_1_gates, episode_2_gates, episode_3_gates = session.run(
    [prediction_probs, gates_for_episodes[0],
     gates_for_episodes[1], gates_for_episodes[2]],
    feed_dict={input_placeholder: val_batched_input_vecs[0],
               input_length_placeholder: val_batched_input_lengths[0],
               end_of_sentences_placeholder: val_batched_end_of_sentences[0],
               num_sentences_placeholder: val_batched_num_sentences[0],
               question_placeholder: val_batched_question_vecs[0],
               question_length_placeholder: val_batched_question_lengths[0],
               dropout_placeholder: 1.0})

  print "DMN finished"

  answer_probs = probs[0]

  index_answer = np.argmax(answer_probs)

  # Convert answer into a word
  answer = index_to_answer[index_answer]

  print "Answer is", answer

  # Get sentences
  sentences = re.split(r"[.]+", input)
  sentences.pop()
  print "Sentences", sentences

  num_sentences = len(sentences)
  print num_sentences

  print episode_1_gates[0]
  print episode_2_gates[0]
  print episode_3_gates[0]

  # Get gates
  gates_1 = episode_1_gates[0][:num_sentences]
  gates_2 = episode_2_gates[0][:num_sentences]
  gates_3 = episode_3_gates[0][:num_sentences]

  print gates_1
  print gates_2
  print gates_3

  return answer, sentences, gates_1, gates_2, gates_3
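
Similarly, split_training_data is used above to carve a validation set out of the training data but is not reproduced here. A minimal sketch under the assumption of a simple tail split; the real helper may shuffle first or use a different ratio.

def split_training_data(samples, validation_fraction=0.1):
    """Sketch: hold out the last fraction of the samples for validation."""
    split_point = int(len(samples) * (1.0 - validation_fraction))
    return samples[:split_point], samples[split_point:]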
Code example #5
def run_dmn():
  """
  Main function which loads in data, runs the model, and prints out statistics

  """

  print "Task", TASK

  # Get train dataset for task
  train_total = get_task_train(TASK)
  train_total = remove_long_sentences(train_total, MAX_INPUT_SENTENCES)

  train, validation = split_training_data(train_total)

  # Get all tokens from answers in training
  answer_to_index = answer_tokens_to_index(train_total)

  print answer_to_index

  number_of_answers = len(answer_to_index)

  print number_of_answers

  # Get test dataset for task
  test = get_task_test(TASK)
  test = remove_long_sentences(test, MAX_INPUT_SENTENCES)

  # Get word to glove vectors dictionary
  word_to_index, embedding_mat = load_glove_embedding()

  def initialize_word_vectors(shape, dtype):
    return embedding_mat

  # Create L tensor from embedding_mat
  with tf.variable_scope("Embedding") as scope:

    # L = tf.get_variable("L", shape=np.shape(embedding_mat), initializer=initialize_word_vectors)
    L = tf.get_variable("L", shape=np.shape(embedding_mat),
                        initializer=tf.random_uniform_initializer(minval=-np.sqrt(3), maxval=np.sqrt(3)))

  # Split data into batches
  validation_batches = batch_data(validation, BATCH_SIZE)
  test_batches = batch_data(test, BATCH_SIZE)

  # Convert batches into indices
  val_batched_input_vecs, val_batched_input_lengths, val_batched_end_of_sentences, val_batched_num_sentences, val_batched_question_vecs, \
  val_batched_question_lengths, val_batched_answers = convert_to_indices(validation_batches,
                                                                         word_to_index,
                                                                         answer_to_index,
                                                                         MAX_INPUT_LENGTH,
                                                                         MAX_INPUT_SENTENCES,
                                                                         MAX_QUESTION_LENGTH)

  test_batched_input_vecs, test_batched_input_lengths, test_batched_end_of_sentences, test_batched_num_sentences, test_batched_question_vecs, \
  test_batched_question_lengths, test_batched_answers = convert_to_indices(test_batches,
                                                                           word_to_index,
                                                                           answer_to_index,
                                                                           MAX_INPUT_LENGTH,
                                                                           MAX_INPUT_SENTENCES,
                                                                           MAX_QUESTION_LENGTH)

  # Print summary statistics
  print "Training samples: {}".format(len(train))
  print "Validation samples: {}".format(len(validation))
  print "Testing samples: {}".format(len(test))
  print "Batch size: {}".format(BATCH_SIZE)
  print "Validation number of batches: {}".format(len(validation_batches))
  print "Test number of batches: {}".format(len(test_batches))

  # Add placeholders
  input_placeholder, input_length_placeholder, end_of_sentences_placeholder, num_sentences_placeholder, question_placeholder, \
  question_length_placeholder, labels_placeholder, dropout_placeholder = add_placeholders()

  # Input module
  sentence_states, all_outputs = input_module(input_placeholder, input_length_placeholder,
                                              end_of_sentences_placeholder, dropout_placeholder)

  # Question module
  question_state = question_module(question_placeholder, question_length_placeholder, dropout_placeholder)

  # Episodic memory module
  episodic_memory_state, gates_for_episodes = episodic_memory_module(sentence_states, num_sentences_placeholder,
                                                                     question_state)

  # Answer module
  projections = answer_module(episodic_memory_state, number_of_answers, dropout_placeholder)

  prediction_probs = tf.nn.softmax(projections)

  # Compute loss
  cross_entropy_loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(projections, labels_placeholder))

  l2_loss = compute_regularization_penalty()

  cost = cross_entropy_loss + REG * l2_loss

  # Add optimizer
  optimizer = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE).minimize(cost)

  # Initialize all variables
  init = tf.initialize_all_variables()
  saver = tf.train.Saver()

  # Train over multiple epochs
  with tf.Session() as sess:
    best_validation_accuracy = 0.0
    best_val_epoch = 0

    sess.run(init)
    # train until we reach the maximum number of epochs
    for epoch in range(MAX_EPOCHS):

      print 'Epoch {}'.format(epoch)
      start = time.time()
      ###

      total_training_loss = 0
      sum_training_accuracy = 0

      train_batches = batch_data(train, BATCH_SIZE)
      train_batched_input_vecs, train_batched_input_lengths, train_batched_end_of_sentences, train_batched_num_sentences, train_batched_question_vecs, \
      train_batched_question_lengths, train_batched_answers = convert_to_indices(
        train_batches, word_to_index, answer_to_index, MAX_INPUT_LENGTH, MAX_INPUT_SENTENCES, MAX_QUESTION_LENGTH)

      # Compute average loss on training data
      for i in range(len(train_batches)):

        # print "Train batch ", train_batches[i][0]
        # print "End of sentences ", train_batched_end_of_sentences[i]

        loss, _, batch_prediction_probs, input_outputs, sentence_states_out, episode_1_gates, episode_2_gates, episode_3_gates = sess.run(
          [cost, optimizer, prediction_probs, all_outputs, sentence_states, gates_for_episodes[0],
           gates_for_episodes[1], gates_for_episodes[2]],
          feed_dict={input_placeholder: train_batched_input_vecs[i],
                     input_length_placeholder: train_batched_input_lengths[i],
                     end_of_sentences_placeholder: train_batched_end_of_sentences[i],
                     num_sentences_placeholder: train_batched_num_sentences[i],
                     question_placeholder: train_batched_question_vecs[i],
                     question_length_placeholder: train_batched_question_lengths[i],
                     labels_placeholder: train_batched_answers[i],
                     dropout_placeholder: DROPOUT})

        # print episode_1_gates[0]
        # print episode_2_gates[0]
        # print episode_3_gates[0]

        total_training_loss += loss

        batch_accuracy = np.equal(np.argmax(batch_prediction_probs, axis=1), train_batched_answers[i]).mean()

        sum_training_accuracy += batch_accuracy

        # Print a training update
        if i % UPDATE_LENGTH == 0:
          print "Current average training loss: {}".format(total_training_loss / (i + 1))
          print "Current training accuracy: {}".format(sum_training_accuracy / (i + 1))

      average_training_loss = total_training_loss / len(train_batches)
      training_accuracy = sum_training_accuracy / len(train_batches)

      total_validation_loss = 0
      sum_validation_accuracy = 0

      # Compute average loss on validation data
      for i in range(len(validation_batches)):
        loss, batch_prediction_probs = sess.run(
          [cost, prediction_probs],
          feed_dict={input_placeholder: val_batched_input_vecs[i],
                     input_length_placeholder: val_batched_input_lengths[i],
                     end_of_sentences_placeholder: val_batched_end_of_sentences[i],
                     num_sentences_placeholder: val_batched_num_sentences[i],
                     question_placeholder: val_batched_question_vecs[i],
                     question_length_placeholder: val_batched_question_lengths[i],
                     labels_placeholder: val_batched_answers[i],
                     dropout_placeholder: 1.0})

        total_validation_loss += loss

        batch_accuracy = np.equal(np.argmax(batch_prediction_probs, axis=1), val_batched_answers[i]).mean()

        sum_validation_accuracy += batch_accuracy

      average_validation_loss = total_validation_loss / len(validation_batches)
      validation_accuracy = sum_validation_accuracy / len(validation_batches)

      print 'Training loss: {}'.format(average_training_loss)
      print 'Training accuracy: {}'.format(training_accuracy)
      print 'Validation loss: {}'.format(average_validation_loss)
      print 'Validation accuracy: {}'.format(validation_accuracy)

      if validation_accuracy >= best_validation_accuracy:
        best_validation_accuracy = validation_accuracy
        best_val_epoch = epoch
        saver.save(sess, '../data/weights/dmn_' + OUTFILE_STRING + '.weights')
        print "Weights saved to " + '../data/weights/dmn_' + OUTFILE_STRING + '.weights'

      print 'Total time: {}'.format(time.time() - start)

      outfile = './outputs/dmn/' + OUTFILE_STRING + '.txt'
      f = open(outfile, "a")
      f.write('epoch, ' + str(epoch) + '\n')
      f.write('train_acc, ' + str(training_accuracy) + '\n')
      f.write('train_loss, ' + str(average_training_loss) + '\n')
      f.write('val_acc, ' + str(validation_accuracy) + '\n')
      f.write('val_loss, ' + str(average_validation_loss) + '\n')
      f.close()

    # Compute average loss on testing data with best weights
    saver.restore(sess, '../data/weights/dmn_' + OUTFILE_STRING + '.weights')

    total_test_loss = 0
    sum_test_accuracy = 0

    # Compute average loss on test data
    for i in range(len(test_batches)):
      loss, batch_prediction_probs = sess.run(
        [cost, prediction_probs],
        feed_dict={input_placeholder: test_batched_input_vecs[i],
                   input_length_placeholder: test_batched_input_lengths[i],
                   end_of_sentences_placeholder: test_batched_end_of_sentences[i],
                   num_sentences_placeholder: test_batched_num_sentences[i],
                   question_placeholder: test_batched_question_vecs[i],
                   question_length_placeholder: test_batched_question_lengths[i],
                   labels_placeholder: test_batched_answers[i],
                   dropout_placeholder: 1.0})

      total_test_loss += loss

      batch_accuracy = np.equal(np.argmax(batch_prediction_probs, axis=1), test_batched_answers[i]).mean()

      sum_test_accuracy += batch_accuracy

    average_test_loss = total_test_loss / len(test_batches)
    test_accuracy = sum_test_accuracy / len(test_batches)

    print '=-=' * 5
    print 'Test accuracy: {}'.format(test_accuracy)
    print '=-=' * 5

    f = open(outfile, "a")
    f.write('test_acc, ' + str(test_accuracy) + '\n')
    f.close()
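
Each epoch of the loop above appends plain 'key, value' lines (epoch, train_acc, train_loss, val_acc, val_loss, and finally test_acc) to ./outputs/dmn/<OUTFILE_STRING>.txt. A small parser for that format can be handy when plotting learning curves afterwards; the file name in the usage comment is hypothetical.

from collections import defaultdict

def read_metrics(path):
    """Parse the 'key, value' lines written by the training loop above
    into one list of floats per key, in epoch order."""
    metrics = defaultdict(list)
    with open(path) as f:
        for line in f:
            if ', ' not in line:
                continue
            key, value = line.strip().split(', ', 1)
            metrics[key].append(float(value))
    return dict(metrics)

# Hypothetical usage:
# metrics = read_metrics('./outputs/dmn/my_run.txt')
# print metrics['val_acc']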
Code example #6
def run_dmn():
    """
  Main function which loads in data, runs the model, and prints out statistics

  """

    # Get train dataset for task
    train_total = get_rc_train()
    train_total = remove_long_sentences(train_total, MAX_INPUT_SENTENCES)

    train, validation = split_training_data(train_total)

    # Get all tokens from answers in training
    answer_to_index = answer_tokens_to_index(train_total)

    number_of_answers = len(answer_to_index)

    # Get test dataset for task
    test = get_rc_val()
    test = remove_long_sentences(test, MAX_INPUT_SENTENCES)

    # Get word to glove vectors dictionary
    # word_to_index, embedding_mat = load_glove_embedding()

    # print "embedding mat shape: "
    # print np.shape(embedding_mat)

    word_to_index, vocab_size = create_word_to_index_from_vocab(train)

    print "vocab_size: "
    print vocab_size

    # def initialize_word_vectors(shape, dtype):
    #   return embedding_mat

    # Create L tensor from embedding_mat
    with tf.variable_scope("Embedding") as scope:

        # L = tf.get_variable("L", shape=np.shape(embedding_mat), initializer=initialize_word_vectors)
        L = tf.get_variable("L",
                            shape=(vocab_size, WORD_VECTOR_LENGTH),
                            initializer=tf.random_uniform_initializer(
                                minval=-np.sqrt(3), maxval=np.sqrt(3)))

    # Split data into batches
    validation_batches = batch_data(validation, BATCH_SIZE)
    test_batches = batch_data(test, BATCH_SIZE)

    # Convert batches into indices
    val_batched_input_vecs, val_batched_input_lengths, val_batched_end_of_sentences, val_batched_num_sentences, val_batched_question_vecs, \
    val_batched_question_lengths, val_batched_answers = convert_to_indices(validation_batches,
                                                                           word_to_index,
                                                                           answer_to_index,
                                                                           MAX_INPUT_LENGTH,
                                                                           MAX_INPUT_SENTENCES,
                                                                           MAX_QUESTION_LENGTH)

    test_batched_input_vecs, test_batched_input_lengths, test_batched_end_of_sentences, test_batched_num_sentences, test_batched_question_vecs, \
    test_batched_question_lengths, test_batched_answers = convert_to_indices(test_batches,
                                                                             word_to_index,
                                                                             answer_to_index,
                                                                             MAX_INPUT_LENGTH,
                                                                             MAX_INPUT_SENTENCES,
                                                                             MAX_QUESTION_LENGTH)

    # Print summary statistics
    print "Training samples: {}".format(len(train))
    print "Validation samples: {}".format(len(validation))
    print "Testing samples: {}".format(len(test))
    print "Batch size: {}".format(BATCH_SIZE)
    print "Validation number of batches: {}".format(len(validation_batches))
    print "Test number of batches: {}".format(len(test_batches))

    # Add placeholders
    input_placeholder, input_length_placeholder, end_of_sentences_placeholder, num_sentences_placeholder, question_placeholder, \
    question_length_placeholder, labels_placeholder, dropout_placeholder = add_placeholders()

    # Input module
    sentence_states, all_outputs = input_module(input_placeholder,
                                                input_length_placeholder,
                                                end_of_sentences_placeholder,
                                                dropout_placeholder)

    # Question module
    question_state = question_module(question_placeholder,
                                     question_length_placeholder,
                                     dropout_placeholder)

    # Episodic memory module
    episodic_memory_state, gates_for_episodes = episodic_memory_module(
        sentence_states, num_sentences_placeholder, question_state)

    # Answer module
    projections = answer_module(episodic_memory_state, number_of_answers,
                                dropout_placeholder)

    prediction_probs = tf.nn.softmax(projections)

    # Compute loss
    cross_entropy_loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(projections,
                                                       labels_placeholder))

    l2_loss = compute_regularization_penalty()

    cost = cross_entropy_loss + REG * l2_loss

    # Add optimizer
    optimizer = tf.train.AdamOptimizer(
        learning_rate=LEARNING_RATE).minimize(cost)

    # Initialize all variables
    init = tf.initialize_all_variables()
    saver = tf.train.Saver()

    # Train over multiple epochs
    with tf.Session() as sess:
        best_validation_accuracy = 0.0
        best_val_epoch = 0

        sess.run(init)
        # train until we reach the maximum number of epochs
        for epoch in range(MAX_EPOCHS):

            print 'Epoch {}'.format(epoch)
            start = time.time()
            ###

            total_training_loss = 0
            sum_training_accuracy = 0

            train_batches = batch_data(train, BATCH_SIZE)
            train_batched_input_vecs, train_batched_input_lengths, train_batched_end_of_sentences, train_batched_num_sentences, train_batched_question_vecs, \
            train_batched_question_lengths, train_batched_answers = convert_to_indices(
              train_batches, word_to_index, answer_to_index, MAX_INPUT_LENGTH, MAX_INPUT_SENTENCES, MAX_QUESTION_LENGTH)

            # Compute average loss on training data
            for i in range(len(train_batches)):

                # print "Train batch ", train_batches[i][0]
                # print "End of sentences ", train_batched_end_of_sentences[i]

                loss, _, batch_prediction_probs, input_outputs, sentence_states_out, episode_1_gates, episode_2_gates, episode_3_gates = sess.run(
                    [
                        cost, optimizer, prediction_probs, all_outputs,
                        sentence_states, gates_for_episodes[0],
                        gates_for_episodes[1], gates_for_episodes[2]
                    ],
                    feed_dict={
                        input_placeholder:
                        train_batched_input_vecs[i],
                        input_length_placeholder:
                        train_batched_input_lengths[i],
                        end_of_sentences_placeholder:
                        train_batched_end_of_sentences[i],
                        num_sentences_placeholder:
                        train_batched_num_sentences[i],
                        question_placeholder:
                        train_batched_question_vecs[i],
                        question_length_placeholder:
                        train_batched_question_lengths[i],
                        labels_placeholder:
                        train_batched_answers[i],
                        dropout_placeholder:
                        DROPOUT
                    })

                # print episode_1_gates[0]
                # print episode_2_gates[0]
                # print episode_3_gates[0]

                total_training_loss += loss

                batch_accuracy = np.equal(
                    np.argmax(batch_prediction_probs, axis=1),
                    train_batched_answers[i]).mean()

                sum_training_accuracy += batch_accuracy

                # Print a training update
                if i % UPDATE_LENGTH == 0:
                    print "Current average training loss: {}".format(
                        total_training_loss / (i + 1))
                    print "Current training accuracy: {}".format(
                        sum_training_accuracy / (i + 1))

            average_training_loss = total_training_loss / len(train_batches)
            training_accuracy = sum_training_accuracy / len(train_batches)

            total_validation_loss = 0
            sum_validation_accuracy = 0

            # Compute average loss on validation data
            for i in range(len(validation_batches)):
                loss, batch_prediction_probs = sess.run(
                    [cost, prediction_probs],
                    feed_dict={
                        input_placeholder:
                        val_batched_input_vecs[i],
                        input_length_placeholder:
                        val_batched_input_lengths[i],
                        end_of_sentences_placeholder:
                        val_batched_end_of_sentences[i],
                        num_sentences_placeholder:
                        val_batched_num_sentences[i],
                        question_placeholder:
                        val_batched_question_vecs[i],
                        question_length_placeholder:
                        val_batched_question_lengths[i],
                        labels_placeholder:
                        val_batched_answers[i],
                        dropout_placeholder:
                        1.0
                    })

                total_validation_loss += loss

                batch_accuracy = np.equal(
                    np.argmax(batch_prediction_probs, axis=1),
                    val_batched_answers[i]).mean()

                sum_validation_accuracy += batch_accuracy

            average_validation_loss = total_validation_loss / len(
                validation_batches)
            validation_accuracy = sum_validation_accuracy / len(
                validation_batches)

            print 'Training loss: {}'.format(average_training_loss)
            print 'Training accuracy: {}'.format(training_accuracy)
            print 'Validation loss: {}'.format(average_validation_loss)
            print 'Validation accuracy: {}'.format(validation_accuracy)

            if validation_accuracy > best_validation_accuracy:
                best_validation_accuracy = validation_accuracy
                best_val_epoch = epoch
                saver.save(
                    sess, '../data/weights/dmn_' + OUTFILE_STRING + '.weights')
                print "Weights saved to " + '../data/weights/dmn_' + OUTFILE_STRING + '.weights'

            print 'Total time: {}'.format(time.time() - start)

            outfile = './outputs/dmn/' + OUTFILE_STRING + '.txt'
            f = open(outfile, "a")
            f.write('epoch, ' + str(epoch) + '\n')
            f.write('train_acc, ' + str(training_accuracy) + '\n')
            f.write('train_loss, ' + str(average_training_loss) + '\n')
            f.write('val_acc, ' + str(validation_accuracy) + '\n')
            f.write('val_loss, ' + str(average_validation_loss) + '\n')
            f.close()

        # Compute average loss on testing data with best weights
        saver.restore(sess,
                      '../data/weights/dmn_' + OUTFILE_STRING + '.weights')

        total_test_loss = 0
        sum_test_accuracy = 0

        # Compute average loss on test data
        for i in range(len(test_batches)):
            loss, batch_prediction_probs = sess.run(
                [cost, prediction_probs],
                feed_dict={
                    input_placeholder: test_batched_input_vecs[i],
                    input_length_placeholder: test_batched_input_lengths[i],
                    end_of_sentences_placeholder:
                    test_batched_end_of_sentences[i],
                    num_sentences_placeholder: test_batched_num_sentences[i],
                    question_placeholder: test_batched_question_vecs[i],
                    question_length_placeholder:
                    test_batched_question_lengths[i],
                    labels_placeholder: test_batched_answers[i],
                    dropout_placeholder: 1.0
                })

            total_test_loss += loss

            batch_accuracy = np.equal(
                np.argmax(batch_prediction_probs, axis=1),
                test_batched_answers[i]).mean()

            sum_test_accuracy += batch_accuracy

        average_test_loss = total_test_loss / len(test_batches)
        test_accuracy = sum_test_accuracy / len(test_batches)

        print '=-=' * 5
        print 'Test accuracy: {}'.format(test_accuracy)
        print '=-=' * 5

        f = open(outfile, "a")
        f.write('test_acc, ' + str(test_accuracy) + '\n')
        f.close()
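
The last two listings also depend on two vocabulary helpers that are not reproduced here. Given how they are called above (lists of (story tokens, question tokens, answer) triples in, index maps out), minimal sketches could look like the following; the project's real implementations may order or filter tokens differently.

def answer_tokens_to_index(samples):
    """Sketch: map each distinct answer seen in the training triples
    to a consecutive integer index."""
    answer_to_index = {}
    for _, _, answer in samples:
        if answer not in answer_to_index:
            answer_to_index[answer] = len(answer_to_index)
    return answer_to_index

def create_word_to_index_from_vocab(samples, unknown_token='<unk>'):
    """Sketch: build a word-to-index map over story and question tokens,
    reserving index 0 for unknown words, and return it with its size."""
    word_to_index = {unknown_token: 0}
    for story_tokens, question_tokens, _ in samples:
        for token in story_tokens + question_tokens:
            if token not in word_to_index:
                word_to_index[token] = len(word_to_index)
    return word_to_index, len(word_to_index)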