Example #1
def main():
    dialogues_train = map(lambda x: x, train_babi)
    dialogues_train_eval = map(lambda x: [x[-1]], train_babi) \
        if args.predict_last_turn_only \
        else map(lambda x: x, train_babi)

    dialogues_test = map(lambda x: [x[-1]], test_plus) \
        if args.predict_last_turn_only \
        else map(lambda x: x, test_plus)

    data_train = reduce(lambda x, y: x + y, dialogues_train, [])
    data_train_eval = reduce(lambda x, y: x + y, dialogues_train_eval, [])
    data_test = reduce(lambda x, y: x + y, dialogues_test, [])

    train_s, train_q, train_a = vectorize_data_dialog(data_train, word_idx,
                                                      answer_idx,
                                                      sentence_size,
                                                      memory_size)
    train_eval_s, train_eval_q, train_eval_a = vectorize_data_dialog(
        data_train_eval, word_idx, answer_idx, sentence_size, memory_size)
    test_s, test_q, test_a = vectorize_data_dialog(data_test, word_idx,
                                                   answer_idx, sentence_size,
                                                   memory_size)

    print("Training Size (dialogues)", len(dialogues_train))
    print("Training/Evaluation Size (dialogues)", len(dialogues_train_eval))
    print("Testing Size (dialogues)", len(dialogues_test))
    print("Training Size (stories)", len(data_train))
    print("Training/Evaluation Size (stories)", len(data_train_eval))
    print("Testing Size (stories)", len(data_test))

    tf.set_random_seed(FLAGS.random_state)
    batch_size = FLAGS.batch_size
    optimizer = tf.train.GradientDescentOptimizer(
        learning_rate=FLAGS.learning_rate)

    batches = zip(range(0,
                        len(data_train) - batch_size, batch_size),
                  range(batch_size, len(data_train), batch_size))
    batches = [(start, end) for start, end in batches]

    with tf.Session() as sess:
        model = MemN2N(batch_size,
                       vocab_size,
                       sentence_size,
                       memory_size,
                       FLAGS.embedding_size,
                       answers_vectorized,
                       session=sess,
                       hops=FLAGS.hops,
                       max_grad_norm=FLAGS.max_grad_norm,
                       optimizer=optimizer)
        best_accuracy_per_epoch = train_model(
            model, (train_s, train_q, train_a),
            (train_eval_s, train_eval_q, train_eval_a),
            (test_s, test_q, test_a), batches)
    return best_accuracy_per_epoch
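The batches variable above pairs consecutive offsets into (start, end) slices of the training data; the zip of two staggered ranges is a recurring idiom in these examples. A minimal, framework-free sketch of the same idea (n_train and batch_size are illustrative values, not taken from the example):

# Hypothetical values standing in for len(data_train) and FLAGS.batch_size.
n_train = 10
batch_size = 3

# Each start offset is paired with the offset one batch further on; materializing
# the zip as a list lets it be shuffled and reused across epochs.
batches = list(zip(range(0, n_train - batch_size, batch_size),
                   range(batch_size, n_train, batch_size)))
print(batches)  # [(0, 3), (3, 6), (6, 9)] -- the trailing partial batch is dropped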
Example #2
def main(in_train_dialogue_name, in_testset_size, in_fold_number,
         in_dataset_shuffle):
    trainset_idx = [get_global_dialogue_index(in_train_dialogue_name)]
    testset_idx = []
    testset_counter = in_fold_number * in_testset_size
    while len(testset_idx) != in_testset_size:
        current_idx = in_dataset_shuffle[testset_counter]
        if current_idx not in trainset_idx:
            testset_idx.append(current_idx)
        testset_counter += 1
    dialogues_train = map(lambda x: all_dialogues[x], trainset_idx)
    dialogues_test = map(lambda x: all_dialogues[x], testset_idx)

    data_train = reduce(lambda x, y: x + y, dialogues_train, [])
    data_test = reduce(lambda x, y: x + y, dialogues_test, [])

    train_s, train_q, train_a = vectorize_data_dialog(data_train, word_idx,
                                                      answer_idx,
                                                      sentence_size,
                                                      memory_size)
    test_s, test_q, test_a = vectorize_data_dialog(data_test, word_idx,
                                                   answer_idx, sentence_size,
                                                   memory_size)

    print("Training Size (dialogues)", len(dialogues_train))
    print("Testing Size (dialogues)", len(dialogues_test))
    print("Training Size (stories)", len(data_train))
    print("Testing Size (stories)", len(data_test))

    tf.set_random_seed(FLAGS.random_state)
    batch_size = FLAGS.batch_size
    optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate,
                                       epsilon=FLAGS.epsilon)

    batches = zip(range(0,
                        len(data_train) - batch_size, batch_size),
                  range(batch_size, len(data_train), batch_size))
    batches = [(start, end) for start, end in batches]

    with tf.Session() as sess:
        model = MemN2N(batch_size,
                       vocab_size,
                       sentence_size,
                       memory_size,
                       FLAGS.embedding_size,
                       answer_vocab_size=answer_vocab_size,
                       session=sess,
                       hops=FLAGS.hops,
                       max_grad_norm=FLAGS.max_grad_norm,
                       optimizer=optimizer)
        best_accuracy_per_epoch = train_model(model,
                                              (train_s, train_q, train_a),
                                              (test_s, test_q, test_a),
                                              batches)
    return best_accuracy_per_epoch
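The test fold above is built by walking a pre-shuffled index list from position fold * size onwards and skipping any index already used for training. A small standalone sketch of that selection with plain lists (all names and values are illustrative):

def pick_test_fold(dataset_shuffle, trainset_idx, testset_size, fold_number):
    # Start at the beginning of the requested fold and collect indices until
    # the fold is full, skipping indices reserved for training.
    testset_idx = []
    counter = fold_number * testset_size
    while len(testset_idx) != testset_size:
        current_idx = dataset_shuffle[counter]
        if current_idx not in trainset_idx:
            testset_idx.append(current_idx)
        counter += 1
    return testset_idx

print(pick_test_fold([4, 1, 0, 3, 2], trainset_idx=[3],
                     testset_size=2, fold_number=1))  # [0, 2]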
Example #3
def main(in_trainset_size, in_split_number):
    # train/validation/test sets
    all_dialogues_idx = reduce(lambda x, y: x + [y], range(len(all_dialogues)),
                               [])
    random.shuffle(all_dialogues_idx)
    trainset_idx = all_dialogues_idx[in_split_number *
                                     in_trainset_size:in_split_number *
                                     in_trainset_size + in_trainset_size]
    testset_idx = filter(lambda x: x not in trainset_idx, all_dialogues_idx)
    dialogues_train = map(lambda x: all_dialogues[x], trainset_idx)
    dialogues_test = map(lambda x: all_dialogues[x], testset_idx)

    data_train = reduce(lambda x, y: x + y, dialogues_train, [])
    data_test = reduce(lambda x, y: x + y, dialogues_test, [])

    train_s, train_q, train_a = vectorize_data_dialog(data_train, word_idx,
                                                      answer_idx,
                                                      sentence_size,
                                                      memory_size)
    test_s, test_q, test_a = vectorize_data_dialog(data_test, word_idx,
                                                   answer_idx, sentence_size,
                                                   memory_size)

    print("Training Size (dialogues)", len(dialogues_train))
    print("Testing Size (dialogues)", len(dialogues_test))
    print("Training Size (stories)", len(data_train))
    print("Testing Size (stories)", len(data_test))

    tf.set_random_seed(FLAGS.random_state)
    batch_size = FLAGS.batch_size
    optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate,
                                       epsilon=FLAGS.epsilon)

    batches = zip(range(0,
                        len(data_train) - batch_size, batch_size),
                  range(batch_size, len(data_train), batch_size))
    batches = [(start, end) for start, end in batches]

    with tf.Session() as sess:
        model = MemN2N(batch_size,
                       vocab_size,
                       sentence_size,
                       memory_size,
                       FLAGS.embedding_size,
                       answer_vocab_size=answer_vocab_size,
                       session=sess,
                       hops=FLAGS.hops,
                       max_grad_norm=FLAGS.max_grad_norm,
                       optimizer=optimizer)
        best_accuracy_per_epoch = train_model(model,
                                              (train_s, train_q, train_a),
                                              (test_s, test_q, test_a),
                                              batches)
    return best_accuracy_per_epoch
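Example #3 carves the training fold out of a shuffled index list by slicing and treats every remaining index as test data. A rough, framework-free equivalent (illustrative sizes; a set makes the membership test constant-time, and on Python 3 the filter above would otherwise need to be wrapped in list()):

import random

all_idx = list(range(10))  # illustrative dialogue indices
random.seed(0)
random.shuffle(all_idx)

trainset_size, split_number = 3, 1
start = split_number * trainset_size
trainset_idx = all_idx[start:start + trainset_size]

# Everything outside the training fold becomes test data, in shuffle order.
train_set = set(trainset_idx)
testset_idx = [i for i in all_idx if i not in train_set]

print(len(trainset_idx), len(testset_idx))  # 3 7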
Example #4
def main():
    data_train = reduce(lambda x, y: x + y, train, [])
    data_test = reduce(lambda x, y: x + y, test, [])

    train_s, train_q, train_a = vectorize_data_dialog(data_train, word_idx,
                                                      answer_idx,
                                                      sentence_size,
                                                      memory_size)
    test_s, test_q, test_a = vectorize_data_dialog(data_test, word_idx,
                                                   answer_idx, sentence_size,
                                                   memory_size)

    print("Training Size (dialogues)", len(train))
    print("Testing Size (dialogues)", len(test))
    print("Training Size (stories)", len(data_train))
    print("Testing Size (stories)", len(data_test))

    tf.set_random_seed(FLAGS.random_state)
    batch_size = FLAGS.batch_size
    optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate,
                                       epsilon=FLAGS.epsilon)

    batches = zip(range(0,
                        len(data_train) - batch_size, batch_size),
                  range(batch_size, len(data_train), batch_size))
    batches = [(start, end) for start, end in batches]

    with tf.Session() as sess:
        model = MemN2N(batch_size,
                       vocab_size,
                       sentence_size,
                       memory_size,
                       FLAGS.embedding_size,
                       answer_vocab_size=answer_vocab_size,
                       session=sess,
                       hops=FLAGS.hops,
                       max_grad_norm=FLAGS.max_grad_norm,
                       optimizer=optimizer)
        best_accuracy_per_epoch = train_model(model,
                                              (train_s, train_q, train_a),
                                              (test_s, test_q, test_a),
                                              batches)
    return best_accuracy_per_epoch
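Most of these examples flatten a list of dialogues (each a list of turns) into one flat story list with reduce. On Python 3, reduce must be imported from functools; a short sketch on toy data, together with the equivalent and linear-time itertools.chain form:

from functools import reduce  # reduce is not a builtin on Python 3
from itertools import chain

# Toy stand-in for the train/test dialogue lists used above.
dialogues = [["turn-1a", "turn-1b"], ["turn-2a"], ["turn-3a", "turn-3b"]]

data_flat = reduce(lambda x, y: x + y, dialogues, [])   # repeated concatenation
data_flat_chain = list(chain.from_iterable(dialogues))  # equivalent result

assert data_flat == data_flat_chain
print(data_flat)  # ['turn-1a', 'turn-1b', 'turn-2a', 'turn-3a', 'turn-3b']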
Example #5
val_labels = np.argmax(valA, axis=1)

tf.set_random_seed(FLAGS.random_state)
batch_size = FLAGS.batch_size
optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate,
                                   epsilon=FLAGS.epsilon)

batches = zip(range(0, n_train - batch_size, batch_size),
              range(batch_size, n_train, batch_size))
with tf.Session() as sess:
    model = MemN2N(batch_size,
                   vocab_size,
                   sentence_size,
                   memory_size,
                   FLAGS.embedding_size,
                   session=sess,
                   hops=FLAGS.hops,
                   max_grad_norm=FLAGS.max_grad_norm,
                   optimizer=optimizer,
                   l2=FLAGS.regularization,
                   nonlin=tf.nn.relu)

    writer = tf.train.SummaryWriter(get_log_dir_name(), sess.graph)

    for t in range(1, FLAGS.epochs + 1):
        np.random.shuffle(batches)
        total_cost = 0.0
        for start in range(0, n_train, batch_size):
            end = start + batch_size
            s = trainS[start:end]
            q = trainQ[start:end]
Example #6
def main(task_id):
    tf.flags.DEFINE_integer("hops", 3, "")
    tf.flags.DEFINE_integer("epochs", 100, "")
    tf.flags.DEFINE_integer("embedding_size", 100, "")
    tf.flags.DEFINE_float("learning_rate", 0.01, "")
    tf.flags.DEFINE_float("anneal_rate", 25, "")
    tf.flags.DEFINE_float("anneal_stop_epoch", 100, "")
    tf.flags.DEFINE_float("max_grad_norm", 40.0, "")
    tf.flags.DEFINE_integer("random_state", None, "")
    tf.flags.DEFINE_integer("evaluation_interval", 10, "")
    tf.flags.DEFINE_integer("batch_size", 32, "")
    tf.flags.DEFINE_integer("memory_size", 50, "")
    tf.flags.DEFINE_integer("task_id", task_id, "")
    tf.flags.DEFINE_string("data_dir", "data/tasks_1-20_v1-2/hn/", "")
    FLAGS = tf.flags.FLAGS

    print("Started Task:", FLAGS.task_id)

    train, test = load_task(FLAGS.data_dir, FLAGS.task_id)
    data = train + test
    vocab = sorted(
        reduce(lambda x, y: x | y, (set(list(chain.from_iterable(s)) + q + a)
                                    for s, q, a in data)))

    max_story_size, mean_story_size, sentence_size, query_size = get_sizes(
        data)
    memory_size = min(FLAGS.memory_size, max_story_size)
    word_idx = dict((c, i + 1) for i, c in enumerate(vocab))
    inverted_word_idx = dict((i + 1, c) for i, c in enumerate(vocab))

    for i in range(memory_size):
        word_idx['time{}'.format(i + 1)] = 'time{}'.format(i + 1)
    vocab_size = len(word_idx) + 1
    sentence_size = max(query_size, sentence_size)
    sentence_size += 1

    S, Q, A = vectorize_data(train, word_idx, sentence_size, memory_size)
    trainS, valS, trainQ, valQ, trainA, valA = train_test_split(
        S, Q, A, test_size=.1, random_state=FLAGS.random_state)
    testS, testQ, testA = vectorize_data(test, word_idx, sentence_size,
                                         memory_size)
    n_test, n_train, n_val = get_shapes(testS, trainS, valS)
    train_labels, test_labels, val_labels = get_labels(trainA, testA, valA)
    tf.set_random_seed(FLAGS.random_state)
    batch_size, batches = get_batches(n_train, FLAGS)

    with tf.Session() as sess:
        model = MemN2N(batch_size,
                       vocab_size,
                       sentence_size,
                       memory_size,
                       FLAGS.embedding_size,
                       session=sess,
                       hops=FLAGS.hops,
                       max_grad_norm=FLAGS.max_grad_norm)

        for t in range(1, FLAGS.epochs + 1):
            lr = get_updated_learning_rate(t, FLAGS)
            np.random.shuffle(batches)
            total_cost = get_total_cost(batches, model, trainS, trainQ, trainA,
                                        lr)
            if t % FLAGS.evaluation_interval == 0:
                train_predictions = get_train_predictions(
                    FLAGS, trainS, trainQ, n_train, batch_size, model)
                validation_predictions = model.predict(valS, valQ)
                training_accuracy = metrics.accuracy_score(
                    np.array(train_predictions), train_labels)
                validation_accuracy = metrics.accuracy_score(
                    validation_predictions, val_labels)

                print("******************************************")
                print('Epoch', t)
                print('Training Accuracy:', training_accuracy)
                print('Validation Accuracy:', validation_accuracy)
                print('Total Cost:', total_cost)
                print("******************************************")

        # Saving Model
        # input_dic = {"stories":model._stories, "queries":model._queries, "answers":model._answers, "learning_rate":model._lr}
        # output_dic = {model._logits}
        # tf.saved_model.simple_save(
        #     sess, "./saved_models/task_"+FLAGS.task_id, input_dic, output_dic
        # )

        test_predictions = model.predict(testS, testQ)
        testing_accuracy = metrics.accuracy_score(test_predictions,
                                                  test_labels)
        print("Final Testing Accuracy:", testing_accuracy)
        print("")
        printTests(1, 6, test, test_predictions, test_labels,
                   inverted_word_idx)
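Example #6 builds its vocabulary by taking the set union over every (story, question, answer) triple and assigning 1-based indices, reserving index 0 for the nil/padding word. A minimal sketch of that bookkeeping on toy bAbI-style data (the triples are invented for illustration):

from functools import reduce
from itertools import chain

data = [
    ([["mary", "went", "home"], ["john", "went", "out"]],
     ["where", "is", "mary"], ["home"]),
    ([["john", "got", "milk"]], ["what", "has", "john"], ["milk"]),
]

vocab = sorted(
    reduce(lambda x, y: x | y,
           (set(list(chain.from_iterable(s)) + q + a) for s, q, a in data)))

word_idx = {w: i + 1 for i, w in enumerate(vocab)}  # 1-based; 0 is the nil word
vocab_size = len(word_idx) + 1                      # +1 for the nil word

print(vocab_size, word_idx["mary"])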
Example #7
    else:
        os.mkdir(FLAGS.test_dir)
        os.mkdir(lead3_dir)
        os.mkdir(mem_dir)
        os.mkdir(lead3_dir + '/system')
        os.mkdir(lead3_dir + '/model')
        os.mkdir(mem_dir + '/system')
        os.mkdir(mem_dir + '/model')
        print('Created test directory', FLAGS.test_dir)

    if os.path.exists('se.txt'):
        os.remove('se.txt')

    model = MemN2N(batch_size, vocab_size, sentence_size, memory_size, FLAGS.embedding_size, next_element,
                   FLAGS.keep_prob, l2_para=FLAGS.l2,  word_init=FLAGS.word2vec_init,
                   hidden_size=FLAGS.hidden_size,
                   training=False,
                   session=sess,
                   hops=FLAGS.hops, max_grad_norm=FLAGS.max_grad_norm)
    print('created model')

    saver = tf.train.Saver()
    saver.restore(sess, FLAGS.train_dir)
    names=[]
    cnt = -1  # file index
    for epoch in range(1, FLAGS.epochs + 1):
        epoch_start_time = time.time()

        total_cost = 0.0
        sess.run(iterator.initializer, feed_dict={filenames: test_filenames})
        while True:
            try:
Example #8
                                           epsilon=FLAGS.epsilon)

    batches = zip(range(0, n_train - batch_size, batch_size),
                  range(batch_size, n_train, batch_size))
    batches = [(start, end) for start, end in batches]

    linear_start = FLAGS.linear_start
    last_train_acc, last_val_acc = None, None

    with tf.Session() as sess:
        model = MemN2N(batch_size,
                       vocab_size,
                       sentence_size,
                       memory_size,
                       FLAGS.embedding_size,
                       session=sess,
                       weight_tying=FLAGS.weight_tying,
                       hops=FLAGS.hops,
                       max_grad_norm=FLAGS.max_grad_norm,
                       optimizer=optimizer,
                       global_step=global_step)
        for t in range(1, FLAGS.epochs + 1):
            np.random.shuffle(batches)
            total_cost = 0.0
            if t > FLAGS.ls_epoch:
                linear_start = False

            for start, end in batches:
                s = trainS[start:end]
                q = trainQ[start:end]
                a = trainA[start:end]
Example #9
best_val_accs = [-1] * len(tasks)
best_val_epochs = [-1] * len(tasks)
best_val_update_epoch = -1
stop_early = False
best_train_accs = []
with tf.Session() as sess:
    print(batch_size, vocab_size, sentence_size, memory_size,
          FLAGS.embedding_size, FLAGS.hops, FLAGS.max_grad_norm,
          FLAGS.regularization, FLAGS.learning_rate, FLAGS.epsilon)

    model = MemN2N(batch_size,
                   vocab_size,
                   sentence_size,
                   memory_size,
                   FLAGS.embedding_size,
                   session=sess,
                   hops=FLAGS.hops,
                   max_grad_norm=FLAGS.max_grad_norm,
                   l2=FLAGS.regularization,
                   lr=FLAGS.learning_rate,
                   epsilon=FLAGS.epsilon,
                   nonlin=tf.nn.relu)

    writers = gen_writers(sess, get_log_dir_name())

    for i in range(1, FLAGS.epochs + 1):
        np.random.shuffle(batches)
        total_cost = 0.0
        for start, end in batches:
            end = start + batch_size
            s = trainS[start:end]
            q = trainQ[start:end]
Example #10
def run_memn2n_joint_training(tensorflow_commandline_flags):

    # parsing commandline input for defining tasks to log with each observation
    calc_accuracy_tasks = []
    if tensorflow_commandline_flags.sigopt_calc_accuracy_tasks:
        calc_accuracy_tasks = [
            int(i) for i in
            tensorflow_commandline_flags.sigopt_calc_accuracy_tasks.split(",")
        ]

    # preprocessing data

    # load all train/test data
    ids = range(1, 21)
    train, test = [], []
    for i in ids:
        tr, te = load_task(tensorflow_commandline_flags.data_dir, i)
        train.append(tr)
        test.append(te)
    data = list(chain.from_iterable(train + test))

    vocab = sorted(
        reduce(lambda x, y: x | y, (set(list(chain.from_iterable(s)) + q + a)
                                    for s, q, a in data)))
    word_idx = dict((c, i + 1) for i, c in enumerate(vocab))

    max_story_size = max(map(len, (s for s, _, _ in data)))
    mean_story_size = int(np.mean([len(s) for s, _, _ in data]))
    sentence_size = max(map(len, chain.from_iterable(s for s, _, _ in data)))
    query_size = max(map(len, (q for _, q, _ in data)))
    sentence_size = max(query_size, sentence_size)  # for the position
    sentence_size += 1  # +1 for time words

    logging.debug("setting up sigopt experiment")

    sigopt_experiment_definition, e2e_memnn_experiment = sigopt_memn2n_experiment_setup.setup_sigopt_memn2n_experiment(
        tensorflow_commandline_flags)

    while e2e_memnn_experiment.progress.observation_count < e2e_memnn_experiment.observation_budget:

        logging.info("starting new observation cycle")
        logging.info("observation number: %d",
                     e2e_memnn_experiment.progress.observation_count)

        logging.debug("getting sigopt suggestions")
        suggestions = sigopt_experiment_definition.get_suggestions(
            e2e_memnn_experiment)

        memory_size = suggestions.assignments[ParametersList.MEMORY_SIZE.value]

        # Add time words/indexes
        for i in range(memory_size):
            word_idx['time{}'.format(i + 1)] = 'time{}'.format(i + 1)

        vocab_size = len(word_idx) + 1  # +1 for nil word

        logging.info("Longest sentence length %d", sentence_size)
        logging.info("Longest story length %d", max_story_size)
        logging.info("Average story length %d", mean_story_size)

        logging.info("transforming data")

        # train/validation/test sets
        trainS = []
        valS = []
        trainQ = []
        valQ = []
        trainA = []
        valA = []
        for task in train:
            S, Q, A = vectorize_data(task, word_idx, sentence_size,
                                     memory_size)
            ts, vs, tq, vq, ta, va = cross_validation.train_test_split(
                S,
                Q,
                A,
                test_size=0.1,
                random_state=tensorflow_commandline_flags.random_state)
            trainS.append(ts)
            trainQ.append(tq)
            trainA.append(ta)
            valS.append(vs)
            valQ.append(vq)
            valA.append(va)

        trainS = reduce(lambda a, b: np.vstack((a, b)), (x for x in trainS))
        trainQ = reduce(lambda a, b: np.vstack((a, b)), (x for x in trainQ))
        trainA = reduce(lambda a, b: np.vstack((a, b)), (x for x in trainA))
        valS = reduce(lambda a, b: np.vstack((a, b)), (x for x in valS))
        valQ = reduce(lambda a, b: np.vstack((a, b)), (x for x in valQ))
        valA = reduce(lambda a, b: np.vstack((a, b)), (x for x in valA))

        testS, testQ, testA = vectorize_data(list(chain.from_iterable(test)),
                                             word_idx, sentence_size,
                                             memory_size)

        n_train = trainS.shape[0]
        n_val = valS.shape[0]
        n_test = testS.shape[0]

        logging.info("Training Size: %d", n_train)
        logging.info("Validation Size: %d", n_val)
        logging.info("Testing Size: %d", n_test)

        train_labels = np.argmax(trainA, axis=1)
        test_labels = np.argmax(testA, axis=1)
        val_labels = np.argmax(valA, axis=1)

        tf.set_random_seed(tensorflow_commandline_flags.random_state)
        batch_size = tensorflow_commandline_flags.batch_size

        # This avoids feeding 1 task after another, instead each batch has a random sampling of tasks
        batches = zip(range(0, n_train - batch_size, batch_size),
                      range(batch_size, n_train, batch_size))
        batches = [(start, end) for start, end in batches]

        optimizer = sigopt_memn2n_experiment_setup.string_to_optimizer_object(
            suggestions.assignments[ParametersList.OPTIMIZER.value],
            suggestions.assignments)

        with tf.Session(config=tf.ConfigProto(
                log_device_placement=True)) as sess:

            model = MemN2N(
                batch_size,
                vocab_size,
                sentence_size,
                memory_size=memory_size,
                embedding_size=suggestions.assignments[
                    ParametersList.WORD_EMBEDDING.value],
                optimizer=optimizer,
                session=sess,
                hops=suggestions.assignments[ParametersList.HOP_SIZE.value],
                max_grad_norm=tensorflow_commandline_flags.max_grad_norm)

            logging.info("batch training memory network")

            for i in range(1, tensorflow_commandline_flags.epochs + 1):

                logging.debug("epoch number %d", i)
                logging.debug("observation count %d",
                              e2e_memnn_experiment.progress.observation_count)

                np.random.shuffle(batches)

                total_cost = 0.0
                for start, end in batches:
                    s = trainS[start:end]
                    q = trainQ[start:end]
                    a = trainA[start:end]
                    cost_t = model.batch_fit(s, q, a)
                    total_cost += cost_t

                if i % tensorflow_commandline_flags.evaluation_interval == 0:

                    logging.info(
                        "calculating training and validation accuracy.")

                    train_accs = []
                    for start in range(0, n_train, int(n_train / 20)):
                        end = start + int(n_train / 20)
                        s = trainS[start:end]
                        q = trainQ[start:end]
                        pred = model.predict(s, q)
                        acc = metrics.accuracy_score(pred,
                                                     train_labels[start:end])
                        train_accs.append(acc)

                    logging.debug("Training accuracy %f",
                                  np.average(train_accs))

                    val_accs = []
                    for start in range(0, n_val, int(n_val / 20)):
                        end = start + int(n_val / 20)
                        s = valS[start:end]
                        q = valQ[start:end]
                        pred = model.predict(s, q)
                        acc = metrics.accuracy_score(pred,
                                                     val_labels[start:end])
                        val_accs.append(acc)

                    logging.debug("Validation accuracy %f",
                                  np.average(val_accs))

                    test_accs = []
                    for start in range(0, n_test, int(n_test / 20)):
                        end = start + int(n_test / 20)
                        s = testS[start:end]
                        q = testQ[start:end]
                        pred = model.predict(s, q)
                        acc = metrics.accuracy_score(pred,
                                                     test_labels[start:end])
                        test_accs.append(acc)

                    logging.info('-----------------------')
                    logging.info('Total Cost: %d', total_cost)

                    task_accuracies = []
                    t = 1
                    for t1, t2, t3 in zip(train_accs, val_accs, test_accs):
                        logging.info("Task %d", t)
                        logging.info("Training Accuracy %f", t1)
                        logging.info("Validation Accuracy %f", t2)
                        logging.info("Testing Accuracy %f", t3)
                        if t in calc_accuracy_tasks:
                            task_accuracies.append(t3)
                        t += 1
                    logging.info('-----------------------')

                    # log task test accuracies with current observation
                    metadata_dict = {}
                    for t, task_name in enumerate(calc_accuracy_tasks):
                        metadata_dict[str(task_name)] = task_accuracies[t]

                    test_accs_average = np.average(test_accs)

                    logging.debug("creating sigopt observation")
                    try:
                        e2e_memnn_experiment, current_observation = sigopt_experiment_definition.update_experiment_metadata(
                            e2e_memnn_experiment, suggestions,
                            test_accs_average, metadata_dict)
                    except ConnectionError as error:
                        logging.debug("connection problem: %s", str(error))
                        conn = Connection(
                            client_token=tensorflow_commandline_flags.
                            sigopt_connection_token)
                        conn.experiments(
                            e2e_memnn_experiment.id).observations().create(
                                suggestion=suggestions.id,
                                value=test_accs_average,
                                metadata=metadata_dict)
                        e2e_memnn_experiment = conn.experiments(
                            e2e_memnn_experiment.id).fetch()

        tf.reset_default_graph()

    logging.info(
        "Sig opt best parameters: %s",
        sigopt_experiment_definition.get_best_suggestions(
            e2e_memnn_experiment))
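Example #10 measures accuracy by splitting each set into 20 roughly equal chunks, scoring every chunk, and averaging the chunk accuracies. A framework-free sketch of that evaluation pattern, with a stand-in prediction array in place of model.predict:

import numpy as np

labels = np.random.randint(0, 5, size=200)     # stand-in gold labels
predictions = labels.copy()
predictions[:20] = (predictions[:20] + 1) % 5  # corrupt 10% of them

n = len(labels)
chunk = n // 20
accs = []
for start in range(0, n, chunk):
    end = start + chunk
    # Mean of element-wise equality is plain accuracy for this chunk,
    # matching what metrics.accuracy_score computes above.
    accs.append(np.mean(predictions[start:end] == labels[start:end]))

print("average accuracy:", np.average(accs))   # 0.9 for this stand-in data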
Example #11
def main(args=sys.argv[1:]):

    args = parse_args(args)
    logging.basicConfig(level=args.logging,
                        format='%(asctime)s\t%(levelname)-8s\t%(message)s')

    output_path = os.path.join(
        args.output_dir_path,
        #'%s_%s' % (get_git_revision_short_hash(), datetime.datetime.now().time().isoformat())
        datetime.datetime.now().date().isoformat() + '_' +
        datetime.datetime.now().time().isoformat(),
    )

    optimizer = tf.train.AdamOptimizer(learning_rate=args.learning_rate)

    encodings = {
        'bow_encoding': bow_encoding,
        'position_encoding': position_encoding,
    }

    nonlins = {
        'relu': tf.nn.relu,
    }
    if args.nonlin is not None:
        nonlin = nonlins[args.nonlin]
    else:
        nonlin = None

    tf.set_random_seed(args.random_seed)

    if not args.joint:

        for task_id in args.task_ids:

            with tf.Graph().as_default():

                with tf.Session(graph=tf.get_default_graph()) as sess:

                    train_data, val_data, test_data, word_idx, reverse_word_idx, vocab_size, sentence_size, memory_size = load_data(
                        args.data_path, [task_id], args.dim_memory,
                        args.num_caches, args.random_seed)
                    print(args.temporal_encoding)
                    model = MemN2N(
                        args.batch_size,
                        vocab_size,
                        sentence_size,
                        memory_size,
                        args.num_caches,
                        args.dim_emb,
                        word_idx,
                        reverse_word_idx,
                        args.num_hops,
                        args.max_grad_norm,
                        share_type=args.share_type,
                        nonlin=nonlin,
                        optimizer=optimizer,
                        initializer=tf.random_normal_initializer(
                            stddev=args.init_stddev),
                        encoding=encodings[args.encoding_type],
                        temporal_encoding=args.temporal_encoding,
                        session=sess,
                    )

                    saver = tf.train.Saver()
                    params = list_to_path(vars(args).values())
                    saver.save(sess, './save/model_%s' % params)

                    train_loop(model, train_data, val_data, args.batch_size,
                               args.num_epochs, args.val_freq)

                    d = {'vocab_dict': word_idx}

                    for f in test_data:
                        test_accs, test_attendance_acc, test_preds, test_probs = evaluate_per_question(
                            model, test_data[f], output_path)

                        test_task_name = os.path.basename(
                            os.path.splitext(f)[0])

                        d.update({
                            '%s_test_preds' % test_task_name:
                            test_preds,
                            '%s_test_probs' % test_task_name:
                            test_probs[0],
                            '%s_test_r' % test_task_name:
                            test_probs[1],
                            '%s_test_acc' % test_task_name:
                            test_accs,
                            #'%s_test_acc_0' % test_task_name: test_accs[0],
                            #'%s_test_acc_1' % test_task_name: test_accs[1],
                            #'%s_test_acc_2' % test_task_name: test_accs[2],
                            #'%s_test_acc_3' % test_task_name: test_accs[3],
                            '%s_test_attendance_accs' % test_task_name:
                            test_attendance_acc,
                        })

                    vars_args = vars(args)
                    del vars_args['task_ids']
                    vars_args['task_ids'] = [task_id]
                    d.update(**vars_args)

                    np.save(output_path + '_%d' % task_id, d)

    else:

        with tf.Graph().as_default():

            with tf.Session(graph=tf.get_default_graph()) as sess:

                train_data, val_data, test_data, word_idx, reverse_word_idx, vocab_size, sentence_size, memory_size = load_data(
                    args.data_path, args.task_ids, args.dim_memory,
                    args.num_caches, args.random_seed)
                print(args.temporal_encoding)
                model = MemN2N(
                    args.batch_size,
                    vocab_size,
                    sentence_size,
                    memory_size,
                    args.num_caches,
                    args.dim_emb,
                    word_idx,
                    reverse_word_idx,
                    args.num_hops,
                    args.max_grad_norm,
                    share_type=args.share_type,
                    nonlin=nonlin,
                    optimizer=optimizer,
                    initializer=tf.random_normal_initializer(
                        stddev=args.init_stddev),
                    encoding=encodings[args.encoding_type],
                    temporal_encoding=args.temporal_encoding,
                    session=sess,
                )

                saver = tf.train.Saver()
                params = list_to_path(vars(args).values())
                saver.save(
                    sess,
                    '/home/kayleeburns/memory/memn2n/ckpts/consistency/model_%s'
                    % params)

                train_loop(model, train_data, val_data, args.batch_size,
                           args.num_epochs, args.val_freq)

                d = {'vocab_dict': word_idx}

                # FIND TEST ACCURACY
                for f in test_data:
                    test_accs, test_attendance_acc, test_preds, test_probs = evaluate_per_question(
                        model, test_data[f], output_path)

                    test_task_name = os.path.basename(os.path.splitext(f)[0])

                    d.update({
                        '%s_test_preds' % test_task_name: test_preds,
                        '%s_test_probs' % test_task_name: test_probs,
                        '%s_test_acc' % test_task_name: test_accs,
                        #'%s_test_acc_0' % test_task_name: test_accs[0],
                        #'%s_test_acc_1' % test_task_name: test_accs[1],
                        #'%s_test_acc_2' % test_task_name: test_accs[2],
                        #'%s_test_acc_3' % test_task_name: test_accs[3],
                        #'%s_test_attendance_accs' % test_task_name: test_attendance_acc,
                    })

                d.update(**vars(args))

                np.save(output_path, d)
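Example #11 stores a plain Python dict of per-task results with np.save. Worth noting (a side remark, not part of the original script): NumPy wraps the dict in a 0-d object array, so reading it back requires allow_pickle=True and .item():

import numpy as np

results = {"vocab_dict": {"go": 1, "west": 2}, "task1_test_acc": 0.87}  # illustrative
np.save("/tmp/run_results", results)  # written to /tmp/run_results.npy

loaded = np.load("/tmp/run_results.npy", allow_pickle=True).item()
print(loaded["task1_test_acc"])       # 0.87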
Example #12
print("Validation Size", n_val)
print("Testing Size", n_test)

train_labels = np.argmax(trainA, axis=1)
test_labels = np.argmax(testA, axis=1)
val_labels = np.argmax(valA, axis=1)

tf.set_random_seed(FLAGS.random_state)
batch_size = FLAGS.batch_size
optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate)
with tf.Session() as sess:
    model = MemN2N(
        batch_size,
        vocab_size,
        sentence_size,
        memory_size,
        FLAGS.embedding_size,
        session=sess,  # max_gradient_norm=FLAGS.max_gradient_norm,
        hops=FLAGS.num_hops,
        optimizer=optimizer)
    for t in range(1, FLAGS.num_epochs + 1):
        total_cost = 0.0
        for start in range(0, n_train, batch_size):
            end = start + batch_size
            s = trainS[start:end]
            q = trainQ[start:end]
            a = trainA[start:end]
            cost_t = model.batch_fit(s, q, a)
            total_cost += cost_t

        if t % FLAGS.evaluation_interval == 0:
Example #13
def run_task(task_id):
    print("Started Task:", task_id)
    # task data
    train, test = load_task(FLAGS.data_dir, task_id)
    data = train + test

    vocab = sorted(
        reduce(lambda x, y: x | y, (set(list(chain.from_iterable(s)) + q + a)
                                    for s, q, a in data)))
    word_idx = dict((c, i + 1) for i, c in enumerate(vocab))
    reverse_lookup = {v: k for (k, v) in word_idx.items()}
    lookup_vocab = ['nil']

    print(reverse_lookup)
    print(word_idx)
    for i in range(1, len(reverse_lookup) + 1):
        lookup_vocab.append(reverse_lookup[i])

    max_story_size = max(map(len, (s for s, _, _ in data)))
    mean_story_size = int(np.mean([len(s) for s, _, _ in data]))
    sentence_size = max(map(len, chain.from_iterable(s for s, _, _ in data)))
    query_size = max(map(len, (q for _, q, _ in data)))
    memory_size = min(FLAGS.memory_size, max_story_size)

    # Add time words/indexes
    for i in range(memory_size):
        word_idx['time{}'.format(i + 1)] = 'time{}'.format(i + 1)

    print(len(word_idx))
    print(word_idx)
    vocab_size = len(word_idx) + 1  # +1 for nil word
    sentence_size = max(query_size, sentence_size)  # for the position
    sentence_size += 1  # +1 for time words

    print("Longest sentence length", sentence_size)
    print("Longest story length", max_story_size)
    print("Average story length", mean_story_size)

    # train/validation/test sets
    S, Q, A = vectorize_data(train, word_idx, sentence_size, memory_size)
    trainS, valS, trainQ, valQ, trainA, valA = cross_validation.train_test_split(
        S, Q, A, test_size=.1, random_state=FLAGS.random_state)
    testS, testQ, testA = vectorize_data(test, word_idx, sentence_size,
                                         memory_size)

    #print(testS[0])

    print("Training set shape", trainS.shape)

    # params
    n_train = trainS.shape[0]
    n_test = testS.shape[0]
    n_val = valS.shape[0]

    print("Training Size", n_train)
    print("Validation Size", n_val)
    print("Testing Size", n_test)

    train_labels = np.argmax(trainA, axis=1)
    val_labels = np.argmax(valA, axis=1)
    test_labels = np.argmax(testA, axis=1)

    tf.set_random_seed(FLAGS.random_state)
    batch_size = FLAGS.batch_size

    batches = zip(range(0, n_train - batch_size, batch_size),
                  range(batch_size, n_train, batch_size))
    batches = [(start, end) for start, end in batches]

    train_accuracies = []
    validation_accuracies = []
    test_accuracies = []

    max_testAccuracy = 0
    max_trainAccuracy = 0
    max_valAccuracy = 0

    best_test_prob_hops = 0
    best_test_prob_vocab = 0
    best_pred_word = 0
    best_true_word = 0
    best_test_A1 = 0
    best_test_C = 0
    best_lookup_vocab = 0

    for run_id in range(FLAGS.NoOfRuns):
        print('Run Number: ' + str(run_id))
        max_epoch_trainAccuracy = 0
        max_epoch_valAccuracy = 0
        with tf.Session() as sess:
            model = MemN2N(batch_size,
                           vocab_size,
                           sentence_size,
                           memory_size,
                           FLAGS.embedding_size,
                           session=sess,
                           hops=FLAGS.hops,
                           max_grad_norm=FLAGS.max_grad_norm,
                           regularization=FLAGS.regularization,
                           nonlin=FLAGS.nonlin)
            for t in range(1, FLAGS.epochs + 1):
                # Stepped learning rate
                if t - 1 <= FLAGS.anneal_stop_epoch:
                    anneal = 2.0**((t - 1) // FLAGS.anneal_rate)
                else:
                    anneal = 2.0**(FLAGS.anneal_stop_epoch //
                                   FLAGS.anneal_rate)
                lr = FLAGS.learning_rate / anneal

                np.random.shuffle(batches)
                total_cost = 0.0
                for start, end in batches:
                    s = trainS[start:end]
                    q = trainQ[start:end]
                    a = trainA[start:end]
                    cost_t = model.batch_fit(s, q, a, lr)
                    total_cost += cost_t

                if t % FLAGS.evaluation_interval == 0:
                    train_preds = []
                    for start in range(0, n_train, batch_size):
                        end = start + batch_size
                        s = trainS[start:end]
                        q = trainQ[start:end]
                        pred = model.predict(s, q)
                        train_preds += list(pred)

                    val_preds, valid_prob_vocab, valid_prob_hops, valid_A1, valid_C = model.predict_prob_instrument(
                        valS, valQ)
                    train_acc = metrics.accuracy_score(np.array(train_preds),
                                                       train_labels)
                    val_acc = metrics.accuracy_score(val_preds, val_labels)

                    print('-----------------------')
                    print('Epoch', t)
                    print('Total Cost:', total_cost)
                    print('Training Accuracy:', train_acc)
                    print('Validation Accuracy:', val_acc)
                    print('-----------------------')

                    if (val_acc > max_epoch_valAccuracy):
                        max_epoch_trainAccuracy = train_acc
                        max_epoch_valAccuracy = val_acc

            test_preds, test_prob_vocab, test_prob_hops, test_A1, test_C = model.predict_prob_instrument(
                testS, testQ)
            pred_word = [reverse_lookup[i] for i in test_preds]
            true_word = [reverse_lookup[i] for i in test_labels]

            test_acc = metrics.accuracy_score(test_preds, test_labels)

            train_accuracies.append(max_trainAccuracy)
            validation_accuracies.append(max_valAccuracy)
            test_accuracies.append(test_acc)

            if (test_acc > max_testAccuracy):
                max_testAccuracy = test_acc
                max_trainAccuracy = max_epoch_trainAccuracy
                max_valAccuracy = max_epoch_valAccuracy
                best_test_prob_hops = test_prob_hops
                best_test_prob_vocab = test_prob_vocab
                best_pred_word = pred_word
                best_true_word = true_word
                best_test_A1 = test_A1
                best_test_C = test_C
                best_lookup_vocab = lookup_vocab

            print("Test Accuracy: ", test_acc)
            iou = calculate_iou(test_prob_hops, test, task_id)
            print('IoU: ' + str(iou))

    print("Best Testing Accuracy:", max_testAccuracy)
    # save test files
    np.save(logs_dir + 'task_' + str(task_id) + '_attention',
            best_test_prob_hops)
    np.save(logs_dir + 'task_' + str(task_id) + '_vocab_prob',
            best_test_prob_vocab)
    np.save(logs_dir + 'task_' + str(task_id) + '_pred', best_pred_word)
    np.save(logs_dir + 'task_' + str(task_id) + '_truth', best_true_word)
    np.save(logs_dir + 'task_' + str(task_id) + '_A', best_test_A1)
    np.save(logs_dir + 'task_' + str(task_id) + '_C', np.array(best_test_C))
    np.save(logs_dir + 'task_' + str(task_id) + '_lookupvocab',
            best_lookup_vocab)

    iou = calculate_iou(best_test_prob_hops, test, task_id)
    print('Best IoU: ' + str(iou))

    return max_trainAccuracy, max_valAccuracy, max_testAccuracy, iou
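run_task above halves the learning rate every anneal_rate epochs via 2.0 ** ((t - 1) // anneal_rate) and freezes the schedule after anneal_stop_epoch. A small standalone sketch of that stepped schedule with illustrative hyperparameters:

def stepped_lr(epoch, base_lr=0.01, anneal_rate=25, anneal_stop_epoch=100):
    # Halve the learning rate every `anneal_rate` epochs, and keep the last
    # value once `anneal_stop_epoch` has been passed.
    if epoch - 1 <= anneal_stop_epoch:
        anneal = 2.0 ** ((epoch - 1) // anneal_rate)
    else:
        anneal = 2.0 ** (anneal_stop_epoch // anneal_rate)
    return base_lr / anneal

for epoch in (1, 25, 26, 75, 101, 200):
    print(epoch, stepped_lr(epoch))
# 1 -> 0.01, 25 -> 0.01, 26 -> 0.005, 75 -> 0.0025, 101 -> 0.000625, 200 -> 0.000625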
Example #14
# Make only 1 GPU available to CUDA
os.environ[
    'CUDA_VISIBLE_DEVICES'] = "0"  # Comma separated indexes of GPUs to use - GPUs are indexed from 0 to 7 on the workstation

# Set the options to limit the memory allocation on GPUs
tf_gpu_options = tf.GPUOptions(allow_growth=True,
                               per_process_gpu_memory_fraction=0.5)
sess = tf.Session(config=tf.ConfigProto(gpu_options=tf_gpu_options))

model = MemN2N(config["batch"],
               config["vocab_size"],
               config["sentence_size"],
               config["memory_size"],
               config["embedding_size"],
               session=sess,
               hops=config["hops"],
               max_grad_norm=config["max_grad_norm"],
               l2=config["regularization"],
               lr=config["lr"],
               epsilon=config["epsilon"],
               nonlin=tf.nn.relu,
               restoreLoc=restore_location)

# Uncomment to see if the weights were loaded correctly
# print(sess.run(model.A))


def get_pred(testS, testQ):
    # print(testS)
    ps = model.predict_proba(testS, testQ)
    op = model.predict_test(testS, testQ)
Example #15
tf.flags.DEFINE_float("max_grad_norm", 40.0, "Clip gradients to this norm.")
tf.flags.DEFINE_integer("evaluation_interval", 1,
                        "Evaluate and print results every x epochs")
tf.flags.DEFINE_integer("batch_size", 32, "Batch size for training.")
tf.flags.DEFINE_integer("hops", 3, "Number of hops in the Memory Network.")
tf.flags.DEFINE_integer("epochs", 50, "Number of epochs to train for.")
tf.flags.DEFINE_integer("embedding_size", 20,
                        "Embedding size for embedding matrices.")
tf.flags.DEFINE_integer("memory_size", 50, "Maximum size of memory.")
tf.flags.DEFINE_integer("class_size", 6, "Number of classes.")
tf.flags.DEFINE_integer("random_state", 20, "Random state.")
tf.flags.DEFINE_string("dataset", "dblp", "Directory containing bAbI tasks")
tf.flags.DEFINE_integer("sentence_size", "15", "maximum sentence size")
tf.flags.DEFINE_string("nonlin", "None", "nonlinear function ")
tf.flags.DEFINE_string("checkpoint_dir", "checkpoint",
                       "Directory name to save the checkpoints [checkpoints]")
tf.flags.DEFINE_string("encoder", "avg", " the approach of sentence encoder")
tf.flags.DEFINE_string("filter_sizes", "3",
                       "the filter sizes in convolutional neural networks")
tf.flags.DEFINE_float("dropout_keep_prob", 1.0, "Dropout keep probability")
tf.flags.DEFINE_integer("share_memory_size", 20, "Share memory size")
tf.flags.DEFINE_string("embedding_file", "doc.txt", "document embedding file")
FLAGS = tf.flags.FLAGS

print("class_size:", FLAGS.class_size)

with tf.Session() as sess:
    reader = TextReader(FLAGS)
    model = MemN2N(session=sess, reader=reader, config=FLAGS)
    model.train()
Example #16
train_labels = np.argmax(trainA, axis=1)
test_labels = np.argmax(testA, axis=1)
val_labels = np.argmax(valA, axis=1)

tf.set_random_seed(FLAGS.random_state)
batch_size = FLAGS.batch_size

batches = list(
    zip(range(0, n_train - batch_size, batch_size),
        range(batch_size, n_train, batch_size)))

with tf.Session() as sess:
    model = MemN2N(batch_size,
                   vocab_size,
                   sentence_size,
                   memory_size,
                   FLAGS.embedding_size,
                   session=sess,
                   hops=FLAGS.hops,
                   max_grad_norm=FLAGS.max_grad_norm)
    for t in range(1, FLAGS.epochs + 1):
        # Stepped learning rate
        if t - 1 <= FLAGS.anneal_stop_epoch:
            anneal = 2.0**((t - 1) // FLAGS.anneal_rate)
        else:
            anneal = 2.0**(FLAGS.anneal_stop_epoch // FLAGS.anneal_rate)
        lr = FLAGS.learning_rate / anneal

        np.random.shuffle(batches)
        total_cost = 0.0
        for start, end in batches:
            s = trainS[start:end]
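Example #16 wraps the zip of ranges in list(...) before shuffling, which matters on Python 3: zip returns a one-shot iterator that can neither be shuffled in place nor reused across epochs. A tiny sketch of the per-epoch shuffle with illustrative sizes:

import numpy as np

n_train, batch_size = 12, 4
batches = list(zip(range(0, n_train - batch_size, batch_size),
                   range(batch_size, n_train, batch_size)))

for epoch in range(2):
    np.random.shuffle(batches)  # works because batches is a list, not a zip object
    print("epoch", epoch, "order:", batches)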
Example #17
def run_memn2n_single_training(tensorflow_commandline_flags):

    logging.info("Started Task: %s", str(tensorflow_commandline_flags.task_id))

    # preprocessing data before training memory network

    # task data
    train, test = load_task(tensorflow_commandline_flags.data_dir,
                            tensorflow_commandline_flags.task_id)
    data = train + test

    vocab = sorted(
        reduce(lambda x, y: x | y, (set(list(chain.from_iterable(s)) + q + a)
                                    for s, q, a in data)))
    word_idx = dict((c, i + 1) for i, c in enumerate(vocab))

    max_story_size = max(map(len, (s for s, _, _ in data)))
    mean_story_size = int(np.mean([len(s) for s, _, _ in data]))
    sentence_size = max(map(len, chain.from_iterable(s for s, _, _ in data)))
    query_size = max(map(len, (q for _, q, _ in data)))
    sentence_size = max(query_size, sentence_size)  # for the position
    sentence_size += 1  # +1 for time words

    sigopt_experiment_definition, e2e_memnn_experiment = sigopt_memn2n_experiment_setup.setup_sigopt_memn2n_experiment(
        tensorflow_commandline_flags)

    while e2e_memnn_experiment.progress.observation_count < e2e_memnn_experiment.observation_budget:
        logging.info("observation number: %d",
                     e2e_memnn_experiment.progress.observation_count)

        suggestions = sigopt_experiment_definition.get_suggestions(
            e2e_memnn_experiment)

        memory_size = suggestions.assignments[ParametersList.MEMORY_SIZE.value]

        # Add time words/indexes
        for i in range(memory_size):
            word_idx['time{}'.format(i + 1)] = 'time{}'.format(i + 1)

        vocab_size = len(word_idx) + 1  # +1 for nil word

        logging.info("Longest sentence length %d", sentence_size)
        logging.info("Longest story length %d", max_story_size)
        logging.info("Average story length %d", mean_story_size)

        # train/validation/test sets
        S, Q, A = vectorize_data(train, word_idx, sentence_size, memory_size)
        trainS, valS, trainQ, valQ, trainA, valA = cross_validation.train_test_split(
            S,
            Q,
            A,
            test_size=.1,
            random_state=tensorflow_commandline_flags.random_state)
        testS, testQ, testA = vectorize_data(test, word_idx, sentence_size,
                                             memory_size)

        # params
        n_train = trainS.shape[0]
        n_test = testS.shape[0]
        n_val = valS.shape[0]

        logging.info("Training Size %d", n_train)
        logging.info("Validation Size %d", n_val)
        logging.info("Testing Size %d", n_test)

        train_labels = np.argmax(trainA, axis=1)
        test_labels = np.argmax(testA, axis=1)
        val_labels = np.argmax(valA, axis=1)

        tf.set_random_seed(tensorflow_commandline_flags.random_state)
        batch_size = tensorflow_commandline_flags.batch_size

        batches = zip(range(0, n_train - batch_size, batch_size),
                      range(batch_size, n_train, batch_size))
        batches = [(start, end) for start, end in batches]

        optimizer = sigopt_memn2n_experiment_setup.string_to_optimizer_object(
            suggestions.assignments[ParametersList.OPTIMIZER.value],
            suggestions.assignments)
        with tf.Session(config=tf.ConfigProto(
                log_device_placement=True)) as sess:
            model = MemN2N(
                batch_size,
                vocab_size,
                sentence_size,
                memory_size=memory_size,
                embedding_size=suggestions.assignments[
                    ParametersList.WORD_EMBEDDING.value],
                optimizer=optimizer,
                session=sess,
                hops=suggestions.assignments[ParametersList.HOP_SIZE.value],
                max_grad_norm=tensorflow_commandline_flags.max_grad_norm)

            for t in range(1, tensorflow_commandline_flags.epochs + 1):
                logging.info("epoch number: %d", t)
                logging.info("observation number: %d",
                             e2e_memnn_experiment.progress.observation_count)

                np.random.shuffle(batches)
                total_cost = 0.0
                for start, end in batches:
                    s = trainS[start:end]
                    q = trainQ[start:end]
                    a = trainA[start:end]
                    cost_t = model.batch_fit(s, q, a)
                    total_cost += cost_t

                if t % tensorflow_commandline_flags.evaluation_interval == 0:
                    train_preds = []
                    for start in range(0, n_train, batch_size):
                        end = start + batch_size
                        s = trainS[start:end]
                        q = trainQ[start:end]
                        pred = model.predict(s, q)
                        train_preds += list(pred)

                    val_preds = model.predict(valS, valQ)
                    train_acc = metrics.accuracy_score(np.array(train_preds),
                                                       train_labels)
                    val_acc = metrics.accuracy_score(val_preds, val_labels)

                    test_preds = model.predict(testS, testQ)
                    test_acc = metrics.accuracy_score(test_preds, test_labels)

                    logging.info('-----------------------')
                    logging.info('Epoch %d', t)
                    logging.info('Total Cost: %f', total_cost)
                    logging.info('Training Accuracy: %f', train_acc)
                    logging.info('Validation Accuracy: %f', val_acc)
                    logging.info('Test Accuracy: %f', test_acc)
                    logging.info('-----------------------')

                    e2e_memnn_experiment, observation = sigopt_experiment_definition.update_experiment(
                        e2e_memnn_experiment, suggestions, test_acc)

        # reset computation graph to create new mm model
        tf.reset_default_graph()

    logging.info(
        "Sig opt best parameters: %s",
        sigopt_experiment_definition.get_best_suggestions(
            e2e_memnn_experiment))
Example #18
def main(
    in_trainset_size,
    in_testset_size,
    in_fold_number,
    in_dataset_shuffle
):
    max_dialogue_length = max(map(len, all_dialogues_babi))
    longer_dialogues_idx = filter(
        lambda x: len(all_dialogues_babi[x]) == max_dialogue_length,
        in_dataset_shuffle
    )

    trainset_idx = []
    for train_dialogue_counter in range(in_trainset_size * in_fold_number, in_trainset_size * (in_fold_number + 1)):
        trainset_idx.append(longer_dialogues_idx[train_dialogue_counter % len(longer_dialogues_idx)])

    testset_idx = []
    for test_dialogue_counter in range(in_testset_size * in_fold_number, in_testset_size * (in_fold_number + 1)):
        testset_idx.append(in_dataset_shuffle[test_dialogue_counter % len(in_dataset_shuffle)])

    dialogues_train = map(lambda x: all_dialogues_babi[x], trainset_idx)
    # testing on API calls only?
    dialogues_test = map(lambda x: [all_dialogues_babi_plus[x][-1]], testset_idx)

    data_train = reduce(lambda x, y: x + y, dialogues_train, [])
    data_test = reduce(lambda x, y: x + y, dialogues_test, [])

    train_s, train_q, train_a = vectorize_data_dialog(
        data_train,
        word_idx,
        answer_idx,
        sentence_size,
        memory_size
    )
    test_s, test_q, test_a = vectorize_data_dialog(
        data_test,
        word_idx,
        answer_idx,
        sentence_size,
        memory_size
    )

    print("Training Size (dialogues)", len(dialogues_train))
    print("Testing Size (dialogues)", len(dialogues_test))
    print("Training Size (stories)", len(data_train))
    print("Testing Size (stories)", len(data_test))

    tf.set_random_seed(FLAGS.random_state)
    batch_size = FLAGS.batch_size
    optimizer = tf.train.GradientDescentOptimizer(
        learning_rate=FLAGS.learning_rate  # ,
        # epsilon=FLAGS.epsilon
    )

    batches = zip(
        range(0, len(data_train) - batch_size, batch_size),
        range(batch_size, len(data_train), batch_size)
    )
    batches = [(start, end) for start, end in batches]

    with tf.Session() as sess:
        model = MemN2N(
            batch_size,
            vocab_size,
            sentence_size,
            memory_size,
            FLAGS.embedding_size,
            answers_vectorized,
            session=sess,
            hops=FLAGS.hops,
            max_grad_norm=FLAGS.max_grad_norm,
            optimizer=optimizer
        )
        best_accuracy_per_epoch = train_model(
            model,
            (train_s, train_q, train_a),
            (test_s, test_q, test_a),
            batches
        )
    return best_accuracy_per_epoch
Example #19
    val_batches = zip(range(0, n_val - batch_size + 1, batch_size),
                      range(batch_size, n_val + 1, batch_size))
    val_batches = [(start, end) for start, end in val_batches]
    if (FLAGS.use_testset):
        test_batches = zip(range(0, n_test - batch_size + 1, batch_size),
                           range(batch_size, n_test + 1, batch_size))
        test_batches = [(start, end) for start, end in test_batches]

    with tf.Graph().as_default() as graph:
        with tf.Session() as sess:
            model = MemN2N(batch_size,
                           vocab_size,
                           ans_vec_size,
                           sentence_size,
                           memory_size,
                           FLAGS.embedding_size,
                           FLAGS.memn2n_vector_size,
                           FLAGS.loss_norm,
                           session=sess,
                           hops=FLAGS.hops,
                           max_grad_norm=FLAGS.max_grad_norm,
                           optimizer=optimizer)
            saver = XlsxExporter(story)

            header = {}
            header['Story'] = story
            header['Learning Rate'] = str(FLAGS.learning_rate)
            header['Training Size'] = str(n_train)
            header['Validation Size'] = str(n_val)
            header['Sentence Size'] = str(sentence_size)
            header['Answer Vector Size'] = str(ans_vec_size)
            header['MemN2N Vector Size'] = str(FLAGS.memn2n_vector_size)