Code Example #1
def main():
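    # NOTE: train_babi, test_plus, args, FLAGS, word_idx, answer_idx, sentence_size,
    # memory_size, vocab_size, answers_vectorized, vectorize_data_dialog, MemN2N and
    # train_model are not defined in these snippets; they are assumed to exist at
    # module level in the surrounding script.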
    dialogues_train = map(lambda x: x, train_babi)
    dialogues_train_eval = map(lambda x: [x[-1]], train_babi) \
        if args.predict_last_turn_only \
        else map(lambda x: x, train_babi)

    dialogues_test = map(lambda x: [x[-1]], test_plus) \
        if args.predict_last_turn_only \
        else map(lambda x: x, test_plus)

    data_train = reduce(lambda x, y: x + y, dialogues_train, [])
    data_train_eval = reduce(lambda x, y: x + y, dialogues_train_eval, [])
    data_test = reduce(lambda x, y: x + y, dialogues_test, [])

    train_s, train_q, train_a = vectorize_data_dialog(data_train, word_idx,
                                                      answer_idx,
                                                      sentence_size,
                                                      memory_size)
    train_eval_s, train_eval_q, train_eval_a = vectorize_data_dialog(
        data_train_eval, word_idx, answer_idx, sentence_size, memory_size)
    test_s, test_q, test_a = vectorize_data_dialog(data_test, word_idx,
                                                   answer_idx, sentence_size,
                                                   memory_size)

    print("Training Size (dialogues)", len(dialogues_train))
    print("Training/Evaluation Size (dialogues)", len(dialogues_train_eval))
    print("Testing Size (dialogues)", len(dialogues_test))
    print("Training Size (stories)", len(data_train))
    print("Training/Evaluation Size (stories)", len(data_train_eval))
    print("Testing Size (stories)", len(data_test))

    tf.set_random_seed(FLAGS.random_state)
    batch_size = FLAGS.batch_size
    optimizer = tf.train.GradientDescentOptimizer(
        learning_rate=FLAGS.learning_rate)

    batches = zip(range(0,
                        len(data_train) - batch_size, batch_size),
                  range(batch_size, len(data_train), batch_size))
    batches = [(start, end) for start, end in batches]

    with tf.Session() as sess:
        model = MemN2N(batch_size,
                       vocab_size,
                       sentence_size,
                       memory_size,
                       FLAGS.embedding_size,
                       answers_vectorized,
                       session=sess,
                       hops=FLAGS.hops,
                       max_grad_norm=FLAGS.max_grad_norm,
                       optimizer=optimizer)
        best_accuracy_per_epoch = train_model(
            model, (train_s, train_q, train_a),
            (train_eval_s, train_eval_q, train_eval_a),
            (test_s, test_q, test_a), batches)
    return best_accuracy_per_epoch
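
All of these snippets rely on Python 2 semantics: map, filter and zip return lists (so len(dialogues_train) works) and reduce is a built-in. Under Python 3 the same data preparation needs functools.reduce and explicit lists; a minimal sketch of that adjustment, reusing the names from Example #1, might look like this:

from functools import reduce  # reduce is not a built-in in Python 3


def prepare_data_py3(train_babi, test_plus, predict_last_turn_only):
    # Materialize lists up front so len() and slicing keep working in Python 3.
    dialogues_train = list(train_babi)
    dialogues_train_eval = ([[d[-1]] for d in train_babi]
                            if predict_last_turn_only else list(train_babi))
    dialogues_test = ([[d[-1]] for d in test_plus]
                      if predict_last_turn_only else list(test_plus))

    # Flatten the per-dialogue lists into flat lists of stories.
    data_train = reduce(lambda x, y: x + y, dialogues_train, [])
    data_train_eval = reduce(lambda x, y: x + y, dialogues_train_eval, [])
    data_test = reduce(lambda x, y: x + y, dialogues_test, [])
    return data_train, data_train_eval, data_test
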
Code Example #2
def main(in_train_dialogue_name, in_testset_size, in_fold_number,
         in_dataset_shuffle):
    trainset_idx = [get_global_dialogue_index(in_train_dialogue_name)]
    testset_idx = []
    testset_counter = in_fold_number * in_testset_size
    while len(testset_idx) != in_testset_size:
        current_idx = in_dataset_shuffle[testset_counter]
        if current_idx not in trainset_idx:
            testset_idx.append(current_idx)
        testset_counter += 1
    dialogues_train = map(lambda x: all_dialogues[x], trainset_idx)
    dialogues_test = map(lambda x: all_dialogues[x], testset_idx)

    data_train = reduce(lambda x, y: x + y, dialogues_train, [])
    data_test = reduce(lambda x, y: x + y, dialogues_test, [])

    train_s, train_q, train_a = vectorize_data_dialog(data_train, word_idx,
                                                      answer_idx,
                                                      sentence_size,
                                                      memory_size)
    test_s, test_q, test_a = vectorize_data_dialog(data_test, word_idx,
                                                   answer_idx, sentence_size,
                                                   memory_size)

    print("Training Size (dialogues)", len(dialogues_train))
    print("Testing Size (dialogues)", len(dialogues_test))
    print("Training Size (stories)", len(data_train))
    print("Testing Size (stories)", len(data_test))

    tf.set_random_seed(FLAGS.random_state)
    batch_size = FLAGS.batch_size
    optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate,
                                       epsilon=FLAGS.epsilon)

    batches = zip(range(0,
                        len(data_train) - batch_size, batch_size),
                  range(batch_size, len(data_train), batch_size))
    batches = [(start, end) for start, end in batches]

    with tf.Session() as sess:
        model = MemN2N(batch_size,
                       vocab_size,
                       sentence_size,
                       memory_size,
                       FLAGS.embedding_size,
                       answer_vocab_size=answer_vocab_size,
                       session=sess,
                       hops=FLAGS.hops,
                       max_grad_norm=FLAGS.max_grad_norm,
                       optimizer=optimizer)
        best_accuracy_per_epoch = train_model(model,
                                              (train_s, train_q, train_a),
                                              (test_s, test_q, test_a),
                                              batches)
    return best_accuracy_per_epoch
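
The zipped ranges above yield consecutive (start, end) offsets in steps of batch_size over the flattened training stories; stories that fall outside the last pair never appear in batches. A quick illustration with made-up sizes:

batch_size = 32
n_train = 100  # pretend len(data_train) == 100
batches = list(zip(range(0, n_train - batch_size, batch_size),
                   range(batch_size, n_train, batch_size)))
print(batches)  # [(0, 32), (32, 64), (64, 96)] -- stories 96..99 are dropped
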
Code Example #3
def main(in_trainset_size, in_split_number):
    # train/validation/test sets
    all_dialogues_idx = list(range(len(all_dialogues)))  # indices of all dialogues
    random.shuffle(all_dialogues_idx)
    trainset_idx = all_dialogues_idx[in_split_number *
                                     in_trainset_size:in_split_number *
                                     in_trainset_size + in_trainset_size]
    testset_idx = filter(lambda x: x not in trainset_idx, all_dialogues_idx)
    dialogues_train = map(lambda x: all_dialogues[x], trainset_idx)
    dialogues_test = map(lambda x: all_dialogues[x], testset_idx)

    data_train = reduce(lambda x, y: x + y, dialogues_train, [])
    data_test = reduce(lambda x, y: x + y, dialogues_test, [])

    train_s, train_q, train_a = vectorize_data_dialog(data_train, word_idx,
                                                      answer_idx,
                                                      sentence_size,
                                                      memory_size)
    test_s, test_q, test_a = vectorize_data_dialog(data_test, word_idx,
                                                   answer_idx, sentence_size,
                                                   memory_size)

    print("Training Size (dialogues)", len(dialogues_train))
    print("Testing Size (dialogues)", len(dialogues_test))
    print("Training Size (stories)", len(data_train))
    print("Testing Size (stories)", len(data_test))

    tf.set_random_seed(FLAGS.random_state)
    batch_size = FLAGS.batch_size
    optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate,
                                       epsilon=FLAGS.epsilon)

    batches = zip(range(0,
                        len(data_train) - batch_size, batch_size),
                  range(batch_size, len(data_train), batch_size))
    batches = [(start, end) for start, end in batches]

    with tf.Session() as sess:
        model = MemN2N(batch_size,
                       vocab_size,
                       sentence_size,
                       memory_size,
                       FLAGS.embedding_size,
                       answer_vocab_size=answer_vocab_size,
                       session=sess,
                       hops=FLAGS.hops,
                       max_grad_norm=FLAGS.max_grad_norm,
                       optimizer=optimizer)
        best_accuracy_per_epoch = train_model(model,
                                              (train_s, train_q, train_a),
                                              (test_s, test_q, test_a),
                                              batches)
    return best_accuracy_per_epoch
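
vectorize_data_dialog is not shown in any of these listings. Judging only from its call signature, it presumably pads each story to memory_size sentences of sentence_size word indices and maps the answer through answer_idx; the following rough sketch is an assumption about that behaviour, not the original function:

import numpy as np


def vectorize_data_dialog_sketch(data, word_idx, answer_idx,
                                 sentence_size, memory_size):
    # data is assumed to be an iterable of (story, query, answer) triples.
    stories, queries, answers = [], [], []
    for story, query, answer in data:
        ss = []
        for sentence in story[-memory_size:]:  # keep the most recent sentences
            ids = [word_idx.get(w, 0) for w in sentence[:sentence_size]]
            ss.append(ids + [0] * (sentence_size - len(ids)))
        ss += [[0] * sentence_size] * (memory_size - len(ss))  # pad the memory
        q = [word_idx.get(w, 0) for w in query[:sentence_size]]
        q += [0] * (sentence_size - len(q))
        stories.append(ss)
        queries.append(q)
        answers.append(answer_idx[answer])
    return np.array(stories), np.array(queries), np.array(answers)
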
Code Example #4
def main():
    data_train = reduce(lambda x, y: x + y, train, [])
    data_test = reduce(lambda x, y: x + y, test, [])

    train_s, train_q, train_a = vectorize_data_dialog(data_train, word_idx,
                                                      answer_idx,
                                                      sentence_size,
                                                      memory_size)
    test_s, test_q, test_a = vectorize_data_dialog(data_test, word_idx,
                                                   answer_idx, sentence_size,
                                                   memory_size)

    print("Training Size (dialogues)", len(train))
    print("Testing Size (dialogues)", len(test))
    print("Training Size (stories)", len(data_train))
    print("Testing Size (stories)", len(data_test))

    tf.set_random_seed(FLAGS.random_state)
    batch_size = FLAGS.batch_size
    optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate,
                                       epsilon=FLAGS.epsilon)

    batches = zip(range(0,
                        len(data_train) - batch_size, batch_size),
                  range(batch_size, len(data_train), batch_size))
    batches = [(start, end) for start, end in batches]

    with tf.Session() as sess:
        model = MemN2N(batch_size,
                       vocab_size,
                       sentence_size,
                       memory_size,
                       FLAGS.embedding_size,
                       answer_vocab_size=answer_vocab_size,
                       session=sess,
                       hops=FLAGS.hops,
                       max_grad_norm=FLAGS.max_grad_norm,
                       optimizer=optimizer)
        best_accuracy_per_epoch = train_model(model,
                                              (train_s, train_q, train_a),
                                              (test_s, test_q, test_a),
                                              batches)
    return best_accuracy_per_epoch
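
train_model is also outside these snippets; every variant simply returns its best_accuracy_per_epoch. Below is a plausible shape for the three-argument version used from Example #2 onward; batch_fit, predict, FLAGS.epochs and the one-hot answer layout are assumptions, not code taken from the original source:

import random

import numpy as np


def train_model_sketch(model, train_data, test_data, batches):
    train_s, train_q, train_a = train_data
    test_s, test_q, test_a = test_data
    best_accuracy_per_epoch = 0.0
    for _ in range(FLAGS.epochs):
        random.shuffle(batches)  # visit the mini-batches in a new order each epoch
        for start, end in batches:
            model.batch_fit(train_s[start:end],
                            train_q[start:end],
                            train_a[start:end])
        predictions = model.predict(test_s, test_q)
        # assumes test_a holds one-hot answer vectors
        accuracy = float(np.mean(predictions == np.argmax(test_a, axis=1)))
        best_accuracy_per_epoch = max(best_accuracy_per_epoch, accuracy)
    return best_accuracy_per_epoch
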
Code Example #5
def main(
    in_trainset_size,
    in_testset_size,
    in_fold_number,
    in_dataset_shuffle
):
    max_dialogue_length = max(map(len, all_dialogues_babi))
    longer_dialogues_idx = filter(
        lambda x: len(all_dialogues_babi[x]) == max_dialogue_length,
        in_dataset_shuffle
    )

    trainset_idx = []
    for train_dialogue_counter in range(in_trainset_size * in_fold_number, in_trainset_size * (in_fold_number + 1)):
        trainset_idx.append(longer_dialogues_idx[train_dialogue_counter % len(longer_dialogues_idx)])

    testset_idx = []
    for test_dialogue_counter in range(in_testset_size * in_fold_number, in_testset_size * (in_fold_number + 1)):
        testset_idx.append(in_dataset_shuffle[test_dialogue_counter % len(in_dataset_shuffle)])

    dialogues_train = map(lambda x: all_dialogues_babi[x], trainset_idx)
    # evaluate only on each dialogue's final turn (the API call) from bAbI+
    dialogues_test = map(lambda x: [all_dialogues_babi_plus[x][-1]], testset_idx)

    data_train = reduce(lambda x, y: x + y, dialogues_train, [])
    data_test = reduce(lambda x, y: x + y, dialogues_test, [])

    train_s, train_q, train_a = vectorize_data_dialog(
        data_train,
        word_idx,
        answer_idx,
        sentence_size,
        memory_size
    )
    test_s, test_q, test_a = vectorize_data_dialog(
        data_test,
        word_idx,
        answer_idx,
        sentence_size,
        memory_size
    )

    print("Training Size (dialogues)", len(dialogues_train))
    print("Testing Size (dialogues)", len(dialogues_test))
    print("Training Size (stories)", len(data_train))
    print("Testing Size (stories)", len(data_test))

    tf.set_random_seed(FLAGS.random_state)
    batch_size = FLAGS.batch_size
    optimizer = tf.train.GradientDescentOptimizer(
        learning_rate=FLAGS.learning_rate)  # epsilon applies to AdamOptimizer only

    batches = zip(
        range(0, len(data_train) - batch_size, batch_size),
        range(batch_size, len(data_train), batch_size)
    )
    batches = [(start, end) for start, end in batches]

    with tf.Session() as sess:
        model = MemN2N(
            batch_size,
            vocab_size,
            sentence_size,
            memory_size,
            FLAGS.embedding_size,
            answers_vectorized,
            session=sess,
            hops=FLAGS.hops,
            max_grad_norm=FLAGS.max_grad_norm,
            optimizer=optimizer
        )
        best_accuracy_per_epoch = train_model(
            model,
            (train_s, train_q, train_a),
            (test_s, test_q, test_a),
            batches
        )
    return best_accuracy_per_epoch
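
As a sanity check on the fold arithmetic in this last example (with purely hypothetical numbers): the training fold walks a window of in_trainset_size positions through longer_dialogues_idx, wrapping via the modulo once the pool of longest dialogues is exhausted, and the test fold indexes in_dataset_shuffle the same way.

in_trainset_size, in_fold_number = 3, 2
longer_dialogues_idx = [4, 11, 17, 23, 42]  # pretend only 5 dialogues have maximal length
trainset_idx = [longer_dialogues_idx[i % len(longer_dialogues_idx)]
                for i in range(in_trainset_size * in_fold_number,
                               in_trainset_size * (in_fold_number + 1))]
print(trainset_idx)  # [11, 17, 23] -- fold 2 covers positions 6, 7, 8 modulo 5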