Example #1
def generate_qas(questions_list):
    qas_list = []
    for ques in questions_list:
        # Build a random 24-character id for this QA entry (hex-like alphabet)
        rand_id = ''.join(random.choice('0123456789abcde') for i in range(24))
        qas_list.append({
            'answers': generate_answers(),
            'id': rand_id,
            'question': ques
        })
    return qas_list
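
Example #1 assumes that random is imported and that a generate_answers() helper is defined in the same module. A minimal, hypothetical harness for calling generate_qas (the stub below only stands in for the real helper):

import random

def generate_answers():
    # Stub standing in for the real helper, which returns a list of answer dicts.
    return []

qas = generate_qas(["When was the dataset released?", "Who wrote the context?"])
print(qas[0]['id'])  # one random 24-character id per question
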
Example #2
def getAnswers():
    data = json.loads(request.data)
    print data
    qn_uuid_data, context_token_data, qn_token_data = get_json(data)
    print "qn_uuid_data"
    print qn_uuid_data
    print "#" * 100
    print "context_token_data"
    print context_token_data
    print "#" * 100
    print "qn_token_data"
    print qn_token_data
    answers_dict = generate_answers(sess, qa_model, word2id, qn_uuid_data,
                                    context_token_data, qn_token_data)
    return jsonify(answers_dict)
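
Example #2 is a Flask view: it parses the POSTed JSON body from request.data and returns the uuid->answer mapping via jsonify, relying on module-level globals (sess, qa_model, word2id) set up elsewhere. A self-contained sketch of the same request/response pattern with the model call stubbed out (the route and field names here are illustrative, not from the original app):

import json
from flask import Flask, request, jsonify

app = Flask(__name__)

@app.route('/answers', methods=['POST'])
def answers():
    data = json.loads(request.data)
    # The real view calls generate_answers(sess, qa_model, word2id, ...) here;
    # this stub just returns an empty answer per submitted uuid.
    answers_dict = {str(uuid): "" for uuid in data.get('uuids', [])}
    return jsonify(answers_dict)

if __name__ == '__main__':
    app.run()
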
Example #3
def main(unused_argv):
    # Print an error message if you've entered flags incorrectly
    if len(unused_argv) != 1:
        raise Exception("There is a problem with how you entered flags: %s" %
                        unused_argv)

    # Check for Python 2
    if sys.version_info[0] != 2:
        raise Exception(
            "ERROR: You must use Python 2 but you are running Python %i" %
            sys.version_info[0])

    # Print out Tensorflow version
    print "This code was developed and tested on TensorFlow 1.4.1. Your TensorFlow version: %s" % tf.__version__

    # Define train_dir
    if not FLAGS.experiment_name and not FLAGS.train_dir and FLAGS.mode != "official_eval":
        raise Exception(
            "You need to specify either --experiment_name or --train_dir")
    FLAGS.train_dir = FLAGS.train_dir or os.path.join(EXPERIMENTS_DIR,
                                                      FLAGS.experiment_name)

    # Initialize bestmodel directory
    bestmodel_dir = os.path.join(FLAGS.train_dir, "best_checkpoint")

    # Define path for glove vecs
    FLAGS.glove_path = FLAGS.glove_path or os.path.join(
        DEFAULT_DATA_DIR, "glove.6B.{}d.txt".format(FLAGS.embedding_size))

    # Load embedding matrix and vocab mappings
    emb_matrix, word2id, id2word = get_glove(FLAGS.glove_path,
                                             FLAGS.embedding_size)

    # Get filepaths to train/dev datafiles for tokenized queries, contexts and answers
    train_context_path = os.path.join(FLAGS.data_dir, "train.context")
    train_qn_path = os.path.join(FLAGS.data_dir, "train.question")
    train_ans_path = os.path.join(FLAGS.data_dir, "train.span")
    dev_context_path = os.path.join(FLAGS.data_dir, "dev.context")
    dev_qn_path = os.path.join(FLAGS.data_dir, "dev.question")
    dev_ans_path = os.path.join(FLAGS.data_dir, "dev.span")

    # Initialize model
    qa_model = QAModel(FLAGS, id2word, word2id, emb_matrix)

    # Some GPU settings
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    # Split by mode
    if FLAGS.mode == "train":

        # Setup train dir and logfile
        if not os.path.exists(FLAGS.train_dir):
            os.makedirs(FLAGS.train_dir)
        file_handler = logging.FileHandler(
            os.path.join(FLAGS.train_dir, "log.txt"))
        logging.getLogger().addHandler(file_handler)

        # Save a record of flags as a .json file in train_dir
        with open(os.path.join(FLAGS.train_dir, "flags.json"), 'w') as fout:
            json.dump(FLAGS.__flags, fout)

        # Make bestmodel dir if necessary
        if not os.path.exists(bestmodel_dir):
            os.makedirs(bestmodel_dir)

        with tf.Session(config=config) as sess:

            # Load most recent model
            initialize_model(sess,
                             qa_model,
                             FLAGS.train_dir,
                             expect_exists=False)

            # Train
            qa_model.train(sess, train_context_path, train_qn_path,
                           train_ans_path, dev_qn_path, dev_context_path,
                           dev_ans_path)

    elif FLAGS.mode == "show_examples":
        with tf.Session(config=config) as sess:

            # Load best model
            initialize_model(sess, qa_model, bestmodel_dir, expect_exists=True)

            # Show examples with F1/EM scores
            _, _ = qa_model.check_f1_em(sess,
                                        dev_context_path,
                                        dev_qn_path,
                                        dev_ans_path,
                                        "dev",
                                        num_samples=10,
                                        print_to_screen=True)

    elif FLAGS.mode == "official_eval":
        if FLAGS.json_in_path == "":
            raise Exception(
                "For official_eval mode, you need to specify --json_in_path")
        if FLAGS.ckpt_load_dir == "":
            raise Exception(
                "For official_eval mode, you need to specify --ckpt_load_dir")

        # Read the JSON data from file
        qn_uuid_data, context_token_data, qn_token_data = get_json_data(
            FLAGS.json_in_path)
        print "qn_uuid_data"
        print qn_uuid_data
        print "#" * 100
        print "context_token_data"
        print context_token_data
        print "#" * 100
        print "qn_token_data"
        print qn_token_data

        with tf.Session(config=config) as sess:

            # Load model from ckpt_load_dir
            initialize_model(sess,
                             qa_model,
                             FLAGS.ckpt_load_dir,
                             expect_exists=True)

            # Get a predicted answer for each example in the data
            # Return a mapping answers_dict from uuid to answer
            answers_dict = generate_answers(sess, qa_model, word2id,
                                            qn_uuid_data, context_token_data,
                                            qn_token_data)

            # Write the uuid->answer mapping to a JSON file in the root dir
            print "Writing predictions to %s..." % FLAGS.json_out_path
            with io.open(FLAGS.json_out_path, 'w', encoding='utf-8') as f:
                f.write(unicode(json.dumps(answers_dict, ensure_ascii=False)))
                print "Wrote predictions to %s" % FLAGS.json_out_path

    else:
        raise Exception("Unexpected value of FLAGS.mode: %s" % FLAGS.mode)
Example #4
def main(unused_argv):
    # Print an error message if you've entered flags incorrectly
    if len(unused_argv) != 1:
        raise Exception("There is a problem with how you entered flags: %s" %
                        unused_argv)

    # Check for Python 2
    if sys.version_info[0] != 2:
        raise Exception(
            "ERROR: You must use Python 2 but you are running Python %i" %
            sys.version_info[0])

    # Define path for glove vecs
    FLAGS.glove_path = FLAGS.glove_path or \
                       os.path.join(DEFAULT_DATA_DIR,
                            "glove.6B.{}d.txt".format(FLAGS.embedding_size))

    # Load embedding matrix and vocab mappings
    timer.start("glove_getter")
    emb_matrix, word2id, id2word = get_glove(FLAGS.glove_path,
                                             FLAGS.embedding_size)
    id2idf = get_idf(os.path.abspath(FLAGS.idf_path), word2id)
    logger.warn("Loading GloVe embeddings of size {} took {:.2f} s".format(
        FLAGS.embedding_size, timer.stop("glove_getter")))
    # Print out Tensorflow version
    # print "This code was developed and tested on TensorFlow 1.4.1. Your TensorFlow version: %s" % tf.__version__

    ensumble = FLAGS.ensumble
    print(ensumble)
    if not ensumble and not FLAGS.attn_layer and not FLAGS.train_dir and FLAGS.mode != "official_eval":
        raise Exception(
            "You need to specify either --attn_layer or --train_dir")

    # Define train_dir
    if not FLAGS.experiment_name:
        FLAGS.experiment_name = "A_{}_E_{}_D_{}".format(
            FLAGS.attn_layer, FLAGS.embedding_size, FLAGS.dropout)

    checkptr_name = FLAGS.experiment_name + "/glove{}".format(
        FLAGS.embedding_size)
    FLAGS.train_dir = FLAGS.train_dir or\
                        os.path.join(EXPERIMENTS_DIR, checkptr_name)

    # Initialize bestmodel directory
    bestmodel_dir = os.path.join(FLAGS.train_dir, "best_checkpoint")

    # Get filepaths to train/dev datafiles for tokenized queries, contexts and answers
    train_context_path = os.path.join(FLAGS.data_dir, "train.context")
    train_qn_path = os.path.join(FLAGS.data_dir, "train.question")
    train_ans_path = os.path.join(FLAGS.data_dir, "train.span")
    dev_context_path = os.path.join(FLAGS.data_dir, "dev.context")
    dev_qn_path = os.path.join(FLAGS.data_dir, "dev.question")
    dev_ans_path = os.path.join(FLAGS.data_dir, "dev.span")

    # Some GPU settings
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    is_training = (FLAGS.mode == "train")
    if not ensumble:
        # Initialize model
        qa_model = QAModel(FLAGS, id2word, word2id, emb_matrix, id2idf,
                           is_training)
    else:
        ensumbler = Ensumbler(ensumble, config, id2word, word2id, emb_matrix,
                              id2idf)

    # Split by mode
    if FLAGS.mode == "train":
        # Setup train dir and logfile
        if not os.path.exists(FLAGS.train_dir):
            os.makedirs(FLAGS.train_dir)
        file_handler = logging.FileHandler(
            os.path.join(FLAGS.train_dir, "log.txt"))
        logging.getLogger().addHandler(file_handler)

        # Save a record of flags as a .json file in train_dir
        with open(os.path.join(FLAGS.train_dir, "flags.json"), 'w') as fout:
            json.dump(FLAGS.__flags, fout)

        # Make bestmodel dir if necessary
        if not os.path.exists(bestmodel_dir):
            os.makedirs(bestmodel_dir)

        with tf.Session(config=config) as sess:
            # Load most recent model
            qa_model.initialize_from_checkpoint(sess,
                                                FLAGS.train_dir,
                                                expect_exists=False)

            # Train
            qa_model.train(sess, train_context_path, train_qn_path,
                           train_ans_path, dev_qn_path, dev_context_path,
                           dev_ans_path)

    elif FLAGS.mode == "show_examples":
        with tf.Session(config=config) as sess:

            # Load best model
            qa_model.initialize_from_checkpoint(sess,
                                                bestmodel_dir,
                                                expect_exists=True)

            # Show examples with F1/EM scores
            f1, em = qa_model.check_f1_em(sess,
                                          dev_context_path,
                                          dev_qn_path,
                                          dev_ans_path,
                                          "dev",
                                          num_samples=10,
                                          print_to_screen=True)
            logger.info("Dev: F1 = {0:.3}, EM = {0:.3}".format(f1, em))

    elif FLAGS.mode == "eval":
        if ensumble:
            # train
            train_f1, train_em = ensumbler.check_f1_em(train_context_path,
                                                       train_qn_path,
                                                       train_ans_path, "train",
                                                       FLAGS.n_eval)
            # dev
            dev_f1, dev_em = ensumbler.check_f1_em(dev_context_path,
                                                   dev_qn_path, dev_ans_path,
                                                   "dev", FLAGS.n_eval)

        else:
            with tf.Session(config=config) as sess:

                # Load best model
                qa_model.initialize_from_checkpoint(sess,
                                                    FLAGS.ckpt_load_dir,
                                                    expect_exists=True)

                logger.info("Model initialzed from checkpoint")
                # train
                train_f1, train_em = qa_model.check_f1_em(
                    sess,
                    train_context_path,
                    train_qn_path,
                    train_ans_path,
                    "train",
                    num_samples=10,
                    print_to_screen=False)
                # dev
                dev_f1, dev_em = qa_model.check_f1_em(sess,
                                                      dev_context_path,
                                                      dev_qn_path,
                                                      dev_ans_path,
                                                      "dev",
                                                      num_samples=10,
                                                      print_to_screen=False)
        logger.error("Train: F1 = {:.3}, EM = {:.3}".format(
            train_f1, train_em))
        logger.error("Dev:   F1 = {:.3}, EM = {:.3}".format(dev_f1, dev_em))

    elif FLAGS.mode == "official_eval":
        if not ensumble:
            if FLAGS.json_in_path == "":
                raise Exception(
                    "For official_eval mode, you need to specify --json_in_path"
                )
            if FLAGS.ckpt_load_dir == "":
                raise Exception(
                    "For official_eval mode, you need to specify --ckpt_load_dir"
                )

        # Read the JSON data from file
        qn_uuid_data, context_token_data, qn_token_data = get_json_data(
            FLAGS.json_in_path)

        if ensumble:
            answers_dict = ensumbler.generate_answers(qn_uuid_data,
                                                      context_token_data,
                                                      qn_token_data)
        else:
            with tf.Session(config=config) as sess:
                # Load model from ckpt_load_dir
                qa_model.initialize_from_checkpoint(sess,
                                                    FLAGS.ckpt_load_dir,
                                                    expect_exists=True)
                # Get a predicted answer for each example in the data
                # Return a mapping answers_dict from uuid to answer
                answers_dict = generate_answers(sess, qa_model, word2id,
                                                id2idf, qn_uuid_data,
                                                context_token_data,
                                                qn_token_data)

        # Write the uuid->answer mapping to a JSON file in the root dir
        print "Writing predictions to %s..." % FLAGS.json_out_path
        with io.open(FLAGS.json_out_path, 'w', encoding='utf-8') as f:
            f.write(unicode(json.dumps(answers_dict, ensure_ascii=False)))
            print "Wrote predictions to %s" % FLAGS.json_out_path

    else:
        raise Exception("Unexpected value of FLAGS.mode: %s" % FLAGS.mode)
Example #5
def main(unused_argv):
    # Print an error message if you've entered flags incorrectly
    if len(unused_argv) != 1:
        raise Exception("There is a problem with how you entered flags: %s" %
                        unused_argv)

    # Check for Python 2
    if sys.version_info[0] != 2:
        raise Exception(
            "ERROR: You must use Python 2 but you are running Python %i" %
            sys.version_info[0])

    # Print out Tensorflow version
    print "This code was developed and tested on TensorFlow 1.4.1. Your TensorFlow version: %s" % tf.__version__

    # Initialize bestmodel directory
    bestmodel_dir = os.path.join(
        r"F:\Sid\Learnings\Data Scientist\Text Analysis\Machine Comprehension\data\ModelLogs\best_weights",
        "best_checkpoint")

    # Initialize model
    qa_model = QAModel(FLAGS, id2word, word2id, emb_matrix)

    # Some GPU settings
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    # Split by mode
    if FLAGS.mode == "train":

        # Setup train dir and logfile
        if not os.path.exists(FLAGS.train_dir):
            os.makedirs(FLAGS.train_dir)
        file_handler = logging.FileHandler(
            os.path.join(FLAGS.train_dir, "log.txt"))
        logging.getLogger().addHandler(file_handler)

        # Save a record of flags as a .json file in train_dir
        with open(os.path.join(FLAGS.train_dir, "flags.json"), 'w') as fout:
            json.dump(FLAGS.__flags, fout)

        # Make bestmodel dir if necessary
        if not os.path.exists(bestmodel_dir):
            os.makedirs(bestmodel_dir)

        with tf.Session(config=config) as sess:

            # Load most recent model
            initialize_model(sess,
                             qa_model,
                             FLAGS.train_dir,
                             expect_exists=False)

            # Train
            qa_model.train(sess, train_context_path, train_qn_path,
                           train_ans_path, dev_qn_path, dev_context_path,
                           dev_ans_path)

    elif FLAGS.mode == "show_examples":
        with tf.Session(config=config) as sess:

            # Load best model
            initialize_model(sess, qa_model, bestmodel_dir, expect_exists=True)

            # Show examples with F1/EM scores
            _, _ = qa_model.check_f1_em(sess,
                                        dev_context_path,
                                        dev_qn_path,
                                        dev_ans_path,
                                        "dev",
                                        num_samples=10,
                                        print_to_screen=True)

    elif FLAGS.mode == "official_eval":
        if FLAGS.json_in_path == "":
            raise Exception(
                "For official_eval mode, you need to specify --json_in_path")
        if FLAGS.ckpt_load_dir == "":
            raise Exception(
                "For official_eval mode, you need to specify --ckpt_load_dir")

        # Read the JSON data from file
        qn_uuid_data, context_token_data, qn_token_data = get_json_data(
            FLAGS.json_in_path)

        with tf.Session(config=config) as sess:

            # Load model from ckpt_load_dir
            initialize_model(sess,
                             qa_model,
                             FLAGS.ckpt_load_dir,
                             expect_exists=True)

            # Get a predicted answer for each example in the data
            # Return a mapping answers_dict from uuid to answer
            answers_dict = generate_answers(sess, qa_model, word2id,
                                            qn_uuid_data, context_token_data,
                                            qn_token_data)

            # Write the uuid->answer mapping to a JSON file in the root dir
            print "Writing predictions to %s..." % FLAGS.json_out_path
            with io.open(FLAGS.json_out_path, 'w', encoding='utf-8') as f:
                f.write(unicode(json.dumps(answers_dict, ensure_ascii=False)))
                print "Wrote predictions to %s" % FLAGS.json_out_path

    else:
        raise Exception("Unexpected value of FLAGS.mode: %s" % FLAGS.mode)
Example #6
def main(unused_argv):
    # Print an error message if you've entered flags incorrectly
    if len(unused_argv) != 1:
        raise Exception("There is a problem with how you entered flags: %s" % unused_argv)

    # Check for Python 2
    if sys.version_info[0] != 2:
        raise Exception("ERROR: You must use Python 2 but you are running Python %i" % sys.version_info[0])

    # Print out Tensorflow version
    print "This code was developed and tested on TensorFlow 1.4.1. Your TensorFlow version: %s" % tf.__version__

    # Define train_dir
    if not FLAGS.experiment_name and not FLAGS.train_dir and FLAGS.mode != "official_eval":
        raise Exception("You need to specify either --experiment_name or --train_dir")
    FLAGS.train_dir = FLAGS.train_dir or os.path.join(EXPERIMENTS_DIR, FLAGS.experiment_name)

    # Initialize bestmodel directory
    bestmodel_dir = os.path.join(FLAGS.train_dir, "best_checkpoint")

    # Define path for glove vecs
    FLAGS.glove_path = FLAGS.glove_path or os.path.join(DEFAULT_DATA_DIR, "glove.6B.{}d.txt".format(FLAGS.embedding_size))

    # Load embedding matrix and vocab mappings
    emb_matrix, word2id, id2word = get_glove(FLAGS.glove_path, FLAGS.embedding_size)

    # Get filepaths to train/dev datafiles for tokenized queries, contexts and answers
    train_context_path = os.path.join(FLAGS.data_dir, "train.context")
    train_qn_path = os.path.join(FLAGS.data_dir, "train.question")
    train_ans_path = os.path.join(FLAGS.data_dir, "train.span")
    dev_context_path = os.path.join(FLAGS.data_dir, "dev.context")
    dev_qn_path = os.path.join(FLAGS.data_dir, "dev.question")
    dev_ans_path = os.path.join(FLAGS.data_dir, "dev.span")

    # Initialize model
    qa_model = QAModel(FLAGS, id2word, word2id, emb_matrix)

    # Some GPU settings
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    # Split by mode
    if FLAGS.mode == "train":

        # Setup train dir and logfile
        if not os.path.exists(FLAGS.train_dir):
            os.makedirs(FLAGS.train_dir)
        file_handler = logging.FileHandler(os.path.join(FLAGS.train_dir, "log.txt"))
        logging.getLogger().addHandler(file_handler)

        # Save a record of flags as a .json file in train_dir
        with open(os.path.join(FLAGS.train_dir, "flags.json"), 'w') as fout:
            json.dump(FLAGS.__flags, fout)

        # Make bestmodel dir if necessary
        if not os.path.exists(bestmodel_dir):
            os.makedirs(bestmodel_dir)

        with tf.Session(config=config) as sess:

            # Load most recent model
            initialize_model(sess, qa_model, FLAGS.train_dir, expect_exists=False)

            # Train
            qa_model.train(sess, train_context_path, train_qn_path, train_ans_path, dev_qn_path, dev_context_path, dev_ans_path)


    elif FLAGS.mode == "show_examples":
        with tf.Session(config=config) as sess:

            # Load best model
            initialize_model(sess, qa_model, bestmodel_dir, expect_exists=True)

            # Show examples with F1/EM scores
            _, _ = qa_model.check_f1_em(sess, dev_context_path, dev_qn_path, dev_ans_path, "dev", num_samples=10, print_to_screen=True)


    elif FLAGS.mode == "official_eval":
        if FLAGS.json_in_path == "":
            raise Exception("For official_eval mode, you need to specify --json_in_path")
        if FLAGS.ckpt_load_dir == "":
            raise Exception("For official_eval mode, you need to specify --ckpt_load_dir")

        # Read the JSON data from file
        qn_uuid_data, context_token_data, qn_token_data = get_json_data(FLAGS.json_in_path)

        with tf.Session(config=config) as sess:

            # Load model from ckpt_load_dir
            initialize_model(sess, qa_model, FLAGS.ckpt_load_dir, expect_exists=True)

            # Get a predicted answer for each example in the data
            # Return a mapping answers_dict from uuid to answer
            answers_dict = generate_answers(sess, qa_model, word2id, qn_uuid_data, context_token_data, qn_token_data)

            # Write the uuid->answer mapping to a JSON file in the root dir
            print "Writing predictions to %s..." % FLAGS.json_out_path
            with io.open(FLAGS.json_out_path, 'w', encoding='utf-8') as f:
                f.write(unicode(json.dumps(answers_dict, ensure_ascii=False)))
                print "Wrote predictions to %s" % FLAGS.json_out_path


    else:
        raise Exception("Unexpected value of FLAGS.mode: %s" % FLAGS.mode)
        dev_loss = sum(loss_per_batch) / float(total_num_examples)

        return dev_loss


if __name__ == "__main__":
    mode = sys.argv[1]
    processor = Processor()
    if mode == "train":
        model_file_path = None
        if len(sys.argv) > 2:
            model_file_path = sys.argv[2]
        processor.train(model_file_path)
    elif mode == "show_examples":
        model_file_path = sys.argv[2]
        model = processor.get_model(model_file_path)
        processor.check_f1_em(model, num_samples=10, print_to_screen=True)

    elif mode == "official_eval":
        model_file_path = sys.argv[2]
        json_in_path = ""
        json_out_path = ""
        qn_uuid_data, context_token_data, qn_token_data = get_json_data(json_in_path)
        model = processor.get_model(model_file_path)
        answers_dict = generate_answers(config, model, processor, qn_uuid_data, context_token_data, qn_token_data)
        print ("Writing predictions to %s..." % json_out_path)
        with open(json_out_path, 'w') as f:
            f.write(json.dumps(answers_dict, ensure_ascii=False))
            print ("Wrote predictions to %s" % json_out_path)
Example #8
def main(unused_argv):
    # Print an error message if you've entered flags incorrectly
    if len(unused_argv) != 1:
        raise Exception("There is a problem with how you entered flags: %s" %
                        unused_argv)

    # Check for Python 2
    if sys.version_info[0] != 2:
        raise Exception(
            "ERROR: You must use Python 2 but you are running Python %i" %
            sys.version_info[0])

    # Print out Tensorflow version
    print "This code was developed and tested on TensorFlow 1.4.1. Your TensorFlow version: %s" % tf.__version__

    # Define train_dir
    if not FLAGS.experiment_name and not FLAGS.train_dir and FLAGS.mode != "official_eval":
        raise Exception(
            "You need to specify either --experiment_name or --train_dir")
    FLAGS.train_dir = FLAGS.train_dir or os.path.join(EXPERIMENTS_DIR,
                                                      FLAGS.experiment_name)

    # Initialize bestmodel directory
    bestmodel_dir = os.path.join(FLAGS.train_dir, "best_checkpoint")

    # Define path for glove vecs
    FLAGS.glove_path = FLAGS.glove_path or os.path.join(
        DEFAULT_DATA_DIR, "glove.6B.{}d.txt".format(FLAGS.embedding_size))

    #     if FLAGS.mode != 'loadProbs':
    # Load embedding matrix and vocab mappings
    emb_matrix, word2id, id2word = get_glove(FLAGS.glove_path,
                                             FLAGS.embedding_size)

    # Get filepaths to train/dev datafiles for tokenized queries, contexts and answers
    train_context_path = os.path.join(FLAGS.data_dir, "train.context")
    train_qn_path = os.path.join(FLAGS.data_dir, "train.question")
    train_ans_path = os.path.join(FLAGS.data_dir, "train.span")
    dev_context_path = os.path.join(FLAGS.data_dir, "dev.context")
    dev_qn_path = os.path.join(FLAGS.data_dir, "dev.question")
    dev_ans_path = os.path.join(FLAGS.data_dir, "dev.span")

    # Initialize model
    qa_model = QAModel(FLAGS, id2word, word2id, emb_matrix)

    # Some GPU settings
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    # Split by mode
    if FLAGS.mode == "train":

        # Setup train dir and logfile
        if not os.path.exists(FLAGS.train_dir):
            os.makedirs(FLAGS.train_dir)
        file_handler = logging.FileHandler(
            os.path.join(FLAGS.train_dir, "log.txt"))
        logging.getLogger().addHandler(file_handler)

        # Save a record of flags as a .json file in train_dir
        # with open(os.path.join(FLAGS.train_dir, "flags.json"), 'w') as fout:
        # json.dump(FLAGS.__flags, fout)

        # Make bestmodel dir if necessary
        if not os.path.exists(bestmodel_dir):
            os.makedirs(bestmodel_dir)

        with tf.Session(config=config) as sess:

            # Load most recent model
            initialize_model(sess,
                             qa_model,
                             FLAGS.train_dir,
                             expect_exists=False)

            # Train
            qa_model.train(sess, train_context_path, train_qn_path,
                           train_ans_path, dev_qn_path, dev_context_path,
                           dev_ans_path)

    elif FLAGS.mode == "show_examples":
        with tf.Session(config=config) as sess:

            # Load best model
            initialize_model(sess, qa_model, bestmodel_dir, expect_exists=True)

            # Show examples with F1/EM scores
            _, _ = qa_model.check_f1_em(sess,
                                        dev_context_path,
                                        dev_qn_path,
                                        dev_ans_path,
                                        "dev",
                                        num_samples=20,
                                        print_to_screen=True)

    elif FLAGS.mode == "official_eval":
        if FLAGS.json_in_path == "":
            raise Exception(
                "For official_eval mode, you need to specify --json_in_path")
        if FLAGS.ckpt_load_dir == "":
            raise Exception(
                "For official_eval mode, you need to specify --ckpt_load_dir")

        # Read the JSON data from file
        qn_uuid_data, context_token_data, qn_token_data = get_json_data(
            FLAGS.json_in_path)

        with tf.Session(config=config) as sess:

            # Load model from ckpt_load_dir
            initialize_model(sess,
                             qa_model,
                             FLAGS.ckpt_load_dir,
                             expect_exists=True)

            # Get a predicted answer for each example in the data
            # Return a mapping answers_dict from uuid to answer
            answers_dict = generate_answers(sess, qa_model, word2id,
                                            qn_uuid_data, context_token_data,
                                            qn_token_data)

            # Write the uuid->answer mapping to a JSON file in the root dir
            print "Writing predictions to %s..." % FLAGS.json_out_path
            with io.open(FLAGS.json_out_path, 'w', encoding='utf-8') as f:
                f.write(unicode(json.dumps(answers_dict, ensure_ascii=False)))
                print "Wrote predictions to %s" % FLAGS.json_out_path

    elif FLAGS.mode == "official_eval_with_bidaf":
        if FLAGS.json_in_path == "":
            raise Exception(
                "For official_eval mode, you need to specify --json_in_path")
        if FLAGS.ckpt_load_dir == "":
            raise Exception(
                "For official_eval mode, you need to specify --ckpt_load_dir")

        # Read the JSON data from file
        qn_uuid_data, context_token_data, qn_token_data = get_json_data(
            FLAGS.json_in_path)

        with tf.Session(config=config) as sess:

            # Load model from ckpt_load_dir
            initialize_model(sess,
                             qa_model,
                             FLAGS.ckpt_load_dir,
                             expect_exists=True)

            # Get a predicted answer for each example in the data
            # Return a mapping answers_dict from uuid to answer
            answers_dict, bidaf_dict, self_dict1, self_dict2, out_dict = generate_answers_with_bidaf(
                sess, qa_model, word2id, qn_uuid_data, context_token_data,
                qn_token_data)

            # Write the uuid->answer mapping to a JSON file in the root dir
            print "Writing predictions to %s..." % FLAGS.json_out_path
            with io.open(FLAGS.json_out_path, 'w', encoding='utf-8') as f:
                f.write(unicode(json.dumps(answers_dict, ensure_ascii=False)))
                print "Wrote predictions to %s" % FLAGS.json_out_path

            print "Writing sims to %s..." % FLAGS.json_out_path
            with io.open(FLAGS.json_out_path + '-bidaf', 'w',
                         encoding='utf-8') as f:
                f.write(unicode(json.dumps(bidaf_dict, ensure_ascii=False)))
                print "Wrote sims to %s" % FLAGS.json_out_path

            print "Writing self sims1 to %s..." % FLAGS.json_out_path
            with io.open(FLAGS.json_out_path + '-self1', 'w',
                         encoding='utf-8') as f:
                f.write(unicode(json.dumps(self_dict1, ensure_ascii=False)))
                print "Wrote self sims1 to %s" % FLAGS.json_out_path

            print "Writing self sims2 to %s..." % FLAGS.json_out_path
            with io.open(FLAGS.json_out_path + '-self2', 'w',
                         encoding='utf-8') as f:
                f.write(unicode(json.dumps(self_dict2, ensure_ascii=False)))
                print "Wrote self sims2 to %s" % FLAGS.json_out_path

            print "Writing preds to %s..." % FLAGS.json_out_path
            with io.open(FLAGS.json_out_path + '-preds', 'w',
                         encoding='utf-8') as f:
                f.write(unicode(json.dumps(out_dict, ensure_ascii=False)))
                print "Wrote preds to %s" % FLAGS.json_out_path

    elif FLAGS.mode == 'saveProbs':
        if FLAGS.json_in_path == "":
            raise Exception(
                "For official_eval mode, you need to specify --json_in_path")
        if FLAGS.ckpt_load_dir == "":
            raise Exception(
                "For official_eval mode, you need to specify --ckpt_load_dir")

        # Read the JSON data from file
        qn_uuid_data, context_token_data, qn_token_data = get_json_data(
            FLAGS.json_in_path)

        with tf.Session(config=config) as sess:

            # Load model from ckpt_load_dir
            initialize_model(sess,
                             qa_model,
                             FLAGS.ckpt_load_dir,
                             expect_exists=True)

            # Get a predicted answer for each example in the data
            # Return a mapping answers_dict from uuid to answer
            answers_dict = save_answer_probs(sess, qa_model, word2id,
                                             qn_uuid_data, context_token_data,
                                             qn_token_data)

            # Write the uuid->answer mapping to a JSON file in the root dir
            print "Writing predictions to %s..." % FLAGS.json_out_path
            with io.open(FLAGS.json_out_path, 'wb') as f:
                pickle.dump(answers_dict, f, protocol=2)
                # f.write(unicode(pickle.dumps(answers_dict, ensure_ascii=False)))
                # f.write(unicode(json.dumps(answers_dict, ensure_ascii=False)))
                print "Wrote predictions to %s" % FLAGS.json_out_path

    elif FLAGS.mode == 'loadProbs':
        if FLAGS.json_in_path == "":
            raise Exception(
                "For official_eval mode, you need to specify --json_in_path")
        if FLAGS.ckpt_load_dir == "":
            raise Exception(
                "For official_eval mode, you need to specify --ckpt_load_dir")

        # Read the JSON data from file
        qn_uuid_data, context_token_data, qn_token_data = get_json_data(
            FLAGS.json_in_path)
        #         word2id = pickle.load(open('word2id', 'rb'))
        #         pickle.dump(word2id, open('word2id', 'wb'))
        print 'Loaded data'

        dictLists = []
        for file in os.listdir('./pickles'):
            f = os.path.join('./pickles', file)
            print 'Loading predictions from ', f
            prob_dict = pickle.load(open(f, 'rb'))
            dictLists += [prob_dict]


#         mainDict = {}
#         stdiDict = {}
#         for probs in dictLists:
#             for k in dictLists[0].keys():
#                 stdi = 1.0 / (np.std(np.array(probs[k][0])) + np.std(np.array(probs[k][1])) + 1e-2)
#                 stdiDict[k] = stdi
#                 try:
#                     mainDict[k] = (mainDict[k][0] + stdi * np.array(probs[k][0]), mainDict[k][1] + stdi* np.array(probs[k][1]))
#                 except KeyError:
#                     mainDict[k] = (stdi* np.array(probs[k][0]), stdi*np.array(probs[k][1]))

        uuid2ans = {}  # maps uuid to string containing predicted answer
        detokenizer = MosesDetokenizer()

        #         for k in mainDict.keys():
        #             start_dist = mainDict[k][0] / stdiDict[k]
        #             end_dist = mainDict[k][1] / stdiDict[k]

        #             # Take argmax to get start_pos and end_post, both shape (batch_size)
        #             end_dp = np.zeros(end_dist.shape)
        # #             start_pos = np.argmax(start_dist)
        # #             end_pos = np.argmax(end_dist)
        #             end_dp[-1]=end_dist[-1]
        #             for i in range(len(end_dist)-2,-1,-1):
        #                 end_dp[i]=np.amax([end_dist[i],end_dp[i+1]])
        #             start_pos=np.argmax(start_dist*end_dp)
        #             end_pos = start_pos + np.argmax(end_dist[start_pos:])

        #             uuid2ans[k] = (start_pos, end_pos)

        for k in dictLists[0].keys():
            spanDict = {}
            for probs in dictLists:
                start_dist = np.array(probs[k][0])
                end_dist = np.array(probs[k][1])

                # Choose start_pos and end_pos for this example: running max over end_dist, then argmax
                end_dp = np.zeros(end_dist.shape)
                end_dp[-1] = end_dist[-1]
                for i in range(len(end_dist) - 2, -1, -1):
                    end_dp[i] = np.amax([end_dist[i], end_dp[i + 1]])
                start_pos = np.argmax(start_dist * end_dp)
                end_pos = start_pos + np.argmax(end_dist[start_pos:])

                try:
                    spanDict[(start_pos, end_pos)] += [
                        start_dist[start_pos] * end_dist[end_pos]
                    ]
                except KeyError:
                    spanDict[(start_pos, end_pos)] = [
                        start_dist[start_pos] * end_dist[end_pos]
                    ]

            best_span = (0, 0)
            best_span_votes = 0
            best_span_prob = 0
            for span in spanDict.keys():
                if len(spanDict[span]) > best_span_votes:
                    best_span = span
                    best_span_votes = len(spanDict[span])
                    best_span_prob = max(spanDict[span])
                elif (len(spanDict[span]) == best_span_votes
                      and best_span_prob < max(spanDict[span])):
                    best_span = span
                    best_span_votes = len(spanDict[span])
                    best_span_prob = max(spanDict[span])

            uuid2ans[k] = (best_span[0], best_span[1])

        result = {}
        data_size = len(qn_uuid_data)
        num_batches = ((data_size - 1) / FLAGS.batch_size) + 1
        batch_num = 0
        print "Generating answers..."

        for batch in get_batch_generator(word2id, qn_uuid_data,
                                         context_token_data, qn_token_data,
                                         FLAGS.batch_size, FLAGS.context_len,
                                         FLAGS.question_len):

            # For each example in the batch:
            for ex_idx in range(FLAGS.batch_size):

                # Detokenize and add to dict
                try:
                    uuid = batch.uuids[ex_idx]
                    pred_start, pred_end = uuid2ans[uuid]

                    # Original context tokens (no UNKs or padding) for this example
                    context_tokens = batch.context_tokens[
                        ex_idx]  # list of strings

                    # Check the predicted span is in range
                    assert pred_start in range(len(context_tokens))
                    assert pred_end in range(len(context_tokens))

                    # Predicted answer tokens
                    pred_ans_tokens = context_tokens[pred_start:pred_end +
                                                     1]  # list of strings

                    result[uuid] = detokenizer.detokenize(pred_ans_tokens,
                                                          return_str=True)

                except IndexError:
                    pass

            batch_num += 1

            if batch_num % 10 == 0:
                print "Generated answers for %i/%i batches = %.2f%%" % (
                    batch_num, num_batches, batch_num * 100.0 / num_batches)

        print "Finished generating answers for dataset."
        answers_dict = result

        # Write the uuid->answer mapping to a JSON file in the root dir
        print "Writing predictions to %s..." % FLAGS.json_out_path
        with io.open(FLAGS.json_out_path, 'w', encoding='utf-8') as f:
            f.write(unicode(json.dumps(answers_dict, ensure_ascii=False)))
            print "Wrote predictions to %s" % FLAGS.json_out_path

    else:
        raise Exception("Unexpected value of FLAGS.mode: %s" % FLAGS.mode)
Example #9
def main(unused_argv):
    # Print an error message if you've entered flags incorrectly
    if len(unused_argv) != 1:
        raise Exception("There is a problem with how you entered flags: %s" %
                        unused_argv)

    # Check for Python 2
    if sys.version_info[0] != 2:
        raise Exception(
            "ERROR: You must use Python 2 but you are running Python %i" %
            sys.version_info[0])

    # Check for ensemble model param setting
    if FLAGS.enable_ensemble_model and (FLAGS.mode != "official_eval"
                                        or not FLAGS.ensemble_model_names):
        raise Exception(
            "ERROR: model ensemble is only supported in official_eval mode, you must specify ensemble_model_names"
        )

    # Print out Tensorflow version
    print "This code was developed and tested on TensorFlow 1.4.1. Your TensorFlow version: %s" % tf.__version__

    # Define train_dir
    if (not FLAGS.enable_ensemble_model and not FLAGS.experiment_name
            and not FLAGS.train_dir and FLAGS.mode != "official_eval"
        ) or (FLAGS.enable_ensemble_model and not FLAGS.ensemble_model_names):
        raise Exception(
            "You need to specify either --experiment_name or --train_dir, or ensemble_model_names if ensemble is enabled."
        )
    FLAGS.train_dir = FLAGS.train_dir or os.path.join(EXPERIMENTS_DIR,
                                                      FLAGS.experiment_name)

    # Initialize bestmodel directory
    bestmodel_dir = os.path.join(FLAGS.train_dir, "best_checkpoint")

    # Define path for glove vecs
    FLAGS.glove_path = FLAGS.glove_path or os.path.join(
        DEFAULT_DATA_DIR, "glove.6B.{}d.txt".format(FLAGS.embedding_size))

    # Load embedding matrix and vocab mappings
    emb_matrix, word2id, id2word = get_glove(FLAGS.glove_path,
                                             FLAGS.embedding_size)

    # Build character level vocab mappings
    char2id, id2char = get_char_mapping()

    # Get filepaths to train/dev datafiles for tokenized queries, contexts and answers
    train_context_path = os.path.join(FLAGS.data_dir, "train.context")
    train_qn_path = os.path.join(FLAGS.data_dir, "train.question")
    train_ans_path = os.path.join(FLAGS.data_dir, "train.span")
    dev_context_path = os.path.join(FLAGS.data_dir, "dev.context")
    dev_qn_path = os.path.join(FLAGS.data_dir, "dev.question")
    dev_ans_path = os.path.join(FLAGS.data_dir, "dev.span")

    if not FLAGS.enable_ensemble_model:
        # Initialize model only when ensemble model is disabled.
        qa_model_name = FLAGS.model_name + '_model'
        QAModel = importlib.import_module(qa_model_name).QAModel
        print('model loaded from: %s' % qa_model_name)
        qa_model = QAModel(FLAGS, id2word, word2id, emb_matrix, char2id,
                           id2char)

    # Some GPU settings
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    # Split by mode
    if FLAGS.mode == "train":

        # Setup train dir and logfile
        if not os.path.exists(FLAGS.train_dir):
            os.makedirs(FLAGS.train_dir)
        file_handler = logging.FileHandler(
            os.path.join(FLAGS.train_dir, "log.txt"))
        logging.getLogger().addHandler(file_handler)

        # Save a record of flags as a .json file in train_dir
        with open(os.path.join(FLAGS.train_dir, "flags.json"), 'w') as fout:
            json.dump(FLAGS.__flags, fout)

        # Make bestmodel dir if necessary
        if not os.path.exists(bestmodel_dir):
            os.makedirs(bestmodel_dir)

        with tf.Session(config=config) as sess:

            # Load most recent model
            initialize_model(sess,
                             qa_model,
                             FLAGS.train_dir,
                             expect_exists=False)

            # Train
            qa_model.train(sess, train_context_path, train_qn_path,
                           train_ans_path, dev_qn_path, dev_context_path,
                           dev_ans_path)

    elif FLAGS.mode == "show_examples":
        with tf.Session(config=config) as sess:

            # Load best model
            initialize_model(sess, qa_model, bestmodel_dir, expect_exists=True)

            # Show examples with F1/EM scores
            _, _ = qa_model.check_f1_em(sess,
                                        dev_context_path,
                                        dev_qn_path,
                                        dev_ans_path,
                                        "dev",
                                        num_samples=10,
                                        print_to_screen=True)

    elif FLAGS.mode == "official_eval":
        if FLAGS.json_in_path == "":
            raise Exception(
                "For official_eval mode, you need to specify --json_in_path")
        if not FLAGS.enable_ensemble_model and FLAGS.ckpt_load_dir == "":
            raise Exception(
                "For official_eval mode, you need to specify --ckpt_load_dir or use ensemble_model_names"
            )

        # Read the JSON data from file
        qn_uuid_data, context_token_data, qn_token_data = get_json_data(
            FLAGS.json_in_path)

        if FLAGS.enable_ensemble_model:
            print('FLAGS.ensemble_model_names: %s' %
                  FLAGS.ensemble_model_names)
            print('FLAGS.sum_weights: %s' % FLAGS.sum_weights)
            # KV is 'label': ('model_file', 'exp_name', 'codalab_bundle_name', 'has_cnn', weight),
            ensemble_label_to_model_meta = {
                'binco_legacy': [
                    'binco_legacy_model',
                    'binco_30b15_hidden=100_lr=0.001_batch=100_context=400_qn=27',
                    'binco_30b15', False, 0.6692
                ],  # 0.6900  (74.0, 63.5)
                'chsebinco_real': [
                    'chsebinco_model',
                    'chsebinco_real_1c999_hidden=100_lr=0.001_batch=100_context=400_qn=27',
                    'chsebinco_real_1c999', True, 0.6733
                ],  # 0.6958, (74.7, 64.0)
                'chsebinco_legacy': [
                    'chsebinco_legacy_model',
                    'chsebinco_4a81a_hidden=100_lr=0.001_batch=100_context=400_qn=27',
                    'chsebinco_4a81a', True, 0.6507
                ],  # 0.6954, (?, ?)
                'chgasebinco': [
                    'chgasebinco_model',
                    'chgasebinco_1c999_hidden=100_lr=0.001_batch=100_context=400_qn=27',
                    'chgasebinco_1c999', True, 0.7045
                ],  # 0.7101  (76.6, 66.4)
                'chgasebinco_91ca1': [
                    'chgasebinco_model',
                    'chgasebinco_91ca1_hidden=100_lr=0.001_batch=100_context=400_qn=27',
                    'chgasebinco_91ca1', True, 0.69
                ],  # 0.68  (? ?)
                'chgasebinco_888ca': [
                    'chgasebinco_model',
                    'chgasebinco_888ca_hidden=100_lr=0.001_batch=100_context=400_qn=27',
                    'chgasebinco_888ca', True, 0.69
                ],  # 0.67  (? ?)
                'chgasebinco_888ca_run2': [
                    'chgasebinco_model',
                    'chgasebinco_888ca_run2_hidden=100_lr=0.001_batch=100_context=400_qn=27',
                    'chgasebinco_888ca_run2', True, 0.69
                ],  # 0.6911  (? ?)
            }
            model_labels = FLAGS.ensemble_model_names.split(';')
            if len(model_labels) == 1 and model_labels[0].lower() == 'all':
                model_labels = ensemble_label_to_model_meta.keys()
            else:
                for label in model_labels:
                    assert label in ensemble_label_to_model_meta

            # A list to store the output of all predictions
            # each entry is a map, storing the start and end dist for that batch.
            # len(ensemble_model_pred) is len(model_labels)
            # len(ensemble_model_pred[0]) is number of batches
            # len(ensemble_model_pred[0]['start']) is batch_size
            # len(ensemble_model_pred[0]['end']) is batch_size
            ensemble_model_pred = []
            sum_weights_list = []
            for label in model_labels:
                tf.reset_default_graph()
                model_name, model_exp_name, cl_bundle_name, has_cnn, weight = ensemble_label_to_model_meta[
                    label]
                sum_weights_list.append(str(weight))
                print "Loading model: %s" % model_name
                # TODO(binbinx): change this to appropriate models
                QAModel = importlib.import_module(model_name).QAModel
                qa_model = (QAModel(FLAGS, id2word, word2id, emb_matrix,
                                    char2id, id2char) if has_cnn else QAModel(
                                        FLAGS, id2word, word2id, emb_matrix))

                with tf.Session(config=config) as sess:
                    # Initialize bestmodel directory
                    ckpt_load_dir = (os.path.join(
                        EXPERIMENTS_DIR, model_exp_name, "best_checkpoint")
                                     if not FLAGS.is_codalab_eval else
                                     cl_bundle_name)

                    # Load model from ckpt_load_dir
                    initialize_model(sess,
                                     qa_model,
                                     ckpt_load_dir,
                                     expect_exists=True)

                    # Get a predicted answer for each example in the data
                    # Return a mapping answers_dict from uuid to answer
                    # WE MUST USE A DEEPCOPY HERE!!
                    qn_uuid_data_ = copy.deepcopy(qn_uuid_data)
                    context_token_data_ = copy.deepcopy(context_token_data)
                    qn_token_data_ = copy.deepcopy(qn_token_data)
                    answers_dict = generate_answers(sess, qa_model, word2id,
                                                    char2id, qn_uuid_data_,
                                                    context_token_data_,
                                                    qn_token_data_,
                                                    ensemble_model_pred)

            sum_weights = (';'.join(sum_weights_list)
                           if FLAGS.sum_weights.lower() == 'default'
                           else FLAGS.sum_weights)
            pred_start_batches, pred_end_batches = resolve_ensemble_model_preds(
                ensemble_model_pred, sum_weights=sum_weights)

            final_ans_dict = generate_answers_with_start_end(
                FLAGS, word2id, char2id, qn_uuid_data, context_token_data,
                qn_token_data, pred_start_batches, pred_end_batches)

            # Write the uuid->answer mapping to a JSON file in the root dir
            print "Writing predictions to %s..." % FLAGS.json_out_path
            with io.open(FLAGS.json_out_path, 'w', encoding='utf-8') as f:
                f.write(unicode(json.dumps(final_ans_dict,
                                           ensure_ascii=False)))
                print "Wrote predictions to %s" % FLAGS.json_out_path
        else:
            with tf.Session(config=config) as sess:
                # Load model from ckpt_load_dir
                initialize_model(sess,
                                 qa_model,
                                 FLAGS.ckpt_load_dir,
                                 expect_exists=True)

                # Get a predicted answer for each example in the data
                # Return a mapping answers_dict from uuid to answer
                answers_dict = generate_answers(sess, qa_model, word2id,
                                                char2id, qn_uuid_data,
                                                context_token_data,
                                                qn_token_data)

                # Write the uuid->answer mapping to a JSON file in the root dir
                print "Writing predictions to %s..." % FLAGS.json_out_path
                with io.open(FLAGS.json_out_path, 'w', encoding='utf-8') as f:
                    f.write(
                        unicode(json.dumps(answers_dict, ensure_ascii=False)))
                    print "Wrote predictions to %s" % FLAGS.json_out_path

    else:
        raise Exception("Unexpected value of FLAGS.mode: %s" % FLAGS.mode)
Example #10
def main(unused_argv):
    # Print an error message if you've entered flags incorrectly
    if len(unused_argv) != 1:
        raise Exception("There is a problem with how you entered flags: %s" %
                        unused_argv)

    # Check for Python 2
    if sys.version_info[0] != 2:
        raise Exception(
            "ERROR: You must use Python 2 but you are running Python %i" %
            sys.version_info[0])

    # Print out Tensorflow version
    print "This code was developed and tested on TensorFlow 1.4.1. Your TensorFlow version: %s" % tf.__version__

    # Define train_dir
    if not FLAGS.experiment_name and not FLAGS.train_dir and \
            FLAGS.mode != "official_eval" and FLAGS.mode!= "ensemble_write" and FLAGS.mode!= "ensemble_predict":
        raise Exception(
            "You need to specify either --experiment_name or --train_dir")
    FLAGS.train_dir = FLAGS.train_dir or os.path.join(EXPERIMENTS_DIR,
                                                      FLAGS.experiment_name)

    # Initialize bestmodel directory
    bestmodel_dir = os.path.join(FLAGS.train_dir, "best_checkpoint")

    # Define path for glove vecs
    FLAGS.glove_path = FLAGS.glove_path or os.path.join(
        DEFAULT_DATA_DIR, "glove.6B.{}d.txt".format(FLAGS.embedding_size))

    # Load embedding matrix and vocab mappings
    emb_matrix, word2id, id2word = get_glove(FLAGS.glove_path,
                                             FLAGS.embedding_size)

    # Get filepaths to train/dev datafiles for tokenized queries, contexts and answers
    train_context_path = os.path.join(FLAGS.data_dir, "train.context")
    train_qn_path = os.path.join(FLAGS.data_dir, "train.question")
    train_ans_path = os.path.join(FLAGS.data_dir, "train.span")
    dev_context_path = os.path.join(FLAGS.data_dir, "dev.context")
    dev_qn_path = os.path.join(FLAGS.data_dir, "dev.question")
    dev_ans_path = os.path.join(FLAGS.data_dir, "dev.span")
    small_context_path = os.path.join(FLAGS.data_dir, "small.context")
    small_qn_path = os.path.join(FLAGS.data_dir, "small.question")
    small_ans_path = os.path.join(FLAGS.data_dir, "small.span")
    qa_model = None
    # Initialize model
    if FLAGS.model_name == "baseline":
        print("Using baseline model")
        qa_model = QABaselineModel(FLAGS, id2word, word2id, emb_matrix)
    elif FLAGS.model_name == "bidaf":
        qa_model = QABidafModel(FLAGS, id2word, word2id, emb_matrix)
    elif FLAGS.model_name == "selfattn":
        print("Using Self Attention")
        qa_model = QASelfAttnModel(FLAGS, id2word, word2id, emb_matrix)
    elif FLAGS.model_name == "stack":
        print("Using stack BIDAF/SA")
        qa_model = QAStackModel(FLAGS, id2word, word2id, emb_matrix)
    elif FLAGS.model_name == "pointer":
        print("Using pointer model")
        qa_model = QAPointerModel(FLAGS, id2word, word2id, emb_matrix)

    # Some GPU settings
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    # Split by mode
    if FLAGS.mode == "train":
        # Setup train dir and logfile
        if not os.path.exists(FLAGS.train_dir):
            os.makedirs(FLAGS.train_dir)
        file_handler = logging.FileHandler(
            os.path.join(FLAGS.train_dir, "log.txt"))
        logging.getLogger().addHandler(file_handler)

        # Save a record of flags as a .json file in train_dir
        with open(os.path.join(FLAGS.train_dir, "flags.json"), 'w') as fout:
            json.dump(FLAGS.__flags, fout)

        # Make bestmodel dir if necessary
        if not os.path.exists(bestmodel_dir):
            os.makedirs(bestmodel_dir)

        with tf.Session(config=config) as sess:

            # Load most recent checkpoint if one exists, otherwise initialize from scratch
            initialize_model(sess,
                             qa_model,
                             FLAGS.train_dir,
                             expect_exists=False)

            # Train
            qa_model.train(sess, train_context_path, train_qn_path,
                           train_ans_path, dev_qn_path, dev_context_path,
                           dev_ans_path)

    elif FLAGS.mode == "test":
        # Setup train dir and logfile
        if not os.path.exists(FLAGS.train_dir):
            os.makedirs(FLAGS.train_dir)
        file_handler = logging.FileHandler(
            os.path.join(FLAGS.train_dir, "log.txt"))
        logging.getLogger().addHandler(file_handler)

        # Save a record of flags as a .json file in train_dir
        with open(os.path.join(FLAGS.train_dir, "flags.json"), 'w') as fout:
            json.dump(FLAGS.__flags, fout)

        # Make bestmodel dir if necessary
        if not os.path.exists(bestmodel_dir):
            os.makedirs(bestmodel_dir)

        with tf.Session(config=config) as sess:

            # Load most recent checkpoint if one exists, otherwise initialize from scratch
            initialize_model(sess,
                             qa_model,
                             FLAGS.train_dir,
                             expect_exists=False)

            # Train on the small dataset (sanity-check run)
            qa_model.train(sess, small_context_path, small_qn_path,
                           small_ans_path, dev_qn_path, dev_context_path,
                           dev_ans_path)

    elif FLAGS.mode == "show_examples":
        with tf.Session(config=config) as sess:

            # Load best model
            initialize_model(sess, qa_model, bestmodel_dir, expect_exists=True)

            # Show examples with F1/EM scores
            _, _ = qa_model.check_f1_em(sess,
                                        dev_context_path,
                                        dev_qn_path,
                                        dev_ans_path,
                                        "dev",
                                        num_samples=10,
                                        print_to_screen=True)

    elif FLAGS.mode == "visualize":
        with tf.Session(config=config) as sess:

            # Load best model
            initialize_model(sess, qa_model, bestmodel_dir, expect_exists=True)
            # Get distribution of begin and end spans.
            begin_total, end_total, f1_em_scores = qa_model.get_spans(
                sess, dev_context_path, dev_qn_path, dev_ans_path, "dev")
            np.save(os.path.join(FLAGS.train_dir, "begin_span"), begin_total)
            np.save(os.path.join(FLAGS.train_dir, "end_span"), end_total)
            np.save(os.path.join(FLAGS.train_dir, "f1_em"), f1_em_scores)

            # Save context-to-question (C2Q) attention distributions for later visualization
            c2q_attn = qa_model.get_c2q_attention(sess,
                                                  dev_context_path,
                                                  dev_qn_path,
                                                  dev_ans_path,
                                                  "dev",
                                                  num_samples=0)
            np.save(os.path.join(FLAGS.train_dir, "c2q_attn"), c2q_attn)
            q2c_attn = qa_model.get_q2c_attention(sess,
                                                  dev_context_path,
                                                  dev_qn_path,
                                                  dev_ans_path,
                                                  "dev",
                                                  num_samples=0)
            if len(q2c_attn) > 0:
                np.save(os.path.join(FLAGS.train_dir, "q2c_attn"), q2c_attn)
            else:
                print "This model doesn't have question-to-context attention"
            self_attn = qa_model.get_self_attention(sess,
                                                    dev_context_path,
                                                    dev_qn_path,
                                                    dev_ans_path,
                                                    "dev",
                                                    num_samples=20)
            if len(self_attn) > 0:
                np.save(os.path.join(FLAGS.train_dir, "self_attn"), self_attn)
            else:
                print "This model doesn't have self attention"

    elif FLAGS.mode == "ensemble_write":
        if FLAGS.json_in_path == "":
            raise Exception(
                "For ensembling mode, you need to specify --json_in_path")
        if FLAGS.ckpt_load_dir == "":
            raise Exception(
                "For ensembling mode, you need to specify --ckpt_load_dir")
        if FLAGS.ensemble_name == "":
            raise Exception(
                "For ensembling mode, you need to specify --ensemble_name")
        # Read the JSON data from file
        qn_uuid_data, context_token_data, qn_token_data = get_json_data(
            FLAGS.json_in_path)

        with tf.Session(config=config) as sess:
            # Load model
            initialize_model(sess,
                             qa_model,
                             FLAGS.ckpt_load_dir,
                             expect_exists=True)

            distributions = generate_distributions(sess, qa_model, word2id,
                                                   qn_uuid_data,
                                                   context_token_data,
                                                   qn_token_data)
            # distributions maps uuid -> [start_dist, end_dist]
            # Write the uuid -> distribution mapping to a .json file in the ensemble dir
            save_path = os.path.join(
                FLAGS.ensemble_dir,
                "distribution_" + FLAGS.ensemble_name + '.json')
            print "Writing distributions to %s..." % save_path
            with io.open(save_path, 'w', encoding='utf-8') as f:
                f.write(unicode(json.dumps(distributions, ensure_ascii=False)))
                print "Wrote distributions to %s" % save_path

    elif FLAGS.mode == "ensemble_predict":
        if FLAGS.json_in_path == "":
            raise Exception(
                "For ensembling mode, you need to specify --json_in_path")
        models = ['stack', 'pointer']
        distributions = [
            os.path.join(FLAGS.ensemble_dir, "distribution_" + m + ".json")
            for m in models
        ]
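        # Sum each member's [start_dist, end_dist] arrays, keyed by question uuid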
        total_dict = {}
        for d in distributions:
            with open(d) as prediction_file:
                print d
                predictions = json.load(prediction_file)
                for (key, item) in predictions.items():
                    if total_dict.get(key, None) is None:
                        total_dict[key] = np.asarray(item)
                    else:
                        total_dict[key] += np.asarray(item)

        # Average the summed start/end distributions over the ensemble members
        for (key, item) in total_dict.items():
            total_dict[key][0] /= len(models)
            total_dict[key][1] /= len(models)
        # Read the JSON data from file
        qn_uuid_data, context_token_data, qn_token_data = get_json_data(
            FLAGS.json_in_path)
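        # sess is passed as None here, presumably because the answers are decoded
        # from the precomputed, averaged distributions rather than a live model (assumption)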
        answers_dict = generate_answers_from_dist(None, qa_model, total_dict,
                                                  word2id, qn_uuid_data,
                                                  context_token_data,
                                                  qn_token_data)

        # Write the uuid->answer mapping to a .json file in root dir
        print "Writing predictions to %s..." % FLAGS.json_out_path
        with io.open(FLAGS.json_out_path, 'w', encoding='utf-8') as f:
            f.write(unicode(json.dumps(answers_dict, ensure_ascii=False)))
            print "Wrote predictions to %s" % FLAGS.json_out_path

    elif FLAGS.mode == "official_eval":
        if FLAGS.json_in_path == "":
            raise Exception(
                "For official_eval mode, you need to specify --json_in_path")
        if FLAGS.ckpt_load_dir == "":
            raise Exception(
                "For official_eval mode, you need to specify --ckpt_load_dir")

        # Read the JSON data from file
        qn_uuid_data, context_token_data, qn_token_data = get_json_data(
            FLAGS.json_in_path)

        with tf.Session(config=config) as sess:

            # Load model from ckpt_load_dir
            initialize_model(sess,
                             qa_model,
                             FLAGS.ckpt_load_dir,
                             expect_exists=True)

            # Get a predicted answer for each example in the data
            # Return a mapping answers_dict from uuid to answer
            answers_dict = generate_answers(sess, qa_model, word2id,
                                            qn_uuid_data, context_token_data,
                                            qn_token_data)

            # Write the uuid->answer mapping to a .json file in root dir
            print "Writing predictions to %s..." % FLAGS.json_out_path
            with io.open(FLAGS.json_out_path, 'w', encoding='utf-8') as f:
                f.write(unicode(json.dumps(answers_dict, ensure_ascii=False)))
                print "Wrote predictions to %s" % FLAGS.json_out_path

    else:
        raise Exception("Unexpected value of FLAGS.mode: %s" % FLAGS.mode)