# Flattened script (originally one physical line): trains a BiDAF model on SQuAD v1.1.
# NOTE(review): the trailing train_and_evaluate(...) call is truncated in this chunk —
# its remaining arguments are not visible here; do not assume defaults.

# Silence TensorFlow's own logging; progress is reported via the stdlib logger instead.
tf.logging.set_verbosity(tf.logging.ERROR)
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')

# Data locations; empty prefixes mean paths are resolved relative to the
# current working directory.
data_folder = ''
embedding_folder = ''
train_file = data_folder + "train-v1.1.json"
dev_file = data_folder + "dev-v1.1.json"

# Load the SQuAD train/dev splits and an evaluator bound to the dev file.
reader = SquadReader()
train_data = reader.read(train_file)
eval_data = reader.read(dev_file)
evaluator = SquadEvaluator(dev_file)

# Build the vocabulary over BOTH splits; words/characters occurring fewer than
# min_word_count / min_char_count times are excluded.
vocab = Vocabulary()
vocab.build_vocab(train_data + eval_data, min_word_count=3, min_char_count=10)
# Look up pretrained word vectors for the vocabulary (file name indicates
# 100-dimensional GloVe 6B vectors).
word_embedding = vocab.make_word_embedding(embedding_folder + "glove.6B.100d.txt")

# Batch generators for each split; only the train generator runs in training
# mode (training=True — presumably enables shuffling; verify in BatchGenerator).
train_batch_generator = BatchGenerator(vocab, train_data, batch_size=60, training=True)
eval_batch_generator = BatchGenerator(vocab, eval_data, batch_size=60)

# Build the BiDAF model with the pretrained embeddings, compile it with Adam
# (learning rate 0.001), then train and evaluate.
model = BiDAF(vocab, pretrained_word_embedding=word_embedding)
model.compile(tf.train.AdamOptimizer, 0.001)
# Truncated call — remaining keyword arguments lie beyond this chunk.
model.train_and_evaluate(train_batch_generator, eval_batch_generator, evaluator,
# Flattened script (originally one physical line): prepares CoQA v1.0 data for a
# BERT-based reader. NOTE(review): the final BatchGenerator(...) call is
# truncated in this chunk — the additional_fields list is cut off mid-literal.

from sogou_mrc.data.vocabulary import Vocabulary
import logging
import sys

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')

# NOTE(review): the meaning of the -1 constructor argument is not visible here —
# presumably a dialog-history window setting; verify against CoQAReader.
coqa_reader = CoQAReader(-1)

# Data locations; an empty folder prefix resolves relative to the working directory.
data_folder = ''
train_filename = "coqa-train-v1.0.json"
eval_filename = 'coqa-dev-v1.0.json'

# Case-sensitive vocabulary (do_lowercase=False), built over BOTH splits.
vocab = Vocabulary(do_lowercase=False)
train_data = coqa_reader.read(data_folder + train_filename, 'train')
eval_data = coqa_reader.read(data_folder + eval_filename, 'dev')
vocab.build_vocab(train_data + eval_data)
evaluator = CoQAEvaluator(data_folder + eval_filename)

# Convert raw examples via BertDataHelper; bert_dir points at the directory
# holding the BERT resources (presumably vocab/checkpoint files — verify).
bert_dir = 'model'
bert_data_helper = BertDataHelper(bert_dir)
train_data = bert_data_helper.convert(train_data, data='coqa')
eval_data = bert_data_helper.convert(eval_data, data='coqa')

# Mid-script import kept in place to preserve the original statement order.
from sogou_mrc.data.batch_generator import BatchGenerator
# Truncated call — the additional_fields list continues beyond this chunk.
train_batch_generator = BatchGenerator(
    vocab, train_data, training=True, batch_size=6,
    additional_fields=[