def main(): parser = argparse.ArgumentParser("Train our ELMo model on SQuAD") parser.add_argument("output_dir") parser.add_argument("--dim", type=int, default=90) parser.add_argument("--l2", type=float, default=0) parser.add_argument("--mode", choices=["input", "output", "both", "none"], default="both") parser.add_argument("--top_layer_only", action="store_true") args = parser.parse_args() out = args.output_dir + "-" + datetime.now().strftime("%m%d-%H%M%S") dim = args.dim recurrent_layer = CudnnGru(dim, w_init=TruncatedNormal(stddev=0.05)) params = trainer.TrainParams(trainer.SerializableOptimizer( "Adadelta", dict(learning_rate=1.0)), ema=0.999, max_checkpoints_to_keep=2, async_encoding=10, num_epochs=24, log_period=30, eval_period=1200, save_period=1200, best_weights=("dev", "b17/text-f1"), eval_samples=dict(dev=None, train=8000)) lm_reduce = MapperSeq( ElmoLayer(args.l2, layer_norm=False, top_layer_only=args.top_layer_only), DropoutLayer(0.5), ) model = AttentionWithElmo( encoder=DocumentAndQuestionEncoder(SingleSpanAnswerEncoder()), lm_model=SquadContextConcatSkip(), append_before_atten=(args.mode == "both" or args.mode == "output"), append_embed=(args.mode == "both" or args.mode == "input"), max_batch_size=128, word_embed=FixedWordEmbedder(vec_name="glove.840B.300d", word_vec_init_scale=0, learn_unk=False, cpu=True), char_embed=CharWordEmbedder(LearnedCharEmbedder(word_size_th=14, char_th=49, char_dim=20, init_scale=0.05, force_cpu=True), MaxPool(Conv1d(100, 5, 0.8)), shared_parameters=True), embed_mapper=SequenceMapperSeq( VariationalDropoutLayer(0.8), recurrent_layer, VariationalDropoutLayer(0.8), ), lm_reduce=None, lm_reduce_shared=lm_reduce, per_sentence=False, memory_builder=NullBiMapper(), attention=BiAttention(TriLinear(bias=True), True), match_encoder=SequenceMapperSeq( FullyConnected(dim * 2, activation="relu"), ResidualLayer( SequenceMapperSeq( VariationalDropoutLayer(0.8), recurrent_layer, VariationalDropoutLayer(0.8), StaticAttentionSelf(TriLinear(bias=True), ConcatWithProduct()), FullyConnected(dim * 2, activation="relu"), )), VariationalDropoutLayer(0.8)), predictor=BoundsPredictor( ChainBiMapper(first_layer=recurrent_layer, second_layer=recurrent_layer))) batcher = ClusteredBatcher(45, ContextLenKey(), False, False) data = DocumentQaTrainingData(SquadCorpus(), None, batcher, batcher) with open(__file__, "r") as f: notes = f.read() notes = str(sorted(args.__dict__.items(), key=lambda x: x[0])) + "\n" + notes trainer.start_training( data, model, params, [LossEvaluator(), SpanEvaluator(bound=[17], text_eval="squad")], ModelDir(out), notes)
def main():
    parser = argparse.ArgumentParser(description='Train a model on document-level RACE')
    parser.add_argument('mode', choices=["paragraph", "confidence", "shared-norm", "merge", "sigmoid"])
    parser.add_argument("name", help="Output directory")
    args = parser.parse_args()
    mode = args.mode

    out = args.name + "-" + datetime.now().strftime("%m%d-%H%M%S")

    corpus = SquadCorpus()
    if mode == "merge":
        # Adds paragraph start tokens, since we will be concatenating paragraphs together
        pre = WithIndicators(True, para_tokens=False, doc_start_token=False)
    else:
        pre = None

    model = get_model(50, 100, args.mode, pre)

    if mode == "paragraph":
        # Run in the "standard" known-paragraph setting
        if model.preprocessor is not None:
            raise NotImplementedError()
        n_epochs = 25  # from 26 for dev vs dev
        num_choices = 4
        train_batching = ClusteredBatcher(60, ContextLenBucketedKey(3), True, False)
        eval_batching = ClusteredBatcher(60, ContextLenKey(), False, False)
        data = DocumentQaTrainingData(corpus, None, train_batching, eval_batching, num_choices)
        eval = [LossEvaluator(), MultiChoiceEvaluator(num_choices)]
    else:
        eval_set_mode = {
            "confidence": "flatten",
            "sigmoid": "flatten",
            "shared-norm": "group",
            "merge": "merge"
        }[mode]
        eval_dataset = RandomParagraphSetDatasetBuilder(100, eval_set_mode, True, 0)

        if mode == "confidence" or mode == "sigmoid":
            if mode == "sigmoid":
                # Needs to be trained for a really long time for reasons unknown; even this might be too small
                n_epochs = 100
            else:
                # More epochs since we only "see" the label every other epoch or so
                n_epochs = 50
            train_batching = ClusteredBatcher(45, ContextLenBucketedKey(3), True, False)
            data = PreprocessedData(
                SquadCorpus(),
                SquadTfIdfRanker(NltkPlusStopWords(True), 4, True, model.preprocessor),
                StratifyParagraphsBuilder(train_batching, 1),
                eval_dataset,
                eval_on_verified=False,
            )
        else:
            n_epochs = 26
            data = PreprocessedData(
                SquadCorpus(),
                SquadTfIdfRanker(NltkPlusStopWords(True), 4, True, model.preprocessor),
                StratifyParagraphSetsBuilder(25, args.mode == "merge", True, 1),
                eval_dataset,
                eval_on_verified=False,
            )
        eval = [LossEvaluator()]

    data.preprocess(1)

    with open(__file__, "r") as f:
        notes = f.read()
    notes = args.mode + "\n" + notes

    trainer.start_training(data, model, train_params(n_epochs), eval, model_dir.ModelDir(out), notes)
def main():
    parser = argparse.ArgumentParser(description='Train a model on TriviaQA unfiltered')
    parser.add_argument('mode', choices=["confidence", "merge", "shared-norm", "sigmoid", "paragraph"])
    parser.add_argument("name", help="Where to store the model")
    parser.add_argument("-t", "--n_tokens", default=400, type=int, help="Paragraph size")
    parser.add_argument('-n', '--n_processes', type=int, default=2,
                        help="Number of processes to preprocess the data with "
                             "(i.e., to select which paragraphs to train on)")
    args = parser.parse_args()
    mode = args.mode

    out = args.name + "-" + datetime.now().strftime("%m%d-%H%M%S")

    model = get_model(100, 140, mode, WithIndicators())

    extract = ExtractMultiParagraphsPerQuestion(MergeParagraphs(args.n_tokens),
                                                ShallowOpenWebRanker(16),
                                                model.preprocessor, intern=True)

    eval = [LossEvaluator(), MultiParagraphSpanEvaluator(8, "triviaqa", mode != "merge")]
    oversample = [1] * 4

    if mode == "paragraph":
        n_epochs = 120
        test = RandomParagraphSetDatasetBuilder(120, "flatten", True, oversample)
        train = StratifyParagraphsBuilder(ClusteredBatcher(60, ContextLenBucketedKey(3), True),
                                          oversample, only_answers=True)
    elif mode == "confidence" or mode == "sigmoid":
        if mode == "sigmoid":
            n_epochs = 640
        else:
            n_epochs = 160
        test = RandomParagraphSetDatasetBuilder(120, "flatten", True, oversample)
        train = StratifyParagraphsBuilder(ClusteredBatcher(60, ContextLenBucketedKey(3), True), oversample)
    else:
        n_epochs = 80
        test = RandomParagraphSetDatasetBuilder(120, "merge" if mode == "merge" else "group", True, oversample)
        train = StratifyParagraphSetsBuilder(30, mode == "merge", True, oversample)

    data = TriviaQaWebDataset()

    params = TrainParams(SerializableOptimizer("Adadelta", dict(learning_rate=1)),
                         num_epochs=n_epochs, ema=0.999, max_checkpoints_to_keep=2,
                         async_encoding=10, log_period=30, eval_period=1800, save_period=1800,
                         eval_samples=dict(dev=None, train=6000))

    data = PreprocessedData(data, extract, train, test, eval_on_verified=False)
    data.preprocess(args.n_processes, 1000)

    with open(__file__, "r") as f:
        notes = f.read()
    notes = "Mode: " + args.mode + "\n" + notes

    trainer.start_training(data, model, params, eval, model_dir.ModelDir(out), notes)
def main():
    parser = argparse.ArgumentParser(description='Train a model on document-level SQuAD')
    parser.add_argument('mode', choices=["paragraph", "confidence", "shared-norm", "merge", "sigmoid"])
    parser.add_argument("name", help="Output directory")
    parser.add_argument("--no-tfidf", action='store_true', help="Don't add TF-IDF negative examples")
    parser.add_argument("--weighted-questions", action='store_true', help="Read a weighted training dataset")
    args = parser.parse_known_args()[0]
    mode = args.mode

    out = args.name + "-" + datetime.now().strftime("%m%d-%H%M%S")

    corpus = SquadCorpus()
    if mode == "merge":
        # Adds paragraph start tokens, since we will be concatenating paragraphs together
        pre = WithIndicators(True, para_tokens=False, doc_start_token=False)
    else:
        pre = None

    model = get_model(50, 100, args.mode, pre)

    if mode == "paragraph":
        if model.preprocessor is not None:
            raise NotImplementedError()
        n_epochs = 26
        train_batching = ClusteredBatcher(45, ContextLenBucketedKey(3), True, False)
        eval_batching = ClusteredBatcher(45, ContextLenKey(), False, False)
        data = DocumentQaTrainingData(corpus, None, train_batching, eval_batching)
        eval = [LossEvaluator(), SpanEvaluator(bound=[17], text_eval="squad")]
    else:
        eval_set_mode = {
            "confidence": "flatten",
            "sigmoid": "flatten",
            "shared-norm": "group",
            "merge": "merge"
        }[mode]
        eval_dataset = RandomParagraphSetDatasetBuilder(100, eval_set_mode, True, 0)

        if args.no_tfidf:
            if args.weighted_questions:
                prepro = SquadWeighted(model.preprocessor)
            else:
                prepro = SquadDefault(model.preprocessor)
        else:
            if args.weighted_questions:
                raise NotImplementedError("Weighted questions not supported for tf-idf mode yet")
            prepro = SquadTfIdfRanker(NltkPlusStopWords(True), 4, True, model.preprocessor)

        if mode == "confidence" or mode == "sigmoid":
            if mode == "sigmoid":
                # Needs to be trained for a really long time for reasons unknown; even this might be too small
                n_epochs = 100
            else:
                # More epochs since we only "see" the label every other epoch or so
                n_epochs = 50
            if args.no_tfidf:
                n_epochs = 15  # We see the whole dataset every epoch
            train_batching = ClusteredBatcher(45, ContextLenBucketedKey(3), True, False)
            if args.weighted_questions:
                dataset_builder = WeightedStratifyParagraphsBuilder(train_batching, 1)
            else:
                dataset_builder = StratifyParagraphsBuilder(train_batching, 1)
            data = PreprocessedData(
                SquadCorpus(),
                prepro,
                dataset_builder,
                eval_dataset,
                eval_on_verified=False,
            )
        else:
            n_epochs = 26
            if args.weighted_questions:
                dataset_builder = WeightedStratifyParagraphSetsBuilder(25, args.mode == "merge", True, 1)
            else:
                dataset_builder = StratifyParagraphSetsBuilder(25, args.mode == "merge", True, 1)
            data = PreprocessedData(
                SquadCorpus(),
                prepro,
                dataset_builder,
                eval_dataset,
                eval_on_verified=False,
            )
        eval = [LossEvaluator(), MultiParagraphSpanEvaluator(17, "squad")]

    data.preprocess(1)

    with open(__file__, "r") as f:
        notes = f.read()
    notes = args.mode + "\n" + notes

    trainer.start_training(data, model, train_params(n_epochs), eval, model_dir.ModelDir(out), notes)
def main():
    parser = argparse.ArgumentParser(description='Train a model on TriviaQA web')
    parser.add_argument('mode', choices=["paragraph-level", "confidence", "merge",
                                         "shared-norm", "sigmoid", "shared-norm-600"])
    parser.add_argument("name", help="Where to store the model")
    parser.add_argument('-n', '--n_processes', type=int, default=2,
                        help="Number of processes to preprocess the data with "
                             "(i.e., to select which paragraphs to train on)")
    args = parser.parse_args()
    mode = args.mode

    out = args.name + "-" + datetime.now().strftime("%m%d-%H%M%S")

    model = get_model(100, 140, mode, WithIndicators())

    stop = NltkPlusStopWords(True)
    if mode == "paragraph-level":
        extract = ExtractSingleParagraph(MergeParagraphs(400), TopTfIdf(stop, 1),
                                         model.preprocessor, intern=True)
    elif mode == "shared-norm-600":
        extract = ExtractMultiParagraphs(MergeParagraphs(600), TopTfIdf(stop, 4),
                                         model.preprocessor, intern=True)
    else:
        extract = ExtractMultiParagraphs(MergeParagraphs(400), TopTfIdf(stop, 4),
                                         model.preprocessor, intern=True)

    if mode == "paragraph-level":
        n_epochs = 16
        train = ParagraphAndQuestionsBuilder(ClusteredBatcher(60, ContextLenBucketedKey(3), True))
        test = ParagraphAndQuestionsBuilder(ClusteredBatcher(60, ContextLenKey(), False))
        n_dev, n_train = 21000, 12000
        eval = [LossEvaluator(), SpanEvaluator([4, 8], "triviaqa")]
    else:
        eval = [LossEvaluator(), MultiParagraphSpanEvaluator(8, "triviaqa", mode != "merge")]
        # We sample two paragraphs per (question, doc) pair, so evaluate on fewer questions
        n_dev, n_train = 15000, 8000

        if mode == "confidence" or mode == "sigmoid":
            if mode == "sigmoid":
                # Trains very slowly, do this at your own risk
                n_epochs = 71
            else:
                n_epochs = 28
            test = RandomParagraphSetDatasetBuilder(120, "flatten", True, 1)
            train = StratifyParagraphsBuilder(ClusteredBatcher(60, ContextLenBucketedKey(3), True), 0, 1)
        else:
            n_epochs = 14
            test = RandomParagraphSetDatasetBuilder(120, "merge" if mode == "merge" else "group", True, 1)
            train = StratifyParagraphSetsBuilder(35, mode == "merge", True, 1)

    data = TriviaQaWebDataset()

    params = get_triviaqa_train_params(n_epochs, n_dev, n_train)

    data = PreprocessedData(data, extract, train, test, eval_on_verified=False)
    data.preprocess(args.n_processes, 1000)

    with open(__file__, "r") as f:
        notes = f.read()
    notes = "*" * 10 + "\nMode: " + args.mode + "\n" + "*" * 10 + "\n" + notes

    trainer.start_training(data, model, params, eval, model_dir.ModelDir(out), notes)
class RandomParagraphSetDataset(Dataset):
    """
    Sample multiple paragraphs for each question and include them in the same batch
    """

    def __init__(self, questions: List[MultiParagraphQuestion], true_len: int,
                 n_paragraphs: int, batch_size: int, mode: str,
                 force_answer: bool, oversample_first_answer: List[int]):
        self.mode = mode
        self.questions = questions
        self.force_answer = force_answer
        self.true_len = true_len
        self.n_paragraphs = n_paragraphs
        self.oversample_first_answer = oversample_first_answer
        self._n_pairs = sum(min(len(q.paragraphs), n_paragraphs) for q in questions)
        self.batcher = ClusteredBatcher(batch_size, lambda x: x.n_context_words, truncate_batches=True)

    def get_vocab(self):
        voc = set()
        for q in self.questions:
            voc.update(q.question)
            for para in q.paragraphs:
                voc.update(para.text)
        return voc

    def get_spec(self):
        max_q_len = max(len(q.question) for q in self.questions)
        max_c_len = max(max(len(p.text) for p in q.paragraphs) for q in self.questions)
        return ParagraphAndQuestionSpec(
            self.batcher.get_fixed_batch_size() if self.mode == "merge" else None,
            max_q_len, max_c_len, None)

    def get_epoch(self):
        return self._build_expanded_batches(self.questions)

    def _build_expanded_batches(self, questions):
        # We first pick paragraph(s) for each question in the entire training set so we
        # can cluster by context length accurately
        out = []
        for q in questions:
            if len(q.paragraphs) <= self.n_paragraphs:
                selected = np.arange(len(q.paragraphs))
            elif not self.force_answer and len(self.oversample_first_answer) == 0:
                selected = np.random.choice(len(q.paragraphs), self.n_paragraphs, replace=False)
            else:
                if not self.force_answer:
                    raise NotImplementedError()
                with_answer = [i for i, p in enumerate(q.paragraphs) if len(p.answer_spans) > 0]
                for ix, over_sample in zip(list(with_answer), self.oversample_first_answer):
                    with_answer += [ix] * over_sample
                if with_answer:
                    answer_selection = with_answer[np.random.randint(len(with_answer))]
                    other = np.array([i for i, x in enumerate(q.paragraphs) if i != answer_selection])
                    selected = np.random.choice(other, min(len(other), self.n_paragraphs - 1), replace=False)
                    selected = np.insert(selected, 0, answer_selection)
                else:
                    selected = np.random.choice(len(q.paragraphs), self.n_paragraphs, replace=False)

            if self.mode == "flatten":
                for i in selected:
                    out.append(q.paragraphs[i].build_qa_pair(q.question, q.question_id, q.answer_text))
            else:
                out.append(ParagraphSelection(q, selected))

        out.sort(key=lambda x: x.n_context_words)

        if self.mode == "flatten":
            for batch in self.batcher.get_epoch(out):
                yield batch
        elif self.mode == "group":
            group = 0
            for selection_batch in self.batcher.get_epoch(out):
                batch = []
                for selected in selection_batch:
                    q = selected.question
                    for i in selected.selection:
                        para = q.paragraphs[i]
                        batch.append(para.build_qa_pair(q.question, q.question_id, q.answer_text, group))
                    group += 1
                yield batch
        elif self.mode == "merge":
            for selection_batch in self.batcher.get_epoch(out):
                batch = []
                for selected in selection_batch:
                    q = selected.question
                    paras = [q.paragraphs[i] for i in selected.selection]
                    para = paras[0].merge(paras)
                    batch.append(para.build_qa_pair(q.question, q.question_id, q.answer_text))
                yield batch
        else:
            raise RuntimeError()

    def get_samples(self, n_examples):
        questions = np.random.choice(self.questions, n_examples, replace=False)
        if self.mode == "flatten":
            n_batches = self.batcher.epoch_size(
                sum(min(len(q.paragraphs), self.n_paragraphs) for q in questions))
        else:
            n_batches = self.batcher.epoch_size(n_examples)
        return self._build_expanded_batches(questions), n_batches

    def percent_filtered(self):
        return (self.true_len - len(self.questions)) / self.true_len

    def __len__(self):
        if self.mode == "flatten":
            return self.batcher.epoch_size(self._n_pairs)
        else:
            return self.batcher.epoch_size(len(self.questions))
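# Illustrative-only sketch (not part of the library): the oversampled answer-paragraph selection
# performed in RandomParagraphSetDataset._build_expanded_batches above, isolated as a standalone
# helper so it can be inspected on toy inputs. `answer_counts` and `oversample_first_answer`
# are hypothetical stand-ins for the per-question data the dataset reads from its paragraphs.
import numpy as np


def select_paragraphs_sketch(n_total, answer_counts, n_paragraphs, oversample_first_answer):
    if n_total <= n_paragraphs:
        return np.arange(n_total)
    # Paragraphs containing an answer, with the first few duplicated according to
    # `oversample_first_answer` so they are drawn more often
    with_answer = [i for i, n in enumerate(answer_counts) if n > 0]
    for ix, over_sample in zip(list(with_answer), oversample_first_answer):
        with_answer += [ix] * over_sample
    if not with_answer:
        return np.random.choice(n_total, n_paragraphs, replace=False)
    # Always keep one answer-bearing paragraph, then fill the rest uniformly from the others
    answer_selection = with_answer[np.random.randint(len(with_answer))]
    other = np.array([i for i in range(n_total) if i != answer_selection])
    selected = np.random.choice(other, min(len(other), n_paragraphs - 1), replace=False)
    return np.insert(selected, 0, answer_selection)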
class StratifiedParagraphSetDataset(Dataset):
    """
    Sample multiple paragraphs each epoch and include them in the same batch,
    but stratify the sampling across epochs
    """

    def __init__(self, questions: List[MultiParagraphQuestion], true_len: int,
                 batch_size: int, force_answer: bool,
                 oversample_first_answer: List[int], merge: bool):
        self.oversample_first_answer = oversample_first_answer
        self.questions = questions
        self.merge = merge
        self.true_len = true_len
        self.batcher = ClusteredBatcher(batch_size, lambda x: x.n_context_words, truncate_batches=True)
        self._order = []
        self._on = np.zeros(len(questions), dtype=np.int32)
        for q in questions:
            if len(q.paragraphs) == 1:
                self._order.append(np.zeros((1, 1), dtype=np.int32))
                continue
            if force_answer:
                sample1 = [i for i, p in enumerate(q.paragraphs) if len(p.answer_spans) > 0]
            else:
                sample1 = list(range(len(q.paragraphs)))

            if (len(self.oversample_first_answer) > 0 and
                    not (force_answer and len(sample1) == 1)):  # don't bother if there is only one answer
                ix = 0
                for i, p in enumerate(q.paragraphs):
                    if len(p.answer_spans) > 0:
                        sample1 += [i] * self.oversample_first_answer[ix]
                        ix += 1
                        if ix >= len(self.oversample_first_answer):
                            break

            permutations = []
            for i in sample1:
                for j in range(len(q.paragraphs)):
                    if j != i:
                        permutations.append((i, j))
            permutations = np.array(permutations, dtype=np.int32)
            np.random.shuffle(permutations)
            self._order.append(permutations)

    def get_vocab(self):
        voc = set()
        for q in self.questions:
            voc.update(q.question)
            for para in q.paragraphs:
                voc.update(para.text)
        return voc

    def get_spec(self):
        max_q_len = max(len(q.question) for q in self.questions)
        max_c_len = max(max(len(p.text) for p in q.paragraphs) for q in self.questions)
        return ParagraphAndQuestionSpec(None, max_q_len, max_c_len, None)

    def get_epoch(self):
        return self._build_expanded_batches(self.questions)

    def _build_expanded_batches(self, questions):
        out = []
        for i, q in enumerate(questions):
            order = self._order[i]
            out.append(ParagraphSelection(q, order[self._on[i]]))
            self._on[i] += 1
            if self._on[i] == len(order):
                self._on[i] = 0
                np.random.shuffle(order)

        out.sort(key=lambda x: x.n_context_words)

        group = 0
        for selection_batch in self.batcher.get_epoch(out):
            batch = []
            for selected in selection_batch:
                q = selected.question
                if self.merge:
                    paras = [q.paragraphs[i] for i in selected.selection]
                    # Sort paragraphs by reading order, not rank order
                    paras.sort(key=lambda x: x.get_order())
                    answer_spans = []
                    text = []
                    for para in paras:
                        answer_spans.append(len(text) + para.answer_spans)
                        text += para.text
                    batch.append(ParagraphAndQuestion(
                        text, q.question,
                        TokenSpans(q.answer_text, np.concatenate(answer_spans)),
                        q.question_id))
                else:
                    for i in selected.selection:
                        para = q.paragraphs[i]
                        batch.append(para.build_qa_pair(q.question, q.question_id, q.answer_text, group))
                    group += 1
            yield batch

    def get_samples(self, n_examples):
        n_batches = self.batcher.epoch_size(n_examples)
        return self._build_expanded_batches(
            np.random.choice(self.questions, n_examples, replace=False)), n_batches

    def percent_filtered(self):
        return (self.true_len - len(self.questions)) / self.true_len

    def __len__(self):
        return self.batcher.epoch_size(len(self.questions))
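# Illustrative-only sketch (not part of the library): how StratifiedParagraphSetDataset cycles
# through each question's precomputed (answer paragraph, other paragraph) permutations, taking
# the next pair every epoch and reshuffling a question's permutations once they are exhausted.
# `orders` is a hypothetical stand-in for the per-question `self._order` arrays.
import numpy as np


def stratified_cycling_sketch(orders, n_epochs):
    on = np.zeros(len(orders), dtype=np.int32)  # mirrors self._on
    for _ in range(n_epochs):
        picks = []
        for i, order in enumerate(orders):
            picks.append(order[on[i]])
            on[i] += 1
            if on[i] == len(order):
                on[i] = 0
                np.random.shuffle(order)
        yield picks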
def main(): """ A close-as-possible impelemntation of BiDaF, its based on the `dev` tensorflow 1.1 branch of Ming's repo which, in particular, uses Adam not Adadelta. I was not able to replicate the results in paper using Adadelta, but with Adam i was able to get to 78.0 F1 on the dev set with this scripts. I believe this approach is an exact reproduction up the code in the repo, up to initializations. Notes: Exponential Moving Average is very important, as is early stopping. This is also in particualr best run on a GPU due to the large number of parameters and batch size involved. """ out = get_output_name_from_cli() train_params = TrainParams(SerializableOptimizer( "Adam", dict(learning_rate=0.001)), num_epochs=12, ema=0.999, async_encoding=10, log_period=30, eval_period=1000, save_period=1000, eval_samples=dict(dev=None, train=8000)) # recurrent_layer = BiRecurrentMapper(LstmCellSpec(100, keep_probs=0.8)) # recurrent_layer = FusedLstm() recurrent_layer = SequenceMapperSeq(DropoutLayer(0.8), CudnnLstm(100)) model = Attention( encoder=DocumentAndQuestionEncoder(SingleSpanAnswerEncoder()), word_embed=FixedWordEmbedder(vec_name="glove.6B.100d", word_vec_init_scale=0, learn_unk=False), char_embed=CharWordEmbedder(embedder=LearnedCharEmbedder(16, 49, 8), layer=ReduceLayer("max", Conv1d(100, 5, 0.8), mask=False), shared_parameters=True), word_embed_layer=None, embed_mapper=SequenceMapperSeq(HighwayLayer(activation="relu"), HighwayLayer(activation="relu"), recurrent_layer), question_mapper=None, context_mapper=None, memory_builder=NullBiMapper(), attention=BiAttention(TriLinear(bias=True), True), match_encoder=NullMapper(), predictor=BoundsPredictor( ChainConcat(start_layer=SequenceMapperSeq(recurrent_layer, recurrent_layer), end_layer=recurrent_layer))) with open(__file__, "r") as f: notes = f.read() eval = [ LossEvaluator(), SquadSpanEvaluator(), BoundedSquadSpanEvaluator([18]), SentenceSpanEvaluator() ] corpus = SquadCorpus() train_batching = ClusteredBatcher(60, ContextLenBucketedKey(3), True, False) eval_batching = ClusteredBatcher(60, ContextLenKey(), False, False) data = DocumentQaTrainingData(corpus, None, train_batching, eval_batching) trainer.start_training(data, model, train_params, eval, model_dir.ModelDir(out), notes, False)
def main():
    parser = argparse.ArgumentParser(description='Train a model on TriviaQA unfiltered')
    parser.add_argument('mode', choices=["confidence", "merge", "shared-norm", "sigmoid", "paragraph"])
    parser.add_argument("name", help="Where to store the model")
    parser.add_argument("-t", "--n_tokens", default=400, type=int, help="Paragraph size")
    parser.add_argument('-n', '--n_processes', type=int, default=2,
                        help="Number of processes to preprocess the data with "
                             "(i.e., to select which paragraphs to train on)")
    parser.add_argument("-s", "--source_dir", type=str, default=None, help="Directory to read input files from")
    parser.add_argument("--n_epochs", type=int, default=None, help="Max number of epochs to train for")
    parser.add_argument("--char_th", type=int, default=None, help="Character-level embedding threshold")
    parser.add_argument("--hl_dim", type=int, default=None, help="Hidden layer dim size")
    parser.add_argument("--regularization", type=int, default=None, help="Regularization weight (currently unused)")
    parser.add_argument("--LR", type=float, default=1.0, help="Learning rate")
    parser.add_argument("--save_every", type=int, default=1800, help="Save period")
    parser.add_argument("--init_from", type=str, default=None, help="Model directory to initialize weights from")
    args = parser.parse_args()
    mode = args.mode

    out = join('models', args.name)

    char_th = 100
    hl_dim = 140
    if args.char_th is not None:
        print(args.char_th)
        char_th = int(args.char_th)
        out += '--th' + str(char_th)
    if args.hl_dim is not None:
        print(args.hl_dim)
        hl_dim = int(args.hl_dim)
        out += '--hl' + str(hl_dim)

    if args.init_from is None:
        model = get_model(char_th, hl_dim, mode, WithIndicators())
    else:
        md = model_dir.ModelDir(args.init_from)
        model = md.get_model()

    extract = ExtractMultiParagraphsPerQuestion(MergeParagraphs(args.n_tokens),
                                                ShallowOpenWebRanker(16),
                                                model.preprocessor, intern=True)

    eval = [LossEvaluator(), MultiParagraphSpanEvaluator(8, "triviaqa", mode != "merge", per_doc=False)]
    oversample = [1] * 4

    if mode == "paragraph":
        n_epochs = 120
        test = RandomParagraphSetDatasetBuilder(120, "flatten", True, oversample)
        train = StratifyParagraphsBuilder(ClusteredBatcher(60, ContextLenBucketedKey(3), True),
                                          oversample, only_answers=True)
    elif mode == "confidence" or mode == "sigmoid":
        if mode == "sigmoid":
            n_epochs = 640
        else:
            n_epochs = 160
        test = RandomParagraphSetDatasetBuilder(120, "flatten", True, oversample)
        train = StratifyParagraphsBuilder(ClusteredBatcher(60, ContextLenBucketedKey(3), True), oversample)
    else:
        n_epochs = 80
        test = RandomParagraphSetDatasetBuilder(120, "merge" if mode == "merge" else "group", True, oversample)
        train = StratifyParagraphSetsBuilder(30, mode == "merge", True, oversample)

    if args.n_epochs is not None:
        n_epochs = args.n_epochs
        out += '--' + str(n_epochs)

    if args.LR != 1.0:
        out += '--' + str(args.LR)

    data = TriviaQaOpenDataset(args.source_dir)

    async_encoding = 10
    params = TrainParams(SerializableOptimizer("Adadelta", dict(learning_rate=args.LR)),
                         num_epochs=n_epochs, num_of_steps=250000, ema=0.999,
                         max_checkpoints_to_keep=2, async_encoding=async_encoding,
                         log_period=30, eval_period=1800, save_period=args.save_every,
                         eval_samples=dict(dev=None, train=6000),
                         regularization_weight=None)

    data = PreprocessedData(data, extract, train, test, eval_on_verified=False)
    data.preprocess(args.n_processes, 1000)

    with open(__file__, "r") as f:
        notes = f.read()
    notes = "Mode: " + args.mode + "\n" + notes

    if args.init_from is not None:
        init_from = model_dir.ModelDir(args.init_from).get_best_weights()
        if init_from is None:
            init_from = model_dir.ModelDir(args.init_from).get_latest_checkpoint()
    else:
        init_from = None

    trainer.start_training(data, model, params, eval, model_dir.ModelDir(out), notes,
                           initialize_from=init_from)
def run(): parser = argparse.ArgumentParser() parser.add_argument("input_data") parser.add_argument("output_data") parser.add_argument("--plot_dir", type=str, default=None) parser.add_argument("--model_dir", type=str, default="/tmp/model/document-qa") parser.add_argument("--lm_dir", type=str, default="/home/castle/data/lm/squad-context-concat-skip") parser.add_argument("--glove_dir", type=str, default="/home/castle/data/glove") parser.add_argument("--n", type=int, default=None) parser.add_argument("-b", "--batch_size", type=int, default=30) parser.add_argument("--ema", action="store_true") args = parser.parse_args() input_data = args.input_data output_path = args.output_data model_dir = ModelDir(args.model_dir) nltk.data.path.append("nltk_data") print("Loading data") docs = parse_squad_data(input_data, "", NltkAndPunctTokenizer(), False) pairs = split_docs(docs) dataset = ParagraphAndQuestionDataset(pairs, ClusteredBatcher(args.batch_size, ContextLenKey(), False, True)) print("Done, init model") model = model_dir.get_model() # small hack, just load the vector file at its expected location rather then using the config location loader = ResourceLoader(lambda a, b: load_word_vector_file(join(args.glove_dir, "glove.840B.300d.txt"), b)) lm_model = model.lm_model basedir = args.lm_dir plotdir = args.plot_dir lm_model.lm_vocab_file = join(basedir, "squad_train_dev_all_unique_tokens.txt") lm_model.options_file = join(basedir, "options_squad_lm_2x4096_512_2048cnn_2xhighway_skip.json") lm_model.weight_file = join(basedir, "squad_context_concat_lm_2x4096_512_2048cnn_2xhighway_skip.hdf5") lm_model.embed_weights_file = None model.set_inputs([dataset], loader) print("Done, building graph") sess = tf.Session() with sess.as_default(): pred = model.get_prediction() best_span = pred.get_best_span(17)[0] if plotdir != None: start_logits_op, end_logits_op = pred.get_logits() all_vars = tf.global_variables() + tf.get_collection(tf.GraphKeys.SAVEABLE_OBJECTS) dont_restore_names = {x.name for x in all_vars if x.name.startswith("bilm")} print(sorted(dont_restore_names)) vars = [x for x in all_vars if x.name not in dont_restore_names] print("Done, loading weights") checkpoint = model_dir.get_best_weights() if checkpoint is None: print("Loading most recent checkpoint") checkpoint = model_dir.get_latest_checkpoint() else: print("Loading best weights") saver = tf.train.Saver(vars) saver.restore(sess, checkpoint) if args.ema: ema = tf.train.ExponentialMovingAverage(0) saver = tf.train.Saver({ema.average_name(x): x for x in tf.trainable_variables()}) saver.restore(sess, checkpoint) sess.run(tf.variables_initializer([x for x in all_vars if x.name in dont_restore_names])) print("Done, starting evaluation") out = {} for i, batch in enumerate(dataset.get_epoch()): if args.n is not None and i == args.n: break print("On batch size [%d], now in %d th batch" % (args.batch_size, i +1)) enc = model.encode(batch, False) if plotdir != None: spans, start_logits, end_logits = sess.run([best_span, start_logits_op, end_logits_op], feed_dict=enc) for bi, point in enumerate(batch): q = ' '.join(point.question) c = point.paragraph.get_context() gt = ' | '.join(point.answer.answer_text) s, e = spans[bi] pred = point.get_original_text(s, e) start_dist = start_logits[bi] end_dist = end_logits[bi] c_interval = np.arange(0.0, start_dist.shape[0], 1) c_label = c plt.figure(1) plt.subplot(211) plt.plot(c_interval, start_dist, color='r') plt.title("Q : " + q + " // A : " + gt, fontsize=9) plt.text(0, 0, r'Predict : %s [%d:%d]' % (pred, s, e), 
color='b') axes = plt.gca() axes.set_ylim([-20, 20]) plt.subplot(212) plt.plot(c_interval, end_dist, color='g') plt.xticks(c_interval, c_label, rotation=90, fontsize=5) axes = plt.gca() axes.set_ylim([-20, 20]) plt.show() break else: spans = sess.run(best_span, feed_dict=enc) for (s, e), point in zip(spans, batch): out[point.question_id] = point.get_original_text(s, e) sess.close() print("Done, saving") with open(output_path, "w") as f: json.dump(out, f) print("Mission accomplished!")
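# Illustrative only (not part of this script): the predictions file written above is a JSON
# object mapping each SQuAD question id to its predicted answer string, which is the shape the
# official SQuAD v1.1 evaluation script expects. A minimal sanity check of that shape:
import json


def check_predictions_file_sketch(path):
    with open(path) as f:
        preds = json.load(f)
    assert all(isinstance(k, str) and isinstance(v, str) for k, v in preds.items())
    print("%d predictions, e.g. %s" % (len(preds), next(iter(preds.items()))))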
def main():
    parser = argparse.ArgumentParser(description='Evaluate a model on SQuAD')
    parser.add_argument('model', help='model directory to evaluate')
    parser.add_argument("-o", "--official_output", type=str, help="where to output an official result file")
    parser.add_argument('-n', '--sample_questions', type=int, default=None,
                        help="(for testing) run on a subset of questions")
    parser.add_argument('--answer_bounds', nargs='+', type=int, default=[17],
                        help="Max size of answer")
    parser.add_argument('-b', '--batch_size', type=int, default=45,
                        help="Batch size, larger sizes can be faster but use more memory")
    parser.add_argument('-s', '--step', default=None,
                        help="Weights to load, can be a checkpoint step or 'latest'")
    parser.add_argument('-c', '--corpus', choices=["dev", "train"], default="dev")
    parser.add_argument('--no_ema', action="store_true", help="Don't use EMA weights even if they exist")
    args = parser.parse_args()

    num_choices = 4
    model_dir = ModelDir(args.model)

    corpus = SquadCorpus()
    if args.corpus == "dev":
        questions = corpus.get_dev()
    else:
        questions = corpus.get_train()
    questions = split_docs(questions)

    if args.sample_questions:
        questions = sorted(questions, key=lambda x: x.question_id)
        np.random.RandomState(0).shuffle(questions)
        questions = questions[:args.sample_questions]

    questions.sort(key=lambda x: x.n_context_words, reverse=True)
    dataset = ParagraphAndQuestionDataset(
        questions, ClusteredBatcher(args.batch_size, ContextLenKey(), False, False), None, num_choices)

    evaluators = [MultiChoiceEvaluator(num_choices)]

    if args.step is not None:
        if args.step == "latest":
            checkpoint = model_dir.get_latest_checkpoint()
        else:
            checkpoint = model_dir.get_checkpoint(int(args.step))
    else:
        checkpoint = model_dir.get_best_weights()
        if checkpoint is not None:
            print("Using best weights")
        else:
            print("Using latest checkpoint")
            checkpoint = model_dir.get_latest_checkpoint()

    model = model_dir.get_model()

    evaluation = trainer.test(model, evaluators, {args.corpus: dataset},
                              corpus.get_resource_loader(), checkpoint, not args.no_ema)[args.corpus]

    # Print the scalar results in a two column table
    scalars = evaluation.scalars
    cols = list(sorted(scalars.keys()))
    table = [cols]
    header = ["Metric", ""]
    table.append([("%s" % scalars[x] if x in scalars else "-") for x in cols])
    print_table([header] + transpose_lists(table))

    # Save the official output
    if args.official_output is not None:
        data_to_dump = {}
        list_of_choices = ['A', 'B', 'C', 'D']
        q_ids = evaluation.per_sample["question_id"]
        correct_ans = evaluation.per_sample["correct answer"]
        correct_ids = evaluation.per_sample["correct index"]
        # Note: the per_sample keys below keep the evaluator's spelling ("predictied")
        pred_ids = evaluation.per_sample["predictied index"]
        pred_ans = evaluation.per_sample["predictied answer"]
        is_correct = evaluation.per_sample["is correct"]

        for ix, q_id in enumerate(q_ids):
            data_to_dump[q_id] = {
                'Is Correct': 'True' if is_correct[ix] else 'False',
                'predicted': [' '.join(pred_ans[ix]), list_of_choices[pred_ids[ix]]],
                'correct': [' '.join(correct_ans[ix]), list_of_choices[correct_ids[ix]]],
            }

        with open(args.official_output, "w") as f:
            json.dump(data_to_dump, f)
def run(): parser = argparse.ArgumentParser() parser.add_argument("input_data") parser.add_argument("output_data") parser.add_argument("--n", type=int, default=None) parser.add_argument("-b", "--batch_size", type=int, default=100) parser.add_argument("--ema", action="store_true") args = parser.parse_args() input_data = args.input_data output_path = args.output_data model_dir = ModelDir("model") nltk.data.path.append("nltk_data") print("Loading data") docs = parse_squad_data(input_data, "", NltkAndPunctTokenizer(), False) pairs = split_docs(docs) dataset = ParagraphAndQuestionDataset( pairs, ClusteredBatcher(args.batch_size, ContextLenKey(), False, True)) print("Done, init model") model = model_dir.get_model() # small hack, just load the vector file at its expected location rather then using the config location loader = ResourceLoader( lambda a, b: load_word_vector_file("glove.840B.300d.txt", b)) lm_model = model.lm_model basedir = "lm" lm_model.lm_vocab_file = join(basedir, "squad_train_dev_all_unique_tokens.txt") lm_model.options_file = join( basedir, "options_squad_lm_2x4096_512_2048cnn_2xhighway_skip.json") lm_model.weight_file = join( basedir, "squad_context_concat_lm_2x4096_512_2048cnn_2xhighway_skip.hdf5") lm_model.embed_weights_file = None model.set_inputs([dataset], loader) print("Done, building graph") sess = tf.Session() with sess.as_default(): pred = model.get_prediction() best_span = pred.get_best_span(17)[0] all_vars = tf.global_variables() + tf.get_collection( tf.GraphKeys.SAVEABLE_OBJECTS) dont_restore_names = { x.name for x in all_vars if x.name.startswith("bilm") } print(sorted(dont_restore_names)) vars = [x for x in all_vars if x.name not in dont_restore_names] print("Done, loading weights") checkpoint = model_dir.get_best_weights() if checkpoint is None: print("Loading most recent checkpoint") checkpoint = model_dir.get_latest_checkpoint() else: print("Loading best weights") saver = tf.train.Saver(vars) saver.restore(sess, checkpoint) if args.ema: ema = tf.train.ExponentialMovingAverage(0) saver = tf.train.Saver( {ema.average_name(x): x for x in tf.trainable_variables()}) saver.restore(sess, checkpoint) sess.run( tf.variables_initializer( [x for x in all_vars if x.name in dont_restore_names])) print("Done, starting evaluation") out = {} for i, batch in enumerate(dataset.get_epoch()): if args.n is not None and i == args.n: break print("On batch: %d" % (i + 1)) enc = model.encode(batch, False) spans = sess.run(best_span, feed_dict=enc) for (s, e), point in zip(spans, batch): out[point.question_id] = point.get_original_text(s, e) sess.close() print("Done, saving") with open(output_path, "w") as f: json.dump(out, f) print("Mission accomplished!")