def test_trainer_can_run_multiple_gpu(self):
    multigpu_iterator = BasicIterator(batch_size=4)
    multigpu_iterator.index_with(self.vocab)
    trainer = Trainer(self.model, self.optimizer,
                      multigpu_iterator, self.instances, num_epochs=2,
                      cuda_device=[0, 1])
    trainer.train()
def test_trainer_can_run_multiple_gpu(self):
    class MetaDataCheckWrapper(Model):
        """
        Checks that the metadata field has been correctly split across the batch dimension
        when running on multiple gpus.
        """
        def __init__(self, model):
            super().__init__(model.vocab)
            self.model = model

        def forward(self, **kwargs) -> Dict[str, torch.Tensor]:  # type: ignore # pylint: disable=arguments-differ
            assert 'metadata' in kwargs and 'tags' in kwargs, \
                f'tokens and metadata must be provided. Got {kwargs.keys()} instead.'
            batch_size = kwargs['tokens']['tokens'].size()[0]
            assert len(kwargs['metadata']) == batch_size, \
                f'metadata must be split appropriately. Expected {batch_size} elements, ' \
                f"got {len(kwargs['metadata'])} elements."
            return self.model.forward(**kwargs)

    multigpu_iterator = BasicIterator(batch_size=4)
    multigpu_iterator.index_with(self.vocab)
    trainer = Trainer(MetaDataCheckWrapper(self.model), self.optimizer,
                      multigpu_iterator, self.instances, num_epochs=2,
                      cuda_device=[0, 1])
    trainer.train()
def test_multiple_cursors(self):
    # pylint: disable=protected-access
    lazy_instances1 = _LazyInstances(lambda: (i for i in self.instances))
    lazy_instances2 = _LazyInstances(lambda: (i for i in self.instances))

    eager_instances1 = self.instances[:]
    eager_instances2 = self.instances[:]

    for instances1, instances2 in [(eager_instances1, eager_instances2),
                                   (lazy_instances1, lazy_instances2)]:
        iterator = BasicIterator(batch_size=1, instances_per_epoch=2)
        iterator.index_with(self.vocab)

        # First epoch through dataset1
        batches = list(iterator._create_batches(instances1, shuffle=False))
        grouped_instances = [batch.instances for batch in batches]
        assert grouped_instances == [[self.instances[0]], [self.instances[1]]]

        # First epoch through dataset2
        batches = list(iterator._create_batches(instances2, shuffle=False))
        grouped_instances = [batch.instances for batch in batches]
        assert grouped_instances == [[self.instances[0]], [self.instances[1]]]

        # Second epoch through dataset1
        batches = list(iterator._create_batches(instances1, shuffle=False))
        grouped_instances = [batch.instances for batch in batches]
        assert grouped_instances == [[self.instances[2]], [self.instances[3]]]

        # Second epoch through dataset2
        batches = list(iterator._create_batches(instances2, shuffle=False))
        grouped_instances = [batch.instances for batch in batches]
        assert grouped_instances == [[self.instances[2]], [self.instances[3]]]
def test_can_optimise_model_with_dense_and_sparse_params(self):
    optimizer_params = Params({"type": "dense_sparse_adam"})
    parameters = [[n, p] for n, p in self.model.named_parameters() if p.requires_grad]
    optimizer = Optimizer.from_params(parameters, optimizer_params)
    iterator = BasicIterator(2)
    iterator.index_with(self.vocab)
    Trainer(self.model, optimizer, iterator, self.instances).train()
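# Hedged sketch, not part of the test above: Optimizer.from_params resolves the
# "type" key against AllenNLP's optimizer registry, so the same pattern works
# for any registered optimizer, e.g. plain Adam with an explicit learning rate.
adam_optimizer = Optimizer.from_params(parameters, Params({"type": "adam", "lr": 0.001}))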
def test_epoch_tracking_multiple_epochs(self):
    iterator = BasicIterator(batch_size=2, track_epoch=True)
    iterator.index_with(self.vocab)

    all_batches = list(iterator(self.instances, num_epochs=10))
    assert len(all_batches) == 10 * 3
    for i, batch in enumerate(all_batches):
        # Should have 3 batches per epoch
        epoch = i // 3
        assert all(epoch_num == epoch for epoch_num in batch['epoch_num'])
def test_trainer_can_log_learning_rates_tensorboard(self):
    iterator = BasicIterator(batch_size=4)
    iterator.index_with(self.vocab)

    trainer = Trainer(self.model, self.optimizer,
                      iterator, self.instances, num_epochs=2,
                      serialization_dir=self.TEST_DIR,
                      should_log_learning_rate=True,
                      summary_interval=2)
    trainer.train()
def test_yield_one_epoch_iterates_over_the_data_once(self):
    for test_instances in (self.instances, self.lazy_instances):
        iterator = BasicIterator(batch_size=2)
        iterator.index_with(self.vocab)
        batches = list(iterator(test_instances, num_epochs=1))
        # We just want to get the single-token array for the text field in the instance.
        instances = [tuple(instance.detach().cpu().numpy())
                     for batch in batches
                     for instance in batch['text']["tokens"]]
        assert len(instances) == 5
        self.assert_instances_are_correct(instances)
def test_call_iterates_over_data_forever(self):
    for test_instances in (self.instances, self.lazy_instances):
        iterator = BasicIterator(batch_size=2)
        iterator.index_with(self.vocab)
        generator = iterator(test_instances)
        batches = [next(generator) for _ in range(18)]  # going over the data 6 times
        # We just want to get the single-token array for the text field in the instance.
        instances = [tuple(instance.detach().cpu().numpy())
                     for batch in batches
                     for instance in batch['text']["tokens"]]
        assert len(instances) == 5 * 6
        self.assert_instances_are_correct(instances)
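# Hedged sketch, not from the test suite: because calling the iterator without
# num_epochs yields batches forever, itertools.islice is a convenient way to
# take a fixed number of batches without an explicit next() loop.
import itertools

first_ten_batches = list(itertools.islice(iterator(test_instances), 10))
assert len(first_ten_batches) == 10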
def test_with_iterator(self):
    reader = MultiprocessDatasetReader(base_reader=self.base_reader, num_workers=2)
    instances = reader.read(self.glob)

    iterator = BasicIterator(batch_size=32)
    iterator.index_with(self.vocab)

    batches = [batch for batch in iterator(instances, num_epochs=1)]

    # 400 instances / batch_size 32 = 12 full batches + 1 batch of 16
    sizes = sorted([len(batch['tags']) for batch in batches])
    assert sizes == [16] + 12 * [32]
def test_epoch_tracking_forever(self):
    iterator = BasicIterator(batch_size=2, track_epoch=True)
    iterator.index_with(self.vocab)

    it = iterator(self.instances, num_epochs=None)

    all_batches = [next(it) for _ in range(30)]

    assert len(all_batches) == 30
    for i, batch in enumerate(all_batches):
        # Should have 3 batches per epoch
        epoch = i // 3
        assert all(epoch_num == epoch for epoch_num in batch['epoch_num'])
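# Minimal hedged sketch (a vocab and some indexed instances are assumed): with
# track_epoch=True the iterator attaches an 'epoch_num' entry to every batch,
# one value per instance, which a model can read for epoch-dependent behaviour.
iterator = BasicIterator(batch_size=2, track_epoch=True)
iterator.index_with(vocab)
batch = next(iterator(instances, num_epochs=None))
print(batch['epoch_num'])  # e.g. [0, 0] during the first pass over the data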
def test_elmo_bilm(self):
    # get the raw data
    sentences, expected_lm_embeddings = self._load_sentences_embeddings()

    # load the test model
    elmo_bilm = _ElmoBiLm(self.options_file, self.weight_file)

    # Deal with the data.
    indexer = ELMoTokenCharactersIndexer()

    # For each sentence, first create a TextField, then create an instance
    instances = []
    for batch in zip(*sentences):
        for sentence in batch:
            tokens = [Token(token) for token in sentence.split()]
            field = TextField(tokens, {'character_ids': indexer})
            instance = Instance({"elmo": field})
            instances.append(instance)

    vocab = Vocabulary()

    # Now finally we can iterate through batches.
    iterator = BasicIterator(3)
    iterator.index_with(vocab)
    for i, batch in enumerate(iterator(instances, num_epochs=1, shuffle=False)):
        lm_embeddings = elmo_bilm(batch['elmo']['character_ids'])
        top_layer_embeddings, mask = remove_sentence_boundaries(
                lm_embeddings['activations'][2],
                lm_embeddings['mask']
        )

        # check the mask lengths
        lengths = mask.data.numpy().sum(axis=1)
        batch_sentences = [sentences[k][i] for k in range(3)]
        expected_lengths = [len(sentence.split()) for sentence in batch_sentences]
        self.assertEqual(lengths.tolist(), expected_lengths)

        # get the expected embeddings and compare!
        expected_top_layer = [expected_lm_embeddings[k][i] for k in range(3)]
        for k in range(3):
            self.assertTrue(
                    numpy.allclose(
                            top_layer_embeddings[k, :lengths[k], :].data.numpy(),
                            expected_top_layer[k],
                            atol=1.0e-6
                    )
            )
def main(serialization_directory, device):
    """
    serialization_directory : str, required.
        The directory containing the serialized weights.
    device : int, default = -1
        The device to run the evaluation on.
    """
    config = Params.from_file(os.path.join(serialization_directory, "config.json"))
    dataset_reader = DatasetReader.from_params(config['dataset_reader'])
    evaluation_data_path = config['validation_data_path']

    model = Model.load(config, serialization_dir=serialization_directory, cuda_device=device)

    prediction_file_path = os.path.join(serialization_directory, "predictions.txt")
    gold_file_path = os.path.join(serialization_directory, "gold.txt")
    prediction_file = open(prediction_file_path, "w+")
    gold_file = open(gold_file_path, "w+")

    # Load the evaluation data and index it.
    print("Reading evaluation data from {}".format(evaluation_data_path))
    instances = dataset_reader.read(evaluation_data_path)
    iterator = BasicIterator(batch_size=32)
    iterator.index_with(model.vocab)

    model_predictions = []
    batches = iterator(instances, num_epochs=1, shuffle=False, cuda_device=device, for_training=False)
    for batch in Tqdm.tqdm(batches):
        result = model(**batch)
        predictions = model.decode(result)
        model_predictions.extend(predictions["tags"])

    for instance, prediction in zip(instances, model_predictions):
        fields = instance.fields
        try:
            # Most sentences have a verbal predicate, but not all.
            verb_index = fields["verb_indicator"].labels.index(1)
        except ValueError:
            verb_index = None
        gold_tags = fields["tags"].labels
        sentence = fields["tokens"].tokens

        write_to_conll_eval_file(prediction_file, gold_file,
                                 verb_index, sentence, prediction, gold_tags)
    prediction_file.close()
    gold_file.close()
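# Hypothetical invocation (the directory is a placeholder): evaluate the
# serialized SRL model on CPU. A full script would normally parse these
# arguments with argparse instead of hard-coding them.
if __name__ == "__main__":
    main("/path/to/srl_serialization_dir", device=-1)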
def test_maximum_samples_per_batch(self):
    for test_instances in (self.instances, self.lazy_instances):
        # pylint: disable=protected-access
        iterator = BasicIterator(
                batch_size=3, maximum_samples_per_batch=['num_tokens', 9]
        )
        iterator.index_with(self.vocab)
        batches = list(iterator._create_batches(test_instances, shuffle=False))
        stats = self.get_batches_stats(batches)

        # ensure all instances are in a batch
        assert stats['total_instances'] == len(self.instances)

        # ensure correct batch sizes
        assert stats['batch_lengths'] == [2, 1, 1, 1]

        # ensure correct sample sizes (<= 9)
        assert stats['sample_sizes'] == [8, 3, 9, 1]
def test_maximum_samples_per_batch_packs_tightly(self):
    # pylint: disable=protected-access
    token_counts = [10, 4, 3]
    test_instances = self.create_instances_from_token_counts(token_counts)

    iterator = BasicIterator(
            batch_size=3, maximum_samples_per_batch=['num_tokens', 11]
    )
    iterator.index_with(self.vocab)
    batches = list(iterator._create_batches(test_instances, shuffle=False))
    stats = self.get_batches_stats(batches)

    # ensure all instances are in a batch
    assert stats['total_instances'] == len(token_counts)

    # ensure correct batch sizes
    assert stats['batch_lengths'] == [1, 2]

    # ensure correct sample sizes (<= 11)
    assert stats['sample_sizes'] == [10, 8]
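# Why batch_lengths == [1, 2] and sample_sizes == [10, 8]: a batch's sample
# count is measured after padding, i.e. max_sequence_length * batch_size.
# Packing the 10-token instance with the 4-token one would cost 2 * 10 = 20 > 11,
# so the first instance is emitted alone (10 samples); the 4- and 3-token
# instances pad to 4 tokens each, 2 * 4 = 8 <= 11, so they share the second batch.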
def test_maximum_samples_per_batch(self):
    for test_instances in (self.instances, self.lazy_instances):
        # pylint: disable=protected-access
        iterator = BasicIterator(
                batch_size=3, maximum_samples_per_batch=['num_tokens', 9]
        )
        iterator.index_with(self.vocab)
        batches = list(iterator._create_batches(test_instances, shuffle=False))

        # ensure all instances are in a batch
        grouped_instances = [batch.instances for batch in batches]
        num_instances = sum(len(group) for group in grouped_instances)
        assert num_instances == len(self.instances)

        # ensure all batches are sufficiently small
        for batch in batches:
            batch_sequence_length = max(
                    [instance.get_padding_lengths()['text']['num_tokens']
                     for instance in batch.instances]
            )
            assert batch_sequence_length * len(batch.instances) <= 9
def test_trainer_saves_models_at_specified_interval(self):
    iterator = BasicIterator(batch_size=4)
    iterator.index_with(self.vocab)

    trainer = Trainer(self.model, self.optimizer,
                      iterator, self.instances, num_epochs=2,
                      serialization_dir=self.TEST_DIR,
                      model_save_interval=0.0001)
    trainer.train()

    # Now check the serialized files for models saved during the epoch.
    prefix = 'model_state_epoch_*'
    file_names = sorted(glob.glob(os.path.join(self.TEST_DIR, prefix)))
    epochs = [re.search(r"_([0-9\.\-]+)\.th", fname).group(1)
              for fname in file_names]
    # We should have checkpoints at the end of each epoch and during each, e.g.
    # [0.timestamp, 0, 1.timestamp, 1]
    assert len(epochs) == 4
    assert epochs[3] == '1'
    assert '.' in epochs[0]

    # Now make certain we can restore from timestamped checkpoint.
    # To do so, remove the checkpoint from the end of epoch 1 & 2, so
    # that we are forced to restore from the timestamped checkpoints.
    for k in range(2):
        os.remove(os.path.join(self.TEST_DIR, 'model_state_epoch_{}.th'.format(k)))
        os.remove(os.path.join(self.TEST_DIR, 'training_state_epoch_{}.th'.format(k)))
    os.remove(os.path.join(self.TEST_DIR, 'best.th'))

    restore_trainer = Trainer(self.model, self.optimizer,
                              self.iterator, self.instances, num_epochs=2,
                              serialization_dir=self.TEST_DIR,
                              model_save_interval=0.0001)
    epoch, _ = restore_trainer._restore_checkpoint()  # pylint: disable=protected-access
    assert epoch == 2
    # One batch per epoch.
    assert restore_trainer._batch_num_total == 2  # pylint: disable=protected-access
def test_iterator():
    indexer = StaticFasttextTokenIndexer(
            model_path="./data/fasttext_embedding.model",
            model_params_path="./data/fasttext_embedding.model.params")

    loader = MenionsLoader(
            category_mapping_file='./data/test_category_mapping.json',
            token_indexers={"tokens": indexer},
            tokenizer=WordTokenizer(word_splitter=FastSplitter()))

    vocab = Vocabulary.from_params(Params({"directory_path": "./data/vocab2/"}))

    iterator = BasicIterator(batch_size=32)
    iterator.index_with(vocab)

    limit = 50
    for _ in tqdm.tqdm(iterator(loader.read('./data/train_data_aa.tsv'), num_epochs=1), mininterval=2):
        limit -= 1
        if limit <= 0:
            break
def main():
    all_chars = {END_SYMBOL, START_SYMBOL}
    all_chars.update("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ .,!?'-")
    token_counts = {char: 1 for char in all_chars}
    vocab = Vocabulary({'tokens': token_counts})

    token_indexers = {'tokens': SingleIdTokenIndexer()}

    train_set = read_dataset(all_chars)
    instances = [tokens_to_lm_instance(tokens, token_indexers) for tokens in train_set]

    token_embedding = Embedding(num_embeddings=vocab.get_vocab_size('tokens'),
                                embedding_dim=EMBEDDING_SIZE)
    embedder = BasicTextFieldEmbedder({"tokens": token_embedding})

    model = RNNLanguageModel(embedder=embedder,
                             hidden_size=HIDDEN_SIZE,
                             max_len=80,
                             vocab=vocab)

    iterator = BasicIterator(batch_size=BATCH_SIZE)
    iterator.index_with(vocab)

    optimizer = optim.Adam(model.parameters(), lr=5.e-3)

    trainer = Trainer(model=model,
                      optimizer=optimizer,
                      iterator=iterator,
                      train_dataset=instances,
                      num_epochs=10)
    trainer.train()

    for _ in range(50):
        tokens, _ = model.generate()
        print(''.join(token.text for token in tokens))
def test_trainer_respects_keep_serialized_model_every_num_seconds(self):
    # To test:
    # Create an iterator that sleeps for 2.5 seconds per epoch, so the total
    # training time for one epoch is slightly greater than 2.5 seconds.
    # Run for 6 epochs, keeping the last 2 models, models also kept every 5 seconds.
    # Check the resulting checkpoints.  Should then have models at epochs
    # 2, 4, plus the last two at 5 and 6.
    class WaitingIterator(BasicIterator):
        # pylint: disable=arguments-differ
        def _create_batches(self, *args, **kwargs):
            time.sleep(2.5)
            return super(WaitingIterator, self)._create_batches(*args, **kwargs)

    iterator = WaitingIterator(batch_size=2)
    iterator.index_with(self.vocab)

    # Don't want validation iterator to wait.
    viterator = BasicIterator(batch_size=2)
    viterator.index_with(self.vocab)

    trainer = CallbackTrainer(self.model, self.optimizer,
                              num_epochs=6,
                              serialization_dir=self.TEST_DIR,
                              callbacks=self.default_callbacks(max_checkpoints=2,
                                                               checkpoint_every=5,
                                                               iterator=iterator,
                                                               validation_iterator=viterator))
    trainer.train()

    # Now check the serialized files
    for prefix in ['model_state_epoch_*', 'training_state_epoch_*']:
        file_names = glob.glob(os.path.join(self.TEST_DIR, prefix))
        epochs = [int(re.search(r"_([0-9])\.th", fname).group(1))
                  for fname in file_names]
        # epoch N has N-1 in file name
        assert sorted(epochs) == [1, 3, 4, 5]
def test_production_rule_field_with_multiple_gpus(self):
    wikitables_dir = 'allennlp/tests/fixtures/data/wikitables/'
    wikitables_reader = WikiTablesDatasetReader(tables_directory=wikitables_dir,
                                                dpd_output_directory=wikitables_dir + 'dpd_output/')
    instances = wikitables_reader.read(wikitables_dir + 'sample_data.examples')
    archive_path = self.FIXTURES_ROOT / 'semantic_parsing' / 'wikitables' / 'serialization' / 'model.tar.gz'
    model = load_archive(archive_path).model
    model.cuda()

    multigpu_iterator = BasicIterator(batch_size=4)
    multigpu_iterator.index_with(model.vocab)
    trainer = CallbackTrainer(model,
                              self.optimizer,
                              num_epochs=2,
                              cuda_device=[0, 1],
                              callbacks=[GenerateTrainingBatches(instances, multigpu_iterator),
                                         TrainSupervised()])
    trainer.train()
def _filter_data(data, vocab):
    def _is_correct_instance(batch):
        assert len(batch['words']['ru_bert']['offsets']) == 1
        if batch['words']['ru_bert']['token_ids'].shape[1] > 256:
            return False
        return all(
            begin <= end < batch['words']['ru_bert']['token_ids'].shape[1]
            for begin, end in batch['words']['ru_bert']['offsets'][0]
        )

    iterator = BasicIterator(batch_size=1)
    iterator.index_with(vocab)

    result_data = []
    for instance in tqdm(data):
        batch = next(iterator([instance]))
        if _is_correct_instance(batch):
            result_data.append(instance)
        else:
            logger.info('Filtering out %s', batch['metadata'][0]['words'])

    logger.info('Removed %s samples', len(data) - len(result_data))
    return result_data
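# Hedged usage sketch: run the filter once over the raw instances before
# training so that over-long BERT inputs (or misaligned wordpiece offsets)
# never reach the model; `instances` and `vocab` are assumed to already exist.
clean_instances = _filter_data(instances, vocab)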
for fname in [config.train_file, config.val_file])

vars(train_ds[0].fields["input"])

# ### Data Iterator

from allennlp.data.iterators import BasicIterator

iterator = BasicIterator(batch_size=config.batch_size)
iterator.index_with(global_vocab)

# Sanity check
batch = next(iter(iterator(train_ds)))
batch

# # Model and Loss
def eval_model_for_downstream(model_saved_path):
    seed = 12
    torch.manual_seed(seed)
    bert_model_name = 'bert-base-uncased'
    # lazy = False
    lazy = True
    forward_size = 32
    # batch_size = 64
    batch_size = 128
    do_lower_case = True

    debug_mode = False
    # est_datasize = 900_000

    num_class = 1
    # num_train_optimization_steps

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    device_num = 0 if torch.cuda.is_available() else -1

    n_gpu = torch.cuda.device_count()

    unk_token_num = {'tokens': 1}  # work around for initiating vocabulary.
    vocab = ExVocabulary(unk_token_num=unk_token_num)
    vocab.add_token_to_namespace("false", namespace="labels")  # 0
    vocab.add_token_to_namespace("true", namespace="labels")  # 1
    vocab.add_token_to_namespace("hidden", namespace="labels")
    vocab.change_token_with_index_to_namespace("hidden", -2, namespace='labels')

    # Load Dataset
    train_list = common.load_json(config.TRAIN_FILE)
    dev_list = common.load_json(config.DEV_FULLWIKI_FILE)

    dev_fitems_list = common.load_jsonl(
        config.PDATA_ROOT / "content_selection_forward" / "hotpot_dev_p_level_unlabeled.jsonl")
    train_fitems_list = common.load_jsonl(
        config.PDATA_ROOT / "content_selection_forward" / "hotpot_train_p_level_labeled.jsonl")
    test_fitems_list = common.load_jsonl(
        config.PDATA_ROOT / "content_selection_forward" / "hotpot_test_p_level_unlabeled.jsonl")

    if debug_mode:
        dev_list = dev_list[:10]
        dev_fitems_list = dev_fitems_list[:296]
        train_fitems_list = train_fitems_list[:300]
        eval_frequency = 2
        # print(dev_list[-1]['_id'])
        # exit(0)

    dev_o_dict = list_dict_data_tool.list_to_dict(dev_list, '_id')
    train_o_dict = list_dict_data_tool.list_to_dict(train_list, '_id')

    bert_tokenizer = BertTokenizer.from_pretrained(bert_model_name, do_lower_case=do_lower_case)
    bert_cs_reader = BertContentSelectionReader(bert_tokenizer, lazy, is_paired=True,
                                                example_filter=lambda x: len(x['context']) == 0, max_l=286)

    bert_encoder = BertModel.from_pretrained(bert_model_name)
    model = BertMultiLayerSeqClassification(bert_encoder, num_labels=num_class, num_of_pooling_layer=1,
                                            act_type='tanh', use_pretrained_pooler=True, use_sigmoid=True)

    model.load_state_dict(torch.load(model_saved_path))

    model.to(device)
    if n_gpu > 1:
        model = torch.nn.DataParallel(model)

    # dev_instances = bert_cs_reader.read(dev_fitems_list)
    train_instance = bert_cs_reader.read(train_fitems_list)
    test_instances = bert_cs_reader.read(test_fitems_list)

    biterator = BasicIterator(batch_size=forward_size)
    biterator.index_with(vocab)

    # train_iter = biterator(train_instance, num_epochs=1, shuffle=False)
    # dev_iter = biterator(dev_instances, num_epochs=1, shuffle=False)
    test_iter = biterator(test_instances, num_epochs=1, shuffle=False)

    print(len(dev_fitems_list))
    print(len(test_fitems_list))
    print(len(train_fitems_list))

    # cur_dev_eval_results_list = eval_model(model, dev_iter, device_num, with_probs=True, show_progress=True)
    # cur_train_eval_results_list = eval_model(model, train_iter, device_num, with_probs=True, show_progress=True)

    cur_test_eval_results_list = eval_model(model, test_iter, device_num, with_probs=True, show_progress=True)
    common.save_jsonl(cur_test_eval_results_list, "test_p_level_bert_v1_results.jsonl")
    print("Test write finished.")
    exit(0)

    copied_dev_o_dict = copy.deepcopy(dev_o_dict)
    list_dict_data_tool.append_subfield_from_list_to_dict(cur_dev_eval_results_list, copied_dev_o_dict,
                                                          'qid', 'fid', check=True)
    # Top_3
    cur_results_dict_top3 = select_top_k_and_to_results_dict(copied_dev_o_dict, top_k=3)
    upperbound_results_dict_top3 = append_gt_downstream_to_get_upperbound_from_doc_retri(
        cur_results_dict_top3, dev_list)
    # Top_5
    cur_results_dict_top5 = select_top_k_and_to_results_dict(copied_dev_o_dict, top_k=5)
    upperbound_results_dict_top5 = append_gt_downstream_to_get_upperbound_from_doc_retri(
        cur_results_dict_top5, dev_list)

    cur_results_dict_top10 = select_top_k_and_to_results_dict(copied_dev_o_dict, top_k=10)
    upperbound_results_dict_top10 = append_gt_downstream_to_get_upperbound_from_doc_retri(
        cur_results_dict_top10, dev_list)

    _, metrics_top3 = ext_hotpot_eval.eval(cur_results_dict_top3, dev_list, verbose=False)
    _, metrics_top3_UB = ext_hotpot_eval.eval(upperbound_results_dict_top3, dev_list, verbose=False)

    _, metrics_top5 = ext_hotpot_eval.eval(cur_results_dict_top5, dev_list, verbose=False)
    _, metrics_top5_UB = ext_hotpot_eval.eval(upperbound_results_dict_top5, dev_list, verbose=False)

    _, metrics_top10 = ext_hotpot_eval.eval(cur_results_dict_top10, dev_list, verbose=False)
    _, metrics_top10_UB = ext_hotpot_eval.eval(upperbound_results_dict_top10, dev_list, verbose=False)

    logging_item = {
        'top3': metrics_top3,
        'top3_UB': metrics_top3_UB,
        'top5': metrics_top5,
        'top5_UB': metrics_top5_UB,
        'top10': metrics_top10,
        'top10_UB': metrics_top10_UB,
    }

    print(logging_item)

    common.save_jsonl(cur_train_eval_results_list, "train_p_level_bert_v1_results.jsonl")
    common.save_jsonl(cur_dev_eval_results_list, "dev_p_level_bert_v1_results.jsonl")
def model_go():
    seed = 12
    torch.manual_seed(seed)
    # bert_model_name = 'bert-large-uncased'
    bert_model_name = 'bert-base-uncased'
    experiment_name = 'hotpot_v0_cs'
    lazy = False
    # lazy = True
    forward_size = 16
    # batch_size = 64
    batch_size = 128
    gradient_accumulate_step = int(batch_size / forward_size)
    warmup_proportion = 0.1
    learning_rate = 5e-5
    num_train_epochs = 5
    eval_frequency = 5000
    pos_ratio = 0.2
    do_lower_case = True

    debug_mode = False
    # est_datasize = 900_000

    num_class = 1
    # num_train_optimization_steps

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    device_num = 0 if torch.cuda.is_available() else -1

    n_gpu = torch.cuda.device_count()

    unk_token_num = {'tokens': 1}  # work around for initiating vocabulary.
    vocab = ExVocabulary(unk_token_num=unk_token_num)
    vocab.add_token_to_namespace("false", namespace="labels")  # 0
    vocab.add_token_to_namespace("true", namespace="labels")  # 1
    vocab.add_token_to_namespace("hidden", namespace="labels")
    vocab.change_token_with_index_to_namespace("hidden", -2, namespace='labels')

    # Load Dataset
    train_list = common.load_json(config.TRAIN_FILE)
    dev_list = common.load_json(config.DEV_FULLWIKI_FILE)

    dev_fitems_list = common.load_jsonl(
        config.PDATA_ROOT / "content_selection_forward" / "hotpot_dev_p_level_unlabeled.jsonl")
    train_fitems_list = common.load_jsonl(
        config.PDATA_ROOT / "content_selection_forward" / "hotpot_train_p_level_labeled.jsonl")

    if debug_mode:
        dev_list = dev_list[:10]
        dev_fitems_list = dev_fitems_list[:296]
        train_fitems_list = train_fitems_list[:300]
        eval_frequency = 2
        # print(dev_list[-1]['_id'])
        # exit(0)

    sampled_train_list = down_sample_neg(train_fitems_list, ratio=pos_ratio)
    est_datasize = len(sampled_train_list)

    dev_o_dict = list_dict_data_tool.list_to_dict(dev_list, '_id')
    # print(dev_o_dict)

    bert_tokenizer = BertTokenizer.from_pretrained(bert_model_name, do_lower_case=do_lower_case)
    bert_cs_reader = BertContentSelectionReader(bert_tokenizer, lazy, is_paired=True,
                                                example_filter=lambda x: len(x['context']) == 0, max_l=286)

    bert_encoder = BertModel.from_pretrained(bert_model_name)
    model = BertMultiLayerSeqClassification(bert_encoder, num_labels=num_class, num_of_pooling_layer=1,
                                            act_type='tanh', use_pretrained_pooler=True, use_sigmoid=True)

    model.to(device)
    if n_gpu > 1:
        model = torch.nn.DataParallel(model)

    param_optimizer = list(model.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [
        {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
        {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
    ]

    num_train_optimization_steps = int(est_datasize / forward_size / gradient_accumulate_step) * \
                                   num_train_epochs

    print("Estimated training size", est_datasize)
    print("Number of optimization steps:", num_train_optimization_steps)

    optimizer = BertAdam(optimizer_grouped_parameters,
                         lr=learning_rate,
                         warmup=warmup_proportion,
                         t_total=num_train_optimization_steps)

    dev_instances = bert_cs_reader.read(dev_fitems_list)

    biterator = BasicIterator(batch_size=forward_size)
    biterator.index_with(vocab)

    forbackward_step = 0
    update_step = 0

    logging_agent = save_tool.ScoreLogger({})

    # # # Create Log File
    file_path_prefix, date = save_tool.gen_file_prefix(f"{experiment_name}")
    # Save the source code.
    script_name = os.path.basename(__file__)
    with open(os.path.join(file_path_prefix, script_name), 'w') as out_f, open(__file__, 'r') as it:
        out_f.write(it.read())
        out_f.flush()
    # # # Log File end

    for epoch_i in range(num_train_epochs):
        print("Epoch:", epoch_i)
        sampled_train_list = down_sample_neg(train_fitems_list, ratio=pos_ratio)
        train_instance = bert_cs_reader.read(sampled_train_list)
        train_iter = biterator(train_instance, num_epochs=1, shuffle=True)

        for batch in tqdm(train_iter):
            model.train()
            batch = move_to_device(batch, device_num)

            paired_sequence = batch['paired_sequence']
            paired_segments_ids = batch['paired_segments_ids']
            labels_ids = batch['label']
            att_mask, _ = torch_util.get_length_and_mask(paired_sequence)
            s1_span = batch['bert_s1_span']
            s2_span = batch['bert_s2_span']

            loss = model(paired_sequence, token_type_ids=paired_segments_ids, attention_mask=att_mask,
                         mode=BertMultiLayerSeqClassification.ForwardMode.TRAIN,
                         labels=labels_ids)

            if n_gpu > 1:
                loss = loss.mean()  # mean() to average on multi-gpu.

            if gradient_accumulate_step > 1:
                loss = loss / gradient_accumulate_step

            loss.backward()
            forbackward_step += 1

            if forbackward_step % gradient_accumulate_step == 0:
                optimizer.step()
                optimizer.zero_grad()
                update_step += 1

                if update_step % eval_frequency == 0:
                    print("Update steps:", update_step)
                    dev_iter = biterator(dev_instances, num_epochs=1, shuffle=False)

                    cur_eval_results_list = eval_model(model, dev_iter, device_num, with_probs=True)

                    copied_dev_o_dict = copy.deepcopy(dev_o_dict)
                    list_dict_data_tool.append_subfield_from_list_to_dict(cur_eval_results_list, copied_dev_o_dict,
                                                                          'qid', 'fid', check=True)
                    # Top_5
                    cur_results_dict_top5 = select_top_k_and_to_results_dict(copied_dev_o_dict, top_k=5)
                    upperbound_results_dict_top5 = append_gt_downstream_to_get_upperbound_from_doc_retri(
                        cur_results_dict_top5, dev_list)

                    cur_results_dict_top10 = select_top_k_and_to_results_dict(copied_dev_o_dict, top_k=10)
                    upperbound_results_dict_top10 = append_gt_downstream_to_get_upperbound_from_doc_retri(
                        cur_results_dict_top10, dev_list)

                    _, metrics_top5 = ext_hotpot_eval.eval(cur_results_dict_top5, dev_list, verbose=False)
                    _, metrics_top5_UB = ext_hotpot_eval.eval(upperbound_results_dict_top5, dev_list, verbose=False)

                    _, metrics_top10 = ext_hotpot_eval.eval(cur_results_dict_top10, dev_list, verbose=False)
                    _, metrics_top10_UB = ext_hotpot_eval.eval(upperbound_results_dict_top10, dev_list, verbose=False)

                    # top5_doc_f1, top5_UB_sp_f1, top10_doc_f1, top10_Ub_sp_f1
                    # top5_doc_f1 = metrics_top5['doc_f1']
                    # top5_UB_sp_f1 = metrics_top5_UB['sp_f1']
                    # top10_doc_f1 = metrics_top10['doc_f1']
                    # top10_Ub_sp_f1 = metrics_top10_UB['sp_f1']

                    top5_doc_recall = metrics_top5['doc_recall']
                    top5_UB_sp_recall = metrics_top5_UB['sp_recall']
                    top10_doc_recall = metrics_top10['doc_recall']
                    top10_Ub_sp_recall = metrics_top10_UB['sp_recall']

                    logging_item = {
                        'top5': metrics_top5,
                        'top5_UB': metrics_top5_UB,
                        'top10': metrics_top10,
                        'top10_UB': metrics_top10_UB,
                    }

                    # print(logging_item)
                    save_file_name = f'i({update_step})|e({epoch_i})' \
                                     f'|t5_doc_recall({top5_doc_recall})|t5_sp_recall({top5_UB_sp_recall})' \
                                     f'|t10_doc_recall({top10_doc_recall})|t5_sp_recall({top10_Ub_sp_recall})|seed({seed})'

                    # print(save_file_name)
                    logging_agent.incorporate_results({}, save_file_name, logging_item)
                    logging_agent.logging_to_file(Path(file_path_prefix) / "log.json")

                    model_to_save = model.module if hasattr(model, 'module') else model
                    output_model_file = Path(file_path_prefix) / save_file_name
                    torch.save(model_to_save.state_dict(), str(output_model_file))
def train_fever_std_ema_v1(resume_model=None, wn_feature=False):
    """
    This method is the new training script for training fever with span and probability score.
    :param resume_model:
    :param wn_feature:
    :return:
    """
    num_epoch = 200
    seed = 12
    batch_size = 32
    lazy = True
    dev_prob_threshold = 0.1
    train_prob_threshold = 0.1
    train_sample_top_k = 8
    experiment_name = f"nsmn_sent_wise_std_ema_lr1|t_prob:{train_prob_threshold}|top_k:{train_sample_top_k}"
    # resume_model = None

    print("Do EMA:")
    print("Dev prob threshold:", dev_prob_threshold)
    print("Train prob threshold:", train_prob_threshold)
    print("Train sample top k:", train_sample_top_k)

    dev_upstream_sent_list = common.load_jsonl(
        config.RESULT_PATH / "sent_retri_nn/balanced_sentence_selection_results/dev_sent_pred_scores.jsonl")
    train_upstream_sent_list = common.load_jsonl(
        config.RESULT_PATH / "sent_retri_nn/balanced_sentence_selection_results/train_sent_scores.jsonl")

    # Prepare Data
    token_indexers = {
        'tokens': SingleIdTokenIndexer(namespace='tokens'),  # This is the raw tokens
        'elmo_chars': ELMoTokenCharactersIndexer(namespace='elmo_characters')  # This is the elmo_characters
    }

    print("Building Prob Dicts...")
    train_sent_list = common.load_jsonl(
        config.RESULT_PATH / "sent_retri_nn/balanced_sentence_selection_results/train_sent_scores.jsonl")
    dev_sent_list = common.load_jsonl(
        config.RESULT_PATH / "sent_retri_nn/balanced_sentence_selection_results/dev_sent_pred_scores.jsonl")

    selection_dict = paired_selection_score_dict(train_sent_list)
    selection_dict = paired_selection_score_dict(dev_sent_list, selection_dict)

    upstream_dev_list = threshold_sampler_insure_unique(config.T_FEVER_DEV_JSONL,
                                                        dev_upstream_sent_list,
                                                        prob_threshold=dev_prob_threshold,
                                                        top_n=5)

    # WordNet persistence dict required by the WNSIMI readers below; the original
    # snippet used p_dict without defining it, so we load it the same way as in
    # hidden_eval_fever().
    p_dict = wn_persistent_api.persistence_load()

    # Specify ablation to remove wordnet and number embeddings.
    dev_fever_data_reader = WNSIMIReader(token_indexers=token_indexers, lazy=lazy, wn_p_dict=p_dict, max_l=320,
                                         ablation=None)
    train_fever_data_reader = WNSIMIReader(token_indexers=token_indexers, lazy=lazy, wn_p_dict=p_dict, max_l=320,
                                           shuffle_sentences=False, ablation=None)

    complete_upstream_dev_data = select_sent_with_prob_for_eval(config.T_FEVER_DEV_JSONL, upstream_dev_list,
                                                                selection_dict, tokenized=True)

    dev_instances = dev_fever_data_reader.read(complete_upstream_dev_data)

    # Load Vocabulary
    biterator = BasicIterator(batch_size=batch_size)

    vocab, weight_dict = load_vocab_embeddings(config.DATA_ROOT / "vocab_cache" / "nli_basic")
    vocab.change_token_with_index_to_namespace('hidden', -2, namespace='labels')

    print(vocab.get_token_to_index_vocabulary('labels'))
    print(vocab.get_vocab_size('tokens'))

    biterator.index_with(vocab)

    # Build Model
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu", index=0)
    device_num = -1 if device.type == 'cpu' else 0

    model = Model(
        rnn_size_in=(1024 + 300 + dev_fever_data_reader.wn_feature_size,
                     1024 + 450 + dev_fever_data_reader.wn_feature_size),
        rnn_size_out=(450, 450),
        weight=weight_dict['glove.840B.300d'],
        vocab_size=vocab.get_vocab_size('tokens'),
        mlp_d=900,
        embedding_dim=300,
        max_l=300,
        use_extra_lex_feature=False,
        max_span_l=100)

    print("Model Max length:", model.max_l)
    if resume_model is not None:
        model.load_state_dict(torch.load(resume_model))
    model.display()
    model.to(device)

    cloned_empty_model = copy.deepcopy(model)
    ema: EMA = EMA(parameters=model.named_parameters())

    # Create Log File
    file_path_prefix, date = save_tool.gen_file_prefix(f"{experiment_name}")
    # Save the source code.
    script_name = os.path.basename(__file__)
    with open(os.path.join(file_path_prefix, script_name), 'w') as out_f, open(__file__, 'r') as it:
        out_f.write(it.read())
        out_f.flush()
    # Save source code end.

    best_dev = -1
    iteration = 0

    start_lr = 0.0001
    optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=start_lr)
    criterion = nn.CrossEntropyLoss()

    for i_epoch in range(num_epoch):
        print("Resampling...")
        # Resampling
        train_data_with_candidate_sample_list = \
            threshold_sampler_insure_unique(config.T_FEVER_TRAIN_JSONL, train_upstream_sent_list,
                                            train_prob_threshold,
                                            top_n=train_sample_top_k)

        complete_upstream_train_data = adv_simi_sample_with_prob_v1_1(config.T_FEVER_TRAIN_JSONL,
                                                                      train_data_with_candidate_sample_list,
                                                                      selection_dict,
                                                                      tokenized=True)

        print("Sample data length:", len(complete_upstream_train_data))
        sampled_train_instances = train_fever_data_reader.read(complete_upstream_train_data)

        train_iter = biterator(sampled_train_instances, shuffle=True, num_epochs=1, cuda_device=device_num)
        for i, batch in tqdm(enumerate(train_iter)):
            model.train()
            out = model(batch)
            y = batch['label']

            loss = criterion(out, y)

            # No decay
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            iteration += 1

            # EMA update
            ema(model.named_parameters())

            if i_epoch < 15:
                mod = 10000
                # mod = 10
            else:
                mod = 2000

            if iteration % mod == 0:
                # eval_iter = biterator(dev_instances, shuffle=False, num_epochs=1, cuda_device=device_num)
                # complete_upstream_dev_data = hidden_eval(model, eval_iter, complete_upstream_dev_data)
                #
                # eval_mode = {'check_sent_id_correct': True, 'standard': True}
                # strict_score, acc_score, pr, rec, f1 = c_scorer.fever_score(complete_upstream_dev_data,
                #                                                             common.load_jsonl(config.T_FEVER_DEV_JSONL),
                #                                                             mode=eval_mode,
                #                                                             verbose=False)
                # print("Fever Score(Strict/Acc./Precision/Recall/F1):", strict_score, acc_score, pr, rec, f1)
                #
                # print(f"Dev:{strict_score}/{acc_score}")

                # EMA saving
                eval_iter = biterator(dev_instances, shuffle=False, num_epochs=1, cuda_device=device_num)
                load_ema_to_model(cloned_empty_model, ema)
                complete_upstream_dev_data = hidden_eval(cloned_empty_model, eval_iter, complete_upstream_dev_data)

                eval_mode = {'check_sent_id_correct': True, 'standard': True}
                strict_score, acc_score, pr, rec, f1 = c_scorer.fever_score(
                    complete_upstream_dev_data,
                    common.load_jsonl(config.T_FEVER_DEV_JSONL),
                    mode=eval_mode,
                    verbose=False)
                print("Fever Score EMA(Strict/Acc./Precision/Recall/F1):", strict_score, acc_score, pr, rec, f1)

                print(f"Dev EMA:{strict_score}/{acc_score}")

                need_save = False
                if strict_score > best_dev:
                    best_dev = strict_score
                    need_save = True

                if need_save:
                    # save_path = os.path.join(
                    #     file_path_prefix,
                    #     f'i({iteration})_epoch({i_epoch})_dev({strict_score})_lacc({acc_score})_seed({seed})'
                    # )
                    # torch.save(model.state_dict(), save_path)

                    ema_save_path = os.path.join(
                        file_path_prefix,
                        f'ema_i({iteration})_epoch({i_epoch})_dev({strict_score})_lacc({acc_score})_seed({seed})'
                    )
                    save_ema_to_file(ema, ema_save_path)
def hidden_eval_fever():
    batch_size = 64
    lazy = True

    SAVE_PATH = "/home/easonnie/projects/FunEver/saved_models/07-18-21:07:28_m_esim_wn_elmo_sample_fixed/i(57000)_epoch(8)_dev(0.5755075507550755)_loss(1.7175163737963839)_seed(12)"

    dev_upstream_file = config.RESULT_PATH / "sent_retri/2018_07_05_17:17:50_r/dev.jsonl"

    # Prepare Data
    token_indexers = {
        'tokens': SingleIdTokenIndexer(namespace='tokens'),  # This is the raw tokens
        'elmo_chars': ELMoTokenCharactersIndexer(namespace='elmo_characters')  # This is the elmo_characters
    }

    p_dict = wn_persistent_api.persistence_load()

    dev_fever_data_reader = WNReader(token_indexers=token_indexers, lazy=lazy, wn_p_dict=p_dict, max_l=360)

    complete_upstream_dev_data = get_actual_data(config.T_FEVER_DEV_JSONL, dev_upstream_file)
    dev_instances = dev_fever_data_reader.read(complete_upstream_dev_data)

    # Load Vocabulary
    biterator = BasicIterator(batch_size=batch_size)
    # dev_biterator = BasicIterator(batch_size=batch_size * 2)

    vocab, weight_dict = load_vocab_embeddings(config.DATA_ROOT / "vocab_cache" / "nli_basic")
    vocab.change_token_with_index_to_namespace('hidden', -2, namespace='labels')

    print(vocab.get_token_to_index_vocabulary('labels'))
    print(vocab.get_vocab_size('tokens'))

    biterator.index_with(vocab)

    # Build Model
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu", index=0)
    device_num = -1 if device.type == 'cpu' else 0

    model = Model(rnn_size_in=(1024 + 300 + dev_fever_data_reader.wn_feature_size, 1024 + 300),
                  weight=weight_dict['glove.840B.300d'],
                  vocab_size=vocab.get_vocab_size('tokens'),
                  embedding_dim=300, max_l=300)

    print("Model Max length:", model.max_l)
    model.load_state_dict(torch.load(SAVE_PATH))
    model.display()
    model.to(device)

    eval_iter = biterator(dev_instances, shuffle=False, num_epochs=1, cuda_device=device_num)
    builded_dev_data = hidden_eval(model, eval_iter, complete_upstream_dev_data)

    eval_mode = {'check_sent_id_correct': True, 'standard': True}

    for item in builded_dev_data:
        del item['label']

    print(c_scorer.fever_score(builded_dev_data, common.load_jsonl(config.T_FEVER_DEV_JSONL), mode=eval_mode))
class TestTrainer(AllenNlpTestCase):
    def setUp(self):
        super(TestTrainer, self).setUp()
        self.instances = SequenceTaggingDatasetReader().read(self.FIXTURES_ROOT / 'data' / 'sequence_tagging.tsv')
        vocab = Vocabulary.from_instances(self.instances)
        self.vocab = vocab
        self.model_params = Params({
                "text_field_embedder": {
                        "tokens": {
                                "type": "embedding",
                                "embedding_dim": 5
                        }
                },
                "encoder": {
                        "type": "lstm",
                        "input_size": 5,
                        "hidden_size": 7,
                        "num_layers": 2
                }
        })
        self.model = SimpleTagger.from_params(vocab=self.vocab, params=self.model_params)
        self.optimizer = torch.optim.SGD(self.model.parameters(), 0.01)
        self.iterator = BasicIterator(batch_size=2)
        self.iterator.index_with(vocab)

    def test_trainer_can_run(self):
        trainer = Trainer(model=self.model,
                          optimizer=self.optimizer,
                          iterator=self.iterator,
                          train_dataset=self.instances,
                          validation_dataset=self.instances,
                          num_epochs=2)
        metrics = trainer.train()
        assert 'best_validation_loss' in metrics
        assert isinstance(metrics['best_validation_loss'], float)
        assert 'best_validation_accuracy' in metrics
        assert isinstance(metrics['best_validation_accuracy'], float)
        assert 'best_validation_accuracy3' in metrics
        assert isinstance(metrics['best_validation_accuracy3'], float)
        assert 'best_epoch' in metrics
        assert isinstance(metrics['best_epoch'], int)

        # Making sure that both increasing and decreasing validation metrics work.
        trainer = Trainer(model=self.model,
                          optimizer=self.optimizer,
                          iterator=self.iterator,
                          train_dataset=self.instances,
                          validation_dataset=self.instances,
                          validation_metric='+loss',
                          num_epochs=2)
        metrics = trainer.train()
        assert 'best_validation_loss' in metrics
        assert isinstance(metrics['best_validation_loss'], float)
        assert 'best_validation_accuracy' in metrics
        assert isinstance(metrics['best_validation_accuracy'], float)
        assert 'best_validation_accuracy3' in metrics
        assert isinstance(metrics['best_validation_accuracy3'], float)
        assert 'best_epoch' in metrics
        assert isinstance(metrics['best_epoch'], int)

    @pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device registered.")
    def test_trainer_can_run_cuda(self):
        trainer = Trainer(self.model, self.optimizer,
                          self.iterator, self.instances, num_epochs=2,
                          cuda_device=0)
        trainer.train()

    @pytest.mark.skipif(torch.cuda.device_count() < 2,
                        reason="Need multiple GPUs.")
    def test_trainer_can_run_multiple_gpu(self):
        class MetaDataCheckWrapper(Model):
            """
            Checks that the metadata field has been correctly split across the batch dimension
            when running on multiple gpus.
            """
            def __init__(self, model):
                super().__init__(model.vocab)
                self.model = model

            def forward(self, **kwargs) -> Dict[str, torch.Tensor]:  # type: ignore # pylint: disable=arguments-differ
                assert 'metadata' in kwargs and 'tags' in kwargs, \
                    f'tokens and metadata must be provided. Got {kwargs.keys()} instead.'
                batch_size = kwargs['tokens']['tokens'].size()[0]
                assert len(kwargs['metadata']) == batch_size, \
                    f'metadata must be split appropriately. Expected {batch_size} elements, ' \
                    f"got {len(kwargs['metadata'])} elements."
                return self.model.forward(**kwargs)

        multigpu_iterator = BasicIterator(batch_size=4)
        multigpu_iterator.index_with(self.vocab)
        trainer = Trainer(MetaDataCheckWrapper(self.model), self.optimizer,
                          multigpu_iterator, self.instances, num_epochs=2,
                          cuda_device=[0, 1])
        trainer.train()

    def test_trainer_can_resume_training(self):
        trainer = Trainer(self.model, self.optimizer,
                          self.iterator, self.instances,
                          validation_dataset=self.instances,
                          num_epochs=1, serialization_dir=self.TEST_DIR)
        trainer.train()
        new_trainer = Trainer(self.model, self.optimizer,
                              self.iterator, self.instances,
                              validation_dataset=self.instances,
                              num_epochs=3, serialization_dir=self.TEST_DIR)

        epoch, val_metrics_per_epoch = new_trainer._restore_checkpoint()  # pylint: disable=protected-access
        assert epoch == 1
        assert len(val_metrics_per_epoch) == 1
        assert isinstance(val_metrics_per_epoch[0], float)
        assert val_metrics_per_epoch[0] != 0.
        new_trainer.train()

    def test_metric_only_considered_best_so_far_when_strictly_better_than_those_before_it_increasing_metric(self):
        new_trainer = Trainer(self.model, self.optimizer,
                              self.iterator, self.instances,
                              validation_dataset=self.instances,
                              num_epochs=3, serialization_dir=self.TEST_DIR,
                              patience=5, validation_metric="+test")
        # when it is the only metric it should be considered the best
        assert new_trainer._is_best_so_far(1, [])  # pylint: disable=protected-access
        # when it is the same as one before it it is not considered the best
        assert not new_trainer._is_best_so_far(.3, [.3, .3, .3, .2, .5, .1])  # pylint: disable=protected-access
        # when it is the best it is considered the best
        assert new_trainer._is_best_so_far(13.00, [.3, .3, .3, .2, .5, .1])  # pylint: disable=protected-access
        # when it is not the best it is not considered the best
        assert not new_trainer._is_best_so_far(.0013, [.3, .3, .3, .2, .5, .1])  # pylint: disable=protected-access

    def test_metric_only_considered_best_so_far_when_strictly_better_than_those_before_it_decreasing_metric(self):
        new_trainer = Trainer(self.model, self.optimizer,
                              self.iterator, self.instances,
                              validation_dataset=self.instances,
                              num_epochs=3, serialization_dir=self.TEST_DIR,
                              patience=5, validation_metric="-test")
        # when it is the only metric it should be considered the best
        assert new_trainer._is_best_so_far(1, [])  # pylint: disable=protected-access
        # when it is the same as one before it it is not considered the best
        assert not new_trainer._is_best_so_far(.3, [.3, .3, .3, .2, .5, .1])  # pylint: disable=protected-access
        # when it is the best it is considered the best
        assert new_trainer._is_best_so_far(.013, [.3, .3, .3, .2, .5, .1])  # pylint: disable=protected-access
        # when it is not the best it is not considered the best
        assert not new_trainer._is_best_so_far(13.00, [.3, .3, .3, .2, .5, .1])  # pylint: disable=protected-access

    def test_should_stop_early_with_increasing_metric(self):
        new_trainer = Trainer(self.model, self.optimizer,
                              self.iterator, self.instances,
                              validation_dataset=self.instances,
                              num_epochs=3, serialization_dir=self.TEST_DIR,
                              patience=5, validation_metric="+test")
        assert new_trainer._should_stop_early([.5, .3, .2, .1, .4, .4])  # pylint: disable=protected-access
        assert not new_trainer._should_stop_early([.3, .3, .3, .2, .5, .1])  # pylint: disable=protected-access

    def test_should_stop_early_with_flat_lining_metric(self):
        flatline = [.2] * 6
        assert Trainer(self.model, self.optimizer,
                       self.iterator, self.instances,
                       validation_dataset=self.instances,
                       num_epochs=3,
                       serialization_dir=self.TEST_DIR,
                       patience=5,
                       validation_metric="+test")._should_stop_early(flatline)  # pylint: disable=protected-access
        assert Trainer(self.model, self.optimizer,
                       self.iterator, self.instances,
                       validation_dataset=self.instances,
                       num_epochs=3,
                       serialization_dir=self.TEST_DIR,
                       patience=5,
                       validation_metric="-test")._should_stop_early(flatline)  # pylint: disable=protected-access

    def test_should_stop_early_with_decreasing_metric(self):
        new_trainer = Trainer(self.model, self.optimizer,
                              self.iterator, self.instances,
                              validation_dataset=self.instances,
                              num_epochs=3, serialization_dir=self.TEST_DIR,
                              patience=5, validation_metric="-test")
        assert new_trainer._should_stop_early([.02, .3, .2, .1, .4, .4])  # pylint: disable=protected-access
        assert not new_trainer._should_stop_early([.3, .3, .2, .1, .4, .5])  # pylint: disable=protected-access
        assert new_trainer._should_stop_early([.1, .3, .2, .1, .4, .5])  # pylint: disable=protected-access

    def test_should_stop_early_with_early_stopping_disabled(self):
        # Increasing metric
        trainer = Trainer(self.model, self.optimizer, self.iterator, self.instances,
                          validation_dataset=self.instances, num_epochs=100,
                          patience=None, validation_metric="+test")
        decreasing_history = [float(i) for i in reversed(range(20))]
        assert not trainer._should_stop_early(decreasing_history)  # pylint: disable=protected-access

        # Decreasing metric
        trainer = Trainer(self.model, self.optimizer, self.iterator, self.instances,
                          validation_dataset=self.instances, num_epochs=100,
                          patience=None, validation_metric="-test")
        increasing_history = [float(i) for i in range(20)]
        assert not trainer._should_stop_early(increasing_history)  # pylint: disable=protected-access

    def test_should_stop_early_with_invalid_patience(self):
        for patience in [0, -1, -2, 1.5, 'None']:
            with pytest.raises(ConfigurationError,
                               message='No ConfigurationError for patience={}'.format(patience)):
                Trainer(self.model, self.optimizer, self.iterator, self.instances,
                        validation_dataset=self.instances, num_epochs=100,
                        patience=patience, validation_metric="+test")

    def test_trainer_can_run_with_lr_scheduler(self):
        lr_params = Params({"type": "reduce_on_plateau"})
        lr_scheduler = LearningRateScheduler.from_params(self.optimizer, lr_params)
        trainer = Trainer(model=self.model,
                          optimizer=self.optimizer,
                          iterator=self.iterator,
                          learning_rate_scheduler=lr_scheduler,
                          validation_metric="-loss",
                          train_dataset=self.instances,
                          validation_dataset=self.instances,
                          num_epochs=2)
        trainer.train()

    def test_trainer_can_resume_with_lr_scheduler(self):
        # pylint: disable=protected-access
        lr_scheduler = LearningRateScheduler.from_params(
                self.optimizer, Params({"type": "exponential", "gamma": 0.5}))
        trainer = Trainer(model=self.model,
                          optimizer=self.optimizer,
                          iterator=self.iterator,
                          learning_rate_scheduler=lr_scheduler,
                          train_dataset=self.instances,
                          validation_dataset=self.instances,
                          num_epochs=2, serialization_dir=self.TEST_DIR)
        trainer.train()

        new_lr_scheduler = LearningRateScheduler.from_params(
                self.optimizer, Params({"type": "exponential", "gamma": 0.5}))
        new_trainer = Trainer(model=self.model,
                              optimizer=self.optimizer,
                              iterator=self.iterator,
                              learning_rate_scheduler=new_lr_scheduler,
                              train_dataset=self.instances,
                              validation_dataset=self.instances,
                              num_epochs=4, serialization_dir=self.TEST_DIR)
        epoch, _ = new_trainer._restore_checkpoint()
        assert epoch == 2
        assert new_trainer._learning_rate_scheduler.lr_scheduler.last_epoch == 1
        new_trainer.train()

    def test_trainer_raises_on_model_with_no_loss_key(self):
        class FakeModel(torch.nn.Module):
            def forward(self, **kwargs):  # pylint: disable=arguments-differ,unused-argument
                return {}

        with pytest.raises(RuntimeError):
            trainer = Trainer(FakeModel(), self.optimizer,
                              self.iterator, self.instances,
                              num_epochs=2, serialization_dir=self.TEST_DIR)
            trainer.train()

    def test_trainer_can_log_histograms(self):
        # enable activation logging
        for module in self.model.modules():
            module.should_log_activations = True

        trainer = Trainer(self.model, self.optimizer,
                          self.iterator, self.instances, num_epochs=3,
                          serialization_dir=self.TEST_DIR,
                          histogram_interval=2)
        trainer.train()

    def test_trainer_respects_num_serialized_models_to_keep(self):
        trainer = Trainer(self.model, self.optimizer,
                          self.iterator, self.instances, num_epochs=5,
                          serialization_dir=self.TEST_DIR,
                          num_serialized_models_to_keep=3)
        trainer.train()

        # Now check the serialized files
        for prefix in ['model_state_epoch_*', 'training_state_epoch_*']:
            file_names = glob.glob(os.path.join(self.TEST_DIR, prefix))
            epochs = [int(re.search(r"_([0-9])\.th", fname).group(1))
                      for fname in file_names]
            assert sorted(epochs) == [2, 3, 4]

    def test_trainer_saves_metrics_every_epoch(self):
        trainer = Trainer(model=self.model,
                          optimizer=self.optimizer,
                          iterator=self.iterator,
                          train_dataset=self.instances,
                          validation_dataset=self.instances,
                          num_epochs=5,
                          serialization_dir=self.TEST_DIR,
                          num_serialized_models_to_keep=3)
        trainer.train()

        for epoch in range(5):
            epoch_file = self.TEST_DIR / f'metrics_epoch_{epoch}.json'
            assert epoch_file.exists()
            metrics = json.load(open(epoch_file))
            assert "validation_loss" in metrics
            assert "best_validation_loss" in metrics
            assert metrics.get("epoch") == epoch

    def test_trainer_respects_keep_serialized_model_every_num_seconds(self):
        # To test:
        # Create an iterator that sleeps for 2.5 seconds per epoch, so the total
        # training time for one epoch is slightly greater than 2.5 seconds.
        # Run for 6 epochs, keeping the last 2 models, models also kept every 5 seconds.
        # Check the resulting checkpoints.  Should then have models at epochs
        # 2, 4, plus the last two at 5 and 6.
        class WaitingIterator(BasicIterator):
            # pylint: disable=arguments-differ
            def _create_batches(self, *args, **kwargs):
                time.sleep(2.5)
                return super(WaitingIterator, self)._create_batches(*args, **kwargs)

        iterator = WaitingIterator(batch_size=2)
        iterator.index_with(self.vocab)

        trainer = Trainer(self.model, self.optimizer,
                          iterator, self.instances, num_epochs=6,
                          serialization_dir=self.TEST_DIR,
                          num_serialized_models_to_keep=2,
                          keep_serialized_model_every_num_seconds=5)
        trainer.train()

        # Now check the serialized files
        for prefix in ['model_state_epoch_*', 'training_state_epoch_*']:
            file_names = glob.glob(os.path.join(self.TEST_DIR, prefix))
            epochs = [int(re.search(r"_([0-9])\.th", fname).group(1))
                      for fname in file_names]
            # epoch N has N-1 in file name
            assert sorted(epochs) == [1, 3, 4, 5]

    def test_trainer_can_log_learning_rates_tensorboard(self):
        iterator = BasicIterator(batch_size=4)
        iterator.index_with(self.vocab)

        trainer = Trainer(self.model, self.optimizer,
                          iterator, self.instances, num_epochs=2,
                          serialization_dir=self.TEST_DIR,
                          should_log_learning_rate=True,
                          summary_interval=2)
        trainer.train()

    def test_trainer_saves_models_at_specified_interval(self):
        iterator = BasicIterator(batch_size=4)
        iterator.index_with(self.vocab)

        trainer = Trainer(self.model, self.optimizer,
                          iterator, self.instances, num_epochs=2,
                          serialization_dir=self.TEST_DIR,
                          model_save_interval=0.0001)
        trainer.train()

        # Now check the serialized files for models saved during the epoch.
        prefix = 'model_state_epoch_*'
        file_names = sorted(glob.glob(os.path.join(self.TEST_DIR, prefix)))
        epochs = [re.search(r"_([0-9\.\-]+)\.th", fname).group(1)
                  for fname in file_names]
        # We should have checkpoints at the end of each epoch and during each, e.g.
        # [0.timestamp, 0, 1.timestamp, 1]
        assert len(epochs) == 4
        assert epochs[3] == '1'
        assert '.' in epochs[0]

        # Now make certain we can restore from timestamped checkpoint.
        # To do so, remove the checkpoint from the end of epoch 1 & 2, so
        # that we are forced to restore from the timestamped checkpoints.
        for k in range(2):
            os.remove(os.path.join(self.TEST_DIR, 'model_state_epoch_{}.th'.format(k)))
            os.remove(os.path.join(self.TEST_DIR, 'training_state_epoch_{}.th'.format(k)))
        os.remove(os.path.join(self.TEST_DIR, 'best.th'))

        restore_trainer = Trainer(self.model, self.optimizer,
                                  self.iterator, self.instances, num_epochs=2,
                                  serialization_dir=self.TEST_DIR,
                                  model_save_interval=0.0001)
        epoch, _ = restore_trainer._restore_checkpoint()  # pylint: disable=protected-access
        assert epoch == 2
        # One batch per epoch.
        assert restore_trainer._batch_num_total == 2  # pylint: disable=protected-access
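# Hedged note (the file path is a guess at the repository layout): this test
# class can be run on its own via pytest's node-id selection, e.g.
#   pytest allennlp/tests/training/trainer_test.py::TestTrainer -q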
def train_fever():
    num_epoch = 8
    seed = 12
    batch_size = 128
    experiment_name = "simple_nn"
    lazy = True
    torch.manual_seed(seed)
    keep_neg_sample_prob = 0.5
    sample_prob_decay = 0.1

    dev_upstream_file = config.RESULT_PATH / "sent_retri/2018_07_05_17:17:50_r/dev.jsonl"
    train_upstream_file = config.RESULT_PATH / "sent_retri/2018_07_05_17:17:50_r/train.jsonl"

    # Prepare Data
    token_indexers = {
        'tokens': SingleIdTokenIndexer(namespace='tokens'),  # This is the raw tokens
        'elmo_chars': ELMoTokenCharactersIndexer(namespace='elmo_characters')  # This is the elmo_characters
    }

    train_fever_data_reader = SSelectorReader(token_indexers=token_indexers, lazy=lazy)
    # dev_fever_data_reader = SSelectorReader(token_indexers=token_indexers, lazy=False)
    dev_fever_data_reader = SSelectorReader(token_indexers=token_indexers, lazy=lazy)

    complete_upstream_dev_data = get_full_list(config.T_FEVER_DEV_JSONL, dev_upstream_file, pred=True)
    print("Dev size:", len(complete_upstream_dev_data))
    dev_instances = dev_fever_data_reader.read(complete_upstream_dev_data)

    # Load Vocabulary
    biterator = BasicIterator(batch_size=batch_size)
    dev_biterator = BasicIterator(batch_size=batch_size)

    vocab, weight_dict = load_vocab_embeddings(config.DATA_ROOT / "vocab_cache" / "nli_basic")
    # This is important
    vocab.add_token_to_namespace("true", namespace="selection_labels")
    vocab.add_token_to_namespace("false", namespace="selection_labels")
    vocab.add_token_to_namespace("hidden", namespace="selection_labels")
    vocab.change_token_with_index_to_namespace("hidden", -2, namespace='selection_labels')

    # Label value
    vocab.get_index_to_token_vocabulary('selection_labels')

    print(vocab.get_token_to_index_vocabulary('selection_labels'))
    print(vocab.get_vocab_size('tokens'))

    biterator.index_with(vocab)
    dev_biterator.index_with(vocab)

    # exit(0)
    # Build Model
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu", index=0)
    device_num = -1 if device.type == 'cpu' else 0

    model = Model(weight=weight_dict['glove.840B.300d'],
                  vocab_size=vocab.get_vocab_size('tokens'),
                  embedding_dim=300, max_l=300, num_of_class=2)

    model.display()
    model.to(device)

    # Create Log File
    file_path_prefix, date = save_tool.gen_file_prefix(f"{experiment_name}")
    # Save the source code.
    script_name = os.path.basename(__file__)
    with open(os.path.join(file_path_prefix, script_name), 'w') as out_f, open(__file__, 'r') as it:
        out_f.write(it.read())
        out_f.flush()
    # Save source code end.

    best_dev = -1
    iteration = 0

    start_lr = 0.0002
    optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=start_lr)
    criterion = nn.CrossEntropyLoss()

    for i_epoch in range(num_epoch):
        print("Resampling...")
        # Resampling
        complete_upstream_train_data = get_full_list(config.T_FEVER_TRAIN_JSONL, train_upstream_file, pred=False)
        filtered_train_data = post_filter(complete_upstream_train_data, keep_prob=keep_neg_sample_prob,
                                          seed=12 + i_epoch)
        # Change the seed to avoid duplicate sample...
        keep_neg_sample_prob -= sample_prob_decay

        print("Sampled_length:", len(filtered_train_data))
        sampled_train_instances = train_fever_data_reader.read(filtered_train_data)

        train_iter = biterator(sampled_train_instances, shuffle=True, num_epochs=1, cuda_device=device_num)
        for i, batch in tqdm(enumerate(train_iter)):
            model.train()
            out = model(batch)
            y = batch['selection_label']

            loss = criterion(out, y)

            # No decay
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            iteration += 1

            if i_epoch <= 4:
                mod = 25000
            else:
                mod = 10000

            if iteration % mod == 0:
                eval_iter = dev_biterator(dev_instances, shuffle=False, num_epochs=1, cuda_device=device_num)
                dev_score, dev_loss, complete_upstream_dev_data = full_eval_model(model, eval_iter, criterion,
                                                                                  complete_upstream_dev_data)

                dev_results_list = score_converter_v0(config.T_FEVER_DEV_JSONL, complete_upstream_dev_data)
                eval_mode = {'check_sent_id_correct': True, 'standard': True}
                strict_score, acc_score, pr, rec, f1 = c_scorer.fever_score(dev_results_list,
                                                                            config.T_FEVER_DEV_JSONL,
                                                                            mode=eval_mode, verbose=False)
                total = len(dev_results_list)
                hit = eval_mode['check_sent_id_correct_hits']
                tracking_score = hit / total

                print(f"Dev(clf_acc/pr/rec/f1/loss):{dev_score}/{pr}/{rec}/{f1}/{dev_loss}")
                print(f"Tracking score:", f"{tracking_score}")

                need_save = False
                if tracking_score > best_dev:
                    best_dev = tracking_score
                    need_save = True

                if need_save:
                    save_path = os.path.join(
                        file_path_prefix,
                        f'i({iteration})_epoch({i_epoch})_'
                        f'(tra_score:{tracking_score}|clf_acc:{dev_score}|pr:{pr}|rec:{rec}|f1:{f1}|loss:{dev_loss})'
                    )
                    torch.save(model.state_dict(), save_path)
def multitask_learning(): # load datasetreader # Save logging to a local file # Multitasking log.getLogger().addHandler(log.FileHandler(directory+"/log.log")) lr = 0.00001 batch_size = 2 epochs = 10 max_seq_len = 512 max_span_width = 30 #import pdb #pdb.set_trace() #token_indexer = BertIndexer(pretrained_model="bert-base-uncased", max_pieces=max_seq_len, do_lowercase=True,) #token_indexer = PretrainedBertIndexer("bert-base-cased", do_lowercase=False) from allennlp.data.token_indexers.elmo_indexer import ELMoTokenCharactersIndexer # the token indexer is responsible for mapping tokens to integers token_indexer = ELMoTokenCharactersIndexer() def tokenizer(x: str): return [w.text for w in SpacyWordSplitter(language='en_core_web_sm', pos_tags=False).split_words(x)[:max_seq_len]] #conll_reader = ConllCorefBertReader(max_span_width = max_span_width, token_indexers = {"tokens": token_indexer}) conll_reader = ConllCorefReader(max_span_width = max_span_width, token_indexers = {"tokens": token_indexer}) swag_reader = SWAGDatasetReader(tokenizer=tokenizer, token_indexers = token_indexer) EMBEDDING_DIM = 1024 HIDDEN_DIM = 200 conll_datasets, swag_datasets = load_datasets(conll_reader, swag_reader, directory) conll_vocab = Vocabulary() conll_iterator = BasicIterator(batch_size=batch_size) conll_iterator.index_with(conll_vocab) swag_vocab = Vocabulary() swag_iterator = BasicIterator(batch_size=batch_size) swag_iterator.index_with(swag_vocab) from allennlp.modules.text_field_embedders import BasicTextFieldEmbedder from allennlp.modules.token_embedders import ElmoTokenEmbedder #bert_embedder = PretrainedBertEmbedder(pretrained_model="bert-base-cased",top_layer_only=True, requires_grad=True) options_file = 'https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x1024_128_2048cnn_1xhighway/elmo_2x1024_128_2048cnn_1xhighway_options.json' weight_file = 'https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x1024_128_2048cnn_1xhighway/elmo_2x1024_128_2048cnn_1xhighway_weights.hdf5' elmo_embedder = ElmoTokenEmbedder(options_file, weight_file) word_embedding = BasicTextFieldEmbedder({"tokens": elmo_embedder})#, allow_unmatched_keys=True) #word_embedding = BasicTextFieldEmbedder({"tokens": bert_embedder}, allow_unmatched_keys=True) #BERT_DIM = word_embedding.get_output_dim() ELMO_DIM = word_embedding.get_output_dim() seq2seq = PytorchSeq2SeqWrapper(torch.nn.LSTM(ELMO_DIM, HIDDEN_DIM, batch_first=True, bidirectional=True)) seq2vec = PytorchSeq2VecWrapper(torch.nn.LSTM(ELMO_DIM, HIDDEN_DIM, batch_first=True, bidirectional=True)) mention_feedforward = FeedForward(input_dim = 2336, num_layers = 2, hidden_dims = 150, activations = torch.nn.ReLU()) antecedent_feedforward = FeedForward(input_dim = 7776, num_layers = 2, hidden_dims = 150, activations = torch.nn.ReLU()) model1 = CoreferenceResolver(vocab=conll_vocab, text_field_embedder=word_embedding,context_layer= seq2seq, mention_feedforward=mention_feedforward,antecedent_feedforward=antecedent_feedforward , feature_size=768,max_span_width=max_span_width,spans_per_word=0.4,max_antecedents=250,lexical_dropout= 0.2) model2 = SWAGExampleModel(vocab=swag_vocab, text_field_embedder=word_embedding, phrase_encoder=seq2vec) optimizer1 = optim.Adam(model1.parameters(), lr=lr) optimizer2 = optim.Adam(model2.parameters(), lr=lr) swag_train_iterator = swag_iterator(swag_datasets[0], num_epochs=1, shuffle=True) conll_train_iterator = conll_iterator(conll_datasets[0], num_epochs=1, shuffle=True) swag_val_iterator = swag_iterator(swag_datasets[1], num_epochs=1, shuffle=True) 
conll_val_iterator = conll_iterator(conll_datasets[1], num_epochs=1, shuffle=True) task_infos = {"swag": {"model": model2, "optimizer": optimizer2, "loss": 0.0, "iterator": swag_iterator, "train_data": swag_datasets[0], "val_data": swag_datasets[1], "num_train": len(swag_datasets[0]), "num_val": len(swag_datasets[1]), "lr": lr, "score": {"accuracy":0.0}}, \ "conll": {"model": model1, "iterator": conll_iterator, "loss": 0.0, "val_data": conll_datasets[1], "train_data": conll_datasets[0], "optimizer": optimizer1, "num_train": len(conll_datasets[0]), "num_val": len(conll_datasets[1]),"lr": lr, "score": {"coref_prediction": 0.0, "coref_recall": 0.0, "coref_f1": 0.0,"mention_recall": 0.0}}} USE_GPU = 1 trainer = MultiTaskTrainer( task_infos=task_infos, num_epochs=epochs, serialization_dir=directory + "saved_models/multitask/" ) metrics = trainer.train()
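# For reference — one plausible way a multi-task loop could consume the task_infos
# structure built above. This is a hypothetical sketch, NOT the actual MultiTaskTrainer
# (whose internals are not shown here); it assumes each entry holds an AllenNLP model,
# optimizer, iterator, and indexed train_data, and that models return a dict with 'loss'.
def one_multitask_pass(task_infos):
    for name, info in task_infos.items():
        model, optimizer = info["model"], info["optimizer"]
        batches = info["iterator"](info["train_data"], num_epochs=1, shuffle=True)
        batch = next(batches)            # take a single batch from this task
        model.train()
        loss = model(**batch)["loss"]    # AllenNLP models return {'loss': ...}
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        info["loss"] += loss.item()      # accumulate the per-task running loss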
class TextInAllAspectSentimentOutTrainTemplate( ModelTrainTemplate.ModelTrainTemplate): """ """ def __init__(self, configuration): super().__init__(configuration) self.data_reader: DatasetReader = None self.train_data = None self.dev_data = None self.test_data = None self.hard_test_data = None self.distinct_categories: List[str] = None self.distinct_polarities: List[str] = None self._load_data() self._get_max_sentence_len() if self.configuration['debug']: self.train_data = self.train_data[:128] self.dev_data = self.dev_data[:128] self.test_data = self.test_data[:128] self.vocab = None self._build_vocab() self.iterator = None self.val_iterator = None self._build_iterator() self.acd_model_dir = self.model_dir + 'acd/' def _get_max_sentence_len(self): len_count = collections.defaultdict(int) for data in [self.train_data, self.test_data, self.dev_data]: if data is None: continue for sample in data: tokens = sample.fields['tokens'].tokens # tokens = sample.fields['sample'].metadata[4] # if len(tokens) > self.configuration['max_len']: # print(tokens) len_count[len(tokens)] += 1 len_count_list = [[items[0], items[1]] for items in len_count.items()] len_count_list.sort(key=lambda x: x[0]) self.logger.info('len_count_list: %s' % str(len_count_list)) def _get_data_reader(self): token_indexer = SingleIdTokenIndexer(namespace="tokens") position_indexer = SingleIdTokenIndexer(namespace='position') aspect_indexer = SingleIdTokenIndexer(namespace='aspect') reader = acd_and_sc_data_reader.TextInAllAspectSentimentOut( self.distinct_categories, self.distinct_polarities, tokenizer=self._get_word_segmenter(), token_indexers={"tokens": token_indexer}, position_indexers={'position': position_indexer}, aspect_indexers={'aspect': aspect_indexer}, configuration=self.configuration) return reader def _load_data(self): data_filepath = self.base_data_dir + 'data' if os.path.exists(data_filepath): self.train_data, self.dev_data, self.test_data, self.distinct_categories, self.distinct_polarities, \ self.hard_test_data = super()._load_object(data_filepath) reader = self._get_data_reader() self.data_reader = reader else: train_dev_test_data, distinct_categories, distinct_polarities = self.dataset. 
\ generate_acd_and_sc_data(dev_size=0.2) if self.configuration['hard_test']: train_dev_test_data['hard_test'] = [] for sample in train_dev_test_data['test']: polarities = set([e[1] for e in sample[1]]) if len(polarities) >= 2: train_dev_test_data['hard_test'].append(sample) distinct_polarities_new = [] for polarity in distinct_polarities: if polarity != 'conflict': distinct_polarities_new.append(polarity) self.distinct_categories = distinct_categories self.distinct_polarities = distinct_polarities_new train_dev_test_data_label_indexed = {} for data_type, data in train_dev_test_data.items(): if data is None: continue data_new = [] for sample in data: sample_new = [sample[0]] labels_new = [] for label in sample[1]: aspect = label[0] polarity = label[1] aspect_index = distinct_categories.index(aspect) if polarity == 'conflict': polarity_index = -100 else: polarity_index = distinct_polarities_new.index( polarity) labels_new.append((aspect_index, polarity_index)) if len(labels_new) != 0: sample_new.append(labels_new) data_new.append(sample_new) train_dev_test_data_label_indexed[data_type] = data_new reader = self._get_data_reader() self.data_reader = reader self.train_data = reader.read( train_dev_test_data_label_indexed['train']) self.dev_data = reader.read( train_dev_test_data_label_indexed['dev']) self.test_data = reader.read( train_dev_test_data_label_indexed['test']) if self.configuration['hard_test']: self.hard_test_data = reader.read( train_dev_test_data_label_indexed['hard_test']) data = [ self.train_data, self.dev_data, self.test_data, self.distinct_categories, self.distinct_polarities, self.hard_test_data ] super()._save_object(data_filepath, data) def _build_vocab(self): if self.configuration['train']: vocab_file_path = self.base_data_dir + 'vocab' if os.path.exists(vocab_file_path): self.vocab = super()._load_object(vocab_file_path) else: data = self.train_data + self.dev_data + self.test_data self.vocab = Vocabulary.from_instances( data, max_vocab_size=sys.maxsize) super()._save_object(vocab_file_path, self.vocab) self.model_meta_data['vocab'] = self.vocab else: self.vocab = self.model_meta_data['vocab'] def _build_iterator(self): self.iterator = BucketIterator( batch_size=self.configuration['batch_size'], sorting_keys=[("tokens", "num_tokens")], ) self.iterator.index_with(self.vocab) self.val_iterator = BasicIterator( batch_size=self.configuration['batch_size']) self.val_iterator.index_with(self.vocab) def _print_args(self, model): n_trainable_params, n_nontrainable_params = 0, 0 for p in model.parameters(): n_params = torch.prod(torch.tensor(p.shape)).item() if p.requires_grad: n_trainable_params += n_params else: n_nontrainable_params += n_params self.logger.info( 'n_trainable_params: {0}, n_nontrainable_params: {1}'.format( n_trainable_params, n_nontrainable_params)) self.logger.info('> training arguments:') for arg in self.configuration.keys(): self.logger.info('>>> {0}: {1}'.format(arg, self.configuration[arg])) def _find_model_function_pure(self): raise NotImplementedError('_find_model_function_pure') def _get_aspect_embeddings_dim(self): return 300 def _get_position_embeddings_dim(self): return 300 def _is_train_token_embeddings(self): return False def _find_model_function(self): embedding_dim = self.configuration['embed_size'] embedding_matrix_filepath = self.base_data_dir + 'embedding_matrix' if os.path.exists(embedding_matrix_filepath): embedding_matrix = super()._load_object(embedding_matrix_filepath) else: embedding_filepath = self.configuration['embedding_filepath'] 
embedding_matrix = embedding._read_embeddings_from_text_file( embedding_filepath, embedding_dim, self.vocab, namespace='tokens') super()._save_object(embedding_matrix_filepath, embedding_matrix) embedding_matrix = embedding_matrix.to(self.configuration['device']) token_embedding = Embedding( num_embeddings=self.vocab.get_vocab_size(namespace='tokens'), embedding_dim=embedding_dim, padding_index=0, vocab_namespace='tokens', trainable=self._is_train_token_embeddings(), weight=embedding_matrix) # the embedder maps the input tokens to the appropriate embedding matrix word_embedder: TextFieldEmbedder = BasicTextFieldEmbedder( {"tokens": token_embedding}) position_embedding = Embedding( num_embeddings=self.vocab.get_vocab_size(namespace='position'), embedding_dim=self._get_position_embeddings_dim(), padding_index=0) position_embedder: TextFieldEmbedder = BasicTextFieldEmbedder( {"position": position_embedding}, # we'll be ignoring masks so we'll need to set this to True allow_unmatched_keys=True) aspect_embedding = Embedding( num_embeddings=self.vocab.get_vocab_size(namespace='aspect'), embedding_dim=self._get_aspect_embeddings_dim(), padding_index=0) aspect_embedder: TextFieldEmbedder = BasicTextFieldEmbedder( {"aspect": aspect_embedding}, # we'll be ignoring masks so we'll need to set this to True allow_unmatched_keys=True) model_function: pytorch_models.TextInAllAspectSentimentOutModel = self._find_model_function_pure( ) model = model_function( word_embedder, position_embedder, aspect_embedder, self.distinct_categories, self.distinct_polarities, self.vocab, self.configuration, ) self._print_args(model) model = model.to(self.configuration['device']) return model def _get_optimizer(self, model): _params = filter(lambda p: p.requires_grad, model.parameters()) return optim.Adam(_params, lr=0.001, weight_decay=0.00001) def _get_acd_optimizer(self, model): _params = filter(lambda p: p.requires_grad, model.parameters()) return optim.Adam(_params, lr=0.001, weight_decay=0.00001) def _get_acd_warmup_epoch_num(self): return 3 def _get_estimator(self, model): USE_GPU = torch.cuda.is_available() if USE_GPU: gpu_id = self.configuration['gpu_id'] else: gpu_id = -1 estimator = pytorch_models.TextInAllAspectSentimentOutEstimator( model, self.val_iterator, self.distinct_categories, self.distinct_polarities, configuration=self.configuration, cuda_device=gpu_id) return estimator def _get_estimate_callback(self, model): result = [] data_type_and_data = { 'train': self.train_data, 'dev': self.dev_data, 'test': self.test_data } if self.hard_test_data: data_type_and_data['hard_test'] = self.hard_test_data estimator = self._get_estimator(model) estimate_callback = allennlp_callback.EstimateCallback( data_type_and_data, estimator, self.logger) result.append(estimate_callback) return result def _get_loss_weight_callback(self): result = [] set_loss_weight_callback = allennlp_callback.SetLossWeightCallback( self.model, self.logger, acd_warmup_epoch_num=self._get_acd_warmup_epoch_num()) result.append(set_loss_weight_callback) return result def _get_fixed_loss_weight_callback(self, model, category_loss_weight=1, sentiment_loss_weight=1): result = [] fixed_loss_weight_callback = allennlp_callback.FixedLossWeightCallback( model, self.logger, category_loss_weight=category_loss_weight, sentiment_loss_weight=sentiment_loss_weight) result.append(fixed_loss_weight_callback) return result def _get_bert_word_embedder(self): return None def _inner_train(self): USE_GPU = torch.cuda.is_available() if USE_GPU: gpu_id = 
self.configuration['gpu_id'] else: gpu_id = -1 self.model: pytorch_models.TextInAllAspectSentimentOutModel = self._find_model_function( ) estimator = self._get_estimator(self.model) if self.configuration['acd_warmup']: if self.configuration[ 'frozen_all_acsc_parameter_while_pretrain_acd']: self.model.set_grad_for_acsc_parameter(requires_grad=False) optimizer = self._get_acd_optimizer(self.model) self.logger.info('acd warmup') validation_metric = '+category_f1' callbacks = self._get_estimate_callback(self.model) callbacks.extend( self._get_fixed_loss_weight_callback(self.model, category_loss_weight=1, sentiment_loss_weight=0)) self._print_args(self.model) trainer = Trainer( model=self.model, optimizer=optimizer, iterator=self.iterator, train_dataset=self.train_data, validation_dataset=self.dev_data, cuda_device=gpu_id, num_epochs=self.configuration['acd_warmup_epochs'], validation_metric=validation_metric, validation_iterator=self.val_iterator, serialization_dir=self.acd_model_dir, patience=None if self.configuration['acd_warmup_patience'] == -1 else self.configuration['acd_warmup_patience'], callbacks=callbacks, num_serialized_models_to_keep=2, early_stopping_by_batch=self.configuration['early_stopping_by_batch'], estimator=estimator, grad_clipping=5) metrics = trainer.train() self.logger.info('acd metrics: %s' % str(metrics)) if self.configuration[ 'frozen_all_acsc_parameter_while_pretrain_acd']: self.model.set_grad_for_acsc_parameter(requires_grad=True) # Restore BERT to its initial state if 'bert' in self.configuration and self.configuration['bert']: self.model.set_bert_word_embedder() bert_word_embedder = self._get_bert_word_embedder() self.model.set_bert_word_embedder(bert_word_embedder) if self.configuration['only_acd']: return None validation_metric = '+accuracy' if 'early_stopping_metric' in self.configuration: validation_metric = '+%s' % self.configuration[ 'early_stopping_metric'] callbacks = self._get_estimate_callback(self.model) if self.configuration['acd_warmup'] and self.configuration['pipeline']: callbacks.extend( self._get_fixed_loss_weight_callback(self.model, category_loss_weight=0, sentiment_loss_weight=1)) # ACD-related parameters are not updated self.model.no_grad_for_acd_parameter() else: callbacks.extend( self._get_fixed_loss_weight_callback( self.model, category_loss_weight=self.configuration['acd_init_weight'], sentiment_loss_weight=1)) self.logger.info('validation_metric: %s' % validation_metric) optimizer = self._get_optimizer(self.model) self._print_args(self.model) trainer = Trainer(model=self.model, optimizer=optimizer, iterator=self.iterator, train_dataset=self.train_data, validation_dataset=self.dev_data if self.configuration['early_stopping'] else None, cuda_device=gpu_id, num_epochs=self.configuration['epochs'], validation_metric=validation_metric, validation_iterator=self.val_iterator, serialization_dir=self.model_dir, patience=self.configuration['patience'], callbacks=callbacks, num_serialized_models_to_keep=2, early_stopping_by_batch=
self.configuration['early_stopping_by_batch'], estimator=estimator, grad_clipping=5) metrics = trainer.train() self.logger.info('metrics: %s' % str(metrics)) def _save_model(self): torch.save(self.model, self.best_model_filepath) def _load_model(self): if torch.cuda.is_available(): self.model = torch.load(self.best_model_filepath) else: self.model = torch.load(self.best_model_filepath, map_location=torch.device('cpu')) self.model.configuration = self.configuration def evaluate(self): USE_GPU = torch.cuda.is_available() if USE_GPU: gpu_id = self.configuration['gpu_id'] else: gpu_id = -1 estimator = pytorch_models.TextInAllAspectSentimentOutEstimator( self.model, self.val_iterator, self.distinct_categories, self.distinct_polarities, configuration=self.configuration, cuda_device=gpu_id) data_type_and_data = { # 'train': self.train_data, 'dev': self.dev_data, 'test': self.test_data } if self.hard_test_data: data_type_and_data['hard_test'] = self.hard_test_data if 'performance_of_different_lengths' in self.configuration: lengths = self.configuration[ 'performance_of_different_lengths'].split(',') if len(lengths) > 1: data_of_different_lengths = { int(length): [] for length in lengths } for sample in data_type_and_data['test']: tokens = sample.fields['tokens'].tokens for length in data_of_different_lengths: if len(tokens) <= length: data_of_different_lengths[length].append(sample) for length, data in data_of_different_lengths.items(): if len(data) > 0: data_type_and_data['test_%d' % length] = data for data_type, data in data_type_and_data.items(): result = estimator.estimate(data) self.logger.info('data_type: %s result: %s' % (data_type, result)) def predict(self, texts: List[str] = None): """ :param texts: if texts is None, the test set from training time is used :return: """ USE_GPU = torch.cuda.is_available() if USE_GPU: gpu_id = self.configuration['gpu_id'] else: gpu_id = -1 predictor = pytorch_models.TextInAllAspectSentimentOutPredictor( self.model, self.val_iterator, self.distinct_categories, self.distinct_polarities, configuration=self.configuration, cuda_device=gpu_id) data = self.data_reader.read(texts) result = predictor.predict(data) return result def error_analysis(self): """ :return: """ USE_GPU = torch.cuda.is_available() if USE_GPU: gpu_id = self.configuration['gpu_id'] else: gpu_id = -1 predictor = pytorch_models.TextInAllAspectSentimentOutPredictor( self.model, self.val_iterator, self.distinct_categories, self.distinct_polarities, configuration=self.configuration, cuda_device=gpu_id) data = self.test_data result = predictor.predict(data) result_final = [] for i in range(len(data)): instance: Instance = data[i] metadata = instance.fields['sample'].metadata sentence = metadata[0] labels_true = { self.distinct_categories[e[0]]: self.distinct_polarities[e[1]] for e in metadata[1] } labels_pred = result[i] for label_pred in labels_pred: label_true = labels_true[label_pred[0]] if label_true == label_pred[1]: continue result_final.append( (sentence, label_pred[0], label_pred[1], label_true)) result_str = ['\t'.join(e) for e in result_final] output_filepath = os.path.join(self.model_dir, 'error_analysis.csv') file_utils.write_lines(result_str, output_filepath) return result_final
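# Since TextInAllAspectSentimentOutTrainTemplate leaves _find_model_function_pure
# abstract, a concrete trainer only needs to supply a model constructor. A minimal,
# hypothetical subclass (the class name is invented for illustration; the returned
# constructor is taken from the type annotation in _find_model_function above):
class MyAcdScTrainTemplate(TextInAllAspectSentimentOutTrainTemplate):
    def _find_model_function_pure(self):
        # _find_model_function() calls the returned constructor with
        # (word_embedder, position_embedder, aspect_embedder,
        #  distinct_categories, distinct_polarities, vocab, configuration).
        return pytorch_models.TextInAllAspectSentimentOutModel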
class TestTrainer(AllenNlpTestCase): def setUp(self): super(TestTrainer, self).setUp() self.instances = SequenceTaggingDatasetReader().read( 'tests/fixtures/data/sequence_tagging.tsv') vocab = Vocabulary.from_instances(self.instances) self.vocab = vocab self.model_params = Params({ "text_field_embedder": { "tokens": { "type": "embedding", "embedding_dim": 5 } }, "encoder": { "type": "lstm", "input_size": 5, "hidden_size": 7, "num_layers": 2 } }) self.model = SimpleTagger.from_params(self.vocab, self.model_params) self.optimizer = torch.optim.SGD(self.model.parameters(), 0.01) self.iterator = BasicIterator(batch_size=2) self.iterator.index_with(vocab) def test_trainer_can_run(self): trainer = Trainer(model=self.model, optimizer=self.optimizer, iterator=self.iterator, train_dataset=self.instances, validation_dataset=self.instances, num_epochs=2) metrics = trainer.train() assert 'best_validation_loss' in metrics assert isinstance(metrics['best_validation_loss'], float) assert 'best_epoch' in metrics assert isinstance(metrics['best_epoch'], int) # Making sure that both increasing and decreasing validation metrics work. trainer = Trainer(model=self.model, optimizer=self.optimizer, iterator=self.iterator, train_dataset=self.instances, validation_dataset=self.instances, validation_metric='+loss', num_epochs=2) metrics = trainer.train() assert 'best_validation_loss' in metrics assert isinstance(metrics['best_validation_loss'], float) assert 'best_epoch' in metrics assert isinstance(metrics['best_epoch'], int) @pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device registered.") def test_trainer_can_run_cuda(self): trainer = Trainer(self.model, self.optimizer, self.iterator, self.instances, num_epochs=2, cuda_device=0) trainer.train() @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="Need multiple GPUs.") def test_trainer_can_run_multiple_gpu(self): multigpu_iterator = BasicIterator(batch_size=4) multigpu_iterator.index_with(self.vocab) trainer = Trainer(self.model, self.optimizer, multigpu_iterator, self.instances, num_epochs=2, cuda_device=[0, 1]) trainer.train() def test_trainer_can_resume_training(self): trainer = Trainer(self.model, self.optimizer, self.iterator, self.instances, validation_dataset=self.instances, num_epochs=1, serialization_dir=self.TEST_DIR) trainer.train() new_trainer = Trainer(self.model, self.optimizer, self.iterator, self.instances, validation_dataset=self.instances, num_epochs=3, serialization_dir=self.TEST_DIR) epoch, val_metrics_per_epoch = new_trainer._restore_checkpoint() # pylint: disable=protected-access assert epoch == 1 assert len(val_metrics_per_epoch) == 1 assert isinstance(val_metrics_per_epoch[0], float) assert val_metrics_per_epoch[0] != 0. 
new_trainer.train() def test_should_stop_early_with_increasing_metric(self): new_trainer = Trainer(self.model, self.optimizer, self.iterator, self.instances, validation_dataset=self.instances, num_epochs=3, serialization_dir=self.TEST_DIR, patience=5, validation_metric="+test") assert new_trainer._should_stop_early([.5, .3, .2, .1, .4, .4]) # pylint: disable=protected-access assert not new_trainer._should_stop_early([.3, .3, .3, .2, .5, .1]) # pylint: disable=protected-access def test_should_stop_early_with_decreasing_metric(self): new_trainer = Trainer(self.model, self.optimizer, self.iterator, self.instances, validation_dataset=self.instances, num_epochs=3, serialization_dir=self.TEST_DIR, patience=5, validation_metric="-test") assert new_trainer._should_stop_early([.02, .3, .2, .1, .4, .4]) # pylint: disable=protected-access assert not new_trainer._should_stop_early([.3, .3, .2, .1, .4, .5]) # pylint: disable=protected-access def test_trainer_can_run_with_lr_scheduler(self): lr_params = Params({"type": "reduce_on_plateau"}) lr_scheduler = LearningRateScheduler.from_params( self.optimizer, lr_params) trainer = Trainer(model=self.model, optimizer=self.optimizer, iterator=self.iterator, learning_rate_scheduler=lr_scheduler, validation_metric="-loss", train_dataset=self.instances, validation_dataset=self.instances, num_epochs=2) trainer.train() def test_trainer_raises_on_model_with_no_loss_key(self): class FakeModel(torch.nn.Module): def forward(self, **kwargs): # pylint: disable=arguments-differ,unused-argument return {} with pytest.raises(RuntimeError): trainer = Trainer(FakeModel(), self.optimizer, self.iterator, self.instances, num_epochs=2, serialization_dir=self.TEST_DIR) trainer.train() def test_trainer_can_log_histograms(self): # enable activation logging for module in self.model.modules(): module.should_log_activations = True trainer = Trainer(self.model, self.optimizer, self.iterator, self.instances, num_epochs=3, serialization_dir=self.TEST_DIR, histogram_interval=2) trainer.train() def test_trainer_respects_num_serialized_models_to_keep(self): trainer = Trainer(self.model, self.optimizer, self.iterator, self.instances, num_epochs=5, serialization_dir=self.TEST_DIR, num_serialized_models_to_keep=3) trainer.train() # Now check the serialized files for prefix in ['model_state_epoch_*', 'training_state_epoch_*']: file_names = glob.glob(os.path.join(self.TEST_DIR, prefix)) epochs = [ int(re.search(r"_([0-9])\.th", fname).group(1)) for fname in file_names ] assert sorted(epochs) == [2, 3, 4] def test_trainer_respects_keep_serialized_model_every_num_seconds(self): # To test: # Create an iterator that sleeps for 0.5 second per epoch, so the total training # time for one epoch is slightly greater than 0.5 seconds. # Run for 6 epochs, keeping the last 2 models, with models also kept every 1 second. # Check the resulting checkpoints. Should then have models at epochs # 2, 4, plus the last two at 5 and 6.
class WaitingIterator(BasicIterator): # pylint: disable=arguments-differ def _create_batches(self, *args, **kwargs): time.sleep(0.5) return super(WaitingIterator, self)._create_batches(*args, **kwargs) iterator = WaitingIterator(batch_size=2) iterator.index_with(self.vocab) trainer = Trainer(self.model, self.optimizer, iterator, self.instances, num_epochs=6, serialization_dir=self.TEST_DIR, num_serialized_models_to_keep=2, keep_serialized_model_every_num_seconds=1) trainer.train() # Now check the serialized files for prefix in ['model_state_epoch_*', 'training_state_epoch_*']: file_names = glob.glob(os.path.join(self.TEST_DIR, prefix)) epochs = [ int(re.search(r"_([0-9])\.th", fname).group(1)) for fname in file_names ] # epoch N has N-1 in file name assert sorted(epochs) == [1, 3, 4, 5] def test_trainer_saves_models_at_specified_interval(self): iterator = BasicIterator(batch_size=4) iterator.index_with(self.vocab) trainer = Trainer(self.model, self.optimizer, iterator, self.instances, num_epochs=2, serialization_dir=self.TEST_DIR, model_save_interval=0.0001) trainer.train() # Now check the serialized files for models saved during the epoch. prefix = 'model_state_epoch_*' file_names = sorted(glob.glob(os.path.join(self.TEST_DIR, prefix))) epochs = [ re.search(r"_([0-9\.\-]+)\.th", fname).group(1) for fname in file_names ] # We should have checkpoints at the end of each epoch and during each, e.g. # [0.timestamp, 0, 1.timestamp, 1] assert len(epochs) == 4 assert epochs[3] == '1' assert '.' in epochs[0] # Now make certain we can restore from timestamped checkpoint. # To do so, remove the checkpoint from the end of epoch 1&2, so # that we are forced to restore from the timestamped checkpoints. for k in range(2): os.remove( os.path.join(self.TEST_DIR, 'model_state_epoch_{}.th'.format(k))) os.remove( os.path.join(self.TEST_DIR, 'training_state_epoch_{}.th'.format(k))) os.remove(os.path.join(self.TEST_DIR, 'best.th')) restore_trainer = Trainer(self.model, self.optimizer, self.iterator, self.instances, num_epochs=2, serialization_dir=self.TEST_DIR, model_save_interval=0.0001) epoch, _ = restore_trainer._restore_checkpoint() # pylint: disable=protected-access assert epoch == 2 # One batch per epoch. assert restore_trainer._batch_num_total == 2 # pylint: disable=protected-access
def train_fever(): num_epoch = 8 seed = 12 batch_size = 32 experiment_name = "mesim_elmo" lazy = True dev_upstream_file = config.RESULT_PATH / "sent_retri/2018_07_05_17:17:50_r/dev.jsonl" train_upstream_file = config.RESULT_PATH / "sent_retri/2018_07_05_17:17:50_r/train.jsonl" # Prepare Data token_indexers = { 'tokens': SingleIdTokenIndexer(namespace='tokens'), # This is the raw tokens 'elmo_chars': ELMoTokenCharactersIndexer(namespace='elmo_characters') # This is the elmo_characters } train_fever_data_reader = BasicReader(token_indexers=token_indexers, lazy=lazy, max_l=360) dev_fever_data_reader = BasicReader(token_indexers=token_indexers, lazy=lazy, max_l=360) complete_upstream_dev_data = get_actual_data(config.T_FEVER_DEV_JSONL, dev_upstream_file) dev_instances = dev_fever_data_reader.read(complete_upstream_dev_data) # Load Vocabulary biterator = BasicIterator(batch_size=batch_size) vocab, weight_dict = load_vocab_embeddings(config.DATA_ROOT / "vocab_cache" / "nli_basic") vocab.change_token_with_index_to_namespace('hidden', -2, namespace='labels') print(vocab.get_token_to_index_vocabulary('labels')) print(vocab.get_vocab_size('tokens')) biterator.index_with(vocab) # Build Model device = torch.device("cuda" if torch.cuda.is_available() else "cpu", index=0) device_num = -1 if device.type == 'cpu' else 0 model = Model(weight=weight_dict['glove.840B.300d'], vocab_size=vocab.get_vocab_size('tokens'), embedding_dim=300, max_l=300) model.display() model.to(device) # Create Log File file_path_prefix, date = save_tool.gen_file_prefix(f"{experiment_name}") best_dev = -1 iteration = 0 start_lr = 0.0002 optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=start_lr) criterion = nn.CrossEntropyLoss() for i_epoch in range(num_epoch): print("Resampling...") # Resampling complete_upstream_train_data = get_sampled_data(config.T_FEVER_TRAIN_JSONL, train_upstream_file) sampled_train_instances = train_fever_data_reader.read(complete_upstream_train_data) train_iter = biterator(sampled_train_instances, shuffle=True, num_epochs=1, cuda_device=device_num) for i, batch in tqdm(enumerate(train_iter)): model.train() out = model(batch) y = batch['label'] loss = criterion(out, y) # No decay optimizer.zero_grad() loss.backward() optimizer.step() iteration += 1 if i_epoch <= 4: mod = 5000 else: mod = 200 if iteration % mod == 0: eval_iter = biterator(dev_instances, shuffle=False, num_epochs=1, cuda_device=device_num) dev_score, dev_loss = full_eval_model(model, eval_iter, criterion, complete_upstream_dev_data) print(f"Dev:{dev_score}/{dev_loss}") need_save = False if dev_score > best_dev: best_dev = dev_score need_save = True if need_save: save_path = os.path.join( file_path_prefix, f'i({iteration})_epoch({i_epoch})_dev({dev_score})_loss({dev_loss})_seed({seed})' ) torch.save(model.state_dict(), save_path)
def debug_fever(): num_epoch = 8 seed = 12 batch_size = 128 experiment_name = "simple_nn" lazy = True torch.manual_seed(seed) keep_neg_sample_prob = 0.6 sample_prob_decay = 0.1 dev_upstream_file = config.RESULT_PATH / "doc_retri/cn_util_Jul17_docretri.singularize/dev.jsonl" train_upstream_file = config.RESULT_PATH / "doc_retri/cn_util_Jul17_docretri.singularize/train.jsonl" # Prepare Data token_indexers = { 'tokens': SingleIdTokenIndexer(namespace='tokens'), # This is the raw tokens 'elmo_chars': ELMoTokenCharactersIndexer(namespace='elmo_characters') # This is the elmo_characters } train_fever_data_reader = SSelectorReader(token_indexers=token_indexers, lazy=lazy, max_l=300) # dev_fever_data_reader = SSelectorReader(token_indexers=token_indexers, lazy=False) dev_fever_data_reader = SSelectorReader(token_indexers=token_indexers, lazy=lazy, max_l=300) complete_upstream_dev_data = get_full_list(config.T_FEVER_DEV_JSONL, dev_upstream_file, pred=True) print("Dev size:", len(complete_upstream_dev_data)) dev_instances = dev_fever_data_reader.read(complete_upstream_dev_data) # Load Vocabulary biterator = BasicIterator(batch_size=batch_size) dev_biterator = BasicIterator(batch_size=batch_size) vocab, weight_dict = load_vocab_embeddings(config.DATA_ROOT / "vocab_cache" / "nli_basic") # This is important vocab.add_token_to_namespace("true", namespace="selection_labels") vocab.add_token_to_namespace("false", namespace="selection_labels") vocab.add_token_to_namespace("hidden", namespace="selection_labels") vocab.change_token_with_index_to_namespace("hidden", -2, namespace='selection_labels') # Label value vocab.get_index_to_token_vocabulary('selection_labels') print(vocab.get_token_to_index_vocabulary('selection_labels')) print(vocab.get_vocab_size('tokens')) biterator.index_with(vocab) dev_biterator.index_with(vocab) # exit(0) # Build Model device = torch.device("cuda" if torch.cuda.is_available() else "cpu", index=0) device_num = -1 if device.type == 'cpu' else 0 model = Model(weight=weight_dict['glove.840B.300d'], vocab_size=vocab.get_vocab_size('tokens'), embedding_dim=300, max_l=280, num_of_class=2) model.display() model.to(device) # Create Log File file_path_prefix, date = save_tool.gen_file_prefix(f"{experiment_name}") # Save the source code. script_name = os.path.basename(__file__) with open(os.path.join(file_path_prefix, script_name), 'w') as out_f, open(__file__, 'r') as it: out_f.write(it.read()) out_f.flush() # Save source code end.
best_dev = -1 iteration = 0 i_epoch = 0 start_lr = 0.0002 optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=start_lr) criterion = nn.CrossEntropyLoss() eval_iter = dev_biterator(dev_instances, shuffle=False, num_epochs=1, cuda_device=device_num) complete_upstream_dev_data = hidden_eval(model, eval_iter, complete_upstream_dev_data) dev_results_list = score_converter_v0(config.T_FEVER_DEV_JSONL, complete_upstream_dev_data) eval_mode = {'check_sent_id_correct': True, 'standard': True} strict_score, acc_score, pr, rec, f1 = c_scorer.fever_score(dev_results_list, config.T_FEVER_DEV_JSONL, mode=eval_mode, verbose=False) total = len(dev_results_list) hit = eval_mode['check_sent_id_correct_hits'] tracking_score = hit / total print(f"Dev(raw_acc/pr/rec/f1):{acc_score}/{pr}/{rec}/{f1}/") print("Strict score:", strict_score) print(f"Eval Tracking score:", f"{tracking_score}") need_save = False if tracking_score > best_dev: best_dev = tracking_score need_save = True if need_save: save_path = os.path.join( file_path_prefix, f'i({iteration})_epoch({i_epoch})_' f'(tra_score:{tracking_score}|raw_acc:{acc_score}|pr:{pr}|rec:{rec}|f1:{f1})' ) torch.save(model.state_dict(), save_path) print("Epoch Evaluation...") eval_iter = dev_biterator(dev_instances, shuffle=False, num_epochs=1, cuda_device=device_num) complete_upstream_dev_data = hidden_eval(model, eval_iter, complete_upstream_dev_data) dev_results_list = score_converter_v0(config.T_FEVER_DEV_JSONL, complete_upstream_dev_data) eval_mode = {'check_sent_id_correct': True, 'standard': True} strict_score, acc_score, pr, rec, f1 = c_scorer.fever_score(dev_results_list, config.T_FEVER_DEV_JSONL, mode=eval_mode, verbose=False) total = len(dev_results_list) hit = eval_mode['check_sent_id_correct_hits'] tracking_score = hit / total print(f"Dev(raw_acc/pr/rec/f1):{acc_score}/{pr}/{rec}/{f1}/") print("Strict score:", strict_score) print(f"Eval Tracking score:", f"{tracking_score}") if tracking_score > best_dev: best_dev = tracking_score save_path = os.path.join( file_path_prefix, f'i({iteration})_epoch({i_epoch})_' f'(tra_score:{tracking_score}|raw_acc:{acc_score}|pr:{pr}|rec:{rec}|f1:{f1})_epoch' ) torch.save(model.state_dict(), save_path)
def main(): # Load SNLI dataset single_id_indexer = SingleIdTokenIndexer(lowercase_tokens=True) # word tokenizer tokenizer = WordTokenizer( end_tokens=["@@NULL@@"] ) # add @@NULL@@ to the end of sentences reader = SnliReader( token_indexers={"tokens": single_id_indexer}, tokenizer=tokenizer ) dev_dataset = reader.read( "https://s3-us-west-2.amazonaws.com/allennlp/datasets/snli/snli_1.0_dev.jsonl" ) # Load model and vocab model = load_archive( "https://allennlp.s3-us-west-2.amazonaws.com/models/esim-glove-snli-2019.04.23.tar.gz" ).model model.eval().cuda() vocab = model.vocab # add hooks for embeddings so we can compute gradients w.r.t. the input tokens utils.add_hooks(model) embedding_weight = utils.get_embedding_weight( model ) # save the word embedding matrix # Batches of examples to construct triggers universal_perturb_batch_size = 32 iterator = BasicIterator(batch_size=universal_perturb_batch_size) iterator.index_with(vocab) # Subsample the dataset to one class to do a universal attack on that class dataset_label_filter = "entailment" # only entailment examples # dataset_label_filter = 'contradiction' # only contradiction examples # dataset_label_filter = 'neutral' # only neutral examples subset_dev_dataset = [] for instance in dev_dataset: if instance["label"].label == dataset_label_filter: subset_dev_dataset.append(instance) # the attack is targeted towards a specific class # target_label = "0" # flip to entailment target_label = "1" # flip to contradiction # target_label = "2" # flip to neutral # A k-d tree if you want to do gradient + nearest neighbors # tree = KDTree(embedding_weight.numpy()) # Get original accuracy before adding universal triggers utils.get_accuracy( model, subset_dev_dataset, vocab, trigger_token_ids=None, snli=True ) model.train() # rnn cannot do backwards in eval mode # Initialize triggers num_trigger_tokens = 1 # one token prepended trigger_token_ids = [vocab.get_token_index("a")] * num_trigger_tokens # sample batches, update the triggers, and repeat for batch in lazy_groups_of( iterator(subset_dev_dataset, num_epochs=10, shuffle=True), group_size=1 ): # get model accuracy with current triggers utils.get_accuracy( model, subset_dev_dataset, vocab, trigger_token_ids, snli=True ) model.train() # rnn cannot do backwards in eval mode # get grad of triggers averaged_grad = utils.get_average_grad( model, batch, trigger_token_ids, target_label, snli=True ) # find attack candidates using an attack method cand_trigger_token_ids = attacks.hotflip_attack( averaged_grad, embedding_weight, num_candidates=40 ) # cand_trigger_token_ids = attacks.random_attack(embedding_weight, # trigger_token_ids, # num_candidates=40) # cand_trigger_token_ids = attacks.nearest_neighbor_grad(averaged_grad, # embedding_weight, # trigger_token_ids, # tree, # 100, # decrease_prob=True) # query the model to get the best candidates trigger_token_ids = utils.get_best_candidates( model, batch, trigger_token_ids, cand_trigger_token_ids, snli=True )
def test_epoch_tracking_when_one_epoch_at_a_time(self): iterator = BasicIterator(batch_size=2, track_epoch=True) iterator.index_with(self.vocab) for epoch in range(10): for batch in iterator(self.instances, num_epochs=1): assert all(epoch_num == epoch for epoch_num in batch['epoch_num'])
def train_only_lee(): # This is WORKING! # load datasetreader # Save logging to a local file # Multitasking log.getLogger().addHandler(log.FileHandler(directory+"/log.log")) lr = 0.00001 batch_size = 2 epochs = 100 max_seq_len = 512 max_span_width = 30 #token_indexer = BertIndexer(pretrained_model="bert-base-uncased", max_pieces=max_seq_len, do_lowercase=True,) token_indexer = PretrainedBertIndexer("bert-base-cased", do_lowercase=False) reader = ConllCorefBertReader(max_span_width = max_span_width, token_indexers = {"tokens": token_indexer}) EMBEDDING_DIM = 1024 HIDDEN_DIM = 200 processed_reader_dir = Path(directory+"processed/") train_ds, val_ds, test_ds = load_lee(reader, directory) # restore checkpoint here from allennlp.modules.token_embedders import ElmoTokenEmbedder #vocab = Vocabulary.from_instances(train_ds + val_ds) vocab = Vocabulary() iterator = BasicIterator(batch_size=batch_size) iterator.index_with(vocab) val_iterator = BasicIterator(batch_size=batch_size) val_iterator.index_with(vocab) from allennlp.modules.text_field_embedders import BasicTextFieldEmbedder # here, allow_unmatched_keys = True since we don't pass in offsets, because # we allow for word embeddings of the BERT-tokenized text, not necessarily the # original tokens # see the documentation for offsets here for more info: # https://github.com/allenai/allennlp/blob/master/allennlp/modules/token_embedders/bert_token_embedder.py options_file = 'https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x1024_128_2048cnn_1xhighway/elmo_2x1024_128_2048cnn_1xhighway_options.json' weight_file = 'https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x1024_128_2048cnn_1xhighway/elmo_2x1024_128_2048cnn_1xhighway_weights.hdf5' elmo_embedder = ElmoTokenEmbedder(options_file, weight_file) word_embedding = BasicTextFieldEmbedder({"tokens": elmo_embedder})#, allow_unmatched_keys=True) #word_embedding = BasicTextFieldEmbedder({"tokens": bert_embedder}, allow_unmatched_keys=True) #BERT_DIM = word_embedding.get_output_dim() ELMO_DIM = word_embedding.get_output_dim() # at each batch, sample from the two, and load the LSTM shared_layer = torch.nn.LSTM(ELMO_DIM, HIDDEN_DIM, batch_first=True, bidirectional=True) seq2seq = PytorchSeq2SeqWrapper(shared_layer) mention_feedforward = FeedForward(input_dim =512, num_layers = 2, hidden_dims = 150, activations = torch.nn.ReLU()) antecedent_feedforward = FeedForward(input_dim =2304, num_layers = 2, hidden_dims = 150, activations = torch.nn.ReLU()) model = CoreferenceResolver(vocab=vocab, text_field_embedder=word_embedding,context_layer= seq2seq, mention_feedforward=mention_feedforward,antecedent_feedforward=antecedent_feedforward , feature_size=768,max_span_width=max_span_width,spans_per_word=0.4,max_antecedents=250,lexical_dropout= 0.2) print(model) optimizer = optim.Adam(model.parameters(), lr=lr) # and then we can do the shared loss USE_GPU = 1 trainer = Trainer( model=model.cuda(), optimizer=optimizer, iterator=iterator, validation_iterator = val_iterator, train_dataset=train_ds, validation_dataset = val_ds, validation_metric = "+coref_f1", cuda_device=0 if USE_GPU else -1, serialization_dir= directory + "saved_models/only_lee", num_epochs=epochs, ) metrics = trainer.train() # save the model with open(directory + "saved_models/current_run_model_state", 'wb') as f: torch.save(model.state_dict(), f)
class BiEncoderTopXRetriever: def __init__(self, args, vocab, biencoder_onlyfor_encodingmentions, faiss_stored_kb, reader_for_mentions, duidx2encoded_emb): self.args = args self.mention_encoder = biencoder_onlyfor_encodingmentions self.mention_encoder.eval() self.faiss_searcher = faiss_stored_kb self.reader_for_mentions = reader_for_mentions self.sequence_iterator = BasicIterator( batch_size=self.args.batch_size_for_eval) self.sequence_iterator.index_with(vocab) self.cuda_device = 0 self.duidx2encoded_emb = duidx2encoded_emb def biencoder_tophits_retrievaler(self, train_or_dev_or_test_flag, how_many_top_hits_preserved=500): ds = self.reader_for_mentions.read(train_or_dev_or_test_flag) generator_for_biencoder = self.sequence_iterator(ds, num_epochs=1, shuffle=False) generator_for_biencoder_tqdm = tqdm( generator_for_biencoder, total=self.sequence_iterator.get_num_batches(ds)) with torch.no_grad(): for batch in generator_for_biencoder_tqdm: batch = nn_util.move_to_device(batch, self.cuda_device) mention_uniq_ids, encoded_mentions, gold_duidxs = self._extract_mention_idx_encoded_emb_and_its_gold_cuidx( batch=batch) faiss_search_candidate_result_cuidxs = self.faiss_topx_retriever( encoded_mentions=encoded_mentions, how_many_top_hits_preserved=how_many_top_hits_preserved) yield faiss_search_candidate_result_cuidxs, mention_uniq_ids, gold_duidxs def faiss_topx_retriever(self, encoded_mentions, how_many_top_hits_preserved): ''' If cosine-similarity search is later re-sorted with L2, we have to use self.args.cand_num_before_sort_candidates_forBLINKbiencoder. Args: encoded_mentions: how_many_top_hits_preserved: Returns: ''' if self.args.search_method == 'cossim': encoded_mentions = normalize(torch.from_numpy(encoded_mentions), dim=1).cpu().detach().numpy() _, faiss_search_candidate_result_cuidxs = self.faiss_searcher.search( encoded_mentions, how_many_top_hits_preserved) else: # assert self.args.search_method == 'indexflatl2' _, faiss_search_candidate_result_cuidxs = self.faiss_searcher.search( encoded_mentions, how_many_top_hits_preserved) return faiss_search_candidate_result_cuidxs def calc_L2distance(self, h, t): diff = h - t return torch.norm(diff, dim=2) def tonp(self, tsr): return tsr.detach().cpu().numpy() def _extract_mention_idx_encoded_emb_and_its_gold_cuidx(self, batch): out_dict = self.mention_encoder(**batch) return self.tonp(out_dict['mention_uniq_id']), self.tonp( out_dict['contextualized_mention']), self.tonp( out_dict['gold_duidx'])
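# For reference — a minimal sketch of how the faiss_stored_kb index consumed above
# might be built. This is an assumption, not code from the source: cosine search is
# implemented as inner product over L2-normalized float32 vectors, mirroring the
# query-side normalization in faiss_topx_retriever; contiguous integer duidx keys
# are also assumed.
import numpy as np
import faiss

def build_kb_index(duidx2encoded_emb, search_method='cossim'):
    # Stack entity embeddings in duidx order into a (num_entities, dim) matrix.
    kb = np.stack([duidx2encoded_emb[i] for i in range(len(duidx2encoded_emb))]).astype('float32')
    dim = kb.shape[1]
    if search_method == 'cossim':
        faiss.normalize_L2(kb)              # after this, inner product == cosine
        index = faiss.IndexFlatIP(dim)
    else:                                   # 'indexflatl2'
        index = faiss.IndexFlatL2(dim)
    index.add(kb)
    return index                            # index.search(queries, k) -> (scores, idxs)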
def train_fever_v1(): num_epoch = 10 seed = 12 batch_size = 128 dev_batch_size = 128 # experiment_name = "simple_nn_doc_first_sent" experiment_name = "simple_nn_doc" lazy = True torch.manual_seed(seed) contain_first_sentence = False pn_ratio = 1.0 # keep_neg_sample_prob = 0.4 # sample_prob_decay = 0.05 dev_upstream_file = config.RESULT_PATH / "doc_retri_bls/docretri.basic.nopageview/dev.jsonl" train_upstream_file = config.RESULT_PATH / "doc_retri_bls/docretri.basic.nopageview/train.jsonl" dev_data_list = common.load_jsonl(dev_upstream_file) # Prepare Data token_indexers = { 'tokens': SingleIdTokenIndexer(namespace='tokens'), # This is the raw tokens 'elmo_chars': ELMoTokenCharactersIndexer(namespace='elmo_characters') # This is the elmo_characters } train_fever_data_reader = SSelectorReader(token_indexers=token_indexers, lazy=lazy, max_l=180) # dev_fever_data_reader = SSelectorReader(token_indexers=token_indexers, lazy=False) dev_fever_data_reader = SSelectorReader(token_indexers=token_indexers, lazy=lazy, max_l=180) cursor = fever_db.get_cursor() complete_upstream_dev_data = disamb.sample_disamb_inference(common.load_jsonl(dev_upstream_file), cursor, contain_first_sentence=contain_first_sentence) print("Dev size:", len(complete_upstream_dev_data)) dev_instances = dev_fever_data_reader.read(complete_upstream_dev_data) # Load Vocabulary biterator = BasicIterator(batch_size=batch_size) dev_biterator = BasicIterator(batch_size=dev_batch_size) vocab, weight_dict = load_vocab_embeddings(config.DATA_ROOT / "vocab_cache" / "nli_basic") # This is important vocab.add_token_to_namespace("true", namespace="selection_labels") vocab.add_token_to_namespace("false", namespace="selection_labels") vocab.add_token_to_namespace("hidden", namespace="selection_labels") vocab.change_token_with_index_to_namespace("hidden", -2, namespace='selection_labels') # Label value vocab.get_index_to_token_vocabulary('selection_labels') print(vocab.get_token_to_index_vocabulary('selection_labels')) print(vocab.get_vocab_size('tokens')) biterator.index_with(vocab) dev_biterator.index_with(vocab) # exit(0) # Build Model device = torch.device("cuda" if torch.cuda.is_available() else "cpu", index=0) device_num = -1 if device.type == 'cpu' else 0 model = Model(weight=weight_dict['glove.840B.300d'], vocab_size=vocab.get_vocab_size('tokens'), embedding_dim=300, max_l=160, num_of_class=2) model.display() model.to(device) # Create Log File file_path_prefix, date = save_tool.gen_file_prefix(f"{experiment_name}") # Save the source code. script_name = os.path.basename(__file__) with open(os.path.join(file_path_prefix, script_name), 'w') as out_f, open(__file__, 'r') as it: out_f.write(it.read()) out_f.flush() # Save source code end.
best_dev = -1 iteration = 0 start_lr = 0.0002 optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=start_lr) criterion = nn.CrossEntropyLoss() for i_epoch in range(num_epoch): print("Resampling...") # Resampling complete_upstream_train_data = disamb.sample_disamb_training_v0(common.load_jsonl(train_upstream_file), cursor, pn_ratio, contain_first_sentence) print("Sample Prob.:", pn_ratio) print("Sampled_length:", len(complete_upstream_train_data)) sampled_train_instances = train_fever_data_reader.read(complete_upstream_train_data) train_iter = biterator(sampled_train_instances, shuffle=True, num_epochs=1, cuda_device=device_num) for i, batch in tqdm(enumerate(train_iter)): model.train() out = model(batch) y = batch['selection_label'] loss = criterion(out, y) # No decay optimizer.zero_grad() loss.backward() optimizer.step() iteration += 1 if i_epoch <= 5: mod = 1000 else: mod = 500 if iteration % mod == 0: eval_iter = dev_biterator(dev_instances, shuffle=False, num_epochs=1, cuda_device=device_num) complete_upstream_dev_data = hidden_eval(model, eval_iter, complete_upstream_dev_data) disamb.enforce_disabuigation_into_retrieval_result_v0(complete_upstream_dev_data, dev_data_list) oracle_score, pr, rec, f1 = c_scorer.fever_doc_only(dev_data_list, dev_data_list, max_evidence=5) print(f"Dev(raw_acc/pr/rec/f1):{oracle_score}/{pr}/{rec}/{f1}") print("Strict score:", oracle_score) print(f"Eval Tracking score:", f"{oracle_score}") need_save = False if oracle_score > best_dev: best_dev = oracle_score need_save = True if need_save: save_path = os.path.join( file_path_prefix, f'i({iteration})_epoch({i_epoch})_' f'(tra_score:{oracle_score}|pr:{pr}|rec:{rec}|f1:{f1})' ) torch.save(model.state_dict(), save_path) # print("Epoch Evaluation...") eval_iter = dev_biterator(dev_instances, shuffle=False, num_epochs=1, cuda_device=device_num) complete_upstream_dev_data = hidden_eval(model, eval_iter, complete_upstream_dev_data) disamb.enforce_disabuigation_into_retrieval_result_v0(complete_upstream_dev_data, dev_data_list) oracle_score, pr, rec, f1 = c_scorer.fever_doc_only(dev_data_list, dev_data_list, max_evidence=5) print(f"Dev(raw_acc/pr/rec/f1):{oracle_score}/{pr}/{rec}/{f1}") print("Strict score:", oracle_score) print(f"Eval Tracking score:", f"{oracle_score}") need_save = False if oracle_score > best_dev: best_dev = oracle_score need_save = True if need_save: save_path = os.path.join( file_path_prefix, f'i({iteration})_epoch({i_epoch})_e' f'(tra_score:{oracle_score}|pr:{pr}|rec:{rec}|f1:{f1})' ) torch.save(model.state_dict(), save_path)
def attack_unitrigger(args, model, vocab, target_label, trigger_data, init_trigger='the', previous_inits=[], previous_triggers=[], exempt_triggers=[], tree=None, surrogate=None): # Register a gradient hook on the embeddings. This saves the gradient w.r.t. the word embeddings. # We use the gradient later in the attack. unitrigger_utils.add_hooks(model) embedding_weight = unitrigger_utils.get_embedding_weight( model) # also save the word embedding matrix if init_trigger == "": # randomly choose a starting point total_vocab = vocab.get_vocab_size() choices = np.array(list(range(total_vocab))) # previous_list = previous_inits + previous_triggers previous_list = previous_inits # print(previous_list) if not len(previous_list) or args.trigger_neighbor < 1: idx = np.random.choice(choices) else: mask = np.array([True] * total_vocab) for word_idx in previous_list: word_embed = torch.nn.functional.embedding( torch.LongTensor([word_idx]), embedding_weight).detach().cpu().numpy()[0] neighbors = tree.query([word_embed], k=args.trigger_neighbor, return_distance=False) mask[neighbors] = False idx = np.random.choice(choices[mask]) init_trigger = vocab.get_token_from_index(idx) previous_inits.append(idx) iterator = BasicIterator(batch_size=args.universal_batch_size) iterator.index_with(vocab) model.train() # rnn cannot do backwards in train mode # initialize triggers which are concatenated to the input trigger_token_ids = [vocab.get_token_index(init_trigger) ] * args.trigger_length for batch in lazy_groups_of(iterator(trigger_data, num_epochs=args.trigger_epochs, shuffle=True), group_size=1): averaged_grad = unitrigger_utils.get_average_grad( model, batch, trigger_token_ids) cand_trigger_token_ids = hotflip_attack( averaged_grad, embedding_weight, trigger_token_ids, num_candidates=args.num_candidates, exempt_candidates=exempt_triggers, increase_loss=True) cand_trigger_token_ids = [ a[args.trigger_ignore:] for a in cand_trigger_token_ids ] # Tries all of the candidates and returns the trigger sequence with highest loss. trigger_token_ids = unitrigger_utils.get_best_candidates( model, batch, trigger_token_ids, cand_trigger_token_ids, surrogate=surrogate) for token_id in trigger_token_ids: if token_id not in previous_triggers: previous_triggers.append(token_id) return trigger_token_ids, init_trigger
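# For reference — the `tree` argument above is queried with sklearn's KDTree API
# (tree.query([vec], k=..., return_distance=False)). A minimal usage sketch of
# constructing it, mirroring the commented-out `tree = KDTree(embedding_weight.numpy())`
# line in the SNLI trigger script earlier; `trigger_instances` is a hypothetical list
# of indexed instances, and model/vocab/args/target_label come from the caller's scope.
from sklearn.neighbors import KDTree

embedding_weight = unitrigger_utils.get_embedding_weight(model)  # (vocab_size, dim)
tree = KDTree(embedding_weight.detach().cpu().numpy())
# An empty init_trigger makes attack_unitrigger sample a random starting word that
# avoids neighbors of previously tried initializations.
trigger_token_ids, chosen_init = attack_unitrigger(
    args, model, vocab, target_label, trigger_instances,
    init_trigger='', tree=tree)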
def eval_fever(): # save_path = "/home/easonnie/projects/MiscEnc/saved_models/06-07-21:58:06_esim_elmo/i(60900)_epoch(4)_um_dev(80.03458096013019)_m_dev(79.174732552216)_seed(12)" save_path = "/home/easonnie/projects/MiscEnc/saved_models/07-02-14:40:01_esim_elmo_linear_amr_cs_score_filtering_0.5/i(5900)_epoch(3)_um_dev(39.73759153783564)_m_dev(40.18339276617422)_seed(12)" # save_path = "/home/easonnie/projects/MiscEnc/saved_models/07-02-14:42:34_esim_elmo_cs_score_filtering_0.7/i(1300)_epoch(4)_um_dev(32.55695687550855)_m_dev(32.42995415180846)_seed(12)" batch_size = 32 # Prepare Data token_indexers = { 'tokens': SingleIdTokenIndexer(namespace='tokens'), # This is the raw tokens 'elmo_chars': ELMoTokenCharactersIndexer( namespace='elmo_characters') # This is the elmo_characters } csnli_dataset_reader = CNLIReader( token_indexers=token_indexers, example_filter=lambda x: float(x['cs_score']) >= 0.7) # mnli_train_data_path = config.DATA_ROOT / "mnli/multinli_1.0_train.jsonl" mnli_m_dev_data_path = config.DATA_ROOT / "amrs/mnli_amr_ln/mnli_mdev.jsonl.cs" mnli_um_dev_data_path = config.DATA_ROOT / "amrs/mnli_amr_ln/mnli_umdev.jsonl.cs" # mnli_train_instances = csnli_dataset_reader.read(mnli_train_data_path) mnli_m_dev_instances = csnli_dataset_reader.read(mnli_m_dev_data_path) mnli_um_dev_instances = csnli_dataset_reader.read(mnli_um_dev_data_path) # Load Vocabulary biterator = BasicIterator(batch_size=batch_size) vocab, weight_dict = load_vocab_embeddings(config.DATA_ROOT / "vocab_cache" / "nli") vocab.change_token_with_index_to_namespace('hidden', -2, namespace='labels') print(vocab.get_token_to_index_vocabulary('labels')) print(vocab.get_vocab_size('tokens')) biterator.index_with(vocab) # Build Model device = torch.device("cuda" if torch.cuda.is_available() else "cpu", index=0) device_num = -1 if device.type == 'cpu' else 0 model = Model(weight=weight_dict['glove.840B.300d'], vocab_size=vocab.get_vocab_size('tokens'), embedding_dim=300) model.load_state_dict(torch.load(save_path)) model.display() model.to(device) # Create Log File criterion = nn.CrossEntropyLoss() eval_iter = biterator(mnli_m_dev_instances, shuffle=False, num_epochs=1, cuda_device=device_num) m_dev_score, m_dev_loss = eval_model(model, eval_iter, criterion) eval_iter = biterator(mnli_um_dev_instances, shuffle=False, num_epochs=1, cuda_device=device_num) um_dev_score, um_dev_loss = eval_model(model, eval_iter, criterion) print(f"Dev(M):{m_dev_score}/{m_dev_loss}") print(f"Dev(UM):{um_dev_score}/{um_dev_loss}")
class TestCallbackTrainer(ModelTestCase): def setUp(self): super().setUp() # A lot of the tests want access to the metric tracker # so we add a property that gets it by grabbing it from # the relevant callback. def metric_tracker(self: CallbackTrainer): for callback in self.handler.callbacks(): if isinstance(callback, TrackMetrics): return callback.metric_tracker return None setattr(CallbackTrainer, "metric_tracker", property(metric_tracker)) self.instances = SequenceTaggingDatasetReader().read( self.FIXTURES_ROOT / "data" / "sequence_tagging.tsv") vocab = Vocabulary.from_instances(self.instances) self.vocab = vocab self.model_params = Params({ "text_field_embedder": { "token_embedders": { "tokens": { "type": "embedding", "embedding_dim": 5 } } }, "encoder": { "type": "lstm", "input_size": 5, "hidden_size": 7, "num_layers": 2 }, }) self.model = SimpleTagger.from_params(vocab=self.vocab, params=self.model_params) self.optimizer = torch.optim.SGD(self.model.parameters(), 0.01, momentum=0.9) self.iterator = BasicIterator(batch_size=2) self.iterator.index_with(vocab) def tearDown(self): super().tearDown() delattr(CallbackTrainer, "metric_tracker") def default_callbacks( self, validation_metric: str = "-loss", patience: int = None, max_checkpoints: int = 20, checkpoint_every: int = None, model_save_interval: float = None, serialization_dir: str = "__DEFAULT__", validation_data: Iterable[Instance] = None, validation_iterator: DataIterator = None, batch_size: int = 2, ): if serialization_dir == "__DEFAULT__": serialization_dir = self.TEST_DIR checkpointer = Checkpointer(serialization_dir, checkpoint_every, max_checkpoints) tensorboard = TensorboardWriter(get_batch_num_total=lambda: None) if validation_iterator is None: validation_iterator = BasicIterator(batch_size=batch_size) validation_iterator.index_with(self.vocab) return [ LogToTensorboard(log_batch_size_period=10, tensorboard=tensorboard), Checkpoint(checkpointer, model_save_interval), Validate( validation_data=self.instances if validation_data is None else validation_data, validation_iterator=validation_iterator, ), TrackMetrics(patience, validation_metric), GradientNormAndClip(), ] def test_end_to_end(self): self.ensure_model_can_train_save_and_load( self.FIXTURES_ROOT / "simple_tagger" / "experiment_callback_trainer.json") def test_trainer_can_run_from_params(self): from allennlp.commands.train import train_model params = Params({ "trainer": { "type": "callback", "optimizer": { "type": "sgd", "lr": 0.01, "momentum": 0.9 }, "num_epochs": 2, "callbacks": [ "checkpoint", "track_metrics", "validate", { "type": "log_to_tensorboard", "log_batch_size_period": 10 }, ], }, "dataset_reader": { "type": "sequence_tagging" }, "train_data_path": str(self.FIXTURES_ROOT / "data" / "sequence_tagging.tsv"), "validation_data_path": str(self.FIXTURES_ROOT / "data" / "sequence_tagging.tsv"), "model": { "type": "simple_tagger", "text_field_embedder": { "token_embedders": { "tokens": { "type": "embedding", "embedding_dim": 5 } } }, "encoder": { "type": "lstm", "input_size": 5, "hidden_size": 7, "num_layers": 2 }, }, "iterator": { "type": "basic", "batch_size": 2 }, }) train_model(params, self.TEST_DIR) with open(self.TEST_DIR / "metrics.json") as f: metrics = json.load(f) assert "best_validation_loss" in metrics assert isinstance(metrics["best_validation_loss"], float) assert "best_validation_accuracy" in metrics assert isinstance(metrics["best_validation_accuracy"], float) assert "best_validation_accuracy3" in metrics assert 
isinstance(metrics["best_validation_accuracy3"], float) assert "best_epoch" in metrics assert isinstance(metrics["best_epoch"], int) def test_trainer_can_run(self): trainer = CallbackTrainer( model=self.model, training_data=self.instances, iterator=self.iterator, optimizer=self.optimizer, callbacks=self.default_callbacks(serialization_dir=None), num_epochs=2, ) metrics = trainer.train() assert "best_validation_loss" in metrics assert isinstance(metrics["best_validation_loss"], float) assert "best_validation_accuracy" in metrics assert isinstance(metrics["best_validation_accuracy"], float) assert "best_validation_accuracy3" in metrics assert isinstance(metrics["best_validation_accuracy3"], float) assert "best_epoch" in metrics assert isinstance(metrics["best_epoch"], int) assert "peak_cpu_memory_MB" in metrics # Making sure that both increasing and decreasing validation metrics work. trainer = CallbackTrainer( model=self.model, training_data=self.instances, iterator=self.iterator, optimizer=self.optimizer, callbacks=self.default_callbacks(validation_metric="+loss", serialization_dir=None), num_epochs=2, ) metrics = trainer.train() assert "best_validation_loss" in metrics assert isinstance(metrics["best_validation_loss"], float) assert "best_validation_accuracy" in metrics assert isinstance(metrics["best_validation_accuracy"], float) assert "best_validation_accuracy3" in metrics assert isinstance(metrics["best_validation_accuracy3"], float) assert "best_epoch" in metrics assert isinstance(metrics["best_epoch"], int) assert "peak_cpu_memory_MB" in metrics assert isinstance(metrics["peak_cpu_memory_MB"], float) assert metrics["peak_cpu_memory_MB"] > 0 @responses.activate def test_trainer_posts_to_url(self): url = "https://slack.com?webhook=ewifjweoiwjef" responses.add(responses.POST, url) post_to_url = PostToUrl(url, message="only a test") callbacks = self.default_callbacks() + [post_to_url] trainer = CallbackTrainer( model=self.model, training_data=self.instances, iterator=self.iterator, optimizer=self.optimizer, num_epochs=2, callbacks=callbacks, ) trainer.train() assert len(responses.calls) == 1 assert responses.calls[ 0].response.request.body == b'{"text": "only a test"}' def test_trainer_can_run_exponential_moving_average(self): moving_average = ExponentialMovingAverage( self.model.named_parameters(), decay=0.9999) callbacks = self.default_callbacks() + [ UpdateMovingAverage(moving_average) ] trainer = CallbackTrainer( model=self.model, training_data=self.instances, iterator=self.iterator, optimizer=self.optimizer, num_epochs=2, callbacks=callbacks, ) trainer.train() def test_trainer_can_run_ema_from_params(self): uma_params = Params({"moving_average": {"decay": 0.9999}}) callbacks = self.default_callbacks() + [ UpdateMovingAverage.from_params(uma_params, self.model) ] trainer = CallbackTrainer( model=self.model, training_data=self.instances, iterator=self.iterator, optimizer=self.optimizer, num_epochs=2, callbacks=callbacks, ) trainer.train() @pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device registered.") def test_trainer_can_run_cuda(self): self.model.cuda() trainer = CallbackTrainer( self.model, training_data=self.instances, iterator=self.iterator, optimizer=self.optimizer, num_epochs=2, callbacks=self.default_callbacks(), cuda_device=0, ) trainer.train() @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="Need multiple GPUs.") def test_trainer_can_run_multiple_gpu(self): self.model.cuda() class MetaDataCheckWrapper(Model): """ Checks that the metadata 
field has been correctly split across the batch dimension when running on multiple gpus. """ def __init__(self, model): super().__init__(model.vocab) self.model = model def forward(self, **kwargs) -> Dict[str, torch.Tensor]: # type: ignore assert ( "metadata" in kwargs and "tags" in kwargs ), f"tokens and metadata must be provided. Got {kwargs.keys()} instead." batch_size = kwargs["tokens"]["tokens"].size()[0] assert len(kwargs["metadata"]) == batch_size, ( f"metadata must be split appropriately. Expected {batch_size} elements, " f"got {len(kwargs['metadata'])} elements.") return self.model.forward(**kwargs) multigpu_iterator = BasicIterator(batch_size=4) multigpu_iterator.index_with(self.vocab) trainer = CallbackTrainer( MetaDataCheckWrapper(self.model), training_data=self.instances, iterator=multigpu_iterator, optimizer=self.optimizer, num_epochs=2, callbacks=self.default_callbacks(), cuda_device=[0, 1], ) metrics = trainer.train() assert "peak_cpu_memory_MB" in metrics assert isinstance(metrics["peak_cpu_memory_MB"], float) assert metrics["peak_cpu_memory_MB"] > 0 assert "peak_gpu_0_memory_MB" in metrics assert isinstance(metrics["peak_gpu_0_memory_MB"], int) assert "peak_gpu_1_memory_MB" in metrics assert isinstance(metrics["peak_gpu_1_memory_MB"], int) @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="Need multiple GPUs.") def test_production_rule_field_with_multiple_gpus(self): wikitables_dir = "allennlp/tests/fixtures/data/wikitables/" offline_lf_directory = wikitables_dir + "action_space_walker_output/" wikitables_reader = WikiTablesDatasetReader( tables_directory=wikitables_dir, offline_logical_forms_directory=offline_lf_directory) instances = wikitables_reader.read(wikitables_dir + "sample_data.examples") archive_path = (self.FIXTURES_ROOT / "semantic_parsing" / "wikitables" / "serialization" / "model.tar.gz") model = load_archive(archive_path).model model.cuda() multigpu_iterator = BasicIterator(batch_size=4) multigpu_iterator.index_with(model.vocab) trainer = CallbackTrainer( model, instances, multigpu_iterator, self.optimizer, num_epochs=2, cuda_device=[0, 1], callbacks=[GradientNormAndClip()], ) trainer.train() def test_trainer_can_resume_training(self): trainer = CallbackTrainer( self.model, training_data=self.instances, iterator=self.iterator, optimizer=self.optimizer, callbacks=self.default_callbacks(), num_epochs=1, serialization_dir=self.TEST_DIR, ) trainer.train() new_trainer = CallbackTrainer( self.model, training_data=self.instances, iterator=self.iterator, optimizer=self.optimizer, callbacks=self.default_callbacks(), num_epochs=3, serialization_dir=self.TEST_DIR, ) new_trainer.handler.fire_event(Events.TRAINING_START) assert new_trainer.epoch_number == 1 tracker = new_trainer.metric_tracker assert tracker is not None assert tracker.is_best_so_far() assert tracker._best_so_far is not None new_trainer.train() def test_trainer_can_resume_training_for_exponential_moving_average(self): moving_average = ExponentialMovingAverage( self.model.named_parameters()) callbacks = self.default_callbacks() + [ UpdateMovingAverage(moving_average) ] trainer = CallbackTrainer( self.model, training_data=self.instances, iterator=self.iterator, optimizer=self.optimizer, num_epochs=1, serialization_dir=self.TEST_DIR, callbacks=callbacks, ) trainer.train() new_moving_average = ExponentialMovingAverage( self.model.named_parameters()) new_callbacks = self.default_callbacks() + [ UpdateMovingAverage(new_moving_average) ] new_trainer = CallbackTrainer( self.model, 
            training_data=self.instances,
            iterator=self.iterator,
            optimizer=self.optimizer,
            num_epochs=3,
            serialization_dir=self.TEST_DIR,
            callbacks=new_callbacks,
        )
        new_trainer.handler.fire_event(Events.TRAINING_START)
        assert new_trainer.epoch_number == 1
        # Check the restored state on the new trainer, not the finished one.
        tracker = new_trainer.metric_tracker
        assert tracker.is_best_so_far()
        assert tracker._best_so_far is not None
        new_trainer.train()

    def test_training_metrics_consistent_with_and_without_validation(self):
        default_callbacks = self.default_callbacks(serialization_dir=None)
        default_callbacks_without_validation = [
            callback for callback in default_callbacks
            if not isinstance(callback, Validate)
        ]
        trainer1 = CallbackTrainer(
            copy.deepcopy(self.model),
            training_data=self.instances,
            iterator=self.iterator,
            optimizer=copy.deepcopy(self.optimizer),
            callbacks=default_callbacks_without_validation,
            num_epochs=1,
            serialization_dir=None,
        )
        trainer1.train()

        trainer2 = CallbackTrainer(
            copy.deepcopy(self.model),
            training_data=self.instances,
            iterator=self.iterator,
            optimizer=copy.deepcopy(self.optimizer),
            callbacks=default_callbacks,
            num_epochs=1,
            serialization_dir=None,
        )
        trainer2.train()

        metrics1 = trainer1.train_metrics
        metrics2 = trainer2.train_metrics
        assert metrics1.keys() == metrics2.keys()
        for key in ["accuracy", "accuracy3", "loss"]:
            np.testing.assert_almost_equal(metrics1[key], metrics2[key])

    def test_validation_metrics_consistent_with_and_without_tracking(self):
        default_callbacks = self.default_callbacks(serialization_dir=None)
        default_callbacks_without_tracking = [
            callback for callback in default_callbacks
            if not isinstance(callback, TrackMetrics)
        ]
        trainer1 = CallbackTrainer(
            copy.deepcopy(self.model),
            training_data=self.instances,
            iterator=self.iterator,
            optimizer=copy.deepcopy(self.optimizer),
            callbacks=default_callbacks_without_tracking,
            num_epochs=1,
            serialization_dir=None,
        )
        trainer1.train()

        trainer2 = CallbackTrainer(
            copy.deepcopy(self.model),
            training_data=self.instances,
            iterator=self.iterator,
            optimizer=copy.deepcopy(self.optimizer),
            callbacks=default_callbacks,
            num_epochs=1,
            serialization_dir=None,
        )
        trainer2.train()

        metrics1 = trainer1.val_metrics
        metrics2 = trainer2.val_metrics
        assert metrics1.keys() == metrics2.keys()
        for key in ["accuracy", "accuracy3", "loss"]:
            np.testing.assert_almost_equal(metrics1[key], metrics2[key])

    def test_metric_only_considered_best_so_far_when_strictly_better_than_those_before_it_increasing_metric(
            self):
        new_trainer = CallbackTrainer(
            self.model,
            training_data=self.instances,
            iterator=self.iterator,
            optimizer=self.optimizer,
            num_epochs=3,
            serialization_dir=self.TEST_DIR,
            callbacks=self.default_callbacks("+test", patience=5),
        )
        tracker = new_trainer.metric_tracker

        # when it is the only metric it should be considered the best
        new_tracker = copy.deepcopy(tracker)
        new_tracker.add_metric(1)
        assert new_tracker.is_best_so_far()

        # when it is the same as one before it, it is not considered the best
        new_tracker = copy.deepcopy(tracker)
        new_tracker.add_metrics([0.3, 0.3, 0.3, 0.2, 0.5, 0.1, 0.3])
        assert not new_tracker.is_best_so_far()

        # when it is the best it is considered the best
        new_tracker = copy.deepcopy(tracker)
        new_tracker.add_metrics([0.3, 0.3, 0.3, 0.2, 0.5, 0.1, 13])
        assert new_tracker.is_best_so_far()

        # when it is not the best it is not considered the best
        new_tracker = copy.deepcopy(tracker)
        new_tracker.add_metrics([0.3, 0.3, 0.3, 0.2, 0.5, 0.1, 0.0013])
        assert not new_tracker.is_best_so_far()

    def test_metric_only_considered_best_so_far_when_strictly_better_than_those_before_it_decreasing_metric(
            self):
        new_trainer = CallbackTrainer(
            self.model,
            training_data=self.instances,
            iterator=self.iterator,
            optimizer=self.optimizer,
            num_epochs=3,
            serialization_dir=self.TEST_DIR,
            callbacks=self.default_callbacks(patience=5),
        )
        tracker = new_trainer.metric_tracker

        # when it is the only metric it should be considered the best
        new_tracker = copy.deepcopy(tracker)
        new_tracker.add_metric(1)
        assert new_tracker.is_best_so_far()

        # when it is the same as one before it, it is not considered the best
        new_tracker = copy.deepcopy(tracker)
        new_tracker.add_metrics([0.3, 0.3, 0.3, 0.2, 0.5, 0.1, 0.3])
        assert not new_tracker.is_best_so_far()

        # when it is the best it is considered the best
        new_tracker = copy.deepcopy(tracker)
        new_tracker.add_metrics([0.3, 0.3, 0.3, 0.2, 0.5, 0.1, 0.0013])
        assert new_tracker.is_best_so_far()

        # when it is not the best it is not considered the best
        # (this assert was missing, so the last check tested nothing)
        new_tracker = copy.deepcopy(tracker)
        new_tracker.add_metrics([0.3, 0.3, 0.3, 0.2, 0.5, 0.1, 13])
        assert not new_tracker.is_best_so_far()

    def test_should_stop_early_with_increasing_metric(self):
        new_trainer = CallbackTrainer(
            self.model,
            training_data=self.instances,
            iterator=self.iterator,
            optimizer=self.optimizer,
            num_epochs=3,
            serialization_dir=self.TEST_DIR,
            callbacks=self.default_callbacks(patience=5, validation_metric="+test"),
        )
        tracker = new_trainer.metric_tracker

        new_tracker = copy.deepcopy(tracker)
        new_tracker.add_metrics([0.5, 0.3, 0.2, 0.1, 0.4, 0.4])
        assert new_tracker.should_stop_early()

        new_tracker = copy.deepcopy(tracker)
        new_tracker.add_metrics([0.3, 0.3, 0.3, 0.2, 0.5, 0.1])
        assert not new_tracker.should_stop_early()

    def test_should_stop_early_with_decreasing_metric(self):
        new_trainer = CallbackTrainer(
            self.model,
            training_data=self.instances,
            iterator=self.iterator,
            optimizer=self.optimizer,
            num_epochs=3,
            serialization_dir=self.TEST_DIR,
            callbacks=self.default_callbacks(patience=5),
        )
        tracker = new_trainer.metric_tracker

        new_tracker = copy.deepcopy(tracker)
        new_tracker.add_metrics([0.02, 0.3, 0.2, 0.1, 0.4, 0.4])
        assert new_tracker.should_stop_early()

        new_tracker = copy.deepcopy(tracker)
        new_tracker.add_metrics([0.3, 0.3, 0.2, 0.1, 0.4, 0.5])
        assert not new_tracker.should_stop_early()

        new_tracker = copy.deepcopy(tracker)
        new_tracker.add_metrics([0.1, 0.3, 0.2, 0.1, 0.4, 0.5])
        assert new_tracker.should_stop_early()

    def test_should_stop_early_with_early_stopping_disabled(self):
        # Increasing metric
        trainer = CallbackTrainer(
            self.model,
            training_data=self.instances,
            iterator=self.iterator,
            optimizer=self.optimizer,
            num_epochs=100,
            callbacks=self.default_callbacks(validation_metric="+test"),
        )
        tracker = trainer.metric_tracker
        tracker.add_metrics([float(i) for i in reversed(range(20))])
        assert not tracker.should_stop_early()

        # Decreasing metric
        trainer = CallbackTrainer(
            self.model,
            training_data=self.instances,
            iterator=self.iterator,
            optimizer=self.optimizer,
            num_epochs=100,
            callbacks=self.default_callbacks(validation_metric="-test"),
        )
        tracker = trainer.metric_tracker
        tracker.add_metrics([float(i) for i in range(20)])
        assert not tracker.should_stop_early()

    def test_should_stop_early_with_invalid_patience(self):
        for patience in [0, -1, -2, 1.5, "None"]:
            with pytest.raises(ConfigurationError):
                CallbackTrainer(
                    self.model,
                    training_data=self.instances,
                    iterator=self.iterator,
                    optimizer=self.optimizer,
                    num_epochs=100,
                    callbacks=self.default_callbacks(
                        patience=patience, validation_metric="+test"),
                )

    def test_trainer_can_run_and_resume_with_momentum_scheduler(self):
        scheduler = MomentumScheduler.from_params(
            self.optimizer, Params({
                "type": "inverted_triangular",
"cool_down": 2, "warm_up": 2 })) callbacks = self.default_callbacks() + [UpdateMomentum(scheduler)] trainer = CallbackTrainer( model=self.model, training_data=self.instances, iterator=self.iterator, optimizer=self.optimizer, num_epochs=4, callbacks=callbacks, serialization_dir=self.TEST_DIR, ) trainer.train() new_scheduler = MomentumScheduler.from_params( self.optimizer, Params({ "type": "inverted_triangular", "cool_down": 2, "warm_up": 2 })) new_callbacks = self.default_callbacks() + [ UpdateMomentum(new_scheduler) ] new_trainer = CallbackTrainer( model=self.model, training_data=self.instances, iterator=self.iterator, optimizer=self.optimizer, num_epochs=6, callbacks=new_callbacks, serialization_dir=self.TEST_DIR, ) new_trainer.handler.fire_event(Events.TRAINING_START) assert new_trainer.epoch_number == 4 assert new_scheduler.last_epoch == 3 new_trainer.train() def test_trainer_can_run_with_lr_scheduler(self): lr_params = Params({"type": "reduce_on_plateau"}) lr_scheduler = LearningRateScheduler.from_params( self.optimizer, lr_params) callbacks = self.default_callbacks() + [ UpdateLearningRate(lr_scheduler) ] trainer = CallbackTrainer( model=self.model, training_data=self.instances, iterator=self.iterator, optimizer=self.optimizer, callbacks=callbacks, num_epochs=2, ) trainer.train() def test_trainer_can_resume_with_lr_scheduler(self): lr_scheduler = LearningRateScheduler.from_params( self.optimizer, Params({ "type": "exponential", "gamma": 0.5 })) callbacks = self.default_callbacks() + [ UpdateLearningRate(lr_scheduler) ] trainer = CallbackTrainer( model=self.model, training_data=self.instances, iterator=self.iterator, optimizer=self.optimizer, callbacks=callbacks, num_epochs=2, serialization_dir=self.TEST_DIR, ) trainer.train() new_lr_scheduler = LearningRateScheduler.from_params( self.optimizer, Params({ "type": "exponential", "gamma": 0.5 })) callbacks = self.default_callbacks() + [ UpdateLearningRate(new_lr_scheduler) ] new_trainer = CallbackTrainer( model=self.model, training_data=self.instances, iterator=self.iterator, optimizer=self.optimizer, callbacks=callbacks, num_epochs=4, serialization_dir=self.TEST_DIR, ) new_trainer.handler.fire_event(Events.TRAINING_START) assert new_trainer.epoch_number == 2 assert new_lr_scheduler.lr_scheduler.last_epoch == 1 new_trainer.train() def test_trainer_raises_on_model_with_no_loss_key(self): class FakeModel(Model): def forward(self, **kwargs): return {} with pytest.raises(RuntimeError): trainer = CallbackTrainer( FakeModel(None), training_data=self.instances, iterator=self.iterator, optimizer=self.optimizer, callbacks=self.default_callbacks(), num_epochs=2, serialization_dir=self.TEST_DIR, ) trainer.train() def test_trainer_can_log_histograms(self): # enable activation logging for module in self.model.modules(): module.should_log_activations = True callbacks = [ cb for cb in self.default_callbacks() if not isinstance(cb, LogToTensorboard) ] # The lambda: None is unfortunate, but it will get replaced by the callback. 
tensorboard = TensorboardWriter(lambda: None, histogram_interval=2) callbacks.append(LogToTensorboard(tensorboard)) trainer = CallbackTrainer( self.model, training_data=self.instances, iterator=self.iterator, optimizer=self.optimizer, num_epochs=3, serialization_dir=self.TEST_DIR, callbacks=callbacks, ) trainer.train() def test_trainer_respects_num_serialized_models_to_keep(self): trainer = CallbackTrainer( self.model, training_data=self.instances, iterator=self.iterator, optimizer=self.optimizer, num_epochs=5, serialization_dir=self.TEST_DIR, callbacks=self.default_callbacks(max_checkpoints=3), ) trainer.train() # Now check the serialized files for prefix in ["model_state_epoch_*", "training_state_epoch_*"]: file_names = glob.glob(os.path.join(self.TEST_DIR, prefix)) epochs = [ int(re.search(r"_([0-9])\.th", fname).group(1)) for fname in file_names ] assert sorted(epochs) == [2, 3, 4] def test_trainer_saves_metrics_every_epoch(self): trainer = CallbackTrainer( model=self.model, training_data=self.instances, iterator=self.iterator, optimizer=self.optimizer, num_epochs=5, serialization_dir=self.TEST_DIR, callbacks=self.default_callbacks(max_checkpoints=3), ) trainer.train() for epoch in range(5): epoch_file = self.TEST_DIR / f"metrics_epoch_{epoch}.json" assert epoch_file.exists() metrics = json.load(open(epoch_file)) assert "validation_loss" in metrics assert "best_validation_loss" in metrics assert metrics.get("epoch") == epoch def test_trainer_respects_keep_serialized_model_every_num_seconds(self): # To test: # Create an iterator that sleeps for 2.5 second per epoch, so the total training # time for one epoch is slightly greater then 2.5 seconds. # Run for 6 epochs, keeping the last 2 models, models also kept every 5 seconds. # Check the resulting checkpoints. Should then have models at epochs # 2, 4, plus the last two at 5 and 6. class WaitingIterator(BasicIterator): def _create_batches(self, *args, **kwargs): time.sleep(2.5) return super()._create_batches(*args, **kwargs) waiting_iterator = WaitingIterator(batch_size=2) waiting_iterator.index_with(self.vocab) # Don't want validation iterator to wait. viterator = BasicIterator(batch_size=2) viterator.index_with(self.vocab) trainer = CallbackTrainer( self.model, training_data=self.instances, iterator=waiting_iterator, optimizer=self.optimizer, num_epochs=6, serialization_dir=self.TEST_DIR, callbacks=self.default_callbacks(max_checkpoints=2, checkpoint_every=5, validation_iterator=viterator), ) trainer.train() # Now check the serialized files for prefix in ["model_state_epoch_*", "training_state_epoch_*"]: file_names = glob.glob(os.path.join(self.TEST_DIR, prefix)) epochs = [ int(re.search(r"_([0-9])\.th", fname).group(1)) for fname in file_names ] # epoch N has N-1 in file name assert sorted(epochs) == [1, 3, 4, 5] def test_trainer_can_log_learning_rates_tensorboard(self): callbacks = [ cb for cb in self.default_callbacks() if not isinstance(cb, LogToTensorboard) ] # The lambda: None is unfortunate, but it will get replaced by the callback. 
tensorboard = TensorboardWriter(lambda: None, should_log_learning_rate=True, summary_interval=2) callbacks.append(LogToTensorboard(tensorboard)) trainer = CallbackTrainer( self.model, training_data=self.instances, iterator=self.iterator, optimizer=self.optimizer, num_epochs=2, serialization_dir=self.TEST_DIR, callbacks=callbacks, ) trainer.train() def test_trainer_saves_models_at_specified_interval(self): iterator = BasicIterator(batch_size=4) iterator.index_with(self.vocab) trainer = CallbackTrainer( self.model, training_data=self.instances, iterator=iterator, optimizer=self.optimizer, num_epochs=2, serialization_dir=self.TEST_DIR, callbacks=self.default_callbacks(model_save_interval=0.0001), ) trainer.train() # Now check the serialized files for models saved during the epoch. prefix = "model_state_epoch_*" file_names = sorted(glob.glob(os.path.join(self.TEST_DIR, prefix))) epochs = [ re.search(r"_([0-9\.\-]+)\.th", fname).group(1) for fname in file_names ] # We should have checkpoints at the end of each epoch and during each, e.g. # [0.timestamp, 0, 1.timestamp, 1] assert len(epochs) == 4 assert epochs[3] == "1" assert "." in epochs[0] # Now make certain we can restore from timestamped checkpoint. # To do so, remove the checkpoint from the end of epoch 1&2, so # that we are forced to restore from the timestamped checkpoints. for k in range(2): os.remove( os.path.join(self.TEST_DIR, "model_state_epoch_{}.th".format(k))) os.remove( os.path.join(self.TEST_DIR, "training_state_epoch_{}.th".format(k))) os.remove(os.path.join(self.TEST_DIR, "best.th")) restore_trainer = CallbackTrainer( self.model, training_data=self.instances, iterator=iterator, optimizer=self.optimizer, num_epochs=2, serialization_dir=self.TEST_DIR, callbacks=self.default_callbacks(model_save_interval=0.0001), ) restore_trainer.handler.fire_event(Events.TRAINING_START) assert restore_trainer.epoch_number == 2 # One batch per epoch. assert restore_trainer.batch_num_total == 2 def test_trainer_saves_and_loads_best_validation_metrics_correctly_1(self): # Use -loss and run 1 epoch of original-training, and one of restored-training # Run 1 epoch of original training. trainer = CallbackTrainer( self.model, training_data=self.instances, iterator=self.iterator, optimizer=self.optimizer, callbacks=self.default_callbacks(), num_epochs=1, serialization_dir=self.TEST_DIR, ) trainer.train() trainer.handler.fire_event(Events.TRAINING_START) best_epoch_1 = trainer.metric_tracker.best_epoch best_validation_metrics_epoch_1 = trainer.metric_tracker.best_epoch_metrics # best_validation_metrics_epoch_1: {'accuracy': 0.75, 'accuracy3': 1.0, 'loss': 0.6243013441562653} assert isinstance(best_validation_metrics_epoch_1, dict) assert "loss" in best_validation_metrics_epoch_1 # Run 1 epoch of restored training. restore_trainer = CallbackTrainer( self.model, training_data=self.instances, iterator=self.iterator, optimizer=self.optimizer, callbacks=self.default_callbacks(), num_epochs=2, serialization_dir=self.TEST_DIR, ) restore_trainer.train() restore_trainer.handler.fire_event(Events.TRAINING_START) best_epoch_2 = restore_trainer.metric_tracker.best_epoch best_validation_metrics_epoch_2 = restore_trainer.metric_tracker.best_epoch_metrics # Because of using -loss, 2nd epoch would be better than 1st. So best val metrics should not be same. 
        assert best_epoch_1 == 0 and best_epoch_2 == 1
        assert best_validation_metrics_epoch_2 != best_validation_metrics_epoch_1

    def test_trainer_saves_and_loads_best_validation_metrics_correctly_2(self):
        # Use +loss and run 1 epoch of original training, and one of restored training.
        # Run 1 epoch of original training.
        trainer = CallbackTrainer(
            self.model,
            training_data=self.instances,
            iterator=self.iterator,
            optimizer=self.optimizer,
            callbacks=self.default_callbacks(validation_metric="+loss"),
            num_epochs=1,
            serialization_dir=self.TEST_DIR,
        )
        trainer.handler.verbose = True
        trainer.train()

        trainer.handler.fire_event(Events.TRAINING_START)
        best_epoch_1 = trainer.metric_tracker.best_epoch
        best_validation_metrics_epoch_1 = trainer.metric_tracker.best_epoch_metrics
        # e.g. best_validation_metrics_epoch_1: {'accuracy': 0.75, 'accuracy3': 1.0, 'loss': 0.6243013441562653}
        assert isinstance(best_validation_metrics_epoch_1, dict)
        assert "loss" in best_validation_metrics_epoch_1

        # Run 1 more epoch of restored training.
        restore_trainer = CallbackTrainer(
            self.model,
            training_data=self.instances,
            iterator=self.iterator,
            optimizer=self.optimizer,
            callbacks=self.default_callbacks(validation_metric="+loss"),
            num_epochs=2,
            serialization_dir=self.TEST_DIR,
        )
        print("restore trainer")
        restore_trainer.handler.verbose = True
        restore_trainer.train()
        restore_trainer.handler.fire_event(Events.TRAINING_START)
        best_epoch_2 = restore_trainer.metric_tracker.best_epoch
        best_validation_metrics_epoch_2 = restore_trainer.metric_tracker.best_epoch_metrics

        # Because of using +loss, the 2nd epoch won't be better than the 1st,
        # so the best validation metrics should be the same.
        assert best_epoch_1 == best_epoch_2 == 0
        assert best_validation_metrics_epoch_2 == best_validation_metrics_epoch_1

    def test_restored_training_returns_best_epoch_metrics_even_if_no_better_epoch_is_found_after_restoring(
            self):
        # Instead of -loss, use +loss to ensure the 2nd epoch is considered worse.
        # Run 1 epoch of original training.
        original_trainer = CallbackTrainer(
            self.model,
            training_data=self.instances,
            iterator=self.iterator,
            optimizer=self.optimizer,
            callbacks=self.default_callbacks(validation_metric="+loss"),
            num_epochs=1,
            serialization_dir=self.TEST_DIR,
        )
        training_metrics = original_trainer.train()

        # Run 1 epoch of restored training.
restored_trainer = CallbackTrainer( self.model, training_data=self.instances, iterator=self.iterator, optimizer=self.optimizer, callbacks=self.default_callbacks(validation_metric="+loss"), num_epochs=2, serialization_dir=self.TEST_DIR, ) restored_metrics = restored_trainer.train() assert "best_validation_loss" in restored_metrics assert "best_validation_accuracy" in restored_metrics assert "best_validation_accuracy3" in restored_metrics assert "best_epoch" in restored_metrics # Epoch 2 validation loss should be lesser than that of Epoch 1 assert training_metrics["best_validation_loss"] == restored_metrics[ "best_validation_loss"] assert training_metrics["best_epoch"] == 0 assert training_metrics["validation_loss"] > restored_metrics[ "validation_loss"] def test_handle_errors(self): class ErrorTest(Callback): """ A callback with three triggers * at BATCH_START, it raises a RuntimeError * at TRAINING_END, it sets a finished flag to True * at ERROR, it captures `trainer.exception` """ def __init__(self) -> None: self.exc: Optional[Exception] = None self.finished_training = None @handle_event(Events.BATCH_START) def raise_exception(self, trainer): raise RuntimeError("problem starting batch") @handle_event(Events.TRAINING_END) def finish_training(self, trainer): self.finished_training = True @handle_event(Events.ERROR) def capture_error(self, trainer): self.exc = trainer.exception error_test = ErrorTest() callbacks = self.default_callbacks() + [error_test] original_trainer = CallbackTrainer( self.model, self.instances, self.iterator, self.optimizer, callbacks=callbacks, num_epochs=1, serialization_dir=self.TEST_DIR, ) with pytest.raises(RuntimeError): original_trainer.train() # The callback should have captured the exception. assert error_test.exc is not None assert error_test.exc.args == ("problem starting batch", ) # The "finished" flag should never have been set to True. assert not error_test.finished_training
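# The ErrorTest callback above shows the general extension pattern: subclass
# Callback and register methods with @handle_event. A minimal sketch of a custom
# callback that just counts batches; the import path follows what these tests
# already use, though it may vary across allennlp versions:
from allennlp.training.callbacks import Callback, Events, handle_event

class BatchCounter(Callback):
    """Counts how many batches the trainer has started."""

    def __init__(self) -> None:
        self.batches_started = 0

    @handle_event(Events.BATCH_START)
    def count_batch(self, trainer):
        self.batches_started += 1

# Usage sketch: pass it alongside the other callbacks, e.g.
#   CallbackTrainer(..., callbacks=default_callbacks + [BatchCounter()])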
def hidden_eval_fever_adv_v1(): batch_size = 64 lazy = True dev_prob_threshold = 0.5 SAVE_PATH = "/home/easonnie/projects/FunEver/saved_models/07-20-22:28:24_mesim_wn_450_adv_sample_v1_|t_prob:0.35|top_k:8/i(46000)_epoch(7)_dev(0.6405140514051405)_loss(1.0761665150348825)_seed(12)" dev_upstream_sent_list = common.load_jsonl( config.RESULT_PATH / "sent_retri_nn/2018_07_20_15:17:59_r/dev_sent.jsonl") # Prepare Data token_indexers = { 'tokens': SingleIdTokenIndexer(namespace='tokens'), # This is the raw tokens 'elmo_chars': ELMoTokenCharactersIndexer( namespace='elmo_characters') # This is the elmo_characters } p_dict = wn_persistent_api.persistence_load() upstream_dev_list = score_converter_scaled(config.T_FEVER_DEV_JSONL, dev_upstream_sent_list, scale_prob=dev_prob_threshold, delete_prob=False) dev_fever_data_reader = WNReader(token_indexers=token_indexers, lazy=lazy, wn_p_dict=p_dict, max_l=360) complete_upstream_dev_data = get_actual_data(config.T_FEVER_DEV_JSONL, upstream_dev_list) dev_instances = dev_fever_data_reader.read(complete_upstream_dev_data) # Load Vocabulary biterator = BasicIterator(batch_size=batch_size) vocab, weight_dict = load_vocab_embeddings(config.DATA_ROOT / "vocab_cache" / "nli_basic") vocab.change_token_with_index_to_namespace('hidden', -2, namespace='labels') print(vocab.get_token_to_index_vocabulary('labels')) print(vocab.get_vocab_size('tokens')) biterator.index_with(vocab) # Build Model device = torch.device("cuda" if torch.cuda.is_available() else "cpu", index=0) device_num = -1 if device.type == 'cpu' else 0 model = Model( rnn_size_in=(1024 + 300 + dev_fever_data_reader.wn_feature_size, 1024 + 450), rnn_size_out=(450, 450), weight=weight_dict['glove.840B.300d'], vocab_size=vocab.get_vocab_size('tokens'), mlp_d=900, embedding_dim=300, max_l=300) print("Model Max length:", model.max_l) model.load_state_dict(torch.load(SAVE_PATH)) model.display() model.to(device) eval_iter = biterator(dev_instances, shuffle=False, num_epochs=1, cuda_device=device_num) builded_dev_data = hidden_eval(model, eval_iter, complete_upstream_dev_data) eval_mode = {'check_sent_id_correct': True, 'standard': True} common.save_jsonl( builded_dev_data, config.RESULT_PATH / "nli_results" / "pipeline_results_1.jsonl") c_scorer.delete_label(builded_dev_data) print( c_scorer.fever_score(builded_dev_data, common.load_jsonl(config.FEVER_DEV_JSONL), mode=eval_mode))
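# `score_converter_scaled` above appears to keep (and rescale) upstream evidence
# sentences whose retrieval probability clears `dev_prob_threshold`. The filtering
# idea in isolation, as a hypothetical stand-in (the 'prob' field name is an
# assumption; the real converter also rescales scores):
def filter_upstream_by_prob(upstream_sent_list, prob_threshold):
    return [sent for sent in upstream_sent_list
            if float(sent['prob']) >= prob_threshold]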
def model_go_pure_aug(): # for some_params in [0.25, 0.25, 0.25]: for some_params in [0.25, 0.25, 0.25]: # bert_model_name = 'bert-large-uncased' seed = 6 bert_model_name = 'bert-base-uncased' lazy = False forward_size = 16 batch_size = 32 gradient_accumulate_step = int(batch_size / forward_size) warmup_proportion = 0.1 learning_rate = 5e-5 num_train_epochs = 3 do_ema = False dev_prob_threshold = 0.1 train_prob_threshold = 0.35 debug_mode = False # experiment_name = f"bert_fever_nli_baseline_on_fulldata" # experiment_name = f"bert_fever_nli_baseline_on_fulldata_aug_the_same_gt_mrate({some_params})" # experiment_name = f"bert_fever_nli_baseline_on_10p_aug_ratio({some_params})" experiment_name = f"bert_fever_nli_baseline_on_fulldata_aug_ratio({some_params})" # experiment_name = f"bert_fever_nli_baseline_pure_aug" data_aug = True # data_aug_file = config.FEVER_DATA_ROOT / "qa_aug/squad_train_turker_groundtruth.json" # data_aug_size = int(21_015 * some_params) # 10p # data_aug_size = int(208_346 * some_params) # training_file = config.FEVER_DATA_ROOT / "fever_1.0/train_10.jsonl" training_file = config.FEVER_DATA_ROOT / "fever_1.0/train.jsonl" train_sample_top_k = 8 # est_datasize = 208_346 # full # est_datasize = 14_544 # est_datasize = 21_015 + data_aug_size # 10p aug_size = int(208_346 * some_params) est_datasize = 208_346 + aug_size # est_datasize = 208_346 + data_aug_size num_class = 3 # num_train_optimization_steps torch.manual_seed(seed) device = torch.device("cuda" if torch.cuda.is_available() else "cpu") n_gpu = torch.cuda.device_count() unk_token_num = {'tokens': 1} # work around for initiating vocabulary. vocab = ExVocabulary(unk_token_num=unk_token_num) vocab.add_token_to_namespace('SUPPORTS', namespace='labels') vocab.add_token_to_namespace('REFUTES', namespace='labels') vocab.add_token_to_namespace('NOT ENOUGH INFO', namespace='labels') vocab.add_token_to_namespace("hidden", namespace="labels") vocab.change_token_with_index_to_namespace("hidden", -2, namespace='labels') # Finished build vocabulary. # Load standardized sentence file dev_upstream_sent_list = common.load_jsonl(config.FEVER_DATA_ROOT / "upstream_sentence_selection_Feb16/dev_sent_pred_scores.jsonl") dev_sent_after_threshold_filter = fever_ss_sampler.threshold_sampler_insure_unique( config.FEVER_DATA_ROOT / "fever_1.0/shared_task_dev.jsonl", dev_upstream_sent_list, prob_threshold=dev_prob_threshold, top_n=5) dev_data_list = fever_nli_sampler.select_sent_with_prob_for_eval( config.FEVER_DATA_ROOT / "fever_1.0/shared_task_dev.jsonl", dev_sent_after_threshold_filter, None, tokenized=True) # print(dev_data_list[0]) # exit(0) train_upstream_sent_list = common.load_jsonl(config.FEVER_DATA_ROOT / "upstream_sentence_selection_Feb16/train_sent_scores.jsonl") # Finished loading standardized sentence file. 
        bert_tokenizer = BertTokenizer.from_pretrained(bert_model_name, do_lower_case=True)
        bert_fever_reader = BertReaderFeverNLI(bert_tokenizer, lazy=lazy)

        dev_instances = bert_fever_reader.read(dev_data_list)

        biterator = BasicIterator(batch_size=forward_size)
        biterator.index_with(vocab)

        # Load training model
        model_clf = BertForSequenceClassification.from_pretrained(bert_model_name,
                                                                  num_labels=num_class)

        ema_tracker = None
        ema_model_copy = None
        if do_ema and ema_tracker is None:
            ema_tracker = EMA(model_clf.named_parameters(), on_cpu=True)
            ema_model_copy = copy.deepcopy(model_clf)

        model_clf.to(device)

        param_optimizer = list(model_clf.named_parameters())
        no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
        optimizer_grouped_parameters = [
            {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
             'weight_decay': 0.01},
            {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
             'weight_decay': 0.0}
        ]

        num_train_optimization_steps = int(est_datasize / forward_size / gradient_accumulate_step) * \
            num_train_epochs
        print(num_train_optimization_steps)

        optimizer = BertAdam(optimizer_grouped_parameters,
                             lr=learning_rate,
                             warmup=warmup_proportion,
                             t_total=num_train_optimization_steps)
        # optimizer = optim.Adam(optimizer_grouped_parameters, lr=learning_rate)

        # # # Create Log File
        file_path_prefix, date = save_tool.gen_file_prefix(f"{experiment_name}")
        # Save the source code.
        script_name = os.path.basename(__file__)
        with open(os.path.join(file_path_prefix, script_name), 'w') as out_f, \
                open(__file__, 'r') as it:
            out_f.write(it.read())
            out_f.flush()
        # # # Log File end

        model_clf.train()
        if n_gpu > 1:
            model_clf = nn.DataParallel(model_clf)

        forbackward_step = 0
        update_step = 0
        eval_iter_num = 2_000  # Change this to real evaluation.
        best_fever_score = -1

        for n_epoch in range(num_train_epochs):
            print("Resampling...")
            train_sent_after_threshold_filter = \
                fever_ss_sampler.threshold_sampler_insure_unique(training_file,
                                                                 train_upstream_sent_list,
                                                                 train_prob_threshold,
                                                                 top_n=train_sample_top_k)
            # This assignment must stay live: train_data_list is extended and
            # shuffled below before being read into instances.
            train_data_list = fever_nli_sampler.adv_simi_sample_with_prob_v1_1(
                training_file, train_sent_after_threshold_filter, None, tokenized=True)

            aug_d_list = []
            if data_aug:
                aug_d_list = get_sample_data(-1)
                random.shuffle(aug_d_list)
                aug_d_list = aug_d_list[:aug_size]

            train_data_list = train_data_list + aug_d_list
            random.shuffle(train_data_list)
            # train_data_list = get_sample_data(-1)
            print("Sample data length:", len(train_data_list))
            sampled_train_instances = bert_fever_reader.read(train_data_list)

            # Likewise live: the loop below iterates over train_iter.
            train_iter = biterator(sampled_train_instances, shuffle=True, num_epochs=1)

            for i, batch in enumerate(tqdm(train_iter)):
                paired_sequence = batch['paired_sequence']
                paired_segments_ids = batch['paired_segments_ids']
                labels_ids = batch['label']
                att_mask, _ = torch_util.get_length_and_mask(paired_sequence)

                paired_sequence = paired_sequence.to(device)
                paired_segments_ids = paired_segments_ids.to(device)
                labels_ids = labels_ids.to(device)
                att_mask = att_mask.to(device)

                loss = model_clf(paired_sequence,
                                 token_type_ids=paired_segments_ids,
                                 attention_mask=att_mask,
                                 labels=labels_ids)

                if n_gpu > 1:
                    loss = loss.mean()  # mean() to average on multi-gpu.
if gradient_accumulate_step > 1: loss = loss / gradient_accumulate_step loss.backward() forbackward_step += 1 if forbackward_step % gradient_accumulate_step == 0: optimizer.step() optimizer.zero_grad() update_step += 1 if do_ema and ema_tracker is not None: # if model_clf is DataParallel, then we use model_clf.module model_to_track = model_clf.module if hasattr(model_clf, 'module') else model_clf ema_tracker(model_to_track.named_parameters()) # Whenever we do update, the do ema update if update_step % eval_iter_num == 0: print("Update steps:", update_step) dev_iter = biterator(dev_instances, num_epochs=1, shuffle=False) if do_ema and ema_model_copy is not None and ema_tracker is not None: print("EMA evaluation.") EMA.load_ema_to_model(ema_model_copy, ema_tracker) ema_model_copy.to(device) if n_gpu > 1: ema_model_copy = nn.DataParallel(ema_model_copy) dev_data_list = hidden_eval(ema_model_copy, dev_iter, dev_data_list, device) else: dev_data_list = hidden_eval(model_clf, dev_iter, dev_data_list, device) eval_mode = {'check_sent_id_correct': True, 'standard': True} fever_score, label_score, pr, rec, f1 = fever_scorer.fever_score(dev_data_list, common.load_jsonl(config.FEVER_DATA_ROOT / "fever_1.0/shared_task_dev.jsonl"), mode=eval_mode, verbose=False) print("Fever Score(FScore/LScore:/Precision/Recall/F1):", fever_score, label_score, pr, rec, f1) print(f"Dev:{fever_score}/{label_score}") if best_fever_score < fever_score: print("New Best FScore") best_fever_score = fever_score save_path = os.path.join( file_path_prefix, f'i({update_step})_epoch({n_epoch})_dev({fever_score})_lacc({label_score})_seed({seed})' ) model_to_save = model_clf.module if hasattr(model_clf, 'module') else model_clf output_model_file = os.path.join(file_path_prefix, save_path) torch.save(model_to_save.state_dict(), output_model_file) print("Update steps:", update_step) dev_iter = biterator(dev_instances, num_epochs=1, shuffle=False) if do_ema and ema_model_copy is not None and ema_tracker is not None: print("EMA evaluation.") EMA.load_ema_to_model(ema_model_copy, ema_tracker) ema_model_copy.to(device) if n_gpu > 1: ema_model_copy = nn.DataParallel(ema_model_copy) dev_data_list = hidden_eval(ema_model_copy, dev_iter, dev_data_list, device) else: dev_data_list = hidden_eval(model_clf, dev_iter, dev_data_list, device) eval_mode = {'check_sent_id_correct': True, 'standard': True} fever_score, label_score, pr, rec, f1 = fever_scorer.fever_score(dev_data_list, common.load_jsonl(config.FEVER_DATA_ROOT / "fever_1.0/shared_task_dev.jsonl"), mode=eval_mode, verbose=False) print("Fever Score(FScore/LScore:/Precision/Recall/F1):", fever_score, label_score, pr, rec, f1) print(f"Dev:{fever_score}/{label_score}") if best_fever_score < fever_score: print("New Best FScore") best_fever_score = fever_score save_path = os.path.join( file_path_prefix, f'i({update_step})_epoch({n_epoch})_dev({fever_score})_lacc({label_score})_seed({seed})' ) model_to_save = model_clf.module if hasattr(model_clf, 'module') else model_clf output_model_file = os.path.join(file_path_prefix, save_path) torch.save(model_to_save.state_dict(), output_model_file)
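# The loop above implements gradient accumulation: the loss is divided by
# gradient_accumulate_step and optimizer.step() only fires once every
# gradient_accumulate_step backward passes, emulating an effective batch of
# forward_size * gradient_accumulate_step examples. The bare pattern in
# isolation (a sketch; `model`, `optimizer`, and `batches` are stand-ins):
def train_with_accumulation(model, optimizer, batches, accumulate_steps):
    optimizer.zero_grad()
    for step, batch in enumerate(batches, start=1):
        loss = model(**batch)                 # assumed to return a scalar loss
        (loss / accumulate_steps).backward()  # scale so gradients match one large batch
        if step % accumulate_steps == 0:
            optimizer.step()
            optimizer.zero_grad()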
def model_go_with_old_data(): seed = 12 torch.manual_seed(seed) # bert_model_name = 'bert-large-uncased' bert_model_name = 'bert-base-uncased' experiment_name = 'fever_v1_nli' lazy = False # lazy = True forward_size = 16 # batch_size = 64 # batch_size = 192 batch_size = 32 gradient_accumulate_step = int(batch_size / forward_size) warmup_proportion = 0.1 learning_rate = 5e-5 num_train_epochs = 3 eval_frequency = 2000 do_lower_case = True pair_order = 'cq' # debug_mode = True debug_mode = False # est_datasize = 900_000 num_class = 3 # num_train_optimization_steps train_sent_filtering_prob = 0.35 dev_sent_filtering_prob = 0.1 # dev_sent_results_file = config.RESULT_PATH / "doc_retri_results/fever_results/sent_results/4-14-sent_results_v0/i(5000)|e(0)|s01(0.9170917091709171)|s05(0.8842384238423843)|seed(12)_dev_sent_results.json" # train_sent_results_file = config.RESULT_PATH / "doc_retri_results/fever_results/sent_results/4-14-sent_results_v0/train_sent_results.jsonl" from utest.utest_format_converter_for_old_sent.tool import format_convert dev_sent_results_file = format_convert( config.PRO_ROOT / "results/doc_retri_results/fever_results/sent_results/old_sent_data_by_NSMN/4-15-dev_sent_pred_scores_old_format.jsonl" ) train_sent_results_file = format_convert( config.PRO_ROOT / "results/doc_retri_results/fever_results/sent_results/old_sent_data_by_NSMN/train_sent_scores_old_format.jsonl" ) device = torch.device("cuda" if torch.cuda.is_available() else "cpu") device_num = 0 if torch.cuda.is_available() else -1 n_gpu = torch.cuda.device_count() unk_token_num = {'tokens': 1} # work around for initiating vocabulary. vocab = ExVocabulary(unk_token_num=unk_token_num) vocab.add_token_to_namespace('SUPPORTS', namespace='labels') vocab.add_token_to_namespace('REFUTES', namespace='labels') vocab.add_token_to_namespace('NOT ENOUGH INFO', namespace='labels') vocab.add_token_to_namespace("hidden", namespace="labels") vocab.change_token_with_index_to_namespace("hidden", -2, namespace='labels') # Load Dataset # train_fitems_list = get_inference_pair('train', True, train_sent_results_file, debug_mode, train_sent_filtering_prob) dev_debug_num = 2481 if debug_mode else None dev_fitems_list, dev_list = get_inference_pair('dev', False, dev_sent_results_file, dev_debug_num, dev_sent_filtering_prob) # = common.load_jsonl(config.FEVER_DEV) if debug_mode: dev_list = dev_list[:50] eval_frequency = 1 # print(dev_list[-1]['_id']) # exit(0) # sampled_train_list = down_sample_neg(train_fitems_list, ratio=pos_ratio) train_debug_num = 2971 if debug_mode else None train_fitems_list, _ = get_inference_pair('train', True, train_sent_results_file, train_debug_num, train_sent_filtering_prob) est_datasize = len(train_fitems_list) # dev_o_dict = list_dict_data_tool.list_to_dict(dev_list, 'id') # print(dev_o_dict) bert_tokenizer = BertTokenizer.from_pretrained(bert_model_name, do_lower_case=do_lower_case) bert_cs_reader = BertFeverNLIReader(bert_tokenizer, lazy, is_paired=True, query_l=64, example_filter=None, max_l=364, pair_order=pair_order) bert_encoder = BertModel.from_pretrained(bert_model_name) model = BertMultiLayerSeqClassification(bert_encoder, num_labels=num_class, num_of_pooling_layer=1, act_type='tanh', use_pretrained_pooler=True, use_sigmoid=False) # param_optimizer = list(model.named_parameters()) no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight'] optimizer_grouped_parameters = [{ 'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01 }, { 'params': [p for n, p in 
param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0 }] num_train_optimization_steps = int(est_datasize / forward_size / gradient_accumulate_step) * \ num_train_epochs if debug_mode: num_train_optimization_steps = 100 print("Estimated training size", est_datasize) print("Number of optimization steps:", num_train_optimization_steps) optimizer = BertAdam(optimizer_grouped_parameters, lr=learning_rate, warmup=warmup_proportion, t_total=num_train_optimization_steps) dev_instances = bert_cs_reader.read(dev_fitems_list) biterator = BasicIterator(batch_size=forward_size) biterator.index_with(vocab) model.to(device) if n_gpu > 1: model = torch.nn.DataParallel(model) forbackward_step = 0 update_step = 0 logging_agent = save_tool.ScoreLogger({}) file_path_prefix = '.' if not debug_mode: file_path_prefix, date = save_tool.gen_file_prefix( f"{experiment_name}") # # # Create Log File # Save the source code. script_name = os.path.basename(__file__) with open(os.path.join(file_path_prefix, script_name), 'w') as out_f, open(__file__, 'r') as it: out_f.write(it.read()) out_f.flush() # # # Log File end for epoch_i in range(num_train_epochs): print("Epoch:", epoch_i) train_fitems_list, _ = get_inference_pair('train', True, train_sent_results_file, train_debug_num, train_sent_filtering_prob) random.shuffle(train_fitems_list) train_instance = bert_cs_reader.read(train_fitems_list) train_iter = biterator(train_instance, num_epochs=1, shuffle=True) for batch in tqdm(train_iter): model.train() batch = move_to_device(batch, device_num) paired_sequence = batch['paired_sequence'] paired_segments_ids = batch['paired_segments_ids'] labels_ids = batch['label'] att_mask, _ = torch_util.get_length_and_mask(paired_sequence) s1_span = batch['bert_s1_span'] s2_span = batch['bert_s2_span'] loss = model( paired_sequence, token_type_ids=paired_segments_ids, attention_mask=att_mask, mode=BertMultiLayerSeqClassification.ForwardMode.TRAIN, labels=labels_ids) if n_gpu > 1: loss = loss.mean() # mean() to average on multi-gpu. if gradient_accumulate_step > 1: loss = loss / gradient_accumulate_step loss.backward() forbackward_step += 1 if forbackward_step % gradient_accumulate_step == 0: optimizer.step() optimizer.zero_grad() update_step += 1 if update_step % eval_frequency == 0: print("Update steps:", update_step) dev_iter = biterator(dev_instances, num_epochs=1, shuffle=False) cur_eval_results_list = eval_model(model, dev_iter, device_num, with_probs=True, make_int=True) results_dict = list_dict_data_tool.list_to_dict( cur_eval_results_list, 'oid') copied_dev_list = copy.deepcopy(dev_list) list_dict_data_tool.append_item_from_dict_to_list( copied_dev_list, results_dict, 'id', 'predicted_label') mode = {'standard': True} strict_score, acc_score, pr, rec, f1 = fever_scorer.fever_score( copied_dev_list, dev_fitems_list, mode=mode, max_evidence=5) logging_item = { 'ss': strict_score, 'ac': acc_score, 'pr': pr, 'rec': rec, 'f1': f1, } save_file_name = f'i({update_step})|e({epoch_i})' \ f'|ss({strict_score})|ac({acc_score})|pr({pr})|rec({rec})|f1({f1})' \ f'|seed({seed})' common.save_jsonl( copied_dev_list, Path(file_path_prefix) / f"{save_file_name}_dev_nli_results.json") # print(save_file_name) logging_agent.incorporate_results({}, save_file_name, logging_item) logging_agent.logging_to_file( Path(file_path_prefix) / "log.json") model_to_save = model.module if hasattr( model, 'module') else model output_model_file = Path(file_path_prefix) / save_file_name torch.save(model_to_save.state_dict(), str(output_model_file))
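# The evaluation block above joins model predictions back onto the dev list by
# id via list_dict_data_tool. The join in isolation, as a hypothetical
# equivalent of list_to_dict + append_item_from_dict_to_list (not the library
# code; field names 'oid', 'id', and 'predicted_label' follow the calls above):
def attach_predictions(dev_list, eval_results, field='predicted_label'):
    results_by_id = {r['oid']: r for r in eval_results}
    for item in dev_list:
        item[field] = results_by_id[item['id']][field]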
def main(serialization_directory: str,
         device: int,
         data: str,
         prefix: str,
         domain: str = None):
    """
    serialization_directory : str, required.
        The directory containing the serialized weights.
    device : int, default = -1
        The device to run the evaluation on.
    data : str, default = None
        The data to evaluate on. By default, we use the validation data from
        the original experiment.
    prefix : str, default = ""
        The prefix to prepend to the generated gold and prediction files, to
        distinguish different models/data.
    domain : str, optional (default = None)
        If passed, filters the ontonotes evaluation/test dataset to only
        contain the specified domain. This overwrites the domain in the config
        file from the model, to allow evaluation on domains other than the one
        the model was trained on.
    """
    config = Params.from_file(os.path.join(serialization_directory, "config.json"))

    if domain is not None:
        # Hack to allow evaluation on different domains than the
        # model was trained on.
        config["dataset_reader"]["domain_identifier"] = domain
        prefix = f"{domain}_{prefix}"
    else:
        config["dataset_reader"].pop("domain_identifier", None)

    dataset_reader = DatasetReader.from_params(config['dataset_reader'])
    evaluation_data_path = data if data else config['validation_data_path']

    archive = load_archive(os.path.join(serialization_directory, "model.tar.gz"),
                           cuda_device=device)
    model = archive.model
    model.eval()

    prediction_file_path = os.path.join(serialization_directory,
                                        prefix + "_predictions.txt")
    gold_file_path = os.path.join(serialization_directory, prefix + "_gold.txt")
    prediction_file = open(prediction_file_path, "w+")
    gold_file = open(gold_file_path, "w+")

    # Load the evaluation data and index it.
    print("reading evaluation data from {}".format(evaluation_data_path))
    instances = dataset_reader.read(evaluation_data_path)

    with torch.autograd.no_grad():
        iterator = BasicIterator(batch_size=32)
        iterator.index_with(model.vocab)

        model_predictions = []
        batches = iterator(instances, num_epochs=1, shuffle=False, cuda_device=device)
        for batch in Tqdm.tqdm(batches):
            result = model(**batch)
            predictions = model.decode(result)
            model_predictions.extend(predictions["tags"])

        for instance, prediction in zip(instances, model_predictions):
            fields = instance.fields
            try:
                # Most sentences have a verbal predicate, but not all.
                verb_index = fields["verb_indicator"].labels.index(1)
            except ValueError:
                verb_index = None
            gold_tags = fields["tags"].labels
            sentence = [x.text for x in fields["tokens"].tokens]
            write_to_conll_eval_file(prediction_file, gold_file, verb_index,
                                     sentence, prediction, gold_tags)
        prediction_file.close()
        gold_file.close()
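# A hypothetical invocation of main() above; the directory and prefix are
# illustrative placeholders, not paths from this project:
if __name__ == "__main__":
    main(serialization_directory="serialization/srl_model",
         device=-1,           # CPU; pass e.g. 0 to evaluate on a GPU
         data=None,           # falls back to validation_data_path in config.json
         prefix="conll2012")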
# train_dataset = reader.read(TRAIN_PATH) train_dataset = merge_reader.read(COMBINED_TRAIN_PATH) validation_dataset = reader.read(VALIDATION_PATH) test_dataset = reader.read(TEST_PATH) # %% vocab = Vocabulary() vocab._token_to_index['labels'] = {'0': 0, '1': 1} # %% """Prepare iterator""" from allennlp.data.iterators import BasicIterator iterator = BasicIterator(batch_size=8) iterator.index_with(vocab) # %% # Loss function def multiple_target_CrossEntropyLoss(logits, labels): loss = 0 for i in range(logits.shape[0]): loss = loss + nn.CrossEntropyLoss( weight=torch.tensor([1.0, 1.0]).cuda())(logits[i, :, :], labels[i, :]) return loss / logits.shape[0] # %% """Prepare the model"""
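# %%
# Note on multiple_target_CrossEntropyLoss above: the per-example Python loop
# can be collapsed into a single CrossEntropyLoss call by flattening the batch
# and sequence dimensions. A sketch that should be numerically equivalent when
# all rows in the batch have equal length (assumes logits of shape
# [batch, seq, 2] and labels of shape [batch, seq]):
import torch
import torch.nn as nn

def multiple_target_ce_vectorized(logits, labels):
    weight = torch.tensor([1.0, 1.0], device=logits.device)
    flat_logits = logits.reshape(-1, logits.size(-1))  # [batch * seq, 2]
    flat_labels = labels.reshape(-1)                   # [batch * seq]
    return nn.CrossEntropyLoss(weight=weight)(flat_logits, flat_labels)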
def train(): reader = PWKPReader() train_dataset = reader.read(train_path) valid_dataset = reader.read(dev_path) if os.path.exists(vocab_dir): vocab = Vocabulary.from_files(vocab_dir) else: vocab = Vocabulary.from_instances(instances=train_dataset, max_vocab_size=opt.vocab_size) vocab.save_to_files(vocab_dir) iterator = BucketIterator(batch_size=opt.batch_size, sorting_keys=[("src", "num_tokens"), ("tgt", "num_tokens")]) iterator.index_with(vocab) model = Seq2Seq(emb_size=opt.emb_size, hidden_size=opt.hidden_size, enc_layers=opt.enc_layers, dec_layers=opt.dec_layers, dropout=opt.dropout, bidirectional=opt.bidirectional, beam_size=opt.beam_size, label_smoothing=opt.label_smoothing, vocab=vocab) optimizer = optim.Adam(model.parameters(), lr=opt.lr) #learning_rate_scheduler = torch.optim.lr_scheduler.StepLR(optimizer=optimizer, step_size=1, gamma=opt.lr_decay) val_iterator = BasicIterator(batch_size=opt.batch_size) val_iterator.index_with(vocab) predictor = Predictor(iterator=val_iterator, max_decoding_step=opt.max_step, vocab=vocab, reader=reader, data_path=test_path, log_dir=save_dir, map_path=ner_path, cuda_device=opt.gpu) trainer = Trainer( model=model, optimizer=optimizer, #learning_rate_scheduler=learning_rate_scheduler, learning_rate_decay=opt.lr_decay, ema_decay=opt.ema_decay, predictor=predictor, iterator=iterator, train_dataset=train_dataset, validation_dataset=valid_dataset, validation_metric='+bleu', cuda_device=opt.gpu, num_epochs=opt.epoch, serialization_dir=save_dir, num_serialized_models_to_keep=5, #model_save_interval=60, #summary_interval=500, should_log_parameter_statistics=False, grad_norm=10) trainer.train()
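# BucketIterator above batches length-sorted instances (by "src"/"tgt" token
# counts), which keeps per-batch padding small, while the separate BasicIterator
# keeps validation order fixed. A rough way to inspect how much padding an
# iterator produces (a sketch; assumes the 'src' text field is indexed under the
# 'tokens' key and that 0 is the padding index):
def padding_fraction(iterator, instances):
    padded, real = 0, 0
    for batch in iterator(instances, num_epochs=1, shuffle=False):
        tokens = batch['src']['tokens']
        padded += tokens.numel()
        real += (tokens != 0).sum().item()
    return 1.0 - real / padded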
word_embeddings = BasicTextFieldEmbedder({"tokens": token_embedder})
lstm = PytorchSeq2VecWrapper(
    nn.LSTM(word_embeddings.get_output_dim(),
            config.hidden_sz,
            bidirectional=True,
            batch_first=True))

save_file = "model_v12.th"  # model saved by LSTM training
model2 = LSTM_Model(word_embeddings, lstm, 2)
with open(save_file, 'rb') as f:
    model2.load_state_dict(torch.load(f))

# iterate over the dataset without changing its order
seq_iterator = BasicIterator(config.batch_size)
seq_iterator.index_with(vocab)

predictor = Predictor(model2, seq_iterator)
prob, labels = predictor.predict(test_dataset)
test_preds = 1 * (prob > .525)  # tuned decision threshold

# Evaluation. sklearn metrics expect (y_true, y_pred); the ground-truth labels
# must come first, otherwise precision and recall are silently swapped.
accuracy_test = accuracy_score(labels, test_preds)
f1_test = f1_score(labels, test_preds)
precision_test = precision_score(labels, test_preds)
recall_test = recall_score(labels, test_preds)

matrix = confusion_matrix(labels, test_preds)
print(matrix)
print(
    "Accuracy score: {:.4f}, F1 Score: {:.4f}, Precision: {:.4f}, Recall: {:.4f} "
    .format(accuracy_test, f1_test, precision_test, recall_test))
fpr, tpr, _ = roc_curve(labels, prob)
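# The 0.525 decision threshold above was presumably tuned on held-out data.
# One hedged way to pick such a threshold is to sweep a grid and keep the value
# that maximizes F1 on a validation split (a sketch, not how this project chose it):
import numpy as np
from sklearn.metrics import f1_score

def best_f1_threshold(labels, prob, grid=None):
    grid = np.linspace(0.05, 0.95, 181) if grid is None else grid
    return max(grid, key=lambda t: f1_score(labels, (prob > t).astype(int)))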
class TestTrainer(AllenNlpTestCase): def setUp(self): super(TestTrainer, self).setUp() self.instances = SequenceTaggingDatasetReader().read('tests/fixtures/data/sequence_tagging.tsv') vocab = Vocabulary.from_instances(self.instances) self.vocab = vocab self.model_params = Params({ "text_field_embedder": { "tokens": { "type": "embedding", "embedding_dim": 5 } }, "encoder": { "type": "lstm", "input_size": 5, "hidden_size": 7, "num_layers": 2 } }) self.model = SimpleTagger.from_params(self.vocab, self.model_params) self.optimizer = torch.optim.SGD(self.model.parameters(), 0.01) self.iterator = BasicIterator(batch_size=2) self.iterator.index_with(vocab) def test_trainer_can_run(self): trainer = Trainer(model=self.model, optimizer=self.optimizer, iterator=self.iterator, train_dataset=self.instances, validation_dataset=self.instances, num_epochs=2) metrics = trainer.train() assert 'best_validation_loss' in metrics assert isinstance(metrics['best_validation_loss'], float) assert 'best_epoch' in metrics assert isinstance(metrics['best_epoch'], int) # Making sure that both increasing and decreasing validation metrics work. trainer = Trainer(model=self.model, optimizer=self.optimizer, iterator=self.iterator, train_dataset=self.instances, validation_dataset=self.instances, validation_metric='+loss', num_epochs=2) metrics = trainer.train() assert 'best_validation_loss' in metrics assert isinstance(metrics['best_validation_loss'], float) assert 'best_epoch' in metrics assert isinstance(metrics['best_epoch'], int) @pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device registered.") def test_trainer_can_run_cuda(self): trainer = Trainer(self.model, self.optimizer, self.iterator, self.instances, num_epochs=2, cuda_device=0) trainer.train() @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="Need multiple GPUs.") def test_trainer_can_run_multiple_gpu(self): multigpu_iterator = BasicIterator(batch_size=4) multigpu_iterator.index_with(self.vocab) trainer = Trainer(self.model, self.optimizer, multigpu_iterator, self.instances, num_epochs=2, cuda_device=[0, 1]) trainer.train() def test_trainer_can_resume_training(self): trainer = Trainer(self.model, self.optimizer, self.iterator, self.instances, validation_dataset=self.instances, num_epochs=1, serialization_dir=self.TEST_DIR) trainer.train() new_trainer = Trainer(self.model, self.optimizer, self.iterator, self.instances, validation_dataset=self.instances, num_epochs=3, serialization_dir=self.TEST_DIR) epoch, val_metrics_per_epoch = new_trainer._restore_checkpoint() # pylint: disable=protected-access assert epoch == 1 assert len(val_metrics_per_epoch) == 1 assert isinstance(val_metrics_per_epoch[0], float) assert val_metrics_per_epoch[0] != 0. 
new_trainer.train() def test_should_stop_early_with_increasing_metric(self): new_trainer = Trainer(self.model, self.optimizer, self.iterator, self.instances, validation_dataset=self.instances, num_epochs=3, serialization_dir=self.TEST_DIR, patience=5, validation_metric="+test") assert new_trainer._should_stop_early([.5, .3, .2, .1, .4, .4]) # pylint: disable=protected-access assert not new_trainer._should_stop_early([.3, .3, .3, .2, .5, .1]) # pylint: disable=protected-access def test_should_stop_early_with_decreasing_metric(self): new_trainer = Trainer(self.model, self.optimizer, self.iterator, self.instances, validation_dataset=self.instances, num_epochs=3, serialization_dir=self.TEST_DIR, patience=5, validation_metric="-test") assert new_trainer._should_stop_early([.02, .3, .2, .1, .4, .4]) # pylint: disable=protected-access assert not new_trainer._should_stop_early([.3, .3, .2, .1, .4, .5]) # pylint: disable=protected-access def test_train_driver_raises_on_model_with_no_loss_key(self): class FakeModel(torch.nn.Module): def forward(self, **kwargs): # pylint: disable=arguments-differ,unused-argument return {} with pytest.raises(ConfigurationError): trainer = Trainer(FakeModel(), self.optimizer, self.iterator, self.instances, num_epochs=2, serialization_dir=self.TEST_DIR) trainer.train() def test_trainer_can_log_histograms(self): # enable activation logging for module in self.model.modules(): module.should_log_activations = True trainer = Trainer(self.model, self.optimizer, self.iterator, self.instances, num_epochs=3, serialization_dir=self.TEST_DIR, histogram_interval=2) trainer.train() def test_trainer_respects_num_serialized_models_to_keep(self): trainer = Trainer(self.model, self.optimizer, self.iterator, self.instances, num_epochs=5, serialization_dir=self.TEST_DIR, num_serialized_models_to_keep=3) trainer.train() # Now check the serialized files for prefix in ['model_state_epoch_*', 'training_state_epoch_*']: file_names = glob.glob(os.path.join(self.TEST_DIR, prefix)) epochs = [int(re.search(r"_([0-9])\.th", fname).group(1)) for fname in file_names] assert sorted(epochs) == [2, 3, 4] def test_trainer_respects_keep_serialized_model_every_num_seconds(self): # To test: # Create an iterator that sleeps for 0.5 second per epoch, so the total training # time for one epoch is slightly greater then 0.5 seconds. # Run for 6 epochs, keeping the last 2 models, models also kept every 1 second. # Check the resulting checkpoints. Should then have models at epochs # 2, 4, plus the last two at 5 and 6. 
class WaitingIterator(BasicIterator): # pylint: disable=arguments-differ def _create_batches(self, *args, **kwargs): time.sleep(0.5) return super(WaitingIterator, self)._create_batches(*args, **kwargs) iterator = WaitingIterator(batch_size=2) iterator.index_with(self.vocab) trainer = Trainer(self.model, self.optimizer, iterator, self.instances, num_epochs=6, serialization_dir=self.TEST_DIR, num_serialized_models_to_keep=2, keep_serialized_model_every_num_seconds=1) trainer.train() # Now check the serialized files for prefix in ['model_state_epoch_*', 'training_state_epoch_*']: file_names = glob.glob(os.path.join(self.TEST_DIR, prefix)) epochs = [int(re.search(r"_([0-9])\.th", fname).group(1)) for fname in file_names] # epoch N has N-1 in file name assert sorted(epochs) == [1, 3, 4, 5] def test_trainer_saves_models_at_specified_interval(self): iterator = BasicIterator(batch_size=4) iterator.index_with(self.vocab) trainer = Trainer(self.model, self.optimizer, iterator, self.instances, num_epochs=2, serialization_dir=self.TEST_DIR, model_save_interval=0.0001) trainer.train() # Now check the serialized files for models saved during the epoch. prefix = 'model_state_epoch_*' file_names = sorted(glob.glob(os.path.join(self.TEST_DIR, prefix))) epochs = [re.search(r"_([0-9\.\-]+)\.th", fname).group(1) for fname in file_names] # We should have checkpoints at the end of each epoch and during each, e.g. # [0.timestamp, 0, 1.timestamp, 1] assert len(epochs) == 4 assert epochs[3] == '1' assert '.' in epochs[0] # Now make certain we can restore from timestamped checkpoint. # To do so, remove the checkpoint from the end of epoch 1&2, so # that we are forced to restore from the timestamped checkpoints. for k in range(2): os.remove(os.path.join(self.TEST_DIR, 'model_state_epoch_{}.th'.format(k))) os.remove(os.path.join(self.TEST_DIR, 'training_state_epoch_{}.th'.format(k))) os.remove(os.path.join(self.TEST_DIR, 'best.th')) restore_trainer = Trainer(self.model, self.optimizer, self.iterator, self.instances, num_epochs=2, serialization_dir=self.TEST_DIR, model_save_interval=0.0001) epoch, _ = restore_trainer._restore_checkpoint() # pylint: disable=protected-access assert epoch == 2 # One batch per epoch. assert restore_trainer._batch_num_total == 2 # pylint: disable=protected-access
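# The checkpointing tests above depend on the serialized file naming scheme:
# model_state_epoch_{N}.th at epoch boundaries and
# model_state_epoch_{N}.{timestamp}.th for intra-epoch saves. A sketch of
# recovering (epoch, is_intra_epoch) from a file name under that assumption:
import re

def parse_model_checkpoint(fname):
    match = re.search(r"model_state_epoch_([0-9]+)(\.[0-9\-\.]+)?\.th$", fname)
    if match is None:
        return None
    epoch, timestamp = match.group(1), match.group(2)
    return int(epoch), timestamp is not None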