Example #1
 def test_trainer_can_run_multiple_gpu(self):
     multigpu_iterator = BasicIterator(batch_size=4)
     multigpu_iterator.index_with(self.vocab)
     trainer = Trainer(self.model, self.optimizer,
                       multigpu_iterator, self.instances, num_epochs=2,
                       cuda_device=[0, 1])
     trainer.train()
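Note that this example assumes at least two CUDA devices are available. In the full test class (see Example #28 below), the same test is guarded with a pytest skipif marker so it is skipped on machines without multiple GPUs:

    @pytest.mark.skipif(torch.cuda.device_count() < 2,
                        reason="Need multiple GPUs.")
    def test_trainer_can_run_multiple_gpu(self):
        ...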
Example #2
    def test_trainer_can_run_multiple_gpu(self):

        class MetaDataCheckWrapper(Model):
            """
            Checks that the metadata field has been correctly split across the batch dimension
            when running on multiple gpus.
            """
            def __init__(self, model):
                super().__init__(model.vocab)
                self.model = model

            def forward(self, **kwargs) -> Dict[str, torch.Tensor]:  # type: ignore # pylint: disable=arguments-differ
                assert 'metadata' in kwargs and 'tags' in kwargs, \
                    f'tokens and metadata must be provided. Got {kwargs.keys()} instead.'
                batch_size = kwargs['tokens']['tokens'].size()[0]
                assert len(kwargs['metadata']) == batch_size, \
                    f'metadata must be split appropriately. Expected {batch_size} elements, ' \
                    f"got {len(kwargs['metadata'])} elements."
                return self.model.forward(**kwargs)

        multigpu_iterator = BasicIterator(batch_size=4)
        multigpu_iterator.index_with(self.vocab)
        trainer = Trainer(MetaDataCheckWrapper(self.model), self.optimizer,
                          multigpu_iterator, self.instances, num_epochs=2,
                          cuda_device=[0, 1])
        trainer.train()
Example #3
    def test_multiple_cursors(self):
        # pylint: disable=protected-access
        lazy_instances1 = _LazyInstances(lambda: (i for i in self.instances))
        lazy_instances2 = _LazyInstances(lambda: (i for i in self.instances))

        eager_instances1 = self.instances[:]
        eager_instances2 = self.instances[:]

        for instances1, instances2 in [(eager_instances1, eager_instances2),
                                       (lazy_instances1, lazy_instances2)]:
            iterator = BasicIterator(batch_size=1, instances_per_epoch=2)
            iterator.index_with(self.vocab)

            # First epoch through dataset1
            batches = list(iterator._create_batches(instances1, shuffle=False))
            grouped_instances = [batch.instances for batch in batches]
            assert grouped_instances == [[self.instances[0]], [self.instances[1]]]

            # First epoch through dataset2
            batches = list(iterator._create_batches(instances2, shuffle=False))
            grouped_instances = [batch.instances for batch in batches]
            assert grouped_instances == [[self.instances[0]], [self.instances[1]]]

            # Second epoch through dataset1
            batches = list(iterator._create_batches(instances1, shuffle=False))
            grouped_instances = [batch.instances for batch in batches]
            assert grouped_instances == [[self.instances[2]], [self.instances[3]]]

            # Second epoch through dataset2
            batches = list(iterator._create_batches(instances2, shuffle=False))
            grouped_instances = [batch.instances for batch in batches]
            assert grouped_instances == [[self.instances[2]], [self.instances[3]]]
Example #4
 def test_can_optimise_model_with_dense_and_sparse_params(self):
     optimizer_params = Params({
             "type": "dense_sparse_adam"
     })
     parameters = [[n, p] for n, p in self.model.named_parameters() if p.requires_grad]
     optimizer = Optimizer.from_params(parameters, optimizer_params)
     iterator = BasicIterator(2)
     iterator.index_with(self.vocab)
     Trainer(self.model, optimizer, iterator, self.instances).train()
Example #5
    def test_epoch_tracking_multiple_epochs(self):
        iterator = BasicIterator(batch_size=2, track_epoch=True)
        iterator.index_with(self.vocab)

        all_batches = list(iterator(self.instances, num_epochs=10))
        assert len(all_batches) == 10 * 3
        for i, batch in enumerate(all_batches):
            # Should have 3 batches per epoch
            epoch = i // 3
            assert all(epoch_num == epoch for epoch_num in batch['epoch_num'])
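The arithmetic behind the assertions: the other iterator examples in this listing assert that the test fixture holds five instances, so batch_size=2 yields three batches per epoch (2 + 2 + 1). Ten epochs therefore produce 30 batches, and i // 3 recovers the epoch index, which track_epoch=True exposes through the batch's 'epoch_num' entry (one value per instance).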
Example #6
    def test_trainer_can_log_learning_rates_tensorboard(self):
        iterator = BasicIterator(batch_size=4)
        iterator.index_with(self.vocab)

        trainer = Trainer(self.model, self.optimizer,
                          iterator, self.instances, num_epochs=2,
                          serialization_dir=self.TEST_DIR,
                          should_log_learning_rate=True,
                          summary_interval=2)

        trainer.train()
Example #7
 def test_yield_one_epoch_iterates_over_the_data_once(self):
     for test_instances in (self.instances, self.lazy_instances):
         iterator = BasicIterator(batch_size=2)
         iterator.index_with(self.vocab)
         batches = list(iterator(test_instances, num_epochs=1))
         # We just want to get the single-token array for the text field in the instance.
         instances = [tuple(instance.detach().cpu().numpy())
                      for batch in batches
                      for instance in batch['text']["tokens"]]
         assert len(instances) == 5
         self.assert_instances_are_correct(instances)
Example #8
 def test_call_iterates_over_data_forever(self):
     for test_instances in (self.instances, self.lazy_instances):
         iterator = BasicIterator(batch_size=2)
         iterator.index_with(self.vocab)
         generator = iterator(test_instances)
         batches = [next(generator) for _ in range(18)]  # going over the data 6 times
         # We just want to get the single-token array for the text field in the instance.
         instances = [tuple(instance.detach().cpu().numpy())
                      for batch in batches
                      for instance in batch['text']["tokens"]]
         assert len(instances) == 5 * 6
         self.assert_instances_are_correct(instances)
Example #9
    def test_with_iterator(self):
        reader = MultiprocessDatasetReader(base_reader=self.base_reader, num_workers=2)
        instances = reader.read(self.glob)

        iterator = BasicIterator(batch_size=32)
        iterator.index_with(self.vocab)

        batches = [batch for batch in iterator(instances, num_epochs=1)]

        # 400 instances / batch_size 32 = 12 full batches + 1 batch of 16
        sizes = sorted([len(batch['tags']) for batch in batches])
        assert sizes == [16] + 12 * [32]
Example #10
    def test_epoch_tracking_forever(self):
        iterator = BasicIterator(batch_size=2, track_epoch=True)
        iterator.index_with(self.vocab)

        it = iterator(self.instances, num_epochs=None)

        all_batches = [next(it) for _ in range(30)]

        assert len(all_batches) == 30
        for i, batch in enumerate(all_batches):
            # Should have 3 batches per epoch
            epoch = i // 3
            assert all(epoch_num == epoch for epoch_num in batch['epoch_num'])
Example #11
    def test_elmo_bilm(self):
        # get the raw data
        sentences, expected_lm_embeddings = self._load_sentences_embeddings()

        # load the test model
        elmo_bilm = _ElmoBiLm(self.options_file, self.weight_file)

        # Deal with the data.
        indexer = ELMoTokenCharactersIndexer()

        # For each sentence, first create a TextField, then create an instance
        instances = []
        for batch in zip(*sentences):
            for sentence in batch:
                tokens = [Token(token) for token in sentence.split()]
                field = TextField(tokens, {'character_ids': indexer})
                instance = Instance({"elmo": field})
                instances.append(instance)

        vocab = Vocabulary()

        # Now finally we can iterate through batches.
        iterator = BasicIterator(3)
        iterator.index_with(vocab)
        for i, batch in enumerate(iterator(instances, num_epochs=1, shuffle=False)):
            lm_embeddings = elmo_bilm(batch['elmo']['character_ids'])
            top_layer_embeddings, mask = remove_sentence_boundaries(
                    lm_embeddings['activations'][2],
                    lm_embeddings['mask']
            )

            # check the mask lengths
            lengths = mask.data.numpy().sum(axis=1)
            batch_sentences = [sentences[k][i] for k in range(3)]
            expected_lengths = [
                    len(sentence.split()) for sentence in batch_sentences
            ]
            self.assertEqual(lengths.tolist(), expected_lengths)

            # get the expected embeddings and compare!
            expected_top_layer = [expected_lm_embeddings[k][i] for k in range(3)]
            for k in range(3):
                self.assertTrue(
                        numpy.allclose(
                                top_layer_embeddings[k, :lengths[k], :].data.numpy(),
                                expected_top_layer[k],
                                atol=1.0e-6
                        )
                )
Example #12
def main(serialization_directory, device):
    """
    serialization_directory : str, required.
        The directory containing the serialized weights.
    device: int, default = -1
        The device to run the evaluation on.
    """

    config = Params.from_file(os.path.join(serialization_directory, "config.json"))
    dataset_reader = DatasetReader.from_params(config['dataset_reader'])
    evaluation_data_path = config['validation_data_path']

    model = Model.load(config, serialization_dir=serialization_directory, cuda_device=device)

    prediction_file_path = os.path.join(serialization_directory, "predictions.txt")
    gold_file_path = os.path.join(serialization_directory, "gold.txt")
    prediction_file = open(prediction_file_path, "w+")
    gold_file = open(gold_file_path, "w+")

    # Load the evaluation data and index it.
    print("Reading evaluation data from {}".format(evaluation_data_path))
    instances = dataset_reader.read(evaluation_data_path)
    iterator = BasicIterator(batch_size=32)
    iterator.index_with(model.vocab)

    model_predictions = []
    batches = iterator(instances, num_epochs=1, shuffle=False, cuda_device=device, for_training=False)
    for batch in Tqdm.tqdm(batches):
        result = model(**batch)
        predictions = model.decode(result)
        model_predictions.extend(predictions["tags"])

    for instance, prediction in zip(instances, model_predictions):
        fields = instance.fields
        try:
            # Most sentences have a verbal predicate, but not all.
            verb_index = fields["verb_indicator"].labels.index(1)
        except ValueError:
            verb_index = None

        gold_tags = fields["tags"].labels
        sentence = fields["tokens"].tokens

        write_to_conll_eval_file(prediction_file, gold_file,
                                 verb_index, sentence, prediction, gold_tags)
    prediction_file.close()
    gold_file.close()
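For context, a minimal sketch of how main() might be wired up as a command-line entry point. The argparse flags below are illustrative assumptions, not part of the original script:

    if __name__ == "__main__":
        import argparse

        parser = argparse.ArgumentParser(
            description="Write SRL predictions and gold labels to CoNLL-format files.")
        # Both argument names are assumptions made for this sketch.
        parser.add_argument("--serialization-directory", required=True,
                            help="Directory containing config.json and the serialized model weights.")
        parser.add_argument("--device", type=int, default=-1,
                            help="CUDA device id to run on, or -1 for CPU.")
        args = parser.parse_args()
        main(args.serialization_directory, args.device)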
Example #13
    def test_maximum_samples_per_batch(self):
        for test_instances in (self.instances, self.lazy_instances):
            # pylint: disable=protected-access
            iterator = BasicIterator(
                    batch_size=3, maximum_samples_per_batch=['num_tokens', 9]
            )
            iterator.index_with(self.vocab)
            batches = list(iterator._create_batches(test_instances, shuffle=False))
            stats = self.get_batches_stats(batches)

            # ensure all instances are in a batch
            assert stats['total_instances'] == len(self.instances)

            # ensure correct batch sizes
            assert stats['batch_lengths'] == [2, 1, 1, 1]

            # ensure correct sample sizes (<= 9)
            assert stats['sample_sizes'] == [8, 3, 9, 1]
Example #14
    def test_maximum_samples_per_batch_packs_tightly(self):
        # pylint: disable=protected-access
        token_counts = [10, 4, 3]
        test_instances = self.create_instances_from_token_counts(token_counts)

        iterator = BasicIterator(
                batch_size=3, maximum_samples_per_batch=['num_tokens', 11]
        )
        iterator.index_with(self.vocab)
        batches = list(iterator._create_batches(test_instances, shuffle=False))
        stats = self.get_batches_stats(batches)

        # ensure all instances are in a batch
        assert stats['total_instances'] == len(token_counts)

        # ensure correct batch sizes
        assert stats['batch_lengths'] == [1, 2]

        # ensure correct sample sizes (<= 11)
        assert stats['sample_sizes'] == [10, 8]
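The expected values follow from how maximum_samples_per_batch counts padded tokens: the longest sequence length in the batch times the number of instances, which is exactly the check made explicit in the next example. The 10-token instance alone costs 10 <= 11 samples, but adding the 4-token instance would pad both to 10 tokens for 20 samples, so it gets its own batch; the remaining 4- and 3-token instances pad to 4 tokens for 2 * 4 = 8 <= 11 samples and share a batch. Hence batch_lengths == [1, 2] and sample_sizes == [10, 8].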
Example #15
    def test_maximum_samples_per_batch(self):
        for test_instances in (self.instances, self.lazy_instances):
            # pylint: disable=protected-access
            iterator = BasicIterator(
                    batch_size=3, maximum_samples_per_batch=['num_tokens', 9]
            )
            iterator.index_with(self.vocab)
            batches = list(iterator._create_batches(test_instances, shuffle=False))

            # ensure all instances are in a batch
            grouped_instances = [batch.instances for batch in batches]
            num_instances = sum(len(group) for group in grouped_instances)
            assert num_instances == len(self.instances)

            # ensure all batches are sufficiently small
            for batch in batches:
                batch_sequence_length = max(
                        [instance.get_padding_lengths()['text']['num_tokens']
                         for instance in batch.instances]
                )
                assert batch_sequence_length * len(batch.instances) <= 9
Example #16
    def test_trainer_saves_models_at_specified_interval(self):
        iterator = BasicIterator(batch_size=4)
        iterator.index_with(self.vocab)

        trainer = Trainer(self.model, self.optimizer,
                          iterator, self.instances, num_epochs=2,
                          serialization_dir=self.TEST_DIR,
                          model_save_interval=0.0001)

        trainer.train()

        # Now check the serialized files for models saved during the epoch.
        prefix = 'model_state_epoch_*'
        file_names = sorted(glob.glob(os.path.join(self.TEST_DIR, prefix)))
        epochs = [re.search(r"_([0-9\.\-]+)\.th", fname).group(1)
                  for fname in file_names]
        # We should have checkpoints at the end of each epoch and during each, e.g.
        # [0.timestamp, 0, 1.timestamp, 1]
        assert len(epochs) == 4
        assert epochs[3] == '1'
        assert '.' in epochs[0]

        # Now make certain we can restore from timestamped checkpoint.
        # To do so, remove the checkpoint from the end of epoch 1&2, so
        # that we are forced to restore from the timestamped checkpoints.
        for k in range(2):
            os.remove(os.path.join(self.TEST_DIR, 'model_state_epoch_{}.th'.format(k)))
            os.remove(os.path.join(self.TEST_DIR, 'training_state_epoch_{}.th'.format(k)))
        os.remove(os.path.join(self.TEST_DIR, 'best.th'))

        restore_trainer = Trainer(self.model, self.optimizer,
                                  self.iterator, self.instances, num_epochs=2,
                                  serialization_dir=self.TEST_DIR,
                                  model_save_interval=0.0001)
        epoch, _ = restore_trainer._restore_checkpoint()  # pylint: disable=protected-access
        assert epoch == 2
        # One batch per epoch.
        assert restore_trainer._batch_num_total == 2  # pylint: disable=protected-access
Example #17
    def test_multiple_cursors(self):
        # pylint: disable=protected-access
        lazy_instances1 = _LazyInstances(lambda: (i for i in self.instances))
        lazy_instances2 = _LazyInstances(lambda: (i for i in self.instances))

        eager_instances1 = self.instances[:]
        eager_instances2 = self.instances[:]

        for instances1, instances2 in [(eager_instances1, eager_instances2),
                                       (lazy_instances1, lazy_instances2)]:
            iterator = BasicIterator(batch_size=1, instances_per_epoch=2)
            iterator.index_with(self.vocab)

            # First epoch through dataset1
            batches = list(iterator._create_batches(instances1, shuffle=False))
            grouped_instances = [batch.instances for batch in batches]
            assert grouped_instances == [[self.instances[0]],
                                         [self.instances[1]]]

            # First epoch through dataset2
            batches = list(iterator._create_batches(instances2, shuffle=False))
            grouped_instances = [batch.instances for batch in batches]
            assert grouped_instances == [[self.instances[0]],
                                         [self.instances[1]]]

            # Second epoch through dataset1
            batches = list(iterator._create_batches(instances1, shuffle=False))
            grouped_instances = [batch.instances for batch in batches]
            assert grouped_instances == [[self.instances[2]],
                                         [self.instances[3]]]

            # Second epoch through dataset2
            batches = list(iterator._create_batches(instances2, shuffle=False))
            grouped_instances = [batch.instances for batch in batches]
            assert grouped_instances == [[self.instances[2]],
                                         [self.instances[3]]]
Example #18
def test_iterator():
    indexer = StaticFasttextTokenIndexer(
        model_path="./data/fasttext_embedding.model",
        model_params_path="./data/fasttext_embedding.model.params")

    loader = MenionsLoader(
        category_mapping_file='./data/test_category_mapping.json',
        token_indexers={"tokens": indexer},
        tokenizer=WordTokenizer(word_splitter=FastSplitter()))

    vocab = Vocabulary.from_params(Params({"directory_path":
                                           "./data/vocab2/"}))

    iterator = BasicIterator(batch_size=32)

    iterator.index_with(vocab)

    limit = 50
    for _ in tqdm.tqdm(iterator(loader.read('./data/train_data_aa.tsv'),
                                num_epochs=1),
                       mininterval=2):
        limit -= 1
        if limit <= 0:
            break
Example #19
def main():
    all_chars = {END_SYMBOL, START_SYMBOL}
    all_chars.update("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ .,!?'-")
    token_counts = {char: 1 for char in all_chars}
    vocab = Vocabulary({'tokens': token_counts})

    token_indexers = {'tokens': SingleIdTokenIndexer()}

    train_set = read_dataset(all_chars)
    instances = [tokens_to_lm_instance(tokens, token_indexers)
                 for tokens in train_set]

    token_embedding = Embedding(num_embeddings=vocab.get_vocab_size('tokens'),
                                embedding_dim=EMBEDDING_SIZE)
    embedder = BasicTextFieldEmbedder({"tokens": token_embedding})

    model = RNNLanguageModel(embedder=embedder,
                             hidden_size=HIDDEN_SIZE,
                             max_len=80,
                             vocab=vocab)

    iterator = BasicIterator(batch_size=BATCH_SIZE)
    iterator.index_with(vocab)

    optimizer = optim.Adam(model.parameters(), lr=5.e-3)

    trainer = Trainer(model=model,
                      optimizer=optimizer,
                      iterator=iterator,
                      train_dataset=instances,
                      num_epochs=10)
    trainer.train()

    for _ in range(50):
        tokens, _ = model.generate()
        print(''.join(token.text for token in tokens))
Example #20
    def test_trainer_respects_keep_serialized_model_every_num_seconds(self):
        # To test:
        #   Create an iterator that sleeps for 2.5 seconds per epoch, so the total training
        #       time for one epoch is slightly greater than 2.5 seconds.
        #   Run for 6 epochs, keeping the last 2 models, models also kept every 5 seconds.
        #   Check the resulting checkpoints.  Should then have models at epochs
        #       2, 4, plus the last two at 5 and 6.
        class WaitingIterator(BasicIterator):
            # pylint: disable=arguments-differ
            def _create_batches(self, *args, **kwargs):
                time.sleep(2.5)
                return super(WaitingIterator, self)._create_batches(*args, **kwargs)

        iterator = WaitingIterator(batch_size=2)
        iterator.index_with(self.vocab)

        # Don't want validation iterator to wait.
        viterator = BasicIterator(batch_size=2)
        viterator.index_with(self.vocab)

        trainer = CallbackTrainer(self.model, self.optimizer,
                                  num_epochs=6,
                                  serialization_dir=self.TEST_DIR,
                                  callbacks=self.default_callbacks(max_checkpoints=2,
                                                                   checkpoint_every=5,
                                                                   iterator=iterator,
                                                                   validation_iterator=viterator))
        trainer.train()

        # Now check the serialized files
        for prefix in ['model_state_epoch_*', 'training_state_epoch_*']:
            file_names = glob.glob(os.path.join(self.TEST_DIR, prefix))
            epochs = [int(re.search(r"_([0-9])\.th", fname).group(1))
                      for fname in file_names]
            # epoch N has N-1 in file name
            assert sorted(epochs) == [1, 3, 4, 5]
Example #21
    def test_production_rule_field_with_multiple_gpus(self):
        wikitables_dir = 'allennlp/tests/fixtures/data/wikitables/'
        wikitables_reader = WikiTablesDatasetReader(
            tables_directory=wikitables_dir,
            dpd_output_directory=wikitables_dir + 'dpd_output/')
        instances = wikitables_reader.read(wikitables_dir +
                                           'sample_data.examples')
        archive_path = self.FIXTURES_ROOT / 'semantic_parsing' / 'wikitables' / 'serialization' / 'model.tar.gz'
        model = load_archive(archive_path).model
        model.cuda()

        multigpu_iterator = BasicIterator(batch_size=4)
        multigpu_iterator.index_with(model.vocab)

        trainer = CallbackTrainer(model,
                                  self.optimizer,
                                  num_epochs=2,
                                  cuda_device=[0, 1],
                                  callbacks=[
                                      GenerateTrainingBatches(
                                          instances, multigpu_iterator),
                                      TrainSupervised()
                                  ])
        trainer.train()
Example #22
def _filter_data(data, vocab):
    def _is_correct_instance(batch):
        assert len(batch['words']['ru_bert']['offsets']) == 1

        if batch['words']['ru_bert']['token_ids'].shape[1] > 256:
            return False

        return all(
            begin <= end < batch['words']['ru_bert']['token_ids'].shape[1]
            for begin, end in batch['words']['ru_bert']['offsets'][0])

    iterator = BasicIterator(batch_size=1)
    iterator.index_with(vocab)

    result_data = []
    for instance in tqdm(data):
        batch = next(iterator([instance]))
        if _is_correct_instance(batch):
            result_data.append(instance)
        else:
            logger.info('Filtering out %s', batch['metadata'][0]['words'])

    logger.info('Removed %s samples', len(data) - len(result_data))
    return result_data
Example #23
                    for fname in [config.train_file, config.val_file])

# In[ ]:

vars(train_ds[0].fields["input"])

# In[ ]:

# ### Data Iterator

# In[ ]:

from allennlp.data.iterators import BasicIterator

iterator = BasicIterator(batch_size=config.batch_size)
iterator.index_with(global_vocab)

# Sanity check

# In[ ]:

batch = next(iter(iterator(train_ds)))

# In[ ]:

batch

# In[ ]:

# # Model and Loss
Example #24
def eval_model_for_downstream(model_saved_path):
    seed = 12
    torch.manual_seed(seed)
    bert_model_name = 'bert-base-uncased'
    # lazy = False
    lazy = True
    forward_size = 32
    # batch_size = 64
    batch_size = 128
    do_lower_case = True

    debug_mode = False
    # est_datasize = 900_000

    num_class = 1
    # num_train_optimization_steps

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    device_num = 0 if torch.cuda.is_available() else -1

    n_gpu = torch.cuda.device_count()

    unk_token_num = {'tokens': 1}  # workaround for initializing the vocabulary.
    vocab = ExVocabulary(unk_token_num=unk_token_num)
    vocab.add_token_to_namespace("false", namespace="labels")  # 0
    vocab.add_token_to_namespace("true", namespace="labels")  # 1
    vocab.add_token_to_namespace("hidden", namespace="labels")
    vocab.change_token_with_index_to_namespace("hidden", -2, namespace='labels')

    # Load Dataset
    train_list = common.load_json(config.TRAIN_FILE)
    dev_list = common.load_json(config.DEV_FULLWIKI_FILE)

    dev_fitems_list = common.load_jsonl(
        config.PDATA_ROOT / "content_selection_forward" / "hotpot_dev_p_level_unlabeled.jsonl")
    train_fitems_list = common.load_jsonl(
        config.PDATA_ROOT / "content_selection_forward" / "hotpot_train_p_level_labeled.jsonl")
    test_fitems_list = common.load_jsonl(
        config.PDATA_ROOT / "content_selection_forward" / "hotpot_test_p_level_unlabeled.jsonl")

    if debug_mode:
        dev_list = dev_list[:10]
        dev_fitems_list = dev_fitems_list[:296]
        train_fitems_list = train_fitems_list[:300]
        eval_frequency = 2
        # print(dev_list[-1]['_id'])
        # exit(0)

    dev_o_dict = list_dict_data_tool.list_to_dict(dev_list, '_id')
    train_o_dict = list_dict_data_tool.list_to_dict(train_list, '_id')

    bert_tokenizer = BertTokenizer.from_pretrained(bert_model_name, do_lower_case=do_lower_case)
    bert_cs_reader = BertContentSelectionReader(bert_tokenizer, lazy, is_paired=True,
                                                example_filter=lambda x: len(x['context']) == 0, max_l=286)

    bert_encoder = BertModel.from_pretrained(bert_model_name)
    model = BertMultiLayerSeqClassification(bert_encoder, num_labels=num_class, num_of_pooling_layer=1,
                                            act_type='tanh', use_pretrained_pooler=True, use_sigmoid=True)

    model.load_state_dict(torch.load(model_saved_path))

    model.to(device)
    if n_gpu > 1:
        model = torch.nn.DataParallel(model)
    #
    dev_instances = bert_cs_reader.read(dev_fitems_list)
    train_instance = bert_cs_reader.read(train_fitems_list)
    test_instances = bert_cs_reader.read(test_fitems_list)

    biterator = BasicIterator(batch_size=forward_size)
    biterator.index_with(vocab)

    # train_iter = biterator(train_instance, num_epochs=1, shuffle=False)
    # dev_iter = biterator(dev_instances, num_epochs=1, shuffle=False)
    test_iter = biterator(test_instances, num_epochs=1, shuffle=False)

    print(len(dev_fitems_list))
    print(len(test_fitems_list))
    print(len(train_fitems_list))

    # cur_dev_eval_results_list = eval_model(model, dev_iter, device_num, with_probs=True, show_progress=True)
    # cur_train_eval_results_list = eval_model(model, train_iter, device_num, with_probs=True, show_progress=True)

    cur_test_eval_results_list = eval_model(model, test_iter, device_num, with_probs=True, show_progress=True)
    common.save_jsonl(cur_test_eval_results_list, "test_p_level_bert_v1_results.jsonl")

    print("Test write finished.")
    exit(0)

    copied_dev_o_dict = copy.deepcopy(dev_o_dict)

    list_dict_data_tool.append_subfield_from_list_to_dict(cur_dev_eval_results_list, copied_dev_o_dict,
                                                          'qid', 'fid', check=True)
    # Top_3
    cur_results_dict_top3 = select_top_k_and_to_results_dict(copied_dev_o_dict, top_k=3)
    upperbound_results_dict_top3 = append_gt_downstream_to_get_upperbound_from_doc_retri(
        cur_results_dict_top3,
        dev_list)

    # Top_5
    cur_results_dict_top5 = select_top_k_and_to_results_dict(copied_dev_o_dict, top_k=5)
    upperbound_results_dict_top5 = append_gt_downstream_to_get_upperbound_from_doc_retri(
        cur_results_dict_top5,
        dev_list)

    cur_results_dict_top10 = select_top_k_and_to_results_dict(copied_dev_o_dict, top_k=10)
    upperbound_results_dict_top10 = append_gt_downstream_to_get_upperbound_from_doc_retri(
        cur_results_dict_top10,
        dev_list)

    _, metrics_top3 = ext_hotpot_eval.eval(cur_results_dict_top3, dev_list, verbose=False)
    _, metrics_top3_UB = ext_hotpot_eval.eval(upperbound_results_dict_top3, dev_list, verbose=False)

    _, metrics_top5 = ext_hotpot_eval.eval(cur_results_dict_top5, dev_list, verbose=False)
    _, metrics_top5_UB = ext_hotpot_eval.eval(upperbound_results_dict_top5, dev_list, verbose=False)

    _, metrics_top10 = ext_hotpot_eval.eval(cur_results_dict_top10, dev_list, verbose=False)
    _, metrics_top10_UB = ext_hotpot_eval.eval(upperbound_results_dict_top10, dev_list, verbose=False)

    logging_item = {
        'top3': metrics_top3,
        'top3_UB': metrics_top3_UB,
        'top5': metrics_top5,
        'top5_UB': metrics_top5_UB,
        'top10': metrics_top10,
        'top10_UB': metrics_top10_UB,
    }

    print(logging_item)

    common.save_jsonl(cur_train_eval_results_list, "train_p_level_bert_v1_results.jsonl")
    common.save_jsonl(cur_dev_eval_results_list, "dev_p_level_bert_v1_results.jsonl")
Example #25
def model_go():
    seed = 12
    torch.manual_seed(seed)
    # bert_model_name = 'bert-large-uncased'
    bert_model_name = 'bert-base-uncased'
    experiment_name = 'hotpot_v0_cs'
    lazy = False
    # lazy = True
    forward_size = 16
    # batch_size = 64
    batch_size = 128
    gradient_accumulate_step = int(batch_size / forward_size)
    warmup_proportion = 0.1
    learning_rate = 5e-5
    num_train_epochs = 5
    eval_frequency = 5000
    pos_ratio = 0.2
    do_lower_case = True

    debug_mode = False
    # est_datasize = 900_000

    num_class = 1
    # num_train_optimization_steps

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    device_num = 0 if torch.cuda.is_available() else -1

    n_gpu = torch.cuda.device_count()

    unk_token_num = {'tokens': 1}  # workaround for initializing the vocabulary.
    vocab = ExVocabulary(unk_token_num=unk_token_num)
    vocab.add_token_to_namespace("false", namespace="labels")  # 0
    vocab.add_token_to_namespace("true", namespace="labels")  # 1
    vocab.add_token_to_namespace("hidden", namespace="labels")
    vocab.change_token_with_index_to_namespace("hidden", -2, namespace='labels')

    # Load Dataset
    train_list = common.load_json(config.TRAIN_FILE)
    dev_list = common.load_json(config.DEV_FULLWIKI_FILE)

    dev_fitems_list = common.load_jsonl(
        config.PDATA_ROOT / "content_selection_forward" / "hotpot_dev_p_level_unlabeled.jsonl")
    train_fitems_list = common.load_jsonl(
        config.PDATA_ROOT / "content_selection_forward" / "hotpot_train_p_level_labeled.jsonl")

    if debug_mode:
        dev_list = dev_list[:10]
        dev_fitems_list = dev_fitems_list[:296]
        train_fitems_list = train_fitems_list[:300]
        eval_frequency = 2
        # print(dev_list[-1]['_id'])
        # exit(0)

    sampled_train_list = down_sample_neg(train_fitems_list, ratio=pos_ratio)
    est_datasize = len(sampled_train_list)

    dev_o_dict = list_dict_data_tool.list_to_dict(dev_list, '_id')
    # print(dev_o_dict)

    bert_tokenizer = BertTokenizer.from_pretrained(bert_model_name, do_lower_case=do_lower_case)
    bert_cs_reader = BertContentSelectionReader(bert_tokenizer, lazy, is_paired=True,
                                                example_filter=lambda x: len(x['context']) == 0, max_l=286)

    bert_encoder = BertModel.from_pretrained(bert_model_name)
    model = BertMultiLayerSeqClassification(bert_encoder, num_labels=num_class, num_of_pooling_layer=1,
                                            act_type='tanh', use_pretrained_pooler=True, use_sigmoid=True)

    model.to(device)
    if n_gpu > 1:
        model = torch.nn.DataParallel(model)
    #
    param_optimizer = list(model.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [
        {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
        {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
    ]

    num_train_optimization_steps = int(est_datasize / forward_size / gradient_accumulate_step) * \
                                   num_train_epochs

    print("Estimated training size", est_datasize)
    print("Number of optimization steps:", num_train_optimization_steps)

    optimizer = BertAdam(optimizer_grouped_parameters,
                         lr=learning_rate,
                         warmup=warmup_proportion,
                         t_total=num_train_optimization_steps)

    dev_instances = bert_cs_reader.read(dev_fitems_list)

    biterator = BasicIterator(batch_size=forward_size)
    biterator.index_with(vocab)

    forbackward_step = 0
    update_step = 0

    logging_agent = save_tool.ScoreLogger({})

    # # # Create Log File
    file_path_prefix, date = save_tool.gen_file_prefix(f"{experiment_name}")
    # Save the source code.
    script_name = os.path.basename(__file__)
    with open(os.path.join(file_path_prefix, script_name), 'w') as out_f, open(__file__, 'r') as it:
        out_f.write(it.read())
        out_f.flush()
    # # # Log File end

    for epoch_i in range(num_train_epochs):
        print("Epoch:", epoch_i)
        sampled_train_list = down_sample_neg(train_fitems_list, ratio=pos_ratio)
        train_instance = bert_cs_reader.read(sampled_train_list)
        train_iter = biterator(train_instance, num_epochs=1, shuffle=True)

        for batch in tqdm(train_iter):
            model.train()
            batch = move_to_device(batch, device_num)

            paired_sequence = batch['paired_sequence']
            paired_segments_ids = batch['paired_segments_ids']
            labels_ids = batch['label']
            att_mask, _ = torch_util.get_length_and_mask(paired_sequence)
            s1_span = batch['bert_s1_span']
            s2_span = batch['bert_s2_span']

            loss = model(paired_sequence, token_type_ids=paired_segments_ids, attention_mask=att_mask,
                         mode=BertMultiLayerSeqClassification.ForwardMode.TRAIN,
                         labels=labels_ids)

            if n_gpu > 1:
                loss = loss.mean()  # mean() to average on multi-gpu.

            if gradient_accumulate_step > 1:
                loss = loss / gradient_accumulate_step

            loss.backward()
            forbackward_step += 1

            if forbackward_step % gradient_accumulate_step == 0:
                optimizer.step()
                optimizer.zero_grad()
                update_step += 1

                if update_step % eval_frequency == 0:
                    print("Update steps:", update_step)
                    dev_iter = biterator(dev_instances, num_epochs=1, shuffle=False)

                    cur_eval_results_list = eval_model(model, dev_iter, device_num, with_probs=True)
                    copied_dev_o_dict = copy.deepcopy(dev_o_dict)
                    list_dict_data_tool.append_subfield_from_list_to_dict(cur_eval_results_list, copied_dev_o_dict,
                                                                          'qid', 'fid', check=True)
                    # Top_5
                    cur_results_dict_top5 = select_top_k_and_to_results_dict(copied_dev_o_dict, top_k=5)
                    upperbound_results_dict_top5 = append_gt_downstream_to_get_upperbound_from_doc_retri(
                        cur_results_dict_top5,
                        dev_list)

                    cur_results_dict_top10 = select_top_k_and_to_results_dict(copied_dev_o_dict, top_k=10)
                    upperbound_results_dict_top10 = append_gt_downstream_to_get_upperbound_from_doc_retri(
                        cur_results_dict_top10,
                        dev_list)

                    _, metrics_top5 = ext_hotpot_eval.eval(cur_results_dict_top5, dev_list, verbose=False)
                    _, metrics_top5_UB = ext_hotpot_eval.eval(upperbound_results_dict_top5, dev_list, verbose=False)

                    _, metrics_top10 = ext_hotpot_eval.eval(cur_results_dict_top10, dev_list, verbose=False)
                    _, metrics_top10_UB = ext_hotpot_eval.eval(upperbound_results_dict_top10, dev_list, verbose=False)

                    # top5_doc_f1, top5_UB_sp_f1, top10_doc_f1, top10_Ub_sp_f1
                    # top5_doc_f1 = metrics_top5['doc_f1']
                    # top5_UB_sp_f1 = metrics_top5_UB['sp_f1']
                    # top10_doc_f1 = metrics_top10['doc_f1']
                    # top10_Ub_sp_f1 = metrics_top10_UB['sp_f1']

                    top5_doc_recall = metrics_top5['doc_recall']
                    top5_UB_sp_recall = metrics_top5_UB['sp_recall']
                    top10_doc_recall = metrics_top10['doc_recall']
                    top10_Ub_sp_recall = metrics_top10_UB['sp_recall']

                    logging_item = {
                        'top5': metrics_top5,
                        'top5_UB': metrics_top5_UB,
                        'top10': metrics_top10,
                        'top10_UB': metrics_top10_UB,
                    }

                    # print(logging_item)
                    save_file_name = f'i({update_step})|e({epoch_i})' \
                        f'|t5_doc_recall({top5_doc_recall})|t5_sp_recall({top5_UB_sp_recall})' \
                        f'|t10_doc_recall({top10_doc_recall})|t5_sp_recall({top10_Ub_sp_recall})|seed({seed})'

                    # print(save_file_name)
                    logging_agent.incorporate_results({}, save_file_name, logging_item)
                    logging_agent.logging_to_file(Path(file_path_prefix) / "log.json")

                    model_to_save = model.module if hasattr(model, 'module') else model
                    output_model_file = Path(file_path_prefix) / save_file_name
                    torch.save(model_to_save.state_dict(), str(output_model_file))
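A note on the gradient-accumulation arithmetic in this example: with batch_size = 128 and forward_size = 16, gradient_accumulate_step is 8, so each forward/backward pass sees 16 examples, the loss is divided by 8, and optimizer.step() runs only on every 8th pass, giving an effective batch size of 128. num_train_optimization_steps therefore counts optimizer updates (est_datasize / forward_size / gradient_accumulate_step per epoch, times num_train_epochs), which is the t_total value handed to BertAdam's warmup schedule.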
Example #26
def train_fever_std_ema_v1(resume_model=None, wn_feature=False):
    """
    This method is the new training script for training FEVER with span and probability scores.
    :param resume_model:
    :param wn_feature:
    :return:
    """
    num_epoch = 200
    seed = 12
    batch_size = 32
    lazy = True
    dev_prob_threshold = 0.1
    train_prob_threshold = 0.1
    train_sample_top_k = 8
    experiment_name = f"nsmn_sent_wise_std_ema_lr1|t_prob:{train_prob_threshold}|top_k:{train_sample_top_k}"
    # resume_model = None

    print("Do EMA:")

    print("Dev prob threshold:", dev_prob_threshold)
    print("Train prob threshold:", train_prob_threshold)
    print("Train sample top k:", train_sample_top_k)

    dev_upstream_sent_list = common.load_jsonl(
        config.RESULT_PATH /
        "sent_retri_nn/balanced_sentence_selection_results/dev_sent_pred_scores.jsonl"
    )

    train_upstream_sent_list = common.load_jsonl(
        config.RESULT_PATH /
        "sent_retri_nn/balanced_sentence_selection_results/train_sent_scores.jsonl"
    )

    # Prepare Data
    token_indexers = {
        'tokens':
        SingleIdTokenIndexer(namespace='tokens'),  # This is the raw tokens
        'elmo_chars': ELMoTokenCharactersIndexer(
            namespace='elmo_characters')  # This is the elmo_characters
    }

    print("Building Prob Dicts...")
    train_sent_list = common.load_jsonl(
        config.RESULT_PATH /
        "sent_retri_nn/balanced_sentence_selection_results/train_sent_scores.jsonl"
    )

    dev_sent_list = common.load_jsonl(
        config.RESULT_PATH /
        "sent_retri_nn/balanced_sentence_selection_results/dev_sent_pred_scores.jsonl"
    )

    selection_dict = paired_selection_score_dict(train_sent_list)
    selection_dict = paired_selection_score_dict(dev_sent_list, selection_dict)

    upstream_dev_list = threshold_sampler_insure_unique(
        config.T_FEVER_DEV_JSONL,
        dev_upstream_sent_list,
        prob_threshold=dev_prob_threshold,
        top_n=5)

    # p_dict (the WordNet relation dictionary) is not defined in this snippet; it is assumed
    # to be loaded the same way as in the evaluation example below.
    p_dict = wn_persistent_api.persistence_load()

    # Specify ablation to remove wordnet and number embeddings.
    dev_fever_data_reader = WNSIMIReader(token_indexers=token_indexers,
                                         lazy=lazy,
                                         wn_p_dict=p_dict,
                                         max_l=320,
                                         ablation=None)
    train_fever_data_reader = WNSIMIReader(token_indexers=token_indexers,
                                           lazy=lazy,
                                           wn_p_dict=p_dict,
                                           max_l=320,
                                           shuffle_sentences=False,
                                           ablation=None)

    complete_upstream_dev_data = select_sent_with_prob_for_eval(
        config.T_FEVER_DEV_JSONL,
        upstream_dev_list,
        selection_dict,
        tokenized=True)

    dev_instances = dev_fever_data_reader.read(complete_upstream_dev_data)

    # Load Vocabulary
    biterator = BasicIterator(batch_size=batch_size)

    vocab, weight_dict = load_vocab_embeddings(config.DATA_ROOT /
                                               "vocab_cache" / "nli_basic")
    vocab.change_token_with_index_to_namespace('hidden',
                                               -2,
                                               namespace='labels')

    print(vocab.get_token_to_index_vocabulary('labels'))
    print(vocab.get_vocab_size('tokens'))

    biterator.index_with(vocab)

    # Build Model
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu",
                          index=0)
    device_num = -1 if device.type == 'cpu' else 0

    model = Model(
        rnn_size_in=(1024 + 300 + dev_fever_data_reader.wn_feature_size,
                     1024 + 450 + dev_fever_data_reader.wn_feature_size),
        rnn_size_out=(450, 450),
        weight=weight_dict['glove.840B.300d'],
        vocab_size=vocab.get_vocab_size('tokens'),
        mlp_d=900,
        embedding_dim=300,
        max_l=300,
        use_extra_lex_feature=False,
        max_span_l=100)

    print("Model Max length:", model.max_l)
    if resume_model is not None:
        model.load_state_dict(torch.load(resume_model))
    model.display()
    model.to(device)

    cloned_empty_model = copy.deepcopy(model)
    ema: EMA = EMA(parameters=model.named_parameters())

    # Create Log File
    file_path_prefix, date = save_tool.gen_file_prefix(f"{experiment_name}")
    # Save the source code.
    script_name = os.path.basename(__file__)
    with open(os.path.join(file_path_prefix, script_name),
              'w') as out_f, open(__file__, 'r') as it:
        out_f.write(it.read())
        out_f.flush()
    # Save source code end.

    best_dev = -1
    iteration = 0

    start_lr = 0.0001
    optimizer = optim.Adam(filter(lambda p: p.requires_grad,
                                  model.parameters()),
                           lr=start_lr)
    criterion = nn.CrossEntropyLoss()

    for i_epoch in range(num_epoch):
        print("Resampling...")
        # Resampling
        train_data_with_candidate_sample_list = \
            threshold_sampler_insure_unique(config.T_FEVER_TRAIN_JSONL, train_upstream_sent_list,
                                            train_prob_threshold,
                                            top_n=train_sample_top_k)

        complete_upstream_train_data = adv_simi_sample_with_prob_v1_1(
            config.T_FEVER_TRAIN_JSONL,
            train_data_with_candidate_sample_list,
            selection_dict,
            tokenized=True)

        print("Sample data length:", len(complete_upstream_train_data))
        sampled_train_instances = train_fever_data_reader.read(
            complete_upstream_train_data)

        train_iter = biterator(sampled_train_instances,
                               shuffle=True,
                               num_epochs=1,
                               cuda_device=device_num)
        for i, batch in tqdm(enumerate(train_iter)):
            model.train()
            out = model(batch)
            y = batch['label']

            loss = criterion(out, y)

            # No decay
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            iteration += 1

            # EMA update
            ema(model.named_parameters())

            if i_epoch < 15:
                mod = 10000
                # mod = 10
            else:
                mod = 2000

            if iteration % mod == 0:
                # eval_iter = biterator(dev_instances, shuffle=False, num_epochs=1, cuda_device=device_num)
                # complete_upstream_dev_data = hidden_eval(model, eval_iter, complete_upstream_dev_data)
                #
                # eval_mode = {'check_sent_id_correct': True, 'standard': True}
                # strict_score, acc_score, pr, rec, f1 = c_scorer.fever_score(complete_upstream_dev_data,
                #                                                             common.load_jsonl(config.T_FEVER_DEV_JSONL),
                #                                                             mode=eval_mode,
                #                                                             verbose=False)
                # print("Fever Score(Strict/Acc./Precision/Recall/F1):", strict_score, acc_score, pr, rec, f1)
                #
                # print(f"Dev:{strict_score}/{acc_score}")

                # EMA saving
                eval_iter = biterator(dev_instances,
                                      shuffle=False,
                                      num_epochs=1,
                                      cuda_device=device_num)
                load_ema_to_model(cloned_empty_model, ema)
                complete_upstream_dev_data = hidden_eval(
                    cloned_empty_model, eval_iter, complete_upstream_dev_data)

                eval_mode = {'check_sent_id_correct': True, 'standard': True}
                strict_score, acc_score, pr, rec, f1 = c_scorer.fever_score(
                    complete_upstream_dev_data,
                    common.load_jsonl(config.T_FEVER_DEV_JSONL),
                    mode=eval_mode,
                    verbose=False)
                print("Fever Score EMA(Strict/Acc./Precision/Recall/F1):",
                      strict_score, acc_score, pr, rec, f1)

                print(f"Dev EMA:{strict_score}/{acc_score}")

                need_save = False
                if strict_score > best_dev:
                    best_dev = strict_score
                    need_save = True

                if need_save:
                    # save_path = os.path.join(
                    #     file_path_prefix,
                    #     f'i({iteration})_epoch({i_epoch})_dev({strict_score})_lacc({acc_score})_seed({seed})'
                    # )

                    # torch.save(model.state_dict(), save_path)

                    ema_save_path = os.path.join(
                        file_path_prefix,
                        f'ema_i({iteration})_epoch({i_epoch})_dev({strict_score})_lacc({acc_score})_seed({seed})'
                    )

                    save_ema_to_file(ema, ema_save_path)
Example #27
def hidden_eval_fever():
    batch_size = 64
    lazy = True

    SAVE_PATH = "/home/easonnie/projects/FunEver/saved_models/07-18-21:07:28_m_esim_wn_elmo_sample_fixed/i(57000)_epoch(8)_dev(0.5755075507550755)_loss(1.7175163737963839)_seed(12)"

    dev_upstream_file = config.RESULT_PATH / "sent_retri/2018_07_05_17:17:50_r/dev.jsonl"

    # Prepare Data
    token_indexers = {
        'tokens':
        SingleIdTokenIndexer(namespace='tokens'),  # This is the raw tokens
        'elmo_chars': ELMoTokenCharactersIndexer(
            namespace='elmo_characters')  # This is the elmo_characters
    }

    p_dict = wn_persistent_api.persistence_load()

    dev_fever_data_reader = WNReader(token_indexers=token_indexers,
                                     lazy=lazy,
                                     wn_p_dict=p_dict,
                                     max_l=360)

    complete_upstream_dev_data = get_actual_data(config.T_FEVER_DEV_JSONL,
                                                 dev_upstream_file)
    dev_instances = dev_fever_data_reader.read(complete_upstream_dev_data)
    # Load Vocabulary
    biterator = BasicIterator(batch_size=batch_size)
    # dev_biterator = BasicIterator(batch_size=batch_size * 2)

    vocab, weight_dict = load_vocab_embeddings(config.DATA_ROOT /
                                               "vocab_cache" / "nli_basic")
    vocab.change_token_with_index_to_namespace('hidden',
                                               -2,
                                               namespace='labels')

    print(vocab.get_token_to_index_vocabulary('labels'))
    print(vocab.get_vocab_size('tokens'))

    biterator.index_with(vocab)

    # Build Model
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu",
                          index=0)
    device_num = -1 if device.type == 'cpu' else 0

    model = Model(
        rnn_size_in=(1024 + 300 + dev_fever_data_reader.wn_feature_size,
                     1024 + 300),
        weight=weight_dict['glove.840B.300d'],
        vocab_size=vocab.get_vocab_size('tokens'),
        embedding_dim=300,
        max_l=300)

    print("Model Max length:", model.max_l)
    model.load_state_dict(torch.load(SAVE_PATH))
    model.display()
    model.to(device)

    eval_iter = biterator(dev_instances,
                          shuffle=False,
                          num_epochs=1,
                          cuda_device=device_num)
    builded_dev_data = hidden_eval(model, eval_iter,
                                   complete_upstream_dev_data)

    eval_mode = {'check_sent_id_correct': True, 'standard': True}

    for item in builded_dev_data:
        del item['label']

    print(
        c_scorer.fever_score(builded_dev_data,
                             common.load_jsonl(config.T_FEVER_DEV_JSONL),
                             mode=eval_mode))
Example #28
class TestTrainer(AllenNlpTestCase):
    def setUp(self):
        super(TestTrainer, self).setUp()
        self.instances = SequenceTaggingDatasetReader().read(self.FIXTURES_ROOT / 'data' / 'sequence_tagging.tsv')
        vocab = Vocabulary.from_instances(self.instances)
        self.vocab = vocab
        self.model_params = Params({
                "text_field_embedder": {
                        "tokens": {
                                "type": "embedding",
                                "embedding_dim": 5
                                }
                        },
                "encoder": {
                        "type": "lstm",
                        "input_size": 5,
                        "hidden_size": 7,
                        "num_layers": 2
                        }
                })
        self.model = SimpleTagger.from_params(vocab=self.vocab, params=self.model_params)
        self.optimizer = torch.optim.SGD(self.model.parameters(), 0.01)
        self.iterator = BasicIterator(batch_size=2)
        self.iterator.index_with(vocab)

    def test_trainer_can_run(self):
        trainer = Trainer(model=self.model,
                          optimizer=self.optimizer,
                          iterator=self.iterator,
                          train_dataset=self.instances,
                          validation_dataset=self.instances,
                          num_epochs=2)
        metrics = trainer.train()
        assert 'best_validation_loss' in metrics
        assert isinstance(metrics['best_validation_loss'], float)
        assert 'best_validation_accuracy' in metrics
        assert isinstance(metrics['best_validation_accuracy'], float)
        assert 'best_validation_accuracy3' in metrics
        assert isinstance(metrics['best_validation_accuracy3'], float)
        assert 'best_epoch' in metrics
        assert isinstance(metrics['best_epoch'], int)

        # Making sure that both increasing and decreasing validation metrics work.
        trainer = Trainer(model=self.model,
                          optimizer=self.optimizer,
                          iterator=self.iterator,
                          train_dataset=self.instances,
                          validation_dataset=self.instances,
                          validation_metric='+loss',
                          num_epochs=2)
        metrics = trainer.train()
        assert 'best_validation_loss' in metrics
        assert isinstance(metrics['best_validation_loss'], float)
        assert 'best_validation_accuracy' in metrics
        assert isinstance(metrics['best_validation_accuracy'], float)
        assert 'best_validation_accuracy3' in metrics
        assert isinstance(metrics['best_validation_accuracy3'], float)
        assert 'best_epoch' in metrics
        assert isinstance(metrics['best_epoch'], int)

    @pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device registered.")
    def test_trainer_can_run_cuda(self):
        trainer = Trainer(self.model, self.optimizer,
                          self.iterator, self.instances, num_epochs=2,
                          cuda_device=0)
        trainer.train()

    @pytest.mark.skipif(torch.cuda.device_count() < 2,
                        reason="Need multiple GPUs.")
    def test_trainer_can_run_multiple_gpu(self):

        class MetaDataCheckWrapper(Model):
            """
            Checks that the metadata field has been correctly split across the batch dimension
            when running on multiple gpus.
            """
            def __init__(self, model):
                super().__init__(model.vocab)
                self.model = model

            def forward(self, **kwargs) -> Dict[str, torch.Tensor]:  # type: ignore # pylint: disable=arguments-differ
                assert 'metadata' in kwargs and 'tags' in kwargs, \
                    f'tokens and metadata must be provided. Got {kwargs.keys()} instead.'
                batch_size = kwargs['tokens']['tokens'].size()[0]
                assert len(kwargs['metadata']) == batch_size, \
                    f'metadata must be split appropriately. Expected {batch_size} elements, ' \
                    f"got {len(kwargs['metadata'])} elements."
                return self.model.forward(**kwargs)

        multigpu_iterator = BasicIterator(batch_size=4)
        multigpu_iterator.index_with(self.vocab)
        trainer = Trainer(MetaDataCheckWrapper(self.model), self.optimizer,
                          multigpu_iterator, self.instances, num_epochs=2,
                          cuda_device=[0, 1])
        trainer.train()

    def test_trainer_can_resume_training(self):
        trainer = Trainer(self.model, self.optimizer,
                          self.iterator, self.instances,
                          validation_dataset=self.instances,
                          num_epochs=1, serialization_dir=self.TEST_DIR)
        trainer.train()
        new_trainer = Trainer(self.model, self.optimizer,
                              self.iterator, self.instances,
                              validation_dataset=self.instances,
                              num_epochs=3, serialization_dir=self.TEST_DIR)

        epoch, val_metrics_per_epoch = new_trainer._restore_checkpoint()  # pylint: disable=protected-access
        assert epoch == 1
        assert len(val_metrics_per_epoch) == 1
        assert isinstance(val_metrics_per_epoch[0], float)
        assert val_metrics_per_epoch[0] != 0.
        new_trainer.train()

    def test_metric_only_considered_best_so_far_when_strictly_better_than_those_before_it_increasing_metric(
            self):
        new_trainer = Trainer(self.model, self.optimizer,
                              self.iterator, self.instances,
                              validation_dataset=self.instances,
                              num_epochs=3, serialization_dir=self.TEST_DIR,
                              patience=5, validation_metric="+test")
        # when it is the only metric it should be considered the best
        assert new_trainer._is_best_so_far(1, [])  # pylint: disable=protected-access
        # when it is the same as one before it it is not considered the best
        assert not new_trainer._is_best_so_far(.3, [.3, .3, .3, .2, .5, .1])  # pylint: disable=protected-access
        # when it is the best it is considered the best
        assert new_trainer._is_best_so_far(13.00, [.3, .3, .3, .2, .5, .1])  # pylint: disable=protected-access
        # when it is not the best it is not considered the best
        assert not new_trainer._is_best_so_far(.0013, [.3, .3, .3, .2, .5, .1])  # pylint: disable=protected-access

    def test_metric_only_considered_best_so_far_when_strictly_better_than_those_before_it_decreasing_metric(self):
        new_trainer = Trainer(self.model, self.optimizer,
                              self.iterator, self.instances,
                              validation_dataset=self.instances,
                              num_epochs=3, serialization_dir=self.TEST_DIR,
                              patience=5, validation_metric="-test")
        # when it is the only metric it should be considered the best
        assert new_trainer._is_best_so_far(1, [])  # pylint: disable=protected-access
        # when it is the same as one before it it is not considered the best
        assert not new_trainer._is_best_so_far(.3, [.3, .3, .3, .2, .5, .1])  # pylint: disable=protected-access
        # when it is the best it is considered the best
        assert new_trainer._is_best_so_far(.013, [.3, .3, .3, .2, .5, .1])  # pylint: disable=protected-access
        # when it is not the best it is not considered the best
        assert not new_trainer._is_best_so_far(13.00, [.3, .3, .3, .2, .5, .1])  # pylint: disable=protected-access

    def test_should_stop_early_with_increasing_metric(self):
        new_trainer = Trainer(self.model, self.optimizer,
                              self.iterator, self.instances,
                              validation_dataset=self.instances,
                              num_epochs=3, serialization_dir=self.TEST_DIR,
                              patience=5, validation_metric="+test")
        assert new_trainer._should_stop_early([.5, .3, .2, .1, .4, .4])  # pylint: disable=protected-access
        assert not new_trainer._should_stop_early([.3, .3, .3, .2, .5, .1])  # pylint: disable=protected-access

    def test_should_stop_early_with_flat_lining_metric(self):
        flatline = [.2] * 6
        assert Trainer(self.model, self.optimizer,  # pylint: disable=protected-access
                       self.iterator, self.instances,
                       validation_dataset=self.instances,
                       num_epochs=3,
                       serialization_dir=self.TEST_DIR,
                       patience=5,
                       validation_metric="+test")._should_stop_early(flatline)  # pylint: disable=protected-access
        assert Trainer(self.model, self.optimizer,  # pylint: disable=protected-access
                       self.iterator, self.instances,
                       validation_dataset=self.instances,
                       num_epochs=3,
                       serialization_dir=self.TEST_DIR,
                       patience=5,
                       validation_metric="-test")._should_stop_early(flatline)  # pylint: disable=protected-access

    def test_should_stop_early_with_decreasing_metric(self):
        new_trainer = Trainer(self.model, self.optimizer,
                              self.iterator, self.instances,
                              validation_dataset=self.instances,
                              num_epochs=3, serialization_dir=self.TEST_DIR,
                              patience=5, validation_metric="-test")
        assert new_trainer._should_stop_early([.02, .3, .2, .1, .4, .4])  # pylint: disable=protected-access
        assert not new_trainer._should_stop_early([.3, .3, .2, .1, .4, .5])  # pylint: disable=protected-access
        assert new_trainer._should_stop_early([.1, .3, .2, .1, .4, .5])  # pylint: disable=protected-access

    def test_should_stop_early_with_early_stopping_disabled(self):
        # Increasing metric
        trainer = Trainer(self.model, self.optimizer, self.iterator, self.instances,
                          validation_dataset=self.instances, num_epochs=100,
                          patience=None, validation_metric="+test")
        decreasing_history = [float(i) for i in reversed(range(20))]
        assert not trainer._should_stop_early(decreasing_history)  # pylint: disable=protected-access

        # Decreasing metric
        trainer = Trainer(self.model, self.optimizer, self.iterator, self.instances,
                          validation_dataset=self.instances, num_epochs=100,
                          patience=None, validation_metric="-test")
        increasing_history = [float(i) for i in range(20)]
        assert not trainer._should_stop_early(increasing_history)  # pylint: disable=protected-access

    def test_should_stop_early_with_invalid_patience(self):
        for patience in [0, -1, -2, 1.5, 'None']:
            with pytest.raises(ConfigurationError,
                               message='No ConfigurationError for patience={}'.format(patience)):
                Trainer(self.model, self.optimizer, self.iterator, self.instances,
                        validation_dataset=self.instances, num_epochs=100,
                        patience=patience, validation_metric="+test")

    def test_trainer_can_run_with_lr_scheduler(self):
        lr_params = Params({"type": "reduce_on_plateau"})
        lr_scheduler = LearningRateScheduler.from_params(self.optimizer, lr_params)
        trainer = Trainer(model=self.model,
                          optimizer=self.optimizer,
                          iterator=self.iterator,
                          learning_rate_scheduler=lr_scheduler,
                          validation_metric="-loss",
                          train_dataset=self.instances,
                          validation_dataset=self.instances,
                          num_epochs=2)
        trainer.train()

    def test_trainer_can_resume_with_lr_scheduler(self):
        # pylint: disable=protected-access
        lr_scheduler = LearningRateScheduler.from_params(
                self.optimizer, Params({"type": "exponential", "gamma": 0.5}))
        trainer = Trainer(model=self.model,
                          optimizer=self.optimizer,
                          iterator=self.iterator,
                          learning_rate_scheduler=lr_scheduler,
                          train_dataset=self.instances,
                          validation_dataset=self.instances,
                          num_epochs=2, serialization_dir=self.TEST_DIR)
        trainer.train()

        new_lr_scheduler = LearningRateScheduler.from_params(
                self.optimizer, Params({"type": "exponential", "gamma": 0.5}))
        new_trainer = Trainer(model=self.model,
                              optimizer=self.optimizer,
                              iterator=self.iterator,
                              learning_rate_scheduler=new_lr_scheduler,
                              train_dataset=self.instances,
                              validation_dataset=self.instances,
                              num_epochs=4, serialization_dir=self.TEST_DIR)
        epoch, _ = new_trainer._restore_checkpoint()
        assert epoch == 2
        assert new_trainer._learning_rate_scheduler.lr_scheduler.last_epoch == 1
        new_trainer.train()

    def test_trainer_raises_on_model_with_no_loss_key(self):
        class FakeModel(torch.nn.Module):
            def forward(self, **kwargs):  # pylint: disable=arguments-differ,unused-argument
                return {}
        with pytest.raises(RuntimeError):
            trainer = Trainer(FakeModel(), self.optimizer,
                              self.iterator, self.instances,
                              num_epochs=2, serialization_dir=self.TEST_DIR)
            trainer.train()

    def test_trainer_can_log_histograms(self):
        # enable activation logging
        for module in self.model.modules():
            module.should_log_activations = True

        trainer = Trainer(self.model, self.optimizer,
                          self.iterator, self.instances, num_epochs=3,
                          serialization_dir=self.TEST_DIR,
                          histogram_interval=2)
        trainer.train()

    def test_trainer_respects_num_serialized_models_to_keep(self):
        trainer = Trainer(self.model, self.optimizer,
                          self.iterator, self.instances, num_epochs=5,
                          serialization_dir=self.TEST_DIR,
                          num_serialized_models_to_keep=3)
        trainer.train()

        # Now check the serialized files
        for prefix in ['model_state_epoch_*', 'training_state_epoch_*']:
            file_names = glob.glob(os.path.join(self.TEST_DIR, prefix))
            epochs = [int(re.search(r"_([0-9])\.th", fname).group(1))
                      for fname in file_names]
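            # Note: the regex above only captures single-digit epoch numbers, which is enough for the 5 epochs here.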
            assert sorted(epochs) == [2, 3, 4]

    def test_trainer_saves_metrics_every_epoch(self):
        trainer = Trainer(model=self.model,
                          optimizer=self.optimizer,
                          iterator=self.iterator,
                          train_dataset=self.instances,
                          validation_dataset=self.instances,
                          num_epochs=5,
                          serialization_dir=self.TEST_DIR,
                          num_serialized_models_to_keep=3)
        trainer.train()

        for epoch in range(5):
            epoch_file = self.TEST_DIR / f'metrics_epoch_{epoch}.json'
            assert epoch_file.exists()
            metrics = json.load(open(epoch_file))
            assert "validation_loss" in metrics
            assert "best_validation_loss" in metrics
            assert metrics.get("epoch") == epoch

    def test_trainer_respects_keep_serialized_model_every_num_seconds(self):
        # To test:
        #   Create an iterator that sleeps for 2.5 seconds per epoch, so the total training
        #       time for one epoch is slightly greater than 2.5 seconds.
        #   Run for 6 epochs, keeping the last 2 models, models also kept every 5 seconds.
        #   Check the resulting checkpoints.  Should then have models at epochs
        #       2, 4, plus the last two at 5 and 6.
        class WaitingIterator(BasicIterator):
            # pylint: disable=arguments-differ
            def _create_batches(self, *args, **kwargs):
                time.sleep(2.5)
                return super(WaitingIterator, self)._create_batches(*args, **kwargs)

        iterator = WaitingIterator(batch_size=2)
        iterator.index_with(self.vocab)

        trainer = Trainer(self.model, self.optimizer,
                          iterator, self.instances, num_epochs=6,
                          serialization_dir=self.TEST_DIR,
                          num_serialized_models_to_keep=2,
                          keep_serialized_model_every_num_seconds=5)
        trainer.train()

        # Now check the serialized files
        for prefix in ['model_state_epoch_*', 'training_state_epoch_*']:
            file_names = glob.glob(os.path.join(self.TEST_DIR, prefix))
            epochs = [int(re.search(r"_([0-9])\.th", fname).group(1))
                      for fname in file_names]
            # epoch N has N-1 in file name
            assert sorted(epochs) == [1, 3, 4, 5]

    def test_trainer_can_log_learning_rates_tensorboard(self):
        iterator = BasicIterator(batch_size=4)
        iterator.index_with(self.vocab)

        trainer = Trainer(self.model, self.optimizer,
                          iterator, self.instances, num_epochs=2,
                          serialization_dir=self.TEST_DIR,
                          should_log_learning_rate=True,
                          summary_interval=2)

        trainer.train()

    def test_trainer_saves_models_at_specified_interval(self):
        iterator = BasicIterator(batch_size=4)
        iterator.index_with(self.vocab)

        trainer = Trainer(self.model, self.optimizer,
                          iterator, self.instances, num_epochs=2,
                          serialization_dir=self.TEST_DIR,
                          model_save_interval=0.0001)

        trainer.train()

        # Now check the serialized files for models saved during the epoch.
        prefix = 'model_state_epoch_*'
        file_names = sorted(glob.glob(os.path.join(self.TEST_DIR, prefix)))
        epochs = [re.search(r"_([0-9\.\-]+)\.th", fname).group(1)
                  for fname in file_names]
        # We should have checkpoints at the end of each epoch and during each, e.g.
        # [0.timestamp, 0, 1.timestamp, 1]
        assert len(epochs) == 4
        assert epochs[3] == '1'
        assert '.' in epochs[0]

        # Now make certain we can restore from timestamped checkpoint.
        # To do so, remove the checkpoint from the end of epoch 1&2, so
        # that we are forced to restore from the timestamped checkpoints.
        for k in range(2):
            os.remove(os.path.join(self.TEST_DIR, 'model_state_epoch_{}.th'.format(k)))
            os.remove(os.path.join(self.TEST_DIR, 'training_state_epoch_{}.th'.format(k)))
        os.remove(os.path.join(self.TEST_DIR, 'best.th'))

        restore_trainer = Trainer(self.model, self.optimizer,
                                  self.iterator, self.instances, num_epochs=2,
                                  serialization_dir=self.TEST_DIR,
                                  model_save_interval=0.0001)
        epoch, _ = restore_trainer._restore_checkpoint()  # pylint: disable=protected-access
        assert epoch == 2
        # One batch per epoch.
        assert restore_trainer._batch_num_total == 2  # pylint: disable=protected-access
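
The _should_stop_early tests above pin down a simple patience rule: training stops once the best validation value inside the last patience epochs is no longer strictly better than the best value seen before that window. A minimal sketch consistent with those assertions (illustrative only, not necessarily the library's exact implementation):

from typing import List, Optional


def should_stop_early(metric_history: List[float], patience: Optional[int],
                      metric_increases: bool = True) -> bool:
    # Stop once the best value inside the last `patience` epochs is no longer
    # strictly better than the best value seen before that window.
    if not patience or len(metric_history) <= patience:
        return False
    recent, earlier = metric_history[-patience:], metric_history[:-patience]
    return max(recent) <= max(earlier) if metric_increases else min(recent) >= min(earlier)


# Consistent with the assertions above (patience=5, increasing metric):
assert should_stop_early([.5, .3, .2, .1, .4, .4], 5)        # the best epoch fell outside the window
assert not should_stop_early([.3, .3, .3, .2, .5, .1], 5)    # a recent epoch is still the best
assert should_stop_early([.2] * 6, 5)                        # flat-lining counts as no improvement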
예제 #29
0
def train_fever():
    num_epoch = 8
    seed = 12
    batch_size = 128
    experiment_name = "simple_nn"
    lazy = True
    torch.manual_seed(seed)
    keep_neg_sample_prob = 0.5
    sample_prob_decay = 0.1

    dev_upstream_file = config.RESULT_PATH / "sent_retri/2018_07_05_17:17:50_r/dev.jsonl"
    train_upstream_file = config.RESULT_PATH / "sent_retri/2018_07_05_17:17:50_r/train.jsonl"

    # Prepare Data
    token_indexers = {
        'tokens': SingleIdTokenIndexer(namespace='tokens'),  # This is the raw tokens
        'elmo_chars': ELMoTokenCharactersIndexer(namespace='elmo_characters')  # This is the elmo_characters
    }

    train_fever_data_reader = SSelectorReader(token_indexers=token_indexers, lazy=lazy)
    # dev_fever_data_reader = SSelectorReader(token_indexers=token_indexers, lazy=False)
    dev_fever_data_reader = SSelectorReader(token_indexers=token_indexers, lazy=lazy)

    complete_upstream_dev_data = get_full_list(config.T_FEVER_DEV_JSONL, dev_upstream_file, pred=True)
    print("Dev size:", len(complete_upstream_dev_data))
    dev_instances = dev_fever_data_reader.read(complete_upstream_dev_data)

    # Load Vocabulary
    biterator = BasicIterator(batch_size=batch_size)
    dev_biterator = BasicIterator(batch_size=batch_size)

    vocab, weight_dict = load_vocab_embeddings(config.DATA_ROOT / "vocab_cache" / "nli_basic")
    # This is important
    vocab.add_token_to_namespace("true", namespace="selection_labels")
    vocab.add_token_to_namespace("false", namespace="selection_labels")
    vocab.add_token_to_namespace("hidden", namespace="selection_labels")
    vocab.change_token_with_index_to_namespace("hidden", -2, namespace='selection_labels')
    # Label value

    vocab.get_index_to_token_vocabulary('selection_labels')

    print(vocab.get_token_to_index_vocabulary('selection_labels'))
    print(vocab.get_vocab_size('tokens'))

    biterator.index_with(vocab)
    dev_biterator.index_with(vocab)

    # exit(0)
    # Build Model
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu", index=0)
    device_num = -1 if device.type == 'cpu' else 0
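    # AllenNLP's iterators take cuda_device=-1 for CPU; any non-negative value is treated as a GPU index.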

    model = Model(weight=weight_dict['glove.840B.300d'],
                  vocab_size=vocab.get_vocab_size('tokens'),
                  embedding_dim=300, max_l=300, num_of_class=2)

    model.display()
    model.to(device)

    # Create Log File
    file_path_prefix, date = save_tool.gen_file_prefix(f"{experiment_name}")
    # Save the source code.
    script_name = os.path.basename(__file__)
    with open(os.path.join(file_path_prefix, script_name), 'w') as out_f, open(__file__, 'r') as it:
        out_f.write(it.read())
        out_f.flush()
    # Save source code end.

    best_dev = -1
    iteration = 0

    start_lr = 0.0002
    optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=start_lr)
    criterion = nn.CrossEntropyLoss()

    for i_epoch in range(num_epoch):
        print("Resampling...")
        # Resampling
        complete_upstream_train_data = get_full_list(config.T_FEVER_TRAIN_JSONL, train_upstream_file, pred=False)
        filtered_train_data = post_filter(complete_upstream_train_data, keep_prob=keep_neg_sample_prob,
                                          seed=12 + i_epoch)
        # Change the seed to avoid duplicate sample...
        keep_neg_sample_prob -= sample_prob_decay
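        # With keep_neg_sample_prob=0.5 and sample_prob_decay=0.1 the keep probability per epoch is
        # 0.5, 0.4, 0.3, 0.2, 0.1, 0.0, and it goes negative for epochs 6-7 as written (num_epoch=8).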

        print("Sampled_length:", len(filtered_train_data))
        sampled_train_instances = train_fever_data_reader.read(filtered_train_data)

        train_iter = biterator(sampled_train_instances, shuffle=True, num_epochs=1, cuda_device=device_num)
        for i, batch in tqdm(enumerate(train_iter)):
            model.train()
            out = model(batch)
            y = batch['selection_label']

            loss = criterion(out, y)

            # No decay
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            iteration += 1

            if i_epoch <= 4:
                mod = 25000
            else:
                mod = 10000

            if iteration % mod == 0:
                eval_iter = dev_biterator(dev_instances, shuffle=False, num_epochs=1, cuda_device=device_num)
                dev_score, dev_loss, complete_upstream_dev_data = full_eval_model(model, eval_iter, criterion,
                                                                                  complete_upstream_dev_data)

                dev_results_list = score_converter_v0(config.T_FEVER_DEV_JSONL, complete_upstream_dev_data)
                eval_mode = {'check_sent_id_correct': True, 'standard': True}
                strict_score, acc_score, pr, rec, f1 = c_scorer.fever_score(dev_results_list, config.T_FEVER_DEV_JSONL,
                                                                            mode=eval_mode, verbose=False)
                total = len(dev_results_list)
                hit = eval_mode['check_sent_id_correct_hits']
                tracking_score = hit / total
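                # c_scorer.fever_score (called above) appears to write 'check_sent_id_correct_hits' back
                # into the eval_mode dict, so tracking_score is presumably the sentence-retrieval hit rate on dev.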

                print(f"Dev(clf_acc/pr/rec/f1/loss):{dev_score}/{pr}/{rec}/{f1}/{dev_loss}")
                print(f"Tracking score:", f"{tracking_score}")

                need_save = False
                if tracking_score > best_dev:
                    best_dev = tracking_score
                    need_save = True

                if need_save:
                    save_path = os.path.join(
                        file_path_prefix,
                        f'i({iteration})_epoch({i_epoch})_'
                        f'(tra_score:{tracking_score}|clf_acc:{dev_score}|pr:{pr}|rec:{rec}|f1:{f1}|loss:{dev_loss})'
                    )

                    torch.save(model.state_dict(), save_path)
def multitask_learning():
    # load datasetreader 
    # Save logging to a local file
    # Multitasking
    log.getLogger().addHandler(log.FileHandler(directory+"/log.log"))

    lr = 0.00001
    batch_size = 2
    epochs = 10 
    max_seq_len = 512
    max_span_width = 30

    #import pdb
    #pdb.set_trace()    

    #token_indexer = BertIndexer(pretrained_model="bert-base-uncased", max_pieces=max_seq_len, do_lowercase=True,)
    #token_indexer = PretrainedBertIndexer("bert-base-cased", do_lowercase=False)
    from allennlp.data.token_indexers.elmo_indexer import ELMoTokenCharactersIndexer
    # the token indexer is responsible for mapping tokens to integers
    token_indexer = ELMoTokenCharactersIndexer()
    
    def tokenizer(x: str):
        return [w.text for w in SpacyWordSplitter(language='en_core_web_sm', pos_tags=False).split_words(x)[:max_seq_len]]
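    # Note: the tokenizer above constructs a new SpacyWordSplitter on every call; building it once
    # outside the function would avoid the repeated construction.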


    #conll_reader = ConllCorefBertReader(max_span_width = max_span_width, token_indexers = {"tokens": token_indexer}) 
    conll_reader = ConllCorefReader(max_span_width = max_span_width, token_indexers = {"tokens": token_indexer})
    swag_reader = SWAGDatasetReader(tokenizer=tokenizer, token_indexers = token_indexer)
    EMBEDDING_DIM = 1024
    HIDDEN_DIM = 200
    conll_datasets, swag_datasets = load_datasets(conll_reader, swag_reader, directory)
    conll_vocab = Vocabulary()
    conll_iterator = BasicIterator(batch_size=batch_size)
    conll_iterator.index_with(conll_vocab)

    swag_vocab = Vocabulary()
    swag_iterator = BasicIterator(batch_size=batch_size)
    swag_iterator.index_with(swag_vocab)

    from allennlp.modules.text_field_embedders import BasicTextFieldEmbedder
    from allennlp.modules.token_embedders import ElmoTokenEmbedder

    #bert_embedder = PretrainedBertEmbedder(pretrained_model="bert-base-cased",top_layer_only=True, requires_grad=True)

    options_file = 'https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x1024_128_2048cnn_1xhighway/elmo_2x1024_128_2048cnn_1xhighway_options.json'
    weight_file = 'https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x1024_128_2048cnn_1xhighway/elmo_2x1024_128_2048cnn_1xhighway_weights.hdf5'
 
    elmo_embedder = ElmoTokenEmbedder(options_file, weight_file)
    word_embedding = BasicTextFieldEmbedder({"tokens": elmo_embedder})#, allow_unmatched_keys=True)

    #word_embedding = BasicTextFieldEmbedder({"tokens": bert_embedder}, allow_unmatched_keys=True)
    #BERT_DIM = word_embedding.get_output_dim()
    ELMO_DIM = word_embedding.get_output_dim()

    seq2seq = PytorchSeq2SeqWrapper(torch.nn.LSTM(ELMO_DIM, HIDDEN_DIM, batch_first=True, bidirectional=True))
    seq2vec = PytorchSeq2VecWrapper(torch.nn.LSTM(ELMO_DIM, HIDDEN_DIM, batch_first=True, bidirectional=True))
    mention_feedforward = FeedForward(input_dim=2336, num_layers=2, hidden_dims=150, activations=torch.nn.ReLU())
    antecedent_feedforward = FeedForward(input_dim=7776, num_layers=2, hidden_dims=150, activations=torch.nn.ReLU())
    model1 = CoreferenceResolver(vocab=conll_vocab, text_field_embedder=word_embedding, context_layer=seq2seq,
                                 mention_feedforward=mention_feedforward, antecedent_feedforward=antecedent_feedforward,
                                 feature_size=768, max_span_width=max_span_width, spans_per_word=0.4,
                                 max_antecedents=250, lexical_dropout=0.2)

    model2 = SWAGExampleModel(vocab=swag_vocab, text_field_embedder=word_embedding, phrase_encoder=seq2vec)
    optimizer1 = optim.Adam(model1.parameters(), lr=lr)
    optimizer2 = optim.Adam(model2.parameters(), lr=lr)

    swag_train_iterator = swag_iterator(swag_datasets[0], num_epochs=1, shuffle=True)
    conll_train_iterator = conll_iterator(conll_datasets[0], num_epochs=1, shuffle=True)
    swag_val_iterator = swag_iterator(swag_datasets[1], num_epochs=1, shuffle=True)
    conll_val_iterator = conll_iterator(conll_datasets[1], num_epochs=1, shuffle=True)
    task_infos = {"swag": {"model": model2, "optimizer": optimizer2, "loss": 0.0, "iterator": swag_iterator, "train_data": swag_datasets[0], "val_data": swag_datasets[1], "num_train": len(swag_datasets[0]), "num_val": len(swag_datasets[1]), "lr": lr, "score": {"accuracy":0.0}}, \
                    "conll": {"model": model1, "iterator": conll_iterator, "loss": 0.0, "val_data": conll_datasets[1], "train_data": conll_datasets[0], "optimizer": optimizer1, "num_train": len(conll_datasets[0]), "num_val": len(conll_datasets[1]),"lr": lr, "score": {"coref_prediction": 0.0, "coref_recall": 0.0, "coref_f1": 0.0,"mention_recall": 0.0}}}
    USE_GPU = 1
    trainer = MultiTaskTrainer(
        task_infos=task_infos, 
        num_epochs=epochs,
        serialization_dir=directory + "saved_models/multitask/"
    ) 
    metrics = trainer.train()
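
MultiTaskTrainer above is a project-specific class whose implementation is not shown here. As a rough illustration of what a round-robin pass over the task_infos structure could look like, here is a minimal sketch; the helper name train_one_epoch is an assumption and device placement is omitted:

def train_one_epoch(task_infos):
    # Illustrative round-robin over the tasks; not the project's MultiTaskTrainer.
    for task_name, info in task_infos.items():
        model, optimizer, iterator = info["model"], info["optimizer"], info["iterator"]
        model.train()
        for batch in iterator(info["train_data"], num_epochs=1, shuffle=True):
            optimizer.zero_grad()
            output = model(**batch)      # AllenNLP models return a dict; the trainer convention is a 'loss' key
            loss = output["loss"]
            loss.backward()
            optimizer.step()
            info["loss"] += loss.item()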
class TextInAllAspectSentimentOutTrainTemplate(
        ModelTrainTemplate.ModelTrainTemplate):
    """

    """
    def __init__(self, configuration):
        super().__init__(configuration)
        self.data_reader: DatasetReader = None
        self.train_data = None
        self.dev_data = None
        self.test_data = None
        self.hard_test_data = None
        self.distinct_categories: List[str] = None
        self.distinct_polarities: List[str] = None
        self._load_data()
        self._get_max_sentence_len()
        if self.configuration['debug']:
            self.train_data = self.train_data[:128]
            self.dev_data = self.dev_data[:128]
            self.test_data = self.test_data[:128]

        self.vocab = None
        self._build_vocab()

        self.iterator = None
        self.val_iterator = None
        self._build_iterator()

        self.acd_model_dir = self.model_dir + 'acd/'

    def _get_max_sentence_len(self):
        len_count = collections.defaultdict(int)
        for data in [self.train_data, self.test_data, self.dev_data]:
            if data is None:
                continue
            for sample in data:
                tokens = sample.fields['tokens'].tokens
                # tokens = sample.fields['sample'].metadata[4]
                # if len(tokens) > self.configuration['max_len']:
                #     print(tokens)
                len_count[len(tokens)] += 1
        len_count_list = [[items[0], items[1]] for items in len_count.items()]
        len_count_list.sort(key=lambda x: x[0])
        self.logger.info('len_count_list: %s' % str(len_count_list))

    def _get_data_reader(self):
        token_indexer = SingleIdTokenIndexer(namespace="tokens")
        position_indexer = SingleIdTokenIndexer(namespace='position')
        aspect_indexer = SingleIdTokenIndexer(namespace='aspect')
        reader = acd_and_sc_data_reader.TextInAllAspectSentimentOut(
            self.distinct_categories,
            self.distinct_polarities,
            tokenizer=self._get_word_segmenter(),
            token_indexers={"tokens": token_indexer},
            position_indexers={'position': position_indexer},
            aspect_indexers={'aspect': aspect_indexer},
            configuration=self.configuration)
        return reader

    def _load_data(self):
        data_filepath = self.base_data_dir + 'data'
        if os.path.exists(data_filepath):
            self.train_data, self.dev_data, self.test_data, self.distinct_categories, self.distinct_polarities, \
            self.hard_test_data = super()._load_object(data_filepath)
            reader = self._get_data_reader()
            self.data_reader = reader
        else:
            train_dev_test_data, distinct_categories, distinct_polarities = self.dataset. \
                generate_acd_and_sc_data(dev_size=0.2)

            if self.configuration['hard_test']:
                train_dev_test_data['hard_test'] = []
                for sample in train_dev_test_data['test']:
                    polarities = set([e[1] for e in sample[1]])
                    if len(polarities) >= 2:
                        train_dev_test_data['hard_test'].append(sample)

            distinct_polarities_new = []
            for polarity in distinct_polarities:
                if polarity != 'conflict':
                    distinct_polarities_new.append(polarity)
            self.distinct_categories = distinct_categories
            self.distinct_polarities = distinct_polarities_new

            train_dev_test_data_label_indexed = {}
            for data_type, data in train_dev_test_data.items():
                if data is None:
                    continue
                data_new = []
                for sample in data:
                    sample_new = [sample[0]]
                    labels_new = []
                    for label in sample[1]:
                        aspect = label[0]
                        polarity = label[1]
                        aspect_index = distinct_categories.index(aspect)
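                        # 'conflict' labels are mapped to -100 below, which is the default ignore_index
                        # of nn.CrossEntropyLoss, so they presumably contribute no sentiment loss.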
                        if polarity == 'conflict':
                            polarity_index = -100
                        else:
                            polarity_index = distinct_polarities_new.index(
                                polarity)
                        labels_new.append((aspect_index, polarity_index))
                    if len(labels_new) != 0:
                        sample_new.append(labels_new)
                        data_new.append(sample_new)
                train_dev_test_data_label_indexed[data_type] = data_new

            reader = self._get_data_reader()
            self.data_reader = reader
            self.train_data = reader.read(
                train_dev_test_data_label_indexed['train'])
            self.dev_data = reader.read(
                train_dev_test_data_label_indexed['dev'])
            self.test_data = reader.read(
                train_dev_test_data_label_indexed['test'])
            if self.configuration['hard_test']:
                self.hard_test_data = reader.read(
                    train_dev_test_data_label_indexed['hard_test'])
            data = [
                self.train_data, self.dev_data, self.test_data,
                self.distinct_categories, self.distinct_polarities,
                self.hard_test_data
            ]
            super()._save_object(data_filepath, data)

    def _build_vocab(self):
        if self.configuration['train']:
            vocab_file_path = self.base_data_dir + 'vocab'
            if os.path.exists(vocab_file_path):
                self.vocab = super()._load_object(vocab_file_path)
            else:
                data = self.train_data + self.dev_data + self.test_data
                self.vocab = Vocabulary.from_instances(
                    data, max_vocab_size=sys.maxsize)
                super()._save_object(vocab_file_path, self.vocab)
            self.model_meta_data['vocab'] = self.vocab
        else:
            self.vocab = self.model_meta_data['vocab']

    def _build_iterator(self):
        self.iterator = BucketIterator(
            batch_size=self.configuration['batch_size'],
            sorting_keys=[("tokens", "num_tokens")],
        )
        self.iterator.index_with(self.vocab)
        self.val_iterator = BasicIterator(
            batch_size=self.configuration['batch_size'])
        self.val_iterator.index_with(self.vocab)

    def _print_args(self, model):
        n_trainable_params, n_nontrainable_params = 0, 0
        for p in model.parameters():
            n_params = torch.prod(torch.tensor(p.shape)).item()
            if p.requires_grad:
                n_trainable_params += n_params
            else:
                n_nontrainable_params += n_params
        self.logger.info(
            'n_trainable_params: {0}, n_nontrainable_params: {1}'.format(
                n_trainable_params, n_nontrainable_params))
        self.logger.info('> training arguments:')
        for arg in self.configuration.keys():
            self.logger.info('>>> {0}: {1}'.format(arg,
                                                   self.configuration[arg]))

    def _find_model_function_pure(self):
        raise NotImplementedError('_find_model_function_pure')

    def _get_aspect_embeddings_dim(self):
        return 300

    def _get_position_embeddings_dim(self):
        return 300

    def _is_train_token_embeddings(self):
        return False

    def _find_model_function(self):
        embedding_dim = self.configuration['embed_size']
        embedding_matrix_filepath = self.base_data_dir + 'embedding_matrix'
        if os.path.exists(embedding_matrix_filepath):
            embedding_matrix = super()._load_object(embedding_matrix_filepath)
        else:
            embedding_filepath = self.configuration['embedding_filepath']
            embedding_matrix = embedding._read_embeddings_from_text_file(
                embedding_filepath,
                embedding_dim,
                self.vocab,
                namespace='tokens')
            super()._save_object(embedding_matrix_filepath, embedding_matrix)
        embedding_matrix = embedding_matrix.to(self.configuration['device'])
        token_embedding = Embedding(
            num_embeddings=self.vocab.get_vocab_size(namespace='tokens'),
            embedding_dim=embedding_dim,
            padding_index=0,
            vocab_namespace='tokens',
            trainable=self._is_train_token_embeddings(),
            weight=embedding_matrix)
        # the embedder maps the input tokens to the appropriate embedding matrix
        word_embedder: TextFieldEmbedder = BasicTextFieldEmbedder(
            {"tokens": token_embedding})

        position_embedding = Embedding(
            num_embeddings=self.vocab.get_vocab_size(namespace='position'),
            embedding_dim=self._get_position_embeddings_dim(),
            padding_index=0)
        position_embedder: TextFieldEmbedder = BasicTextFieldEmbedder(
            {"position": position_embedding},
            # we'll be ignoring masks so we'll need to set this to True
            allow_unmatched_keys=True)

        aspect_embedding = Embedding(
            num_embeddings=self.vocab.get_vocab_size(namespace='aspect'),
            embedding_dim=self._get_aspect_embeddings_dim(),
            padding_index=0)
        aspect_embedder: TextFieldEmbedder = BasicTextFieldEmbedder(
            {"aspect": aspect_embedding},
            # we'll be ignoring masks so we'll need to set this to True
            allow_unmatched_keys=True)
        model_function: pytorch_models.TextInAllAspectSentimentOutModel = self._find_model_function_pure()
        model = model_function(
            word_embedder,
            position_embedder,
            aspect_embedder,
            self.distinct_categories,
            self.distinct_polarities,
            self.vocab,
            self.configuration,
        )
        self._print_args(model)
        model = model.to(self.configuration['device'])
        return model

    def _get_optimizer(self, model):
        _params = filter(lambda p: p.requires_grad, model.parameters())
        return optim.Adam(_params, lr=0.001, weight_decay=0.00001)

    def _get_acd_optimizer(self, model):
        _params = filter(lambda p: p.requires_grad, model.parameters())
        return optim.Adam(_params, lr=0.001, weight_decay=0.00001)

    def _get_acd_warmup_epoch_num(self):
        return 3

    def _get_estimator(self, model):
        USE_GPU = torch.cuda.is_available()
        if USE_GPU:
            gpu_id = self.configuration['gpu_id']
        else:
            gpu_id = -1
        estimator = pytorch_models.TextInAllAspectSentimentOutEstimator(
            model,
            self.val_iterator,
            self.distinct_categories,
            self.distinct_polarities,
            configuration=self.configuration,
            cuda_device=gpu_id)
        return estimator

    def _get_estimate_callback(self, model):
        result = []
        data_type_and_data = {
            'train': self.train_data,
            'dev': self.dev_data,
            'test': self.test_data
        }
        if self.hard_test_data:
            data_type_and_data['hard_test'] = self.hard_test_data
        estimator = self._get_estimator(model)
        estimate_callback = allennlp_callback.EstimateCallback(
            data_type_and_data, estimator, self.logger)
        result.append(estimate_callback)
        return result

    def _get_loss_weight_callback(self):
        result = []
        set_loss_weight_callback = allennlp_callback.SetLossWeightCallback(
            self.model,
            self.logger,
            acd_warmup_epoch_num=self._get_acd_warmup_epoch_num())
        result.append(set_loss_weight_callback)
        return result

    def _get_fixed_loss_weight_callback(self,
                                        model,
                                        category_loss_weight=1,
                                        sentiment_loss_weight=1):
        result = []
        fixed_loss_weight_callback = allennlp_callback.FixedLossWeightCallback(
            model,
            self.logger,
            category_loss_weight=category_loss_weight,
            sentiment_loss_weight=sentiment_loss_weight)
        result.append(fixed_loss_weight_callback)
        return result

    def _get_bert_word_embedder(self):
        return None

    def _inner_train(self):
        USE_GPU = torch.cuda.is_available()
        if USE_GPU:
            gpu_id = self.configuration['gpu_id']
        else:
            gpu_id = -1

        self.model: pytorch_models.TextInAllAspectSentimentOutModel = self._find_model_function()

        estimator = self._get_estimator(self.model)
        if self.configuration['acd_warmup']:
            if self.configuration[
                    'frozen_all_acsc_parameter_while_pretrain_acd']:
                self.model.set_grad_for_acsc_parameter(requires_grad=False)

            optimizer = self._get_acd_optimizer(self.model)
            self.logger.info('acd warmup')
            validation_metric = '+category_f1'
            callbacks = self._get_estimate_callback(self.model)
            callbacks.extend(
                self._get_fixed_loss_weight_callback(self.model,
                                                     category_loss_weight=1,
                                                     sentiment_loss_weight=0))
            self._print_args(self.model)
            trainer = Trainer(
                model=self.model,
                optimizer=optimizer,
                iterator=self.iterator,
                train_dataset=self.train_data,
                validation_dataset=self.dev_data,
                cuda_device=gpu_id,
                num_epochs=self.configuration['acd_warmup_epochs'],
                validation_metric=validation_metric,
                validation_iterator=self.val_iterator,
                serialization_dir=self.acd_model_dir,
                patience=None if self.configuration['acd_warmup_patience']
                == -1 else self.configuration['acd_warmup_patience'],
                callbacks=callbacks,
                num_serialized_models_to_keep=2,
                early_stopping_by_batch=self.
                configuration['early_stopping_by_batch'],
                estimator=estimator,
                grad_clipping=5)
            metrics = trainer.train()
            self.logger.info('acd metrics: %s' % str(metrics))

            if self.configuration[
                    'frozen_all_acsc_parameter_while_pretrain_acd']:
                self.model.set_grad_for_acsc_parameter(requires_grad=True)
            # Restore BERT to its initial state
            if 'bert' in self.configuration and self.configuration['bert']:
                self.model.set_bert_word_embedder()
                bert_word_embedder = self._get_bert_word_embedder()
                self.model.set_bert_word_embedder(bert_word_embedder)

        if self.configuration['only_acd']:
            return None
        validation_metric = '+accuracy'
        if 'early_stopping_metric' in self.configuration:
            validation_metric = '+%s' % self.configuration[
                'early_stopping_metric']
        callbacks = self._get_estimate_callback(self.model)
        if self.configuration['acd_warmup'] and self.configuration['pipeline']:
            callbacks.extend(
                self._get_fixed_loss_weight_callback(self.model,
                                                     category_loss_weight=0,
                                                     sentiment_loss_weight=1))
            # ACD-related parameters are not updated
            self.model.no_grad_for_acd_parameter()
        else:
            callbacks.extend(
                self._get_fixed_loss_weight_callback(
                    self.model,
                    category_loss_weight=self.configuration['acd_init_weight'],
                    sentiment_loss_weight=1))
        self.logger.info('validation_metric: %s' % validation_metric)
        optimizer = self._get_optimizer(self.model)
        self._print_args(self.model)
        trainer = Trainer(model=self.model,
                          optimizer=optimizer,
                          iterator=self.iterator,
                          train_dataset=self.train_data,
                          validation_dataset=self.dev_data
                          if self.configuration['early_stopping'] else None,
                          cuda_device=gpu_id,
                          num_epochs=self.configuration['epochs'],
                          validation_metric=validation_metric,
                          validation_iterator=self.val_iterator,
                          serialization_dir=self.model_dir,
                          patience=self.configuration['patience'],
                          callbacks=callbacks,
                          num_serialized_models_to_keep=2,
                          early_stopping_by_batch=self.
                          configuration['early_stopping_by_batch'],
                          estimator=estimator,
                          grad_clipping=5)
        metrics = trainer.train()
        self.logger.info('metrics: %s' % str(metrics))

    def _save_model(self):
        torch.save(self.model, self.best_model_filepath)

    def _load_model(self):
        if torch.cuda.is_available():
            self.model = torch.load(self.best_model_filepath)
        else:
            self.model = torch.load(self.best_model_filepath,
                                    map_location=torch.device('cpu'))
        self.model.configuration = self.configuration

    def evaluate(self):
        USE_GPU = torch.cuda.is_available()
        if USE_GPU:
            gpu_id = self.configuration['gpu_id']
        else:
            gpu_id = -1
        estimator = pytorch_models.TextInAllAspectSentimentOutEstimator(
            self.model,
            self.val_iterator,
            self.distinct_categories,
            self.distinct_polarities,
            configuration=self.configuration,
            cuda_device=gpu_id)

        data_type_and_data = {
            # 'train': self.train_data,
            'dev': self.dev_data,
            'test': self.test_data
        }
        if self.hard_test_data:
            data_type_and_data['hard_test'] = self.hard_test_data
        if 'performance_of_different_lengths' in self.configuration:
            lengths = self.configuration[
                'performance_of_different_lengths'].split(',')
            if len(lengths) > 1:
                data_of_different_lengths = {
                    int(length): []
                    for length in lengths
                }
                for sample in data_type_and_data['test']:
                    tokens = sample.fields['tokens'].tokens
                    for length in data_of_different_lengths:
                        if len(tokens) <= length:
                            data_of_different_lengths[length].append(sample)
                for length, data in data_of_different_lengths.items():
                    if len(data) > 0:
                        data_type_and_data['test_%d' % length] = data
        for data_type, data in data_type_and_data.items():
            result = estimator.estimate(data)
            self.logger.info('data_type: %s result: %s' % (data_type, result))

    def predict(self, texts: List[str] = None):
        """

        :param texts: if texts is None, the test set from training is used instead
        :return:
        """
        USE_GPU = torch.cuda.is_available()
        if USE_GPU:
            gpu_id = self.configuration['gpu_id']
        else:
            gpu_id = -1
        predictor = pytorch_models.TextInAllAspectSentimentOutPredictor(
            self.model,
            self.val_iterator,
            self.distinct_categories,
            self.distinct_polarities,
            configuration=self.configuration,
            cuda_device=gpu_id)

        data = self.data_reader.read(texts)
        result = predictor.predict(data)
        return result

    def error_analysis(self):
        """

        :return:
        """
        USE_GPU = torch.cuda.is_available()
        if USE_GPU:
            gpu_id = self.configuration['gpu_id']
        else:
            gpu_id = -1
        predictor = pytorch_models.TextInAllAspectSentimentOutPredictor(
            self.model,
            self.val_iterator,
            self.distinct_categories,
            self.distinct_polarities,
            configuration=self.configuration,
            cuda_device=gpu_id)

        data = self.test_data
        result = predictor.predict(data)
        result_final = []
        for i in range(len(data)):
            instance: Instance = data[i]
            metadata = instance.fields['sample'].metadata
            sentence = metadata[0]
            labels_true = {
                self.distinct_categories[e[0]]: self.distinct_polarities[e[1]]
                for e in metadata[1]
            }
            labels_pred = result[i]
            for label_pred in labels_pred:
                label_true = labels_true[label_pred[0]]
                if label_true == label_pred[1]:
                    continue
                result_final.append(
                    (sentence, label_pred[0], label_pred[1], label_true))
        result_str = ['\t'.join(e) for e in result_final]
        output_filepath = os.path.join(self.model_dir, 'error_analysis.csv')
        file_utils.write_lines(result_str, output_filepath)
        return result_final
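
The template above delegates staged freezing to model helpers such as set_grad_for_acsc_parameter and no_grad_for_acd_parameter, whose implementations are not shown. In plain PyTorch, this kind of warm-up-then-pipeline schedule is usually realised by toggling requires_grad on named parameter groups; a minimal sketch, with illustrative parameter-name fragments:

import torch


def set_requires_grad(model: torch.nn.Module, name_fragment: str, requires_grad: bool) -> None:
    # Freeze or unfreeze every parameter whose name contains name_fragment.
    for name, param in model.named_parameters():
        if name_fragment in name:
            param.requires_grad = requires_grad


# ACD warmup: keep the sentiment parameters frozen while the category detector trains
# (comparable to set_grad_for_acsc_parameter(requires_grad=False) above).
#     set_requires_grad(model, 'sentiment', False)
# Pipeline stage: unfreeze the sentiment parameters and stop updating the ACD ones
# (comparable to no_grad_for_acd_parameter() above).
#     set_requires_grad(model, 'sentiment', True)
#     set_requires_grad(model, 'acd', False)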
예제 #32
0
class TestTrainer(AllenNlpTestCase):
    def setUp(self):
        super(TestTrainer, self).setUp()
        self.instances = SequenceTaggingDatasetReader().read(
            'tests/fixtures/data/sequence_tagging.tsv')
        vocab = Vocabulary.from_instances(self.instances)
        self.vocab = vocab
        self.model_params = Params({
            "text_field_embedder": {
                "tokens": {
                    "type": "embedding",
                    "embedding_dim": 5
                }
            },
            "encoder": {
                "type": "lstm",
                "input_size": 5,
                "hidden_size": 7,
                "num_layers": 2
            }
        })
        self.model = SimpleTagger.from_params(self.vocab, self.model_params)
        self.optimizer = torch.optim.SGD(self.model.parameters(), 0.01)
        self.iterator = BasicIterator(batch_size=2)
        self.iterator.index_with(vocab)

    def test_trainer_can_run(self):
        trainer = Trainer(model=self.model,
                          optimizer=self.optimizer,
                          iterator=self.iterator,
                          train_dataset=self.instances,
                          validation_dataset=self.instances,
                          num_epochs=2)
        metrics = trainer.train()
        assert 'best_validation_loss' in metrics
        assert isinstance(metrics['best_validation_loss'], float)
        assert 'best_epoch' in metrics
        assert isinstance(metrics['best_epoch'], int)

        # Making sure that both increasing and decreasing validation metrics work.
        trainer = Trainer(model=self.model,
                          optimizer=self.optimizer,
                          iterator=self.iterator,
                          train_dataset=self.instances,
                          validation_dataset=self.instances,
                          validation_metric='+loss',
                          num_epochs=2)
        metrics = trainer.train()
        assert 'best_validation_loss' in metrics
        assert isinstance(metrics['best_validation_loss'], float)
        assert 'best_epoch' in metrics
        assert isinstance(metrics['best_epoch'], int)

    @pytest.mark.skipif(not torch.cuda.is_available(),
                        reason="No CUDA device registered.")
    def test_trainer_can_run_cuda(self):
        trainer = Trainer(self.model,
                          self.optimizer,
                          self.iterator,
                          self.instances,
                          num_epochs=2,
                          cuda_device=0)
        trainer.train()

    @pytest.mark.skipif(torch.cuda.device_count() < 2,
                        reason="Need multiple GPUs.")
    def test_trainer_can_run_multiple_gpu(self):
        multigpu_iterator = BasicIterator(batch_size=4)
        multigpu_iterator.index_with(self.vocab)
        trainer = Trainer(self.model,
                          self.optimizer,
                          multigpu_iterator,
                          self.instances,
                          num_epochs=2,
                          cuda_device=[0, 1])
        trainer.train()

    def test_trainer_can_resume_training(self):
        trainer = Trainer(self.model,
                          self.optimizer,
                          self.iterator,
                          self.instances,
                          validation_dataset=self.instances,
                          num_epochs=1,
                          serialization_dir=self.TEST_DIR)
        trainer.train()
        new_trainer = Trainer(self.model,
                              self.optimizer,
                              self.iterator,
                              self.instances,
                              validation_dataset=self.instances,
                              num_epochs=3,
                              serialization_dir=self.TEST_DIR)

        epoch, val_metrics_per_epoch = new_trainer._restore_checkpoint()  # pylint: disable=protected-access
        assert epoch == 1
        assert len(val_metrics_per_epoch) == 1
        assert isinstance(val_metrics_per_epoch[0], float)
        assert val_metrics_per_epoch[0] != 0.
        new_trainer.train()

    def test_should_stop_early_with_increasing_metric(self):
        new_trainer = Trainer(self.model,
                              self.optimizer,
                              self.iterator,
                              self.instances,
                              validation_dataset=self.instances,
                              num_epochs=3,
                              serialization_dir=self.TEST_DIR,
                              patience=5,
                              validation_metric="+test")
        assert new_trainer._should_stop_early([.5, .3, .2, .1, .4, .4])  # pylint: disable=protected-access
        assert not new_trainer._should_stop_early([.3, .3, .3, .2, .5, .1])  # pylint: disable=protected-access

    def test_should_stop_early_with_decreasing_metric(self):
        new_trainer = Trainer(self.model,
                              self.optimizer,
                              self.iterator,
                              self.instances,
                              validation_dataset=self.instances,
                              num_epochs=3,
                              serialization_dir=self.TEST_DIR,
                              patience=5,
                              validation_metric="-test")
        assert new_trainer._should_stop_early([.02, .3, .2, .1, .4, .4])  # pylint: disable=protected-access
        assert not new_trainer._should_stop_early([.3, .3, .2, .1, .4, .5])  # pylint: disable=protected-access

    def test_trainer_can_run_with_lr_scheduler(self):

        lr_params = Params({"type": "reduce_on_plateau"})
        lr_scheduler = LearningRateScheduler.from_params(
            self.optimizer, lr_params)
        trainer = Trainer(model=self.model,
                          optimizer=self.optimizer,
                          iterator=self.iterator,
                          learning_rate_scheduler=lr_scheduler,
                          validation_metric="-loss",
                          train_dataset=self.instances,
                          validation_dataset=self.instances,
                          num_epochs=2)
        trainer.train()

    def test_trainer_raises_on_model_with_no_loss_key(self):
        class FakeModel(torch.nn.Module):
            def forward(self, **kwargs):  # pylint: disable=arguments-differ,unused-argument
                return {}

        with pytest.raises(RuntimeError):
            trainer = Trainer(FakeModel(),
                              self.optimizer,
                              self.iterator,
                              self.instances,
                              num_epochs=2,
                              serialization_dir=self.TEST_DIR)
            trainer.train()

    def test_trainer_can_log_histograms(self):
        # enable activation logging
        for module in self.model.modules():
            module.should_log_activations = True

        trainer = Trainer(self.model,
                          self.optimizer,
                          self.iterator,
                          self.instances,
                          num_epochs=3,
                          serialization_dir=self.TEST_DIR,
                          histogram_interval=2)
        trainer.train()

    def test_trainer_respects_num_serialized_models_to_keep(self):
        trainer = Trainer(self.model,
                          self.optimizer,
                          self.iterator,
                          self.instances,
                          num_epochs=5,
                          serialization_dir=self.TEST_DIR,
                          num_serialized_models_to_keep=3)
        trainer.train()

        # Now check the serialized files
        for prefix in ['model_state_epoch_*', 'training_state_epoch_*']:
            file_names = glob.glob(os.path.join(self.TEST_DIR, prefix))
            epochs = [
                int(re.search(r"_([0-9])\.th", fname).group(1))
                for fname in file_names
            ]
            assert sorted(epochs) == [2, 3, 4]

    def test_trainer_respects_keep_serialized_model_every_num_seconds(self):
        # To test:
        #   Create an iterator that sleeps for 0.5 seconds per epoch, so the total training
        #       time for one epoch is slightly greater than 0.5 seconds.
        #   Run for 6 epochs, keeping the last 2 models, models also kept every 1 second.
        #   Check the resulting checkpoints.  Should then have models at epochs
        #       2, 4, plus the last two at 5 and 6.
        class WaitingIterator(BasicIterator):
            # pylint: disable=arguments-differ
            def _create_batches(self, *args, **kwargs):
                time.sleep(0.5)
                return super(WaitingIterator,
                             self)._create_batches(*args, **kwargs)

        iterator = WaitingIterator(batch_size=2)
        iterator.index_with(self.vocab)

        trainer = Trainer(self.model,
                          self.optimizer,
                          iterator,
                          self.instances,
                          num_epochs=6,
                          serialization_dir=self.TEST_DIR,
                          num_serialized_models_to_keep=2,
                          keep_serialized_model_every_num_seconds=1)
        trainer.train()

        # Now check the serialized files
        for prefix in ['model_state_epoch_*', 'training_state_epoch_*']:
            file_names = glob.glob(os.path.join(self.TEST_DIR, prefix))
            epochs = [
                int(re.search(r"_([0-9])\.th", fname).group(1))
                for fname in file_names
            ]
            # epoch N has N-1 in file name
            assert sorted(epochs) == [1, 3, 4, 5]

    def test_trainer_saves_models_at_specified_interval(self):
        iterator = BasicIterator(batch_size=4)
        iterator.index_with(self.vocab)

        trainer = Trainer(self.model,
                          self.optimizer,
                          iterator,
                          self.instances,
                          num_epochs=2,
                          serialization_dir=self.TEST_DIR,
                          model_save_interval=0.0001)

        trainer.train()

        # Now check the serialized files for models saved during the epoch.
        prefix = 'model_state_epoch_*'
        file_names = sorted(glob.glob(os.path.join(self.TEST_DIR, prefix)))
        epochs = [
            re.search(r"_([0-9\.\-]+)\.th", fname).group(1)
            for fname in file_names
        ]
        # We should have checkpoints at the end of each epoch and during each, e.g.
        # [0.timestamp, 0, 1.timestamp, 1]
        assert len(epochs) == 4
        assert epochs[3] == '1'
        assert '.' in epochs[0]

        # Now make certain we can restore from timestamped checkpoint.
        # To do so, remove the checkpoint from the end of epoch 1&2, so
        # that we are forced to restore from the timestamped checkpoints.
        for k in range(2):
            os.remove(
                os.path.join(self.TEST_DIR,
                             'model_state_epoch_{}.th'.format(k)))
            os.remove(
                os.path.join(self.TEST_DIR,
                             'training_state_epoch_{}.th'.format(k)))
        os.remove(os.path.join(self.TEST_DIR, 'best.th'))

        restore_trainer = Trainer(self.model,
                                  self.optimizer,
                                  self.iterator,
                                  self.instances,
                                  num_epochs=2,
                                  serialization_dir=self.TEST_DIR,
                                  model_save_interval=0.0001)
        epoch, _ = restore_trainer._restore_checkpoint()  # pylint: disable=protected-access
        assert epoch == 2
        # One batch per epoch.
        assert restore_trainer._batch_num_total == 2  # pylint: disable=protected-access
Example #33
def train_fever():
    num_epoch = 8
    seed = 12
    batch_size = 32
    experiment_name = "mesim_elmo"
    lazy = True

    dev_upstream_file = config.RESULT_PATH / "sent_retri/2018_07_05_17:17:50_r/dev.jsonl"
    train_upstream_file = config.RESULT_PATH / "sent_retri/2018_07_05_17:17:50_r/train.jsonl"

    # Prepare Data
    token_indexers = {
        'tokens': SingleIdTokenIndexer(namespace='tokens'),  # This is the raw tokens
        'elmo_chars': ELMoTokenCharactersIndexer(namespace='elmo_characters')  # This is the elmo_characters
    }

    train_fever_data_reader = BasicReader(token_indexers=token_indexers, lazy=lazy, max_l=360)
    dev_fever_data_reader = BasicReader(token_indexers=token_indexers, lazy=lazy, max_l=360)

    complete_upstream_dev_data = get_actual_data(config.T_FEVER_DEV_JSONL, dev_upstream_file)
    dev_instances = dev_fever_data_reader.read(complete_upstream_dev_data)
    # Load Vocabulary
    biterator = BasicIterator(batch_size=batch_size)

    vocab, weight_dict = load_vocab_embeddings(config.DATA_ROOT / "vocab_cache" / "nli_basic")
    vocab.change_token_with_index_to_namespace('hidden', -2, namespace='labels')
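    # (Presumably this pins the placeholder 'hidden' label at index -2 so that it is
    #  never confused with a real class index; the exact semantics live in the
    #  project's Vocabulary extension, not in stock AllenNLP.)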

    print(vocab.get_token_to_index_vocabulary('labels'))
    print(vocab.get_vocab_size('tokens'))

    biterator.index_with(vocab)

    # Build Model
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu", index=0)
    device_num = -1 if device.type == 'cpu' else 0

    model = Model(weight=weight_dict['glove.840B.300d'],
                  vocab_size=vocab.get_vocab_size('tokens'),
                  embedding_dim=300, max_l=300)

    model.display()
    model.to(device)

    # Create Log File
    file_path_prefix, date = save_tool.gen_file_prefix(f"{experiment_name}")

    best_dev = -1
    iteration = 0

    start_lr = 0.0002
    optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=start_lr)
    criterion = nn.CrossEntropyLoss()

    for i_epoch in range(num_epoch):
        print("Resampling...")
        # Resampling
        complete_upstream_train_data = get_sampled_data(config.T_FEVER_TRAIN_JSONL, train_upstream_file)

        sampled_train_instances = train_fever_data_reader.read(complete_upstream_train_data)

        train_iter = biterator(sampled_train_instances, shuffle=True, num_epochs=1, cuda_device=device_num)
        for i, batch in tqdm(enumerate(train_iter)):
            model.train()
            out = model(batch)
            y = batch['label']

            loss = criterion(out, y)

            # No decay
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            iteration += 1

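            # Evaluate only every 5000 iterations during the first five epochs,
            # then much more frequently (every 200 iterations) afterwards.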
            if i_epoch <= 4:
                mod = 5000
            else:
                mod = 200

            if iteration % mod == 0:
                eval_iter = biterator(dev_instances, shuffle=False, num_epochs=1, cuda_device=device_num)
                dev_score, dev_loss = full_eval_model(model, eval_iter, criterion, complete_upstream_dev_data)

                print(f"Dev:{dev_score}/{dev_loss}")

                need_save = False
                if dev_score > best_dev:
                    best_dev = dev_score
                    need_save = True

                if need_save:
                    save_path = os.path.join(
                        file_path_prefix,
                        f'i({iteration})_epoch({i_epoch})_dev({dev_score})_loss({dev_loss})_seed({seed})'
                    )

                    torch.save(model.state_dict(), save_path)
Example #34
def debug_fever():
    num_epoch = 8
    seed = 12
    batch_size = 128
    experiment_name = "simple_nn"
    lazy = True
    torch.manual_seed(seed)
    keep_neg_sample_prob = 0.6
    sample_prob_decay = 0.1

    dev_upstream_file = config.RESULT_PATH / "doc_retri/cn_util_Jul17_docretri.singularize/dev.jsonl"
    train_upstream_file = config.RESULT_PATH / "doc_retri/cn_util_Jul17_docretri.singularize/train.jsonl"

    # Prepare Data
    token_indexers = {
        'tokens': SingleIdTokenIndexer(namespace='tokens'),  # This is the raw tokens
        'elmo_chars': ELMoTokenCharactersIndexer(namespace='elmo_characters')  # This is the elmo_characters
    }

    train_fever_data_reader = SSelectorReader(token_indexers=token_indexers, lazy=lazy, max_l=300)
    # dev_fever_data_reader = SSelectorReader(token_indexers=token_indexers, lazy=False)
    dev_fever_data_reader = SSelectorReader(token_indexers=token_indexers, lazy=lazy, max_l=300)

    complete_upstream_dev_data = get_full_list(config.T_FEVER_DEV_JSONL, dev_upstream_file, pred=True)
    print("Dev size:", len(complete_upstream_dev_data))
    dev_instances = dev_fever_data_reader.read(complete_upstream_dev_data)

    # Load Vocabulary
    biterator = BasicIterator(batch_size=batch_size)
    dev_biterator = BasicIterator(batch_size=batch_size)

    vocab, weight_dict = load_vocab_embeddings(config.DATA_ROOT / "vocab_cache" / "nli_basic")
    # This is important
    vocab.add_token_to_namespace("true", namespace="selection_labels")
    vocab.add_token_to_namespace("false", namespace="selection_labels")
    vocab.add_token_to_namespace("hidden", namespace="selection_labels")
    vocab.change_token_with_index_to_namespace("hidden", -2, namespace='selection_labels')
    # Label value

    vocab.get_index_to_token_vocabulary('selection_labels')

    print(vocab.get_token_to_index_vocabulary('selection_labels'))
    print(vocab.get_vocab_size('tokens'))

    biterator.index_with(vocab)
    dev_biterator.index_with(vocab)

    # exit(0)
    # Build Model
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu", index=0)
    device_num = -1 if device.type == 'cpu' else 0

    model = Model(weight=weight_dict['glove.840B.300d'],
                  vocab_size=vocab.get_vocab_size('tokens'),
                  embedding_dim=300, max_l=280, num_of_class=2)

    model.display()
    model.to(device)

    # Create Log File
    file_path_prefix, date = save_tool.gen_file_prefix(f"{experiment_name}")
    # Save the source code.
    script_name = os.path.basename(__file__)
    with open(os.path.join(file_path_prefix, script_name), 'w') as out_f, open(__file__, 'r') as it:
        out_f.write(it.read())
        out_f.flush()
    # Save source code end.

    best_dev = -1
    iteration = 0
    i_epoch = 0

    start_lr = 0.0002
    optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=start_lr)
    criterion = nn.CrossEntropyLoss()

    eval_iter = dev_biterator(dev_instances, shuffle=False, num_epochs=1, cuda_device=device_num)
    complete_upstream_dev_data = hidden_eval(model, eval_iter, complete_upstream_dev_data)

    dev_results_list = score_converter_v0(config.T_FEVER_DEV_JSONL, complete_upstream_dev_data)
    eval_mode = {'check_sent_id_correct': True, 'standard': True}
    strict_score, acc_score, pr, rec, f1 = c_scorer.fever_score(dev_results_list, config.T_FEVER_DEV_JSONL,
                                                                mode=eval_mode, verbose=False)
    total = len(dev_results_list)
    hit = eval_mode['check_sent_id_correct_hits']
    tracking_score = hit / total

    print(f"Dev(raw_acc/pr/rec/f1):{acc_score}/{pr}/{rec}/{f1}/")
    print("Strict score:", strict_score)
    print(f"Eval Tracking score:", f"{tracking_score}")

    need_save = False
    if tracking_score > best_dev:
        best_dev = tracking_score
        need_save = True

    if need_save:
        save_path = os.path.join(
            file_path_prefix,
            f'i({iteration})_epoch({i_epoch})_'
            f'(tra_score:{tracking_score}|raw_acc:{acc_score}|pr:{pr}|rec:{rec}|f1:{f1})'
        )

        torch.save(model.state_dict(), save_path)

    print("Epoch Evaluation...")
    eval_iter = dev_biterator(dev_instances, shuffle=False, num_epochs=1, cuda_device=device_num)
    complete_upstream_dev_data = hidden_eval(model, eval_iter, complete_upstream_dev_data)

    dev_results_list = score_converter_v0(config.T_FEVER_DEV_JSONL, complete_upstream_dev_data)
    eval_mode = {'check_sent_id_correct': True, 'standard': True}
    strict_score, acc_score, pr, rec, f1 = c_scorer.fever_score(dev_results_list, config.T_FEVER_DEV_JSONL,
                                                                mode=eval_mode, verbose=False)
    total = len(dev_results_list)
    hit = eval_mode['check_sent_id_correct_hits']
    tracking_score = hit / total

    print(f"Dev(raw_acc/pr/rec/f1):{acc_score}/{pr}/{rec}/{f1}/")
    print("Strict score:", strict_score)
    print(f"Eval Tracking score:", f"{tracking_score}")

    if tracking_score > best_dev:
        best_dev = tracking_score

    save_path = os.path.join(
        file_path_prefix,
        f'i({iteration})_epoch({i_epoch})_'
        f'(tra_score:{tracking_score}|raw_acc:{acc_score}|pr:{pr}|rec:{rec}|f1:{f1})_epoch'
    )

    torch.save(model.state_dict(), save_path)
Example #35
def main():
    # Load SNLI dataset
    single_id_indexer = SingleIdTokenIndexer(lowercase_tokens=True)  # word tokenizer
    tokenizer = WordTokenizer(
        end_tokens=["@@NULL@@"]
    )  # add @@NULL@@ to the end of sentences
    reader = SnliReader(
        token_indexers={"tokens": single_id_indexer}, tokenizer=tokenizer
    )
    dev_dataset = reader.read(
        "https://s3-us-west-2.amazonaws.com/allennlp/datasets/snli/snli_1.0_dev.jsonl"
    )
    # Load model and vocab
    model = load_archive(
        "https://allennlp.s3-us-west-2.amazonaws.com/models/esim-glove-snli-2019.04.23.tar.gz"
    ).model
    model.eval().cuda()
    vocab = model.vocab

    # add hooks for embeddings so we can compute gradients w.r.t. the input tokens
    utils.add_hooks(model)
    embedding_weight = utils.get_embedding_weight(
        model
    )  # save the word embedding matrix
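    # HotFlip-style attacks use these two pieces together: the gradient of the loss
    # w.r.t. each trigger embedding is multiplied against the embedding matrix to
    # rank candidate replacement tokens (see attacks.hotflip_attack below).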

    # Batches of examples to construct triggers
    universal_perturb_batch_size = 32
    iterator = BasicIterator(batch_size=universal_perturb_batch_size)
    iterator.index_with(vocab)

    # Subsample the dataset to one class to do a universal attack on that class
    dataset_label_filter = "entailment"  # only entailment examples
    # dataset_label_filter = 'contradiction' # only contradiction examples
    # dataset_label_filter = 'neutral' # only neutral examples
    subset_dev_dataset = []
    for instance in dev_dataset:
        if instance["label"].label == dataset_label_filter:
            subset_dev_dataset.append(instance)
    # the attack is targeted towards a specific class
    # target_label = "0" # flip to entailment
    target_label = "1"  # flip to contradiction
    # target_label = "2" # flip to neutral

    # A k-d tree if you want to do gradient + nearest neighbors
    # tree = KDTree(embedding_weight.numpy())

    # Get original accuracy before adding universal triggers
    utils.get_accuracy(
        model, subset_dev_dataset, vocab, trigger_token_ids=None, snli=True
    )
    model.train()  # rnn cannot do backwards in eval mode

    # Initialize triggers
    num_trigger_tokens = 1  # one token prepended
    trigger_token_ids = [vocab.get_token_index("a")] * num_trigger_tokens
    # sample batches, update the triggers, and repeat
    for batch in lazy_groups_of(
        iterator(subset_dev_dataset, num_epochs=10, shuffle=True), group_size=1
    ):
        # get model accuracy with current triggers
        utils.get_accuracy(
            model, subset_dev_dataset, vocab, trigger_token_ids, snli=True
        )
        model.train()  # rnn cannot do backwards in eval mode

        # get grad of triggers
        averaged_grad = utils.get_average_grad(
            model, batch, trigger_token_ids, target_label, snli=True
        )

        # find attack candidates using an attack method
        cand_trigger_token_ids = attacks.hotflip_attack(
            averaged_grad, embedding_weight, num_candidates=40
        )
        # cand_trigger_token_ids = attacks.random_attack(embedding_weight,
        #                                                trigger_token_ids,
        #                                                num_candidates=40)
        # cand_trigger_token_ids = attacks.nearest_neighbor_grad(averaged_grad,
        #                                                        embedding_weight,
        #                                                        trigger_token_ids,
        #                                                        tree,
        #                                                        100,
        #                                                        decrease_prob=True)

        # query the model to get the best candidates
        trigger_token_ids = utils.get_best_candidates(
            model, batch, trigger_token_ids, cand_trigger_token_ids, snli=True
        )
Example #36
    def test_epoch_tracking_when_one_epoch_at_a_time(self):
        iterator = BasicIterator(batch_size=2, track_epoch=True)
        iterator.index_with(self.vocab)
        for epoch in range(10):
            for batch in iterator(self.instances, num_epochs=1):
                assert all(epoch_num == epoch for epoch_num in batch['epoch_num'])
def train_only_lee():
    # This is WORKING!
    # Load the dataset reader.
    # Save logging to a local file.
    # Multitasking.
    log.getLogger().addHandler(log.FileHandler(directory+"/log.log"))

    lr = 0.00001
    batch_size = 2
    epochs = 100
    max_seq_len = 512
    max_span_width = 30
    #token_indexer = BertIndexer(pretrained_model="bert-base-uncased", max_pieces=max_seq_len, do_lowercase=True,)
    token_indexer = PretrainedBertIndexer("bert-base-cased", do_lowercase=False)
    reader = ConllCorefBertReader(max_span_width = max_span_width, token_indexers = {"tokens": token_indexer})

    EMBEDDING_DIM = 1024
    HIDDEN_DIM = 200
    processed_reader_dir = Path(directory+"processed/")
    
    train_ds, val_ds, test_ds = load_lee(reader, directory)
    # restore checkpoint here
    from allennlp.modules.token_embedders import ElmoTokenEmbedder
    #vocab = Vocabulary.from_instances(train_ds + val_ds)
    vocab = Vocabulary()
    iterator = BasicIterator(batch_size=batch_size)
    iterator.index_with(vocab)

    val_iterator = BasicIterator(batch_size=batch_size)
    val_iterator.index_with(vocab)
    from allennlp.modules.text_field_embedders import BasicTextFieldEmbedder
    # Here, allow_unmatched_keys=True would be used since we don't pass in offsets:
    # we allow word embeddings of the BERT-tokenized text, not necessarily the
    # original tokens.
    # See the documentation for offsets here for more info:
    # https://github.com/allenai/allennlp/blob/master/allennlp/modules/token_embedders/bert_token_embedder.py
    options_file = 'https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x1024_128_2048cnn_1xhighway/elmo_2x1024_128_2048cnn_1xhighway_options.json'
    weight_file = 'https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x1024_128_2048cnn_1xhighway/elmo_2x1024_128_2048cnn_1xhighway_weights.hdf5'
 
    elmo_embedder = ElmoTokenEmbedder(options_file, weight_file)
    word_embedding = BasicTextFieldEmbedder({"tokens": elmo_embedder})#, allow_unmatched_keys=True)

    #word_embedding = BasicTextFieldEmbedder({"tokens": bert_embedder}, allow_unmatched_keys=True)
    #BERT_DIM = word_embedding.get_output_dim()
    ELMO_DIM = word_embedding.get_output_dim()
    # At each batch, sample from the two and feed the LSTM.
    shared_layer = torch.nn.LSTM(ELMO_DIM, HIDDEN_DIM, batch_first=True, bidirectional=True)
    seq2seq = PytorchSeq2SeqWrapper(shared_layer)
    mention_feedforward = FeedForward(input_dim=512, num_layers=2, hidden_dims=150,
                                      activations=torch.nn.ReLU())
    antecedent_feedforward = FeedForward(input_dim=2304, num_layers=2, hidden_dims=150,
                                         activations=torch.nn.ReLU())

    model = CoreferenceResolver(vocab=vocab,
                                text_field_embedder=word_embedding,
                                context_layer=seq2seq,
                                mention_feedforward=mention_feedforward,
                                antecedent_feedforward=antecedent_feedforward,
                                feature_size=768,
                                max_span_width=max_span_width,
                                spans_per_word=0.4,
                                max_antecedents=250,
                                lexical_dropout=0.2)
    print(model)
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # And then we can do the shared loss.
    USE_GPU = 1
    trainer = Trainer(
        model=model.cuda(),
        optimizer=optimizer,
        iterator=iterator,
        validation_iterator = val_iterator, 
        train_dataset=train_ds,
        validation_dataset = val_ds, 
        validation_metric = "+coref_f1",
        cuda_device=0 if USE_GPU else -1,
        serialization_dir= directory + "saved_models/only_lee",
        num_epochs=epochs,
    )    

    metrics = trainer.train()
    # save the model
    with open(directory + "saved_models/current_run_model_state", 'wb') as f:
        torch.save(model.state_dict(), f)
Example #38
class BiEncoderTopXRetriever:
    def __init__(self, args, vocab, biencoder_onlyfor_encodingmentions,
                 faiss_stored_kb, reader_for_mentions, duidx2encoded_emb):
        self.args = args
        self.mention_encoder = biencoder_onlyfor_encodingmentions
        self.mention_encoder.eval()
        self.faiss_searcher = faiss_stored_kb
        self.reader_for_mentions = reader_for_mentions
        self.sequence_iterator = BasicIterator(
            batch_size=self.args.batch_size_for_eval)
        self.sequence_iterator.index_with(vocab)
        self.cuda_device = 0
        self.duidx2encoded_emb = duidx2encoded_emb

    def biencoder_tophits_retrievaler(self,
                                      train_or_dev_or_test_flag,
                                      how_many_top_hits_preserved=500):
        ds = self.reader_for_mentions.read(train_or_dev_or_test_flag)
        generator_for_biencoder = self.sequence_iterator(ds,
                                                         num_epochs=1,
                                                         shuffle=False)
        generator_for_biencoder_tqdm = tqdm(
            generator_for_biencoder,
            total=self.sequence_iterator.get_num_batches(ds))

        with torch.no_grad():
            for batch in generator_for_biencoder_tqdm:
                batch = nn_util.move_to_device(batch, self.cuda_device)
                mention_uniq_ids, encoded_mentions, gold_duidxs = self._extract_mention_idx_encoded_emb_and_its_gold_cuidx(
                    batch=batch)
                faiss_search_candidate_result_cuidxs = self.faiss_topx_retriever(
                    encoded_mentions=encoded_mentions,
                    how_many_top_hits_preserved=how_many_top_hits_preserved)
                yield faiss_search_candidate_result_cuidxs, mention_uniq_ids, gold_duidxs

    def faiss_topx_retriever(self, encoded_mentions,
                             how_many_top_hits_preserved):
        '''
        If the search method is cosine similarity and results are re-sorted with L2,
        we have to use self.args.cand_num_before_sort_candidates_forBLINKbiencoder.
        Args:
            encoded_mentions:
            how_many_top_hits_preserved:
        Returns:
        '''

        if self.args.search_method == 'cossim':
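            # Normalizing the query vectors (and, presumably, the vectors stored in
            # the index) makes the L2 / inner-product ranking equivalent to ranking
            # by cosine similarity.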
            encoded_mentions = normalize(torch.from_numpy(encoded_mentions),
                                         dim=1).cpu().detach().numpy()
            _, faiss_search_candidate_result_cuidxs = self.faiss_searcher.search(
                encoded_mentions, how_many_top_hits_preserved)

        else:
            # assert self.args.search_method == 'indexflatl2'
            _, faiss_search_candidate_result_cuidxs = self.faiss_searcher.search(
                encoded_mentions, how_many_top_hits_preserved)

        return faiss_search_candidate_result_cuidxs

    def calc_L2distance(self, h, t):
        diff = h - t
        return torch.norm(diff, dim=2)

    def tonp(self, tsr):
        return tsr.detach().cpu().numpy()

    def _extract_mention_idx_encoded_emb_and_its_gold_cuidx(self, batch):
        out_dict = self.mention_encoder(**batch)
        return self.tonp(out_dict['mention_uniq_id']), self.tonp(
            out_dict['contextualized_mention']), self.tonp(
                out_dict['gold_duidx'])
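
A minimal usage sketch for the retriever above (hypothetical wiring: the mention encoder, FAISS index, dataset reader, and duidx2encoded_emb cache all come from elsewhere in the project, and `args` is assumed to carry batch_size_for_eval and search_method):

# retriever = BiEncoderTopXRetriever(args, vocab, mention_encoder,
#                                    faiss_index, mention_reader, duidx2encoded_emb)
# for cand_duidxs, mention_ids, gold_duidxs in retriever.biencoder_tophits_retrievaler(
#         'dev', how_many_top_hits_preserved=100):
#     pass  # e.g. accumulate recall@k by checking gold_duidxs against cand_duidxs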
Example #39
def train_fever_v1():
    num_epoch = 10
    seed = 12
    batch_size = 128
    dev_batch_size = 128
    # experiment_name = "simple_nn_doc_first_sent"
    experiment_name = "simple_nn_doc"
    lazy = True
    torch.manual_seed(seed)
    contain_first_sentence = False
    pn_ratio = 1.0
    # keep_neg_sample_prob = 0.4
    # sample_prob_decay = 0.05

    dev_upstream_file = config.RESULT_PATH / "doc_retri_bls/docretri.basic.nopageview/dev.jsonl"
    train_upstream_file = config.RESULT_PATH / "doc_retri_bls/docretri.basic.nopageview/train.jsonl"
    dev_data_list = common.load_jsonl(dev_upstream_file)

    # Prepare Data
    token_indexers = {
        'tokens': SingleIdTokenIndexer(namespace='tokens'),  # This is the raw tokens
        'elmo_chars': ELMoTokenCharactersIndexer(namespace='elmo_characters')  # This is the elmo_characters
    }

    train_fever_data_reader = SSelectorReader(token_indexers=token_indexers, lazy=lazy, max_l=180)
    # dev_fever_data_reader = SSelectorReader(token_indexers=token_indexers, lazy=False)
    dev_fever_data_reader = SSelectorReader(token_indexers=token_indexers, lazy=lazy, max_l=180)

    cursor = fever_db.get_cursor()
    complete_upstream_dev_data = disamb.sample_disamb_inference(common.load_jsonl(dev_upstream_file), cursor,
                                                                contain_first_sentence=contain_first_sentence)
    print("Dev size:", len(complete_upstream_dev_data))
    dev_instances = dev_fever_data_reader.read(complete_upstream_dev_data)

    # Load Vocabulary
    biterator = BasicIterator(batch_size=batch_size)
    dev_biterator = BasicIterator(batch_size=dev_batch_size)

    vocab, weight_dict = load_vocab_embeddings(config.DATA_ROOT / "vocab_cache" / "nli_basic")
    # This is important
    vocab.add_token_to_namespace("true", namespace="selection_labels")
    vocab.add_token_to_namespace("false", namespace="selection_labels")
    vocab.add_token_to_namespace("hidden", namespace="selection_labels")
    vocab.change_token_with_index_to_namespace("hidden", -2, namespace='selection_labels')
    # Label value

    vocab.get_index_to_token_vocabulary('selection_labels')

    print(vocab.get_token_to_index_vocabulary('selection_labels'))
    print(vocab.get_vocab_size('tokens'))

    biterator.index_with(vocab)
    dev_biterator.index_with(vocab)

    # exit(0)
    # Build Model
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu", index=0)
    device_num = -1 if device.type == 'cpu' else 0

    model = Model(weight=weight_dict['glove.840B.300d'],
                  vocab_size=vocab.get_vocab_size('tokens'),
                  embedding_dim=300, max_l=160, num_of_class=2)

    model.display()
    model.to(device)

    # Create Log File
    file_path_prefix, date = save_tool.gen_file_prefix(f"{experiment_name}")
    # Save the source code.
    script_name = os.path.basename(__file__)
    with open(os.path.join(file_path_prefix, script_name), 'w') as out_f, open(__file__, 'r') as it:
        out_f.write(it.read())
        out_f.flush()
    # Save source code end.

    best_dev = -1
    iteration = 0

    start_lr = 0.0002
    optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=start_lr)
    criterion = nn.CrossEntropyLoss()

    for i_epoch in range(num_epoch):
        print("Resampling...")
        # Resampling
        complete_upstream_train_data = disamb.sample_disamb_training_v0(common.load_jsonl(train_upstream_file),
                                                                        cursor, pn_ratio, contain_first_sentence)
        print("Sample Prob.:", pn_ratio)

        print("Sampled_length:", len(complete_upstream_train_data))
        sampled_train_instances = train_fever_data_reader.read(complete_upstream_train_data)

        train_iter = biterator(sampled_train_instances, shuffle=True, num_epochs=1, cuda_device=device_num)
        for i, batch in tqdm(enumerate(train_iter)):
            model.train()
            out = model(batch)
            y = batch['selection_label']

            loss = criterion(out, y)

            # No decay
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            iteration += 1

            if i_epoch <= 5:
                mod = 1000
            else:
                mod = 500

            if iteration % mod == 0:
                eval_iter = dev_biterator(dev_instances, shuffle=False, num_epochs=1, cuda_device=device_num)
                complete_upstream_dev_data = hidden_eval(model, eval_iter, complete_upstream_dev_data)

                disamb.enforce_disabuigation_into_retrieval_result_v0(complete_upstream_dev_data,
                                                                      dev_data_list)
                oracle_score, pr, rec, f1 = c_scorer.fever_doc_only(dev_data_list, dev_data_list, max_evidence=5)

                print(f"Dev(raw_acc/pr/rec/f1):{oracle_score}/{pr}/{rec}/{f1}")
                print("Strict score:", oracle_score)
                print(f"Eval Tracking score:", f"{oracle_score}")

                need_save = False
                if oracle_score > best_dev:
                    best_dev = oracle_score
                    need_save = True

                if need_save:
                    save_path = os.path.join(
                        file_path_prefix,
                        f'i({iteration})_epoch({i_epoch})_'
                        f'(tra_score:{oracle_score}|pr:{pr}|rec:{rec}|f1:{f1})'
                    )

                    torch.save(model.state_dict(), save_path)
        print("Epoch Evaluation...")
        eval_iter = dev_biterator(dev_instances, shuffle=False, num_epochs=1, cuda_device=device_num)
        complete_upstream_dev_data = hidden_eval(model, eval_iter, complete_upstream_dev_data)

        disamb.enforce_disabuigation_into_retrieval_result_v0(complete_upstream_dev_data,
                                                              dev_data_list)
        oracle_score, pr, rec, f1 = c_scorer.fever_doc_only(dev_data_list, dev_data_list, max_evidence=5)

        print(f"Dev(raw_acc/pr/rec/f1):{oracle_score}/{pr}/{rec}/{f1}")
        print("Strict score:", oracle_score)
        print(f"Eval Tracking score:", f"{oracle_score}")

        need_save = False
        if oracle_score > best_dev:
            best_dev = oracle_score
            need_save = True

        if need_save:
            save_path = os.path.join(
                file_path_prefix,
                f'i({iteration})_epoch({i_epoch})_e'
                f'(tra_score:{oracle_score}|pr:{pr}|rec:{rec}|f1:{f1})'
            )

            torch.save(model.state_dict(), save_path)
Example #40
def attack_unitrigger(args,
                      model,
                      vocab,
                      target_label,
                      trigger_data,
                      init_trigger='the',
                      previous_inits=[],
                      previous_triggers=[],
                      exempt_triggers=[],
                      tree=None,
                      surrogate=None):
    # Register a gradient hook on the embeddings. This saves the gradient w.r.t. the word embeddings.
    # We use the gradient later in the attack.
    unitrigger_utils.add_hooks(model)
    embedding_weight = unitrigger_utils.get_embedding_weight(
        model)  # also save the word embedding matrix

    if init_trigger == "":
        # randomly choose a starting point
        total_vocab = vocab.get_vocab_size()
        choices = np.array(list(range(total_vocab)))
        # previous_list = previous_inits + previous_triggers
        previous_list = previous_inits
        # print(previous_list)
        if not len(previous_list) or args.trigger_neighbor < 1:
            idx = np.random.choice(choices)
        else:
            mask = np.array([True] * total_vocab)
            for word_idx in previous_list:
                word_embed = torch.nn.functional.embedding(
                    torch.LongTensor([word_idx]),
                    embedding_weight).detach().cpu().numpy()[0]
                neighbors = tree.query([word_embed],
                                       k=args.trigger_neighbor,
                                       return_distance=False)
                mask[neighbors] = False
            idx = np.random.choice(choices[mask])

        init_trigger = vocab.get_token_from_index(idx)
        previous_inits.append(idx)

    iterator = BasicIterator(batch_size=args.universal_batch_size)
    iterator.index_with(vocab)

    model.train()  # rnn cannot do backwards in eval mode
    # initialize triggers which are concatenated to the input
    trigger_token_ids = [vocab.get_token_index(init_trigger)
                         ] * args.trigger_length

    for batch in lazy_groups_of(iterator(trigger_data,
                                         num_epochs=args.trigger_epochs,
                                         shuffle=True),
                                group_size=1):
        averaged_grad = unitrigger_utils.get_average_grad(
            model, batch, trigger_token_ids)
        cand_trigger_token_ids = hotflip_attack(
            averaged_grad,
            embedding_weight,
            trigger_token_ids,
            num_candidates=args.num_candidates,
            exempt_candidates=exempt_triggers,
            increase_loss=True)

        cand_trigger_token_ids = [
            a[args.trigger_ignore:] for a in cand_trigger_token_ids
        ]
        # Tries all of the candidates and returns the trigger sequence with highest loss.
        trigger_token_ids = unitrigger_utils.get_best_candidates(
            model,
            batch,
            trigger_token_ids,
            cand_trigger_token_ids,
            surrogate=surrogate)

    for token_id in trigger_token_ids:
        if token_id not in previous_triggers:
            previous_triggers.append(token_id)
    return trigger_token_ids, init_trigger
def eval_fever():
    # save_path = "/home/easonnie/projects/MiscEnc/saved_models/06-07-21:58:06_esim_elmo/i(60900)_epoch(4)_um_dev(80.03458096013019)_m_dev(79.174732552216)_seed(12)"
    save_path = "/home/easonnie/projects/MiscEnc/saved_models/07-02-14:40:01_esim_elmo_linear_amr_cs_score_filtering_0.5/i(5900)_epoch(3)_um_dev(39.73759153783564)_m_dev(40.18339276617422)_seed(12)"
    # save_path = "/home/easonnie/projects/MiscEnc/saved_models/07-02-14:42:34_esim_elmo_cs_score_filtering_0.7/i(1300)_epoch(4)_um_dev(32.55695687550855)_m_dev(32.42995415180846)_seed(12)"
    batch_size = 32

    # Prepare Data
    token_indexers = {
        'tokens':
        SingleIdTokenIndexer(namespace='tokens'),  # This is the raw tokens
        'elmo_chars': ELMoTokenCharactersIndexer(
            namespace='elmo_characters')  # This is the elmo_characters
    }

    csnli_dataset_reader = CNLIReader(
        token_indexers=token_indexers,
        example_filter=lambda x: float(x['cs_score']) >= 0.7)

    # mnli_train_data_path = config.DATA_ROOT / "mnli/multinli_1.0_train.jsonl"
    mnli_m_dev_data_path = config.DATA_ROOT / "amrs/mnli_amr_ln/mnli_mdev.jsonl.cs"
    mnli_um_dev_data_path = config.DATA_ROOT / "amrs/mnli_amr_ln/mnli_umdev.jsonl.cs"

    # mnli_train_instances = csnli_dataset_reader.read(mnli_train_data_path)
    mnli_m_dev_instances = csnli_dataset_reader.read(mnli_m_dev_data_path)
    mnli_um_dev_instances = csnli_dataset_reader.read(mnli_um_dev_data_path)

    # Load Vocabulary
    biterator = BasicIterator(batch_size=batch_size)

    vocab, weight_dict = load_vocab_embeddings(config.DATA_ROOT /
                                               "vocab_cache" / "nli")
    vocab.change_token_with_index_to_namespace('hidden',
                                               -2,
                                               namespace='labels')

    print(vocab.get_token_to_index_vocabulary('labels'))
    print(vocab.get_vocab_size('tokens'))

    biterator.index_with(vocab)

    # Build Model
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu",
                          index=0)
    device_num = -1 if device.type == 'cpu' else 0

    model = Model(weight=weight_dict['glove.840B.300d'],
                  vocab_size=vocab.get_vocab_size('tokens'),
                  embedding_dim=300)

    model.load_state_dict(torch.load(save_path))

    model.display()
    model.to(device)

    # Create Log File

    criterion = nn.CrossEntropyLoss()

    eval_iter = biterator(mnli_m_dev_instances,
                          shuffle=False,
                          num_epochs=1,
                          cuda_device=device_num)
    m_dev_score, m_dev_loss = eval_model(model, eval_iter, criterion)

    eval_iter = biterator(mnli_um_dev_instances,
                          shuffle=False,
                          num_epochs=1,
                          cuda_device=device_num)
    um_dev_score, um_dev_loss = eval_model(model, eval_iter, criterion)

    print(f"Dev(M):{m_dev_score}/{m_dev_loss}")
    print(f"Dev(UM):{um_dev_score}/{um_dev_loss}")
class TestCallbackTrainer(ModelTestCase):
    def setUp(self):
        super().setUp()

        # A lot of the tests want access to the metric tracker
        # so we add a property that gets it by grabbing it from
        # the relevant callback.
        def metric_tracker(self: CallbackTrainer):
            for callback in self.handler.callbacks():
                if isinstance(callback, TrackMetrics):
                    return callback.metric_tracker
            return None

        setattr(CallbackTrainer, "metric_tracker", property(metric_tracker))

        self.instances = SequenceTaggingDatasetReader().read(
            self.FIXTURES_ROOT / "data" / "sequence_tagging.tsv")
        vocab = Vocabulary.from_instances(self.instances)
        self.vocab = vocab
        self.model_params = Params({
            "text_field_embedder": {
                "token_embedders": {
                    "tokens": {
                        "type": "embedding",
                        "embedding_dim": 5
                    }
                }
            },
            "encoder": {
                "type": "lstm",
                "input_size": 5,
                "hidden_size": 7,
                "num_layers": 2
            },
        })
        self.model = SimpleTagger.from_params(vocab=self.vocab,
                                              params=self.model_params)
        self.optimizer = torch.optim.SGD(self.model.parameters(),
                                         0.01,
                                         momentum=0.9)
        self.iterator = BasicIterator(batch_size=2)
        self.iterator.index_with(vocab)

    def tearDown(self):
        super().tearDown()
        delattr(CallbackTrainer, "metric_tracker")

    def default_callbacks(
        self,
        validation_metric: str = "-loss",
        patience: int = None,
        max_checkpoints: int = 20,
        checkpoint_every: int = None,
        model_save_interval: float = None,
        serialization_dir: str = "__DEFAULT__",
        validation_data: Iterable[Instance] = None,
        validation_iterator: DataIterator = None,
        batch_size: int = 2,
    ):
        if serialization_dir == "__DEFAULT__":
            serialization_dir = self.TEST_DIR
        checkpointer = Checkpointer(serialization_dir, checkpoint_every,
                                    max_checkpoints)
        tensorboard = TensorboardWriter(get_batch_num_total=lambda: None)

        if validation_iterator is None:
            validation_iterator = BasicIterator(batch_size=batch_size)
            validation_iterator.index_with(self.vocab)

        return [
            LogToTensorboard(log_batch_size_period=10,
                             tensorboard=tensorboard),
            Checkpoint(checkpointer, model_save_interval),
            Validate(
                validation_data=self.instances
                if validation_data is None else validation_data,
                validation_iterator=validation_iterator,
            ),
            TrackMetrics(patience, validation_metric),
            GradientNormAndClip(),
        ]

    def test_end_to_end(self):
        self.ensure_model_can_train_save_and_load(
            self.FIXTURES_ROOT / "simple_tagger" /
            "experiment_callback_trainer.json")

    def test_trainer_can_run_from_params(self):

        from allennlp.commands.train import train_model

        params = Params({
            "trainer": {
                "type":
                "callback",
                "optimizer": {
                    "type": "sgd",
                    "lr": 0.01,
                    "momentum": 0.9
                },
                "num_epochs":
                2,
                "callbacks": [
                    "checkpoint",
                    "track_metrics",
                    "validate",
                    {
                        "type": "log_to_tensorboard",
                        "log_batch_size_period": 10
                    },
                ],
            },
            "dataset_reader": {
                "type": "sequence_tagging"
            },
            "train_data_path":
            str(self.FIXTURES_ROOT / "data" / "sequence_tagging.tsv"),
            "validation_data_path":
            str(self.FIXTURES_ROOT / "data" / "sequence_tagging.tsv"),
            "model": {
                "type": "simple_tagger",
                "text_field_embedder": {
                    "token_embedders": {
                        "tokens": {
                            "type": "embedding",
                            "embedding_dim": 5
                        }
                    }
                },
                "encoder": {
                    "type": "lstm",
                    "input_size": 5,
                    "hidden_size": 7,
                    "num_layers": 2
                },
            },
            "iterator": {
                "type": "basic",
                "batch_size": 2
            },
        })

        train_model(params, self.TEST_DIR)
        with open(self.TEST_DIR / "metrics.json") as f:
            metrics = json.load(f)
        assert "best_validation_loss" in metrics
        assert isinstance(metrics["best_validation_loss"], float)
        assert "best_validation_accuracy" in metrics
        assert isinstance(metrics["best_validation_accuracy"], float)
        assert "best_validation_accuracy3" in metrics
        assert isinstance(metrics["best_validation_accuracy3"], float)
        assert "best_epoch" in metrics
        assert isinstance(metrics["best_epoch"], int)

    def test_trainer_can_run(self):
        trainer = CallbackTrainer(
            model=self.model,
            training_data=self.instances,
            iterator=self.iterator,
            optimizer=self.optimizer,
            callbacks=self.default_callbacks(serialization_dir=None),
            num_epochs=2,
        )
        metrics = trainer.train()
        assert "best_validation_loss" in metrics
        assert isinstance(metrics["best_validation_loss"], float)
        assert "best_validation_accuracy" in metrics
        assert isinstance(metrics["best_validation_accuracy"], float)
        assert "best_validation_accuracy3" in metrics
        assert isinstance(metrics["best_validation_accuracy3"], float)
        assert "best_epoch" in metrics
        assert isinstance(metrics["best_epoch"], int)
        assert "peak_cpu_memory_MB" in metrics

        # Making sure that both increasing and decreasing validation metrics work.
        trainer = CallbackTrainer(
            model=self.model,
            training_data=self.instances,
            iterator=self.iterator,
            optimizer=self.optimizer,
            callbacks=self.default_callbacks(validation_metric="+loss",
                                             serialization_dir=None),
            num_epochs=2,
        )
        metrics = trainer.train()
        assert "best_validation_loss" in metrics
        assert isinstance(metrics["best_validation_loss"], float)
        assert "best_validation_accuracy" in metrics
        assert isinstance(metrics["best_validation_accuracy"], float)
        assert "best_validation_accuracy3" in metrics
        assert isinstance(metrics["best_validation_accuracy3"], float)
        assert "best_epoch" in metrics
        assert isinstance(metrics["best_epoch"], int)
        assert "peak_cpu_memory_MB" in metrics
        assert isinstance(metrics["peak_cpu_memory_MB"], float)
        assert metrics["peak_cpu_memory_MB"] > 0

    @responses.activate
    def test_trainer_posts_to_url(self):
        url = "https://slack.com?webhook=ewifjweoiwjef"
        responses.add(responses.POST, url)
        post_to_url = PostToUrl(url, message="only a test")
        callbacks = self.default_callbacks() + [post_to_url]
        trainer = CallbackTrainer(
            model=self.model,
            training_data=self.instances,
            iterator=self.iterator,
            optimizer=self.optimizer,
            num_epochs=2,
            callbacks=callbacks,
        )
        trainer.train()

        assert len(responses.calls) == 1
        assert responses.calls[
            0].response.request.body == b'{"text": "only a test"}'

    def test_trainer_can_run_exponential_moving_average(self):
        moving_average = ExponentialMovingAverage(
            self.model.named_parameters(), decay=0.9999)
        callbacks = self.default_callbacks() + [
            UpdateMovingAverage(moving_average)
        ]
        trainer = CallbackTrainer(
            model=self.model,
            training_data=self.instances,
            iterator=self.iterator,
            optimizer=self.optimizer,
            num_epochs=2,
            callbacks=callbacks,
        )
        trainer.train()

    def test_trainer_can_run_ema_from_params(self):
        uma_params = Params({"moving_average": {"decay": 0.9999}})
        callbacks = self.default_callbacks() + [
            UpdateMovingAverage.from_params(uma_params, self.model)
        ]
        trainer = CallbackTrainer(
            model=self.model,
            training_data=self.instances,
            iterator=self.iterator,
            optimizer=self.optimizer,
            num_epochs=2,
            callbacks=callbacks,
        )
        trainer.train()

    @pytest.mark.skipif(not torch.cuda.is_available(),
                        reason="No CUDA device registered.")
    def test_trainer_can_run_cuda(self):
        self.model.cuda()
        trainer = CallbackTrainer(
            self.model,
            training_data=self.instances,
            iterator=self.iterator,
            optimizer=self.optimizer,
            num_epochs=2,
            callbacks=self.default_callbacks(),
            cuda_device=0,
        )
        trainer.train()

    @pytest.mark.skipif(torch.cuda.device_count() < 2,
                        reason="Need multiple GPUs.")
    def test_trainer_can_run_multiple_gpu(self):
        self.model.cuda()

        class MetaDataCheckWrapper(Model):
            """
            Checks that the metadata field has been correctly split across the batch dimension
            when running on multiple gpus.
            """
            def __init__(self, model):
                super().__init__(model.vocab)
                self.model = model

            def forward(self,
                        **kwargs) -> Dict[str, torch.Tensor]:  # type: ignore
                assert (
                    "metadata" in kwargs and "tags" in kwargs
                ), f"tokens and metadata must be provided. Got {kwargs.keys()} instead."
                batch_size = kwargs["tokens"]["tokens"].size()[0]
                assert len(kwargs["metadata"]) == batch_size, (
                    f"metadata must be split appropriately. Expected {batch_size} elements, "
                    f"got {len(kwargs['metadata'])} elements.")
                return self.model.forward(**kwargs)

        multigpu_iterator = BasicIterator(batch_size=4)
        multigpu_iterator.index_with(self.vocab)
        trainer = CallbackTrainer(
            MetaDataCheckWrapper(self.model),
            training_data=self.instances,
            iterator=multigpu_iterator,
            optimizer=self.optimizer,
            num_epochs=2,
            callbacks=self.default_callbacks(),
            cuda_device=[0, 1],
        )
        metrics = trainer.train()
        assert "peak_cpu_memory_MB" in metrics
        assert isinstance(metrics["peak_cpu_memory_MB"], float)
        assert metrics["peak_cpu_memory_MB"] > 0
        assert "peak_gpu_0_memory_MB" in metrics
        assert isinstance(metrics["peak_gpu_0_memory_MB"], int)
        assert "peak_gpu_1_memory_MB" in metrics
        assert isinstance(metrics["peak_gpu_1_memory_MB"], int)

    @pytest.mark.skipif(torch.cuda.device_count() < 2,
                        reason="Need multiple GPUs.")
    def test_production_rule_field_with_multiple_gpus(self):
        wikitables_dir = "allennlp/tests/fixtures/data/wikitables/"
        offline_lf_directory = wikitables_dir + "action_space_walker_output/"
        wikitables_reader = WikiTablesDatasetReader(
            tables_directory=wikitables_dir,
            offline_logical_forms_directory=offline_lf_directory)
        instances = wikitables_reader.read(wikitables_dir +
                                           "sample_data.examples")
        archive_path = (self.FIXTURES_ROOT / "semantic_parsing" /
                        "wikitables" / "serialization" / "model.tar.gz")
        model = load_archive(archive_path).model
        model.cuda()

        multigpu_iterator = BasicIterator(batch_size=4)
        multigpu_iterator.index_with(model.vocab)

        trainer = CallbackTrainer(
            model,
            instances,
            multigpu_iterator,
            self.optimizer,
            num_epochs=2,
            cuda_device=[0, 1],
            callbacks=[GradientNormAndClip()],
        )
        trainer.train()

    def test_trainer_can_resume_training(self):
        trainer = CallbackTrainer(
            self.model,
            training_data=self.instances,
            iterator=self.iterator,
            optimizer=self.optimizer,
            callbacks=self.default_callbacks(),
            num_epochs=1,
            serialization_dir=self.TEST_DIR,
        )
        trainer.train()

        new_trainer = CallbackTrainer(
            self.model,
            training_data=self.instances,
            iterator=self.iterator,
            optimizer=self.optimizer,
            callbacks=self.default_callbacks(),
            num_epochs=3,
            serialization_dir=self.TEST_DIR,
        )

        new_trainer.handler.fire_event(Events.TRAINING_START)

        assert new_trainer.epoch_number == 1

        tracker = new_trainer.metric_tracker

        assert tracker is not None
        assert tracker.is_best_so_far()
        assert tracker._best_so_far is not None

        new_trainer.train()

    def test_trainer_can_resume_training_for_exponential_moving_average(self):
        moving_average = ExponentialMovingAverage(
            self.model.named_parameters())
        callbacks = self.default_callbacks() + [
            UpdateMovingAverage(moving_average)
        ]

        trainer = CallbackTrainer(
            self.model,
            training_data=self.instances,
            iterator=self.iterator,
            optimizer=self.optimizer,
            num_epochs=1,
            serialization_dir=self.TEST_DIR,
            callbacks=callbacks,
        )
        trainer.train()

        new_moving_average = ExponentialMovingAverage(
            self.model.named_parameters())
        new_callbacks = self.default_callbacks() + [
            UpdateMovingAverage(new_moving_average)
        ]

        new_trainer = CallbackTrainer(
            self.model,
            training_data=self.instances,
            iterator=self.iterator,
            optimizer=self.optimizer,
            num_epochs=3,
            serialization_dir=self.TEST_DIR,
            callbacks=new_callbacks,
        )

        new_trainer.handler.fire_event(Events.TRAINING_START)
        assert new_trainer.epoch_number == 1

        tracker = trainer.metric_tracker
        assert tracker.is_best_so_far()
        assert tracker._best_so_far is not None

        new_trainer.train()

    def test_training_metrics_consistent_with_and_without_validation(self):
        default_callbacks = self.default_callbacks(serialization_dir=None)
        default_callbacks_without_validation = [
            callback for callback in default_callbacks
            if not isinstance(callback, Validate)
        ]
        trainer1 = CallbackTrainer(
            copy.deepcopy(self.model),
            training_data=self.instances,
            iterator=self.iterator,
            optimizer=copy.deepcopy(self.optimizer),
            callbacks=default_callbacks_without_validation,
            num_epochs=1,
            serialization_dir=None,
        )

        trainer1.train()

        trainer2 = CallbackTrainer(
            copy.deepcopy(self.model),
            training_data=self.instances,
            iterator=self.iterator,
            optimizer=copy.deepcopy(self.optimizer),
            callbacks=default_callbacks,
            num_epochs=1,
            serialization_dir=None,
        )

        trainer2.train()
        metrics1 = trainer1.train_metrics
        metrics2 = trainer2.train_metrics
        assert metrics1.keys() == metrics2.keys()
        for key in ["accuracy", "accuracy3", "loss"]:
            np.testing.assert_almost_equal(metrics1[key], metrics2[key])

    def test_validation_metrics_consistent_with_and_without_tracking(self):
        default_callbacks = self.default_callbacks(serialization_dir=None)
        default_callbacks_without_tracking = [
            callback for callback in default_callbacks
            if not isinstance(callback, TrackMetrics)
        ]
        trainer1 = CallbackTrainer(
            copy.deepcopy(self.model),
            training_data=self.instances,
            iterator=self.iterator,
            optimizer=copy.deepcopy(self.optimizer),
            callbacks=default_callbacks_without_tracking,
            num_epochs=1,
            serialization_dir=None,
        )

        trainer1.train()

        trainer2 = CallbackTrainer(
            copy.deepcopy(self.model),
            training_data=self.instances,
            iterator=self.iterator,
            optimizer=copy.deepcopy(self.optimizer),
            callbacks=default_callbacks,
            num_epochs=1,
            serialization_dir=None,
        )

        trainer2.train()
        metrics1 = trainer1.val_metrics
        metrics2 = trainer2.val_metrics
        assert metrics1.keys() == metrics2.keys()
        for key in ["accuracy", "accuracy3", "loss"]:
            np.testing.assert_almost_equal(metrics1[key], metrics2[key])

    def test_metric_only_considered_best_so_far_when_strictly_better_than_those_before_it_increasing_metric(
            self):
        new_trainer = CallbackTrainer(
            self.model,
            training_data=self.instances,
            iterator=self.iterator,
            optimizer=self.optimizer,
            num_epochs=3,
            serialization_dir=self.TEST_DIR,
            callbacks=self.default_callbacks("+test", patience=5),
        )
        tracker = new_trainer.metric_tracker

        # when it is the only metric it should be considered the best
        new_tracker = copy.deepcopy(tracker)
        new_tracker.add_metric(1)
        assert new_tracker.is_best_so_far()

        # when it is the same as one before it, it is not considered the best
        new_tracker = copy.deepcopy(tracker)
        new_tracker.add_metrics([0.3, 0.3, 0.3, 0.2, 0.5, 0.1, 0.3])
        assert not new_tracker.is_best_so_far()

        # when it is the best it is considered the best
        new_tracker = copy.deepcopy(tracker)
        new_tracker.add_metrics([0.3, 0.3, 0.3, 0.2, 0.5, 0.1, 13])
        assert new_tracker.is_best_so_far()

        # when it is not the best, it is not considered the best
        new_tracker = copy.deepcopy(tracker)
        new_tracker.add_metrics([0.3, 0.3, 0.3, 0.2, 0.5, 0.1, 0.0013])
        assert not new_tracker.is_best_so_far()

    def test_metric_only_considered_best_so_far_when_strictly_better_than_those_before_it_decreasing_metric(
            self):
        new_trainer = CallbackTrainer(
            self.model,
            training_data=self.instances,
            iterator=self.iterator,
            optimizer=self.optimizer,
            num_epochs=3,
            serialization_dir=self.TEST_DIR,
            callbacks=self.default_callbacks(patience=5),
        )
        tracker = new_trainer.metric_tracker

        # when it is the only metric it should be considered the best
        new_tracker = copy.deepcopy(tracker)
        new_tracker.add_metric(1)
        assert new_tracker.is_best_so_far()

        # when it is the same as one before it, it is not considered the best
        new_tracker = copy.deepcopy(tracker)
        new_tracker.add_metrics([0.3, 0.3, 0.3, 0.2, 0.5, 0.1, 0.3])
        assert not new_tracker.is_best_so_far()

        # when it is the best it is considered the best
        new_tracker = copy.deepcopy(tracker)
        new_tracker.add_metrics([0.3, 0.3, 0.3, 0.2, 0.5, 0.1, 0.0013])
        assert new_tracker.is_best_so_far()

        # when it is not the best, it is not considered the best
        new_tracker = copy.deepcopy(tracker)
        new_tracker.add_metrics([0.3, 0.3, 0.3, 0.2, 0.5, 0.1, 13])
        assert not new_tracker.is_best_so_far()

    def test_should_stop_early_with_increasing_metric(self):
        new_trainer = CallbackTrainer(
            self.model,
            training_data=self.instances,
            iterator=self.iterator,
            optimizer=self.optimizer,
            num_epochs=3,
            serialization_dir=self.TEST_DIR,
            callbacks=self.default_callbacks(patience=5,
                                             validation_metric="+test"),
        )

        tracker = new_trainer.metric_tracker

        new_tracker = copy.deepcopy(tracker)
        new_tracker.add_metrics([0.5, 0.3, 0.2, 0.1, 0.4, 0.4])
        assert new_tracker.should_stop_early()

        new_tracker = copy.deepcopy(tracker)
        new_tracker.add_metrics([0.3, 0.3, 0.3, 0.2, 0.5, 0.1])
        assert not new_tracker.should_stop_early()

    def test_should_stop_early_with_decreasing_metric(self):
        new_trainer = CallbackTrainer(
            self.model,
            training_data=self.instances,
            iterator=self.iterator,
            optimizer=self.optimizer,
            num_epochs=3,
            serialization_dir=self.TEST_DIR,
            callbacks=self.default_callbacks(patience=5),
        )
        tracker = new_trainer.metric_tracker

        new_tracker = copy.deepcopy(tracker)
        new_tracker.add_metrics([0.02, 0.3, 0.2, 0.1, 0.4, 0.4])
        assert new_tracker.should_stop_early()

        new_tracker = copy.deepcopy(tracker)
        new_tracker.add_metrics([0.3, 0.3, 0.2, 0.1, 0.4, 0.5])
        assert not new_tracker.should_stop_early()

        new_tracker = copy.deepcopy(tracker)
        new_tracker.add_metrics([0.1, 0.3, 0.2, 0.1, 0.4, 0.5])
        assert new_tracker.should_stop_early()

    def test_should_stop_early_with_early_stopping_disabled(self):
        # Increasing metric
        trainer = CallbackTrainer(
            self.model,
            training_data=self.instances,
            iterator=self.iterator,
            optimizer=self.optimizer,
            num_epochs=100,
            callbacks=self.default_callbacks(validation_metric="+test"),
        )
        tracker = trainer.metric_tracker
        tracker.add_metrics([float(i) for i in reversed(range(20))])
        assert not tracker.should_stop_early()

        # Decreasing metric
        trainer = CallbackTrainer(
            self.model,
            training_data=self.instances,
            iterator=self.iterator,
            optimizer=self.optimizer,
            num_epochs=100,
            callbacks=self.default_callbacks(validation_metric="-test"),
        )
        tracker = trainer.metric_tracker
        tracker.add_metrics([float(i) for i in range(20)])
        assert not tracker.should_stop_early()

    def test_should_stop_early_with_invalid_patience(self):
        for patience in [0, -1, -2, 1.5, "None"]:
            with pytest.raises(ConfigurationError):
                CallbackTrainer(
                    self.model,
                    training_data=self.instances,
                    iterator=self.iterator,
                    optimizer=self.optimizer,
                    num_epochs=100,
                    callbacks=self.default_callbacks(
                        patience=patience, validation_metric="+test"),
                )

    def test_trainer_can_run_and_resume_with_momentum_scheduler(self):
        scheduler = MomentumScheduler.from_params(
            self.optimizer,
            Params({
                "type": "inverted_triangular",
                "cool_down": 2,
                "warm_up": 2
            }))
        callbacks = self.default_callbacks() + [UpdateMomentum(scheduler)]
        trainer = CallbackTrainer(
            model=self.model,
            training_data=self.instances,
            iterator=self.iterator,
            optimizer=self.optimizer,
            num_epochs=4,
            callbacks=callbacks,
            serialization_dir=self.TEST_DIR,
        )
        trainer.train()

        new_scheduler = MomentumScheduler.from_params(
            self.optimizer,
            Params({
                "type": "inverted_triangular",
                "cool_down": 2,
                "warm_up": 2
            }))
        new_callbacks = self.default_callbacks() + [
            UpdateMomentum(new_scheduler)
        ]
        new_trainer = CallbackTrainer(
            model=self.model,
            training_data=self.instances,
            iterator=self.iterator,
            optimizer=self.optimizer,
            num_epochs=6,
            callbacks=new_callbacks,
            serialization_dir=self.TEST_DIR,
        )
        new_trainer.handler.fire_event(Events.TRAINING_START)
        assert new_trainer.epoch_number == 4
        assert new_scheduler.last_epoch == 3
        new_trainer.train()

    def test_trainer_can_run_with_lr_scheduler(self):
        lr_params = Params({"type": "reduce_on_plateau"})
        lr_scheduler = LearningRateScheduler.from_params(
            self.optimizer, lr_params)
        callbacks = self.default_callbacks() + [
            UpdateLearningRate(lr_scheduler)
        ]

        trainer = CallbackTrainer(
            model=self.model,
            training_data=self.instances,
            iterator=self.iterator,
            optimizer=self.optimizer,
            callbacks=callbacks,
            num_epochs=2,
        )
        trainer.train()

    def test_trainer_can_resume_with_lr_scheduler(self):
        lr_scheduler = LearningRateScheduler.from_params(
            self.optimizer, Params({
                "type": "exponential",
                "gamma": 0.5
            }))
        callbacks = self.default_callbacks() + [
            UpdateLearningRate(lr_scheduler)
        ]

        trainer = CallbackTrainer(
            model=self.model,
            training_data=self.instances,
            iterator=self.iterator,
            optimizer=self.optimizer,
            callbacks=callbacks,
            num_epochs=2,
            serialization_dir=self.TEST_DIR,
        )
        trainer.train()

        new_lr_scheduler = LearningRateScheduler.from_params(
            self.optimizer, Params({
                "type": "exponential",
                "gamma": 0.5
            }))
        callbacks = self.default_callbacks() + [
            UpdateLearningRate(new_lr_scheduler)
        ]

        new_trainer = CallbackTrainer(
            model=self.model,
            training_data=self.instances,
            iterator=self.iterator,
            optimizer=self.optimizer,
            callbacks=callbacks,
            num_epochs=4,
            serialization_dir=self.TEST_DIR,
        )
        new_trainer.handler.fire_event(Events.TRAINING_START)
        assert new_trainer.epoch_number == 2
        assert new_lr_scheduler.lr_scheduler.last_epoch == 1
        new_trainer.train()

    def test_trainer_raises_on_model_with_no_loss_key(self):
        class FakeModel(Model):
            def forward(self, **kwargs):
                return {}

        with pytest.raises(RuntimeError):
            trainer = CallbackTrainer(
                FakeModel(None),
                training_data=self.instances,
                iterator=self.iterator,
                optimizer=self.optimizer,
                callbacks=self.default_callbacks(),
                num_epochs=2,
                serialization_dir=self.TEST_DIR,
            )
            trainer.train()

    def test_trainer_can_log_histograms(self):
        # enable activation logging
        for module in self.model.modules():
            module.should_log_activations = True

        callbacks = [
            cb for cb in self.default_callbacks()
            if not isinstance(cb, LogToTensorboard)
        ]
        # The lambda: None is unfortunate, but it will get replaced by the callback.
        tensorboard = TensorboardWriter(lambda: None, histogram_interval=2)
        callbacks.append(LogToTensorboard(tensorboard))

        trainer = CallbackTrainer(
            self.model,
            training_data=self.instances,
            iterator=self.iterator,
            optimizer=self.optimizer,
            num_epochs=3,
            serialization_dir=self.TEST_DIR,
            callbacks=callbacks,
        )
        trainer.train()

    def test_trainer_respects_num_serialized_models_to_keep(self):
        trainer = CallbackTrainer(
            self.model,
            training_data=self.instances,
            iterator=self.iterator,
            optimizer=self.optimizer,
            num_epochs=5,
            serialization_dir=self.TEST_DIR,
            callbacks=self.default_callbacks(max_checkpoints=3),
        )
        trainer.train()

        # Now check the serialized files
        for prefix in ["model_state_epoch_*", "training_state_epoch_*"]:
            file_names = glob.glob(os.path.join(self.TEST_DIR, prefix))
            epochs = [
                int(re.search(r"_([0-9])\.th", fname).group(1))
                for fname in file_names
            ]
            assert sorted(epochs) == [2, 3, 4]

    def test_trainer_saves_metrics_every_epoch(self):
        trainer = CallbackTrainer(
            model=self.model,
            training_data=self.instances,
            iterator=self.iterator,
            optimizer=self.optimizer,
            num_epochs=5,
            serialization_dir=self.TEST_DIR,
            callbacks=self.default_callbacks(max_checkpoints=3),
        )
        trainer.train()

        for epoch in range(5):
            epoch_file = self.TEST_DIR / f"metrics_epoch_{epoch}.json"
            assert epoch_file.exists()
            metrics = json.load(open(epoch_file))
            assert "validation_loss" in metrics
            assert "best_validation_loss" in metrics
            assert metrics.get("epoch") == epoch

    def test_trainer_respects_keep_serialized_model_every_num_seconds(self):
        # To test:
        #   Create an iterator that sleeps for 2.5 seconds per epoch, so the total training
        #       time for one epoch is slightly greater than 2.5 seconds.
        #   Run for 6 epochs, keeping the last 2 models and also keeping a model every 5 seconds.
        #   Check the resulting checkpoints.  We should then have models at epochs
        #       2 and 4, plus the last two at 5 and 6.
        class WaitingIterator(BasicIterator):
            def _create_batches(self, *args, **kwargs):
                time.sleep(2.5)
                return super()._create_batches(*args, **kwargs)

        waiting_iterator = WaitingIterator(batch_size=2)
        waiting_iterator.index_with(self.vocab)

        # Don't want validation iterator to wait.
        viterator = BasicIterator(batch_size=2)
        viterator.index_with(self.vocab)

        trainer = CallbackTrainer(
            self.model,
            training_data=self.instances,
            iterator=waiting_iterator,
            optimizer=self.optimizer,
            num_epochs=6,
            serialization_dir=self.TEST_DIR,
            callbacks=self.default_callbacks(max_checkpoints=2,
                                             checkpoint_every=5,
                                             validation_iterator=viterator),
        )
        trainer.train()

        # Now check the serialized files
        for prefix in ["model_state_epoch_*", "training_state_epoch_*"]:
            file_names = glob.glob(os.path.join(self.TEST_DIR, prefix))
            epochs = [
                int(re.search(r"_([0-9])\.th", fname).group(1))
                for fname in file_names
            ]
            # epoch N has N-1 in file name
            assert sorted(epochs) == [1, 3, 4, 5]

    def test_trainer_can_log_learning_rates_tensorboard(self):
        callbacks = [
            cb for cb in self.default_callbacks()
            if not isinstance(cb, LogToTensorboard)
        ]
        # The lambda: None is unfortunate, but it will get replaced by the callback.
        tensorboard = TensorboardWriter(lambda: None,
                                        should_log_learning_rate=True,
                                        summary_interval=2)
        callbacks.append(LogToTensorboard(tensorboard))

        trainer = CallbackTrainer(
            self.model,
            training_data=self.instances,
            iterator=self.iterator,
            optimizer=self.optimizer,
            num_epochs=2,
            serialization_dir=self.TEST_DIR,
            callbacks=callbacks,
        )

        trainer.train()

    def test_trainer_saves_models_at_specified_interval(self):
        iterator = BasicIterator(batch_size=4)
        iterator.index_with(self.vocab)

        trainer = CallbackTrainer(
            self.model,
            training_data=self.instances,
            iterator=iterator,
            optimizer=self.optimizer,
            num_epochs=2,
            serialization_dir=self.TEST_DIR,
            callbacks=self.default_callbacks(model_save_interval=0.0001),
        )

        trainer.train()

        # Now check the serialized files for models saved during the epoch.
        prefix = "model_state_epoch_*"
        file_names = sorted(glob.glob(os.path.join(self.TEST_DIR, prefix)))
        epochs = [
            re.search(r"_([0-9\.\-]+)\.th", fname).group(1)
            for fname in file_names
        ]
        # We should have checkpoints at the end of each epoch and during each, e.g.
        # [0.timestamp, 0, 1.timestamp, 1]
        assert len(epochs) == 4
        assert epochs[3] == "1"
        assert "." in epochs[0]

        # Now make certain we can restore from a timestamped checkpoint.
        # To do so, remove the checkpoints from the end of epochs 1 and 2, so
        # that we are forced to restore from the timestamped checkpoints.
        for k in range(2):
            os.remove(
                os.path.join(self.TEST_DIR,
                             "model_state_epoch_{}.th".format(k)))
            os.remove(
                os.path.join(self.TEST_DIR,
                             "training_state_epoch_{}.th".format(k)))
        os.remove(os.path.join(self.TEST_DIR, "best.th"))

        restore_trainer = CallbackTrainer(
            self.model,
            training_data=self.instances,
            iterator=iterator,
            optimizer=self.optimizer,
            num_epochs=2,
            serialization_dir=self.TEST_DIR,
            callbacks=self.default_callbacks(model_save_interval=0.0001),
        )
        restore_trainer.handler.fire_event(Events.TRAINING_START)
        assert restore_trainer.epoch_number == 2
        # One batch per epoch.
        assert restore_trainer.batch_num_total == 2

    def test_trainer_saves_and_loads_best_validation_metrics_correctly_1(self):
        # Use -loss and run 1 epoch of original training, and one of restored training.
        # Run 1 epoch of original training.
        trainer = CallbackTrainer(
            self.model,
            training_data=self.instances,
            iterator=self.iterator,
            optimizer=self.optimizer,
            callbacks=self.default_callbacks(),
            num_epochs=1,
            serialization_dir=self.TEST_DIR,
        )
        trainer.train()
        trainer.handler.fire_event(Events.TRAINING_START)
        best_epoch_1 = trainer.metric_tracker.best_epoch
        best_validation_metrics_epoch_1 = trainer.metric_tracker.best_epoch_metrics
        # best_validation_metrics_epoch_1: {'accuracy': 0.75, 'accuracy3': 1.0, 'loss': 0.6243013441562653}
        assert isinstance(best_validation_metrics_epoch_1, dict)
        assert "loss" in best_validation_metrics_epoch_1

        # Run 1 epoch of restored training.
        restore_trainer = CallbackTrainer(
            self.model,
            training_data=self.instances,
            iterator=self.iterator,
            optimizer=self.optimizer,
            callbacks=self.default_callbacks(),
            num_epochs=2,
            serialization_dir=self.TEST_DIR,
        )
        restore_trainer.train()
        restore_trainer.handler.fire_event(Events.TRAINING_START)
        best_epoch_2 = restore_trainer.metric_tracker.best_epoch
        best_validation_metrics_epoch_2 = restore_trainer.metric_tracker.best_epoch_metrics

        # Because we use -loss, the 2nd epoch should be better than the 1st, so the best validation metrics should not be the same.
        assert best_epoch_1 == 0 and best_epoch_2 == 1
        assert best_validation_metrics_epoch_2 != best_validation_metrics_epoch_1

    def test_trainer_saves_and_loads_best_validation_metrics_correctly_2(self):
        # Use +loss and run 1 epoch of original training, and one of restored training.
        # Run 1 epoch of original training.
        trainer = CallbackTrainer(
            self.model,
            training_data=self.instances,
            iterator=self.iterator,
            optimizer=self.optimizer,
            callbacks=self.default_callbacks(validation_metric="+loss"),
            num_epochs=1,
            serialization_dir=self.TEST_DIR,
        )
        trainer.handler.verbose = True
        trainer.train()

        trainer.handler.fire_event(Events.TRAINING_START)
        best_epoch_1 = trainer.metric_tracker.best_epoch
        best_validation_metrics_epoch_1 = trainer.metric_tracker.best_epoch_metrics
        # best_validation_metrics_epoch_1: {'accuracy': 0.75, 'accuracy3': 1.0, 'loss': 0.6243013441562653}
        assert isinstance(best_validation_metrics_epoch_1, dict)
        assert "loss" in best_validation_metrics_epoch_1

        # Run 1 more epoch of restored training.
        restore_trainer = CallbackTrainer(
            self.model,
            training_data=self.instances,
            iterator=self.iterator,
            optimizer=self.optimizer,
            callbacks=self.default_callbacks(validation_metric="+loss"),
            num_epochs=2,
            serialization_dir=self.TEST_DIR,
        )
        print("restore trainer")
        restore_trainer.handler.verbose = True
        restore_trainer.train()
        restore_trainer.handler.fire_event(Events.TRAINING_START)
        best_epoch_2 = restore_trainer.metric_tracker.best_epoch
        best_validation_metrics_epoch_2 = restore_trainer.metric_tracker.best_epoch_metrics

        # Because we use +loss, the 2nd epoch won't be better than the 1st, so the best validation metrics should be the same.
        assert best_epoch_1 == best_epoch_2 == 0
        assert best_validation_metrics_epoch_2 == best_validation_metrics_epoch_1

    def test_restored_training_returns_best_epoch_metrics_even_if_no_better_epoch_is_found_after_restoring(
            self):
        # Instead of -loss, use +loss to ensure the 2nd epoch is considered worse.
        # Run 1 epoch of original training.
        original_trainer = CallbackTrainer(
            self.model,
            training_data=self.instances,
            iterator=self.iterator,
            optimizer=self.optimizer,
            callbacks=self.default_callbacks(validation_metric="+loss"),
            num_epochs=1,
            serialization_dir=self.TEST_DIR,
        )
        training_metrics = original_trainer.train()

        # Run 1 epoch of restored training.
        restored_trainer = CallbackTrainer(
            self.model,
            training_data=self.instances,
            iterator=self.iterator,
            optimizer=self.optimizer,
            callbacks=self.default_callbacks(validation_metric="+loss"),
            num_epochs=2,
            serialization_dir=self.TEST_DIR,
        )
        restored_metrics = restored_trainer.train()

        assert "best_validation_loss" in restored_metrics
        assert "best_validation_accuracy" in restored_metrics
        assert "best_validation_accuracy3" in restored_metrics
        assert "best_epoch" in restored_metrics

        # Epoch 2 validation loss should be less than that of epoch 1.
        assert training_metrics["best_validation_loss"] == restored_metrics[
            "best_validation_loss"]
        assert training_metrics["best_epoch"] == 0
        assert training_metrics["validation_loss"] > restored_metrics[
            "validation_loss"]

    def test_handle_errors(self):
        class ErrorTest(Callback):
            """
            A callback with three triggers
            * at BATCH_START, it raises a RuntimeError
            * at TRAINING_END, it sets a finished flag to True
            * at ERROR, it captures `trainer.exception`
            """
            def __init__(self) -> None:
                self.exc: Optional[Exception] = None
                self.finished_training = None

            @handle_event(Events.BATCH_START)
            def raise_exception(self, trainer):
                raise RuntimeError("problem starting batch")

            @handle_event(Events.TRAINING_END)
            def finish_training(self, trainer):
                self.finished_training = True

            @handle_event(Events.ERROR)
            def capture_error(self, trainer):
                self.exc = trainer.exception

        error_test = ErrorTest()
        callbacks = self.default_callbacks() + [error_test]

        original_trainer = CallbackTrainer(
            self.model,
            self.instances,
            self.iterator,
            self.optimizer,
            callbacks=callbacks,
            num_epochs=1,
            serialization_dir=self.TEST_DIR,
        )

        with pytest.raises(RuntimeError):
            original_trainer.train()

        # The callback should have captured the exception.
        assert error_test.exc is not None
        assert error_test.exc.args == ("problem starting batch", )

        # The "finished" flag should never have been set to True.
        assert not error_test.finished_training
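

# A minimal sketch of the callback pattern exercised above (assumption: the
# same Callback base class, handle_event decorator and Events enum imported
# for these tests are available). It registers handlers for two of the events
# used in ErrorTest and only records what it observes.
class BatchCountingCallback(Callback):
    def __init__(self) -> None:
        self.batches_seen = 0
        self.finished_training = False

    @handle_event(Events.BATCH_START)
    def count_batch(self, trainer):
        # Fired once per batch, before the batch is processed.
        self.batches_seen += 1

    @handle_event(Events.TRAINING_END)
    def mark_finished(self, trainer):
        # Fired once, after training completes successfully.
        self.finished_training = True

# Such a callback would be passed to CallbackTrainer alongside the others,
# e.g. callbacks=[..., BatchCountingCallback()].
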
def hidden_eval_fever_adv_v1():
    batch_size = 64
    lazy = True
    dev_prob_threshold = 0.5

    SAVE_PATH = "/home/easonnie/projects/FunEver/saved_models/07-20-22:28:24_mesim_wn_450_adv_sample_v1_|t_prob:0.35|top_k:8/i(46000)_epoch(7)_dev(0.6405140514051405)_loss(1.0761665150348825)_seed(12)"

    dev_upstream_sent_list = common.load_jsonl(
        config.RESULT_PATH /
        "sent_retri_nn/2018_07_20_15:17:59_r/dev_sent.jsonl")

    # Prepare Data
    token_indexers = {
        'tokens':
        SingleIdTokenIndexer(namespace='tokens'),  # This is the raw tokens
        'elmo_chars': ELMoTokenCharactersIndexer(
            namespace='elmo_characters')  # This is the elmo_characters
    }

    p_dict = wn_persistent_api.persistence_load()

    upstream_dev_list = score_converter_scaled(config.T_FEVER_DEV_JSONL,
                                               dev_upstream_sent_list,
                                               scale_prob=dev_prob_threshold,
                                               delete_prob=False)

    dev_fever_data_reader = WNReader(token_indexers=token_indexers,
                                     lazy=lazy,
                                     wn_p_dict=p_dict,
                                     max_l=360)

    complete_upstream_dev_data = get_actual_data(config.T_FEVER_DEV_JSONL,
                                                 upstream_dev_list)
    dev_instances = dev_fever_data_reader.read(complete_upstream_dev_data)

    # Load Vocabulary
    biterator = BasicIterator(batch_size=batch_size)

    vocab, weight_dict = load_vocab_embeddings(config.DATA_ROOT /
                                               "vocab_cache" / "nli_basic")
    vocab.change_token_with_index_to_namespace('hidden',
                                               -2,
                                               namespace='labels')

    print(vocab.get_token_to_index_vocabulary('labels'))
    print(vocab.get_vocab_size('tokens'))

    biterator.index_with(vocab)

    # Build Model
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu",
                          index=0)
    device_num = -1 if device.type == 'cpu' else 0

    model = Model(
        rnn_size_in=(1024 + 300 + dev_fever_data_reader.wn_feature_size,
                     1024 + 450),
        rnn_size_out=(450, 450),
        weight=weight_dict['glove.840B.300d'],
        vocab_size=vocab.get_vocab_size('tokens'),
        mlp_d=900,
        embedding_dim=300,
        max_l=300)

    print("Model Max length:", model.max_l)
    model.load_state_dict(torch.load(SAVE_PATH))
    model.display()
    model.to(device)

    eval_iter = biterator(dev_instances,
                          shuffle=False,
                          num_epochs=1,
                          cuda_device=device_num)
    builded_dev_data = hidden_eval(model, eval_iter,
                                   complete_upstream_dev_data)

    eval_mode = {'check_sent_id_correct': True, 'standard': True}

    common.save_jsonl(
        builded_dev_data,
        config.RESULT_PATH / "nli_results" / "pipeline_results_1.jsonl")
    c_scorer.delete_label(builded_dev_data)
    print(
        c_scorer.fever_score(builded_dev_data,
                             common.load_jsonl(config.FEVER_DEV_JSONL),
                             mode=eval_mode))
예제 #44
0
def model_go_pure_aug():
    # for some_params in [0.25, 0.25, 0.25]:
    for some_params in [0.25, 0.25, 0.25]:
        # bert_model_name = 'bert-large-uncased'
        seed = 6
        bert_model_name = 'bert-base-uncased'
        lazy = False
        forward_size = 16
        batch_size = 32
        gradient_accumulate_step = int(batch_size / forward_size)
        warmup_proportion = 0.1
        learning_rate = 5e-5
        num_train_epochs = 3
        do_ema = False
        dev_prob_threshold = 0.1
        train_prob_threshold = 0.35
        debug_mode = False
        # experiment_name = f"bert_fever_nli_baseline_on_fulldata"
        # experiment_name = f"bert_fever_nli_baseline_on_fulldata_aug_the_same_gt_mrate({some_params})"
        # experiment_name = f"bert_fever_nli_baseline_on_10p_aug_ratio({some_params})"
        experiment_name = f"bert_fever_nli_baseline_on_fulldata_aug_ratio({some_params})"
        # experiment_name = f"bert_fever_nli_baseline_pure_aug"

        data_aug = True
        # data_aug_file = config.FEVER_DATA_ROOT / "qa_aug/squad_train_turker_groundtruth.json"
        # data_aug_size = int(21_015 * some_params)   # 10p
        # data_aug_size = int(208_346 * some_params)

        # training_file = config.FEVER_DATA_ROOT / "fever_1.0/train_10.jsonl"
        training_file = config.FEVER_DATA_ROOT / "fever_1.0/train.jsonl"

        train_sample_top_k = 8

        # est_datasize = 208_346    # full
        # est_datasize = 14_544
        # est_datasize = 21_015 + data_aug_size   # 10p
        aug_size = int(208_346 * some_params)
        est_datasize = 208_346 + aug_size
        # est_datasize = 208_346 + data_aug_size

        num_class = 3

        # num_train_optimization_steps
        torch.manual_seed(seed)
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        n_gpu = torch.cuda.device_count()

        unk_token_num = {'tokens': 1}  # workaround for initializing the vocabulary.
        vocab = ExVocabulary(unk_token_num=unk_token_num)
        vocab.add_token_to_namespace('SUPPORTS', namespace='labels')
        vocab.add_token_to_namespace('REFUTES', namespace='labels')
        vocab.add_token_to_namespace('NOT ENOUGH INFO', namespace='labels')
        vocab.add_token_to_namespace("hidden", namespace="labels")
        vocab.change_token_with_index_to_namespace("hidden", -2, namespace='labels')
        # Finished build vocabulary.

        # Load standardized sentence file
        dev_upstream_sent_list = common.load_jsonl(config.FEVER_DATA_ROOT /
                                                   "upstream_sentence_selection_Feb16/dev_sent_pred_scores.jsonl")
        dev_sent_after_threshold_filter = fever_ss_sampler.threshold_sampler_insure_unique(
            config.FEVER_DATA_ROOT / "fever_1.0/shared_task_dev.jsonl",
            dev_upstream_sent_list,
            prob_threshold=dev_prob_threshold, top_n=5)

        dev_data_list = fever_nli_sampler.select_sent_with_prob_for_eval(
            config.FEVER_DATA_ROOT / "fever_1.0/shared_task_dev.jsonl", dev_sent_after_threshold_filter,
            None, tokenized=True)

        # print(dev_data_list[0])
        # exit(0)

        train_upstream_sent_list = common.load_jsonl(config.FEVER_DATA_ROOT /
                                                     "upstream_sentence_selection_Feb16/train_sent_scores.jsonl")
        # Finished loading standardized sentence file.

        bert_tokenizer = BertTokenizer.from_pretrained(bert_model_name, do_lower_case=True)

        bert_fever_reader = BertReaderFeverNLI(bert_tokenizer, lazy=lazy)

        dev_instances = bert_fever_reader.read(dev_data_list)

        biterator = BasicIterator(batch_size=forward_size)
        biterator.index_with(vocab)

        # print(list(mnli_dev_instances))

        # Load training model
        model_clf = BertForSequenceClassification.from_pretrained(bert_model_name, num_labels=num_class)

        ema_tracker = None
        ema_model_copy = None
        if do_ema and ema_tracker is None:
            ema_tracker = EMA(model_clf.named_parameters(), on_cpu=True)
            ema_model_copy = copy.deepcopy(model_clf)

        model_clf.to(device)

        param_optimizer = list(model_clf.named_parameters())
        no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
        optimizer_grouped_parameters = [
            {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
            {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
        ]

        num_train_optimization_steps = int(est_datasize / forward_size / gradient_accumulate_step) * \
                                       num_train_epochs

        print(num_train_optimization_steps)

        optimizer = BertAdam(optimizer_grouped_parameters,
                             lr=learning_rate,
                             warmup=warmup_proportion,
                             t_total=num_train_optimization_steps)

        # optimizer = optim.Adam(optimizer_grouped_parameters, lr=learning_rate)

        # # # Create Log File
        file_path_prefix, date = save_tool.gen_file_prefix(f"{experiment_name}")
        # Save the source code.
        script_name = os.path.basename(__file__)
        with open(os.path.join(file_path_prefix, script_name), 'w') as out_f, open(__file__, 'r') as it:
            out_f.write(it.read())
            out_f.flush()
        # # # Log File end

        model_clf.train()

        if n_gpu > 1:
            model_clf = nn.DataParallel(model_clf)

        forbackward_step = 0
        update_step = 0
        eval_iter_num = 2_000  # Change this to real evaluation.
        best_fever_score = -1

        for n_epoch in range(num_train_epochs):
            print("Resampling...")
            train_sent_after_threshold_filter = \
                fever_ss_sampler.threshold_sampler_insure_unique(training_file,
                                                                 train_upstream_sent_list,
                                                                 train_prob_threshold,
                                                                 top_n=train_sample_top_k)
            #
            train_data_list = fever_nli_sampler.adv_simi_sample_with_prob_v1_1(
                training_file,
                train_sent_after_threshold_filter,
                None,
                tokenized=True)

            aug_d_list = []
            if data_aug:
                aug_d_list = get_sample_data(-1)
                random.shuffle(aug_d_list)
                aug_d_list = aug_d_list[:aug_size]

            train_data_list = train_data_list + aug_d_list

            random.shuffle(train_data_list)
            # train_data_list = get_sample_data(-1)
            print("Sample data length:", len(train_data_list))
            sampled_train_instances = bert_fever_reader.read(train_data_list)
            #
            train_iter = biterator(sampled_train_instances, shuffle=True, num_epochs=1)

            for i, batch in enumerate(tqdm(train_iter)):
                paired_sequence = batch['paired_sequence']
                paired_segments_ids = batch['paired_segments_ids']
                labels_ids = batch['label']
                att_mask, _ = torch_util.get_length_and_mask(paired_sequence)

                paired_sequence = paired_sequence.to(device)
                paired_segments_ids = paired_segments_ids.to(device)
                labels_ids = labels_ids.to(device)
                att_mask = att_mask.to(device)

                loss = model_clf(paired_sequence, token_type_ids=paired_segments_ids, attention_mask=att_mask,
                                 labels=labels_ids)

                if n_gpu > 1:
                    loss = loss.mean()  # mean() to average on multi-gpu.

                if gradient_accumulate_step > 1:
                    loss = loss / gradient_accumulate_step

                loss.backward()
                forbackward_step += 1

                if forbackward_step % gradient_accumulate_step == 0:
                    optimizer.step()
                    optimizer.zero_grad()
                    update_step += 1
                    if do_ema and ema_tracker is not None:
                        # if model_clf is DataParallel, then we use model_clf.module
                        model_to_track = model_clf.module if hasattr(model_clf,
                                                                     'module') else model_clf
                        ema_tracker(model_to_track.named_parameters())  # whenever we do an update, also do the EMA update

                    if update_step % eval_iter_num == 0:
                        print("Update steps:", update_step)
                        dev_iter = biterator(dev_instances, num_epochs=1, shuffle=False)

                        if do_ema and ema_model_copy is not None and ema_tracker is not None:
                            print("EMA evaluation.")
                            EMA.load_ema_to_model(ema_model_copy, ema_tracker)
                            ema_model_copy.to(device)
                            if n_gpu > 1:
                                ema_model_copy = nn.DataParallel(ema_model_copy)
                            dev_data_list = hidden_eval(ema_model_copy, dev_iter, dev_data_list, device)
                        else:
                            dev_data_list = hidden_eval(model_clf, dev_iter, dev_data_list, device)

                        eval_mode = {'check_sent_id_correct': True, 'standard': True}
                        fever_score, label_score, pr, rec, f1 = fever_scorer.fever_score(dev_data_list,
                                                                                         common.load_jsonl(config.FEVER_DATA_ROOT / "fever_1.0/shared_task_dev.jsonl"),
                                                                                         mode=eval_mode,
                                                                                         verbose=False)
                        print("Fever Score(FScore/LScore:/Precision/Recall/F1):", fever_score, label_score, pr, rec, f1)

                        print(f"Dev:{fever_score}/{label_score}")

                        if best_fever_score < fever_score:
                            print("New Best FScore")
                            best_fever_score = fever_score

                            save_path = os.path.join(
                                file_path_prefix,
                                f'i({update_step})_epoch({n_epoch})_dev({fever_score})_lacc({label_score})_seed({seed})'
                            )
                            model_to_save = model_clf.module if hasattr(model_clf,
                                                                        'module') else model_clf
                            output_model_file = os.path.join(file_path_prefix, save_path)
                            torch.save(model_to_save.state_dict(), output_model_file)

            print("Update steps:", update_step)
            dev_iter = biterator(dev_instances, num_epochs=1, shuffle=False)

            if do_ema and ema_model_copy is not None and ema_tracker is not None:
                print("EMA evaluation.")
                EMA.load_ema_to_model(ema_model_copy, ema_tracker)
                ema_model_copy.to(device)
                if n_gpu > 1:
                    ema_model_copy = nn.DataParallel(ema_model_copy)
                dev_data_list = hidden_eval(ema_model_copy, dev_iter, dev_data_list, device)
            else:
                dev_data_list = hidden_eval(model_clf, dev_iter, dev_data_list, device)

            eval_mode = {'check_sent_id_correct': True, 'standard': True}
            fever_score, label_score, pr, rec, f1 = fever_scorer.fever_score(dev_data_list,
                                                                             common.load_jsonl(config.FEVER_DATA_ROOT / "fever_1.0/shared_task_dev.jsonl"),
                                                                             mode=eval_mode,
                                                                             verbose=False)
            print("Fever Score(FScore/LScore:/Precision/Recall/F1):", fever_score, label_score, pr, rec, f1)

            print(f"Dev:{fever_score}/{label_score}")

            if best_fever_score < fever_score:
                print("New Best FScore")
                best_fever_score = fever_score

                save_path = os.path.join(
                    file_path_prefix,
                    f'i({update_step})_epoch({n_epoch})_dev({fever_score})_lacc({label_score})_seed({seed})'
                )
                model_to_save = model_clf.module if hasattr(model_clf,
                                                            'module') else model_clf
                output_model_file = os.path.join(file_path_prefix, save_path)
                torch.save(model_to_save.state_dict(), output_model_file)
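

# The training loop above uses gradient accumulation: the loss is divided by
# gradient_accumulate_step, gradients build up over several forward/backward
# passes, and the optimizer only steps once per accumulation window.
# Below is a minimal sketch of that pattern (assumption: `model`, `optimizer`
# and `batches` are generic placeholders, not the objects from this script).
import torch.nn.functional as F

def train_with_accumulation(model, optimizer, batches, gradient_accumulate_step=2):
    model.train()
    optimizer.zero_grad()
    for step, (inputs, targets) in enumerate(batches, start=1):
        loss = F.cross_entropy(model(inputs), targets)
        # Scale the loss so the accumulated gradient is an average over the window.
        (loss / gradient_accumulate_step).backward()
        if step % gradient_accumulate_step == 0:
            optimizer.step()       # one parameter update per accumulation window
            optimizer.zero_grad()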
예제 #45
0
def model_go_with_old_data():
    seed = 12
    torch.manual_seed(seed)
    # bert_model_name = 'bert-large-uncased'
    bert_model_name = 'bert-base-uncased'
    experiment_name = 'fever_v1_nli'
    lazy = False
    # lazy = True
    forward_size = 16
    # batch_size = 64
    # batch_size = 192
    batch_size = 32
    gradient_accumulate_step = int(batch_size / forward_size)
    warmup_proportion = 0.1
    learning_rate = 5e-5
    num_train_epochs = 3
    eval_frequency = 2000
    do_lower_case = True
    pair_order = 'cq'
    # debug_mode = True
    debug_mode = False
    # est_datasize = 900_000

    num_class = 3
    # num_train_optimization_steps

    train_sent_filtering_prob = 0.35
    dev_sent_filtering_prob = 0.1

    # dev_sent_results_file = config.RESULT_PATH / "doc_retri_results/fever_results/sent_results/4-14-sent_results_v0/i(5000)|e(0)|s01(0.9170917091709171)|s05(0.8842384238423843)|seed(12)_dev_sent_results.json"
    # train_sent_results_file = config.RESULT_PATH / "doc_retri_results/fever_results/sent_results/4-14-sent_results_v0/train_sent_results.jsonl"
    from utest.utest_format_converter_for_old_sent.tool import format_convert
    dev_sent_results_file = format_convert(
        config.PRO_ROOT /
        "results/doc_retri_results/fever_results/sent_results/old_sent_data_by_NSMN/4-15-dev_sent_pred_scores_old_format.jsonl"
    )
    train_sent_results_file = format_convert(
        config.PRO_ROOT /
        "results/doc_retri_results/fever_results/sent_results/old_sent_data_by_NSMN/train_sent_scores_old_format.jsonl"
    )

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    device_num = 0 if torch.cuda.is_available() else -1

    n_gpu = torch.cuda.device_count()

    unk_token_num = {'tokens': 1}  # workaround for initializing the vocabulary.
    vocab = ExVocabulary(unk_token_num=unk_token_num)
    vocab.add_token_to_namespace('SUPPORTS', namespace='labels')
    vocab.add_token_to_namespace('REFUTES', namespace='labels')
    vocab.add_token_to_namespace('NOT ENOUGH INFO', namespace='labels')
    vocab.add_token_to_namespace("hidden", namespace="labels")
    vocab.change_token_with_index_to_namespace("hidden",
                                               -2,
                                               namespace='labels')

    # Load Dataset
    # train_fitems_list = get_inference_pair('train', True, train_sent_results_file, debug_mode, train_sent_filtering_prob)
    dev_debug_num = 2481 if debug_mode else None
    dev_fitems_list, dev_list = get_inference_pair('dev', False,
                                                   dev_sent_results_file,
                                                   dev_debug_num,
                                                   dev_sent_filtering_prob)
    # = common.load_jsonl(config.FEVER_DEV)

    if debug_mode:
        dev_list = dev_list[:50]
        eval_frequency = 1
        # print(dev_list[-1]['_id'])
        # exit(0)

    # sampled_train_list = down_sample_neg(train_fitems_list, ratio=pos_ratio)
    train_debug_num = 2971 if debug_mode else None
    train_fitems_list, _ = get_inference_pair('train', True,
                                              train_sent_results_file,
                                              train_debug_num,
                                              train_sent_filtering_prob)
    est_datasize = len(train_fitems_list)

    # dev_o_dict = list_dict_data_tool.list_to_dict(dev_list, 'id')
    # print(dev_o_dict)

    bert_tokenizer = BertTokenizer.from_pretrained(bert_model_name,
                                                   do_lower_case=do_lower_case)
    bert_cs_reader = BertFeverNLIReader(bert_tokenizer,
                                        lazy,
                                        is_paired=True,
                                        query_l=64,
                                        example_filter=None,
                                        max_l=364,
                                        pair_order=pair_order)

    bert_encoder = BertModel.from_pretrained(bert_model_name)
    model = BertMultiLayerSeqClassification(bert_encoder,
                                            num_labels=num_class,
                                            num_of_pooling_layer=1,
                                            act_type='tanh',
                                            use_pretrained_pooler=True,
                                            use_sigmoid=False)
    #
    param_optimizer = list(model.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [{
        'params':
        [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
        'weight_decay':
        0.01
    }, {
        'params':
        [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
        'weight_decay':
        0.0
    }]

    num_train_optimization_steps = int(est_datasize / forward_size / gradient_accumulate_step) * \
                                   num_train_epochs

    if debug_mode:
        num_train_optimization_steps = 100

    print("Estimated training size", est_datasize)
    print("Number of optimization steps:", num_train_optimization_steps)

    optimizer = BertAdam(optimizer_grouped_parameters,
                         lr=learning_rate,
                         warmup=warmup_proportion,
                         t_total=num_train_optimization_steps)

    dev_instances = bert_cs_reader.read(dev_fitems_list)

    biterator = BasicIterator(batch_size=forward_size)
    biterator.index_with(vocab)

    model.to(device)
    if n_gpu > 1:
        model = torch.nn.DataParallel(model)

    forbackward_step = 0
    update_step = 0

    logging_agent = save_tool.ScoreLogger({})

    file_path_prefix = '.'
    if not debug_mode:
        file_path_prefix, date = save_tool.gen_file_prefix(
            f"{experiment_name}")
        # # # Create Log File
        # Save the source code.
        script_name = os.path.basename(__file__)
        with open(os.path.join(file_path_prefix, script_name),
                  'w') as out_f, open(__file__, 'r') as it:
            out_f.write(it.read())
            out_f.flush()
        # # # Log File end

    for epoch_i in range(num_train_epochs):
        print("Epoch:", epoch_i)

        train_fitems_list, _ = get_inference_pair('train', True,
                                                  train_sent_results_file,
                                                  train_debug_num,
                                                  train_sent_filtering_prob)
        random.shuffle(train_fitems_list)
        train_instance = bert_cs_reader.read(train_fitems_list)
        train_iter = biterator(train_instance, num_epochs=1, shuffle=True)

        for batch in tqdm(train_iter):
            model.train()
            batch = move_to_device(batch, device_num)

            paired_sequence = batch['paired_sequence']
            paired_segments_ids = batch['paired_segments_ids']
            labels_ids = batch['label']
            att_mask, _ = torch_util.get_length_and_mask(paired_sequence)
            s1_span = batch['bert_s1_span']
            s2_span = batch['bert_s2_span']

            loss = model(
                paired_sequence,
                token_type_ids=paired_segments_ids,
                attention_mask=att_mask,
                mode=BertMultiLayerSeqClassification.ForwardMode.TRAIN,
                labels=labels_ids)

            if n_gpu > 1:
                loss = loss.mean()  # mean() to average on multi-gpu.

            if gradient_accumulate_step > 1:
                loss = loss / gradient_accumulate_step

            loss.backward()
            forbackward_step += 1

            if forbackward_step % gradient_accumulate_step == 0:
                optimizer.step()
                optimizer.zero_grad()
                update_step += 1

                if update_step % eval_frequency == 0:
                    print("Update steps:", update_step)
                    dev_iter = biterator(dev_instances,
                                         num_epochs=1,
                                         shuffle=False)

                    cur_eval_results_list = eval_model(model,
                                                       dev_iter,
                                                       device_num,
                                                       with_probs=True,
                                                       make_int=True)

                    results_dict = list_dict_data_tool.list_to_dict(
                        cur_eval_results_list, 'oid')
                    copied_dev_list = copy.deepcopy(dev_list)
                    list_dict_data_tool.append_item_from_dict_to_list(
                        copied_dev_list, results_dict, 'id', 'predicted_label')

                    mode = {'standard': True}
                    strict_score, acc_score, pr, rec, f1 = fever_scorer.fever_score(
                        copied_dev_list,
                        dev_fitems_list,
                        mode=mode,
                        max_evidence=5)
                    logging_item = {
                        'ss': strict_score,
                        'ac': acc_score,
                        'pr': pr,
                        'rec': rec,
                        'f1': f1,
                    }

                    save_file_name = f'i({update_step})|e({epoch_i})' \
                        f'|ss({strict_score})|ac({acc_score})|pr({pr})|rec({rec})|f1({f1})' \
                        f'|seed({seed})'

                    common.save_jsonl(
                        copied_dev_list,
                        Path(file_path_prefix) /
                        f"{save_file_name}_dev_nli_results.json")

                    # print(save_file_name)
                    logging_agent.incorporate_results({}, save_file_name,
                                                      logging_item)
                    logging_agent.logging_to_file(
                        Path(file_path_prefix) / "log.json")

                    model_to_save = model.module if hasattr(
                        model, 'module') else model
                    output_model_file = Path(file_path_prefix) / save_file_name
                    torch.save(model_to_save.state_dict(),
                               str(output_model_file))
예제 #46
0
def main(serialization_directory: str,
         device: int,
         data: str,
         prefix: str,
         domain: str = None):
    """
    serialization_directory : str, required.
        The directory containing the serialized weights.
    device: int, default = -1
        The device to run the evaluation on.
    data: str, default = None
        The data to evaluate on. By default, we use the validation data from
        the original experiment.
    prefix: str, default=""
        The prefix to prepend to the generated gold and prediction files, to distinguish
        different models/data.
    domain: str, optional (default = None)
        If passed, filters the ontonotes evaluation/test dataset to only contain the
        specified domain. This overwrites the domain in the config file from the model,
        to allow evaluation on domains other than the one the model was trained on.
    """
    config = Params.from_file(os.path.join(serialization_directory, "config.json"))

    if domain is not None:
        # Hack to allow evaluation on different domains than the
        # model was trained on.
        config["dataset_reader"]["domain_identifier"] = domain
        prefix = f"{domain}_{prefix}"
    else:
        config["dataset_reader"].pop("domain_identifier", None)

    dataset_reader = DatasetReader.from_params(config['dataset_reader'])
    evaluation_data_path = data if data else config['validation_data_path']

    archive = load_archive(os.path.join(serialization_directory, "model.tar.gz"), cuda_device=device)
    model = archive.model
    model.eval()

    prediction_file_path = os.path.join(serialization_directory, prefix + "_predictions.txt")
    gold_file_path = os.path.join(serialization_directory, prefix + "_gold.txt")
    prediction_file = open(prediction_file_path, "w+")
    gold_file = open(gold_file_path, "w+")

    # Load the evaluation data and index it.
    print("reading evaluation data from {}".format(evaluation_data_path))
    instances = dataset_reader.read(evaluation_data_path)

    with torch.autograd.no_grad():
        iterator = BasicIterator(batch_size=32)
        iterator.index_with(model.vocab)

        model_predictions = []
        batches = iterator(instances, num_epochs=1, shuffle=False, cuda_device=device)
        for batch in Tqdm.tqdm(batches):
            result = model(**batch)
            predictions = model.decode(result)
            model_predictions.extend(predictions["tags"])

        for instance, prediction in zip(instances, model_predictions):
            fields = instance.fields
            try:
                # Most sentences have a verbal predicate, but not all.
                verb_index = fields["verb_indicator"].labels.index(1)
            except ValueError:
                verb_index = None

            gold_tags = fields["tags"].labels
            sentence = [x.text for x in fields["tokens"].tokens]

            write_to_conll_eval_file(prediction_file, gold_file,
                                     verb_index, sentence, prediction, gold_tags)
        prediction_file.close()
        gold_file.close()
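
# A hypothetical invocation of main() for illustration only (assumption: the
# path, device, data and prefix values below are placeholders, not values
# taken from the source):
#
#     main("/path/to/srl_serialization_dir", device=-1, data=None, prefix="dev")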
예제 #47
0
# train_dataset = reader.read(TRAIN_PATH)
train_dataset = merge_reader.read(COMBINED_TRAIN_PATH)
validation_dataset = reader.read(VALIDATION_PATH)
test_dataset = reader.read(TEST_PATH)
# %%
vocab = Vocabulary()

vocab._token_to_index['labels'] = {'0': 0, '1': 1}

# %%
"""Prepare iterator"""
from allennlp.data.iterators import BasicIterator

iterator = BasicIterator(batch_size=8)

iterator.index_with(vocab)


# %%
# Loss function
def multiple_target_CrossEntropyLoss(logits, labels):
    loss = 0
    for i in range(logits.shape[0]):
        loss = loss + nn.CrossEntropyLoss(
            weight=torch.tensor([1.0, 1.0]).cuda())(logits[i, :, :],
                                                    labels[i, :])
    return loss / logits.shape[0]
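

# A minimal shape-check sketch for the loss above (assumption: the expected
# shapes are logits (num_targets, batch, 2) and labels (num_targets, batch);
# guarded by a CUDA check because the loss moves its class weights to the GPU).
if torch.cuda.is_available():
    example_logits = torch.randn(3, 8, 2).cuda()         # 3 targets, batch of 8, 2 classes
    example_labels = torch.randint(0, 2, (3, 8)).cuda()  # integer class ids per target and example
    print(multiple_target_CrossEntropyLoss(example_logits, example_labels))  # scalar loss tensor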


# %%
"""Prepare the model"""
예제 #48
0
def train():
    reader = PWKPReader()
    train_dataset = reader.read(train_path)
    valid_dataset = reader.read(dev_path)
    if os.path.exists(vocab_dir):
        vocab = Vocabulary.from_files(vocab_dir)
    else:
        vocab = Vocabulary.from_instances(instances=train_dataset,
                                          max_vocab_size=opt.vocab_size)
        vocab.save_to_files(vocab_dir)
    iterator = BucketIterator(batch_size=opt.batch_size,
                              sorting_keys=[("src", "num_tokens"),
                                            ("tgt", "num_tokens")])
    iterator.index_with(vocab)

    model = Seq2Seq(emb_size=opt.emb_size,
                    hidden_size=opt.hidden_size,
                    enc_layers=opt.enc_layers,
                    dec_layers=opt.dec_layers,
                    dropout=opt.dropout,
                    bidirectional=opt.bidirectional,
                    beam_size=opt.beam_size,
                    label_smoothing=opt.label_smoothing,
                    vocab=vocab)

    optimizer = optim.Adam(model.parameters(), lr=opt.lr)
    #learning_rate_scheduler = torch.optim.lr_scheduler.StepLR(optimizer=optimizer, step_size=1, gamma=opt.lr_decay)

    val_iterator = BasicIterator(batch_size=opt.batch_size)
    val_iterator.index_with(vocab)

    predictor = Predictor(iterator=val_iterator,
                          max_decoding_step=opt.max_step,
                          vocab=vocab,
                          reader=reader,
                          data_path=test_path,
                          log_dir=save_dir,
                          map_path=ner_path,
                          cuda_device=opt.gpu)

    trainer = Trainer(
        model=model,
        optimizer=optimizer,
        #learning_rate_scheduler=learning_rate_scheduler,
        learning_rate_decay=opt.lr_decay,
        ema_decay=opt.ema_decay,
        predictor=predictor,
        iterator=iterator,
        train_dataset=train_dataset,
        validation_dataset=valid_dataset,
        validation_metric='+bleu',
        cuda_device=opt.gpu,
        num_epochs=opt.epoch,
        serialization_dir=save_dir,
        num_serialized_models_to_keep=5,
        #model_save_interval=60,
        #summary_interval=500,
        should_log_parameter_statistics=False,
        grad_norm=10)

    trainer.train()
예제 #49
0
word_embeddings = BasicTextFieldEmbedder({"tokens": token_embedder})

lstm = PytorchSeq2VecWrapper(
    nn.LSTM(word_embeddings.get_output_dim(),
            config.hidden_sz,
            bidirectional=True,
            batch_first=True))

save_file = "model_v12.th"  ## models saved by lstm training
model2 = LSTM_Model(word_embeddings, lstm, 2)
with open(save_file, 'rb') as f:
    model2.load_state_dict(torch.load(f))

# iterate over the dataset without changing its order
seq_iterator = BasicIterator(config.batch_size)
seq_iterator.index_with(vocab)
predictor = Predictor(model2, seq_iterator)
prob, labels = predictor.predict(test_dataset)
test_preds = 1 * (prob > .525)  #optimal threshold

#Evaluation
accuracy_test = accuracy_score(test_preds, labels)
f1_test = f1_score(test_preds, labels)
precision_test = precision_score(test_preds, labels)
recall_test = recall_score(test_preds, labels)
matrix = confusion_matrix(labels, test_preds)
print(matrix)
print(
    "Accuracy score: {:.4f}, F1 Score: {:.4f}, Precision: {:.4f}, Recall: {:.4f} "
    .format(accuracy_test, f1_test, precision_test, recall_test))
fpr, tpr, _ = roc_curve(labels, prob)
예제 #50
0
class TestTrainer(AllenNlpTestCase):
    def setUp(self):
        super(TestTrainer, self).setUp()
        self.instances = SequenceTaggingDatasetReader().read('tests/fixtures/data/sequence_tagging.tsv')
        vocab = Vocabulary.from_instances(self.instances)
        self.vocab = vocab
        self.model_params = Params({
                "text_field_embedder": {
                        "tokens": {
                                "type": "embedding",
                                "embedding_dim": 5
                                }
                        },
                "encoder": {
                        "type": "lstm",
                        "input_size": 5,
                        "hidden_size": 7,
                        "num_layers": 2
                        }
                })
        self.model = SimpleTagger.from_params(self.vocab, self.model_params)
        self.optimizer = torch.optim.SGD(self.model.parameters(), 0.01)
        self.iterator = BasicIterator(batch_size=2)
        self.iterator.index_with(vocab)

    def test_trainer_can_run(self):
        trainer = Trainer(model=self.model,
                          optimizer=self.optimizer,
                          iterator=self.iterator,
                          train_dataset=self.instances,
                          validation_dataset=self.instances,
                          num_epochs=2)
        metrics = trainer.train()
        assert 'best_validation_loss' in metrics
        assert isinstance(metrics['best_validation_loss'], float)
        assert 'best_epoch' in metrics
        assert isinstance(metrics['best_epoch'], int)

        # Making sure that both increasing and decreasing validation metrics work.
        trainer = Trainer(model=self.model,
                          optimizer=self.optimizer,
                          iterator=self.iterator,
                          train_dataset=self.instances,
                          validation_dataset=self.instances,
                          validation_metric='+loss',
                          num_epochs=2)
        metrics = trainer.train()
        assert 'best_validation_loss' in metrics
        assert isinstance(metrics['best_validation_loss'], float)
        assert 'best_epoch' in metrics
        assert isinstance(metrics['best_epoch'], int)

    @pytest.mark.skipif(not torch.cuda.is_available(), reason="No CUDA device registered.")
    def test_trainer_can_run_cuda(self):
        trainer = Trainer(self.model, self.optimizer,
                          self.iterator, self.instances, num_epochs=2,
                          cuda_device=0)
        trainer.train()

    @pytest.mark.skipif(torch.cuda.device_count() < 2,
                        reason="Need multiple GPUs.")
    def test_trainer_can_run_multiple_gpu(self):
        multigpu_iterator = BasicIterator(batch_size=4)
        multigpu_iterator.index_with(self.vocab)
        trainer = Trainer(self.model, self.optimizer,
                          multigpu_iterator, self.instances, num_epochs=2,
                          cuda_device=[0, 1])
        trainer.train()

    def test_trainer_can_resume_training(self):
        trainer = Trainer(self.model, self.optimizer,
                          self.iterator, self.instances,
                          validation_dataset=self.instances,
                          num_epochs=1, serialization_dir=self.TEST_DIR)
        trainer.train()
        new_trainer = Trainer(self.model, self.optimizer,
                              self.iterator, self.instances,
                              validation_dataset=self.instances,
                              num_epochs=3, serialization_dir=self.TEST_DIR)

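        # _restore_checkpoint reloads the model and training state saved under
        # serialization_dir and returns the number of completed epochs together
        # with the per-epoch validation metric history, so training resumes
        # where the first trainer stopped.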
        epoch, val_metrics_per_epoch = new_trainer._restore_checkpoint()  # pylint: disable=protected-access
        assert epoch == 1
        assert len(val_metrics_per_epoch) == 1
        assert isinstance(val_metrics_per_epoch[0], float)
        assert val_metrics_per_epoch[0] != 0.
        new_trainer.train()

    def test_should_stop_early_with_increasing_metric(self):
        new_trainer = Trainer(self.model, self.optimizer,
                              self.iterator, self.instances,
                              validation_dataset=self.instances,
                              num_epochs=3, serialization_dir=self.TEST_DIR,
                              patience=5, validation_metric="+test")
        assert new_trainer._should_stop_early([.5, .3, .2, .1, .4, .4])  # pylint: disable=protected-access
        assert not new_trainer._should_stop_early([.3, .3, .3, .2, .5, .1])  # pylint: disable=protected-access

    def test_should_stop_early_with_decreasing_metric(self):
        new_trainer = Trainer(self.model, self.optimizer,
                              self.iterator, self.instances,
                              validation_dataset=self.instances,
                              num_epochs=3, serialization_dir=self.TEST_DIR,
                              patience=5, validation_metric="-test")
        assert new_trainer._should_stop_early([.02, .3, .2, .1, .4, .4])  # pylint: disable=protected-access
        assert not new_trainer._should_stop_early([.3, .3, .2, .1, .4, .5])  # pylint: disable=protected-access
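    # Note: the two tests above pin the stopping rule down only through its
    # outputs.  One reading consistent with these assertions (an inference, not
    # necessarily the Trainer's exact implementation): once more than `patience`
    # epochs of history exist, stop if no value in the last `patience` epochs
    # improves on the best value seen before that window (the maximum for a "+"
    # metric, the minimum for a "-" metric).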

    def test_train_driver_raises_on_model_with_no_loss_key(self):

        class FakeModel(torch.nn.Module):
            def forward(self, **kwargs):  # pylint: disable=arguments-differ,unused-argument
                return {}
        with pytest.raises(ConfigurationError):
            trainer = Trainer(FakeModel(), self.optimizer,
                              self.iterator, self.instances,
                              num_epochs=2, serialization_dir=self.TEST_DIR)
            trainer.train()

    def test_trainer_can_log_histograms(self):
        # enable activation logging
        for module in self.model.modules():
            module.should_log_activations = True

        trainer = Trainer(self.model, self.optimizer,
                          self.iterator, self.instances, num_epochs=3,
                          serialization_dir=self.TEST_DIR,
                          histogram_interval=2)
        trainer.train()

    def test_trainer_respects_num_serialized_models_to_keep(self):
        trainer = Trainer(self.model, self.optimizer,
                          self.iterator, self.instances, num_epochs=5,
                          serialization_dir=self.TEST_DIR,
                          num_serialized_models_to_keep=3)
        trainer.train()

        # Now check the serialized files
        for prefix in ['model_state_epoch_*', 'training_state_epoch_*']:
            file_names = glob.glob(os.path.join(self.TEST_DIR, prefix))
            epochs = [int(re.search(r"_([0-9])\.th", fname).group(1))
                      for fname in file_names]
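            # Five epochs write files 0-4; keeping three leaves only the last three.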
            assert sorted(epochs) == [2, 3, 4]

    def test_trainer_respects_keep_serialized_model_every_num_seconds(self):
        # To test:
        #   Create an iterator that sleeps for 0.5 second per epoch, so the total training
        #       time for one epoch is slightly greater than 0.5 seconds.
        #   Run for 6 epochs, keeping only the last 2 models but also keeping a model
        #       every 1 second.
        #   Check the resulting checkpoints.  We should then have models at epochs
        #       2 and 4, plus the last two at 5 and 6.
        class WaitingIterator(BasicIterator):
            # pylint: disable=arguments-differ
            def _create_batches(self, *args, **kwargs):
                time.sleep(0.5)
                return super(WaitingIterator, self)._create_batches(*args, **kwargs)

        iterator = WaitingIterator(batch_size=2)
        iterator.index_with(self.vocab)

        trainer = Trainer(self.model, self.optimizer,
                          iterator, self.instances, num_epochs=6,
                          serialization_dir=self.TEST_DIR,
                          num_serialized_models_to_keep=2,
                          keep_serialized_model_every_num_seconds=1)
        trainer.train()

        # Now check the serialized files
        for prefix in ['model_state_epoch_*', 'training_state_epoch_*']:
            file_names = glob.glob(os.path.join(self.TEST_DIR, prefix))
            epochs = [int(re.search(r"_([0-9])\.th", fname).group(1))
                      for fname in file_names]
            # epoch N has N-1 in file name
            assert sorted(epochs) == [1, 3, 4, 5]

    def test_trainer_saves_models_at_specified_interval(self):
        iterator = BasicIterator(batch_size=4)
        iterator.index_with(self.vocab)

        trainer = Trainer(self.model, self.optimizer,
                          iterator, self.instances, num_epochs=2,
                          serialization_dir=self.TEST_DIR,
                          model_save_interval=0.0001)

        trainer.train()

        # Now check the serialized files for models saved during the epoch.
        prefix = 'model_state_epoch_*'
        file_names = sorted(glob.glob(os.path.join(self.TEST_DIR, prefix)))
        epochs = [re.search(r"_([0-9\.\-]+)\.th", fname).group(1)
                  for fname in file_names]
        # We should have checkpoints at the end of each epoch and during each, e.g.
        # [0.timestamp, 0, 1.timestamp, 1]
        assert len(epochs) == 4
        assert epochs[3] == '1'
        assert '.' in epochs[0]

        # Now make certain we can restore from a timestamped checkpoint.
        # To do so, remove the end-of-epoch checkpoints for both epochs, so
        # that we are forced to restore from the timestamped checkpoints.
        for k in range(2):
            os.remove(os.path.join(self.TEST_DIR, 'model_state_epoch_{}.th'.format(k)))
            os.remove(os.path.join(self.TEST_DIR, 'training_state_epoch_{}.th'.format(k)))
        os.remove(os.path.join(self.TEST_DIR, 'best.th'))

        restore_trainer = Trainer(self.model, self.optimizer,
                                  self.iterator, self.instances, num_epochs=2,
                                  serialization_dir=self.TEST_DIR,
                                  model_save_interval=0.0001)
        epoch, _ = restore_trainer._restore_checkpoint() # pylint: disable=protected-access
        assert epoch == 2
        # One batch per epoch.
        assert restore_trainer._batch_num_total == 2 # pylint: disable=protected-access