def default_callbacks(self,
                      validation_metric: str = "-loss",
                      patience: int = None,
                      max_checkpoints: int = 20,
                      checkpoint_every: int = None,
                      serialization_dir: str = "__DEFAULT__",
                      iterator: DataIterator = None,
                      validation_data: Iterable[Instance] = None,
                      validation_iterator: DataIterator = None,
                      batch_size: int = 2):
    """
    Build the default callback list used by the CallbackTrainer tests.

    Returns a list containing tensorboard logging, checkpointing, validation,
    metric tracking, supervised training, and batch-generation callbacks.
    Any argument left as ``None`` falls back to the test fixture's defaults
    (``self.TEST_DIR``, ``self.instances``, a fresh ``BasicIterator``).
    """
    if serialization_dir == "__DEFAULT__":
        # Sentinel value: default to the test's temporary directory.
        serialization_dir = self.TEST_DIR
    checkpointer = Checkpointer(serialization_dir, checkpoint_every, max_checkpoints)
    # The ``lambda: None`` is a placeholder; LogToTensorboard replaces the
    # get_batch_num_total function once training starts.
    tensorboard = TensorboardWriter(get_batch_num_total=lambda: None)
    if iterator is None:
        iterator = BasicIterator(batch_size=batch_size)
    iterator.index_with(self.vocab)
    return [
        LogToTensorboard(log_batch_size_period=10, tensorboard=tensorboard),
        Checkpoint(checkpointer),
        Validate(validation_data=self.instances if validation_data is None else validation_data,
                 validation_iterator=iterator if validation_iterator is None else validation_iterator),
        TrackMetrics(patience, validation_metric),
        TrainSupervised(),
        GenerateTrainingBatches(self.instances, iterator, True),
    ]
def default_callbacks(self,
                      validation_metric: str = "-loss",
                      patience: int = None,
                      max_checkpoints: int = 20,
                      checkpoint_every: int = None,
                      model_save_interval: float = None,
                      serialization_dir: str = "__DEFAULT__",
                      validation_data: Iterable[Instance] = None,
                      validation_iterator: DataIterator = None,
                      batch_size: int = 2):
    """
    Build the default callback list used by the CallbackTrainer tests.

    Returns tensorboard logging, checkpointing (optionally on a
    ``model_save_interval`` timer), validation, metric tracking, and
    gradient norm/clipping callbacks. Arguments left as ``None`` fall back
    to the fixture defaults (``self.TEST_DIR``, ``self.instances``, a fresh
    ``BasicIterator``).
    """
    if serialization_dir == "__DEFAULT__":
        # Sentinel value: default to the test's temporary directory.
        serialization_dir = self.TEST_DIR
    checkpointer = Checkpointer(serialization_dir, checkpoint_every, max_checkpoints)
    # The ``lambda: None`` is a placeholder; LogToTensorboard replaces the
    # get_batch_num_total function once training starts.
    tensorboard = TensorboardWriter(get_batch_num_total=lambda: None)
    if validation_iterator is None:
        validation_iterator = BasicIterator(batch_size=batch_size)
    validation_iterator.index_with(self.vocab)
    return [
        LogToTensorboard(log_batch_size_period=10, tensorboard=tensorboard),
        Checkpoint(checkpointer, model_save_interval),
        Validate(
            validation_data=self.instances if validation_data is None else validation_data,
            validation_iterator=validation_iterator,
        ),
        TrackMetrics(patience, validation_metric),
        GradientNormAndClip(),
    ]
def test_trainer_can_log_learning_rates_tensorboard(self):
    """Train for two epochs with learning-rate logging enabled and verify it runs."""
    # Drop the default tensorboard callback so we can install one that
    # logs learning rates on a short summary interval.
    callbacks = [cb for cb in self.default_callbacks()
                 if not isinstance(cb, LogToTensorboard)]
    # The ``lambda: None`` is unfortunate, but it will get replaced by the callback.
    tensorboard = TensorboardWriter(lambda: None,
                                    should_log_learning_rate=True,
                                    summary_interval=2)
    callbacks.append(LogToTensorboard(tensorboard))
    trainer = CallbackTrainer(self.model,
                              self.optimizer,
                              num_epochs=2,
                              serialization_dir=self.TEST_DIR,
                              callbacks=callbacks)
    trainer.train()
def test_trainer_can_log_histograms(self):
    """Train for three epochs with activation-histogram logging enabled and verify it runs."""
    # Enable activation logging on every module so histograms have data.
    for module in self.model.modules():
        module.should_log_activations = True
    # Drop the default tensorboard callback so we can install one that
    # logs histograms on a short interval.
    callbacks = [cb for cb in self.default_callbacks()
                 if not isinstance(cb, LogToTensorboard)]
    # The ``lambda: None`` is unfortunate, but it will get replaced by the callback.
    tensorboard = TensorboardWriter(lambda: None, histogram_interval=2)
    callbacks.append(LogToTensorboard(tensorboard))
    trainer = CallbackTrainer(self.model,
                              self.optimizer,
                              num_epochs=3,
                              serialization_dir=self.TEST_DIR,
                              callbacks=callbacks)
    trainer.train()
def test_model_training(self):
    """End-to-end training run (20 epochs, Adagrad) with tensorboard logging and validation."""
    training_dataset = self.sample_instances if self.sample_only else self.train_instances
    validation_dataset = self.sample_instances if self.sample_only else self.test_instances
    # NOTE(review): due to conditional-expression precedence this evaluates as
    # (self.TEST_DATA_ROOT / "serialized_sample") if sample_only else the bare
    # string "serialized" (relative to the CWD) — confirm that is intended.
    serialization_dir = self.TEST_DATA_ROOT / "serialized_sample" if self.sample_only else "serialized"
    tensorboard_dir = self.TEST_DATA_ROOT / "tensorboard"
    batch_size = 64
    train_iterator = BucketIterator(sorting_keys=[("question", "num_tokens")],
                                    padding_noise=0.0,
                                    batch_size=batch_size)
    val_iterator = BucketIterator(sorting_keys=[("question", "num_tokens")],
                                  padding_noise=0.0,
                                  batch_size=batch_size)
    train_iterator.index_with(vocab=self.vocab)
    val_iterator.index_with(vocab=self.vocab)
    tensorboard = TensorboardWriter(
        # Batches per epoch, used as the batch-number provider.
        get_batch_num_total=lambda: np.ceil(len(training_dataset) / batch_size),
        serialization_dir=tensorboard_dir,
        summary_interval=5,
        histogram_interval=5,
        should_log_parameter_statistics=True)
    # NOTE(review): cuda_device=0 hard-requires a GPU.
    trainer = CallbackTrainer(
        model=self.model,
        serialization_dir=serialization_dir,
        iterator=train_iterator,
        training_data=training_dataset,
        num_epochs=20,
        cuda_device=0,
        optimizer=torch.optim.Adagrad(self.model.parameters()),
        callbacks=[
            LogToTensorboard(tensorboard),
            Validate(validation_data=validation_dataset,
                     validation_iterator=val_iterator),
            TrackMetrics(),
            ResetMetricsCallback(),
        ])
    trainer.train()
    self.val_outputs_fp.close()
def test_model_training(self):
    """Seq2seq training smoke test: 50 one-epoch runs, then print predictions for 10 dev instances."""
    import itertools  # local import, used only for islice below

    serialization_dir = self.TEST_DATA_ROOT / "serialized_sample"
    tensorboard_dir = self.TEST_DATA_ROOT / "tensorboard.seq2seq"
    batch_size = 64
    train_iterator = BucketIterator(sorting_keys=[("source_tokens", "num_tokens")],
                                    padding_noise=0.0,
                                    batch_size=batch_size)
    train_iterator.index_with(vocab=self.vocab)
    tensorboard = TensorboardWriter(
        # Batches per epoch, used as the batch-number provider.
        get_batch_num_total=lambda: np.ceil(len(self.train_instances) / batch_size),
        serialization_dir=tensorboard_dir,
        summary_interval=5,
        histogram_interval=5,
        should_log_parameter_statistics=True)
    # NOTE(review): cuda_device=0 hard-requires a GPU.
    trainer = CallbackTrainer(
        model=self.model,
        serialization_dir=serialization_dir,
        iterator=train_iterator,
        training_data=self.train_instances,
        num_epochs=1,
        cuda_device=0,
        optimizer=torch.optim.Adam(self.model.parameters(), lr=1e-3),
        callbacks=[
            LogToTensorboard(tensorboard),
            Validate(validation_data=self.dev_instances,
                     validation_iterator=train_iterator),
            TrackMetrics(),
            ResetMetricsCallback(),
        ])
    # num_epochs=1 per call; loop to train for 50 epochs with a log line each.
    for epoch in range(50):
        print('Epoch: {}'.format(epoch))
        trainer.train()

    # Eyeball a handful of dev predictions against the gold targets.
    predictor = Seq2SeqPredictor(self.model, self.reader)
    for instance in itertools.islice(self.dev_instances, 10):
        print('SOURCE:', instance.fields['source_tokens'].tokens)
        print('GOLD:', instance.fields['target_tokens'].tokens)
        print('PRED:', predictor.predict_instance(instance)['predicted_tokens'])
def test_model_training(self):
    """Seq2seq training run with a multiprocess iterator, cosine LR schedule, and validation logging."""
    training_dataset = self.sample_instances if self.sample_only else self.train_instances
    validation_dataset = self.sample_instances if self.sample_only else self.test_instances
    # NOTE(review): due to conditional-expression precedence this evaluates as
    # (self.TEST_DATA_ROOT / "serialized_sample") if sample_only else the bare
    # string "serialized" (relative to the CWD) — confirm that is intended.
    serialization_dir = self.TEST_DATA_ROOT / "serialized_sample" if self.sample_only else "serialized"
    tensorboard_dir = self.TEST_DATA_ROOT / "tensorboard.seq2seq"
    batch_size = 64
    train_iterator = BucketIterator(sorting_keys=[("source_tokens", "num_tokens")],
                                    padding_noise=0.1,
                                    batch_size=batch_size)
    train_iterator.index_with(vocab=self.vocab)
    multiproc_iterator = MultiprocessIterator(train_iterator,
                                              num_workers=4,
                                              output_queue_size=6000)
    tensorboard = TensorboardWriter(
        # Batches per epoch, used as the batch-number provider.
        get_batch_num_total=lambda: np.ceil(len(training_dataset) / batch_size),
        serialization_dir=tensorboard_dir,
        summary_interval=5,
        histogram_interval=5,
        should_log_parameter_statistics=True,
        should_log_learning_rate=True)
    optimizer = torch.optim.Adam(self.model.parameters(), lr=1e-3)
    scheduler = CosineWithRestarts(optimizer=optimizer, t_initial=5)
    # NOTE(review): cuda_device=0 hard-requires a GPU; training_data uses
    # self.train_instances, not the sample-aware training_dataset above —
    # confirm that is intended.
    trainer = CallbackTrainer(
        model=self.model,
        serialization_dir=serialization_dir,
        iterator=multiproc_iterator,
        training_data=self.train_instances,
        num_epochs=100,
        cuda_device=0,
        optimizer=optimizer,
        callbacks=[
            LogToTensorboard(tensorboard),
            Validate(validation_data=self.test_instances,
                     validation_iterator=multiproc_iterator),
            TrackMetrics(),
            ResetMetricsCallback(),
            UpdateLearningRate(scheduler),
            ValidationLogCallback(self.train_reader, self.test_instances),
        ])
    # A commented-out alternative (plain Trainer with a manual 50-epoch loop
    # and Seq2SeqPredictor sample printing) was removed as dead code.
    trainer.train()