class ExperimentTest(TestCase):
    """Integration tests for ``Experiment``.

    Covers the training entry points (``train``, ``train_dataset``,
    ``train_data``), resuming, the ``logging=False`` mode, the testing entry
    points, the on-disk artifacts (checkpoints, TSV logs, metric plots), and
    ``is_better_than`` comparisons between two experiments.
    """

    # Number of epochs used by every training-based test below.
    NUM_EPOCHS = 5

    def setUp(self):
        # Fresh temp directory per test so on-disk artifacts never leak
        # between tests; tearDown removes it.
        self.temp_dir_obj = TemporaryDirectory()
        self.test_checkpoints_path = os.path.join(self.temp_dir_obj.name, 'expt')
        self.test_experiment = Experiment(
            self.test_checkpoints_path,
            nn.Linear(1, 1),
            optimizer='sgd',
            loss_function='mse',
            monitor_metric="loss",
            monitor_mode="min",
        )
        # Paths of every file the Experiment is expected to write under the
        # checkpoint directory during a logged training run.
        self.ckpt_1_path = os.path.join(self.test_checkpoints_path, "checkpoint_epoch_1.ckpt")
        self.ckpt_last_path = os.path.join(self.test_checkpoints_path, "checkpoint.ckpt")
        self.optim_ckpt_path = os.path.join(self.test_checkpoints_path, "checkpoint.optim")
        self.tsv_log_path = os.path.join(self.test_checkpoints_path, "log.tsv")
        self.tsv_test_log_path = os.path.join(self.test_checkpoints_path, "test_log.tsv")
        self.epoch_file_path = os.path.join(self.test_checkpoints_path, "last.epoch")
        # Metric plots (PNG + PDF per metric) go in a "plots" subdirectory.
        self.time_metric_plot_png_file_path = os.path.join(
            self.test_checkpoints_path, "plots", 'time.png')
        self.time_metric_plot_pdf_file_path = os.path.join(
            self.test_checkpoints_path, "plots", 'time.pdf')
        self.loss_metric_plot_png_file_path = os.path.join(
            self.test_checkpoints_path, "plots", 'loss.png')
        self.loss_metric_plot_pdf_file_path = os.path.join(
            self.test_checkpoints_path, "plots", 'loss.pdf')

    def tearDown(self):
        self.temp_dir_obj.cleanup()

    def test_integration_train(self):
        train_generator = SomeDataGeneratorWithLen(32, 10, 0)
        valid_generator = SomeDataGeneratorWithLen(32, 10, 0)
        logs = self.test_experiment.train(train_generator, valid_generator, epochs=ExperimentTest.NUM_EPOCHS)
        self._test_train_integration(logs)

    def test_integration_train_dataset(self):
        train_dataset = TensorDataset(torch.rand(32 * 10, 1), torch.rand(32 * 10, 1))
        valid_dataset = TensorDataset(torch.rand(32 * 10, 1), torch.rand(32 * 10, 1))
        logs = self.test_experiment.train_dataset(
            train_dataset, valid_dataset, epochs=ExperimentTest.NUM_EPOCHS)
        self._test_train_integration(logs)

    def test_integration_train_data(self):
        train_x, train_y = torch.rand(32 * 10, 1), torch.rand(32 * 10, 1)
        validation_data = torch.rand(32 * 10, 1), torch.rand(32 * 10, 1)
        logs = self.test_experiment.train_data(
            train_x, train_y, validation_data, epochs=ExperimentTest.NUM_EPOCHS)
        self._test_train_integration(logs)

    def test_train_resume(self):
        # Train once, then call train() again with a larger epoch count: the
        # second run must resume at epoch NUM_EPOCHS + 1 rather than restart.
        train_generator = SomeDataGeneratorWithLen(32, 10, 0)
        valid_generator = SomeDataGeneratorWithLen(32, 10, 0)
        logs = self.test_experiment.train(train_generator, valid_generator, epochs=ExperimentTest.NUM_EPOCHS)
        self._test_train_integration(logs)
        epochs = ExperimentTest.NUM_EPOCHS + 10
        logs = self.test_experiment.train(train_generator, valid_generator, epochs=epochs)
        self._test_train_integration(logs, epochs=epochs, initial_epoch=ExperimentTest.NUM_EPOCHS + 1)

    def test_train_no_log(self):
        # With logging=False, nothing at all is written to disk, but the
        # returned logs still contain the usual per-epoch entries.
        test_experiment = Experiment(
            self.test_checkpoints_path,
            nn.Linear(1, 1),
            optimizer='sgd',
            loss_function='mse',
            monitor_metric="loss",
            monitor_mode="min",
            logging=False,
        )
        train_generator = SomeDataGeneratorWithLen(32, 10, 0)
        valid_generator = SomeDataGeneratorWithLen(32, 10, 0)
        logs = test_experiment.train(train_generator, valid_generator, epochs=ExperimentTest.NUM_EPOCHS)
        self.assertFalse(os.path.isdir(self.test_checkpoints_path))
        self.assertFalse(os.path.isfile(self.ckpt_1_path))
        self.assertFalse(os.path.isfile(self.ckpt_last_path))
        self.assertFalse(os.path.isfile(self.optim_ckpt_path))
        self.assertFalse(os.path.isfile(self.tsv_log_path))
        self.assertFalse(os.path.isfile(self.epoch_file_path))
        self.assertFalse(os.path.isfile(self.tsv_test_log_path))
        self.assertFalse(os.path.isfile(self.time_metric_plot_png_file_path))
        self.assertFalse(os.path.isfile(self.time_metric_plot_pdf_file_path))
        self.assertFalse(os.path.isfile(self.loss_metric_plot_png_file_path))
        self.assertFalse(os.path.isfile(self.loss_metric_plot_pdf_file_path))
        self.assertEqual(len(logs), ExperimentTest.NUM_EPOCHS)
        for i, log in enumerate(logs, 1):
            self.assertIn('epoch', log)
            self.assertEqual(log['epoch'], i)
            self.assertIn('loss', log)
            self.assertIn('val_loss', log)
            self.assertIn('time', log)

    def _test_train_integration(self, logs, epochs=NUM_EPOCHS, initial_epoch=1):
        # Shared assertions for a successful (logged) training run: every
        # artifact exists, the logs cover [initial_epoch, epochs], and the
        # last.epoch file records the final epoch number.
        self.assertTrue(os.path.isdir(self.test_checkpoints_path))
        self.assertTrue(os.path.isfile(self.ckpt_1_path))
        self.assertTrue(os.path.isfile(self.ckpt_last_path))
        self.assertTrue(os.path.isfile(self.optim_ckpt_path))
        self.assertTrue(os.path.isfile(self.tsv_log_path))
        self.assertTrue(os.path.isfile(self.epoch_file_path))
        self.assertTrue(os.path.isfile(self.time_metric_plot_png_file_path))
        self.assertTrue(os.path.isfile(self.time_metric_plot_pdf_file_path))
        self.assertTrue(os.path.isfile(self.loss_metric_plot_png_file_path))
        self.assertTrue(os.path.isfile(self.loss_metric_plot_pdf_file_path))
        # The test log is only produced by test(); training must not create it.
        self.assertFalse(os.path.isfile(self.tsv_test_log_path))
        self.assertEqual(len(logs), epochs - initial_epoch + 1)
        for i, log in enumerate(logs, initial_epoch):
            self.assertIn('epoch', log)
            self.assertEqual(log['epoch'], i)
            self.assertIn('loss', log)
            self.assertIn('val_loss', log)
            self.assertIn('time', log)
        with open(self.epoch_file_path, 'r', encoding='utf-8') as fd:
            epoch = int(fd.read())
        self.assertEqual(epoch, epochs)

    def test_integration_test(self):
        self._train_expt()
        generator = SomeDataGeneratorWithLen(32, 10, 0)
        logs = self.test_experiment.test(generator)
        self._test_test_integration(logs)

    def test_integration_test_dataset(self):
        self._train_expt()
        dataset = TensorDataset(torch.rand(32 * 10, 1), torch.rand(32 * 10, 1))
        logs = self.test_experiment.test_dataset(dataset)
        self._test_test_integration(logs)

    def test_integration_test_data(self):
        self._train_expt()
        x, y = torch.rand(32 * 10, 1), torch.rand(32 * 10, 1)
        log = self.test_experiment.test_data(x, y)
        self._test_test_integration(log)

    def _train_expt(self):
        # Helper: run a full training so the test() methods have a trained
        # experiment and an existing checkpoint directory to work with.
        train_generator = SomeDataGeneratorWithLen(32, 10, 0)
        valid_generator = SomeDataGeneratorWithLen(32, 10, 0)
        self.test_experiment.train(train_generator, valid_generator, epochs=ExperimentTest.NUM_EPOCHS)

    def _test_test_integration(self, log):
        # Shared assertions after a test() run: the test log exists and the
        # returned log dict has the expected keys.
        self.assertTrue(os.path.isfile(self.tsv_test_log_path))
        self.assertIn('test_loss', log)
        self.assertIn('time', log)

    def setUpTwoExperiment(self, a_params=None, b_params=None):
        # Helper for the is_better_than tests: builds two experiments (a and
        # b) in sibling directories with optional extra constructor kwargs.
        self.test_checkpoints_path_b = os.path.join(self.temp_dir_obj.name, 'expt_b')
        self.test_checkpoints_path_a = os.path.join(self.temp_dir_obj.name, 'expt_a')
        if a_params is None:
            a_params = {}
        self.test_experiment_a = Experiment(self.test_checkpoints_path_a, nn.Linear(1, 1), **a_params)
        if b_params is None:
            b_params = {}
        self.test_experiment_b = Experiment(self.test_checkpoints_path_b, nn.Linear(1, 1), **b_params)

    def test_givenAIsSmallerThanBMinMonitoring_thenReturnTrue(self):
        self.setUpTwoExperiment()
        # load_checkpoint is mocked so no real checkpoint files are needed;
        # side_effect yields a's metric first, then b's.
        with patch.object(Experiment, "load_checkpoint") as load_checkpoint_mock:
            mocked_stats = MagicMock(spec=DataFrame)
            series_mock = MagicMock(spec=Series)
            series_mock.item.side_effect = [[1], [2]]  # The monitored metric values
            mocked_stats.__getitem__.return_value = series_mock
            load_checkpoint_mock.return_value = mocked_stats
            self.assertTrue(
                self.test_experiment_a.is_better_than(self.test_experiment_b))

    def test_givenAIsGreaterThanBMinMonitoring_thenReturnFalse(self):
        self.setUpTwoExperiment()
        with patch.object(Experiment, "load_checkpoint") as load_checkpoint_mock:
            mocked_stats = MagicMock(spec=DataFrame)
            series_mock = MagicMock(spec=Series)
            series_mock.item.side_effect = [[2], [1]]  # The monitored metric values
            mocked_stats.__getitem__.return_value = series_mock
            load_checkpoint_mock.return_value = mocked_stats
            self.assertFalse(
                self.test_experiment_a.is_better_than(self.test_experiment_b))

    def test_givenAIsSmallerThanBMaxMonitoring_thenReturnFalse(self):
        # We need to specify the metric for the mode to be properly set to "max"
        params = {"monitor_mode": "max", "monitor_metric": "loss"}
        self.setUpTwoExperiment(a_params=params, b_params=params)
        with patch.object(Experiment, "load_checkpoint") as load_checkpoint_mock:
            mocked_stats = MagicMock(spec=DataFrame)
            series_mock = MagicMock(spec=Series)
            series_mock.item.side_effect = [[1], [2]]  # The monitored metric values
            mocked_stats.__getitem__.return_value = series_mock
            load_checkpoint_mock.return_value = mocked_stats
            self.assertFalse(
                self.test_experiment_a.is_better_than(self.test_experiment_b))

    def test_givenAIsGreaterThanBMaxMonitoring_thenReturnTrue(self):
        # We need to specify the metric for the mode to be properly set to "max"
        params = {"monitor_mode": "max", "monitor_metric": "loss"}
        self.setUpTwoExperiment(a_params=params, b_params=params)
        with patch.object(Experiment, "load_checkpoint") as load_checkpoint_mock:
            mocked_stats = MagicMock(spec=DataFrame)
            series_mock = MagicMock(spec=Series)
            series_mock.item.side_effect = [[2], [1]]  # The monitored metric values
            mocked_stats.__getitem__.return_value = series_mock
            load_checkpoint_mock.return_value = mocked_stats
            self.assertTrue(
                self.test_experiment_a.is_better_than(self.test_experiment_b))

    def test_givenSomeExperimentNotLogging_thenRaiseValueError(self):
        # Comparison is only meaningful when both experiments log their stats;
        # all three combinations with a non-logging experiment must raise.
        params_a = {"logging": False}
        params_b = {"logging": True}
        self.setUpTwoExperiment(a_params=params_a, b_params=params_b)
        with self.assertRaises(ValueError):
            self.test_experiment_a.is_better_than(self.test_experiment_b)

        params_a = {"logging": True}
        params_b = {"logging": False}
        self.setUpTwoExperiment(a_params=params_a, b_params=params_b)
        with self.assertRaises(ValueError):
            self.test_experiment_a.is_better_than(self.test_experiment_b)

        params = {"logging": False}
        self.setUpTwoExperiment(a_params=params, b_params=params)
        with self.assertRaises(ValueError):
            self.test_experiment_a.is_better_than(self.test_experiment_b)

    def test_givenDifferentMonitorMetric_thenRaiseValueError(self):
        params_a = {"monitor_metric": "loss"}
        params_b = {"monitor_metric": "acc"}
        self.setUpTwoExperiment(a_params=params_a, b_params=params_b)
        with self.assertRaises(ValueError):
            self.test_experiment_a.is_better_than(self.test_experiment_b)

    def test_givenDifferentMonitorMode_thenRaiseValueError(self):
        # We need to specify the metric for the mode to be properly set to "max"
        params_a = {"monitor_mode": "max", "monitor_metric": "loss"}
        params_b = {"monitor_mode": "min"}
        self.setUpTwoExperiment(a_params=params_a, b_params=params_b)
        with self.assertRaises(ValueError):
            self.test_experiment_a.is_better_than(self.test_experiment_b)
# Example script: train a small fully-connected classifier on MNIST using the
# Experiment API (logs checkpoints and stats under ./simple_model).

import torch
import torch.nn as nn
from torch.utils.data import random_split
from torchvision.datasets import MNIST
from torchvision.transforms import ToTensor

from poutyne import Experiment

# Instantiate the MNIST dataset
train_valid_dataset = MNIST('./datasets', train=True, download=True, transform=ToTensor())
test_dataset = MNIST('./datasets', train=False, download=True, transform=ToTensor())
# Fixed-seed split so the train/validation partition is reproducible.
train_dataset, valid_dataset = random_split(train_valid_dataset, [50_000, 10_000],
                                            generator=torch.Generator().manual_seed(42))

# Select CUDA device if available
cuda_device = 0
device = torch.device('cuda:%d' % cuda_device if torch.cuda.is_available() else 'cpu')

# Define the network
network = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 100), nn.ReLU(), nn.Linear(100, 10))
epochs = 5

# Define the Experiment and train
experiment = Experiment(
    './simple_model',  # Where to log
    network,
    optimizer='sgd',
    loss_function='cross_entropy',
    device=device
)
experiment.train_dataset(train_dataset, valid_dataset, epochs=epochs)
class ExperimentTest(TestCase): NUM_EPOCHS = 5 def setUp(self): self.temp_dir_obj = TemporaryDirectory() self.test_checkpoints_path = os.path.join(self.temp_dir_obj.name, 'expt') self.test_experiment = Experiment(self.test_checkpoints_path, nn.Linear(1, 1), optimizer='sgd', loss_function='mse', monitor_metric="loss", monitor_mode="min") self.ckpt_1_path = os.path.join(self.test_checkpoints_path, "checkpoint_epoch_1.ckpt") self.ckpt_last_path = os.path.join(self.test_checkpoints_path, "checkpoint.ckpt") self.optim_ckpt_path = os.path.join(self.test_checkpoints_path, "checkpoint.optim") self.tsv_log_path = os.path.join(self.test_checkpoints_path, "log.tsv") self.tsv_test_log_path = os.path.join(self.test_checkpoints_path, "test_log.tsv") self.epoch_file_path = os.path.join(self.test_checkpoints_path, "last.epoch") def tearDown(self): self.temp_dir_obj.cleanup() def test_integration_train(self): train_generator = SomeDataGeneratorWithLen(32, 10, 0) valid_generator = SomeDataGeneratorWithLen(32, 10, 0) logs = self.test_experiment.train(train_generator, valid_generator, epochs=ExperimentTest.NUM_EPOCHS) self._test_train_integration(logs) def test_integration_train_dataset(self): train_dataset = TensorDataset(torch.rand(32 * 10, 1), torch.rand(32 * 10, 1)) valid_dataset = TensorDataset(torch.rand(32 * 10, 1), torch.rand(32 * 10, 1)) logs = self.test_experiment.train_dataset( train_dataset, valid_dataset, epochs=ExperimentTest.NUM_EPOCHS) self._test_train_integration(logs) def test_integration_train_data(self): train_x, train_y = torch.rand(32 * 10, 1), torch.rand(32 * 10, 1) validation_data = torch.rand(32 * 10, 1), torch.rand(32 * 10, 1) logs = self.test_experiment.train_data( train_x, train_y, validation_data, epochs=ExperimentTest.NUM_EPOCHS) self._test_train_integration(logs) def test_train_resume(self): train_generator = SomeDataGeneratorWithLen(32, 10, 0) valid_generator = SomeDataGeneratorWithLen(32, 10, 0) logs = self.test_experiment.train(train_generator, 
valid_generator, epochs=ExperimentTest.NUM_EPOCHS) self._test_train_integration(logs) epochs = ExperimentTest.NUM_EPOCHS + 10 logs = self.test_experiment.train(train_generator, valid_generator, epochs=epochs) self._test_train_integration(logs, epochs=epochs, initial_epoch=ExperimentTest.NUM_EPOCHS + 1) def test_train_no_log(self): test_experiment = Experiment(self.test_checkpoints_path, nn.Linear(1, 1), optimizer='sgd', loss_function='mse', monitor_metric="loss", monitor_mode="min", logging=False) train_generator = SomeDataGeneratorWithLen(32, 10, 0) valid_generator = SomeDataGeneratorWithLen(32, 10, 0) logs = test_experiment.train(train_generator, valid_generator, epochs=ExperimentTest.NUM_EPOCHS) self.assertFalse(os.path.isdir(self.test_checkpoints_path)) self.assertFalse(os.path.isfile(self.ckpt_1_path)) self.assertFalse(os.path.isfile(self.ckpt_last_path)) self.assertFalse(os.path.isfile(self.optim_ckpt_path)) self.assertFalse(os.path.isfile(self.tsv_log_path)) self.assertFalse(os.path.isfile(self.epoch_file_path)) self.assertFalse(os.path.isfile(self.tsv_test_log_path)) self.assertEqual(len(logs), ExperimentTest.NUM_EPOCHS) for i, log in enumerate(logs, 1): self.assertIn('epoch', log) self.assertEqual(log['epoch'], i) self.assertIn('loss', log) self.assertIn('val_loss', log) self.assertIn('time', log) def _test_train_integration(self, logs, epochs=NUM_EPOCHS, initial_epoch=1): self.assertTrue(os.path.isdir(self.test_checkpoints_path)) self.assertTrue(os.path.isfile(self.ckpt_1_path)) self.assertTrue(os.path.isfile(self.ckpt_last_path)) self.assertTrue(os.path.isfile(self.optim_ckpt_path)) self.assertTrue(os.path.isfile(self.tsv_log_path)) self.assertTrue(os.path.isfile(self.epoch_file_path)) self.assertFalse(os.path.isfile(self.tsv_test_log_path)) self.assertEqual(len(logs), epochs - initial_epoch + 1) for i, log in enumerate(logs, initial_epoch): self.assertIn('epoch', log) self.assertEqual(log['epoch'], i) self.assertIn('loss', log) self.assertIn('val_loss', 
log) self.assertIn('time', log) with open(self.epoch_file_path, 'r') as fd: epoch = int(fd.read()) self.assertEqual(epoch, epochs) def test_integration_test(self): self._train_expt() generator = SomeDataGeneratorWithLen(32, 10, 0) logs = self.test_experiment.test(generator) self._test_test_integration(logs) def test_integration_test_dataset(self): self._train_expt() dataset = TensorDataset(torch.rand(32 * 10, 1), torch.rand(32 * 10, 1)) logs = self.test_experiment.test_dataset(dataset) self._test_test_integration(logs) def test_integration_test_data(self): self._train_expt() x, y = torch.rand(32 * 10, 1), torch.rand(32 * 10, 1) log = self.test_experiment.test_data(x, y) self._test_test_integration(log) def _train_expt(self): train_generator = SomeDataGeneratorWithLen(32, 10, 0) valid_generator = SomeDataGeneratorWithLen(32, 10, 0) self.test_experiment.train(train_generator, valid_generator, epochs=ExperimentTest.NUM_EPOCHS) def _test_test_integration(self, log): self.assertTrue(os.path.isfile(self.tsv_test_log_path)) self.assertIn('test_loss', log) self.assertIn('time', log)
        # NOTE(review): this chunk begins mid-method — the enclosing class and
        # the def header of this fragment are outside the visible source.
        # Presumably this is the tail of an on-epoch-end hook of
        # TrainingAlertCallback; indentation reconstructed — TODO confirm.
        if epoch_number % self.alert_frequency == 0:
            self.notif.send_notification(f"Epoch {epoch_number} is done.")

    def on_train_end(self, logs: Dict):
        # Final alert once training completes.
        self.notif.send_notification("End of the training.")


your_email = "your_email"
your_email_login_credential = "your_password"
email_provider = smtplib.SMTP('smtp.email_provider.com',
                              587)  # email_provider such as gmail or live # 587 for TLS port

# Alert at start/end of the training and at every epoch.
training_alert_callback = TrainingAlertCallback(your_email,
                                                your_email_login_credential,
                                                email_provider,
                                                alert_frequency=1)

# Define the Experiment and train
experiment = Experiment(
    './simple_model',  # Where to log
    network,
    optimizer='sgd',
    loss_function='cross_entropy',
    device=device)
experiment.train_dataset(train_dataset, valid_dataset, epochs=epochs, callbacks=[training_alert_callback])