class DelayCallbackTest(TestCase):
    """Tests for DelayCallback: events must be forwarded to the wrapped
    (mocked) callback only once the configured epoch/batch delay elapsed."""

    # Training configuration shared by all tests in this class.
    epochs = 10
    steps_per_epoch = 5
    batch_size = 20

    def setUp(self):
        """Build a minimal linear model and a mocked wrapped callback."""
        torch.manual_seed(42)  # deterministic weights across runs
        self.pytorch_module = nn.Linear(1, 1)
        self.loss_function = nn.MSELoss()
        self.optimizer = torch.optim.SGD(self.pytorch_module.parameters(), lr=1e-3)
        self.model = Model(self.pytorch_module, self.optimizer, self.loss_function)
        self.mock_callback = MagicMock()
        self.delay_callback = DelayCallback(self.mock_callback)
        # ANY placeholders: exact loss values are irrelevant to these tests.
        self.train_dict = {'loss': ANY}
        self.log_dict = {'loss': ANY, 'val_loss': ANY}

    def test_epoch_delay(self):
        """With epoch_delay=N, the wrapped callback sees epochs N+1 onward."""
        epoch_delay = 4
        delay_callback = DelayCallback(self.mock_callback, epoch_delay=epoch_delay)
        train_generator = some_data_generator(DelayCallbackTest.batch_size)
        valid_generator = some_data_generator(DelayCallbackTest.batch_size)
        logs = self.model.fit_generator(
            train_generator,
            valid_generator,
            epochs=DelayCallbackTest.epochs,
            steps_per_epoch=DelayCallbackTest.steps_per_epoch,
            validation_steps=DelayCallbackTest.steps_per_epoch,
            callbacks=[delay_callback])
        params = {
            'epochs': DelayCallbackTest.epochs,
            'steps': DelayCallbackTest.steps_per_epoch
        }
        # Rebuild the exact call sequence the mock is expected to receive.
        call_list = []
        call_list.append(call.on_train_begin({}))
        for epoch in range(epoch_delay + 1, DelayCallbackTest.epochs + 1):
            call_list.append(call.on_epoch_begin(epoch, {}))
            for step in range(1, params['steps'] + 1):
                call_list.append(call.on_batch_begin(step, {}))
                call_list.append(
                    call.on_batch_end(
                        step, {
                            'batch': step,
                            'size': DelayCallbackTest.batch_size,
                            **self.train_dict
                        }))
            call_list.append(
                call.on_epoch_end(epoch, {
                    'epoch': epoch,
                    **self.log_dict
                }))
        call_list.append(call.on_train_end({}))

        method_calls = self.mock_callback.method_calls
        # set_model/set_params may come in either order, hence assertIn on [:2].
        self.assertIn(call.set_model(self.model), method_calls[:2])
        self.assertIn(call.set_params(params), method_calls[:2])
        self.assertEqual(len(method_calls), len(call_list) + 2)
        self.assertEqual(method_calls[2:], call_list)

    def test_batch_delay_in_middle_of_epoch(self):
        self._test_batch_delay(epoch_delay=5, batch_in_epoch_delay=3)

    def test_batch_delay_at_begin_of_epoch(self):
        self._test_batch_delay(epoch_delay=5, batch_in_epoch_delay=0)

    def test_batch_delay_when_no_delay(self):
        self._test_batch_delay(epoch_delay=0, batch_in_epoch_delay=0)

    def _test_batch_delay(self, epoch_delay, batch_in_epoch_delay):
        """Check batch-granular delays expressed as whole epochs plus a
        batch offset inside the first forwarded epoch."""
        batch_delay = epoch_delay * DelayCallbackTest.steps_per_epoch + batch_in_epoch_delay
        delay_callback = DelayCallback(self.mock_callback, batch_delay=batch_delay)
        train_generator = some_data_generator(DelayCallbackTest.batch_size)
        valid_generator = some_data_generator(DelayCallbackTest.batch_size)
        logs = self.model.fit_generator(
            train_generator,
            valid_generator,
            epochs=DelayCallbackTest.epochs,
            steps_per_epoch=DelayCallbackTest.steps_per_epoch,
            validation_steps=DelayCallbackTest.steps_per_epoch,
            callbacks=[delay_callback])
        params = {
            'epochs': DelayCallbackTest.epochs,
            'steps': DelayCallbackTest.steps_per_epoch
        }
        call_list = []
        call_list.append(call.on_train_begin({}))
        for epoch in range(epoch_delay + 1, DelayCallbackTest.epochs + 1):
            call_list.append(call.on_epoch_begin(epoch, {}))
            # The first forwarded epoch may start mid-epoch when the delay
            # does not fall on an epoch boundary.
            start_step = batch_in_epoch_delay + 1 if epoch == epoch_delay + 1 else 1
            for step in range(start_step, params['steps'] + 1):
                call_list.append(call.on_batch_begin(step, {}))
                call_list.append(
                    call.on_batch_end(
                        step, {
                            'batch': step,
                            'size': DelayCallbackTest.batch_size,
                            **self.train_dict
                        }))
            call_list.append(
                call.on_epoch_end(epoch, {
                    'epoch': epoch,
                    **self.log_dict
                }))
        call_list.append(call.on_train_end({}))

        method_calls = self.mock_callback.method_calls
        self.assertIn(call.set_model(self.model), method_calls[:2])
        self.assertIn(call.set_params(params), method_calls[:2])
        self.assertEqual(len(method_calls), len(call_list) + 2)
        self.assertEqual(method_calls[2:], call_list)
class GroupSparseNN:
    """PyToune-based wrapper around a fully-connected network for
    multi-label classification.

    The network, loss and optimizer are assembled in the constructor;
    training and evaluation are delegated to a pytoune ``Model``.
    """

    def __init__(self, input_dim, output_dim, loss_function=None,
                 activation_function='ReLu', layers_sizes=None, lr=0.001):
        """
        :param input_dim: number of input features
        :param output_dim: number of output labels
        :param loss_function: optional loss; defaults to MultiLabelSoftMarginLoss
        :param activation_function: activation name passed to FullyConnectedNN
        :param layers_sizes: optional list of hidden layer sizes (default: none)
        :param lr: learning rate for the Adam optimizer
        """
        super().__init__()
        self.lr = lr
        # Fix mutable-default-argument pitfall: the previous default of `[]`
        # would have been shared across all instances/calls.
        if layers_sizes is None:
            layers_sizes = []
        self.fc_network = FullyConnectedNN(input_dim, output_dim, activation_function, layers_sizes)
        if torch.cuda.is_available():
            self.fc_network.cuda()
        if loss_function is None:
            self.loss = nn.MultiLabelSoftMarginLoss()
        else:
            self.loss = loss_function
        optimizer = optim.Adam(self.fc_network.parameters(), lr=self.lr)
        # Pytoune Encapsulation
        self.model = Model(self.fc_network, optimizer, self.loss)

    def fit(self, x_train, y_train, x_valid, y_valid, n_epochs=100, batch_size=32,
            log_filename=None, checkpoint_filename=None, with_early_stopping=True):
        """
        :param x_train: training set examples
        :param y_train: training set labels
        :param x_valid: testing set examples
        :param y_valid: testing set labels
        :param n_epochs: int, number of epoch default value 100
        :param batch_size: int, size of the batch default value 32, must be multiple of 2
        :param log_filename: optional, to output the training informations
        :param checkpoint_filename: optional, to save the model
        :param with_early_stopping: to activate the early stopping or not
        :return: self, the model
        """
        callbacks = []
        if with_early_stopping:
            early_stopping = EarlyStopping(monitor='val_loss', patience=3, verbose=0)
            callbacks += [early_stopping]
        reduce_lr = ReduceLROnPlateau(monitor='loss', patience=2, factor=1 / 10, min_lr=1e-6)
        best_model_restore = BestModelRestore()
        callbacks += [reduce_lr, best_model_restore]
        if log_filename:
            logger = CSVLogger(log_filename, batch_granularity=False, separator='\t')
            callbacks += [logger]
        if checkpoint_filename:
            checkpointer = ModelCheckpoint(checkpoint_filename, monitor='val_loss', save_best_only=True)
            callbacks += [checkpointer]
        # Floor division: incomplete final batches are dropped during
        # training, matching the original int(len/batch_size) behaviour.
        nb_steps_train = len(x_train) // batch_size
        nb_step_valid = len(x_valid) // batch_size
        self.model.fit_generator(
            generator(x_train, y_train, batch_size),
            steps_per_epoch=nb_steps_train,
            valid_generator=generator(x_valid, y_valid, batch_size),
            validation_steps=nb_step_valid,
            epochs=n_epochs,
            callbacks=callbacks,
        )
        return self

    def evaluate(self, x, y, batch_size=16, metrics=None):
        """Evaluate the model on (x, y) and report each metric by name.

        :param x: examples
        :param y: labels
        :param batch_size: evaluation batch size
        :param metrics: list of metric callables; defaults to [hamming_loss]
        :return: dict mapping metric ``__name__`` to its value
        """
        if metrics is None:
            metrics = [hamming_loss]
        # NOTE(review): `fit` passes the batch size to generator()
        # positionally while this call uses the `batch` keyword — confirm
        # both match generator()'s signature.
        valid_gen = generator(x, y, batch=batch_size)
        # ceil: evaluation must cover the final, possibly incomplete batch.
        nsteps = ceil(len(x) / (batch_size * 1.0))
        _, y_pred = self.model.evaluate_generator(valid_gen, steps=nsteps, return_pred=True)
        y_pred = np.concatenate(y_pred, axis=0)
        if torch.cuda.is_available():
            y_true = y.cpu().numpy()
        else:
            y_true = y.numpy()
        res = {
            metric.__name__: metric(y_true, y_pred.round())
            for metric in metrics
        }
        print('The metrics of the model is: {}'.format(res))
        return res

    def load(self, checkpoint_filename):
        """Restore network weights from a checkpoint file."""
        self.model.load_weights(checkpoint_filename)
class EarlyStoppingTest(TestCase):
    """Tests that EarlyStopping sets model.stop_training exactly at the
    expected epoch for scripted validation-loss sequences."""

    batch_size = 20

    def setUp(self):
        torch.manual_seed(42)  # deterministic run
        self.pytorch_module = nn.Linear(1, 1)
        self.loss_function = nn.MSELoss()
        self.optimizer = torch.optim.SGD(self.pytorch_module.parameters(), lr=1e-3)
        self.model = Model(self.pytorch_module, self.optimizer, self.loss_function)

    def test_integration(self):
        """Smoke test: EarlyStopping runs inside a real fit_generator."""
        train_gen = some_data_generator(20)
        valid_gen = some_data_generator(20)
        earlystopper = EarlyStopping(monitor='val_loss', min_delta=0, patience=2, verbose=False)
        self.model.fit_generator(train_gen, valid_gen, epochs=10, steps_per_epoch=5, callbacks=[earlystopper])

    def test_early_stopping_patience_of_1(self):
        earlystopper = EarlyStopping(monitor='val_loss', min_delta=0, patience=1, verbose=False)
        val_losses = [8, 4, 5, 2]
        early_stop_epoch = 3
        self._test_early_stopping(earlystopper, val_losses, early_stop_epoch)

    def test_early_stopping_with_delta(self):
        earlystopper = EarlyStopping(monitor='val_loss', min_delta=3, patience=2, verbose=False)
        val_losses = [8, 4, 5, 2, 2]
        early_stop_epoch = 4
        self._test_early_stopping(earlystopper, val_losses, early_stop_epoch)

    def test_early_stopping_with_max(self):
        earlystopper = EarlyStopping(monitor='val_loss', min_delta=0, patience=2, verbose=False, mode='max')
        val_losses = [2, 8, 4, 5, 2]
        early_stop_epoch = 4
        self._test_early_stopping(earlystopper, val_losses, early_stop_epoch)

    def _test_early_stopping(self, earlystopper, val_losses, early_stop_epoch):
        """Drive the callback manually with scripted val_losses and check
        stop_training flips exactly at early_stop_epoch."""
        generator = some_data_generator(EarlyStoppingTest.batch_size)
        self.model.stop_training = False
        earlystopper.set_params({'epochs': len(val_losses), 'steps': 1})
        earlystopper.set_model(self.model)
        earlystopper.on_train_begin({})
        for epoch, val_loss in enumerate(val_losses, 1):
            earlystopper.on_epoch_begin(epoch, {})
            earlystopper.on_batch_begin(1, {})
            loss = self._update_model(generator)
            earlystopper.on_batch_end(1, {
                'batch': 1,
                'size': EarlyStoppingTest.batch_size,
                'loss': loss
            })
            earlystopper.on_epoch_end(epoch, {
                'epoch': epoch,
                'loss': loss,
                'val_loss': val_loss
            })
            # stop_training must be False before, and True at, the stop epoch.
            self.assertEqual(self.model.stop_training, epoch == early_stop_epoch)
            if epoch == early_stop_epoch:
                break
        earlystopper.on_train_end({})

    def _update_model(self, generator):
        """Run one SGD step on one batch and return the loss value."""
        self.pytorch_module.zero_grad()
        x, y = next(generator)
        pred_y = self.pytorch_module(x)
        loss = self.loss_function(pred_y, y)
        loss.backward()
        self.optimizer.step()
        return float(loss)
def main(model_name, device=0, d=100, epochs=100, char_embedding_dimension=16, debug_mode=True):
    """Train a Mimick character-to-word-embedding model end to end.

    :param model_name: base name used to tag saved artifacts
    :param device: CUDA device index used when a GPU is available
    :param d: dimension of the target word embeddings
    :param epochs: number of training epochs (reduced to 3 in debug mode)
    :param char_embedding_dimension: dimension of the character embeddings
    :param debug_mode: when True, trains briefly and prefixes model_name
    """
    # Global parameters
    verbose = True
    seed = 42
    torch.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)

    logging.info("Debug mode: {}".format(debug_mode))
    logging.info("Verbose: {}".format(verbose))

    # Previously the GPU-availability check was immediately dead-overridden
    # by an unconditional `use_gpu = False` (debug leftover); kept here only
    # as a comment, consistent with the sibling main() in this file.
    use_gpu = torch.cuda.is_available()
    # use_gpu = False
    if use_gpu:
        cuda_device = device
        torch.cuda.set_device(cuda_device)
        logging.info('Using GPU')

    # Prepare examples
    train_loader, valid_loader, test_loader, char_to_idx = prepare_data(
        d=d,
        use_gpu=use_gpu,
        batch_size=64,
        debug_mode=debug_mode,
        verbose=verbose,
    )
    logging.info('Size of alphabet: ' + str(len(char_to_idx)))

    # Initialize training parameters
    lr = 0.001
    if debug_mode:
        model_name = 'testing_' + model_name
        epochs = 3

    # Create the model
    net = Mimick(
        characters_vocabulary=char_to_idx,
        characters_embedding_dimension=char_embedding_dimension,
        word_embeddings_dimension=d,
        fc_dropout_p=0.5,
        comick_compatibility=False
    )

    model = Model(
        model=net,
        optimizer=Adam(net.parameters(), lr=lr),
        loss_function=square_distance,
        metrics=[cosine_sim],
    )
    if use_gpu:
        model.cuda()

    # Set up the callbacks and train
    train(
        model,
        model_name,
        train_loader=train_loader,
        valid_loader=valid_loader,
        epochs=epochs,
    )

    evaluate(model, test_loader)
    save_char_embeddings(model, char_to_idx, 'char_' + model_name)
class CSVLoggerTest(TestCase):
    """Tests that CSVLogger writes a CSV training log that matches the
    in-memory history returned by fit_generator."""

    batch_size = 20
    lr = 1e-3

    def setUp(self):
        torch.manual_seed(42)  # deterministic run
        self.pytorch_module = nn.Linear(1, 1)
        self.loss_function = nn.MSELoss()
        self.optimizer = torch.optim.SGD(self.pytorch_module.parameters(), lr=CSVLoggerTest.lr)
        self.model = Model(self.pytorch_module, self.optimizer, self.loss_function)
        # Log file lives in a temporary directory removed in tearDown.
        self.temp_dir_obj = TemporaryDirectory()
        self.csv_filename = os.path.join(self.temp_dir_obj.name, 'my_log.csv')

    def tearDown(self):
        self.temp_dir_obj.cleanup()

    def test_logging(self):
        train_gen = some_data_generator(20)
        valid_gen = some_data_generator(20)
        logger = CSVLogger(self.csv_filename)
        history = self.model.fit_generator(train_gen, valid_gen, epochs=10, steps_per_epoch=5, callbacks=[logger])
        self._test_logging(history)

    def test_logging_with_batch_granularity(self):
        """Batch-level rows are compared against a History callback."""
        train_gen = some_data_generator(20)
        valid_gen = some_data_generator(20)
        logger = CSVLogger(self.csv_filename, batch_granularity=True)
        history = History()
        self.model.fit_generator(train_gen, valid_gen, epochs=10, steps_per_epoch=5, callbacks=[logger, history])
        self._test_logging(history.history)

    def test_logging_append(self):
        """Appending must yield one file covering both training runs."""
        train_gen = some_data_generator(20)
        valid_gen = some_data_generator(20)
        logger = CSVLogger(self.csv_filename)
        history = self.model.fit_generator(train_gen, valid_gen, epochs=10, steps_per_epoch=5, callbacks=[logger])
        logger = CSVLogger(self.csv_filename, append=True)
        history2 = self.model.fit_generator(train_gen,
                                            valid_gen,
                                            epochs=20,
                                            steps_per_epoch=5,
                                            initial_epoch=10,
                                            callbacks=[logger])
        self._test_logging(history + history2)

    def _test_logging(self, history):
        """Read back the CSV and compare every row against the history."""
        with open(self.csv_filename) as csvfile:
            reader = csv.DictReader(csvfile)
            rows = []
            for row in reader:
                # Epoch-level rows carry the learning rate; check it, then
                # drop it since history entries do not contain it.
                if row['epoch'] != '':
                    self.assertAlmostEqual(float(row['lr']), CSVLoggerTest.lr)
                    del row['lr']
                rows.append(row)
        self.assertEqual(len(rows), len(history))
        for row, hist_entry in zip(rows, history):
            # Ignore empty CSV cells so the key sets can be compared.
            row = {k: v for k, v in row.items() if v != ''}
            self.assertEqual(row.keys(), hist_entry.keys())
            for k in row.keys():
                if isinstance(hist_entry[k], float):
                    self.assertAlmostEqual(float(row[k]), hist_entry[k])
                else:
                    self.assertEqual(str(row[k]), str(hist_entry[k]))
class ModelTest(TestCase):
    """End-to-end tests of Model: fitting from generators, DataLoaders and
    tensors, plus evaluate/predict variants."""

    epochs = 10
    steps_per_epoch = 5
    batch_size = 20
    evaluate_dataset_len = 107  # deliberately not a multiple of batch_size

    def setUp(self):
        torch.manual_seed(42)  # deterministic run
        self.pytorch_module = nn.Linear(1, 1)
        self.loss_function = nn.MSELoss()
        self.optimizer = torch.optim.SGD(self.pytorch_module.parameters(), lr=1e-3)
        # Constant-valued metrics make the expected log entries predictable.
        self.metrics = [some_metric_1, some_metric_2]
        self.metrics_names = ['some_metric_1', 'some_metric_2']
        self.metrics_values = [some_metric_1_value, some_metric_2_value]
        self.model = Model(self.pytorch_module,
                           self.optimizer,
                           self.loss_function,
                           metrics=self.metrics)
        self.mock_callback = MagicMock()

    def test_fitting_tensor_generator(self):
        train_generator = some_data_tensor_generator(ModelTest.batch_size)
        valid_generator = some_data_tensor_generator(ModelTest.batch_size)
        logs = self.model.fit_generator(
            train_generator,
            valid_generator,
            epochs=ModelTest.epochs,
            steps_per_epoch=ModelTest.steps_per_epoch,
            validation_steps=ModelTest.steps_per_epoch,
            callbacks=[self.mock_callback])
        params = {
            'epochs': ModelTest.epochs,
            'steps': ModelTest.steps_per_epoch
        }
        self._test_fitting(params, logs)

    def test_fitting_without_valid_generator(self):
        train_generator = some_data_tensor_generator(ModelTest.batch_size)
        logs = self.model.fit_generator(
            train_generator,
            None,
            epochs=ModelTest.epochs,
            steps_per_epoch=ModelTest.steps_per_epoch,
            validation_steps=ModelTest.steps_per_epoch,
            callbacks=[self.mock_callback])
        params = {
            'epochs': ModelTest.epochs,
            'steps': ModelTest.steps_per_epoch
        }
        self._test_fitting(params, logs, has_valid=False)

    def test_fitting_variable_generator(self):
        train_generator = some_data_variable_generator(ModelTest.batch_size)
        valid_generator = some_data_variable_generator(ModelTest.batch_size)
        logs = self.model.fit_generator(
            train_generator,
            valid_generator,
            epochs=ModelTest.epochs,
            steps_per_epoch=ModelTest.steps_per_epoch,
            validation_steps=ModelTest.steps_per_epoch,
            callbacks=[self.mock_callback])
        params = {
            'epochs': ModelTest.epochs,
            'steps': ModelTest.steps_per_epoch
        }
        self._test_fitting(params, logs)

    def test_fitting_with_data_loader(self):
        # Dataset sizes are chosen so the last batch is smaller than the
        # others; the step count must still be inferred correctly.
        train_real_steps_per_epoch = 30
        train_batch_size = ModelTest.batch_size
        train_final_batch_missing_samples = 7
        train_x = torch.rand(
            train_real_steps_per_epoch * train_batch_size - train_final_batch_missing_samples, 1)
        train_y = torch.rand(
            train_real_steps_per_epoch * train_batch_size - train_final_batch_missing_samples, 1)
        train_dataset = TensorDataset(train_x, train_y)
        train_generator = DataLoader(train_dataset, train_batch_size)
        valid_real_steps_per_epoch = 10
        valid_batch_size = 15
        valid_final_batch_missing_samples = 3
        valid_x = torch.rand(
            valid_real_steps_per_epoch * valid_batch_size - valid_final_batch_missing_samples, 1)
        valid_y = torch.rand(
            valid_real_steps_per_epoch * valid_batch_size - valid_final_batch_missing_samples, 1)
        valid_dataset = TensorDataset(valid_x, valid_y)
        valid_generator = DataLoader(valid_dataset, valid_batch_size)
        logs = self.model.fit_generator(train_generator,
                                        valid_generator,
                                        epochs=ModelTest.epochs,
                                        steps_per_epoch=None,
                                        validation_steps=None,
                                        callbacks=[self.mock_callback])
        params = {
            'epochs': ModelTest.epochs,
            'steps': train_real_steps_per_epoch
        }
        self._test_fitting(params, logs)

    def test_fitting_with_tensor(self):
        train_real_steps_per_epoch = 30
        train_batch_size = ModelTest.batch_size
        train_final_batch_missing_samples = 7
        train_x = torch.rand(
            train_real_steps_per_epoch * train_batch_size - train_final_batch_missing_samples, 1)
        train_y = torch.rand(
            train_real_steps_per_epoch * train_batch_size - train_final_batch_missing_samples, 1)
        valid_real_steps_per_epoch = 10
        # valid_batch_size will be the same as train_batch_size in the fit method.
        valid_batch_size = train_batch_size
        valid_final_batch_missing_samples = 3
        valid_x = torch.rand(
            valid_real_steps_per_epoch * valid_batch_size - valid_final_batch_missing_samples, 1)
        valid_y = torch.rand(
            valid_real_steps_per_epoch * valid_batch_size - valid_final_batch_missing_samples, 1)
        logs = self.model.fit(train_x,
                              train_y,
                              validation_x=valid_x,
                              validation_y=valid_y,
                              epochs=ModelTest.epochs,
                              batch_size=train_batch_size,
                              steps_per_epoch=None,
                              validation_steps=None,
                              callbacks=[self.mock_callback])
        params = {
            'epochs': ModelTest.epochs,
            'steps': train_real_steps_per_epoch
        }
        self._test_fitting(params, logs)

    def test_fitting_with_generator_with_len(self):
        train_real_steps_per_epoch = 30
        train_generator = SomeDataGeneratorWithLen(
            batch_size=ModelTest.batch_size,
            length=train_real_steps_per_epoch,
            num_missing_samples=7)
        valid_generator = SomeDataGeneratorWithLen(batch_size=15, length=10, num_missing_samples=3)
        logs = self.model.fit_generator(train_generator,
                                        valid_generator,
                                        epochs=ModelTest.epochs,
                                        steps_per_epoch=None,
                                        validation_steps=None,
                                        callbacks=[self.mock_callback])
        params = {
            'epochs': ModelTest.epochs,
            'steps': train_real_steps_per_epoch
        }
        self._test_fitting(params, logs)

    def _test_fitting(self, params, logs, has_valid=True):
        """Check the returned logs and the exact callback call sequence."""
        self.assertEqual(len(logs), params['epochs'])
        train_dict = dict(zip(self.metrics_names, self.metrics_values), loss=ANY)
        if has_valid:
            val_metrics_names = [
                'val_' + metric_name for metric_name in self.metrics_names
            ]
            val_dict = dict(zip(val_metrics_names, self.metrics_values), val_loss=ANY)
            log_dict = {**train_dict, **val_dict}
        else:
            log_dict = train_dict
        for epoch, log in enumerate(logs, 1):
            self.assertEqual(log, dict(log_dict, epoch=epoch))

        # Rebuild the expected callback call sequence.
        call_list = []
        call_list.append(call.on_train_begin({}))
        for epoch in range(1, params['epochs'] + 1):
            call_list.append(call.on_epoch_begin(epoch, {}))
            for step in range(1, params['steps'] + 1):
                call_list.append(call.on_batch_begin(step, {}))
                call_list.append(
                    call.on_batch_end(step, {
                        'batch': step,
                        'size': ANY,
                        **train_dict
                    }))
            call_list.append(
                call.on_epoch_end(epoch, {
                    'epoch': epoch,
                    **log_dict
                }))
        call_list.append(call.on_train_end({}))

        method_calls = self.mock_callback.method_calls
        # set_model/set_params may come in either order, hence assertIn.
        self.assertIn(call.set_model(self.model), method_calls[:2])
        self.assertIn(call.set_params(params), method_calls[:2])
        self.assertEqual(len(method_calls), len(call_list) + 2)
        self.assertEqual(method_calls[2:], call_list)

    def test_evaluate(self):
        x = torch.rand(ModelTest.evaluate_dataset_len, 1)
        y = torch.rand(ModelTest.evaluate_dataset_len, 1)
        loss, metrics = self.model.evaluate(x, y, batch_size=ModelTest.batch_size)
        self.assertEqual(type(loss), float)
        self.assertEqual(type(metrics), np.ndarray)
        self.assertEqual(metrics.tolist(), [some_metric_1_value, some_metric_2_value])

    def test_evaluate_with_pred(self):
        x = torch.rand(ModelTest.evaluate_dataset_len, 1)
        y = torch.rand(ModelTest.evaluate_dataset_len, 1)
        loss, metrics, pred_y = self.model.evaluate(
            x, y, batch_size=ModelTest.batch_size, return_pred=True)
        self.assertEqual(pred_y.shape, (ModelTest.evaluate_dataset_len, 1))

    def test_evaluate_with_np_array(self):
        x = np.random.rand(ModelTest.evaluate_dataset_len, 1).astype(np.float32)
        y = np.random.rand(ModelTest.evaluate_dataset_len, 1).astype(np.float32)
        loss, metrics, pred_y = self.model.evaluate(
            x, y, batch_size=ModelTest.batch_size, return_pred=True)
        self.assertEqual(type(loss), float)
        self.assertEqual(type(metrics), np.ndarray)
        self.assertEqual(metrics.tolist(), [some_metric_1_value, some_metric_2_value])
        self.assertEqual(pred_y.shape, (ModelTest.evaluate_dataset_len, 1))

    def test_evaluate_data_loader(self):
        x = torch.rand(ModelTest.evaluate_dataset_len, 1)
        y = torch.rand(ModelTest.evaluate_dataset_len, 1)
        dataset = TensorDataset(x, y)
        generator = DataLoader(dataset, ModelTest.batch_size)
        loss, metrics, pred_y = self.model.evaluate_generator(generator, return_pred=True)
        self.assertEqual(type(loss), float)
        self.assertEqual(type(metrics), np.ndarray)
        self.assertEqual(metrics.tolist(), [some_metric_1_value, some_metric_2_value])
        self._test_predictions_for_evaluate_and_predict_generator(pred_y)

    def test_evaluate_generator(self):
        num_steps = 10
        generator = some_data_tensor_generator(ModelTest.batch_size)
        loss, metrics, pred_y = self.model.evaluate_generator(generator,
                                                              steps=num_steps,
                                                              return_pred=True)
        self.assertEqual(type(loss), float)
        self.assertEqual(type(metrics), np.ndarray)
        self.assertEqual(metrics.tolist(), [some_metric_1_value, some_metric_2_value])
        for pred in pred_y:
            self.assertEqual(type(pred), np.ndarray)
            self.assertEqual(pred.shape, (ModelTest.batch_size, 1))
        self.assertEqual(
            np.concatenate(pred_y).shape, (num_steps * ModelTest.batch_size, 1))

    def test_predict(self):
        x = torch.rand(ModelTest.evaluate_dataset_len, 1)
        pred_y = self.model.predict(x, batch_size=ModelTest.batch_size)
        self.assertEqual(pred_y.shape, (ModelTest.evaluate_dataset_len, 1))

    def test_predict_with_np_array(self):
        x = np.random.rand(ModelTest.evaluate_dataset_len, 1).astype(np.float32)
        pred_y = self.model.predict(x, batch_size=ModelTest.batch_size)
        self.assertEqual(pred_y.shape, (ModelTest.evaluate_dataset_len, 1))

    def test_predict_data_loader(self):
        x = torch.rand(ModelTest.evaluate_dataset_len, 1)
        generator = DataLoader(x, ModelTest.batch_size)
        pred_y = self.model.predict_generator(generator)
        self._test_predictions_for_evaluate_and_predict_generator(pred_y)

    def test_predict_generator(self):
        num_steps = 10
        generator = some_data_tensor_generator(ModelTest.batch_size)
        # Keep only the inputs; predict_generator takes no targets.
        generator = (x for x, _ in generator)
        pred_y = self.model.predict_generator(generator, steps=num_steps)
        for pred in pred_y:
            self.assertEqual(type(pred), np.ndarray)
            self.assertEqual(pred.shape, (ModelTest.batch_size, 1))
        self.assertEqual(
            np.concatenate(pred_y).shape, (num_steps * ModelTest.batch_size, 1))

    def _test_predictions_for_evaluate_and_predict_generator(self, pred_y):
        """Predictions come one array per batch; the last batch is smaller
        since evaluate_dataset_len is not a multiple of batch_size."""
        self.assertEqual(type(pred_y), list)
        remaning_example = ModelTest.evaluate_dataset_len
        cur_batch_size = ModelTest.batch_size
        for pred in pred_y:
            self.assertEqual(type(pred), np.ndarray)
            if remaning_example < ModelTest.batch_size:
                cur_batch_size = remaning_example
                remaning_example = 0
            else:
                remaning_example -= ModelTest.batch_size
            self.assertEqual(pred.shape, (cur_batch_size, 1))
        self.assertEqual(
            np.concatenate(pred_y).shape, (ModelTest.evaluate_dataset_len, 1))
class BestModelRestoreTest(TestCase):
    """Tests that BestModelRestore puts back the weights of the best
    monitored epoch at the end of training."""

    batch_size = 20

    def setUp(self):
        torch.manual_seed(42)  # deterministic run
        self.pytorch_module = nn.Linear(1, 1)
        self.loss_function = nn.MSELoss()
        self.optimizer = torch.optim.SGD(self.pytorch_module.parameters(), lr=1e-3)
        self.model = Model(self.pytorch_module, self.optimizer, self.loss_function)

    def test_integration(self):
        """Smoke test: BestModelRestore runs inside a real fit_generator."""
        train_gen = some_data_generator(20)
        valid_gen = some_data_generator(20)
        model_restore = BestModelRestore(monitor='val_loss', verbose=True)
        self.model.fit_generator(train_gen, valid_gen, epochs=10, steps_per_epoch=5, callbacks=[model_restore])

    def test_basic_restore(self):
        model_restore = BestModelRestore(monitor='val_loss')
        val_losses = [3, 2, 8, 5, 4]
        best_epoch = 2  # lowest val_loss in 'min' (default) mode
        self._test_restore_with_val_losses(model_restore, val_losses, best_epoch)

    def test_save_best_only_with_max(self):
        model_restore = BestModelRestore(monitor='val_loss', mode='max')
        val_losses = [3, 2, 8, 5, 4]
        best_epoch = 3  # highest val_loss in 'max' mode
        self._test_restore_with_val_losses(model_restore, val_losses, best_epoch)

    def _test_restore_with_val_losses(self, checkpointer, val_losses, best_epoch):
        """Drive the callback manually and check the final weights equal
        the snapshot taken at best_epoch."""
        generator = some_data_generator(BestModelRestoreTest.batch_size)
        best_epoch_weights = None
        checkpointer.set_params({'epochs': len(val_losses), 'steps': 1})
        checkpointer.set_model(self.model)
        checkpointer.on_train_begin({})
        for epoch, val_loss in enumerate(val_losses, 1):
            checkpointer.on_epoch_begin(epoch, {})
            checkpointer.on_batch_begin(1, {})
            loss = self._update_model(generator)
            checkpointer.on_batch_end(1, {
                'batch': 1,
                'size': BestModelRestoreTest.batch_size,
                'loss': loss
            })
            checkpointer.on_epoch_end(epoch, {
                'epoch': epoch,
                'loss': loss,
                'val_loss': val_loss
            })
            if epoch == best_epoch:
                # Snapshot the weights the callback should later restore.
                best_epoch_weights = torch_to_numpy(
                    self.model.get_weight_copies())
        checkpointer.on_train_end({})
        final_weights = torch_to_numpy(self.model.get_weight_copies())
        self.assertEqual(best_epoch_weights, final_weights)

    def _update_model(self, generator):
        """Run one SGD step on one batch and return the loss value."""
        self.pytorch_module.zero_grad()
        x, y = next(generator)
        pred_y = self.pytorch_module(x)
        loss = self.loss_function(pred_y, y)
        loss.backward()
        self.optimizer.step()
        return float(loss)
class ModelCheckpointTest(TestCase):
    """Tests for ModelCheckpoint: periodic/best-only saving, atomic writes
    via a temporary file, and best-weight restoration."""

    batch_size = 20

    def setUp(self):
        torch.manual_seed(42)  # deterministic run
        self.pytorch_module = nn.Linear(1, 1)
        self.loss_function = nn.MSELoss()
        self.optimizer = torch.optim.SGD(self.pytorch_module.parameters(), lr=1e-3)
        self.model = Model(self.pytorch_module, self.optimizer, self.loss_function)
        # Checkpoints go to a temp dir; '{epoch}' is formatted per epoch.
        self.temp_dir_obj = TemporaryDirectory()
        self.checkpoint_filename = os.path.join(self.temp_dir_obj.name, 'my_checkpoint_{epoch}.ckpt')

    def tearDown(self):
        self.temp_dir_obj.cleanup()

    def test_integration(self):
        """Smoke test: ModelCheckpoint runs inside a real fit_generator."""
        train_gen = some_data_generator(ModelCheckpointTest.batch_size)
        valid_gen = some_data_generator(ModelCheckpointTest.batch_size)
        checkpointer = ModelCheckpoint(self.checkpoint_filename,
                                       monitor='val_loss',
                                       verbose=True,
                                       save_best_only=True)
        self.model.fit_generator(train_gen, valid_gen, epochs=10, steps_per_epoch=5, callbacks=[checkpointer])

    def test_temporary_filename_arg(self):
        """The temp file must be gone and the real checkpoint present."""
        tmp_filename = os.path.join(self.temp_dir_obj.name, 'my_checkpoint.tmp.ckpt')
        checkpoint_filename = os.path.join(self.temp_dir_obj.name, 'my_checkpoint.ckpt')
        train_gen = some_data_generator(ModelCheckpointTest.batch_size)
        valid_gen = some_data_generator(ModelCheckpointTest.batch_size)
        checkpointer = ModelCheckpoint(checkpoint_filename,
                                       monitor='val_loss',
                                       verbose=True,
                                       period=1,
                                       temporary_filename=tmp_filename)
        self.model.fit_generator(train_gen, valid_gen, epochs=10, steps_per_epoch=5, callbacks=[checkpointer])
        self.assertFalse(os.path.isfile(tmp_filename))
        self.assertTrue(os.path.isfile(checkpoint_filename))

    def test_temporary_filename_arg_with_differing_checkpoint_filename(self):
        """Same as above but with one checkpoint file per epoch."""
        epochs = 10
        tmp_filename = os.path.join(self.temp_dir_obj.name, 'my_checkpoint.tmp.ckpt')
        checkpoint_filename = os.path.join(self.temp_dir_obj.name, 'my_checkpoint_{epoch}.ckpt')
        train_gen = some_data_generator(ModelCheckpointTest.batch_size)
        valid_gen = some_data_generator(ModelCheckpointTest.batch_size)
        checkpointer = ModelCheckpoint(checkpoint_filename,
                                       monitor='val_loss',
                                       verbose=True,
                                       period=1,
                                       temporary_filename=tmp_filename)
        self.model.fit_generator(train_gen, valid_gen, epochs=epochs, steps_per_epoch=5, callbacks=[checkpointer])
        self.assertFalse(os.path.isfile(tmp_filename))
        for i in range(1, epochs+1):
            self.assertTrue(os.path.isfile(checkpoint_filename.format(epoch=i)))

    def test_non_atomic_write(self):
        checkpoint_filename = os.path.join(self.temp_dir_obj.name, 'my_checkpoint.ckpt')
        train_gen = some_data_generator(ModelCheckpointTest.batch_size)
        valid_gen = some_data_generator(ModelCheckpointTest.batch_size)
        checkpointer = ModelCheckpoint(checkpoint_filename,
                                       monitor='val_loss',
                                       verbose=True,
                                       period=1,
                                       atomic_write=False)
        self.model.fit_generator(train_gen, valid_gen, epochs=10, steps_per_epoch=5, callbacks=[checkpointer])
        self.assertTrue(os.path.isfile(checkpoint_filename))

    def test_save_best_only(self):
        checkpointer = ModelCheckpoint(self.checkpoint_filename,
                                       monitor='val_loss',
                                       verbose=True,
                                       save_best_only=True)
        val_losses = [10, 3, 8, 5, 2]
        # A checkpoint is written only when val_loss improves.
        has_checkpoints = [True, True, False, False, True]
        self._test_checkpointer_with_val_losses(checkpointer, val_losses, has_checkpoints)

    def test_save_best_only_with_restore_best(self):
        checkpointer = ModelCheckpoint(self.checkpoint_filename,
                                       monitor='val_loss',
                                       verbose=True,
                                       save_best_only=True,
                                       restore_best=True)
        val_losses = [10, 3, 8, 5, 2]
        has_checkpoints = [True, True, False, False, True]
        self._test_checkpointer_with_val_losses(checkpointer, val_losses, has_checkpoints)
        self._test_restore_best(val_losses)

    def test_restore_best_without_save_best_only(self):
        """restore_best requires save_best_only; expect ValueError."""
        with self.assertRaises(ValueError):
            checkpointer = ModelCheckpoint(self.checkpoint_filename,
                                           monitor='val_loss',
                                           verbose=True,
                                           save_best_only=False,
                                           restore_best=True)
        with self.assertRaises(ValueError):
            checkpointer = ModelCheckpoint(self.checkpoint_filename,
                                           monitor='val_loss',
                                           verbose=True,
                                           restore_best=True)

    def test_save_best_only_with_max(self):
        checkpointer = ModelCheckpoint(self.checkpoint_filename,
                                       monitor='val_loss',
                                       mode='max',
                                       verbose=True,
                                       save_best_only=True)
        val_losses = [2, 3, 8, 5, 2]
        has_checkpoints = [True, True, True, False, False]
        self._test_checkpointer_with_val_losses(checkpointer, val_losses, has_checkpoints)

    def test_periodic_with_period_of_1(self):
        checkpointer = ModelCheckpoint(self.checkpoint_filename,
                                       monitor='val_loss',
                                       verbose=True,
                                       period=1,
                                       save_best_only=False)
        val_losses = [1] * 10
        has_checkpoints = [True] * 10
        self._test_checkpointer_with_val_losses(checkpointer, val_losses, has_checkpoints)

    def test_periodic_with_period_of_2(self):
        checkpointer = ModelCheckpoint(self.checkpoint_filename,
                                       monitor='val_loss',
                                       verbose=True,
                                       period=2,
                                       save_best_only=False)
        val_losses = [1] * 10
        has_checkpoints = [False, True] * 5
        self._test_checkpointer_with_val_losses(checkpointer, val_losses, has_checkpoints)

    def _test_checkpointer_with_val_losses(self, checkpointer, val_losses, has_checkpoints):
        """Drive the callback manually and, after each epoch, check whether
        the per-epoch checkpoint file exists as expected."""
        generator = some_data_generator(ModelCheckpointTest.batch_size)
        checkpointer.set_params({'epochs': len(val_losses), 'steps': 1})
        checkpointer.set_model(self.model)
        checkpointer.on_train_begin({})
        for epoch, (val_loss, has_checkpoint) in enumerate(zip(val_losses, has_checkpoints), 1):
            checkpointer.on_epoch_begin(epoch, {})
            checkpointer.on_batch_begin(1, {})
            loss = self._update_model(generator)
            checkpointer.on_batch_end(1, {'batch': 1, 'size': ModelCheckpointTest.batch_size, 'loss': loss})
            checkpointer.on_epoch_end(epoch, {'epoch': epoch, 'loss': loss, 'val_loss': val_loss})
            filename = self.checkpoint_filename.format(epoch=epoch)
            self.assertEqual(has_checkpoint, os.path.isfile(filename))
        checkpointer.on_train_end({})

    def _update_model(self, generator):
        """Run one SGD step on one batch and return the loss value."""
        self.pytorch_module.zero_grad()
        x, y = next(generator)
        pred_y = self.pytorch_module(x)
        loss = self.loss_function(pred_y, y)
        loss.backward()
        self.optimizer.step()
        return float(loss)

    def _test_restore_best(self, val_losses):
        """Final weights must equal the checkpoint of the best epoch."""
        final_weights = torch_to_numpy(self.model.get_weight_copies())
        epoch = val_losses.index(min(val_losses)) + 1
        best_epoch_filename = self.checkpoint_filename.format(epoch=epoch)
        self.model.load_weights(best_epoch_filename)
        best_weights = torch_to_numpy(self.model.get_weight_copies())
        self.assertEqual(best_weights, final_weights)
def test_evaluate_with_no_metric(self):
    """When the model has no metrics, evaluate returns only the loss as a float."""
    # Rebuild the model without the metrics configured in setUp.
    self.model = Model(self.pytorch_module, self.optimizer, self.loss_function)
    inputs = torch.rand(ModelTest.evaluate_dataset_len, 1)
    targets = torch.rand(ModelTest.evaluate_dataset_len, 1)
    result = self.model.evaluate(inputs, targets, batch_size=ModelTest.batch_size)
    self.assertEqual(type(result), float)
# NOTE(review): the lines below are the tail of a callback method whose
# `def` lies outside this view; `self` presumably holds net, device, every
# and n — confirm against the full file.
if epoch % self.every == 0:
    self.net.train(False)  # eval mode while sampling generated names
    print('\n'.join(
        [self.net.generate(self.device) for i in range(self.n)]))
    self.net.train(True)  # restore training mode


if __name__ == '__main__':
    # Select GPU when available, otherwise fall back to CPU.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    dataset = NameDataset('./data/data.csv')
    loader = DataLoader(
        dataset,
        batch_size=32,
        shuffle=True,
        num_workers=4,
        collate_fn=dataset.sort_by_length_flatten_on_timestamp_collate)
    net = NameGenerator(len(char_to_idx))
    # Only optimize parameters with requires_grad set.
    optimizer = Adam(filter(lambda p: p.requires_grad, net.parameters()), lr=1e-3)
    # Padded positions are excluded from the loss.
    criterion = CrossEntropyLoss(ignore_index=PAD_IDX)
    model = Model(net, optimizer, criterion, metrics=['accuracy']).to(device)
    history = model.fit_generator(
        loader,
        epochs=300,
        validation_steps=0,
        callbacks=[ClipGradient(net, 2), GenerateCallback(net, device)])
    # Plot the training curves indexed by epoch.
    df = pd.DataFrame(history).set_index('epoch')
    df.plot(subplots=True)
    plt.show()
# NOTE(review): fragment of an objective function (the bare `return` below
# belongs to it) — the enclosing `def` and the names X, Y, train_inds,
# pytorch_module, Net and STATUS_OK are outside this view; confirm against
# the full file. Looks like notebook-exported code.
optimizer = torch.optim.Adam(pytorch_module.parameters(), lr=1e-3)
# NOTE(review): capitalized `Model` shadows the pytoune Model class used
# elsewhere in this project — presumably a notebook artifact; verify.
Model = Net()
# track model overfitting
earlyStopping = pytoune.framework.EarlyStopping(monitor = 'val_loss', patience = 1, verbose = 0, mode = 'min')
# fit the model
# note that I'm not passing the data to this function, I've just included it here (i.e. I've
# included X and Y)
Model.train(X[train_inds],
            #Y[train_inds],
            #callbacks = [earlyStopping],
            #verbose = 0,
            #nb_epoch = 100,
            )
#print ('MSE:',earlyStopping.best)
return {'loss': earlyStopping.best, 'status': STATUS_OK}


# ## Plot predictions vs data

# In[ ]:

# NOTE(review): lowercase `model` is not defined in this fragment —
# confirm it is created earlier in the full file.
Y_pred = model.predict(X, verbose=1)
def main(task_config, n=21, k=2, device=0, d=100, epochs=100):
    """Train and evaluate a Comick-style model on the given task.

    Args:
        task_config: dict with keys 'name', 'dataset' (factory callable) and
            'tasks' (list of downstream evaluation task configs).
        n: context window size used when preparing examples.
        k: oversampling/augmentation factor passed to prepare_data.
        device: CUDA device index to use when a GPU is available.
        d: dimensionality of the GloVe word embeddings to load.
        epochs: number of training epochs (forced to 3 in debug mode).
    """
    # Global parameters
    debug_mode = True
    verbose = True
    save = True
    freeze_word_embeddings = True
    over_population_threshold = 100
    relative_over_population = True
    data_augmentation = True
    if debug_mode:
        data_augmentation = False
        over_population_threshold = None

    logging.info("Task name: {}".format(task_config['name']))
    logging.info("Debug mode: {}".format(debug_mode))
    logging.info("Verbose: {}".format(verbose))
    logging.info("Freeze word embeddings: {}".format(freeze_word_embeddings))
    logging.info(
        "Over population threshold: {}".format(over_population_threshold))
    logging.info(
        "Relative over population: {}".format(relative_over_population))
    logging.info("Data augmentation: {}".format(data_augmentation))

    use_gpu = torch.cuda.is_available()
    # use_gpu = False
    if use_gpu:
        cuda_device = device
        torch.cuda.set_device(cuda_device)
        logging.info('Using GPU')

    # Load dataset
    dataset = task_config['dataset'](debug_mode, relative_path='./data/')
    all_sentences = dataset.get_train_sentences + dataset.get_valid_sentences \
        + dataset.get_test_sentences

    word_embeddings = load_embeddings(
        './data/glove_embeddings/glove.6B.{}d.txt'.format(d))
    # Only needed if the commented-out characters_embeddings option of LRComick
    # below is re-enabled; kept so the file-existence check still runs.
    chars_embeddings = load_embeddings(
        './predicted_char_embeddings/char_mimick_glove_d100_c20')

    # Prepare vectorizer
    word_to_idx, char_to_idx = make_vocab(all_sentences)
    vectorizer = WordsInContextVectorizer(word_to_idx, char_to_idx)

    # Initialize training parameters
    model_name = '{}_n{}_k{}_d{}_e{}'.format(task_config['name'], n, k, d,
                                             epochs)
    lr = 0.001
    if debug_mode:
        model_name = 'testing_' + model_name
        save = False
        epochs = 3

    # Create the model
    net = LRComick(
        characters_vocabulary=char_to_idx,
        words_vocabulary=word_to_idx,
        characters_embedding_dimension=20,
        # characters_embeddings=chars_embeddings,
        word_embeddings_dimension=d,
        words_embeddings=word_embeddings,
        # context_dropout_p=0.5,
        # fc_dropout_p=0.5,
        freeze_word_embeddings=freeze_word_embeddings)
    model_name = "{}_{}_v{}".format(model_name, net.__class__.__name__.lower(),
                                    net.version)
    handler = logging.FileHandler('{}.log'.format(model_name))
    logger.addHandler(handler)
    # try/finally guarantees the file handler is detached even when training
    # or evaluation raises; previously an exception leaked the handler.
    try:
        model = Model(
            model=net,
            optimizer=Adam(net.parameters(), lr=lr),
            loss_function=square_distance,
            metrics=[cosine_sim],
        )
        if use_gpu:
            model.cuda()

        # Prepare examples
        train_loader, valid_loader, test_loader, oov_loader = prepare_data(
            dataset=dataset,
            embeddings=word_embeddings,
            vectorizer=vectorizer,
            n=n,
            use_gpu=use_gpu,
            k=k,
            over_population_threshold=over_population_threshold,
            relative_over_population=relative_over_population,
            data_augmentation=data_augmentation,
            debug_mode=debug_mode,
            verbose=verbose,
        )

        # Set up the callbacks and train
        train(
            model,
            model_name,
            train_loader=train_loader,
            valid_loader=valid_loader,
            epochs=epochs,
        )

        test_embeddings = evaluate(model,
                                   test_loader=test_loader,
                                   test_embeddings=word_embeddings,
                                   save=save,
                                   model_name=model_name + '.txt')

        predicted_oov_embeddings = predict_mean_embeddings(model, oov_loader)

        # Override embeddings with the training ones
        # Make sure we only have embeddings from the corpus data
        logging.info("Evaluating embeddings...")
        predicted_oov_embeddings.update(word_embeddings)
        for task in task_config['tasks']:
            logging.info("Using predicted embeddings on {} task...".format(
                task['name']))
            task['script'](predicted_oov_embeddings,
                           task['name'] + "_" + model_name, device, debug_mode)
    finally:
        logger.removeHandler(handler)
else: map_location = lambda storage, loc: storage model_name = 'mimick_glove_d{0}_c{1}'.format(d, c) model_path = './models/best_' + model_name + '.torch' print("Loading model from: " + model_path) net = Mimick( characters_vocabulary=char_to_idx, characters_embedding_dimension=c, word_embeddings_dimension=d, fc_dropout_p=0.5, comick_compatibility=False, ) net.load_state_dict(torch.load(model_path, map_location)) model = Model( model=net, optimizer=Adam(net.parameters(), lr=0.001), loss_function=square_distance, metrics=[cosine_sim], ) print('Done.') # Evaluation evaluate(model, test_loader) # save_char_embeddings(model, char_to_idx, 'char_'+model_name) # for dataset in ['conll', 'semeval', 'sentiment']: # path = './data/'+dataset+'_embeddings_settings/setting1/glove/oov.txt' # predict_OOV(model, char_to_idx, path, dataset+'_OOV_embeddings_'+model_name+'.txt')
train = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=0, collate_fn=train_dataset.collate_fn) test_dataset = EmojifyDataset('data/test_emoji.csv', glove) test = DataLoader(test_dataset, batch_size=32, shuffle=True, num_workers=0, collate_fn=test_dataset.collate_fn) module = Emojify(glove) module.summary() optimizer = Adam(filter(lambda p: p.requires_grad, module.parameters())) criterion = CrossEntropyLoss(ignore_index=PAD) model = Model(module, optimizer, criterion, metrics=['accuracy']).to(device) history = model.fit_generator(train, test, epochs=50) module.eval() # setting model to eval mode disables drop out wrong_x = [] wrong_y = [] real_y = [] for x, y in test: x, y = x.to(device), y.to(device) y_hat = module(x) y_hat_labels = y_hat.argmax(dim=-1) wrong_x.append(x[y_hat_labels != y]) wrong_y.append(y_hat_labels[y_hat_labels != y]) real_y.append(y[y_hat_labels != y]) wrong_x = torch.cat(wrong_x, dim=0) wrong_y = torch.cat(wrong_y, dim=0)
def train(embeddings, model_name='vanilla'):
    """Train an LSTM classifier on the 20 Newsgroups dataset and log test scores.

    Args:
        embeddings: pre-trained word embeddings loaded into the classifier
            via net.load_words_embeddings().
        model_name: suffix used for the checkpoint and CSV log filenames.
    """
    # Load the three pickled splits as (sentences, tags) pairs.
    train_sentences, train_tags = parse_20newsgroup_file('./data/20newsgroup/train.pickle')
    valid_sentences, valid_tags = parse_20newsgroup_file('./data/20newsgroup/dev.pickle')
    test_sentences, test_tags = parse_20newsgroup_file('./data/20newsgroup/test.pickle')

    labels = set(train_tags + valid_tags + test_tags)

    # NOTE(review): words_vocab is never used afterwards; only the index map is.
    words_vocab, words_to_idx = make_vocab_and_idx(train_sentences + valid_sentences + test_sentences)
    # Tags map to themselves — presumably they are already integer ids; verify.
    tags_to_idx = {v: v for v in labels}

    # Convert every sentence to a list of word indices.
    train_sentences = [[words_to_idx[word] for word in sentence] for sentence in train_sentences]
    valid_sentences = [[words_to_idx[word] for word in sentence] for sentence in valid_sentences]
    test_sentences = [[words_to_idx[word] for word in sentence] for sentence in test_sentences]

    train_dataset = list(zip(train_sentences, train_tags))
    valid_dataset = list(zip(valid_sentences, valid_tags))
    test_dataset = list(zip(test_sentences, test_tags))

    train_loader = DataLoader(
        train_dataset,
        batch_size=32,
        shuffle=True,
        collate_fn=collate_examples
    )
    valid_loader = DataLoader(
        valid_dataset,
        batch_size=32,
        shuffle=True,
        collate_fn=collate_examples
    )
    # NOTE(review): shuffle=True on the test loader is harmless for metric
    # values but unusual for evaluation; confirm it is intentional.
    test_loader = DataLoader(
        test_dataset,
        batch_size=32,
        shuffle=True,
        collate_fn=collate_examples
    )

    net = LSTMClassifier(
        100,
        50,
        words_to_idx,
        len(tags_to_idx)
    )
    net.load_words_embeddings(embeddings)

    # Callbacks: LR decay on plateau, early stop, best-model checkpointing
    # (with temp file for atomic writes) and CSV training log.
    lrscheduler = ReduceLROnPlateau(patience=5)
    early_stopping = EarlyStopping(patience=10)
    model_path = './models/'
    checkpoint = ModelCheckpoint(model_path+'newsclassif_'+model_name+'.torch',
                                 save_best_only=True,
                                 restore_best=True,
                                 temporary_filename=model_path+'tmp_newsclassif_'+model_name+'.torch')
    csv_logger = CSVLogger('./train_logs/newsclassif_{}.csv'.format(model_name))
    loss = CrossEntropyLoss()
    model = Model(net, Adam(net.parameters(), lr=0.001), loss, metrics=[acc])
    model.fit_generator(train_loader, valid_loader, epochs=40,
                        callbacks=[lrscheduler, checkpoint, early_stopping, csv_logger])

    # Evaluate the (best-restored) model on the held-out test set.
    loss, metric = model.evaluate_generator(test_loader)
    logging.info("Test loss: {}".format(loss))
    logging.info("Test metric: {}".format(metric))
class ModelTest(TestCase):
    """Unit tests for Model: fitting from various data sources, batch-level
    train/evaluate/predict APIs, device moves, and batch-size warnings.

    Callback interactions are verified by replaying the exact sequence of
    calls recorded on a MagicMock callback.
    """
    # pylint: disable=too-many-public-methods
    epochs = 10
    steps_per_epoch = 5
    batch_size = 20
    evaluate_dataset_len = 107
    # Target CUDA device index, overridable through the environment.
    cuda_device = int(os.environ.get('CUDA_DEVICE', 0))

    def setUp(self):
        # Fixed seed so weight init (and thus losses) is reproducible.
        torch.manual_seed(42)
        self.pytorch_module = nn.Linear(1, 1)
        self.loss_function = nn.MSELoss()
        self.optimizer = torch.optim.SGD(self.pytorch_module.parameters(),
                                         lr=1e-3)
        self.metrics = [some_metric_1, some_metric_2]
        self.metrics_names = ['some_metric_1', 'some_metric_2']
        self.metrics_values = [some_metric_1_value, some_metric_2_value]
        self.model = Model(self.pytorch_module, self.optimizer,
                           self.loss_function, metrics=self.metrics)
        # Records every callback invocation for _test_fitting to replay.
        self.mock_callback = MagicMock()

    def test_fitting_tensor_generator(self):
        train_generator = some_data_tensor_generator(ModelTest.batch_size)
        valid_generator = some_data_tensor_generator(ModelTest.batch_size)
        logs = self.model.fit_generator(
            train_generator,
            valid_generator,
            epochs=ModelTest.epochs,
            steps_per_epoch=ModelTest.steps_per_epoch,
            validation_steps=ModelTest.steps_per_epoch,
            callbacks=[self.mock_callback])
        params = {'epochs': ModelTest.epochs, 'steps': ModelTest.steps_per_epoch}
        self._test_fitting(params, logs)

    def test_fitting_without_valid_generator(self):
        train_generator = some_data_tensor_generator(ModelTest.batch_size)
        logs = self.model.fit_generator(
            train_generator,
            None,
            epochs=ModelTest.epochs,
            steps_per_epoch=ModelTest.steps_per_epoch,
            callbacks=[self.mock_callback])
        params = {'epochs': ModelTest.epochs, 'steps': ModelTest.steps_per_epoch}
        self._test_fitting(params, logs, has_valid=False)

    def test_fitting_ndarray_generator(self):
        train_generator = some_ndarray_generator(ModelTest.batch_size)
        valid_generator = some_ndarray_generator(ModelTest.batch_size)
        logs = self.model.fit_generator(
            train_generator,
            valid_generator,
            epochs=ModelTest.epochs,
            steps_per_epoch=ModelTest.steps_per_epoch,
            validation_steps=ModelTest.steps_per_epoch,
            callbacks=[self.mock_callback])
        params = {'epochs': ModelTest.epochs, 'steps': ModelTest.steps_per_epoch}
        self._test_fitting(params, logs)

    def test_fitting_with_data_loader(self):
        # pylint: disable=too-many-locals
        # Dataset sizes are chosen so the last batch is partial, exercising
        # step-count inference from the DataLoader lengths.
        train_real_steps_per_epoch = 30
        train_batch_size = ModelTest.batch_size
        train_final_batch_missing_samples = 7
        train_size = train_real_steps_per_epoch * train_batch_size - \
            train_final_batch_missing_samples
        train_x = torch.rand(train_size, 1)
        train_y = torch.rand(train_size, 1)
        train_dataset = TensorDataset(train_x, train_y)
        train_generator = DataLoader(train_dataset, train_batch_size)

        valid_real_steps_per_epoch = 10
        valid_batch_size = 15
        valid_final_batch_missing_samples = 3
        valid_size = valid_real_steps_per_epoch * valid_batch_size - \
            valid_final_batch_missing_samples
        valid_x = torch.rand(valid_size, 1)
        valid_y = torch.rand(valid_size, 1)
        valid_dataset = TensorDataset(valid_x, valid_y)
        valid_generator = DataLoader(valid_dataset, valid_batch_size)

        logs = self.model.fit_generator(
            train_generator,
            valid_generator,
            epochs=ModelTest.epochs,
            steps_per_epoch=None,
            validation_steps=None,
            callbacks=[self.mock_callback])
        params = {'epochs': ModelTest.epochs,
                  'steps': train_real_steps_per_epoch}
        self._test_fitting(params, logs)

    def test_fitting_with_tensor(self):
        # pylint: disable=too-many-locals
        train_real_steps_per_epoch = 30
        train_batch_size = ModelTest.batch_size
        train_final_batch_missing_samples = 7
        train_size = train_real_steps_per_epoch * train_batch_size - \
            train_final_batch_missing_samples
        train_x = torch.rand(train_size, 1)
        train_y = torch.rand(train_size, 1)

        valid_real_steps_per_epoch = 10
        # valid_batch_size will be the same as train_batch_size in the fit method.
        valid_batch_size = train_batch_size
        valid_final_batch_missing_samples = 3
        valid_size = valid_real_steps_per_epoch * valid_batch_size - \
            valid_final_batch_missing_samples
        valid_x = torch.rand(valid_size, 1)
        valid_y = torch.rand(valid_size, 1)

        logs = self.model.fit(
            train_x,
            train_y,
            validation_x=valid_x,
            validation_y=valid_y,
            epochs=ModelTest.epochs,
            batch_size=train_batch_size,
            steps_per_epoch=None,
            validation_steps=None,
            callbacks=[self.mock_callback])
        params = {'epochs': ModelTest.epochs,
                  'steps': train_real_steps_per_epoch}
        self._test_fitting(params, logs)

    def test_fitting_with_np_array(self):
        # pylint: disable=too-many-locals
        # Same scenario as test_fitting_with_tensor but with numpy inputs.
        train_real_steps_per_epoch = 30
        train_batch_size = ModelTest.batch_size
        train_final_batch_missing_samples = 7
        train_size = train_real_steps_per_epoch * train_batch_size - \
            train_final_batch_missing_samples
        train_x = np.random.rand(train_size, 1).astype(np.float32)
        train_y = np.random.rand(train_size, 1).astype(np.float32)

        valid_real_steps_per_epoch = 10
        # valid_batch_size will be the same as train_batch_size in the fit method.
        valid_batch_size = train_batch_size
        valid_final_batch_missing_samples = 3
        valid_size = valid_real_steps_per_epoch * valid_batch_size - \
            valid_final_batch_missing_samples
        valid_x = np.random.rand(valid_size, 1).astype(np.float32)
        valid_y = np.random.rand(valid_size, 1).astype(np.float32)

        logs = self.model.fit(
            train_x,
            train_y,
            validation_x=valid_x,
            validation_y=valid_y,
            epochs=ModelTest.epochs,
            batch_size=train_batch_size,
            steps_per_epoch=None,
            validation_steps=None,
            callbacks=[self.mock_callback])
        params = {'epochs': ModelTest.epochs,
                  'steps': train_real_steps_per_epoch}
        self._test_fitting(params, logs)

    def test_fitting_with_generator_with_len(self):
        train_real_steps_per_epoch = 30
        train_generator = SomeDataGeneratorWithLen(
            batch_size=ModelTest.batch_size,
            length=train_real_steps_per_epoch,
            num_missing_samples=7)
        valid_generator = SomeDataGeneratorWithLen(
            batch_size=15,
            length=10,
            num_missing_samples=3)
        logs = self.model.fit_generator(
            train_generator,
            valid_generator,
            epochs=ModelTest.epochs,
            steps_per_epoch=None,
            validation_steps=None,
            callbacks=[self.mock_callback])
        params = {'epochs': ModelTest.epochs,
                  'steps': train_real_steps_per_epoch}
        self._test_fitting(params, logs)

    def test_fitting_with_generator_with_stop_iteration(self):
        train_real_steps_per_epoch = 30
        train_generator = SomeDataGeneratorUsingStopIteration(
            batch_size=ModelTest.batch_size,
            length=train_real_steps_per_epoch)
        valid_generator = SomeDataGeneratorUsingStopIteration(
            batch_size=15, length=10)
        logs = self.model.fit_generator(
            train_generator,
            valid_generator,
            epochs=ModelTest.epochs,
            steps_per_epoch=None,
            validation_steps=None,
            callbacks=[self.mock_callback])
        # Steps are unknown up front when the generator relies on StopIteration.
        params = {'epochs': ModelTest.epochs, 'steps': None}
        self._test_fitting(params, logs, steps=train_real_steps_per_epoch)

    def _test_fitting(self, params, logs, has_valid=True, steps=None):
        """Check the returned epoch logs and the exact callback call sequence.

        Args:
            params: expected set_params() dict ('epochs' and 'steps').
            logs: the list of per-epoch log dicts returned by fit*().
            has_valid: whether validation metrics should appear in the logs.
            steps: actual steps per epoch when params['steps'] is None.
        """
        if steps is None:
            steps = params['steps']
        self.assertEqual(len(logs), params['epochs'])
        train_dict = dict(zip(self.metrics_names, self.metrics_values),
                          loss=ANY, time=ANY)
        if has_valid:
            val_metrics_names = ['val_' + metric_name
                                 for metric_name in self.metrics_names]
            val_dict = dict(zip(val_metrics_names, self.metrics_values),
                            val_loss=ANY)
            log_dict = {**train_dict, **val_dict}
        else:
            log_dict = train_dict

        for epoch, log in enumerate(logs, 1):
            self.assertEqual(log, dict(log_dict, epoch=epoch))

        # Rebuild the full expected callback call sequence and compare it to
        # what the MagicMock recorded, in order.
        call_list = []
        call_list.append(call.on_train_begin({}))
        for epoch in range(1, params['epochs']+1):
            call_list.append(call.on_epoch_begin(epoch, {}))
            for step in range(1, steps+1):
                call_list.append(call.on_batch_begin(step, {}))
                call_list.append(call.on_backward_end(step))
                call_list.append(call.on_batch_end(
                    step, {'batch': step, 'size': ANY, **train_dict}
                ))
            call_list.append(call.on_epoch_end(epoch,
                                               {'epoch': epoch, **log_dict}))
        call_list.append(call.on_train_end({}))

        method_calls = self.mock_callback.method_calls
        # set_model and set_params come first, in either order.
        self.assertIn(call.set_model(self.model), method_calls[:2])
        self.assertIn(call.set_params(params), method_calls[:2])

        self.assertEqual(len(method_calls), len(call_list) + 2)
        self.assertEqual(method_calls[2:], call_list)

    def test_tensor_train_on_batch(self):
        x = torch.rand(ModelTest.batch_size, 1)
        y = torch.rand(ModelTest.batch_size, 1)
        loss, metrics = self.model.train_on_batch(x, y)
        self.assertEqual(type(loss), float)
        self.assertEqual(type(metrics), np.ndarray)
        self.assertEqual(metrics.tolist(),
                         [some_metric_1_value, some_metric_2_value])

    def test_train_on_batch_with_pred(self):
        x = torch.rand(ModelTest.batch_size, 1)
        y = torch.rand(ModelTest.batch_size, 1)
        loss, metrics, pred_y = self.model.train_on_batch(x, y,
                                                          return_pred=True)
        self.assertEqual(type(loss), float)
        self.assertEqual(type(metrics), np.ndarray)
        self.assertEqual(metrics.tolist(),
                         [some_metric_1_value, some_metric_2_value])
        self.assertEqual(pred_y.shape, (ModelTest.batch_size, 1))

    def test_ndarray_train_on_batch(self):
        x = np.random.rand(ModelTest.batch_size, 1).astype(np.float32)
        y = np.random.rand(ModelTest.batch_size, 1).astype(np.float32)
        loss, metrics = self.model.train_on_batch(x, y)
        self.assertEqual(type(loss), float)
        self.assertEqual(type(metrics), np.ndarray)
        self.assertEqual(metrics.tolist(),
                         [some_metric_1_value, some_metric_2_value])

    def test_evaluate(self):
        x = torch.rand(ModelTest.evaluate_dataset_len, 1)
        y = torch.rand(ModelTest.evaluate_dataset_len, 1)
        loss, metrics = self.model.evaluate(x, y,
                                            batch_size=ModelTest.batch_size)
        self.assertEqual(type(loss), float)
        self.assertEqual(type(metrics), np.ndarray)
        self.assertEqual(metrics.tolist(),
                         [some_metric_1_value, some_metric_2_value])

    def test_evaluate_with_pred(self):
        x = torch.rand(ModelTest.evaluate_dataset_len, 1)
        y = torch.rand(ModelTest.evaluate_dataset_len, 1)
        # We also test the unpacking.
        # pylint: disable=unused-variable
        loss, metrics, pred_y = self.model.evaluate(
            x, y, batch_size=ModelTest.batch_size, return_pred=True)
        self.assertEqual(pred_y.shape, (ModelTest.evaluate_dataset_len, 1))

    def test_evaluate_with_np_array(self):
        x = np.random.rand(ModelTest.evaluate_dataset_len, 1).astype(np.float32)
        y = np.random.rand(ModelTest.evaluate_dataset_len, 1).astype(np.float32)
        loss, metrics, pred_y = self.model.evaluate(
            x, y, batch_size=ModelTest.batch_size, return_pred=True)
        self.assertEqual(type(loss), float)
        self.assertEqual(type(metrics), np.ndarray)
        self.assertEqual(metrics.tolist(),
                         [some_metric_1_value, some_metric_2_value])
        self.assertEqual(pred_y.shape, (ModelTest.evaluate_dataset_len, 1))

    def test_evaluate_data_loader(self):
        x = torch.rand(ModelTest.evaluate_dataset_len, 1)
        y = torch.rand(ModelTest.evaluate_dataset_len, 1)
        dataset = TensorDataset(x, y)
        generator = DataLoader(dataset, ModelTest.batch_size)
        loss, metrics, pred_y = self.model.evaluate_generator(
            generator, return_pred=True)
        self.assertEqual(type(loss), float)
        self.assertEqual(type(metrics), np.ndarray)
        self.assertEqual(metrics.tolist(),
                         [some_metric_1_value, some_metric_2_value])
        self._test_predictions_for_evaluate_and_predict_generator(pred_y)

    def test_evaluate_generator(self):
        num_steps = 10
        generator = some_data_tensor_generator(ModelTest.batch_size)
        loss, metrics, pred_y = self.model.evaluate_generator(
            generator, steps=num_steps, return_pred=True)
        self.assertEqual(type(loss), float)
        self.assertEqual(type(metrics), np.ndarray)
        self.assertEqual(metrics.tolist(),
                         [some_metric_1_value, some_metric_2_value])
        for pred in pred_y:
            self.assertEqual(type(pred), np.ndarray)
            self.assertEqual(pred.shape, (ModelTest.batch_size, 1))
        self.assertEqual(np.concatenate(pred_y).shape,
                         (num_steps * ModelTest.batch_size, 1))

    def test_evaluate_with_only_one_metric(self):
        # With a single metric, evaluate() returns it as a plain float.
        self.model = Model(self.pytorch_module, self.optimizer,
                           self.loss_function, metrics=self.metrics[:1])
        x = torch.rand(ModelTest.evaluate_dataset_len, 1)
        y = torch.rand(ModelTest.evaluate_dataset_len, 1)
        loss, first_metric = self.model.evaluate(
            x, y, batch_size=ModelTest.batch_size)
        self.assertEqual(type(loss), float)
        self.assertEqual(type(first_metric), float)
        self.assertEqual(first_metric, some_metric_1_value)

    def test_metrics_integration(self):
        # End-to-end check with a real metric function (F.mse_loss).
        num_steps = 10
        self.model = Model(self.pytorch_module, self.optimizer,
                           self.loss_function, metrics=[F.mse_loss])
        train_generator = some_data_tensor_generator(ModelTest.batch_size)
        valid_generator = some_data_tensor_generator(ModelTest.batch_size)
        self.model.fit_generator(
            train_generator,
            valid_generator,
            epochs=ModelTest.epochs,
            steps_per_epoch=ModelTest.steps_per_epoch,
            validation_steps=ModelTest.steps_per_epoch,
            callbacks=[self.mock_callback])
        generator = some_data_tensor_generator(ModelTest.batch_size)
        loss, mse = self.model.evaluate_generator(generator, steps=num_steps)
        self.assertEqual(type(loss), float)
        self.assertEqual(type(mse), float)

    def test_evaluate_with_no_metric(self):
        # Without metrics, evaluate() returns only the loss.
        self.model = Model(self.pytorch_module, self.optimizer,
                           self.loss_function)
        x = torch.rand(ModelTest.evaluate_dataset_len, 1)
        y = torch.rand(ModelTest.evaluate_dataset_len, 1)
        loss = self.model.evaluate(x, y, batch_size=ModelTest.batch_size)
        self.assertEqual(type(loss), float)

    def test_tensor_evaluate_on_batch(self):
        x = torch.rand(ModelTest.batch_size, 1)
        y = torch.rand(ModelTest.batch_size, 1)
        loss, metrics = self.model.evaluate_on_batch(x, y)
        self.assertEqual(type(loss), float)
        self.assertEqual(type(metrics), np.ndarray)
        self.assertEqual(metrics.tolist(),
                         [some_metric_1_value, some_metric_2_value])

    def test_evaluate_on_batch_with_pred(self):
        x = torch.rand(ModelTest.batch_size, 1)
        y = torch.rand(ModelTest.batch_size, 1)
        loss, metrics, pred_y = self.model.evaluate_on_batch(
            x, y, return_pred=True)
        self.assertEqual(type(loss), float)
        self.assertEqual(type(metrics), np.ndarray)
        self.assertEqual(metrics.tolist(),
                         [some_metric_1_value, some_metric_2_value])
        self.assertEqual(pred_y.shape, (ModelTest.batch_size, 1))

    def test_ndarray_evaluate_on_batch(self):
        x = np.random.rand(ModelTest.batch_size, 1).astype(np.float32)
        y = np.random.rand(ModelTest.batch_size, 1).astype(np.float32)
        loss, metrics = self.model.evaluate_on_batch(x, y)
        self.assertEqual(type(loss), float)
        self.assertEqual(type(metrics), np.ndarray)
        self.assertEqual(metrics.tolist(),
                         [some_metric_1_value, some_metric_2_value])

    def test_predict(self):
        x = torch.rand(ModelTest.evaluate_dataset_len, 1)
        pred_y = self.model.predict(x, batch_size=ModelTest.batch_size)
        self.assertEqual(pred_y.shape, (ModelTest.evaluate_dataset_len, 1))

    def test_predict_with_np_array(self):
        x = np.random.rand(ModelTest.evaluate_dataset_len, 1).astype(np.float32)
        pred_y = self.model.predict(x, batch_size=ModelTest.batch_size)
        self.assertEqual(pred_y.shape, (ModelTest.evaluate_dataset_len, 1))

    def test_predict_data_loader(self):
        x = torch.rand(ModelTest.evaluate_dataset_len, 1)
        generator = DataLoader(x, ModelTest.batch_size)
        pred_y = self.model.predict_generator(generator)
        self._test_predictions_for_evaluate_and_predict_generator(pred_y)

    def test_predict_generator(self):
        num_steps = 10
        generator = some_data_tensor_generator(ModelTest.batch_size)
        # predict_generator expects batches of inputs only, so drop the targets.
        generator = (x for x, _ in generator)
        pred_y = self.model.predict_generator(generator, steps=num_steps)
        for pred in pred_y:
            self.assertEqual(type(pred), np.ndarray)
            self.assertEqual(pred.shape, (ModelTest.batch_size, 1))
        self.assertEqual(np.concatenate(pred_y).shape,
                         (num_steps * ModelTest.batch_size, 1))

    def _test_predictions_for_evaluate_and_predict_generator(self, pred_y):
        """Check per-batch prediction shapes, including the final partial batch."""
        self.assertEqual(type(pred_y), list)
        remaning_example = ModelTest.evaluate_dataset_len
        cur_batch_size = ModelTest.batch_size
        for pred in pred_y:
            self.assertEqual(type(pred), np.ndarray)
            if remaning_example < ModelTest.batch_size:
                cur_batch_size = remaning_example
                remaning_example = 0
            else:
                remaning_example -= ModelTest.batch_size
            self.assertEqual(pred.shape, (cur_batch_size, 1))
        self.assertEqual(np.concatenate(pred_y).shape,
                         (ModelTest.evaluate_dataset_len, 1))

    def test_tensor_predict_on_batch(self):
        x = torch.rand(ModelTest.batch_size, 1)
        pred_y = self.model.predict_on_batch(x)
        self.assertEqual(pred_y.shape, (ModelTest.batch_size, 1))

    def test_ndarray_predict_on_batch(self):
        x = np.random.rand(ModelTest.batch_size, 1).astype(np.float32)
        pred_y = self.model.predict_on_batch(x)
        self.assertEqual(pred_y.shape, (ModelTest.batch_size, 1))

    @skipIf(not torch.cuda.is_available(), "no gpu available")
    def test_cpu_cuda(self):
        train_generator = some_data_tensor_generator(ModelTest.batch_size)
        valid_generator = some_data_tensor_generator(ModelTest.batch_size)

        with torch.cuda.device(ModelTest.cuda_device):
            self.model.cuda()
            self.model.fit_generator(
                train_generator,
                valid_generator,
                epochs=ModelTest.epochs,
                steps_per_epoch=ModelTest.steps_per_epoch,
                validation_steps=ModelTest.steps_per_epoch,
                callbacks=[self.mock_callback])

        # The context manager is also used here because of this bug:
        # https://github.com/pytorch/pytorch/issues/7320
        with torch.cuda.device(ModelTest.cuda_device):
            self.model.cuda(ModelTest.cuda_device)
            self._test_device(torch.device('cuda:' + str(ModelTest.cuda_device)))
            self.model.fit_generator(
                train_generator,
                valid_generator,
                epochs=ModelTest.epochs,
                steps_per_epoch=ModelTest.steps_per_epoch,
                validation_steps=ModelTest.steps_per_epoch,
                callbacks=[self.mock_callback])

            self.model.cpu()
            self._test_device(torch.device('cpu'))
            self.model.fit_generator(
                train_generator,
                valid_generator,
                epochs=ModelTest.epochs,
                steps_per_epoch=ModelTest.steps_per_epoch,
                validation_steps=ModelTest.steps_per_epoch,
                callbacks=[self.mock_callback])

            self.model.to(torch.device('cuda:' + str(ModelTest.cuda_device)))
            self._test_device(torch.device('cuda:' + str(ModelTest.cuda_device)))
            self.model.fit_generator(
                train_generator,
                valid_generator,
                epochs=ModelTest.epochs,
                steps_per_epoch=ModelTest.steps_per_epoch,
                validation_steps=ModelTest.steps_per_epoch,
                callbacks=[self.mock_callback])

            self.model.to(torch.device('cpu'))
            self._test_device(torch.device('cpu'))
            self.model.fit_generator(
                train_generator,
                valid_generator,
                epochs=ModelTest.epochs,
                steps_per_epoch=ModelTest.steps_per_epoch,
                validation_steps=ModelTest.steps_per_epoch,
                callbacks=[self.mock_callback])

    def _test_device(self, device):
        """Assert every module parameter lives on the expected device."""
        for p in self.pytorch_module.parameters():
            self.assertEqual(p.device, device)

    def test_disable_batch_size_warning(self):
        import warnings

        def tuple_generator(batch_size):
            # Yields ((x1, x2), (y1, y2)) batches, whose size Model cannot infer.
            while True:
                x1 = torch.rand(batch_size, 1)
                x2 = torch.rand(batch_size, 1)
                y1 = torch.rand(batch_size, 1)
                y2 = torch.rand(batch_size, 1)
                yield (x1, x2), (y1, y2)

        class TupleModule(nn.Module):
            # Module taking/returning tuples to trigger the batch-size warning.
            def __init__(self):
                super().__init__()
                self.l1 = nn.Linear(1, 1)
                self.l2 = nn.Linear(1, 1)

            def forward(self, x):
                # pylint: disable=arguments-differ
                x1, x2 = x
                return self.l1(x1), self.l2(x2)

        def loss_function(y_pred, y_true):
            return F.mse_loss(y_pred[0], y_true[0]) + \
                F.mse_loss(y_pred[1], y_true[1])

        pytorch_module = TupleModule()
        optimizer = torch.optim.SGD(pytorch_module.parameters(), lr=1e-3)
        model = Model(pytorch_module, optimizer, loss_function)

        train_generator = tuple_generator(ModelTest.batch_size)
        valid_generator = tuple_generator(ModelTest.batch_size)
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter("always")
            model.fit_generator(
                train_generator,
                valid_generator,
                epochs=ModelTest.epochs,
                steps_per_epoch=ModelTest.steps_per_epoch,
                validation_steps=ModelTest.steps_per_epoch)
            # One warning per train and validation batch.
            num_warnings = ModelTest.steps_per_epoch * 2 * ModelTest.epochs
            self.assertEqual(len(w), num_warnings)

        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter("always")
            warning_settings['batch_size'] = 'ignore'
            model.fit_generator(
                train_generator,
                valid_generator,
                epochs=ModelTest.epochs,
                steps_per_epoch=ModelTest.steps_per_epoch,
                validation_steps=ModelTest.steps_per_epoch)
            self.assertEqual(len(w), 0)
class Experiment:
    """High-level training harness around Model: manages checkpointing,
    resuming, logging (CSV and optionally TensorBoard), LR-scheduler state,
    and best-epoch selection inside a working directory.

    NOTE(review): the class definition continues beyond this chunk (the
    ``test`` method is truncated here).
    """
    BEST_CHECKPOINT_FILENAME = 'checkpoint_epoch_{epoch}.ckpt'
    BEST_CHECKPOINT_TMP_FILENAME = 'checkpoint_epoch.tmp.ckpt'
    MODEL_CHECKPOINT_FILENAME = 'checkpoint.ckpt'
    MODEL_CHECKPOINT_TMP_FILENAME = 'checkpoint.tmp.ckpt'
    OPTIMIZER_CHECKPOINT_FILENAME = 'checkpoint.optim'
    OPTIMIZER_CHECKPOINT_TMP_FILENAME = 'checkpoint.tmp.optim'
    LOG_FILENAME = 'log.tsv'
    TENSORBOARD_DIRECTORY = 'tensorboard'
    EPOCH_FILENAME = 'last.epoch'
    EPOCH_TMP_FILENAME = 'last.tmp.epoch'
    LR_SCHEDULER_FILENAME = 'lr_sched_%d.lrsched'
    LR_SCHEDULER_TMP_FILENAME = 'lr_sched_%d.tmp.lrsched'
    TEST_LOG_FILENAME = 'test_log.tsv'

    # NOTE(review): `metrics=[]` is a mutable default (never mutated here, but
    # an anti-pattern) and `type`/`logging` shadow a builtin and a module name.
    def __init__(self, directory, module, *, device=None, logging=True,
                 optimizer='sgd', loss_function=None, metrics=[],
                 monitor_metric=None, monitor_mode=None, type=None):
        self.directory = directory
        self.logging = logging

        # 'type' selects task-dependent defaults: 'classif*' or 'reg*'.
        if type is not None and not type.startswith(
                'classif') and not type.startswith('reg'):
            raise ValueError("Invalid type '%s'" % type)

        loss_function = self._get_loss_function(loss_function, module, type)
        metrics = self._get_metrics(metrics, module, type)
        self._set_monitor(monitor_metric, monitor_mode, type)

        self.model = Model(module, optimizer, loss_function, metrics=metrics)
        if device is not None:
            self.model.to(device)

        # All bookkeeping files live under the experiment directory.
        join_dir = lambda x: os.path.join(directory, x)
        self.best_checkpoint_filename = join_dir(
            Experiment.BEST_CHECKPOINT_FILENAME)
        self.best_checkpoint_tmp_filename = join_dir(
            Experiment.BEST_CHECKPOINT_TMP_FILENAME)
        self.model_checkpoint_filename = join_dir(
            Experiment.MODEL_CHECKPOINT_FILENAME)
        self.model_checkpoint_tmp_filename = join_dir(
            Experiment.MODEL_CHECKPOINT_TMP_FILENAME)
        self.optimizer_checkpoint_filename = join_dir(
            Experiment.OPTIMIZER_CHECKPOINT_FILENAME)
        self.optimizer_checkpoint_tmp_filename = join_dir(
            Experiment.OPTIMIZER_CHECKPOINT_TMP_FILENAME)
        self.log_filename = join_dir(Experiment.LOG_FILENAME)
        self.tensorboard_directory = join_dir(Experiment.TENSORBOARD_DIRECTORY)
        self.epoch_filename = join_dir(Experiment.EPOCH_FILENAME)
        self.epoch_tmp_filename = join_dir(Experiment.EPOCH_TMP_FILENAME)
        self.lr_scheduler_filename = join_dir(Experiment.LR_SCHEDULER_FILENAME)
        self.lr_scheduler_tmp_filename = join_dir(
            Experiment.LR_SCHEDULER_TMP_FILENAME)
        self.test_log_filename = join_dir(Experiment.TEST_LOG_FILENAME)

    def _get_loss_function(self, loss_function, module, type):
        """Resolve the loss: explicit arg > module attribute > type default."""
        if loss_function is None:
            if hasattr(module, 'loss_function'):
                return module.loss_function
            elif type is not None:
                if type.startswith('classif'):
                    return 'cross_entropy'
                elif type.startswith('reg'):
                    return 'mse'
        return loss_function

    def _get_metrics(self, metrics, module, type):
        """Resolve metrics: explicit arg > module attribute > type default."""
        if metrics is None or len(metrics) == 0:
            if hasattr(module, 'metrics'):
                return module.metrics
            elif type is not None and type.startswith('classif'):
                return ['accuracy']
        return metrics

    def _set_monitor(self, monitor_metric, monitor_mode, type):
        """Set self.monitor_metric/monitor_mode (default val_loss/min;
        val_acc/max for classification unless explicitly overridden)."""
        if monitor_mode is not None and monitor_mode not in ['min', 'max']:
            raise ValueError("Invalid mode '%s'" % monitor_mode)

        self.monitor_metric = 'val_loss'
        self.monitor_mode = 'min'
        if monitor_metric is not None:
            self.monitor_metric = monitor_metric
            if monitor_mode is not None:
                self.monitor_mode = monitor_mode
        elif type is not None and type.startswith('classif'):
            self.monitor_metric = 'val_acc'
            self.monitor_mode = 'max'

    def get_best_epoch_stats(self):
        """Return a one-row DataFrame with the stats of the best epoch,
        according to the monitored metric, read from the TSV log."""
        # NOTE(review): when pandas is missing this warns but still calls
        # pd.read_csv, which will raise AttributeError — confirm intent.
        if pd is None:
            warnings.warn("pandas needs to be installed to use this function.")

        history = pd.read_csv(self.log_filename, sep='\t')
        if self.monitor_mode == 'min':
            best_epoch_index = history[self.monitor_metric].idxmin()
        else:
            best_epoch_index = history[self.monitor_metric].idxmax()
        return history.iloc[best_epoch_index:best_epoch_index + 1]

    def _warn_missing_file(self, filename):
        warnings.warn("Missing checkpoint: %s." % filename)

    def _load_epoch_state(self, lr_schedulers):
        """Restore model/optimizer/LR-scheduler state from the directory and
        return the epoch to resume from (1 when starting fresh).

        Errors are printed, not raised, so a corrupt file degrades to a
        fresh start rather than aborting.
        """
        # pylint: disable=broad-except
        initial_epoch = 1
        if os.path.isfile(self.epoch_filename):
            try:
                with open(self.epoch_filename, 'r') as f:
                    initial_epoch = int(f.read()) + 1
            except Exception as e:
                print(e)
            if os.path.isfile(self.model_checkpoint_filename):
                try:
                    print("Loading weights from %s and starting at epoch %d."
                          % (self.model_checkpoint_filename, initial_epoch))
                    self.model.load_weights(self.model_checkpoint_filename)
                except Exception as e:
                    print(e)
            else:
                self._warn_missing_file(self.model_checkpoint_filename)
            if os.path.isfile(self.optimizer_checkpoint_filename):
                try:
                    print(
                        "Loading optimizer state from %s and starting at epoch %d."
                        % (self.optimizer_checkpoint_filename, initial_epoch))
                    self.model.load_optimizer_state(
                        self.optimizer_checkpoint_filename)
                except Exception as e:
                    print(e)
            else:
                self._warn_missing_file(self.optimizer_checkpoint_filename)
            for i, lr_scheduler in enumerate(lr_schedulers):
                filename = self.lr_scheduler_filename % i
                if os.path.isfile(filename):
                    try:
                        print(
                            "Loading LR scheduler state from %s and starting at epoch %d."
                            % (filename, initial_epoch))
                        lr_scheduler.load_state(filename)
                    except Exception as e:
                        print(e)
                else:
                    self._warn_missing_file(filename)
        return initial_epoch

    def _init_model_restoring_callbacks(self, initial_epoch, save_every_epoch):
        """Build the best-checkpoint/best-restore callbacks, seeding them with
        the current best score when resuming an interrupted run."""
        callbacks = []
        best_checkpoint = ModelCheckpoint(
            self.best_checkpoint_filename,
            monitor=self.monitor_metric,
            mode=self.monitor_mode,
            save_best_only=not save_every_epoch,
            restore_best=not save_every_epoch,
            verbose=not save_every_epoch,
            temporary_filename=self.best_checkpoint_tmp_filename)
        callbacks.append(best_checkpoint)

        if save_every_epoch:
            best_restore = BestModelRestore(monitor=self.monitor_metric,
                                            mode=self.monitor_mode,
                                            verbose=True)
            callbacks.append(best_restore)

        if initial_epoch > 1:
            # We set the current best metric score in the ModelCheckpoint so that
            # it does not save checkpoint it would not have saved if the
            # optimization was not stopped.
            best_epoch_stats = self.get_best_epoch_stats()
            best_epoch = best_epoch_stats['epoch'].item()
            best_filename = self.best_checkpoint_filename.format(
                epoch=best_epoch)
            if not save_every_epoch:
                best_checkpoint.best_filename = best_filename
                best_checkpoint.current_best = best_epoch_stats[
                    self.monitor_metric].item()
            else:
                best_restore.best_weights = torch.load(best_filename,
                                                       map_location='cpu')
                best_restore.current_best = best_epoch_stats[
                    self.monitor_metric].item()

        return callbacks

    def _init_tensorboard_callbacks(self, disable_tensorboard):
        """Return (writer, callbacks); writer is None when tensorboard is
        disabled or tensorboardX is not installed."""
        tensorboard_writer = None
        callbacks = []
        if not disable_tensorboard:
            if SummaryWriter is None:
                warnings.warn(
                    "tensorboardX does not seem to be installed. "
                    "To remove this warning, set the 'disable_tensorboard' "
                    "flag to True.")
            else:
                tensorboard_writer = SummaryWriter(self.tensorboard_directory)
                callbacks += [TensorBoardLogger(tensorboard_writer)]
        return tensorboard_writer, callbacks

    def _init_lr_scheduler_callbacks(self, lr_schedulers):
        """Wrap LR schedulers in checkpointing callbacks when logging is on;
        otherwise pass them through and add a BestModelRestore."""
        callbacks = []
        if self.logging:
            for i, lr_scheduler in enumerate(lr_schedulers):
                filename = self.lr_scheduler_filename % i
                tmp_filename = self.lr_scheduler_tmp_filename % i
                callbacks += [
                    LRSchedulerCheckpoint(lr_scheduler,
                                          filename,
                                          verbose=False,
                                          temporary_filename=tmp_filename)
                ]
        else:
            callbacks += lr_schedulers
            callbacks += [
                BestModelRestore(monitor=self.monitor_metric,
                                 mode=self.monitor_mode,
                                 verbose=True)
            ]
        return callbacks

    # NOTE(review): callbacks=[] and lr_schedulers=[] are mutable defaults;
    # callbacks is copied before mutation, so this is a smell, not a bug.
    def train(self, train_loader, valid_loader=None, *,
              callbacks=[], lr_schedulers=[], save_every_epoch=False,
              disable_tensorboard=False, epochs=1000, steps_per_epoch=None,
              validation_steps=None, seed=42):
        """Run (or resume) training with full checkpointing/logging support."""
        if seed is not None:
            # Make training deterministic.
            random.seed(seed)
            np.random.seed(seed)
            torch.manual_seed(seed)

        # Copy callback list.
        callbacks = list(callbacks)

        tensorboard_writer = None
        initial_epoch = 1
        if self.logging:
            if not os.path.exists(self.directory):
                os.makedirs(self.directory)

            # Restarting optimization if needed.
            initial_epoch = self._load_epoch_state(lr_schedulers)

            callbacks += [
                CSVLogger(self.log_filename,
                          separator='\t',
                          append=initial_epoch != 1)
            ]

            callbacks += self._init_model_restoring_callbacks(
                initial_epoch, save_every_epoch)
            callbacks += [
                ModelCheckpoint(
                    self.model_checkpoint_filename,
                    verbose=False,
                    temporary_filename=self.model_checkpoint_tmp_filename)
            ]
            callbacks += [
                OptimizerCheckpoint(
                    self.optimizer_checkpoint_filename,
                    verbose=False,
                    temporary_filename=self.optimizer_checkpoint_tmp_filename)
            ]

            # We save the last epoch number after the end of the epoch so that the
            # _load_epoch_state() knows which epoch to restart the optimization.
            callbacks += [
                PeriodicSaveLambda(
                    lambda fd, epoch, logs: print(epoch, file=fd),
                    self.epoch_filename,
                    temporary_filename=self.epoch_tmp_filename,
                    open_mode='w')
            ]

            tensorboard_writer, cb_list = self._init_tensorboard_callbacks(
                disable_tensorboard)
            callbacks += cb_list

        # This method returns callbacks that checkpoints the LR scheduler if logging is enabled.
        # Otherwise, it just returns the list of LR schedulers with a BestModelRestore callback.
        callbacks += self._init_lr_scheduler_callbacks(lr_schedulers)

        try:
            self.model.fit_generator(train_loader,
                                     valid_loader,
                                     epochs=epochs,
                                     steps_per_epoch=steps_per_epoch,
                                     validation_steps=validation_steps,
                                     initial_epoch=initial_epoch,
                                     callbacks=callbacks)
        finally:
            if tensorboard_writer is not None:
                tensorboard_writer.close()

    def load_best_checkpoint(self, *, verbose=False):
        """Load the weights of the best epoch and return its stats row."""
        best_epoch_stats = self.get_best_epoch_stats()
        best_epoch = best_epoch_stats['epoch'].item()

        if verbose:
            metrics_str = ', '.join(
                '%s: %g' % (metric_name, best_epoch_stats[metric_name].item())
                for metric_name in best_epoch_stats.columns[2:])
            print("Found best checkpoint at epoch: {}".format(best_epoch))
            print(metrics_str)

        self.load_checkpoint(best_epoch)
        return best_epoch_stats

    def load_checkpoint(self, epoch):
        """Load the weights saved for the given epoch."""
        ckpt_filename = self.best_checkpoint_filename.format(epoch=epoch)
        self.model.load_weights(ckpt_filename)

    def load_last_checkpoint(self):
        """Load the most recent (last-epoch) weights."""
        self.model.load_weights(self.model_checkpoint_filename)

    def test(self, test_loader, *, steps=None,
             load_best_checkpoint=True, load_last_checkpoint=False, seed=42):
        # NOTE(review): method body continues beyond this chunk.
        if seed is not None:
            # Make training deterministic.
random.seed(seed) np.random.seed(seed) torch.manual_seed(seed) best_epoch_stats = None if load_best_checkpoint: best_epoch_stats = self.load_best_checkpoint(verbose=True) elif load_last_checkpoint: best_epoch_stats = self.load_last_checkpoint() test_loss, test_metrics = self.model.evaluate_generator(test_loader, steps=steps) if not isinstance(test_metrics, np.ndarray): test_metrics = np.array([test_metrics]) test_metrics_names = ['test_loss'] + \ ['test_' + metric_name for metric_name in self.model.metrics_names] test_metrics_values = np.concatenate(([test_loss], test_metrics)) test_metrics_str = ', '.join( '%s: %g' % (col, val) for col, val in zip(test_metrics_names, test_metrics_values)) print("On best model: %s" % test_metrics_str) if self.logging: test_stats = pd.DataFrame([test_metrics_values], columns=test_metrics_names) if best_epoch_stats is not None: best_epoch_stats = best_epoch_stats.reset_index(drop=True) test_stats = best_epoch_stats.join(test_stats) test_stats.to_csv(self.test_log_filename, sep='\t', index=False)
class OptimizerCheckpointTest(TestCase):
    # NOTE(review): despite its name, this test case exercises
    # LRSchedulerCheckpoint, not OptimizerCheckpoint — confirm whether a
    # rename is wanted.
    batch_size = 20  # samples per generated batch
    epochs = 10  # epochs driven through the callback protocol

    def setUp(self):
        """Build a deterministic linear model and a temporary checkpoint dir."""
        torch.manual_seed(42)
        self.pytorch_module = nn.Linear(1, 1)
        self.loss_function = nn.MSELoss()
        self.optimizer = torch.optim.Adam(self.pytorch_module.parameters(),
                                          lr=1e-3)
        self.model = Model(self.pytorch_module, self.optimizer,
                           self.loss_function)
        self.temp_dir_obj = TemporaryDirectory()
        # '{epoch}' is substituted by the checkpoint callback at save time.
        self.checkpoint_filename = os.path.join(self.temp_dir_obj.name,
                                                'my_checkpoint_{epoch}.optim')

    def tearDown(self):
        """Remove the temporary checkpoint directory."""
        self.temp_dir_obj.cleanup()

    def test_any_scheduler_integration(self):
        """LRSchedulerCheckpoint runs end-to-end with a standard scheduler."""
        train_gen = some_data_generator(OptimizerCheckpointTest.batch_size)
        valid_gen = some_data_generator(OptimizerCheckpointTest.batch_size)
        lr_scheduler = ExponentialLR(gamma=0.01)
        checkpointer = LRSchedulerCheckpoint(lr_scheduler,
                                             self.checkpoint_filename,
                                             period=1)
        self.model.fit_generator(train_gen,
                                 valid_gen,
                                 epochs=OptimizerCheckpointTest.epochs,
                                 steps_per_epoch=5,
                                 callbacks=[checkpointer])

    def test_reduce_lr_on_plateau_integration(self):
        """LRSchedulerCheckpoint runs end-to-end with ReduceLROnPlateau."""
        train_gen = some_data_generator(OptimizerCheckpointTest.batch_size)
        valid_gen = some_data_generator(OptimizerCheckpointTest.batch_size)
        reduce_lr = ReduceLROnPlateau(monitor='loss', patience=3)
        checkpointer = LRSchedulerCheckpoint(reduce_lr,
                                             self.checkpoint_filename,
                                             period=1)
        self.model.fit_generator(train_gen,
                                 valid_gen,
                                 epochs=OptimizerCheckpointTest.epochs,
                                 steps_per_epoch=5,
                                 callbacks=[checkpointer])

    def test_any_scheduler_checkpoints(self):
        """A state file is written every epoch for a standard scheduler."""
        lr_scheduler = ExponentialLR(gamma=0.01)
        checkpointer = LRSchedulerCheckpoint(lr_scheduler,
                                             self.checkpoint_filename,
                                             period=1)
        self._test_checkpointer(checkpointer, lr_scheduler)

    def test_reduce_lr_checkpoints(self):
        """A state file is written every epoch for ReduceLROnPlateau."""
        reduce_lr = ReduceLROnPlateau(monitor='loss', patience=3)
        checkpointer = LRSchedulerCheckpoint(reduce_lr,
                                             self.checkpoint_filename,
                                             period=1)
        self._test_checkpointer(checkpointer, reduce_lr)

    def _test_checkpointer(self, checkpointer, lr_scheduler):
        """Drive the callback protocol by hand for one-step epochs.

        After each epoch, asserts the checkpoint file exists and snapshots
        the scheduler state; at the end, reloads every file and compares it
        against the snapshot taken when it was written.
        """
        scheduler_states = {}
        generator = some_data_generator(OptimizerCheckpointTest.batch_size)
        checkpointer.set_params({
            'epochs': OptimizerCheckpointTest.epochs,
            'steps': 1
        })
        checkpointer.set_model(self.model)
        checkpointer.on_train_begin({})
        for epoch in range(1, OptimizerCheckpointTest.epochs + 1):
            checkpointer.on_epoch_begin(epoch, {})
            checkpointer.on_batch_begin(1, {})
            loss = self._update_model(generator)
            checkpointer.on_batch_end(
                1, {
                    'batch': 1,
                    'size': OptimizerCheckpointTest.batch_size,
                    'loss': loss
                })
            checkpointer.on_epoch_end(epoch, {
                'epoch': epoch,
                'loss': loss,
                'val_loss': 1
            })
            filename = self.checkpoint_filename.format(epoch=epoch)
            self.assertTrue(os.path.isfile(filename))
            # Deep-copy the state now; it mutates on the next epoch.
            scheduler_states[epoch] = torch_to_numpy(
                lr_scheduler.scheduler.state_dict(), copy=True)
        checkpointer.on_train_end({})
        self._test_checkpoint(scheduler_states, lr_scheduler)

    def _update_model(self, generator):
        """Run one optimizer step on the next batch; return the loss value."""
        self.pytorch_module.zero_grad()
        x, y = next(generator)
        pred_y = self.pytorch_module(x)
        loss = self.loss_function(pred_y, y)
        loss.backward()
        self.optimizer.step()
        return float(loss)

    def _test_checkpoint(self, scheduler_states, lr_scheduler):
        """Reload every saved file and compare with the recorded states."""
        for epoch, epoch_scheduler_state in scheduler_states.items():
            filename = self.checkpoint_filename.format(epoch=epoch)
            lr_scheduler.load_state(filename)
            saved_scheduler_state = torch_to_numpy(
                lr_scheduler.scheduler.state_dict())
            self.assertEqual(epoch_scheduler_state, saved_scheduler_state)
def launch_train(embeddings, model_name, device, debug):
    """Train the LSTM NER tagger on CoNLL data, then report test loss/metric.

    Args:
        embeddings: pre-trained word embeddings loaded into the tagger.
        model_name: tag used in checkpoint and log file names.
        device: CUDA device index to use when a GPU is available.
        debug: when True, run a single epoch instead of 40.
    """
    epochs = 1 if debug else 40

    train_sentences, train_tags = parse_conll_file('./data/conll/train.txt')
    valid_sentences, valid_tags = parse_conll_file('./data/conll/valid.txt')
    test_sentences, test_tags = parse_conll_file('./data/conll/test.txt')

    # Vocabularies are built over every split so no token is out-of-vocabulary.
    words_vocab, words_to_idx = make_vocab_and_idx(train_sentences +
                                                   valid_sentences +
                                                   test_sentences)
    tags_vocab, tags_to_idx = make_vocab_and_idx(train_tags + valid_tags +
                                                 test_tags)

    def to_indices(sequences, index):
        # Map every token of every sequence through the vocabulary index.
        return [[index[token] for token in sequence] for sequence in sequences]

    train_sentences = to_indices(train_sentences, words_to_idx)
    train_tags = to_indices(train_tags, tags_to_idx)
    valid_sentences = to_indices(valid_sentences, words_to_idx)
    valid_tags = to_indices(valid_tags, tags_to_idx)
    test_sentences = to_indices(test_sentences, words_to_idx)
    test_tags = to_indices(test_tags, tags_to_idx)

    train_dataset = list(zip(train_sentences, train_tags))
    valid_dataset = list(zip(valid_sentences, valid_tags))
    test_dataset = list(zip(test_sentences, test_tags))

    def cuda_collate(samples):
        # Same batching as collate_examples, with tensors moved to the GPU.
        words_tensor, labels_tensor = collate_examples(samples)
        return words_tensor.cuda(), labels_tensor.cuda()

    use_gpu = torch.cuda.is_available()
    if use_gpu:
        torch.cuda.set_device(device)
        logging.info('Using GPU')
        collate_fn = cuda_collate
    else:
        collate_fn = collate_examples

    train_loader, valid_loader, test_loader = (DataLoader(dataset,
                                                          batch_size=32,
                                                          shuffle=True,
                                                          collate_fn=collate_fn)
                                               for dataset in (train_dataset,
                                                               valid_dataset,
                                                               test_dataset))

    net = LSTMTagger(100, 50, words_to_idx, len(tags_to_idx), use_gpu)
    net.load_words_embeddings(embeddings)
    if use_gpu:
        net.cuda()

    lrscheduler = ReduceLROnPlateau(patience=2)
    early_stopping = EarlyStopping(patience=5)
    model_path = './models/'
    checkpoint = ModelCheckpoint(model_path + 'ner_' + model_name + '.torch',
                                 save_best_only=True,
                                 restore_best=True,
                                 temporary_filename=model_path + 'tmp_ner_' +
                                 model_name + '.torch',
                                 verbose=True)
    csv_logger = CSVLogger('./train_logs/ner_{}.csv'.format(model_name))

    model = Model(net,
                  Adam(net.parameters(), lr=0.001),
                  sequence_cross_entropy,
                  metrics=[f1])
    model.fit_generator(
        train_loader,
        valid_loader,
        epochs=epochs,
        callbacks=[lrscheduler, checkpoint, early_stopping, csv_logger])

    loss, metric = model.evaluate_generator(test_loader)
    logging.info("Test loss: {}".format(loss))
    logging.info("Test metric: {}".format(metric))
def setUp(self):
    """Build a deterministic one-parameter linear model for each test."""
    # Seed before constructing the module so its initial weights are fixed.
    torch.manual_seed(42)
    module = nn.Linear(1, 1)
    criterion = nn.MSELoss()
    sgd = torch.optim.SGD(module.parameters(), lr=1e-3)
    self.pytorch_module = module
    self.loss_function = criterion
    self.optimizer = sgd
    self.model = Model(module, sgd, criterion)