def test_not_fitting_scaler_but_normalising_data(self):
    n_trials = 10
    n_training_cols = 6
    n_class_cols = 3
    training_cols = ["col_" + str(i) for i in range(n_training_cols)]
    class_cols = ["class_" + str(i) for i in range(n_class_cols)]
    test_dataset = self.create_dataset(n_trials, training_cols, class_cols,
                                       seed=0)
    second_dataset = self.create_dataset(n_trials, training_cols, class_cols,
                                         seed=71)
    _, scaler = dataset.prepare_dataset([test_dataset],
                                        class_cols,
                                        training_columns=training_cols,
                                        normalise_data=True,
                                        scaler=None)
    self.assertTrue(scaler is not None)
    loaders, scaler = dataset.prepare_dataset([second_dataset, test_dataset],
                                              class_cols,
                                              training_columns=training_cols,
                                              normalise_data=True,
                                              scaler=scaler)
    self.assertTrue(scaler is not None)
    numpy_dataset = loaders[1].dataset.tensors[0].numpy()
    self.assertTrue(
        TestNormalise.are_mean_and_variance_correct(numpy_dataset, atol=1e-7))

def test_transforms(self):

    def add_one(dataframe):
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore")
            dataframe.loc[:, dataframe.columns] = dataframe[dataframe.columns] + 1

    n_trials = 10
    n_training_cols = 6
    n_class_cols = 3
    training_cols = ["col_" + str(i) for i in range(n_training_cols)]
    class_cols = ["class_" + str(i) for i in range(n_class_cols)]
    test_dataset = self.create_dataset(n_trials, training_cols, class_cols)
    loader, _ = dataset.prepare_dataset([test_dataset],
                                        class_cols,
                                        training_columns=training_cols,
                                        normalise_data=False,
                                        transforms=[add_one])
    numpy_dataset = loader.dataset.tensors[0].numpy()
    datasets_training_cols = np.array(
        [np.array(trial[training_cols]) for trial in test_dataset])
    self.assertTrue(
        np.isclose(numpy_dataset, datasets_training_cols + 1).all())

def test_multiclass(self):
    n_trials = 10
    n_training_cols = 8
    n_class_cols = 6
    training_cols = ["col_" + str(i) for i in range(n_training_cols)]
    class_cols = ["class_" + str(i) for i in range(n_class_cols)]
    multiclass_list = [class_cols[:3], class_cols[3:]]
    test_dataset = self.create_dataset(n_trials,
                                       training_cols,
                                       multiclass_list,
                                       multiclass=True)
    dataset_loader, scaler = dataset.prepare_dataset(
        [test_dataset],
        multiclass_list,
        multiclass=True,
        training_columns=training_cols)
    self.assertEqual(scaler, None)
    self.assertEqual(len(dataset_loader.dataset), n_trials)
    # Pass None for the class columns so only the training columns are checked
    self.assertTrue(
        self.are_trials_correctly_set(dataset_loader, test_dataset,
                                      training_cols, None))
    self.assertTrue(
        self.is_multiclass_correctly_set(dataset_loader, test_dataset,
                                         multiclass_list))

def test_class_columns_not_in_every_dataset(self):
    n_trials = 10
    n_training_cols = 6
    n_class_cols = 5
    training_cols = ["col_" + str(i) for i in range(n_training_cols)]
    class_cols = ["class_" + str(i) for i in range(n_class_cols)]
    train_dataset = self.create_dataset(2 * n_trials, training_cols,
                                        class_cols)
    test_dataset = self.create_dataset(n_trials, training_cols,
                                       class_cols[:3])
    with self.assertRaises(KeyError):
        dataset.prepare_dataset([train_dataset, test_dataset],
                                class_cols,
                                training_columns=training_cols)

def test_normalisation_cols(self):
    n_trials = 10
    n_training_cols = 6
    n_class_cols = 3
    training_cols = ["col_" + str(i) for i in range(n_training_cols)]
    categorical_columns = training_cols[:2]
    non_categorical_columns = training_cols[2:]
    categorical_cols_bool_index = np.array(
        [col in categorical_columns for col in training_cols])
    class_cols = ["class_" + str(i) for i in range(n_class_cols)]
    test_dataset = self.create_dataset(n_trials, training_cols, class_cols)
    loader, _ = dataset.prepare_dataset(
        [test_dataset],
        class_cols,
        training_columns=training_cols,
        normalise_data=True,
        normalisation_cols=non_categorical_columns)
    numpy_dataset = loader.dataset.tensors[0].numpy()
    datasets_training_cols = np.array(
        [np.array(trial[training_cols]) for trial in test_dataset])
    # Check categorical columns haven't changed
    self.assertTrue(
        np.isclose(
            numpy_dataset[:, :, categorical_cols_bool_index],
            datasets_training_cols[:, :, categorical_cols_bool_index]).all())
    # Check non-categorical columns have been normalised correctly
    self.assertTrue(
        TestNormalise.are_mean_and_variance_correct(
            numpy_dataset[:, :, ~categorical_cols_bool_index], atol=1e-7))

def test_non_categorical_class_columns(self):
    n_trials = 10
    n_training_cols = 6
    n_class_cols = 3
    training_cols = ["col_" + str(i) for i in range(n_training_cols)]
    class_cols = ["class_" + str(i) for i in range(n_class_cols)]
    test_dataset = self.create_dataset(n_trials, training_cols, class_cols)
    for trial in test_dataset:
        trial[class_cols] = np.random.rand(*trial[class_cols].shape)
    with self.assertRaisesRegex(ValueError,
                                "Classes are not one-hot encoded"):
        dataset.prepare_dataset([test_dataset],
                                class_cols,
                                training_columns=training_cols)

def test_all_training_columns_are_categorical(self):
    n_trials = 10
    n_training_cols = 6
    n_class_cols = 3
    training_cols = ["col_" + str(i) for i in range(n_training_cols)]
    categorical_columns = training_cols
    class_cols = ["class_" + str(i) for i in range(n_class_cols)]
    test_dataset = self.create_dataset(n_trials, training_cols, class_cols)
    with self.assertRaises(ValueError):
        dataset.prepare_dataset([test_dataset],
                                class_cols,
                                training_columns=training_cols,
                                normalise_data=True,
                                categorical_columns=categorical_columns)

def test_columns_in_both_categorical_and_normalisation_cols(self):
    n_trials = 10
    n_training_cols = 6
    n_class_cols = 3
    training_cols = ["col_" + str(i) for i in range(n_training_cols)]
    categorical_columns = training_cols[:4]
    non_categorical_columns = training_cols[2:]
    class_cols = ["class_" + str(i) for i in range(n_class_cols)]
    test_dataset = self.create_dataset(n_trials, training_cols, class_cols)
    with self.assertRaises(ValueError):
        dataset.prepare_dataset([test_dataset],
                                class_cols,
                                training_columns=training_cols,
                                normalise_data=True,
                                normalisation_cols=non_categorical_columns,
                                categorical_columns=categorical_columns)

def test_non_categorical_class_columns_in_multiclass_setting(self):
    n_trials = 10
    n_training_cols = 6
    n_class_cols = 6
    training_cols = ["col_" + str(i) for i in range(n_training_cols)]
    class_cols = ["class_" + str(i) for i in range(n_class_cols)]
    multiclass_list = [class_cols[:3], class_cols[3:]]
    test_dataset = self.create_dataset(n_trials,
                                       training_cols,
                                       multiclass_list,
                                       multiclass=True)
    for trial in test_dataset:
        trial[class_cols] = np.random.rand(*trial[class_cols].shape)
    with self.assertRaises(ValueError):
        dataset.prepare_dataset([test_dataset],
                                multiclass_list,
                                multiclass=True,
                                training_columns=training_cols)

def test_passing_scaler_but_not_normalising_data(self):
    n_trials = 10
    n_training_cols = 6
    n_class_cols = 3
    training_cols = ["col_" + str(i) for i in range(n_training_cols)]
    class_cols = ["class_" + str(i) for i in range(n_class_cols)]
    test_dataset = self.create_dataset(n_trials, training_cols, class_cols,
                                       seed=0)
    second_dataset = self.create_dataset(n_trials, training_cols, class_cols,
                                         seed=71)
    _, scaler = dataset.prepare_dataset([test_dataset],
                                        class_cols,
                                        training_columns=training_cols,
                                        normalise_data=True,
                                        scaler=None)
    self.assertTrue(scaler is not None)
    loader, scaler = dataset.prepare_dataset([second_dataset],
                                             class_cols,
                                             training_columns=training_cols,
                                             normalise_data=False,
                                             scaler=scaler)
    self.assertTrue(scaler is not None)
    numpy_dataset = loader.dataset.tensors[0].numpy()
    datasets_training_cols = np.array(
        [np.array(trial[training_cols]) for trial in second_dataset])
    self.assertTrue(
        np.isclose(numpy_dataset, datasets_training_cols).all())

def test_one_dataset(self):
    n_trials = 10
    n_training_cols = 6
    n_class_cols = 3
    training_cols = ["col_" + str(i) for i in range(n_training_cols)]
    class_cols = ["class_" + str(i) for i in range(n_class_cols)]
    test_dataset = self.create_dataset(n_trials, training_cols, class_cols)
    dataset_loader, scaler = dataset.prepare_dataset(
        [test_dataset], class_cols, training_columns=training_cols)
    self.assertEqual(scaler, None)
    self.assertEqual(len(dataset_loader.dataset), n_trials)
    self.assertTrue(
        self.are_trials_correctly_set(dataset_loader, test_dataset,
                                      training_cols, class_cols))

def test_different_batch_sizes(self):
    n_trials = 10
    n_training_cols = 6
    n_class_cols = 3
    training_cols = ["col_" + str(i) for i in range(n_training_cols)]
    class_cols = ["class_" + str(i) for i in range(n_class_cols)]
    test_dataset = self.create_dataset(n_trials, training_cols, class_cols)
    for batch_size in [1, 4, 16]:
        dataset_loader, scaler = dataset.prepare_dataset(
            [test_dataset],
            class_cols,
            training_columns=training_cols,
            batch_size=batch_size)
        self.assertEqual(dataset_loader.batch_size, batch_size)

def test_cpu_device(self):
    n_trials = 10
    n_training_cols = 6
    n_class_cols = 3
    training_cols = ["col_" + str(i) for i in range(n_training_cols)]
    class_cols = ["class_" + str(i) for i in range(n_class_cols)]
    cpu_device = torch.device("cpu")
    test_dataset = self.create_dataset(n_trials, training_cols, class_cols)
    dataset_loader, _ = dataset.prepare_dataset(
        [test_dataset],
        class_cols,
        training_columns=training_cols,
        device=cpu_device)
    self.assertFalse(dataset_loader.dataset.tensors[0].is_cuda)
    self.assertFalse(dataset_loader.dataset.tensors[1].is_cuda)

def test_class_columns_integer(self):
    n_trials = 10
    n_training_cols = 6
    n_class_cols = 3
    training_cols = ["col_" + str(i) for i in range(n_training_cols)]
    class_cols = ["class_" + str(i) for i in range(n_class_cols)]
    test_dataset = self.create_dataset(n_trials, training_cols, class_cols)
    for trial in test_dataset:
        # Cast the one-hot class columns to integer dtype so the integer
        # class-column path is exercised
        trial[class_cols] = trial[class_cols].astype(int)
    dataset_loader, _ = dataset.prepare_dataset(
        [test_dataset], class_cols, training_columns=training_cols)
    self.assertTrue(
        self.are_trials_correctly_set(dataset_loader, test_dataset,
                                      training_cols, class_cols))

def test_save_plot_is_not_none(self):
    self.create_and_save_model(self.network_params, self.model_path)
    dataset = self.create_dataset(self.n_trials, self.training_columns,
                                  self.class_columns)
    test_loader, _ = prepare_dataset([dataset],
                                     class_columns=self.class_columns,
                                     training_columns=self.training_columns)
    self.write_dataframes_to_file(dataset, self.dataset_path)
    _, predicted = evaluation.evaluate_saved_model(self.model_path,
                                                   self.network_params,
                                                   self.dataset_path,
                                                   self.training_columns,
                                                   self.class_columns,
                                                   trials=None)
    ax = visualization.plot_confusion_matrix_given_predicted_and_test_loader(
        predicted, test_loader, self.class_columns, self.plot_path)
    self.assertTrue(isinstance(ax, Axes))
    self.assertTrue(os.path.exists(self.plot_path))

def test_gpu_device(self):
    if not torch.cuda.is_available():
        warnings.warn("No Cuda device available to run this test.")
        return
    n_trials = 10
    n_training_cols = 6
    n_class_cols = 3
    training_cols = ["col_" + str(i) for i in range(n_training_cols)]
    class_cols = ["class_" + str(i) for i in range(n_class_cols)]
    gpu_device = torch.device("cuda:0")
    test_dataset = self.create_dataset(n_trials, training_cols, class_cols)
    dataset_loader, _ = dataset.prepare_dataset(
        [test_dataset],
        class_cols,
        training_columns=training_cols,
        device=gpu_device)
    self.assertTrue(dataset_loader.dataset.tensors[0].is_cuda)
    self.assertTrue(dataset_loader.dataset.tensors[1].is_cuda)

def test_more_than_one_dataset(self):
    n_trials = 10
    n_training_cols = 6
    n_class_cols = 3
    training_cols = ["col_" + str(i) for i in range(n_training_cols)]
    class_cols = ["class_" + str(i) for i in range(n_class_cols)]
    train_dataset = self.create_dataset(2 * n_trials, training_cols,
                                        class_cols)
    test_dataset = self.create_dataset(n_trials, training_cols, class_cols)
    loaders, scaler = dataset.prepare_dataset([train_dataset, test_dataset],
                                              class_cols,
                                              training_columns=training_cols)
    self.assertEqual(scaler, None)
    self.assertEqual(len(loaders[0].dataset), 2 * n_trials)
    self.assertEqual(len(loaders[1].dataset), n_trials)
    self.assertTrue(
        self.are_trials_correctly_set(loaders[0], train_dataset,
                                      training_cols, class_cols))
    self.assertTrue(
        self.are_trials_correctly_set(loaders[1], test_dataset,
                                      training_cols, class_cols))