def test_performance(self):
    """Test slicing performance with 2 corresponding slice tasks that represent roughly <10% of the data."""
    loaders = [
        create_dataloader(frame, split_name)
        for frame, split_name in [(self.df_train, "train"), (self.df_valid, "valid")]
    ]
    base_task = create_task("task", module_suffixes=["A", "B"])

    # Apply the low-coverage slicing functions to both splits
    sfs = [f, g]
    sf_names = [sf.name for sf in sfs]
    applier = PandasSFApplier(sfs)
    train_slices = applier.apply(self.df_train, progress_bar=False)
    valid_slices = applier.apply(self.df_valid, progress_bar=False)

    # Attach the slice labelsets to each dataloader
    add_slice_labels(loaders[0], base_task, train_slices)
    add_slice_labels(loaders[1], base_task, valid_slices)

    # Expand the base task into per-slice ind/pred tasks
    slice_tasks = convert_to_slice_tasks(base_task, sf_names)
    model = MultitaskClassifier(tasks=slice_tasks)

    # Train
    # NOTE: Needs more epochs to convergence with more heads
    trainer = Trainer(lr=0.001, n_epochs=65, progress_bar=False)
    trainer.fit(model, loaders)
    scores = model.score(loaders)

    # Confirm reasonably high slice scores on both splits
    for split_name in ["train", "valid"]:
        for head in [
            "task",
            "task_slice:f_pred",
            "task_slice:f_ind",
            "task_slice:g_pred",
            "task_slice:g_ind",
            "task_slice:base_pred",
        ]:
            self.assertGreater(scores[f"{head}/TestData/{split_name}/f1"], 0.9)
        # base_ind is trivial: all labels are positive
        self.assertEqual(scores[f"task_slice:base_ind/TestData/{split_name}/f1"], 1.0)
def test_convergence(self):
    """Test slicing convergence with 1 slice task that represents ~25% of the data."""
    loaders = []
    for frame, split_name in ((self.df_train, "train"), (self.df_valid, "valid")):
        loaders.append(create_dataloader(frame, split_name))
    base_task = create_task("task", module_suffixes=["A", "B"])

    # Apply the single high-coverage slicing function to both splits
    sfs = [h]
    sf_names = [sf.name for sf in sfs]
    applier = PandasSFApplier(sfs)
    train_slices = applier.apply(self.df_train, progress_bar=False)
    valid_slices = applier.apply(self.df_valid, progress_bar=False)
    # The applier returns one structured record per row, with one field per SF
    self.assertEqual(train_slices.shape, (self.N_TRAIN,))
    self.assertEqual(valid_slices.shape, (self.N_VALID,))
    self.assertIn("h", train_slices.dtype.names)

    # Attach the slice labelsets to each dataloader
    add_slice_labels(loaders[0], base_task, train_slices)
    add_slice_labels(loaders[1], base_task, valid_slices)

    # Expand the base task into per-slice ind/pred tasks and build the model
    slice_tasks = convert_to_slice_tasks(base_task, sf_names)
    model = MultitaskClassifier(tasks=slice_tasks)

    # Train
    trainer = Trainer(lr=0.001, n_epochs=50, progress_bar=False)
    trainer.fit(model, loaders)
    scores = model.score(loaders)

    # Confirm near perfect scores
    self.assertGreater(scores["task/TestData/valid/accuracy"], 0.94)
    self.assertGreater(scores["task_slice:h_pred/TestData/valid/accuracy"], 0.94)
    self.assertGreater(scores["task_slice:h_ind/TestData/valid/f1"], 0.94)

    # Calculate/check train then valid loss on the base task head
    for loader in loaders:
        ds = loader.dataset
        loss_dict = model.calculate_loss(ds.X_dict, ds.Y_dict)[0]
        self.assertLess(loss_dict["task"].item(), 0.1)
def test_add_slice_labels(self):
    # Create dummy data
    # Given slicing function f(), we expect the first two entries to be active
    x = torch.Tensor([0.1, 0.2, 0.3, 0.4, 0.5])
    y = torch.Tensor([0, 1, 1, 0, 1]).long()
    dataset = DictDataset(
        name="TestData", split="train", X_dict={"data": x}, Y_dict={"TestTask": y}
    )
    # Ensure that we start with 1 labelset
    self.assertEqual(len(dataset.Y_dict), 1)

    # Apply SFs with PandasSFApplier
    df = pd.DataFrame({"val": x, "y": y})
    S = PandasSFApplier([f]).apply(df, progress_bar=False)

    loader = DictDataLoader(dataset)
    dummy_task = create_dummy_task(task_name="TestTask")
    add_slice_labels(loader, dummy_task, S)

    # Ensure that all the fields are present
    labelsets = loader.dataset.Y_dict
    for key in (
        "TestTask",
        "TestTask_slice:base_ind",
        "TestTask_slice:base_pred",
        "TestTask_slice:f_ind",
        "TestTask_slice:f_pred",
    ):
        self.assertIn(key, labelsets)
    self.assertEqual(len(labelsets), 5)

    # "ind" labelsets contain the slice membership mask;
    # "pred" labelsets carry the task labels with off-slice elements masked to -1
    expected = {
        "TestTask_slice:f_ind": [1, 1, 0, 0, 0],
        "TestTask_slice:base_ind": [1, 1, 1, 1, 1],
        "TestTask_slice:f_pred": [0, 1, -1, -1, -1],
        "TestTask_slice:base_pred": [0, 1, 1, 0, 1],
        "TestTask": [0, 1, 1, 0, 1],
    }
    for key, values in expected.items():
        self.assertEqual(labelsets[key].numpy().tolist(), values)