def test_partially_empty_batch(self):
    dataset = create_dataloader("task1", shuffle=False).dataset
    dataset.Y_dict["task1"][0] = -1
    model = MultitaskClassifier([self.task1])
    loss_dict, count_dict = model.calculate_loss(dataset.X_dict, dataset.Y_dict)
    self.assertEqual(count_dict["task1"], 9)
def test_empty_batch(self):
    dataset = create_dataloader("task1", shuffle=False).dataset
    dataset.Y_dict["task1"] = torch.full_like(dataset.Y_dict["task1"], -1)
    model = MultitaskClassifier([self.task1])
    loss_dict, count_dict = model.calculate_loss(dataset.X_dict, dataset.Y_dict)
    self.assertFalse(loss_dict)
    self.assertFalse(count_dict)
def test_score(self):
    model = MultitaskClassifier([self.task1])
    metrics = model.score([self.dataloader])
    # Deterministic random tie breaking alternates predicted labels
    self.assertEqual(metrics["task1/dataset/train/accuracy"], 0.4)

    # Test dataframe format
    metrics_df = model.score([self.dataloader], as_dataframe=True)
    self.assertTrue(isinstance(metrics_df, pd.DataFrame))
    self.assertEqual(metrics_df.at[0, "score"], 0.4)
def test_no_input_spec(self):
    # Confirm model doesn't break when a module does not specify specific inputs
    dataset = create_dataloader("task", shuffle=False).dataset
    task = Task(
        name="task",
        module_pool=nn.ModuleDict({"identity": nn.Identity()}),
        op_sequence=[Operation("identity", [])],
    )
    model = MultitaskClassifier(tasks=[task], dataparallel=False)
    outputs = model.forward(dataset.X_dict, ["task"])
    self.assertIn("_input_", outputs)
def test_performance(self):
    """Test slicing performance with 2 corresponding slice tasks that
    represent roughly <10% of the data."""
    dataloaders = []
    for df, split in [(self.df_train, "train"), (self.df_valid, "valid")]:
        dataloader = create_dataloader(df, split)
        dataloaders.append(dataloader)

    base_task = create_task("task", module_suffixes=["A", "B"])

    # Apply SFs
    slicing_functions = [f, g]  # low-coverage slices
    slice_names = [sf.name for sf in slicing_functions]
    applier = PandasSFApplier(slicing_functions)
    S_train = applier.apply(self.df_train, progress_bar=False)
    S_valid = applier.apply(self.df_valid, progress_bar=False)

    # Add slice labels
    add_slice_labels(dataloaders[0], base_task, S_train)
    add_slice_labels(dataloaders[1], base_task, S_valid)

    # Convert to slice tasks
    tasks = convert_to_slice_tasks(base_task, slice_names)
    model = MultitaskClassifier(tasks=tasks)

    # Train
    # NOTE: Needs more epochs to converge with more heads
    trainer = Trainer(lr=0.001, n_epochs=65, progress_bar=False)
    trainer.fit(model, dataloaders)
    scores = model.score(dataloaders)

    # Confirm reasonably high slice scores
    # Check train scores
    self.assertGreater(scores["task/TestData/train/f1"], 0.9)
    self.assertGreater(scores["task_slice:f_pred/TestData/train/f1"], 0.9)
    self.assertGreater(scores["task_slice:f_ind/TestData/train/f1"], 0.9)
    self.assertGreater(scores["task_slice:g_pred/TestData/train/f1"], 0.9)
    self.assertGreater(scores["task_slice:g_ind/TestData/train/f1"], 0.9)
    self.assertGreater(scores["task_slice:base_pred/TestData/train/f1"], 0.9)
    self.assertEqual(scores["task_slice:base_ind/TestData/train/f1"], 1.0)

    # Check valid scores
    self.assertGreater(scores["task/TestData/valid/f1"], 0.9)
    self.assertGreater(scores["task_slice:f_pred/TestData/valid/f1"], 0.9)
    self.assertGreater(scores["task_slice:f_ind/TestData/valid/f1"], 0.9)
    self.assertGreater(scores["task_slice:g_pred/TestData/valid/f1"], 0.9)
    self.assertGreater(scores["task_slice:g_ind/TestData/valid/f1"], 0.9)
    self.assertGreater(scores["task_slice:base_pred/TestData/valid/f1"], 0.9)
    # base_ind is trivial: all labels are positive
    self.assertEqual(scores["task_slice:base_ind/TestData/valid/f1"], 1.0)
def test_convergence(self):
    """Test slicing convergence with 1 slice task that represents ~25% of the data."""
    dataloaders = []
    for df, split in [(self.df_train, "train"), (self.df_valid, "valid")]:
        dataloader = create_dataloader(df, split)
        dataloaders.append(dataloader)

    base_task = create_task("task", module_suffixes=["A", "B"])

    # Apply SFs
    slicing_functions = [h]  # high-coverage slice
    slice_names = [sf.name for sf in slicing_functions]
    applier = PandasSFApplier(slicing_functions)
    S_train = applier.apply(self.df_train, progress_bar=False)
    S_valid = applier.apply(self.df_valid, progress_bar=False)
    self.assertEqual(S_train.shape, (self.N_TRAIN,))
    self.assertEqual(S_valid.shape, (self.N_VALID,))
    self.assertIn("h", S_train.dtype.names)

    # Add slice labels
    add_slice_labels(dataloaders[0], base_task, S_train)
    add_slice_labels(dataloaders[1], base_task, S_valid)

    # Convert to slice tasks
    tasks = convert_to_slice_tasks(base_task, slice_names)
    model = MultitaskClassifier(tasks=tasks)

    # Train
    trainer = Trainer(lr=0.001, n_epochs=50, progress_bar=False)
    trainer.fit(model, dataloaders)
    scores = model.score(dataloaders)

    # Confirm near-perfect scores
    self.assertGreater(scores["task/TestData/valid/accuracy"], 0.94)
    self.assertGreater(scores["task_slice:h_pred/TestData/valid/accuracy"], 0.94)
    self.assertGreater(scores["task_slice:h_ind/TestData/valid/f1"], 0.94)

    # Calculate/check train/val loss
    train_dataset = dataloaders[0].dataset
    train_loss_output = model.calculate_loss(
        train_dataset.X_dict, train_dataset.Y_dict
    )
    train_loss = train_loss_output[0]["task"].item()
    self.assertLess(train_loss, 0.1)

    val_dataset = dataloaders[1].dataset
    val_loss_output = model.calculate_loss(val_dataset.X_dict, val_dataset.Y_dict)
    val_loss = val_loss_output[0]["task"].item()
    self.assertLess(val_loss, 0.1)
def test_save_load(self):
    fd, checkpoint_path = tempfile.mkstemp()

    task1 = create_task("task1")
    task2 = create_task("task2")
    # Make task2's second linear layer have different weights
    task2.module_pool["linear2"] = nn.Linear(2, 2)

    model = MultitaskClassifier([task1])
    self.assertTrue(
        torch.eq(
            task1.module_pool["linear2"].weight,
            model.module_pool["linear2"].module.weight,
        ).all()
    )
    model.save(checkpoint_path)

    model = MultitaskClassifier([task2])
    self.assertFalse(
        torch.eq(
            task1.module_pool["linear2"].weight,
            model.module_pool["linear2"].module.weight,
        ).all()
    )

    model.load(checkpoint_path)
    self.assertTrue(
        torch.eq(
            task1.module_pool["linear2"].weight,
            model.module_pool["linear2"].module.weight,
        ).all()
    )

    os.close(fd)
def test_score_shuffled(self):
    # Test scoring with a shuffled dataset
    set_seed(123)

    class SimpleVoter(nn.Module):
        def forward(self, x):
            """Vote for class 0 if x is even and class 1 otherwise."""
            mask = x % 2 == 0
            out = torch.zeros(x.shape[0], 2)
            out[mask, 0] = 1  # class 0
            out[~mask, 1] = 1  # class 1
            return out

    # Create model
    task_name = "VotingTask"
    module_name = "simple_voter"
    module_pool = nn.ModuleDict({module_name: SimpleVoter()})
    op0 = Operation(
        module_name=module_name, inputs=[("_input_", "data")], name="op0"
    )
    op_sequence = [op0]
    task = Task(name=task_name, module_pool=module_pool, op_sequence=op_sequence)
    model = MultitaskClassifier([task])

    # Create dataset
    y_list = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1]
    x_list = [i for i in range(len(y_list))]
    Y = torch.LongTensor(y_list * 100)
    X = torch.FloatTensor(x_list * 100)
    dataset = DictDataset(
        name="dataset", split="train", X_dict={"data": X}, Y_dict={task_name: Y}
    )

    # Create dataloaders
    dataloader = DictDataLoader(dataset, batch_size=2, shuffle=False)
    scores = model.score([dataloader])
    self.assertEqual(scores["VotingTask/dataset/train/accuracy"], 0.6)

    dataloader_shuffled = DictDataLoader(dataset, batch_size=2, shuffle=True)
    scores_shuffled = model.score([dataloader_shuffled])
    self.assertEqual(scores_shuffled["VotingTask/dataset/train/accuracy"], 0.6)
def create_model(resnet_cnn):
    # Freeze the resnet weights
    for param in resnet_cnn.parameters():
        param.requires_grad = False

    # Number of features produced by the CNN backbone
    in_features = resnet_cnn.fc.in_features
    feature_extractor = nn.Sequential(*list(resnet_cnn.children())[:-1])

    # Initialize FC layer: maps 3 sets of image features plus 2 word embeddings to class logits
    WEMB_SIZE = 100
    fc = nn.Linear(in_features * 3 + 2 * WEMB_SIZE, 3)
    init_fc(fc)

    # Define layers
    module_pool = nn.ModuleDict(
        {
            "feat_extractor": feature_extractor,
            "prediction_head": fc,
            "feat_concat": FlatConcat(),
            "word_emb": WordEmb(),
        }
    )

    # Define task flow through modules
    op_sequence = get_op_sequence()
    pred_cls_task = Task(
        name="visual_relation_task",
        module_pool=module_pool,
        op_sequence=op_sequence,
        scorer=Scorer(metrics=["f1_micro"]),
    )
    return MultitaskClassifier([pred_cls_task])
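# For context, a minimal usage sketch of the factory above. The torchvision
# backbone is an assumption for illustration; create_model itself only relies on
# .parameters(), .fc.in_features, and .children(), so any ResNet variant works.
import torchvision

resnet_cnn = torchvision.models.resnet18(pretrained=True)
model = create_model(resnet_cnn)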
def test_twotask_partial_overlap_model(self):
    """Add two tasks with overlapping modules and flows"""
    task1 = create_task("task1", module_suffixes=["A", "A"])
    task2 = create_task("task2", module_suffixes=["A", "B"])
    model = MultitaskClassifier(tasks=[task1, task2])
    self.assertEqual(len(model.task_names), 2)
    self.assertEqual(len(model.op_sequences), 2)
    self.assertEqual(len(model.module_pool), 3)
def test_predict(self):
    model = MultitaskClassifier([self.task1])
    results = model.predict(self.dataloader)
    self.assertEqual(sorted(list(results.keys())), ["golds", "probs"])
    np.testing.assert_array_equal(
        results["golds"]["task1"], self.dataloader.dataset.Y_dict["task1"].numpy()
    )
    np.testing.assert_array_equal(
        results["probs"]["task1"], np.ones((NUM_EXAMPLES, 2)) * 0.5
    )

    results = model.predict(self.dataloader, return_preds=True)
    self.assertEqual(sorted(list(results.keys())), ["golds", "preds", "probs"])
    # Deterministic random tie breaking alternates predicted labels
    np.testing.assert_array_equal(
        results["preds"]["task1"],
        np.array([0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0]),
    )
def test_convergence(self):
    """Test multitask classifier convergence with two tasks."""
    dataloaders = []

    for offset, task_name in zip([0.0, 0.25], ["task1", "task2"]):
        df = create_data(N_TRAIN, offset)
        dataloader = create_dataloader(df, "train", task_name)
        dataloaders.append(dataloader)

    for offset, task_name in zip([0.0, 0.25], ["task1", "task2"]):
        df = create_data(N_VALID, offset)
        dataloader = create_dataloader(df, "valid", task_name)
        dataloaders.append(dataloader)

    task1 = create_task("task1", module_suffixes=["A", "A"])
    task2 = create_task("task2", module_suffixes=["A", "B"])
    model = MultitaskClassifier(tasks=[task1, task2])

    # Train
    trainer = Trainer(lr=0.001, n_epochs=10, progress_bar=False)
    trainer.fit(model, dataloaders)
    scores = model.score(dataloaders)

    # Confirm near perfect scores on both tasks
    for idx, task_name in enumerate(["task1", "task2"]):
        self.assertGreater(scores[f"{task_name}/TestData/valid/accuracy"], 0.95)

        # Calculate/check train/val loss
        train_dataset = dataloaders[idx].dataset
        train_loss_output = model.calculate_loss(
            train_dataset.X_dict, train_dataset.Y_dict
        )
        train_loss = train_loss_output[0][task_name].item()
        self.assertLess(train_loss, 0.05)

        val_dataset = dataloaders[2 + idx].dataset
        val_loss_output = model.calculate_loss(
            val_dataset.X_dict, val_dataset.Y_dict
        )
        val_loss = val_loss_output[0][task_name].item()
        self.assertLess(val_loss, 0.05)
def test_bad_tasks(self):
    with self.assertRaisesRegex(ValueError, "Found duplicate task"):
        MultitaskClassifier(tasks=[self.task1, self.task1])
    with self.assertRaisesRegex(ValueError, "Unrecognized task type"):
        MultitaskClassifier(tasks=[self.task1, {"fake_task": 42}])
    with self.assertRaisesRegex(ValueError, "Unsuccessful operation"):
        task1 = create_task("task1")
        task1.op_sequence[0].inputs[0] = (0, 0)
        model = MultitaskClassifier(tasks=[task1])
        X_dict = self.dataloader.dataset.X_dict
        model.forward(X_dict, [task1.name])
def test_remapped_labels(self):
    # Test additional label keys in the Y_dict
    # Without remapping, the model should ignore them
    task_name = self.task1.name
    X = torch.FloatTensor([[i, i] for i in range(NUM_EXAMPLES)])
    Y = torch.ones(NUM_EXAMPLES).long()

    Y_dict = {task_name: Y, "other_task": Y}
    dataset = DictDataset(
        name="dataset", split="train", X_dict={"data": X}, Y_dict=Y_dict
    )
    dataloader = DictDataLoader(dataset, batch_size=BATCH_SIZE)

    model = MultitaskClassifier([self.task1])
    loss_dict, count_dict = model.calculate_loss(dataset.X_dict, dataset.Y_dict)
    self.assertIn("task1", loss_dict)

    # Test prediction and scoring without remapping
    results = model.predict(dataloader)
    self.assertIn("task1", results["golds"])
    self.assertNotIn("other_task", results["golds"])
    scores = model.score([dataloader])
    self.assertIn("task1/dataset/train/accuracy", scores)
    self.assertNotIn("other_task/dataset/train/accuracy", scores)

    # Test remapped labelsets
    results = model.predict(dataloader, remap_labels={"other_task": task_name})
    self.assertIn("task1", results["golds"])
    self.assertIn("other_task", results["golds"])

    results = model.score([dataloader], remap_labels={"other_task": task_name})
    self.assertIn("task1/dataset/train/accuracy", results)
    self.assertIn("other_task/dataset/train/accuracy", results)
# and the sequence in which they are used.
# Loss and scoring functions are added based on task type
task_object = Task(
    name=task_formal_name,
    module_pool=module_pool,
    op_sequence=op_sequence,
    loss_func=task_type_function_mapping[task_type]["loss_function"],
    output_func=partial(F.softmax, dim=1),
    scorer=task_type_function_mapping[task_type]["scorer"],
)

# Add task to the list of tasks
tasks.append(task_object)

# Pass the list of tasks to MultitaskClassifier to create a model with a head for each task
model = MultitaskClassifier(tasks)

# Set the trainer settings, i.e., how the model will train
trainer_config = {
    "progress_bar": True,
    "n_epochs": 2,
    "lr": 0.02,
    "logging": True,
    "log_writer": "json",
    "checkpointing": True,
}

# Create a trainer object using the above settings
trainer = Trainer(**trainer_config)

# Train the model using the above settings on the linked datasets
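# For reference, a hedged sketch of what the task_type_function_mapping referenced
# above might contain. The key names ("loss_function", "scorer") follow the lookups
# in the Task(...) call; the specific loss and metric choices here are assumptions
# for illustration, not the tutorial's actual definitions.
import torch.nn.functional as F
from snorkel.analysis import Scorer

task_type_function_mapping = {
    "classification": {
        "loss_function": F.cross_entropy,
        "scorer": Scorer(metrics=["accuracy"]),
    },
}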
def test_trainer_twotask(self):
    """Train a model with overlapping modules and flows"""
    multitask_model = MultitaskClassifier(tasks)
    trainer = Trainer(**base_config)
    trainer.fit(multitask_model, dataloaders)
    ],
)

# %% [markdown]
# ## Model

# %% [markdown]
# With our tasks defined, constructing a model is straightforward: we pass in the list of tasks, and the model constructs itself using information from the task flows.
#
# Note that the model uses the names of modules (not the modules themselves) to determine whether two modules specified by separate tasks are the same module (and should share weights) or different modules (with separate weights).
# Because both the `square_task` and `circle_task` include "base_mlp" in their module pools, this module will be shared between the two tasks.

# %%
from snorkel.classification import MultitaskClassifier

model = MultitaskClassifier([circle_task, square_task])

# %% [markdown]
# ### Train Model

# %% [markdown]
# Once the model is constructed, we can train it as we would a single-task model, using the `fit` method of a `Trainer` object. The `Trainer` supports multiple schedules or patterns for sampling from different dataloaders; the default is to randomly sample from them in proportion to their number of batches, so that all data points are seen exactly once before any are seen twice.

# %%
from snorkel.classification import Trainer

trainer_config = {"progress_bar": False, "n_epochs": 10, "lr": 0.02}

trainer = Trainer(**trainer_config)
trainer.fit(model, dataloaders)
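# %% [markdown]
# As a hedged illustration of the scheduling options mentioned above (assuming the installed Snorkel version exposes a `batch_scheduler` key in the `Trainer` config), switching from the default shuffled sampling over dataloaders to sequential sampling would look like this:

# %%
sequential_trainer = Trainer(
    progress_bar=False, n_epochs=10, lr=0.02, batch_scheduler="sequential"
)
sequential_trainer.fit(model, dataloaders)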
def test_no_data_parallel(self):
    model = MultitaskClassifier(tasks=[self.task1, self.task2], dataparallel=False)
    self.assertEqual(len(model.task_names), 2)
    self.assertIsInstance(model.module_pool["linear1A"], nn.Module)
def test_twotask_none_overlap_model(self):
    """Add two tasks with totally separate modules and flows"""
    model = MultitaskClassifier(tasks=[self.task1, self.task2])
    self.assertEqual(len(model.task_names), 2)
    self.assertEqual(len(model.op_sequences), 2)
    self.assertEqual(len(model.module_pool), 4)
def test_onetask_model(self):
    model = MultitaskClassifier(tasks=[self.task1])
    self.assertEqual(len(model.task_names), 1)
    self.assertEqual(len(model.op_sequences), 1)
    self.assertEqual(len(model.module_pool), 2)
    op_sequence = [op1, op2]

    task = Task(name=task_name, module_pool=module_pool, op_sequence=op_sequence)

    return task


dataloaders = [create_dataloader(task_name) for task_name in TASK_NAMES]
tasks = [
    create_task(TASK_NAMES[0], module_suffixes=["A", "A"]),
    create_task(TASK_NAMES[1], module_suffixes=["A", "B"]),
]
model = MultitaskClassifier([tasks[0]])


class TrainerTest(unittest.TestCase):
    def test_trainer_onetask(self):
        """Train a single-task model"""
        trainer = Trainer(**base_config)
        trainer.fit(model, [dataloaders[0]])

    def test_trainer_twotask(self):
        """Train a model with overlapping modules and flows"""
        multitask_model = MultitaskClassifier(tasks)
        trainer = Trainer(**base_config)
        trainer.fit(multitask_model, dataloaders)

    def test_trainer_errors(self):