def test_score(self):
        model = MultitaskClassifier([self.task1])
        metrics = model.score([self.dataloader])
        # deterministic random tie breaking alternates predicted labels
        self.assertEqual(metrics["task1/dataset/train/accuracy"], 0.4)

        # test dataframe format
        metrics_df = model.score([self.dataloader], as_dataframe=True)
        self.assertTrue(isinstance(metrics_df, pd.DataFrame))
        self.assertEqual(metrics_df.at[0, "score"], 0.4)

    def test_score_shuffled(self):
        # Test scoring with a shuffled dataset

        set_seed(123)

        class SimpleVoter(nn.Module):
            def forward(self, x):
                """Set class 0 to -1 if x and 1 otherwise"""
                mask = x % 2 == 0
                out = torch.zeros(x.shape[0], 2)
                out[mask, 0] = 1  # class 0
                out[~mask, 1] = 1  # class 1
                return out

        # Create model
        task_name = "VotingTask"
        module_name = "simple_voter"
        module_pool = nn.ModuleDict({module_name: SimpleVoter()})
        op0 = Operation(module_name=module_name,
                        inputs=[("_input_", "data")],
                        name="op0")
        op_sequence = [op0]
        task = Task(name=task_name,
                    module_pool=module_pool,
                    op_sequence=op_sequence)
        model = MultitaskClassifier([task])

        # Create dataset
        y_list = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1]
        x_list = [i for i in range(len(y_list))]
        Y = torch.LongTensor(y_list * 100)
        X = torch.FloatTensor(x_list * 100)
        dataset = DictDataset(name="dataset",
                              split="train",
                              X_dict={"data": X},
                              Y_dict={task_name: Y})

        # Create dataloaders
        dataloader = DictDataLoader(dataset, batch_size=2, shuffle=False)
        scores = model.score([dataloader])

        self.assertEqual(scores["VotingTask/dataset/train/accuracy"], 0.6)

        dataloader_shuffled = DictDataLoader(dataset,
                                             batch_size=2,
                                             shuffle=True)
        scores_shuffled = model.score([dataloader_shuffled])
        self.assertEqual(scores_shuffled["VotingTask/dataset/train/accuracy"],
                         0.6)
Example #3
    def test_convergence(self):
        """Test slicing convergence with 1 slice task that represents ~25% of
        the data."""

        dataloaders = []
        for df, split in [(self.df_train, "train"), (self.df_valid, "valid")]:
            dataloader = create_dataloader(df, split)
            dataloaders.append(dataloader)

        base_task = create_task("task", module_suffixes=["A", "B"])

        # Apply SFs
        slicing_functions = [h]  # high coverage slice
        slice_names = [sf.name for sf in slicing_functions]
        applier = PandasSFApplier(slicing_functions)
        S_train = applier.apply(self.df_train, progress_bar=False)
        S_valid = applier.apply(self.df_valid, progress_bar=False)

        self.assertEqual(S_train.shape, (self.N_TRAIN, ))
        self.assertEqual(S_valid.shape, (self.N_VALID, ))
        self.assertIn("h", S_train.dtype.names)

        # Add slice labels
        add_slice_labels(dataloaders[0], base_task, S_train)
        add_slice_labels(dataloaders[1], base_task, S_valid)

        # Convert to slice tasks
        tasks = convert_to_slice_tasks(base_task, slice_names)
        model = MultitaskClassifier(tasks=tasks)

        # Train
        trainer = Trainer(lr=0.001, n_epochs=50, progress_bar=False)
        trainer.fit(model, dataloaders)
        scores = model.score(dataloaders)

        # Confirm near perfect scores
        self.assertGreater(scores["task/TestData/valid/accuracy"], 0.94)
        self.assertGreater(scores["task_slice:h_pred/TestData/valid/accuracy"],
                           0.94)
        self.assertGreater(scores["task_slice:h_ind/TestData/valid/f1"], 0.94)

        # Calculate/check train/val loss
        train_dataset = dataloaders[0].dataset
        train_loss_output = model.calculate_loss(train_dataset.X_dict,
                                                 train_dataset.Y_dict)
        train_loss = train_loss_output[0]["task"].item()
        self.assertLess(train_loss, 0.1)

        val_dataset = dataloaders[1].dataset
        val_loss_output = model.calculate_loss(val_dataset.X_dict,
                                               val_dataset.Y_dict)
        val_loss = val_loss_output[0]["task"].item()
        self.assertLess(val_loss, 0.1)
Example #4
    def test_performance(self):
        """Test slicing performance with 2 corresponding slice tasks that
        represent roughly <10% of the data."""

        dataloaders = []
        for df, split in [(self.df_train, "train"), (self.df_valid, "valid")]:
            dataloader = create_dataloader(df, split)
            dataloaders.append(dataloader)

        base_task = create_task("task", module_suffixes=["A", "B"])

        # Apply SFs
        slicing_functions = [f, g]  # low-coverage slices
        slice_names = [sf.name for sf in slicing_functions]
        applier = PandasSFApplier(slicing_functions)
        S_train = applier.apply(self.df_train, progress_bar=False)
        S_valid = applier.apply(self.df_valid, progress_bar=False)

        # Add slice labels
        add_slice_labels(dataloaders[0], base_task, S_train)
        add_slice_labels(dataloaders[1], base_task, S_valid)

        # Convert to slice tasks
        tasks = convert_to_slice_tasks(base_task, slice_names)
        model = MultitaskClassifier(tasks=tasks)

        # Train
        # NOTE: Needs more epochs to converge with more heads
        trainer = Trainer(lr=0.001, n_epochs=65, progress_bar=False)
        trainer.fit(model, dataloaders)
        scores = model.score(dataloaders)

        # Confirm reasonably high slice scores
        # Check train scores
        self.assertGreater(scores["task/TestData/train/f1"], 0.9)
        self.assertGreater(scores["task_slice:f_pred/TestData/train/f1"], 0.9)
        self.assertGreater(scores["task_slice:f_ind/TestData/train/f1"], 0.9)
        self.assertGreater(scores["task_slice:g_pred/TestData/train/f1"], 0.9)
        self.assertGreater(scores["task_slice:g_ind/TestData/train/f1"], 0.9)
        self.assertGreater(scores["task_slice:base_pred/TestData/train/f1"],
                           0.9)
        self.assertEqual(scores["task_slice:base_ind/TestData/train/f1"], 1.0)

        # Check valid scores
        self.assertGreater(scores["task/TestData/valid/f1"], 0.9)
        self.assertGreater(scores["task_slice:f_pred/TestData/valid/f1"], 0.9)
        self.assertGreater(scores["task_slice:f_ind/TestData/valid/f1"], 0.9)
        self.assertGreater(scores["task_slice:g_pred/TestData/valid/f1"], 0.9)
        self.assertGreater(scores["task_slice:g_ind/TestData/valid/f1"], 0.9)
        self.assertGreater(scores["task_slice:base_pred/TestData/valid/f1"],
                           0.9)
        # base_ind is trivial: all labels are positive
        self.assertEqual(scores["task_slice:base_ind/TestData/valid/f1"], 1.0)

    def test_remapped_labels(self):
        # Test additional label keys in the Y_dict
        # Without remapping, model should ignore them
        task_name = self.task1.name
        X = torch.FloatTensor([[i, i] for i in range(NUM_EXAMPLES)])
        Y = torch.ones(NUM_EXAMPLES).long()

        Y_dict = {task_name: Y, "other_task": Y}
        dataset = DictDataset(name="dataset",
                              split="train",
                              X_dict={"data": X},
                              Y_dict=Y_dict)
        dataloader = DictDataLoader(dataset, batch_size=BATCH_SIZE)

        model = MultitaskClassifier([self.task1])
        loss_dict, count_dict = model.calculate_loss(dataset.X_dict,
                                                     dataset.Y_dict)
        self.assertIn("task1", loss_dict)

        # Test predicting and scoring without remapping
        results = model.predict(dataloader)
        self.assertIn("task1", results["golds"])
        self.assertNotIn("other_task", results["golds"])
        scores = model.score([dataloader])
        self.assertIn("task1/dataset/train/accuracy", scores)
        self.assertNotIn("other_task/dataset/train/accuracy", scores)

        # Test remapped labelsets
        results = model.predict(dataloader,
                                remap_labels={"other_task": task_name})
        self.assertIn("task1", results["golds"])
        self.assertIn("other_task", results["golds"])
        results = model.score([dataloader],
                              remap_labels={"other_task": task_name})
        self.assertIn("task1/dataset/train/accuracy", results)
        self.assertIn("other_task/dataset/train/accuracy", results)

    def test_convergence(self):
        """Test multitask classifier convergence with two tasks."""

        dataloaders = []

        for offset, task_name in zip([0.0, 0.25], ["task1", "task2"]):
            df = create_data(N_TRAIN, offset)
            dataloader = create_dataloader(df, "train", task_name)
            dataloaders.append(dataloader)

        for offset, task_name in zip([0.0, 0.25], ["task1", "task2"]):
            df = create_data(N_VALID, offset)
            dataloader = create_dataloader(df, "valid", task_name)
            dataloaders.append(dataloader)

        task1 = create_task("task1", module_suffixes=["A", "A"])
        task2 = create_task("task2", module_suffixes=["A", "B"])
        model = MultitaskClassifier(tasks=[task1, task2])

        # Train
        trainer = Trainer(lr=0.001, n_epochs=10, progress_bar=False)
        trainer.fit(model, dataloaders)
        scores = model.score(dataloaders)

        # Confirm near perfect scores on both tasks
        for idx, task_name in enumerate(["task1", "task2"]):
            self.assertGreater(scores[f"{task_name}/TestData/valid/accuracy"], 0.95)

            # Calculate/check train/val loss
            train_dataset = dataloaders[idx].dataset
            train_loss_output = model.calculate_loss(
                train_dataset.X_dict, train_dataset.Y_dict
            )
            train_loss = train_loss_output[0][task_name].item()
            self.assertLess(train_loss, 0.05)

            val_dataset = dataloaders[2 + idx].dataset
            val_loss_output = model.calculate_loss(
                val_dataset.X_dict, val_dataset.Y_dict
            )
            val_loss = val_loss_output[0][task_name].item()
            self.assertLess(val_loss, 0.05)
Example #7
        )

        # Add task to list of tasks
        tasks.append(task_object)

# Pass the list of tasks to MultitaskClassifier to create a model with an architecture set up for each task
model = MultitaskClassifier(tasks)

# Set the trainer settings, i.e. how the model will train
trainer_config = {
    "progress_bar": True,
    "n_epochs": 2,
    "lr": 0.02,
    "logging": True,
    "log_writer": "json",
    "checkpointing": True,
}

# Create trainer object using above settings
trainer = Trainer(**trainer_config)

# Train the model on the linked dataloaders using the settings above
trainer.fit(model, dataloaders)

# Write the model's training stats to a JSON file
trainer.log_writer.write_log("output_statistics.json")

# Score the model on the dataloaders and print the results
model_scores = model.score(dataloaders)
print(model_scores)
Example #8
# %%
from snorkel.classification import Trainer

trainer_config = {"progress_bar": False, "n_epochs": 10, "lr": 0.02}

trainer = Trainer(**trainer_config)
trainer.fit(model, dataloaders)

# %% [markdown]
# ### Evaluate model

# %% [markdown]
# After training, we can call the model.score() method to see the final performance of our trained model.

# %%
model.score(dataloaders)

# %% [markdown]
# Task-specific metrics are recorded in the form `task/dataset/split/metric`, corresponding to the task that made the predictions, the dataset the predictions were made on, the split being evaluated, and the metric being calculated.
#
# For model-wide metrics (such as the total loss over all tasks or the learning rate), the default task name is `model` and the dataset name is `all` (e.g. `model/all/train/loss`).
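
# %% [markdown]
# For instance, a quick way to see these keys is to iterate over the dictionary returned by `model.score()`:

# %%
scores = model.score(dataloaders)
for name, value in scores.items():
    # Prints lines like "task/dataset/split/metric: 0.987"
    # (the actual names depend on your tasks and datasets)
    print(f"{name}: {value:.3f}")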

# %% [markdown]
# # Your Turn

# %% [markdown]
# To check your understanding of how to use the multi-task `MultitaskClassifier`, see if you can add a task to this multi-task model.
#
# We'll generate the data for you (again, with a train, valid, and test split).
# Let's call it the `inv_circle_task`, since it will have the same distribution as our circle data, but with the inverted (flipped) labels.
# Intuitively, a model that is very good at telling whether a point is within a certain region should also be very good at telling if it's outside the region.
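
# %% [markdown]
# As a rough sketch of one way such a task could be wired up (the module names, layer sizes, and input key below are illustrative assumptions, not the tutorial's exact setup): build a small module pool, chain the modules with `Operation`s, wrap them in a `Task`, and hand the extended task list to a new `MultitaskClassifier`.

# %%
import torch.nn as nn

from snorkel.classification import MultitaskClassifier, Operation, Task

# Hypothetical modules: a small shared backbone and a head for the new task.
module_pool = nn.ModuleDict(
    {
        "base_mlp": nn.Sequential(nn.Linear(2, 8), nn.ReLU()),
        "inv_circle_head": nn.Linear(8, 2),
    }
)

# Feed the raw features into the backbone, then the backbone output into the head.
op_sequence = [
    Operation(name="base", module_name="base_mlp", inputs=[("_input_", "data")]),
    Operation(name="inv_circle_head_op", module_name="inv_circle_head", inputs=["base"]),
]

inv_circle_task = Task(
    name="inv_circle_task",
    module_pool=module_pool,
    op_sequence=op_sequence,
)

# Rebuild the classifier with the new task included (the existing task name below is a
# placeholder) and retrain with dataloaders that also carry the inv_circle_task labels.
# model = MultitaskClassifier([circle_task, inv_circle_task])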