Ejemplo n.º 1
0
 def test_av_load_non_saved_layer(self) -> None:
     """Loading activations for a model that was never saved must fail.

     ``AV.load`` is called on a freshly created, empty temporary directory
     and is expected to raise ``RuntimeError`` whose message names both the
     model id and the path that was searched.
     """
     with tempfile.TemporaryDirectory() as tmpdir:
         model_id = "dummy"
         with self.assertRaises(RuntimeError) as context:
             AV.load(tmpdir, model_id)
         expected_msg = (f"Activation vectors for model {model_id} "
                         f"was not found at path {tmpdir}")
         # assertEqual shows both strings when they differ, whereas
         # assertTrue(a == b) only reports "False is not true".
         self.assertEqual(expected_msg, str(context.exception))
Ejemplo n.º 2
0
    def test_av_load_all_layers_one_identifier(self) -> None:
        """Save three layers under two identifiers and load them back.

        After each ``AV.save`` call the total number of entries loaded for
        the model is expected to grow by three (one per layer).  Loading
        with a specific identifier must then return exactly the three
        tensors that were saved under that identifier, in saving order.
        """
        model_id = "dummy"
        layer_names = ("layer1.0.conv1", "layer1.0.conv2", "layer1.1.conv1")
        row_counts = (36, 16, 4)

        with tempfile.TemporaryDirectory() as tmpdir:
            # One random activation tensor per layer, for each identifier.
            saved_by_idf = {
                "idf1": [torch.randn(rows, 16) for rows in row_counts],
                "idf2": [torch.randn(rows, 16) for rows in row_counts],
            }

            # Save identifier by identifier and watch the model-wide total.
            expected_total = 0
            for idf, tensors in saved_by_idf.items():
                AV.save(tmpdir, model_id, idf, list(layer_names), tensors,
                        "0")
                expected_total += len(layer_names)
                everything = DataLoader(
                    cast(Dataset, AV.load(tmpdir, model_id)))
                self.assertEqual(len(everything), expected_total)

            # Each identifier must give back exactly its own activations.
            for idf, tensors in saved_by_idf.items():
                per_identifier = DataLoader(
                    cast(Dataset, AV.load(tmpdir, model_id, identifier=idf)))
                self.assertEqual(len(per_identifier), 3)
                for loaded, original in zip(per_identifier, tensors):
                    assertTensorAlmostEqual(self, loaded,
                                            original.unsqueeze(0))
Ejemplo n.º 3
0
    def test_TCAV_generate_all_activations(self) -> None:
        """Check that ``generate_all_activations`` stores every activation.

        For each requested layer and each concept the test verifies that:

        * an activation entry exists on disk (``AV.exists``),
        * the trailing dimensions of every stored activation batch match
          the flattened output of the corresponding layer when the model is
          run on the concept's data, and
        * the stored batches of each concept add up to 100 rows.
        """

        def forward_hook_wrapper(expected_act: Tensor):
            """Build a forward hook that compares output shapes.

            The returned hook flattens the layer output to
            ``(batch, -1)`` and asserts that everything but the batch
            dimension matches ``expected_act``.
            """

            def forward_hook(module, inp, out=None):
                out = torch.reshape(out, (out.shape[0], -1))
                self.assertEqual(out.detach().shape[1:],
                                 expected_act.shape[1:])

            return forward_hook

        def batch_collate(batch):
            # Concatenate the items of a batch into a single tensor.
            return torch.cat(batch)

        with tempfile.TemporaryDirectory() as tmpdirname:
            layers = ["conv1", "conv2", "fc1", "fc2"]
            tcav, concept_dict = init_TCAV(tmpdirname,
                                           CustomClassifier(),
                                           layers=layers)
            tcav.concepts = set(concept_dict.values())

            # generating all activations for given layers and concepts
            tcav.generate_all_activations()

            # verify that all activations exist and have correct shapes
            for layer in layers:
                for concept in concept_dict.values():
                    self.assertTrue(
                        AV.exists(tmpdirname, "default_model_id",
                                  concept.identifier, layer))

                # The module lookup depends only on the layer, so do it once
                # per layer instead of once per activation batch.
                layer_module = _get_module_from_name(tcav.model, layer)

                # Number of stored activation rows, keyed by concept id.
                concept_meta: Dict[int, int] = defaultdict(int)
                for concept in concept_dict.values():
                    activations = AV.load(tmpdirname, "default_model_id",
                                          concept.identifier, layer)

                    self.assertTrue(concept.data_iter is not None)
                    assert activations is not None
                    for activation in cast(
                            Iterable,
                            DataLoader(activations, collate_fn=batch_collate)):

                        concept_meta[concept.id] += activation.shape[0]

                        for data in cast(Iterable, concept.data_iter):
                            hook = layer_module.register_forward_hook(
                                forward_hook_wrapper(activation))
                            try:
                                tcav.model(data)
                            finally:
                                # Always detach the hook, even when the
                                # forward pass or the shape check fails, so
                                # it cannot leak into later forward passes.
                                hook.remove()

                # asserting the length of entire dataset for each concept
                for concept_meta_i in concept_meta.values():
                    self.assertEqual(concept_meta_i, 100)
Ejemplo n.º 4
0
    def test_av_load_one_batch(self) -> None:
        """Save one batch per layer and check what ``AV.load`` returns.

        The test walks through four stages: loading before anything is
        saved (must raise ``RuntimeError``), loading after the first layer
        is saved, loading a layer that has not been saved yet (expected to
        be empty), and loading after the second layer is saved.
        """
        with tempfile.TemporaryDirectory() as tmpdir:
            av_0 = torch.randn(64, 16)
            av_1 = torch.randn(36, 16)
            avs = [av_0, av_1]

            # nothing has been saved yet, so loading must fail
            model_id = "dummy"
            with self.assertRaises(RuntimeError) as context:
                AV.load(tmpdir, model_id)
            # assertEqual shows both strings on mismatch, unlike
            # assertTrue(a == b).
            self.assertEqual(
                (f"Activation vectors for model {model_id} "
                 f"was not found at path {tmpdir}"), str(context.exception))

            # add av_0 to the list of activations
            AV.save(tmpdir, model_id, DEFAULT_IDENTIFIER, "layer1.0.conv1",
                    av_0, "0")
            dataset = AV.load(tmpdir, model_id, identifier=DEFAULT_IDENTIFIER)

            dataloader_1 = DataLoader(cast(Dataset, dataset))
            # Guard against the comparison loop passing without running.
            self.assertEqual(len(dataloader_1), 1)
            for i, av in enumerate(dataloader_1):
                assertTensorAlmostEqual(self, av, avs[i].unsqueeze(0))

            # "layer1.0.conv2" has not been saved yet, so loading that layer
            # is expected to give an empty dataset
            dataloader_2 = DataLoader(
                cast(
                    Dataset,
                    AV.load(tmpdir, model_id, DEFAULT_IDENTIFIER,
                            "layer1.0.conv2"),
                ))
            self.assertEqual(len(dataloader_2), 0)

            # add av_1 to the list of activations
            AV.save(tmpdir, model_id, DEFAULT_IDENTIFIER, "layer1.0.conv2",
                    av_1, "0")
            dataset = AV.load(tmpdir, model_id, identifier=DEFAULT_IDENTIFIER)

            dataloader = DataLoader(cast(Dataset, dataset))
            self.assertEqual(len(dataloader), 2)
            for i, av in enumerate(dataloader):
                assertTensorAlmostEqual(self, av, avs[i].unsqueeze(0))
Ejemplo n.º 5
0
    def test_av_load_all_identifiers_one_layer(self) -> None:
        """Load one layer's activations across several identifiers.

        One batch is saved for ``layer1.0.conv1`` under the default
        identifier, then one batch each for ``layer1.0.conv2`` under three
        further identifiers.  Loading by layer must return the three conv2
        batches in saving order, and loading the whole model must return
        all four entries.
        """
        model_id = "dummy"
        conv2 = "layer1.0.conv2"

        with tempfile.TemporaryDirectory() as tmpdir:
            default_av = torch.randn(64, 16)
            conv2_avs = [torch.randn(rows, 16) for rows in (36, 16, 4)]

            AV.save(tmpdir, model_id, DEFAULT_IDENTIFIER, "layer1.0.conv1",
                    default_av, "0")
            default_dataset = AV.load(tmpdir, model_id,
                                      identifier=DEFAULT_IDENTIFIER)
            self.assertEqual(
                len(DataLoader(cast(Dataset, default_dataset))), 1)

            # add activations for another layer, one identifier per tensor
            for idf, tensor in zip(("idf1", "idf2", "idf3"), conv2_avs):
                AV.save(tmpdir, model_id, idf, conv2, tensor, "0")

            conv2_dataset = AV.load(tmpdir, model_id, layer=conv2)
            conv2_loader = DataLoader(cast(Dataset, conv2_dataset))

            self.assertEqual(len(conv2_loader), 3)
            for position, loaded in enumerate(conv2_loader):
                assertTensorAlmostEqual(self, loaded,
                                        conv2_avs[position].unsqueeze(0))

            # without any filter, every saved entry is returned
            full_dataset = AV.load(tmpdir, model_id)
            self.assertEqual(len(DataLoader(cast(Dataset, full_dataset))), 4)
Ejemplo n.º 6
0
        def save_load_and_assert_batch(layer_path, total_num_batches, batch,
                                       n_batch_name):
            """Save one batch, reload it, and check the layer's batch count.

            Uses ``self``, ``tmpdir`` and ``model_id`` from the enclosing
            scope.

            Args:
                layer_path: Not used by this helper; kept in the signature
                    so existing call sites keep working.
                total_num_batches: Number of batches expected to be stored
                    for the layer once this batch has been saved.
                batch: The activation batch to save and compare against.
                n_batch_name: Name under which the batch is saved and then
                    loaded again.
            """
            layer = "layer1.0.conv1"

            # save n-th batch and verify the number of saved batches
            AV.save(
                tmpdir,
                model_id,
                DEFAULT_IDENTIFIER,
                layer,
                batch,
                n_batch_name,
            )
            loaded_dataset = AV.load(tmpdir, model_id, DEFAULT_IDENTIFIER,
                                     layer, n_batch_name)

            # The trailing 0.0 is presumably the allowed tolerance, i.e. an
            # exact round trip is required - confirm against the helper.
            assertTensorAlmostEqual(self, next(iter(loaded_dataset)), batch,
                                    0.0)

            loaded_dataset_for_layer = AV.load(tmpdir, model_id,
                                               DEFAULT_IDENTIFIER, layer)
            # Use the built-in len() rather than calling __len__ directly.
            self.assertEqual(
                len(loaded_dataset_for_layer),
                total_num_batches,
            )
Ejemplo n.º 7
0
    def test_equal_activation(self) -> None:
        """Stored activations of ``layers[2]`` must equal the model output.

        Activations for the third named sub-module of the model are
        computed and saved for a single input, loaded back from disk, and
        compared with the result of calling the model on that input.
        """
        with tempfile.TemporaryDirectory() as tmpdir:
            num_features = 4
            low, high = 0, 16
            mymodel = BasicLinearReLULinear(num_features)
            mydata = RangeDataset(low, high, num_features)
            # Names of all sub-modules; the empty name is filtered out.
            layers: List[str] = [
                name for name, _ in mymodel.named_modules() if name
            ]

            # Compute and save the activations of one layer for one sample
            test_input = mydata[1].unsqueeze(0)
            target_layer = layers[2]
            model_id, identifier, num_id = "id_1", "test", "0"
            AV._compute_and_save_activations(tmpdir, mymodel, model_id,
                                             target_layer, test_input,
                                             identifier, num_id)

            # Load what was just written and stitch the batches together
            act_dataset = AV.load(tmpdir, model_id, identifier, target_layer,
                                  num_id)
            stored = torch.cat(
                [chunk.squeeze(0) for chunk in DataLoader(act_dataset)])

            expected = mymodel(test_input)
            assertTensorAlmostEqual(self, expected, stored)
Ejemplo n.º 8
0
def train_cav(
    model_id: str,
    concepts: List[Concept],
    layers: Union[str, List[str]],
    classifier: Classifier,
    save_path: str,
    classifier_kwargs: Dict,
) -> Dict[str, Dict[str, CAV]]:
    r"""
    A helper function for parallel CAV computations that can be called
    from a python process.

    For every requested layer the stored activations of all concepts are
    loaded, the classifier is trained on them, and the resulting weights,
    classes and training statistics are wrapped in a CAV object that is
    saved to disk.

    Please see the TCAV class documentation for further information.

    Args:
        model_id (str): A unique identifier for the PyTorch model for which
                we would like to load the layer activations and train a
                model in order to compute CAVs.
        concepts (list[Concept]): A list of Concept objects that are used
                to train a classifier and learn decision boundaries between
                those concepts for each layer defined in the `layers`
                argument.
        layers (str, list[str]): A list of layer names or a single layer
                name that is used to compute the activations of all concept
                examples per concept and train a classifier using those
                activations.
        classifier (Classifier): A custom classifier class, such as the
                Sklearn "linear_model" that allows us to train a model
                using the activation vectors extracted for a layer per concept.
                It also allows us to access trained weights of the classifier
                and the list of prediction classes.
        save_path (str): The path for storing Concept Activation
                Vectors (CAVs) and Activation Vectors (AVs).
        classifier_kwargs (dict): Additional named arguments that are passed to
                concept classifier's `train_and_eval` method.

    Returns:
        cavs (dict): A dictionary of CAV objects indexed by concept ids and
                layer names. It gives access to the weights of each concept
                in a given layer and model statistics such as accuracies
                that resulted in trained concept weights.

    Raises:
        AssertionError: If, after training, the classifier reports no
                weights or no classes.
    """

    def batch_collate(batch):
        # Every batch item is an (inputs, labels) pair; concatenate each
        # side into a single tensor.
        inputs, labels = zip(*batch)
        return torch.cat(inputs), torch.cat(labels)

    concepts_key = concepts_to_str(concepts)
    # NOTE: defaultdict() without a factory behaves like a plain dict
    # (missing keys still raise KeyError); it is kept so that the type of
    # the returned containers does not change for callers.
    cavs: Dict[str, Dict[str, CAV]] = defaultdict()
    cavs[concepts_key] = defaultdict()
    layers = [layers] if isinstance(layers, str) else layers
    for layer in layers:

        # Create data loader to initialize the trainer.
        datasets = [
            AV.load(save_path, model_id, concept.identifier, layer)
            for concept in concepts
        ]

        labels = [concept.id for concept in concepts]

        labelled_dataset = LabelledDataset(cast(List[AV.AVDataset], datasets), labels)

        dataloader = DataLoader(labelled_dataset, collate_fn=batch_collate)

        classifier_stats_dict = classifier.train_and_eval(
            dataloader, **classifier_kwargs
        )
        # A classifier may return no statistics at all.
        classifier_stats_dict = (
            {} if classifier_stats_dict is None else classifier_stats_dict
        )

        weights = classifier.weights()
        assert (
            weights is not None and len(weights) > 0
        ), "Model weights cannot be None or empty"

        classes = classifier.classes()
        assert (
            classes is not None and len(classes) > 0
        ), "Classes cannot be None or empty"

        # Tensor.numpy() only works for CPU tensors, so move the classes to
        # host memory first; for tensors already on the CPU this is a no-op.
        classes = (
            cast(torch.Tensor, classes).detach().cpu().numpy()
            if isinstance(classes, torch.Tensor)
            else classes
        )
        cavs[concepts_key][layer] = CAV(
            concepts,
            layer,
            {"weights": weights, "classes": classes, **classifier_stats_dict},
            save_path,
            model_id,
        )
        # Saving cavs on the disk
        cavs[concepts_key][layer].save()

    return cavs