def test_generate_activation(self) -> None:
    """Check the number of saved activation entries after two overlapping runs.

    Activations are first computed and saved for every layer after the first,
    then for the first two layers. After each run, the number of paths matching
    the file-search pattern for ``"model_id_1"`` / ``"test"`` must equal the
    number of distinct layers requested so far.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        num_features = 4
        low, high = 0, 16
        model = BasicLinearReLULinear(num_features)
        dataset = RangeDataset(low, high, num_features)
        # Keep only non-empty module names (the root module's name is "").
        layer_names: List[str] = [
            name for name, _ in model.named_modules() if name
        ]

        # Each round: (dataset indices to stack, layers to process,
        #              expected number of matched entries afterwards).
        rounds = (
            # First AV generation on all layers after the first.
            ((1, 8, 14), layer_names[1:], len(layer_names[1:])),
            # Second AV generation on the first 2 layers. The second layer
            # overlaps with existing activations, should be loaded.
            ((0, 7, 13), layer_names[:2], len(layer_names)),
        )

        for sample_ids, requested_layers, expected_count in rounds:
            batch = torch.stack(tuple(dataset[idx] for idx in sample_ids))
            AV._compute_and_save_activations(
                tmpdir,
                model,
                "model_id_1",
                requested_layers,
                batch,
                "test",
                "0",
            )
            search_pattern = AV._construct_file_search(
                tmpdir, "model_id_1", identifier="test"
            )
            matched_paths = glob.glob(search_pattern)
            self.assertEqual(len(matched_paths), expected_count)
def test_generate_dataset_activations(self) -> None:
    """Exercise ``AV.generate_dataset_activations`` over a batched dataset.

    Two generation passes are run against the same temporary directory:
    first for every layer after the first, then for the first two layers.
    After each pass the test asserts that

    * the number of paths matching the ``"model_id1"`` / ``"src"`` search
      pattern equals ``num_batches`` times the number of distinct layers
      requested so far;
    * with ``return_activations=True`` the call returns a list holding one
      dataset per requested layer, each of length ``num_batches``.

    Finally, a call with ``return_activations=False`` must return ``None``.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        num_features = 4
        low, high = 0, 16
        batch_size = high // 2
        # `high` is an exact multiple of `batch_size`, so floor division
        # yields the batch count as an int; counts are then compared
        # int-to-int instead of against a float from true division.
        num_batches = high // batch_size
        mymodel = BasicLinearReLULinear(num_features)
        mydata = RangeDataset(low, high, num_features)
        layers: List[str] = [
            value[0] for value in mymodel.named_modules() if value[0]
        ]

        # First AV generation on all layers after the first.
        layer_AVDatasets = AV.generate_dataset_activations(
            tmpdir,
            mymodel,
            "model_id1",
            layers[1:],
            DataLoader(mydata, batch_size, shuffle=False),
            "src",
            return_activations=True,
        )
        search_pattern = AV._construct_file_search(
            tmpdir, model_id="model_id1", identifier="src"
        )
        saved_paths = glob.glob(search_pattern)
        self.assertEqual(len(saved_paths), num_batches * len(layers[1:]))

        self.assertIsInstance(layer_AVDatasets, list)
        # cast() only narrows the static type; the runtime check is above.
        layer_AVDatasets = cast(list, layer_AVDatasets)
        self.assertEqual(len(layer_AVDatasets), len(layers[1:]))
        for layer_AVDataset in layer_AVDatasets:
            self.assertEqual(len(layer_AVDataset), num_batches)

        # Second AV generation on first 2 layers.
        # Second layer overlaps with existing activations, should be loaded.
        layer_AVDatasets = AV.generate_dataset_activations(
            tmpdir,
            mymodel,
            "model_id1",
            layers[:2],
            DataLoader(mydata, batch_size, shuffle=False),
            "src",
            return_activations=True,
        )
        search_pattern = AV._construct_file_search(
            tmpdir, model_id="model_id1", identifier="src"
        )
        saved_paths = glob.glob(search_pattern)
        self.assertEqual(len(saved_paths), num_batches * len(layers))

        self.assertIsInstance(layer_AVDatasets, list)
        layer_AVDatasets = cast(list, layer_AVDatasets)
        self.assertEqual(len(layer_AVDatasets), len(layers[:2]))
        for layer_AVDataset in layer_AVDatasets:
            self.assertEqual(len(layer_AVDataset), num_batches)

        # check that if return_activations is False, None is returned
        self.assertIsNone(
            AV.generate_dataset_activations(
                tmpdir,
                mymodel,
                "model_id1",
                layers[:2],
                DataLoader(mydata, batch_size, shuffle=False),
                "src",
                return_activations=False,
            )
        )