def test_extraction_batches(self):
    """Extract features from 10 examples using even and uneven batch splits.

    Whatever the batch size, the first dimension of the returned features
    and targets must equal the number of examples in the dataset.
    """
    backend = 'pt'
    num_examples = 10
    dataset = helper.SimpleDataset([1] * num_examples, backend)
    model = Model(
        'vgg16',
        pretrained=False,
        device=helper.DEVICE,
        backend=backend,
    )
    # Replace the wrapped network with the helper's PyTorch model.
    model.model = helper.pt_model

    extraction_kwargs = {
        'module_name': 'relu',
        'flatten_acts': False,
        'return_probabilities': False,
    }
    # batch_size=2 -> 5 batches of 2 examples (no remainder)
    # batch_size=3 -> 3 batches of 3 examples plus 1 batch with the remainder
    for batch_size in (2, 3):
        loader = DataLoader(dataset, batch_size=batch_size, backend=backend)
        features, targets = model.extract_features(
            data_loader=loader,
            **extraction_kwargs,
        )
        expected_rows = len(dataset)
        self.assertEqual(features.shape[0], expected_rows)
        self.assertEqual(targets.shape[0], expected_rows)
def test_load_custom_user_model(self):
    """Check that each ecoset model name loads the matching architecture.

    ``assertTrue(value, 'vgg')`` only checks that ``value`` is truthy (the
    second argument is the failure message), so it can never fail for a
    non-empty class name. The architecture is therefore compared explicitly
    with ``assertIn``.
    """
    expected_architectures = (
        ('VGG16bn_ecoset', 'vgg'),
        ('Resnet50_ecoset', 'resnet'),
        ('Alexnet_ecoset', 'alexnet'),
    )
    for model_name, architecture in expected_architectures:
        with self.subTest(model_name=model_name):
            model = Model(model_name, True, 'cpu')
            # NOTE(review): assumes the loaded network is exposed as
            # ``model.model`` and that its class name contains the
            # architecture name (case-insensitively) -- confirm against Model.
            network_name = model.model.__class__.__name__.lower()
            self.assertIn(architecture, network_name)
def extract_features_across_models_datasets_and_modules(
    out_path: str,
    model_names: List[str],
    img_paths: List[str],
    module_names: List[str],
    clip: List[bool],
    pretrained: bool,
    batch_size: int,
    backend: str,
    flatten_acts: bool,
    f_format: str = 'txt',
) -> None:
    """Extract and save features for every model / image directory / module.

    Parameters
    ----------
    out_path : str
        Base output directory. It is passed to ``load_dl`` and used as the
        first component of the path the features are saved under.
    model_names : List[str]
        Names of the models to load, one ``Model`` per entry.
    img_paths : List[str]
        Image directories. Each one is used as ``root`` for ``load_dl`` and
        as a component of the output path.
    module_names : List[str]
        One entry per model (same order as ``model_names``); it is handed to
        ``get_module_names`` to obtain the modules to extract for that model.
    clip : List[bool]
        One entry per model (same order as ``model_names``), forwarded as the
        ``clip`` argument of ``Model.extract_features``.
    pretrained : bool
        Forwarded to ``Model``.
    batch_size : int
        Forwarded to ``load_dl``.
    backend : str
        Forwarded to ``Model`` and ``load_dl``.
    flatten_acts : bool
        Forwarded to ``Model.extract_features``.
    f_format : str
        File format handed to ``save_features`` (default ``'txt'``).
    """
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    for i, model_name in enumerate(model_names):
        model = Model(
            model_name=model_name,
            pretrained=pretrained,
            device=device,
            model_path=None,
            backend=backend,
        )
        transforms = model.get_transformations()
        modules = get_module_names(model, module_names[i])
        for img_path in img_paths:
            # The loader depends only on the image directory and the model's
            # transforms, so build it once per directory rather than once per
            # module.
            # NOTE(review): assumes the object returned by load_dl can be
            # iterated more than once -- confirm.
            dl = load_dl(
                root=img_path,
                out_path=out_path,
                backend=backend,
                batch_size=batch_size,
                transforms=transforms,
            )
            for module_name in modules:
                # NOTE: os.path.join discards ``out_path`` when ``img_path``
                # is an absolute path.
                features_path = os.path.join(
                    out_path, img_path, model_name, module_name, 'features'
                )
                features, _ = model.extract_features(
                    data_loader=dl,
                    module_name=module_name,
                    flatten_acts=flatten_acts,
                    clip=clip[i],
                )
                save_features(features, features_path, f_format)
def create_model_and_dl(model_name, backend):
    """Create a pretrained model with its image dataset and data loader.

    Builds a single ``Model`` for ``model_name`` / ``backend``, an
    ``ImageDataset`` over ``TEST_PATH`` that uses the model's transformations,
    and a ``DataLoader`` with ``BATCH_SIZE``.

    Returns
    -------
    tuple
        ``(model, dataset, data_loader)``.
    """
    extractor = Model(
        model_name=model_name,
        pretrained=True,
        device=DEVICE,
        backend=backend,
    )
    preprocessing = extractor.get_transformations()
    # All optional dataset switches are passed explicitly as None.
    disabled_options = dict.fromkeys(
        (
            'imagenet_train',
            'imagenet_val',
            'things',
            'things_behavior',
            'add_ref_imgs',
        )
    )
    image_dataset = ImageDataset(
        root=TEST_PATH,
        out_path=OUT_PATH,
        backend=backend,
        transforms=preprocessing,
        **disabled_options,
    )
    loader = DataLoader(image_dataset, batch_size=BATCH_SIZE, backend=backend)
    return extractor, image_dataset, loader
def test_extract_features(self):
    """Compare extracted 'relu' features, targets and probabilities.

    Runs the same check for the PyTorch and TensorFlow backends, each with
    the corresponding helper model substituted for the loaded network, once
    without and once with ``return_probabilities``.
    """
    inputs = [2, -10]
    want_features = np.array([[2, 2], [0, 0]])
    want_targets = np.array([0, 0])
    want_probs = np.array([[0.5, 0.5], [0.5, 0.5]])
    configurations = (
        ('pt', helper.pt_model, 'vgg16'),
        ('tf', helper.tf_model, 'VGG16'),
    )

    for backend, custom_model, model_name in configurations:
        dataset = helper.SimpleDataset(inputs, backend)
        loader = DataLoader(dataset, batch_size=1, backend=backend)
        model = Model(
            model_name,
            pretrained=False,
            device=helper.DEVICE,
            backend=backend,
        )
        model.model = custom_model
        shared_kwargs = {
            'data_loader': loader,
            'module_name': 'relu',
            'flatten_acts': False,
        }

        # Without probabilities: two return values.
        features, targets = model.extract_features(
            return_probabilities=False,
            **shared_kwargs,
        )
        np.testing.assert_allclose(features, want_features)
        np.testing.assert_allclose(targets, want_targets)

        # With probabilities: a third return value is added.
        features, targets, probs = model.extract_features(
            return_probabilities=True,
            **shared_kwargs,
        )
        np.testing.assert_allclose(features, want_features)
        np.testing.assert_allclose(targets, want_targets)
        np.testing.assert_allclose(probs, want_probs)
def test_custom_torch_vs_tf_extraction(self):
    """Check that the TensorFlow and PyTorch helper models agree.

    Features are extracted from the 'relu' module for the same inputs with
    both backends; the two results must match each other and the expected
    values.
    """
    inputs = [2, -10]

    def extract_relu_features(backend, model_name, custom_model):
        # Build dataset, loader and model for one backend, then extract.
        dataset = helper.SimpleDataset(inputs, backend)
        loader = DataLoader(dataset, batch_size=1, backend=backend)
        model = Model(
            model_name,
            pretrained=False,
            device=helper.DEVICE,
            backend=backend,
        )
        model.model = custom_model
        extracted, _ = model.extract_features(
            data_loader=loader,
            module_name='relu',
            flatten_acts=False,
        )
        return extracted

    tf_features = extract_relu_features('tf', 'VGG16', helper.tf_model)
    pt_features = extract_relu_features('pt', 'vgg16', helper.pt_model)

    np.testing.assert_allclose(tf_features, pt_features)
    np.testing.assert_allclose(pt_features, np.array([[2, 2], [0, 0]]))
def get_features(
    root: str,
    out_path: str,
    model_names: List[str],
    module_names: List[str],
    clip: List[bool],
    pretrained: bool,
    batch_size: int,
    backend: str,
    flatten_acts: bool,
) -> Dict[str, Dict[str, np.ndarray]]:
    """Extract features for a list of models and their corresponding modules.

    Parameters
    ----------
    root : str
        Root directory in which the images are stored.
    out_path : str
        Path where the order of the image features should be stored. Files
        are sorted alphabetically and features are extracted in that order.
    model_names : List[str]
        Neural network models for which features should be extracted.
    module_names : List[str]
        Neural network layers for which features should be extracted.
        Entries must correspond to ``model_names`` position by position
        (think of the two lists as zipped).
    clip : List[bool]
        One Boolean per entry of ``model_names`` indicating whether that
        model is CLIP-based (True) or not (False).
    pretrained : bool
        Whether pretrained or randomly initialized models should be loaded.
    batch_size : int
        Number of images within a single mini-batch.
    backend : str
        Backend identifier forwarded to ``Model`` and ``load_dl``
        (presumably e.g. 'pt' or 'tf' -- confirm against ``Model``).
    flatten_acts : bool
        Whether an activation tensor (e.g., from an early layer) should be
        transformed into a feature vector.

    Returns
    -------
    output : Dict[str, Dict[str, np.ndarray]]
        Feature matrices keyed first by model name, then by module name.
    """
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    features_by_model = defaultdict(dict)
    for position, model_name in enumerate(model_names):
        model = Model(
            model_name=model_name,
            pretrained=pretrained,
            device=device,
            model_path=None,
            backend=backend,
        )
        loader = load_dl(
            root=root,
            out_path=out_path,
            backend=backend,
            batch_size=batch_size,
            transforms=model.get_transformations(),
        )
        # Module and CLIP flag are paired with the model by position.
        module_name = module_names[position]
        extracted, _ = model.extract_features(
            data_loader=loader,
            module_name=module_name,
            flatten_acts=flatten_acts,
            clip=clip[position],
        )
        features_by_model[model_name][module_name] = extracted
    return features_by_model