Example #1
    def test_extraction_batches(self):
        values = [1] * 10
        backend = 'pt'
        dataset = helper.SimpleDataset(values, backend)
        model = Model('vgg16',
                      pretrained=False,
                      device=helper.DEVICE,
                      backend=backend)
        model.model = helper.pt_model

        # batch_size=2: no remainder -> 5 batches of 2 examples
        # batch_size=3: remainder -> 3 batches of 3 examples plus 1 batch with the 1 remaining example

        for batch_size in [2, 3]:
            dl = DataLoader(
                dataset,
                batch_size=batch_size,
                backend=backend,
            )
            features, targets = model.extract_features(
                data_loader=dl,
                module_name='relu',
                flatten_acts=False,
                return_probabilities=False)
            self.assertEqual(features.shape[0], len(dataset))
            self.assertEqual(targets.shape[0], len(dataset))
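
The batch counts stated in the comments follow from ceiling division of the dataset size by the batch size. A minimal standalone sketch of that arithmetic (plain Python, independent of the DataLoader used above):

import math

n_examples = 10
for batch_size in (2, 3):
    n_batches = math.ceil(n_examples / batch_size)   # 5 for batch_size=2, 4 for batch_size=3
    remainder = n_examples % batch_size              # 0 for batch_size=2, 1 for batch_size=3
    print(batch_size, n_batches, remainder)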
Example #2
    def test_load_custom_user_model(self):
        model_name = 'VGG16bn_ecoset'
        model = Model(model_name, True, 'cpu')
        # assertTrue(x, msg) treats the second argument as a message and always
        # passes for truthy x; assertEqual performs the intended comparison
        self.assertEqual(model.model.__class__.__name__.lower(), 'vgg')

        model_name = 'Resnet50_ecoset'
        model = Model(model_name, True, 'cpu')
        self.assertEqual(model.model.__class__.__name__.lower(), 'resnet')

        model_name = 'Alexnet_ecoset'
        model = Model(model_name, True, 'cpu')
        self.assertEqual(model.model.__class__.__name__.lower(), 'alexnet')
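
The three near-identical blocks above could also be written as a single parameterized loop; a sketch, under the assumption (consistent with Examples #1 and #5, where tests assign model.model directly) that model.model holds the wrapped network:

    def test_load_custom_user_models_parameterized(self):
        cases = [('VGG16bn_ecoset', 'vgg'),
                 ('Resnet50_ecoset', 'resnet'),
                 ('Alexnet_ecoset', 'alexnet')]
        for model_name, expected in cases:
            with self.subTest(model=model_name):
                model = Model(model_name, True, 'cpu')
                # compare the wrapped network's class name, lower-cased
                self.assertEqual(model.model.__class__.__name__.lower(), expected)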
Example #3
def extract_features_across_models_datasets_and_modules(
        out_path: str,
        model_names: List[str],
        img_paths: List[str],
        module_names: List[str],
        clip: List[bool],
        pretrained: bool,
        batch_size: int,
        backend: str,
        flatten_acts: bool,
        f_format: str = 'txt') -> None:
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    for i, model_name in enumerate(model_names):
        model = Model(
            model_name=model_name,
            pretrained=pretrained,
            device=device,
            model_path=None,
            backend=backend,
        )
        transforms = model.get_transformations()
        modules = get_module_names(model, module_names[i])
        for img_path in img_paths:
            for module_name in modules:
                PATH = os.path.join(out_path, img_path, model_name,
                                    module_name, 'features')
                dl = load_dl(
                    root=img_path,
                    out_path=out_path,
                    backend=backend,
                    batch_size=batch_size,
                    transforms=transforms,
                )
                features, _ = model.extract_features(
                    data_loader=dl,
                    module_name=module_name,
                    flatten_acts=flatten_acts,
                    clip=clip[i],
                )
                save_features(features, PATH, f_format)
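
A hypothetical invocation of the function above (all paths, model names, and module names below are illustrative, not taken from the source):

extract_features_across_models_datasets_and_modules(
    out_path='./features',
    model_names=['vgg16', 'resnet50'],
    img_paths=['./images/set_a', './images/set_b'],
    module_names=['features.23', 'layer4'],
    clip=[False, False],
    pretrained=True,
    batch_size=32,
    backend='pt',
    flatten_acts=True,
)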
Example #4
def create_model_and_dl(model_name, backend):
    """Iterate through all backends and models and create model, dataset and data loader."""
    model = Model(model_name=model_name,
                  pretrained=True,
                  device=DEVICE,
                  backend=backend)

    dataset = ImageDataset(root=TEST_PATH,
                           out_path=OUT_PATH,
                           backend=backend,
                           imagenet_train=None,
                           imagenet_val=None,
                           things=None,
                           things_behavior=None,
                           add_ref_imgs=None,
                           transforms=model.get_transformations())
    dl = DataLoader(
        dataset,
        batch_size=BATCH_SIZE,
        backend=backend,
    )
    return model, dataset, dl
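
A sketch of how the helper might be driven in a test, using the backend/model pairs that appear in Example #5:

for backend, model_name in [('pt', 'vgg16'), ('tf', 'VGG16')]:
    model, dataset, dl = create_model_and_dl(model_name, backend)
    assert len(dataset) > 0  # the loader should see every image under TEST_PATH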
Example #5
    def test_extract_features(self):
        layer_name = 'relu'
        values = [2, -10]
        backends = [['pt', helper.pt_model, 'vgg16'],
                    ['tf', helper.tf_model, 'VGG16']]
        batch_size = 1
        for backend, custom_model, vgg_model in backends:
            dataset = helper.SimpleDataset(values, backend)
            dl = DataLoader(
                dataset,
                batch_size=batch_size,
                backend=backend,
            )
            model = Model(vgg_model,
                          pretrained=False,
                          device=helper.DEVICE,
                          backend=backend)

            model.model = custom_model
            expected_features = np.array([[2, 2], [0, 0]])
            expected_targets = np.array([0, 0])

            features, targets = model.extract_features(
                data_loader=dl,
                module_name=layer_name,
                flatten_acts=False,
                return_probabilities=False)
            np.testing.assert_allclose(features, expected_features)
            np.testing.assert_allclose(targets, expected_targets)

            expected_probs = np.array([[0.5, 0.5], [0.5, 0.5]])
            features, targets, probs = model.extract_features(
                data_loader=dl,
                module_name=layer_name,
                flatten_acts=False,
                return_probabilities=True)
            np.testing.assert_allclose(features, expected_features)
            np.testing.assert_allclose(targets, expected_targets)
            np.testing.assert_allclose(probs, expected_probs)
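
The expected arrays above can be derived by hand, assuming (consistently with the asserted [[2, 2], [0, 0]]) that the toy model maps each scalar input to two identical pre-activations: ReLU zeroes the negative input, and softmax over two equal logits is uniform. A standalone check:

import numpy as np

def softmax(x):
    e = np.exp(x - x.max(axis=-1, keepdims=True))
    return e / e.sum(axis=-1, keepdims=True)

inputs = np.array([2., -10.])
# assumed toy mapping: each scalar input -> two identical pre-activations
features = np.maximum(np.stack([inputs, inputs], axis=1), 0)  # ReLU -> [[2, 2], [0, 0]]
probs = softmax(features)                                     # equal logits -> [[0.5, 0.5], [0.5, 0.5]]
print(features, probs)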
Example #6
    def test_custom_torch_vs_tf_extraction(self):
        layer_name = 'relu'
        values = [2, -10]
        backend = 'tf'
        tf_dataset = helper.SimpleDataset(values, backend)
        tf_dl = DataLoader(
            tf_dataset,
            batch_size=1,
            backend=backend,
        )

        model = Model('VGG16',
                      pretrained=False,
                      device=helper.DEVICE,
                      backend=backend)
        model.model = helper.tf_model
        tf_features, _ = model.extract_features(
            data_loader=tf_dl,
            module_name=layer_name,
            flatten_acts=False,
        )

        backend = 'pt'
        pt_dataset = helper.SimpleDataset(values, backend)
        pt_dl = DataLoader(
            pt_dataset,
            batch_size=1,
            backend=backend,
        )
        model = Model('vgg16',
                      pretrained=False,
                      device=helper.DEVICE,
                      backend=backend)
        model.model = helper.pt_model
        pt_features, _ = model.extract_features(
            data_loader=pt_dl,
            module_name=layer_name,
            flatten_acts=False,
        )
        np.testing.assert_allclose(tf_features, pt_features)

        expected_features = np.array([[2, 2], [0, 0]])
        np.testing.assert_allclose(pt_features, expected_features)
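
One caveat worth noting: for real (non-toy) models, bit-exact agreement between TensorFlow and PyTorch features is unlikely, and np.testing.assert_allclose accepts explicit tolerances for that case (the values here are illustrative):

np.testing.assert_allclose(tf_features, pt_features, rtol=1e-5, atol=1e-7)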
Example #7
def get_features(
    root: str,
    out_path: str,
    model_names: List[str],
    module_names: List[str],
    clip: List[bool],
    pretrained: bool,
    batch_size: int,
    backend: str,
    flatten_acts: bool,
) -> Dict[str, Dict[str, np.ndarray]]:
    """Extract features for a list of neural network models and corresponding modules.

    Parameters
    ----------
    root : str
        Root directory. Directory where images are stored.
    out_path : str
        Path where the order of the image features should
        be stored. Image files are sorted alphabetically,
        and features are extracted in that order.
    model_names : List[str]
        List of neural network models for which features
        should be extracted.
    module_names : List[str]
        List of neural network layers for which features
        should be extracted. Modules must correspond to
        models. This should be thought of as zipped lists.
    clip : List[bool]
        List of Booleans indicating whether the
        corresponding model in the <model_names> list
        is a CLIP-based model (i.e., True if CLIP,
        else False).
    pretrained : bool
        Whether pretrained or randomly initialized models
        should be loaded into memory.
    batch_size : int
        Integer value that determines the number of images
        within a single mini-batch (i.e., subsample
        of the data).
    backend : str
        Deep learning backend to use for feature
        extraction ('pt' for PyTorch or 'tf' for
        TensorFlow).
    flatten_acts : bool
        Whether the activation tensor (e.g., activations
        from an early layer of the neural network model)
        should be flattened into a feature vector.

    Returns
    -------
    output : Dict[str, Dict[str, np.ndarray]]
        Returns a dictionary of feature matrices
        corresponding to the selected models and layers.
    """
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model_features = defaultdict(dict)
    for i, model_name in enumerate(model_names):
        model = Model(
            model_name=model_name,
            pretrained=pretrained,
            device=device,
            model_path=None,
            backend=backend,
        )
        transforms = model.get_transformations()
        dl = load_dl(
            root=root,
            out_path=out_path,
            backend=backend,
            batch_size=batch_size,
            transforms=transforms,
        )
        features, _ = model.extract_features(
            data_loader=dl,
            module_name=module_names[i],
            flatten_acts=flatten_acts,
            clip=clip[i],
        )
        model_features[model_name][module_names[i]] = features
    return model_features
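
A hypothetical call, mirroring the zipped model/module/clip convention described in the docstring (paths, model names, and layer names are illustrative):

model_features = get_features(
    root='./images',
    out_path='./features',
    model_names=['vgg16', 'resnet50'],
    module_names=['features.23', 'layer4'],
    clip=[False, False],
    pretrained=True,
    batch_size=32,
    backend='pt',
    flatten_acts=True,
)
vgg_acts = model_features['vgg16']['features.23']  # feature matrix for the chosen layer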