예제 #1
0
def analyze(model,
            dataset,
            sampled_classes=50,
            examples_per_class=50,
            kappa=0,
            n_t=300,
            n_reps=1,
            max_class=None,
            projection=True,
            projection_dimension=5000,
            layer_nums=None,
            layer_types=None,
            verbose=True,
            cuda=True,
            seed=0):
    '''
    Bundled analysis for PyTorch models and datasets. Automatically flattens all features.

    The function samples manifold data from the dataset, extracts the model's
    activations for it, flattens (and optionally randomly projects) the
    activations of every layer, and runs `manifold_analysis_corr` on each
    selected layer.

    Args:
        model: PyTorch model to analyze.
        dataset: PyTorch style dataset or iterable containing (input, label) pairs.
        sampled_classes: Number of classes to sample in the analysis (Default 50).
        examples_per_class: Number of examples per class to use in the analysis (Default 50).
        kappa: Size of margin to use in analysis (Default 0)
        n_t: Number of t vectors to sample (Default 300)
        n_reps: Number of repetitions to use in correlation analysis (Default 1)
        max_class: ID of the largest class to choose in sampling. Forwarded to
            `make_manifold_data` only when it is not None (Default None).
        projection: Whether or not to project the data to a lower dimension.
        projection_dimension: Dimension above which data is projected down to projection_dimension.
        layer_nums: Numbers of layers to analyze. Ex: [1, 2, 4]
        layer_types: Types of layers to use in analysis. Ex: ['Conv2d', 'Linear']. Only use if
            layer_nums isn't specified.
        verbose: Give updates on progress (Default True)
        cuda: Whether or not to use a GPU to generate activations (Default True)
        seed: Random seed. Used for sampling the data and, through the global
            NumPy random state, for the random projections.

    Returns:
        results: OrderedDict mapping each analyzed layer name to a dict with the
            keys 'capacity', 'radius', 'dimension', 'correlation', 'K' and
            'feature dimension'.
    '''
    # Use the GPU only when it was requested and is actually available
    device = torch.device(
        "cuda" if torch.cuda.is_available() and cuda else "cpu")
    # Create the manifold data. max_class is only forwarded when the caller set
    # it, so the default call is exactly what it was before.
    # NOTE(review): assumes make_manifold_data accepts a `max_class` keyword,
    # as the docstring of this function implies -- confirm against its signature.
    sampling_kwargs = {'seed': seed}
    if max_class is not None:
        sampling_kwargs['max_class'] = max_class
    manifold_data = make_manifold_data(dataset,
                                       sampled_classes,
                                       examples_per_class,
                                       **sampling_kwargs)
    # Move the model and data to the device
    model = model.to(device)
    manifold_data = [d.to(device) for d in manifold_data]
    # Extract the activations
    activations = extractor(model,
                            manifold_data,
                            layer_nums=layer_nums,
                            layer_types=layer_types)
    # Set the (global NumPy) seed for random projections
    np.random.seed(seed)
    # Preprocess activations for analysis
    for layer, data in activations.items():
        # Flatten every entry to 2-D and transpose so features are on axis 0
        X = [d.reshape(d.shape[0], -1).T for d in data]
        # Get the number of features in the flattened data
        N = X[0].shape[0]
        # Optionally project the features to a lower dimension
        if projection and N > projection_dimension:
            # Random Gaussian projection matrix with rows scaled to unit norm
            M = np.random.randn(projection_dimension, N)
            M /= np.sqrt(np.sum(np.square(M), axis=1, keepdims=True))
            # Project the data
            X = [np.matmul(M, d) for d in X]
        # Replacing the value of an existing key is safe while iterating
        activations[layer] = X
    # Create storage for the results
    results = OrderedDict()
    # Run the analysis on each layer that has been selected
    for k, X in activations.items():
        # Layer names are parsed as '<prefix>_<number>_..._<type>': the second
        # '_'-separated field is the layer number, the last one the layer type.
        if layer_nums is not None and int(k.split('_')[1]) in layer_nums:
            selected = True
        elif layer_types is not None and k.split('_')[-1] in layer_types:
            selected = True
        else:
            # With no filter at all, every layer is analyzed
            selected = layer_nums is None and layer_types is None

        if not selected:
            continue

        if verbose:
            print('Analyzing {}'.format(k))
        a, r, d, r0, K = manifold_analysis_corr(X,
                                                kappa,
                                                n_t,
                                                n_reps=n_reps)
        # Store the results
        results[k] = {
            'capacity': a,
            'radius': r,
            'dimension': d,
            'correlation': r0,
            'K': K,
            'feature dimension': X[0].shape[0],
        }
    return results
def main():
    '''
    Run the manifold analysis on the ReLU layers of the `models_dc.alexnet`
    model, pickle the per-layer capacity, radius, dimension and correlation,
    and plot them.

    All inputs (image folder, sample sizes) and outputs (pickle files, figure)
    are hard-coded below. The random projection is not seeded here, so the
    projected features differ between runs.
    '''
    # Pick the device first so the model and the data end up on the same one,
    # and the script still runs on a machine without CUDA.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = models_dc.alexnet(sobel=True, bn=False, out=10000)
    model = model.to(device)
    model = model.eval()
    sampled_classes = 100
    examples_per_class = 50

    ###############  CHANGE DATASET LOADING TO YOUR STIMULI #########
    mean = (0.485, 0.456, 0.406)
    std = (0.229, 0.224, 0.225)

    transform_train = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean, std)
    ])

    img_pth = '/data/ILSVRC2012/val_in_folders'
    train_dataset = datasets.ImageFolder(img_pth, transform=transform_train)
    data = make_manifold_data(train_dataset,
                              sampled_classes,
                              examples_per_class,
                              seed=0)
    data = [d.to(device) for d in data]
    activations = extractor(model, data, layer_types=['ReLU'])

    for layer, layer_data in activations.items():
        # Flatten every entry to 2-D and transpose so features are on axis 0
        X = [d.reshape(d.shape[0], -1).T for d in layer_data]
        # Get the number of features in the flattened data
        N = X[0].shape[0]
        # If N is greater than 5000, do the random projection to 5000 features
        if N > 5000:
            print("Projecting {}".format(layer))
            # Random Gaussian projection matrix with rows scaled to unit norm
            M = np.random.randn(5000, N)
            M /= np.sqrt(np.sum(M * M, axis=1, keepdims=True))
            X = [np.matmul(M, d) for d in X]
        # Replacing the value of an existing key is safe while iterating
        activations[layer] = X

    capacities = []
    radii = []
    dimensions = []
    correlations = []

    for k, X in activations.items():
        # Analyze each layer's activations
        a, r, d, r0, K = manifold_analysis_corr(X, 0, 300, n_reps=1)

        # Reduce to one number per layer: harmonic mean for the capacity,
        # arithmetic mean for radius and dimension
        a = 1 / np.mean(1 / a)
        r = np.mean(r)
        d = np.mean(d)
        print(
            "{} capacity: {:4f}, radius {:4f}, dimension {:4f}, correlation {:4f}"
            .format(k, a, r, d, r0))

        # Store for later
        capacities.append(a)
        radii.append(r)
        dimensions.append(d)
        correlations.append(r0)

    # Write one pickle file per measure
    features_dir = '/home/annatruzzi/neural_manifolds_replicaMFT/features/'
    pickle_name = 'manifolds_{}_dcalexnet_random_nobn.pickle'
    measures = (
        ('capacities', capacities),
        ('radii', radii),
        ('dimensions', dimensions),
        ('correlations', correlations),
    )
    for label, values in measures:
        with open(features_dir + pickle_name.format(label), 'wb') as handle:
            pickle.dump(values, handle)

    ##### Plot the results
    fig, axes = plt.subplots(1, 4, figsize=(18, 4))

    # One panel per measure: (values, y-axis label, y-axis limits)
    panels = (
        (capacities, r'$\alpha_M$', (0.035, 0.06)),
        (radii, r'$R_M$', (1.25, 1.60)),
        (dimensions, r'$D_M$', (25, 45)),
        (correlations, r'$\rho_{center}$', (0.15, 0.60)),
    )
    for ax, (values, ylabel, ylim) in zip(axes, panels):
        ax.plot(values, linewidth=5)
        ax.set_ylabel(ylabel, fontsize=18)
        ax.set_ylim(*ylim)

    # Tick labels are built from the second and third '_'-separated fields of
    # the layer names
    names = [n.split('_')[1] + ' ' + n.split('_')[2] for n in activations]
    for ax in axes:
        ax.set_xticks(list(range(len(names))))
        ax.set_xticklabels(names, rotation=90, fontsize=16)
        ax.tick_params(axis='both', which='major', labelsize=14)

    plt.tight_layout()
    # Save before showing: with a blocking GUI backend show() only returns once
    # the window is closed, so saving first guarantees the file is written.
    fig.savefig(
        '/home/annatruzzi/neural_manifolds_replicaMFT/plots/manifolds_dcalexnet_random_nobn.png',
        bbox_inches='tight')
    plt.show()