def plot_eigenvalue_report(weight_directory,
                           unpruned_n_eigenvalues=None,
                           pruned_n_eigenvalues=None,
                           filter_norm=1,
                           figsize=(10, 5)):

    weight_paths = get_weights_paths(weight_directory, norm=filter_norm)

    is_slice = (unpruned_n_eigenvalues is not None
                or pruned_n_eigenvalues is not None)

    n_rows = 2 if is_slice else 1

    _, axes = plt.subplots(n_rows, 2, squeeze=False, figsize=figsize)

    axes[0][0].set_title('Unpruned')
    plot_eigenvalues(weight_paths[True],
                     filter_norm=filter_norm,
                     ax=axes[0][0])

    if is_slice:
        plot_eigenvalues(weight_paths[True],
                         unpruned_n_eigenvalues,
                         filter_norm=filter_norm,
                         ax=axes[1][0])

    axes[0][1].set_title('Pruned')
    plot_eigenvalues(weight_paths[False],
                     filter_norm=filter_norm,
                     ax=axes[0][1])

    if is_slice:
        plot_eigenvalues(weight_paths[False],
                         pruned_n_eigenvalues,
                         filter_norm=filter_norm,
                         ax=axes[1][1])
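# Usage sketch (illustrative only; the run directory path is hypothetical):
# compare the full eigenvalue spectra of the unpruned and pruned networks and
# additionally zoom in on the top 30 eigenvalues of each.
#
#   plot_eigenvalue_report('models/mlp_mnist_run',
#                          unpruned_n_eigenvalues=30,
#                          pruned_n_eigenvalues=30)
#   plt.show()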
def run_double_spectral_cluster(weight_directory,
                                with_shuffle=True,
                                n_clusters=4,
                                shuffle_method='layer',
                                n_samples=None,
                                n_workers=None,
                                with_shuffled_ncuts=False,
                                use_inv_avg_commute=False,
                                filter_norm=1,
                                random_state=RANDOM_STATE,
                                eigen_solver='arpack'):

    weight_paths = get_weights_paths(weight_directory, norm=filter_norm)

    return {
        is_unpruned: run_spectral_cluster(weight_path,
                                          with_shuffle=with_shuffle,
                                          n_clusters=n_clusters,
                                          shuffle_method=shuffle_method,
                                          n_samples=n_samples,
                                          n_workers=n_workers,
                                          with_shuffled_ncuts=with_shuffled_ncuts,
                                          use_inv_avg_commute=use_inv_avg_commute,
                                          filter_norm=filter_norm,
                                          random_state=random_state,
                                          eigen_solver=eigen_solver)
        for is_unpruned, weight_path in weight_paths.items()
    }
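# Usage sketch (hypothetical run directory): the returned dict is keyed by
# is_unpruned, each value being the output of run_spectral_cluster (assumed
# here to be a (labels, metrics) pair, as in the unpacking used elsewhere in
# this repo) for the unpruned (True) and pruned (False) networks.
#
#   double_result = run_double_spectral_cluster('models/mlp_mnist_run',
#                                               n_clusters=4)
#   unpruned_labels, unpruned_metrics = double_result[True]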
def build_weighted_dist_mat(model_path,
                            clustering_result,
                            filter_norm=1,
                            normalize_in_out=False):

    weight_path = get_weights_paths(model_path, norm=filter_norm)[False]  # pruned

    G = build_cluster_graph(weight_path,
                            clustering_result[False],  # pruned
                            normalize_in_out=normalize_in_out)

    df = pd.DataFrame([{'start': start,
                        'end': end,
                        'dist': _compute_weighted_dist(G, start, end)}
                       for start, end in it.combinations(G.nodes, 2)])

    df = df[df != 0].dropna()

    # The distance is normalized to [0, 1] within the paths between two
    # specific layers, so the maximum weighted distance is one.
    df['layers'] = df.apply(
        lambda r: r['start'].split('-')[0] + '-' + r['end'].split('-')[0],
        axis=1)
    df['normalized_dist'] = df['dist'] / df.groupby('layers')['dist'].transform('max')

    mat = df.pivot(index='start', columns='end', values='normalized_dist')

    return mat
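# Usage sketch (hypothetical paths; assumes seaborn is available and that
# clustering_result is a dict keyed by is_unpruned holding the per-network
# clustering output expected by build_cluster_graph):
#
#   import seaborn as sns
#   dist_mat = build_weighted_dist_mat('models/mlp_mnist_run', clustering_result)
#   sns.heatmap(dist_mat, annot=True)
#   plt.show()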
def draw_mlp_clustering_report(weight_directory,
                               double_clustering_results,
                               n_cluster=4,
                               title=None,
                               figsize=(20, 30)):

    weight_paths = get_weights_paths(weight_directory)

    fig, axes = plt.subplots(2, 2, figsize=figsize)

    if title is not None:
        fig.suptitle(title)

    axes[0][0].set_title('Unpruned')
    draw_clustered_mlp(weight_paths[True],  # True represents **un**pruned
                       double_clustering_results[True],
                       n_clusters=n_cluster,
                       ax=axes[0][0])

    draw_cluster_by_layer(weight_paths[True],
                          double_clustering_results[True],
                          n_clusters=n_cluster,
                          ax=axes[1][0])

    axes[0][1].set_title('Pruned')
    draw_clustered_mlp(weight_paths[False],
                       double_clustering_results[False],
                       n_clusters=n_cluster,
                       ax=axes[0][1])

    draw_cluster_by_layer(weight_paths[False],
                          double_clustering_results[False],
                          n_clusters=n_cluster,
                          ax=axes[1][1])
def test_two_methods_cnn_clustering_pvalue():
    # See comment above about BASE_PATH
    config_updates = {'weights_path': get_weights_paths(
                          get_model_path('CNN:MNIST', model_base_path=BASE_PATH))[True],
                      'with_labels': False,
                      'with_shuffle': False,
                      'num_clusters': 4,
                      'is_testing': True}

    experiment_run = clustering_experiment.run(config_updates=config_updates,
                                               named_configs=['cnn_config'])
def run_activations_cluster_experiment(
        activations_dir,
        weights_dir,
        exclude_inputs=True,
        filter_norm=1,
        with_shuffle=True,
        n_clusters=10,
        n_samples=None,
        n_workers=None,
        random_state=RANDOM_STATE,
):
    # if 'mlp' in str(weights_dir) and 'mlp' in str(activations_dir):
    #     named_configs = ['mlp_config']
    # elif 'cnn' in str(weights_dir) and 'cnn' in str(activations_dir):
    #     named_configs = ['cnn_config']
    # else:
    #     raise ValueError('Either mlp or cnn should be in path to determine the config.')

    activations_path_dict = get_activations_paths(activations_dir)
    activations_masks_path_dict = get_activation_masks_paths(activations_dir)
    weight_path_dict = get_weights_paths(weights_dir, norm=filter_norm)

    results_dicts = []

    for is_unpruned in [True, False]:

        config_updates = {'activations_path': activations_path_dict[is_unpruned],
                          'activations_mask_path': activations_masks_path_dict[is_unpruned],
                          'weights_path': weight_path_dict[is_unpruned],
                          'exclude_inputs': exclude_inputs,
                          'with_shuffle': with_shuffle,
                          'seed': random_state,
                          'n_clusters': n_clusters}

        if n_samples is not None:
            config_updates['n_samples'] = n_samples

        if n_workers is not None:
            config_updates['n_workers'] = n_workers

        with suppress(), all_logging_disabled():
            experiment_run = activations_experiment.run(
                config_updates=config_updates)

        results_dicts.append(experiment_run.result)

    return {'unpruned': results_dicts[0],
            'pruned': results_dicts[1]}
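# Usage sketch (directories are hypothetical): runs the activation-based
# clustering experiment for both the unpruned and pruned networks and returns
# their result dicts under the 'unpruned' and 'pruned' keys.
#
#   act_results = run_activations_cluster_experiment(
#       'models/mlp_mnist_run/activations',
#       'models/mlp_mnist_run/weights',
#       n_clusters=10)
#   print(act_results['pruned'])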
def run_double_spectral_cluster(weight_directory,
                                with_shuffle=True,
                                n_clusters=4,
                                shuffle_method='layer',
                                n_samples=None,
                                n_workers=None,
                                with_shuffled_ncuts=False,
                                random_state=RANDOM_STATE,
                                ):
    weight_paths = get_weights_paths(weight_directory)

    return {is_unpruned: run_spectral_cluster(weight_path,
                                              with_shuffle,
                                              n_clusters,
                                              shuffle_method,
                                              n_samples,
                                              n_workers,
                                              with_shuffled_ncuts,
                                              random_state)
            for is_unpruned, weight_path in weight_paths.items()}
def draw_mlp_clustering_report(weight_directory,
                               double_clustering_results,
                               n_cluster=4,
                               filter_norm=1,
                               is_first_square=True,
                               title=None,
                               figsize=(20, 30)):

    weight_paths = get_weights_paths(weight_directory, norm=filter_norm)

    if 'cnn' not in str(weight_directory).lower():

        fig, axes = plt.subplots(2, 2, figsize=figsize)

        if title is not None:
            fig.suptitle(title)

        axes[0][0].set_title('Unpruned')
        draw_clustered_mlp(weight_paths[True],  # True represents **un**pruned
                           double_clustering_results[True],
                           n_clusters=n_cluster,
                           is_first_square=is_first_square,
                           ax=axes[0][0])

        draw_cluster_by_layer(weight_paths[True],
                              double_clustering_results[True],
                              n_clusters=n_cluster,
                              ax=axes[1][0])

        axes[0][1].set_title('Pruned')
        draw_clustered_mlp(weight_paths[False],
                           double_clustering_results[False],
                           n_clusters=n_cluster,
                           is_first_square=is_first_square,
                           ax=axes[0][1])

        draw_cluster_by_layer(weight_paths[False],
                              double_clustering_results[False],
                              n_clusters=n_cluster,
                              ax=axes[1][1])

    else:  # if it's a CNN

        fig, axes = plt.subplots(1, 2, figsize=figsize)

        if title is not None:
            fig.suptitle(title)

        axes[0].set_title('Unpruned')
        draw_clustered_mlp(weight_paths[True],  # True represents **un**pruned
                           double_clustering_results[True],
                           n_clusters=n_cluster,
                           is_first_square=is_first_square,
                           ax=axes[0])

        axes[1].set_title('Pruned')
        draw_clustered_mlp(weight_paths[False],
                           double_clustering_results[False],
                           n_clusters=n_cluster,
                           is_first_square=is_first_square,
                           ax=axes[1])
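# Usage sketch (hypothetical run directory): draws the clustering report for
# both the unpruned and pruned models; for CNN runs only the clustered network
# graphs are drawn, without the per-layer breakdown.
#
#   double_result = run_double_spectral_cluster('models/mlp_mnist_run')
#   draw_mlp_clustering_report('models/mlp_mnist_run', double_result,
#                              n_cluster=4, title='MLP:MNIST')
#   plt.show()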
def cluster_and_visualize(weights_dir, activations_dir, n_clusters=10,
                          corr_type='spearman', filter_norm=1, n_iters=20,
                          n_random=4, side_len=28, min_size=4, max_prop=0.8):

    assert corr_type in ['pearson', 'spearman']

    results = {}
    weight_path_dict = get_weights_paths(weights_dir, norm=filter_norm)
    activations_path_dict = get_activations_paths(activations_dir)
    activations_masks_path_dict = get_activation_masks_paths(activations_dir)

    for is_unpruned in [True, False]:

        # run clustering to get labels
        # for a cnn, this will only get results for the conv layers
        labels, _ = run_spectral_cluster(weight_path_dict[is_unpruned],
                                         n_clusters=n_clusters,
                                         with_shuffle=False)

        # get the activations and the mask
        with open(activations_path_dict[is_unpruned], 'rb') as f:
            masked_activations = pickle.load(f)
        with open(activations_masks_path_dict[is_unpruned], 'rb') as f:
            activations_mask = pickle.load(f)

        # the activations come pre-masked, so reconstruct them,
        # placing zeros for the units that were masked out
        activations = np.zeros((len(activations_mask),
                                masked_activations.shape[-1]))
        activations[activations_mask] = masked_activations
        del masked_activations  # take out the trash

        # get the numbers of each type of unit
        if 'cnn' in str(weights_dir):  # if a cnn
            cnn_params = (CNN_VGG_MODEL_PARAMS if 'vgg' in str(weights_dir).lower()
                          else CNN_MODEL_PARAMS)
            unit_nums = [cl['filters'] for cl in cnn_params['conv']]
            n_units = sum(unit_nums)
            n_dense = sum(d['units'] for d in cnn_params['dense'])
            n_outputs = 10
            n_inputs = len(activations_mask) - n_units - n_dense - n_outputs
        else:  # if an mlp
            n_inputs = 784
            n_outputs = 10
            unit_nums = [256, 256, 256, 256]
            n_units = sum(unit_nums)

        labels = labels[n_inputs:n_inputs + n_units]
        assert len(labels) == n_units

        # get correlations
        if corr_type == 'pearson':
            corr_mat = np.corrcoef(activations[:n_inputs + n_units], rowvar=True)
        else:  # spearman
            corr_mat, _ = spearmanr(activations[:n_inputs + n_units], axis=1)

        # get correlations between inputs and units
        representations = corr_mat[n_inputs:, :n_inputs]
        del corr_mat  # take out the trash
        representations[np.isnan(representations)] = 0

        representations_by_layer = list(splitter(representations, unit_nums))
        labels_by_layer = list(splitter(labels, unit_nums))

        network_results = {}
        for layer_i in range(len(unit_nums)):  # for each layer

            layer_reps = np.array(representations_by_layer[layer_i])
            layer_reps_stds = np.std(layer_reps, axis=1)
            layer_reps_valid = layer_reps[layer_reps_stds > 0]
            n_valid = len(layer_reps_valid)
            layer_labels = np.array(labels_by_layer[layer_i])
            layer_size = unit_nums[layer_i]
            max_size = max_prop * layer_size

            layer_results = {}
            for cluster_i in range(n_clusters):  # for each sub-module within the layer

                sm_reps = layer_reps[layer_labels == cluster_i]
                sm_reps_stds = np.std(sm_reps, axis=1)
                sm_reps = sm_reps[sm_reps_stds > 0]  # filter out units that aren't responsive to anything
                sm_size = len(sm_reps)

                if sm_size < min_size or sm_size > max_size:  # skip if too small or big
                    continue

                sm_reps = align_reps(sm_reps, n_iters)
                true_avg = np.reshape(np.mean(sm_reps, axis=0),
                                      (-1, side_len, side_len))
                if np.mean(true_avg) > 0:  # align to have negative mean
                    true_avg *= -1

                avgs = [true_avg]  # first in the list will be the true one
                for _ in range(n_random):
                    rdm_idxs = np.random.choice(np.array(range(n_valid)),
                                                size=sm_size,
                                                replace=False)
                    rdm_reps = layer_reps_valid[rdm_idxs]
                    rdm_reps = align_reps(rdm_reps, n_iters)
                    rdm_avg = np.reshape(np.mean(rdm_reps, axis=0),
                                         (-1, side_len, side_len))
                    if np.mean(rdm_avg) > 0:  # align to have negative mean
                        rdm_avg *= -1
                    avgs.append(rdm_avg)

                layer_results[f'cluster_{cluster_i}'] = {'ims': avgs,
                                                         'size': sm_size}

            network_results[f'layer_{layer_i}'] = layer_results

        results[is_unpruned] = network_results

    return results
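# Usage sketch (hypothetical paths; assumes matplotlib is imported as plt and
# an MLP run, where each image has shape (1, side_len, side_len)):
# results[is_unpruned][f'layer_{i}'][f'cluster_{j}']['ims'] holds the true
# sub-module average image first, followed by n_random random-subset averages.
#
#   viz = cluster_and_visualize('models/mlp_mnist_run/weights',
#                               'models/mlp_mnist_run/activations')
#   ims = viz[True]['layer_0']['cluster_0']['ims']
#   fig, axs = plt.subplots(1, len(ims))
#   for ax, im in zip(axs, ims):
#       ax.imshow(im.squeeze(), cmap='gray')
#       ax.axis('off')
#   plt.show()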
def get_selectivities(run_dir, dataset_name, is_unpruned,
                      n_inputs=784, n_outputs=10):
    """
    Selectivity here means a unit's linear correlation with an output category
    across a test set. This function returns the selectivities and assignments
    for the output categories that the units correlate best and second best with.
    """

    weight_path = get_weights_paths(run_dir)[is_unpruned]
    labels, _ = run_spectral_cluster(weight_path,
                                     n_clusters=n_outputs,
                                     with_shuffle=False)

    act_path = get_activations_paths(run_dir)[is_unpruned]
    act_mask_path = get_activation_masks_paths(run_dir)[is_unpruned]

    with open(act_path, 'rb') as f:
        activations = pickle.load(f)  # each row a unit, each column an example
    with open(act_mask_path, 'rb') as f:
        activations_mask = pickle.load(f)

    if 'cnn' in str(run_dir).lower():  # for the cnns, only look at conv layers
        if 'stacked' in str(run_dir).lower():
            n_in = n_inputs * 2
        else:
            n_in = n_inputs
        cnn_params = (CNN_VGG_MODEL_PARAMS if dataset_name == 'cifar10_full'
                      else CNN_MODEL_PARAMS)
        n_conv_filters = sum([cl['filters'] for cl in cnn_params['conv']])
        n_start = np.sum(activations_mask[:n_in])
        n_stop = n_start + np.sum(activations_mask[n_in:n_in + n_conv_filters])
        activations = activations[n_start:n_stop, :]
        labels = labels[activations_mask[n_in:n_in + n_conv_filters]]
    else:
        n_in = n_inputs
        n_start = np.sum(activations_mask[:n_in])
        activations = activations[n_start:-n_outputs, :]
        labels = labels[n_inputs:-n_outputs]
        labels = labels[activations_mask[n_in:-n_outputs]]

    # drop units that were not assigned to any cluster
    # (apply the mask to the activations before shrinking labels)
    valid_mask = labels != -1
    activations = activations[valid_mask]
    labels = labels[valid_mask]

    data_path = '.' + DATA_PATHS[dataset_name]
    with open(data_path, 'rb') as f:
        dataset = pickle.load(f)

    y_test = dataset['y_test'][:activations.shape[1]]
    y_test_onehot = np.array([y_test == label_i for label_i in range(n_outputs)])

    # correlate each unit with each output category; the output categories are
    # the last n_outputs rows of the stacked matrix
    corr_abs = np.abs(np.corrcoef(np.vstack([activations, y_test_onehot]),
                                  rowvar=True)[:activations.shape[0], -n_outputs:])

    assignments = np.argmax(corr_abs, axis=1)
    selectivities = np.max(corr_abs, axis=1)

    for i in range(len(assignments)):
        corr_abs[i, assignments[i]] = 0

    second_assignments = np.argmax(corr_abs, axis=1)
    second_selectivities = np.max(corr_abs, axis=1)

    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        nmi = normalized_mutual_info_score(assignments, labels)
        second_nmi = normalized_mutual_info_score(second_assignments, labels)

    return (assignments, selectivities, second_assignments,
            second_selectivities, nmi, second_nmi)
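# Usage sketch (the run directory and dataset name are hypothetical; they must
# match the saved run artifacts and a key in DATA_PATHS):
#
#   (assign, sel, assign2, sel2, nmi, nmi2) = get_selectivities(
#       'models/mlp_mnist_run', 'mnist', is_unpruned=False)
#   print('mean selectivity:', sel.mean())
#   print('NMI between best-category assignments and cluster labels:', nmi)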
def _perform_lesion_sub_experiment(dataset_path, run_dir, n_clusters=4,
                                   n_shuffles=200, n_side=28, depth=1,
                                   with_random=True, model_params=None,
                                   n_way=1, n_way_type='joint',
                                   unpruned=False, true_as_random=False,
                                   verbose=False):
    if verbose:
        print('Loading data...')

    ds = preprocess_dataset(dataset_path)
    X, y = ds['X_test'], ds['y_test']

    run_dir_path = Path(run_dir)
    weight_paths = get_weights_paths(run_dir_path)
    model_paths = get_model_paths(run_dir_path)

    if unpruned:
        model_path = str(model_paths[True])
        weight_path = str(next(run_dir_path.glob('*-unpruned-weights.pckl')))
    else:
        model_path = str(model_paths[False])
        weight_path = str(next(run_dir_path.glob('*-pruned-weights.pckl')))

    if 'mlp' in model_path.lower() or 'poly' in model_path.lower():
        network_type = 'mlp'
    elif 'cnn' in model_path.lower():
        network_type = 'cnn'
        X = np.reshape(X, (-1, n_side, n_side, depth))
        # assert model_params is not None, ('For CNN network type, '
        #                                   'the model_param parameter should be given.')
    else:
        raise ValueError('Network type should be stated explicitly, '
                         'either mlp or cnn, in the run directory files.')

    if 'poly' in model_path.lower():
        task = 'regression'
    else:
        task = 'classification'

    if verbose:
        print('Running spectral clustering...')

    labels, _ = run_spectral_cluster(weight_path,
                                     n_clusters=n_clusters,
                                     with_shuffle=False)

    if verbose:
        print('Loading model and extracting weights...')

    # os.environ['CUDA_VISIBLE_DEVICES'] = ''

    with suppress(), all_logging_disabled():
        experiment_model = load_model2(model_path)

    weights, biases = extract_weights(experiment_model, with_bias=True)
    ignore_layers = False

    if verbose:
        print('Evaluate original model...')

    evaluation = _evaluate(experiment_model, X, y, task)

    if network_type == 'mlp':
        layer_widths = extract_layer_widths(weights)
    else:
        layer_widths = []
        weight_shapes = [layer_weights.shape for layer_weights in weights]
        n_conv = sum(len(ws) == 4 for ws in weight_shapes)
        layer_widths.extend([weight_shapes[i][-1] for i in range(n_conv)])
        layer_widths.extend([ws[-1] for ws in weight_shapes[n_conv:]])

        # omit non-conv layers
        weights = weights[:n_conv]
        biases = biases[:n_conv]
        layer_widths = layer_widths[:n_conv + 1]

    if verbose:
        print('Extract metadata...')

    metadata = _extract_layer_label_metadata(network_type, layer_widths,
                                             labels, ignore_layers)

    if verbose:
        print('Apply lesion trial on the true clustering...')

    true_results = _apply_lesion_trial(X, y, network_type, experiment_model,
                                       weights, biases, layer_widths, labels,
                                       ignore_layers, task,
                                       to_shuffle=true_as_random,
                                       n_way=n_way, n_way_type=n_way_type,
                                       verbose=verbose)

    if with_random:
        if verbose:
            print('Apply lesion trial on the random clusterings...')
            progress_iter = tqdm
        else:
            progress_iter = iter

        all_random_results = []
        for _ in progress_iter(range(n_shuffles)):
            random_results = _apply_lesion_trial(X, y, network_type,
                                                 experiment_model, weights,
                                                 biases, layer_widths, labels,
                                                 ignore_layers, task,
                                                 to_shuffle=True,
                                                 n_way=n_way,
                                                 n_way_type=n_way_type,
                                                 verbose=verbose)
            all_random_results.append(random_results)
    else:
        all_random_results = None

    if n_way == 1:
        true_results = _flatten_single_damage(true_results)
        all_random_results = ([_flatten_single_damage(result)
                               for result in all_random_results]
                              if all_random_results else None)

    return true_results, all_random_results, metadata, evaluation
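# Usage sketch (internal helper; the dataset and run paths are hypothetical):
# runs the lesion test on the pruned network with 4 clusters and 200 random
# shufflings, returning the true-clustering damage, the random-clustering
# damages, the layer/label metadata, and the unlesioned baseline evaluation.
#
#   true_res, random_res, meta, base_eval = _perform_lesion_sub_experiment(
#       'datasets/mnist.pckl', 'models/mlp_mnist_run',
#       n_clusters=4, n_shuffles=200, verbose=True)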