def cnn2mlp(model_path, model_params=CNN_MODEL_PARAMS, verbose=False):
    # Unroll a trained CNN into an equivalent (sparse) MLP so the
    # MLP-based analysis tools can be applied to it.
    cnn_model = load_model2(model_path)
    cnn_weights, cnn_biases = extract_weights(cnn_model, with_bias=True)
    mlp_weights, _, mlp_biases = extract_cnn_weights(cnn_weights,
                                                     biases=cnn_biases,
                                                     verbose=verbose,
                                                     as_sparse=True)
    return SimpleMLP(mlp_weights, mlp_biases, model_params)
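# A minimal standalone sketch of the idea behind cnn2mlp: a stride-1,
# valid-padding convolution is the same linear map as multiplication by a
# banded weight matrix, which is what extract_cnn_weights constructs
# (sparsely) for each conv layer. The 1-D example below is illustrative
# only; none of its names come from the codebase.
def _demo_conv_as_matrix():
    import numpy as np

    x = np.arange(6, dtype=float)   # input signal
    k = np.array([1., -2., 1.])     # conv kernel, width 3

    # correlation form of a conv layer: valid padding, stride 1
    conv_out = np.array([np.dot(x[i:i + 3], k) for i in range(4)])

    # equivalent dense matrix: each row is the kernel shifted by one
    W = np.zeros((4, 6))
    for i in range(4):
        W[i, i:i + 3] = k

    assert np.allclose(W @ x, conv_out)  # same linear map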
def test_cnn2mlp(dataset, is_unpruned=True, n_datapoints=100, abs_tol=0.075):
    ds = preprocess_dataset(DATA_PATHS[dataset])
    X, y = ds['X_test'][:n_datapoints], ds['y_test'][:n_datapoints]

    type_ = 'unpruned' if is_unpruned else 'pruned'

    # See comment above about BASE_PATH
    model_path = (get_model_path(f'CNN:{dataset}'.upper(),
                                 model_base_path=BASE_PATH)
                  / f'{dataset}-cnn-{type_}.h5'.lower())

    cnn_model = load_model2(model_path)
    mlp_model = cnn2mlp(model_path, verbose=True)

    y_pred = mlp_model.predict_classes(X)
    acc_mlp = np.mean(y == y_pred)

    _, acc_cnn = cnn_model.evaluate(X.reshape(-1, 28, 28, 1),
                                    tf.keras.utils.to_categorical(y))
    print('CNN', acc_cnn, 'MLP', acc_mlp)

    # the unrolled MLP should match the CNN's accuracy up to abs_tol
    assert isclose(acc_cnn, acc_mlp, abs_tol=abs_tol)
def _perform_lesion_sub_experiment(
        dataset_path, run_dir,
        n_clusters=4, n_shuffles=200,
        n_side=28, depth=1,
        with_random=True,
        model_params=None,
        n_way=1, n_way_type='joint',
        unpruned=False,
        true_as_random=False,
        verbose=False):

    if verbose:
        print('Loading data...')

    ds = preprocess_dataset(dataset_path)
    X, y = ds['X_test'], ds['y_test']

    run_dir_path = Path(run_dir)
    weight_paths = get_weights_paths(run_dir_path)
    model_paths = get_model_paths(run_dir_path)

    if unpruned:
        model_path = str(model_paths[True])
        weight_path = str(next(run_dir_path.glob('*-unpruned-weights.pckl')))
    else:
        model_path = str(model_paths[False])
        weight_path = str(next(run_dir_path.glob('*-pruned-weights.pckl')))

    if 'mlp' in model_path.lower() or 'poly' in model_path.lower():
        network_type = 'mlp'
    elif 'cnn' in model_path.lower():
        network_type = 'cnn'
        X = np.reshape(X, (-1, n_side, n_side, depth))
        # assert model_params is not None, ('For CNN network type, '
        #                                   'the model_param parameter should be given.')
    else:
        raise ValueError('Network type should be stated explicitly '
                         '(either "mlp" or "cnn") in the run directory file names.')

    task = 'regression' if 'poly' in model_path.lower() else 'classification'

    if verbose:
        print('Running spectral clustering...')

    labels, _ = run_spectral_cluster(weight_path,
                                     n_clusters=n_clusters,
                                     with_shuffle=False)

    if verbose:
        print('Loading model and extracting weights...')

    # os.environ['CUDA_VISIBLE_DEVICES'] = ''

    with suppress(), all_logging_disabled():
        experiment_model = load_model2(model_path)

    weights, biases = extract_weights(experiment_model, with_bias=True)
    ignore_layers = False

    if verbose:
        print('Evaluate original model...')

    evaluation = _evaluate(experiment_model, X, y, task)

    if network_type == 'mlp':
        layer_widths = extract_layer_widths(weights)
    else:
        layer_widths = []
        weight_shapes = [layer_weights.shape for layer_weights in weights]
        n_conv = sum(len(ws) == 4 for ws in weight_shapes)
        layer_widths.extend([weight_shapes[i][-1] for i in range(n_conv)])
        layer_widths.extend([ws[-1] for ws in weight_shapes[n_conv:]])

        # omit non-conv layers
        weights = weights[:n_conv]
        biases = biases[:n_conv]
        layer_widths = layer_widths[:n_conv + 1]

    if verbose:
        print('Extract metadata...')

    metadata = _extract_layer_label_metadata(network_type,
                                             layer_widths,
                                             labels,
                                             ignore_layers)

    if verbose:
        print('Apply lesion trial on the true clustering...')

    true_results = _apply_lesion_trial(X, y,
                                       network_type, experiment_model,
                                       weights, biases, layer_widths,
                                       labels, ignore_layers, task,
                                       to_shuffle=true_as_random,
                                       n_way=n_way, n_way_type=n_way_type,
                                       verbose=verbose)

    if with_random:
        if verbose:
            print('Apply lesion trial on the random clusterings...')
            progress_iter = tqdm
        else:
            progress_iter = iter

        all_random_results = []
        for _ in progress_iter(range(n_shuffles)):
            random_results = _apply_lesion_trial(X, y,
                                                 network_type, experiment_model,
                                                 weights, biases, layer_widths,
                                                 labels, ignore_layers, task,
                                                 to_shuffle=True,
                                                 n_way=n_way, n_way_type=n_way_type,
                                                 verbose=verbose)
            all_random_results.append(random_results)
    else:
        all_random_results = None

    if n_way == 1:
        true_results = _flatten_single_damage(true_results)
        all_random_results = ([_flatten_single_damage(result)
                               for result in all_random_results]
                              if all_random_results else None)

    return true_results, all_random_results, metadata, evaluation
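# Standalone illustration of the layer-width logic above (hypothetical
# shapes, not from a real model): Keras stores Conv2D kernels as rank-4
# arrays (height, width, in_channels, out_channels) and Dense kernels as
# rank-2 (in_units, out_units), so `len(ws) == 4` counts the conv layers
# and ws[-1] is always a layer's output width.
def _demo_layer_widths():
    weight_shapes = [(3, 3, 1, 8),    # Conv2D, 8 output channels
                     (3, 3, 8, 16),   # Conv2D, 16 output channels
                     (784, 64),       # Dense, 64 units
                     (64, 10)]        # Dense, 10 units
    n_conv = sum(len(ws) == 4 for ws in weight_shapes)
    widths = [ws[-1] for ws in weight_shapes]
    assert n_conv == 2
    assert widths == [8, 16, 64, 10]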
    sphere_grad = grad_val - dot_product * image
    # thinking of the gradient as a vector based at the image vector,
    # sphere_grad projects that vector onto the tangent space of the
    # sphere at the image vector. this should basically be the
    # difference between your image vector and the result after you add
    # the gradient and then project back onto the sphere.
    print("Norm of component of gradient that lies along the sphere:",
          np.sqrt(np.sum(sphere_grad**2)))
    print("Norm of difference in gradient vector from last step to"
          + " this step:",
          np.sqrt(np.sum(delta_grad**2)))
    return image


# load the model. NB: this relies on both the weights and the architecture
# being stored in the h5 file.
my_model = load_model2(model_path)

# parameters to visualize_cluster_layer
my_index = 3
my_name = my_model.layers[my_index].name
output_shape = my_model.layers[my_index].output.shape.as_list()[1:]

# DF didn't actually get around to using real cluster masks.
no_mask = np.ones(output_shape, dtype=np.float32)

my_image = visualize_cluster_layer(my_model, my_name, no_mask)


# utility function to convert the image into something suitable for a png file
def normalize_image(image):
    image -= np.amin(image)
    image /= np.amax(image)
    image = (255 * image).astype('uint8')
    return image
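# hypothetical usage of normalize_image (imageio is an assumption here,
# not a known dependency of this script):
#   import imageio
#   imageio.imwrite('cluster_viz.png', normalize_image(my_image))


# Standalone numeric check of the tangent-space comment in the update
# step above: removing the radial component of the gradient gives, to
# first order in the step size, the same displacement as adding the
# gradient and projecting back onto the unit sphere. All names below are
# local to this sketch.
def _demo_sphere_projection():
    import numpy as np
    rng = np.random.default_rng(0)

    image = rng.normal(size=100)
    image /= np.linalg.norm(image)        # a point on the unit sphere
    grad = 1e-3 * rng.normal(size=100)    # a small gradient step

    # project the gradient onto the tangent space at `image`
    dot_product = np.dot(grad, image)
    sphere_grad = grad - dot_product * image

    # take the raw step, then project back onto the sphere
    stepped = image + grad
    stepped /= np.linalg.norm(stepped)

    # the two displacements agree up to second order in the step size
    assert np.linalg.norm((stepped - image) - sphere_grad) < 1e-3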
def _perform_lesion_sub_experiment(
        dataset_path, run_dir,
        n_clusters=4, n_shuffles=200,
        with_random=True,
        model_params=None,
        n_way=1, n_way_type='joint',
        true_as_random=False,
        verbose=False):

    if verbose:
        print('Loading data...')

    ds = preprocess_dataset(dataset_path)
    X, y = ds['X_test'], ds['y_test']

    run_dir_path = Path(run_dir)
    model_path = str(next(run_dir_path.glob('*-pruned.h5')))
    weight_path = str(next(run_dir_path.glob('*-pruned-weights.pckl')))

    if 'mlp' in model_path.lower():
        network_type = 'mlp'
    elif 'cnn' in model_path.lower():
        network_type = 'cnn'
        assert model_params is not None, ('For CNN network type, '
                                          'the model_param parameter should be given.')
    else:
        raise ValueError('Network type should be stated explicitly '
                         '(either "mlp" or "cnn") in the run directory file names.')

    if verbose:
        print('Running spectral clustering...')

    labels, _ = run_spectral_cluster(weight_path,
                                     n_clusters=n_clusters,
                                     with_shuffle=False)

    if verbose:
        print('Loading model and extracting weights...')

    # hide GPUs from TensorFlow so the lesion trials run on CPU
    import os
    os.environ['CUDA_VISIBLE_DEVICES'] = ''

    if network_type == 'mlp':
        with suppress(), all_logging_disabled():
            experiment_model = load_model2(model_path)
        weights, biases = extract_weights(experiment_model, with_bias=True)
        ignore_layers = False
    elif network_type == 'cnn':
        with suppress(), all_logging_disabled():
            experiment_model = cnn2mlp(model_path, model_params,
                                       verbose=verbose)
        weights, biases = experiment_model.get_weights_and_biases()
        ignore_layers = experiment_model.get_ignore_layers()

    if verbose:
        print('Evaluate original model...')

    evaluation = _evaluate(experiment_model, X, y)

    layer_widths = extract_layer_widths(weights)

    if verbose:
        print('Extract metadata...')

    metadata = _extract_layer_label_metadata(network_type,
                                             layer_widths,
                                             labels,
                                             ignore_layers)

    if verbose:
        print('Apply lesion trial on the true clustering...')

    true_results = _apply_lesion_trial(X, y,
                                       network_type, experiment_model,
                                       weights, biases, layer_widths,
                                       labels, ignore_layers,
                                       to_shuffle=true_as_random,
                                       n_way=n_way, n_way_type=n_way_type,
                                       verbose=verbose)

    if with_random:
        if verbose:
            print('Apply lesion trial on the random clusterings...')
            progress_iter = tqdm
        else:
            progress_iter = iter

        all_random_results = []
        for _ in progress_iter(range(n_shuffles)):
            random_results = _apply_lesion_trial(X, y,
                                                 network_type, experiment_model,
                                                 weights, biases, layer_widths,
                                                 labels, ignore_layers,
                                                 to_shuffle=True,
                                                 n_way=n_way, n_way_type=n_way_type,
                                                 verbose=verbose)
            all_random_results.append(random_results)
    else:
        all_random_results = None

    if n_way == 1:
        true_results = _flatten_single_damage(true_results)
        all_random_results = ([_flatten_single_damage(result)
                               for result in all_random_results]
                              if all_random_results else None)

    return true_results, all_random_results, metadata, evaluation
def evaluate_visualizations(
        model_tag, rep, is_unpruned,
        data_dir='/project/clusterability_in_neural_networks/datasets/'):

    if is_unpruned:
        suff = f'{rep}_unpruned_max_data.pkl'
    else:
        suff = f'{rep}_pruned_max_data.pkl'

    with open(data_dir + model_tag + suff, 'rb') as f:
        data = pickle.load(f)

    # unpack data
    max_images = data['max_images']
    random_max_images = data['random_max_images']
    max_losses = data['max_losses']
    random_max_losses = data['random_max_losses']
    sm_sizes = data['sm_sizes']
    sm_layers = data['sm_layers']
    sm_layer_sizes = data['sm_layer_sizes']
    sm_clusters = data['sm_clusters']

    n_examples = len(sm_sizes)
    n_max_min = int(len(max_images) / n_examples)
    n_random = int(len(random_max_images) / n_examples)
    input_side = max_images.shape[1]

    # flatten all inputs if mlp
    if 'mlp' in model_tag.lower():
        max_images = np.reshape(max_images, [-1, IMAGE_SIZE**2])
        random_max_images = np.reshape(random_max_images, [-1, IMAGE_SIZE**2])

    # get model
    model_dir = get_model_path(model_tag, filter_='all')[rep]
    model_path = get_model_paths(model_dir)[is_unpruned]
    model = load_model2(model_path)

    # get predictions
    max_preds = model.predict(max_images)
    random_max_preds = np.reshape(model.predict(random_max_images),
                                  (n_examples, n_random, -1))

    # get entropies
    max_entropies = np.array([entropy(pred) for pred in max_preds])
    random_max_entropies = np.array([[entropy(pred) for pred in reps]
                                     for reps in random_max_preds])

    # reshape losses
    random_max_losses = np.reshape(random_max_losses, (n_examples, n_random))

    # get percentiles
    max_percentiles_entropy = np.array([
        compute_pvalue(max_entropies[i], random_max_entropies[i])
        for i in range(len(max_entropies))
    ])
    max_percentiles_loss = np.array([
        compute_pvalue(max_losses[i], random_max_losses[i], side='right')
        for i in range(len(max_losses))
    ])

    # get effect sizes
    effect_factors_entropies = np.array([
        np.mean(random_max_entropies[i]) / max_entropies[i]
        for i in range(len(max_entropies)) if max_entropies[i] > 0
    ])
    mean_effect_factor_entropy = np.nanmean(effect_factors_entropies)
    effect_factors_losses = np.array([
        np.mean(random_max_losses[i]) / max_losses[i]
        for i in range(len(max_losses)) if max_losses[i] > 0
    ])
    mean_effect_factor_loss = np.nanmean(effect_factors_losses)

    # get pvalues
    max_chi2_p_entropy = chi2_categorical_test(max_percentiles_entropy, n_random)
    max_combined_p_entropy = combine_ps(max_percentiles_entropy, n_random)
    max_chi2_p_loss = chi2_categorical_test(max_percentiles_loss, n_random)
    max_combined_p_loss = combine_ps(max_percentiles_loss, n_random)

    results = {
        'percentiles': (max_percentiles_entropy,  # min_percentiles_entropy,
                        max_percentiles_loss),  # min_percentiles_loss),
        'effect_factors': (mean_effect_factor_entropy,
                           mean_effect_factor_loss),
        'chi2_ps': (max_chi2_p_entropy,  # min_chi2_categorical_p_entropy,
                    max_chi2_p_loss),  # min_chi2_categorical_p_loss),
        'combined_ps': (max_combined_p_entropy, max_combined_p_loss),
        'sm_layers': sm_layers,
        'sm_sizes': sm_sizes,
        'sm_layer_sizes': sm_layer_sizes,
        'sm_clusters': sm_clusters,
    }

    return results
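# A minimal sketch of the empirical p-value used above. This is a
# hypothetical re-implementation for illustration only; the repo's
# compute_pvalue may differ in details such as tie handling and the
# direction convention of `side`.
def _demo_empirical_pvalue():
    import numpy as np

    def empirical_pvalue(observed, random_samples, side='left'):
        # fraction of random samples at least as extreme as the
        # observation, with an add-one correction so the p-value is
        # never exactly zero
        random_samples = np.asarray(random_samples)
        if side == 'left':
            more_extreme = np.sum(random_samples <= observed)
        else:
            more_extreme = np.sum(random_samples >= observed)
        return (more_extreme + 1) / (len(random_samples) + 1)

    # a value smaller than all 19 random draws gets p = 1/20 on the left
    assert empirical_pvalue(0.0, np.linspace(0.1, 1.0, 19)) == 1 / 20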