def draw_clustered_mlp(weights_path,
                       clustering_result,
                       n_clusters=4,
                       is_first_square=True,
                       ax=None):
    """Draw an MLP with its nodes colored by spectral-clustering label.

    Args:
        weights_path: Location of the saved weights; passed to
            ``load_weights``.
        clustering_result: A ``(labels, metrics)`` pair. ``labels`` is used
            to color the graph nodes (presumably one label per node, in
            node order -- confirm against the caller); ``metrics`` is
            handed to ``draw_metrics``.
        n_clusters: Passed to ``get_color_mapper`` to build the
            label -> color lookup.
        is_first_square: Forwarded unchanged to ``set_nodes_positions``.
        ax: Matplotlib axes to draw on. A new figure/axes pair is created
            when ``None``.

    Returns:
        The tuple ``(ax, labels, metrics)``.
    """
    weights = load_weights(weights_path)
    # The helper name was misspelled (``extact_layer_widths``) here, which
    # raised NameError; the other functions in this file call
    # ``extract_layer_widths``.
    layer_widths = extract_layer_widths(weights)

    labels, metrics = clustering_result

    G = nx.from_scipy_sparse_matrix(weights_to_graph(weights))
    pos = set_nodes_positions(G.nodes, layer_widths, labels, is_first_square)

    color_mapper = get_color_mapper(n_clusters)
    color_map = [color_mapper[label] for label in labels]

    if ax is None:
        _, ax = plt.subplots(1)

    # Drawing is noisy with warnings; silence them for the draw call only.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        nx.draw(G, pos=pos, node_color=color_map, width=0, node_size=10,
                ax=ax)

    draw_metrics(metrics, ax)

    return ax, labels, metrics
def plot_eigenvalues_old(weights_path, n_eigenvalues=None, ax=None, **kwargs):
    """Plot sorted normalized-Laplacian eigenvalues (deprecated variant).

    The weights are turned into a graph, the largest connected component is
    kept, and the eigenvalues of its normalized Laplacian are computed
    densely, sorted ascending and plotted against their 1-based rank.

    Args:
        weights_path: Location of the saved weights; passed to
            ``load_weights``.
        n_eigenvalues: ``None`` to plot every eigenvalue, an ``int`` to plot
            the first ``n_eigenvalues``, or a ``(start, end)`` tuple used as
            a slice into the sorted eigenvalues.
        ax: Matplotlib axes to draw on; created when ``None``.
        **kwargs: Forwarded to ``ax.plot``. When ``linestyle`` is not given,
            star markers without a connecting line are used.

    Returns:
        Whatever ``ax.plot`` returns.

    Raises:
        TypeError: If ``n_eigenvalues`` is not ``None``, an int or a tuple.
    """
    warnings.warn('deprecated', DeprecationWarning)

    loaded_weights = load_weights(weights_path)
    G = nx.from_scipy_sparse_matrix(weights_to_graph(loaded_weights))

    # Restrict to the largest connected component.
    G_nn = G.subgraph(max(nx.connected_components(G), key=len))
    assert nx.is_connected(G_nn)

    norm_laplacian_matrix = nx.normalized_laplacian_matrix(G_nn)
    # ``toarray()`` instead of the ``.A`` shorthand, which newer SciPy
    # releases no longer provide on sparse containers.
    eigen_values = np.sort(np.linalg.eigvals(norm_laplacian_matrix.toarray()))

    if n_eigenvalues is None:
        start, end = 0, len(G_nn)
    elif isinstance(n_eigenvalues, int):
        start, end = 0, n_eigenvalues
    elif isinstance(n_eigenvalues, tuple):
        start, end = n_eigenvalues
    else:
        # The previous message also advertised ``slice``, which was never
        # handled by any branch above.
        raise TypeError(
            'n_eigenvalues should be either None or int or tuple.')

    eigen_values = eigen_values[start:end]

    if ax is None:
        _, ax = plt.subplots(1)

    ax.xaxis.set_major_locator(MaxNLocator(integer=True))

    if 'linestyle' not in kwargs:
        kwargs['linestyle'] = 'none'
        kwargs['marker'] = '*'
        kwargs['markersize'] = 5

    # Size the x-range from the values actually selected so that asking for
    # more eigenvalues than exist no longer makes ``ax.plot`` fail on
    # mismatched x/y lengths.
    ranks = range(start + 1, start + 1 + len(eigen_values))
    return ax.plot(ranks, eigen_values, **kwargs)
def build_cluster_graph(weights_path, clustering_result,
                        normalize_in_out=True):
    """Build a directed graph connecting clusters of consecutive layers.

    Every (layer, cluster label) pair that occurs in the clustering becomes
    a node named by ``nodify``; the label ``-1`` is skipped. For each pair
    of adjacent layers, an edge is added between every cluster of the first
    layer and every cluster of the second, weighted by the summed absolute
    weights connecting them.

    Args:
        weights_path: Location of the saved weights; passed to
            ``load_weights``.
        clustering_result: A ``(labels, _)`` pair; only ``labels`` is used.
        normalize_in_out: When true, each edge weight is divided by the
            number of individual weights between the two clusters.

    Returns:
        The resulting ``nx.DiGraph`` with a ``weight`` attribute per edge.
    """
    labels, _ = clustering_result
    weights = load_weights(weights_path)
    layer_widths = extract_layer_widths(weights)

    def cluster_ids(layer_labels):
        # np.unique returns sorted values; -1 is excluded from the graph.
        return [lbl for lbl in np.unique(layer_labels) if lbl != -1]

    labels_per_layer = list(splitter(labels, layer_widths))

    graph = nx.DiGraph()

    # Nodes: one per cluster present in each layer.
    for layer_idx, layer_labels in enumerate(labels_per_layer):
        for cluster in cluster_ids(layer_labels):
            graph.add_node(nodify(layer_idx, cluster))

    # Edges: every cluster pair across each pair of adjacent layers.
    adjacent_layers = zip(labels_per_layer, labels_per_layer[1:], weights)
    for layer_idx, (src_labels, dst_labels, layer_weights) in enumerate(
            adjacent_layers):
        dst_clusters = cluster_ids(dst_labels)
        for src_cluster in cluster_ids(src_labels):
            src_mask = (src_cluster == src_labels)
            for dst_cluster in dst_clusters:
                dst_mask = (dst_cluster == dst_labels)
                block = layer_weights[src_mask, :][:, dst_mask]

                if normalize_in_out:
                    n_rows, n_cols = block.shape
                    scale = n_rows * n_cols
                else:
                    scale = 1

                graph.add_edge(nodify(layer_idx, src_cluster),
                               nodify(layer_idx + 1, dst_cluster),
                               weight=np.abs(block).sum() / scale)

    return graph
def draw_cluster_by_layer(weights_path,
                          clustering_result,
                          n_clusters=4,
                          with_text=False,
                          size_factor=4,
                          width_factor=30,
                          ax=None):
    """Draw the per-layer cluster graph built by ``build_cluster_graph``.

    Node size is proportional to the number of units carrying that label in
    that layer, and edge width is proportional to the edge ``weight``.

    Args:
        weights_path: Location of the saved weights; passed to
            ``load_weights`` and ``build_cluster_graph``.
        clustering_result: A ``(labels, _)`` pair; only ``labels`` is used
            here.
        n_clusters: Passed to ``get_color_mapper``.
        with_text: When true, pretty-print the edge weights of the graph.
        size_factor: Multiplier applied to each cluster's unit count to get
            the drawn node size.
        width_factor: Multiplier applied to each edge weight to get the
            drawn line width.
        ax: Matplotlib axes to draw on; created when ``None``.

    Returns:
        The axes that were drawn on.
    """
    G = build_cluster_graph(weights_path, clustering_result)

    labels, _ = clustering_result
    weights = load_weights(weights_path)
    layer_widths = extract_layer_widths(weights)

    color_mapper = get_color_mapper(n_clusters)

    # Number of units per (layer, cluster) node; label -1 is skipped, as in
    # the node-building loop of ``build_cluster_graph``.
    cluster_sizes = {}
    for layer_index, layer_labels in enumerate(
            splitter(labels, layer_widths)):
        for label in np.unique(layer_labels):
            if label == -1:
                continue
            node_name = nodify(layer_index, label)
            cluster_sizes[node_name] = (layer_labels == label).sum()

    pos = nx.drawing.nx_agraph.graphviz_layout(G, prog='dot')
    edge_widths = [G[u][v]['weight'] * width_factor for u, v in G.edges()]
    # The cluster label is read back from the node name, taken as the
    # second '-'-separated field.
    node_colors = [color_mapper[int(v.split('-')[1])] for v in G.nodes()]
    node_sizes = [cluster_sizes[v] * size_factor for v in G.nodes()]

    if ax is None:
        _, ax = plt.subplots(1)

    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        nx.draw(G,
                pos,
                with_labels=True,
                node_color=node_colors,
                node_size=node_sizes,
                width=edge_widths,
                ax=ax)

    if with_text:
        # This used to print an ``edges`` name that does not exist in this
        # function (NameError); print the weights stored on the graph.
        pprint({(u, v): weight for u, v, weight in G.edges(data='weight')})

    return ax
def do_clustering_weights(network_type, weights_path, n_clusters, n_inputs,
                          n_outputs, exclude_inputs, eigen_solver,
                          assign_labels, use_inv_avg_commute, filter_norm,
                          epsilon):
    """Cluster a network's weight graph and summarise the clustering.

    Args:
        network_type: ``'cnn'`` restricts the analysis to the conv layers;
            any other value falls through to the ``exclude_inputs`` check.
        weights_path: Location of the saved weights; passed to
            ``load_weights``. A path containing ``vgg`` (case-insensitive)
            selects ``CNN_VGG_MODEL_PARAMS`` for CNNs.
        n_clusters: Number of clusters requested.
        n_inputs: Not used by this function.
        n_outputs: Not used by this function.
        exclude_inputs: For non-CNN networks, drop the first and last
            weight arrays before building the graph.
        eigen_solver: Forwarded to ``weights_array_to_cluster_quality``.
        assign_labels: Forwarded to ``weights_array_to_cluster_quality``.
        use_inv_avg_commute: When true, the adjacency matrix is replaced by
            the result of ``get_inv_avg_commute_time``.
        filter_norm: Forwarded to ``extract_cnn_weights_filters_as_units``.
        epsilon: Forwarded to ``weights_array_to_cluster_quality``.

    Returns:
        A dict with keys ``ncut``, ``ave_in_out``, ``mask``, ``labels``,
        ``label_proportions`` and ``entropy``.
    """
    layer_weights = load_weights(weights_path)

    # Arrays with more than two dimensions are reduced to one unit per
    # filter before graph construction.
    has_tensor_weights = any(len(w.shape) > 2 for w in layer_weights)
    if has_tensor_weights:
        layer_weights = extract_cnn_weights_filters_as_units(
            layer_weights, filter_norm)

    if network_type == 'cnn':
        # For CNNs only the conv layers are analysed; their count comes
        # from the model config.
        if 'vgg' in str(weights_path).lower():
            cnn_params = CNN_VGG_MODEL_PARAMS
        else:
            cnn_params = CNN_MODEL_PARAMS
        layer_weights = layer_weights[1:len(cnn_params['conv'])]
    elif exclude_inputs:
        # Exclude inputs and outputs.
        layer_weights = layer_weights[1:-1]

    full_adj_mat = weights_to_graph(layer_weights)

    # Delete unconnected components from the net.
    _, adj_mat, weight_mask, _ = delete_isolated_ccs_refactored(
        layer_weights, full_adj_mat, is_testing=False)

    if use_inv_avg_commute:
        adj_mat = get_inv_avg_commute_time(adj_mat)

    # Find the cluster quality of this pruned net.
    print("\nclustering unshuffled weights\n")
    ncut, cluster_labels = weights_array_to_cluster_quality(
        None,
        adj_mat,
        n_clusters,
        eigen_solver,
        assign_labels,
        epsilon,
        is_testing=False)

    ave_in_out = (1 - ncut / n_clusters) / (2 * ncut / n_clusters)
    label_entropy = entropy(cluster_labels)
    proportions = np.bincount(cluster_labels) / len(cluster_labels)

    return {
        'ncut': ncut,
        'ave_in_out': ave_in_out,
        'mask': weight_mask,
        'labels': cluster_labels,
        'label_proportions': proportions,
        'entropy': label_entropy
    }
def draw_clustered_mlp(weights_path,
                       clustering_result,
                       n_clusters=4,
                       is_first_square=True,
                       ax=None):
    """Render a network graph with nodes colored by cluster label.

    When the weights path contains ``cnn`` (case-insensitive) only the conv
    layers are drawn: the input layer and the fully connected layers are
    dropped and ``is_first_square`` is forced to ``False``.

    Args:
        weights_path: Location of the saved weights; passed to
            ``load_weights``.
        clustering_result: A ``(labels, metrics)`` pair.
        n_clusters: Passed to ``get_color_mapper``.
        is_first_square: Forwarded to ``set_nodes_positions`` (overridden
            for CNN paths).
        ax: Matplotlib axes to draw on; created when ``None``.

    Returns:
        The tuple ``(ax, labels, metrics)``.
    """
    weights = load_weights(weights_path)
    layer_widths = extract_layer_widths(weights)

    path_lower = str(weights_path).lower()
    if 'cnn' in path_lower:
        # CNN: omit the input layer and the fc layers.
        is_first_square = False
        if 'vgg' in path_lower:
            cnn_params = CNN_VGG_MODEL_PARAMS
        else:
            cnn_params = CNN_MODEL_PARAMS
        n_conv_layers = len(cnn_params['conv'])
        weights = weights[1:n_conv_layers]
        layer_widths = layer_widths[1:n_conv_layers + 1]

    labels, metrics = clustering_result

    graph = nx.from_scipy_sparse_matrix(weights_to_graph(weights))
    node_positions = set_nodes_positions(graph.nodes, layer_widths, labels,
                                         is_first_square)

    palette = get_color_mapper(n_clusters)
    node_colors = [palette[label] for label in labels]

    if ax is None:
        _, ax = plt.subplots(1)

    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        nx.draw(graph,
                pos=node_positions,
                node_color=node_colors,
                width=0,
                node_size=10,
                ax=ax)

    draw_metrics(metrics, ax)

    return ax, labels, metrics
def plot_eigenvalues(weights_path,
                     n_eigenvalues=None,
                     ax=None,
                     filter_norm=1,
                     **kwargs):
    """Plot the smallest eigenvalues of the normalized graph Laplacian.

    The weight graph is restricted to its main connected component, the
    normalized Laplacian is formed, and ``eigsh`` is run in shift-invert
    mode around ``sigma = 1`` with a MINRES-based inverse operator. The
    eigenvalues are sorted ascending and plotted against their 1-based
    rank.

    Args:
        weights_path: Location of the saved weights; passed to
            ``load_weights``. If the path contains ``cnn`` the weights are
            first converted with ``extract_cnn_weights_filters_as_units``.
        n_eigenvalues: ``None`` (all but two of the component's
            eigenvalues), an ``int`` count, or a ``(start, end)`` tuple.
            ``end`` is also the number of eigenvalues requested from the
            solver.
        ax: Matplotlib axes to draw on; created when ``None``.
        filter_norm: Forwarded to ``extract_cnn_weights_filters_as_units``.
        **kwargs: Forwarded to ``ax.plot``. When ``linestyle`` is not given,
            star markers without a connecting line are used.

    Returns:
        Whatever ``ax.plot`` returns.

    Raises:
        TypeError: If ``n_eigenvalues`` is not ``None``, an int or a tuple.
        AssertionError: If any connected component other than the main one
            has more than a single node.
    """
    weights = load_weights(weights_path)

    if 'cnn' in str(weights_path):
        weights = extract_cnn_weights_filters_as_units(weights, filter_norm)

    # TODO: take simpler solution from delete_isolated_ccs_refactored
    adj_mat = weights_to_graph(weights)

    _, components = sparse.csgraph.connected_components(adj_mat)

    most_common_component_counts = Counter(components).most_common(2)
    main_component_id = most_common_component_counts[0][0]
    # Everything outside the main component must be an isolated node.
    assert (len(most_common_component_counts) == 1
            or most_common_component_counts[1][1] == 1)

    main_component_mask = (components == main_component_id)
    selected_adj_mat = adj_mat[main_component_mask, :][:, main_component_mask]

    norm_laplacian_matrix = sparse.csgraph.laplacian(selected_adj_mat,
                                                     normed=True)

    if n_eigenvalues is None:
        start, end = 0, selected_adj_mat.shape[0] - 2
    elif isinstance(n_eigenvalues, int):
        start, end = 0, n_eigenvalues
    elif isinstance(n_eigenvalues, tuple):
        start, end = n_eigenvalues
    else:
        # The previous message also advertised ``slice``, which no branch
        # above handles.
        raise TypeError(
            'n_eigenvalues should be either None or int or tuple.')

    # Shift-invert around sigma, with the inverse applied iteratively via
    # MINRES instead of a factorization.
    sigma = 1
    OP = norm_laplacian_matrix - sigma * sparse.eye(
        norm_laplacian_matrix.shape[0])
    # NOTE(review): newer SciPy releases appear to rename minres' ``tol``
    # keyword to ``rtol`` -- confirm against the pinned SciPy version.
    OPinv = sparse.linalg.LinearOperator(
        matvec=lambda v: sparse.linalg.minres(OP, v, tol=1e-5)[0],
        shape=norm_laplacian_matrix.shape,
        dtype=norm_laplacian_matrix.dtype)
    eigen_values, _ = sparse.linalg.eigsh(norm_laplacian_matrix,
                                          sigma=sigma,
                                          k=end,
                                          which='LM',
                                          tol=1e-5,
                                          OPinv=OPinv)

    eigen_values = np.sort(eigen_values)
    eigen_values = eigen_values[start:end]

    if ax is None:
        _, ax = plt.subplots(1)

    ax.xaxis.set_major_locator(MaxNLocator(integer=True))

    if 'linestyle' not in kwargs:
        kwargs['linestyle'] = 'none'
        kwargs['marker'] = '*'
        kwargs['markersize'] = 5

    # ``return`` and the plot call must be one statement; split across two
    # lines the function returns None and never plots.
    return ax.plot(range(start + 1, end + 1), eigen_values, **kwargs)
def run_clustering(weights_path, num_clusters, eigen_solver, assign_labels,
                   epsilon, num_samples, delete_isolated_ccs_bool,
                   network_type, shuffle_smaller_model, with_labels,
                   with_shuffle, shuffle_method, n_workers, is_testing,
                   with_shuffled_ncuts):
    """Cluster a network and optionally compare against shuffled copies.

    Args:
        weights_path: Location of the saved weights; passed to
            ``load_weights`` (and to every shuffle worker).
        num_clusters: Number of clusters requested.
        eigen_solver: Forwarded to ``weights_array_to_cluster_quality``.
        assign_labels: Forwarded to ``weights_array_to_cluster_quality``.
        epsilon: Forwarded to ``weights_array_to_cluster_quality``.
        num_samples: Total number of shuffled networks requested.
        delete_isolated_ccs_bool: Remove isolated connected components
            before clustering.
        network_type: ``'mlp'`` or ``'cnn'``.
        shuffle_smaller_model: Forwarded to ``shuffle_and_cluster``.
        with_labels: Include the clustering labels in the result.
        with_shuffle: Run the shuffled-network comparison.
        shuffle_method: Forwarded to ``shuffle_and_cluster``.
        n_workers: Number of processes for the shuffle comparison; ``1``
            runs it in-process.
        is_testing: Enables consistency checks against the older
            implementations; only allowed with ``network_type == 'cnn'``.
        with_shuffled_ncuts: Include the raw shuffled n-cuts in the result
            (only has an effect together with ``with_shuffle``).

    Returns:
        A dict with ``ncut``, ``ave_in_out`` and ``node_mask``; with
        ``with_shuffle`` also ``shuffle_method``, ``n_samples``, ``mean``,
        ``stdev``, ``z_score`` and ``percentile``; optionally
        ``shuffled_ncuts`` and ``labels``.

    Raises:
        ValueError: If ``network_type`` is neither ``'mlp'`` nor ``'cnn'``.
    """
    if is_testing:
        assert network_type == 'cnn'

    # Load weights and get the adjacency matrix.
    loaded_weights = load_weights(weights_path)
    if network_type == 'mlp':
        weights_ = loaded_weights
        adj_mat_ = weights_to_graph(loaded_weights)
    elif network_type == 'cnn':
        # Compare the current and previous versions of the CNN expansion.
        if is_testing:
            tester_cnn_tensors_to_flat_weights_and_graph(loaded_weights)
        weights_, adj_mat_ = cnn_tensors_to_flat_weights_and_graph(
            loaded_weights)
    else:
        raise ValueError("network_type must be 'mlp' or 'cnn'")

    if delete_isolated_ccs_bool:
        # Delete unconnected components from the net.
        weights, adj_mat, node_mask = delete_isolated_ccs_refactored(
            weights_, adj_mat_, is_testing=is_testing)
        if is_testing:
            weights_old, adj_mat_old = delete_isolated_ccs(weights_, adj_mat_)
            assert (adj_mat != adj_mat_old).sum() == 0
            assert all((w1 == w2).all()
                       for w1, w2 in zip(weights, weights_old))
    else:
        weights, adj_mat = weights_, adj_mat_
        # Use the ``np`` alias like the rest of this function; the bare
        # ``numpy`` name raises NameError unless it is imported separately.
        node_mask = np.full(adj_mat.shape[0], True)

    # Find the cluster quality of this pruned net.
    print("\nclustering unshuffled weights\n")
    unshuffled_ncut, clustering_labels = weights_array_to_cluster_quality(
        weights, adj_mat, num_clusters, eigen_solver, assign_labels, epsilon,
        is_testing)
    ave_in_out = ((1 - unshuffled_ncut / num_clusters)
                  / (2 * unshuffled_ncut / num_clusters))

    result = {
        'ncut': unshuffled_ncut,
        'ave_in_out': ave_in_out,
        'node_mask': node_mask
    }

    if with_shuffle:
        # Find the cluster quality of other ways of rearranging the net.
        print("\nclustering shuffled weights\n")
        # Integer division: any remainder of num_samples / n_workers is
        # dropped, so fewer than num_samples shuffles may be run.
        n_samples_per_worker = num_samples // n_workers
        function_argument = (n_samples_per_worker, weights_path,
                             network_type, num_clusters,
                             shuffle_smaller_model, eigen_solver,
                             delete_isolated_ccs_bool, assign_labels,
                             epsilon, shuffle_method)

        if n_workers == 1:
            print('No Pool! Single Worker!')
            shuff_ncuts = shuffle_and_cluster(*function_argument)
        else:
            print(f'Using Pool! Multiple Workers! {n_workers}')
            # One independent copy of every argument per worker.
            workers_arguments = [[copy.deepcopy(arg)
                                  for _ in range(n_workers)]
                                 for arg in function_argument]
            with ProcessPool(nodes=n_workers) as p:
                shuff_ncuts_results = p.map(shuffle_and_cluster,
                                            *workers_arguments)
            shuff_ncuts = np.concatenate(shuff_ncuts_results)

        shuffled_n_samples = len(shuff_ncuts)
        shuffled_mean = np.mean(shuff_ncuts, dtype=np.float64)
        shuffled_stdev = np.std(shuff_ncuts, dtype=np.float64)
        percentile = compute_pvalue(unshuffled_ncut, shuff_ncuts)
        z_score = (unshuffled_ncut - shuffled_mean) / shuffled_stdev

        result.update({
            'shuffle_method': shuffle_method,
            'n_samples': shuffled_n_samples,
            'mean': shuffled_mean,
            'stdev': shuffled_stdev,
            'z_score': z_score,
            'percentile': percentile
        })

        if with_shuffled_ncuts:
            result['shuffled_ncuts'] = shuff_ncuts

    if with_labels:
        result['labels'] = clustering_labels

    return result
def shuffle_and_cluster(num_samples, weights_path, network_type,
                        num_clusters, shuffle_smaller_model, eigen_solver,
                        delete_isolated_ccs_bool, assign_labels, epsilon,
                        shuffle_method):
    """Cluster ``num_samples`` shuffled copies of a network.

    Args:
        num_samples: Number of shuffled networks to cluster.
        weights_path: Location of the saved weights; passed to
            ``load_weights``.
        network_type: ``'mlp'`` or ``'cnn'``.
        num_clusters: Number of clusters requested.
        shuffle_smaller_model: For MLPs, shuffle the pruned weights instead
            of the loaded ones (pruning only happens when
            ``delete_isolated_ccs_bool`` is also set).
        eigen_solver: Forwarded to ``weights_array_to_cluster_quality``.
        delete_isolated_ccs_bool: Remove isolated connected components from
            every shuffled network before clustering it.
        assign_labels: Forwarded to ``weights_array_to_cluster_quality``.
        epsilon: Forwarded to ``weights_array_to_cluster_quality``.
        shuffle_method: One of ``SHUFFLE_METHODS``; selects the per-layer
            shuffling function.

    Returns:
        A NumPy array with one n-cut value per shuffled network.

    Raises:
        ValueError: If ``network_type`` is neither ``'mlp'`` nor ``'cnn'``,
            or if ``shuffle_method`` has no shuffling function here.
        AssertionError: If ``shuffle_method`` is not in ``SHUFFLE_METHODS``.
    """
    loaded_weights = load_weights(weights_path)
    if network_type == 'mlp':
        weights_ = loaded_weights
        adj_mat_ = weights_to_graph(loaded_weights)
    elif network_type == 'cnn':
        weights_, adj_mat_ = cnn_tensors_to_flat_weights_and_graph(
            loaded_weights)
    else:
        raise ValueError("network_type must be 'mlp' or 'cnn'")

    if shuffle_smaller_model and delete_isolated_ccs_bool:
        # Delete unconnected components from the net BEFORE SHUFFLING!!!
        weights, adj_mat, _ = delete_isolated_ccs_refactored(
            weights_, adj_mat_, is_testing=True)
    else:
        weights, adj_mat = weights_, adj_mat_

    assert shuffle_method in SHUFFLE_METHODS
    if shuffle_method == 'layer':
        shuffle_function = shuffle_weights
    elif shuffle_method == 'layer_nonzero':
        shuffle_function = shuffle_weights_nonzero
    elif shuffle_method == 'layer_nonzero_distribution':
        shuffle_function = shuffle_weights_nonzero_distribution
    elif shuffle_method == 'layer_all_distribution':
        shuffle_function = shuffle_weights_layer_all_distribution
    else:
        # Without this branch a method listed in SHUFFLE_METHODS but not
        # handled above only failed later, with an UnboundLocalError.
        raise ValueError(
            f'no shuffle function for shuffle_method {shuffle_method!r}')

    # Loop-invariant: which MLP weights get shuffled.
    mlp_source_weights = weights if shuffle_smaller_model else loaded_weights

    shuff_ncuts = []
    for _ in range(num_samples):
        if network_type == 'mlp':
            shuff_weights_ = list(map(shuffle_function, mlp_source_weights))
            shuff_adj_mat_ = weights_to_graph(shuff_weights_)
        else:
            shuff_tensors = list(map(shuffle_function, loaded_weights))
            shuff_weights_, shuff_adj_mat_ = (
                cnn_tensors_to_flat_weights_and_graph(shuff_tensors))
        # NB: this is not quite right, because you're shuffling the whole
        # network, meaning that the isolated ccs get shuffled back in

        if delete_isolated_ccs_bool:
            shuff_weights, shuff_adj_mat, _ = delete_isolated_ccs_refactored(
                shuff_weights_, shuff_adj_mat_)
        else:
            shuff_weights, shuff_adj_mat = shuff_weights_, shuff_adj_mat_

        shuff_ncut, _ = weights_array_to_cluster_quality(
            shuff_weights, shuff_adj_mat, num_clusters, eigen_solver,
            assign_labels, epsilon)
        shuff_ncuts.append(shuff_ncut)

    return np.array(shuff_ncuts)
def main(args):
    """Run and print a step-by-step backpropagation check for a network.

    Reads three paths from ``args.files`` (network description, initial
    weights, dataset), then prints: the network setup, every training
    example, the per-example forward pass and cost, the per-example
    backprop deltas and gradients, the final regularized gradients, the
    numerically estimated gradients, and the summed absolute difference
    between the two gradient sets for each weight matrix.

    Args:
        args: Object exposing ``files``, indexed at positions 0, 1 and 2.
    """
    np.set_printoptions(precision=precision)

    network_path = args.files[0]
    initial_weights_path = args.files[1]
    dataset_path = args.files[2]

    r, n_inputs, n_neurons, n_outputs = load_network(network_path)
    initial_weights = load_weights(initial_weights_path)
    x, y = load_benchmark(dataset_path)

    epsilon = 1e-6
    n = x.shape[0]
    separator = "\n--------------------------------------------"

    model = NeuralNetwork(deepcopy(initial_weights), r, 0.99, 0)

    def zero_gradients():
        # One zero array per layer, shaped like that layer's weights.
        return [np.zeros(model.w[k].shape) for k in range(model.n_layers)]

    # --- Setup summary -----------------------------------------------------
    print("Parâmetro de regularização lambda={}\n".format(round(r, 3)))
    print("Inicializando rede com a seguinte estrutura de neurônios por camadas: {}\n".format([n_inputs] + n_neurons + [n_outputs]))

    for number, theta in enumerate(initial_weights, start=1):
        print("Theta{} inicial (pesos de cada neurônio, incluindo bias, armazenados nas linhas):\n{}".format(number, str_matrix(theta, '\t')))

    print("Conjunto de treinamento")
    for idx in range(n):
        print("\tExemplo {}".format(idx + 1))
        print("\t\tx: {}".format(x[idx, :]))
        print("\t\ty: {}".format(y[idx, :]))

    # --- Forward pass and cost ---------------------------------------------
    print(separator)
    print("Calculando erro/custo J da rede")
    for idx in range(n):
        example = idx + 1
        print("\tProcessando exemplo de treinamento {}".format(example))
        print("\tPropagando entrada {}".format(x[idx, :]))

        f = model.forward_propagation(x[idx, :])
        cost = model.cost_x(y[idx, :], f)

        print("\t\ta1: {}\n".format(model.a[0]))
        for layer in range(1, model.n_layers + 1):
            print("\t\tz{}: {}".format(layer + 1, model.z[layer]))
            print("\t\ta{}: {}\n".format(layer + 1, model.a[layer]))

        print("\t\tf(x[{}]): {}".format(example, f))
        print("\tSaida predita para o exemplo {}: {}".format(example, f))
        print("\tSaida esperada para o exemplo {}: {}".format(example, y[idx, :]))
        print("\tJ do exemplo {}: {}\n".format(example, cost))

    print("J total do dataset (com regularizacao): {}\n".format(model.cost(x, y)))

    # --- Backpropagation ---------------------------------------------------
    print(separator)
    print("Rodando backpropagation")
    for idx in range(n):
        example = idx + 1
        print("\tCalculando gradientes com base no exemplo {}".format(example))

        # Both buffers are reset for every example.
        model.g = zero_gradients()
        model.m = zero_gradients()

        pred = model.forward_propagation(x[idx, :])
        model.d[model.last_layer] = pred - y[idx, :]
        model.update_deltas(x[idx, :])

        for layer in range(model.last_layer, -1, -1):
            print("\t\tdelta{}: {}".format(layer + 2, model.d[layer]))

        model.accumulate_gradients()

        for layer in range(model.last_layer, -1, -1):
            print("\t\tGradientes de Theta{} com base no exemplo {}:\n{}".format(layer + 1, example, str_matrix(model.g[layer], '\t\t\t')))

    print("\tDataset completo processado. Calculando gradientes regularizados")
    model.final_gradients(n)
    for layer in range(model.n_layers):
        print("\t\tGradientes finais para Theta{} (com regularizacao):\n{}".format(layer + 1, str_matrix(model.g[layer], '\t\t\t')))

    # --- Numerical gradient check ------------------------------------------
    print(separator)
    print("Rodando verificacao numerica de gradientes (epsilon={})".format(epsilon))
    backprop_gradients = deepcopy(model.g)
    model.g = zero_gradients()

    for layer in range(model.n_layers):
        n_rows, n_cols = model.g[layer].shape[0], model.g[layer].shape[1]
        for row in range(n_rows):
            for col in range(n_cols):
                # Central difference on a single weight, restored afterwards.
                original = model.w[layer][row, col]

                model.w[layer][row, col] = original + epsilon
                cost_plus = model.cost(x, y)

                model.w[layer][row, col] = original - epsilon
                cost_minus = model.cost(x, y)

                model.g[layer][row, col] += (cost_plus - cost_minus) / (2 * epsilon)
                model.w[layer][row, col] = original

        print("\tGradiente numerico de Theta{}:\n{}".format(layer + 1, str_matrix(model.g[layer], '\t\t')))

    # --- Comparison --------------------------------------------------------
    print(separator)
    print("Verificando corretude dos gradientes com base nos gradientes numericos:")
    for layer in range(model.n_layers):
        errors = np.sum(np.abs(model.g[layer] - backprop_gradients[layer]))
        print("\tErro entre gradiente via backprop e gradiente numerico para Theta{}: {}".format(layer + 1, errors))