def chebynet_norm_edge(edge_index, num_nodes, edge_weight=None, normalization_type="sym",
                       use_dynamic_lambda_max=False, cache=None):
    # Return the cached normalized edges if they have already been computed.
    if cache is not None:
        cache_key = compute_cache_key(normalization_type)
        cached_data = cache.get(cache_key, None)
        if cached_data is not None:
            return cached_data

    edge_index, edge_weight = remove_self_loop_edge(edge_index, edge_weight)
    updated_edge_index, updated_edge_weight = get_laplacian(
        edge_index, num_nodes, edge_weight, normalization_type)

    # lambda_max = chebynet_compute_lambda_max(edge_index, edge_weight, normalization_type, num_nodes, cache=cache)
    if use_dynamic_lambda_max:
        lambda_max = LaplacianMaxEigenvalue(
            edge_index, num_nodes, edge_weight)(normalization_type=normalization_type)
    else:
        lambda_max = 2.0

    # Rescale the Laplacian weights so that the spectrum fits the Chebyshev domain.
    scaled_edge_weight = (2.0 * updated_edge_weight) / lambda_max

    assert edge_weight is not None

    if cache is not None:
        cache[cache_key] = updated_edge_index, scaled_edge_weight

    return updated_edge_index, scaled_edge_weight
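# --- Illustrative sketch (not part of the library) ---
# A minimal, self-contained NumPy demo of the math behind the rescaling above:
# ChebNet operates on the scaled Laplacian 2 * L / lambda_max - I, whose eigenvalues
# lie in [-1, 1]. The toy adjacency matrix below is made up for illustration only.
import numpy as np

A = np.array([[0., 1., 0.],
              [1., 0., 1.],
              [0., 1., 0.]])
deg = A.sum(axis=-1)
d_inv_sqrt = 1.0 / np.sqrt(deg)
L = np.eye(3) - d_inv_sqrt[:, None] * A * d_inv_sqrt[None, :]  # sym-normalized Laplacian
lambda_max = np.linalg.eigvalsh(L).max()
L_scaled = 2.0 * L / lambda_max - np.eye(3)
print(np.linalg.eigvalsh(L_scaled))  # all eigenvalues fall in [-1, 1]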
def chebynet_norm_edge(edge_index, num_nodes, edge_weight, lambda_max, normalization_type):
    edge_index, edge_weight = remove_self_loop_edge(edge_index, edge_weight)
    updated_edge_index, updated_edge_weight = get_laplacian(
        edge_index, edge_weight, normalization_type, num_nodes)
    scaled_edge_weight = (2.0 * updated_edge_weight) / lambda_max
    assert edge_weight is not None
    return updated_edge_index, scaled_edge_weight
def process(self): dataset_str = "cora" names = ['x', 'y', 'tx', 'ty', 'allx', 'ally', 'graph'] objects = [] for i in range(len(names)): data_name = "ind.{}.{}".format(dataset_str, names[i]) data_path = os.path.join(self.raw_root_path, data_name) with open(data_path, 'rb') as f: if sys.version_info > (3, 0): objects.append(pickle.load(f, encoding='latin1')) else: objects.append(pickle.load(f)) x, y, tx, ty, allx, ally, graph = tuple(objects) with open(os.path.join(self.raw_root_path, "ind.{}.test.index".format(dataset_str)), "r", encoding="utf-8") as f: test_idx_reorder = [int(line.strip()) for line in f] test_idx_range = np.sort(test_idx_reorder) features = sp.vstack((allx, tx)).tolil() features[test_idx_reorder, :] = features[test_idx_range, :] # adj = nx.adjacency_matrix(nx.from_dict_of_lists(graph)) labels = np.vstack((ally, ty)) labels[test_idx_reorder, :] = labels[test_idx_range, :] test_index = test_idx_range.tolist() train_index = list(range(len(y))) valid_index = list(range(len(y), len(y) + 500)) x = np.array(features.todense()).astype(np.float32) inv_sum_x = 1.0 / np.sum(x, axis=-1, keepdims=True) inv_sum_x[np.isnan(inv_sum_x)] = 1.0 inv_sum_x[np.isinf(inv_sum_x)] = 1.0 x *= inv_sum_x edge_index = np.array(nx.from_dict_of_lists(graph).edges).T edge_index, _ = remove_self_loop_edge(edge_index) edge_index, _ = convert_edge_to_directed(edge_index) y = np.argmax(labels, axis=-1).astype(np.int32) graph = Graph(x=x, edge_index=edge_index, y=y) return graph, (train_index, valid_index, test_index)
def __call__(self, data):
    edge_index = data.edge_index
    edge_weight = data.edge_weight
    edge_index, edge_weight = remove_self_loop_edge(edge_index, edge_weight)
    # Build the (normalized) Laplacian from the self-loop-free edges computed above.
    edge_index, edge_weight = get_laplacian(edge_index, data.x.shape[0], edge_weight, self.normalization)
    # edge_index, edge_weight = add_self_loop_edge(edge_index, data.x.shape[0], edge_weight, fill_weight=-1.)
    L = to_scipy_sparse_matrix(edge_index, edge_weight, data.x.shape[0])

    # eigsh is cheaper and returns real eigenvalues, but it only applies to symmetric
    # matrices; the random-walk Laplacian is not symmetric, so fall back to eigs.
    eig_fn = eigs
    if self.is_undirected and self.normalization != 'rw':
        eig_fn = eigsh

    lambda_max = eig_fn(L, k=1, which='LM', return_eigenvectors=False)
    # eigs may return a complex value; keep only the real part.
    data.lambda_max = float(lambda_max.real)
    return data
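# --- Illustrative sketch (not part of the transform above) ---
# A minimal, self-contained SciPy demo of the lambda_max computation: build a
# symmetrically normalized Laplacian for a toy undirected graph and ask eigsh for its
# largest-magnitude eigenvalue. The toy edges below are made up for illustration.
import numpy as np
import scipy.sparse as sp
from scipy.sparse.linalg import eigsh

row = np.array([0, 1, 1, 2, 2, 3])
col = np.array([1, 0, 2, 1, 3, 2])
w = np.ones_like(row, dtype=np.float64)
A = sp.coo_matrix((w, (row, col)), shape=(4, 4)).tocsr()

deg = np.asarray(A.sum(axis=1)).flatten()
d_inv_sqrt = sp.diags(1.0 / np.sqrt(deg))
L = sp.eye(4) - d_inv_sqrt @ A @ d_inv_sqrt   # sym-normalized Laplacian

lambda_max = eigsh(L, k=1, which='LM', return_eigenvectors=False)
print(float(lambda_max[0]))  # ~2.0 for this bipartite path graph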
def asap(x, edge_index, edge_weight, node_graph_index,
         attention_gcn_kernel, attention_gcn_bias,
         attention_query_kernel, attention_query_bias,
         attention_score_kernel, attention_score_bias,
         le_conv_self_kernel, le_conv_self_bias,
         le_conv_aggr_self_kernel, le_conv_aggr_self_bias,
         le_conv_aggr_neighbor_kernel, le_conv_aggr_neighbor_bias,
         K=None, ratio=None,
         le_conv_activation=tf.nn.sigmoid,
         drop_rate=0.0, training=None, cache=None):
    """
    Functional API for ASAP: Adaptive Structure Aware Pooling for Learning Hierarchical Graph Representations.

    :param x: Tensor, shape: [num_nodes, num_features], node features
    :param edge_index: Tensor, shape: [2, num_edges], edge information
    :param edge_weight: Tensor or None, shape: [num_edges]
    :param node_graph_index: Tensor/NDArray, shape: [num_nodes], graph index for each node
    :param K: Keep top K targets for each source
    :param ratio: Keep num_targets * ratio targets for each source
    :param le_conv_activation: Activation applied to node_score before multiplying node features by node_score
    :param training: Python boolean indicating whether the layer should behave in
        training mode (adding dropout) or in inference mode (doing nothing).
    :param cache: A dict for caching A' for GCN. Different graphs should not share the same cache dict.
    :return: [pooled_x, pooled_edge_index, pooled_edge_weight, pooled_node_graph_index]
    """

    num_nodes = tf.shape(x)[0]
    # num_graphs = tf.reduce_max(node_graph_index) + 1

    edge_index, edge_weight = remove_self_loop_edge(edge_index, edge_weight)
    edge_index_with_self_loop, edge_weight_with_self_loop = add_self_loop_edge(
        edge_index, num_nodes=num_nodes, edge_weight=edge_weight)

    row_with_self_loop, col_with_self_loop = edge_index_with_self_loop[0], edge_index_with_self_loop[1]

    attention_h = gcn(x, edge_index, edge_weight, attention_gcn_kernel, attention_gcn_bias, cache=cache)

    # max_pool -> query
    attention_query = aggregate_neighbors(
        attention_h, edge_index_with_self_loop, None,
        mapper=identity_mapper,
        reducer=max_reducer,
        updater=identity_updater,
        num_nodes=num_nodes)
    attention_query = attention_query @ attention_query_kernel + attention_query_bias

    repeated_attention_query = tf.gather(attention_query, row_with_self_loop)
    repeated_attention_h = tf.gather(attention_h, col_with_self_loop)

    attention_score_h = tf.concat([repeated_attention_query, repeated_attention_h], axis=-1)

    attention_score = attention_score_h @ attention_score_kernel + attention_score_bias
    attention_score = tf.nn.leaky_relu(attention_score, alpha=0.2)

    normed_attention_score = segment_softmax(attention_score, row_with_self_loop, num_nodes)

    if training and drop_rate > 0:
        normed_attention_score = tf.compat.v2.nn.dropout(normed_attention_score, rate=drop_rate)

    # nodes are clusters
    cluster_h = aggregate_neighbors(
        x, edge_index_with_self_loop, tf.reshape(normed_attention_score, [-1]),
        gcn_mapper,
        sum_reducer,
        identity_updater,
        num_nodes=num_nodes)

    node_score = le_conv(cluster_h, edge_index, edge_weight,
                         le_conv_self_kernel, le_conv_self_bias,
                         le_conv_aggr_self_kernel, le_conv_aggr_self_bias,
                         le_conv_aggr_neighbor_kernel, le_conv_aggr_neighbor_bias,
                         activation=None)

    topk_node_index = topk_pool(node_graph_index, node_score, K=K, ratio=ratio)

    topk_node_score = tf.gather(node_score, topk_node_index)
    if le_conv_activation is not None:
        topk_node_score = le_conv_activation(topk_node_score)

    pooled_x = tf.gather(cluster_h, topk_node_index) * topk_node_score

    num_clusters = tf.shape(topk_node_index)[0]

    # node -> cluster
    cluster_reverse_index = tf.cast(tf.fill([num_nodes], -1), tf.int32)
    cluster_reverse_index = tf.tensor_scatter_nd_update(
        cluster_reverse_index,
        tf.expand_dims(topk_node_index, axis=-1),
        tf.range(num_clusters))

    # row, col = edge_index[0], edge_index[1]

    assign_row = tf.gather(cluster_reverse_index, row_with_self_loop)
    assign_mask = tf.greater_equal(assign_row, 0)

    assign_row = tf.boolean_mask(assign_row, assign_mask)
    assign_col = tf.boolean_mask(col_with_self_loop, assign_mask)
    assign_edge_index = tf.stack([assign_row, assign_col], axis=0)

    assign_edge_weight = tf.boolean_mask(normed_attention_score, assign_mask)
    assign_edge_weight = tf.reshape(assign_edge_weight, [-1])
    assign_edge_weight = tf.stop_gradient(assign_edge_weight)

    # Coarsen in a large BatchGraph.
    _, pooled_edge_index, pooled_edge_weight = cluster_pool(
        None, edge_index_with_self_loop, edge_weight_with_self_loop,
        assign_edge_index, assign_edge_weight, num_clusters, num_nodes=num_nodes)

    pooled_edge_index, pooled_edge_weight = remove_self_loop_edge(pooled_edge_index, pooled_edge_weight)
    pooled_edge_index, pooled_edge_weight = add_self_loop_edge(pooled_edge_index, num_clusters, pooled_edge_weight)

    pooled_node_graph_index = tf.gather(node_graph_index, topk_node_index)

    return pooled_x, pooled_edge_index, pooled_edge_weight, pooled_node_graph_index
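# --- Illustrative sketch (not part of the library) ---
# asap() relies on segment_softmax to normalize attention scores over all edges that
# share the same row index (row_with_self_loop above). A minimal TensorFlow sketch of
# such a per-segment softmax is shown below; the real tf_geometric helper may differ
# in details such as its numerical-stability handling.
import tensorflow as tf

def segment_softmax_sketch(scores, segment_ids, num_segments):
    # Subtract the per-segment max for numerical stability.
    seg_max = tf.math.unsorted_segment_max(scores, segment_ids, num_segments)
    scores = scores - tf.gather(seg_max, segment_ids)
    exp_scores = tf.exp(scores)
    seg_sum = tf.math.unsorted_segment_sum(exp_scores, segment_ids, num_segments)
    return exp_scores / tf.gather(seg_sum, segment_ids)

# Toy usage: two row nodes, three edges in total.
scores = tf.constant([1.0, 2.0, 3.0])
segment_ids = tf.constant([0, 0, 1])
print(segment_softmax_sketch(scores, segment_ids, 2).numpy())  # [~0.27, ~0.73, 1.0]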
def process(self):
    dataset_str = self.dataset_name
    names = ['x', 'y', 'tx', 'ty', 'allx', 'ally', 'graph']
    objects = []
    for i in range(len(names)):
        data_name = "ind.{}.{}".format(dataset_str, names[i])
        data_path = os.path.join(self.raw_root_path, data_name)
        with open(data_path, 'rb') as f:
            if sys.version_info > (3, 0):
                objects.append(pickle.load(f, encoding='latin1'))
            else:
                objects.append(pickle.load(f))

    x, y, tx, ty, allx, ally, graph = tuple(objects)

    with open(os.path.join(self.raw_root_path, "ind.{}.test.index".format(dataset_str)),
              "r", encoding="utf-8") as f:
        test_idx_reorder = [int(line.strip()) for line in f]
    test_idx_range = np.sort(test_idx_reorder)

    if self.dataset_name == 'citeseer':
        # Fix the citeseer dataset (there are some isolated nodes in the graph):
        # find isolated nodes and add them as zero vectors at the right positions.
        test_idx_range_full = list(range(min(test_idx_reorder), max(test_idx_reorder) + 1))
        tx_extended = sp.lil_matrix((len(test_idx_range_full), x.shape[1]))
        tx_extended[test_idx_range - min(test_idx_range), :] = tx
        tx = tx_extended
        ty_extended = np.zeros((len(test_idx_range_full), y.shape[1]))
        ty_extended[test_idx_range - min(test_idx_range), :] = ty
        ty = ty_extended

    features = sp.vstack((allx, tx)).tolil()
    features[test_idx_reorder, :] = features[test_idx_range, :]
    # adj = nx.adjacency_matrix(nx.from_dict_of_lists(graph))

    labels = np.vstack((ally, ty))
    labels[test_idx_reorder, :] = labels[test_idx_range, :]

    test_index = test_idx_range.tolist()
    if self.task == "semi_supervised":
        train_index = list(range(len(y)))
        valid_index = list(range(len(y), len(y) + 500))
    else:
        train_index = list(range(len(ally) - 500))
        valid_index = list(range(len(ally) - 500, len(ally)))

    x = np.array(features.todense()).astype(np.float32)
    inv_sum_x = 1.0 / np.sum(x, axis=-1, keepdims=True)
    inv_sum_x[np.isnan(inv_sum_x)] = 1.0
    inv_sum_x[np.isinf(inv_sum_x)] = 1.0
    x *= inv_sum_x

    edge_index = np.array(nx.from_dict_of_lists(graph).edges).T
    edge_index, _ = remove_self_loop_edge(edge_index)
    edge_index, _ = convert_edge_to_directed(edge_index)

    y = np.argmax(labels, axis=-1).astype(np.int32)

    graph = Graph(x=x, edge_index=edge_index, y=y)

    return graph, (train_index, valid_index, test_index)
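# --- Illustrative sketch (not part of the dataset loader) ---
# The feature preprocessing above row-normalizes the bag-of-words matrix; the
# isnan/isinf guards keep all-zero rows (e.g. isolated citeseer test nodes) at zero
# instead of turning them into NaN/Inf. A tiny NumPy demo with made-up values:
import numpy as np

x = np.array([[1.0, 3.0],
              [0.0, 0.0]], dtype=np.float32)          # second row is all zeros

inv_sum_x = 1.0 / np.sum(x, axis=-1, keepdims=True)   # -> [[0.25], [inf]]
inv_sum_x[np.isnan(inv_sum_x)] = 1.0
inv_sum_x[np.isinf(inv_sum_x)] = 1.0                  # zero rows get factor 1.0
x *= inv_sum_x

print(x)  # [[0.25, 0.75], [0.0, 0.0]] -- each row sums to 1 except the all-zero row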