Example #1
def chebynet_norm_edge(edge_index,
                       num_nodes,
                       edge_weight=None,
                       normalization_type="sym",
                       use_dynamic_lambda_max=False,
                       cache=None):
    if cache is not None:
        cache_key = compute_cache_key(normalization_type)
        cached_data = cache.get(cache_key, None)
        if cached_data is not None:
            return cached_data

    edge_index, edge_weight = remove_self_loop_edge(edge_index, edge_weight)

    updated_edge_index, updated_edge_weight = get_laplacian(
        edge_index, num_nodes, edge_weight, normalization_type)

    # lambda_max = chebynet_compute_lambda_max(edge_index, edge_weight, normalization_type, num_nodes, cache=cache)
    if use_dynamic_lambda_max:
        lambda_max = LaplacianMaxEigenvalue(
            edge_index, num_nodes,
            edge_weight)(normalization_type=normalization_type)
    else:
        lambda_max = 2.0
    # ChebNet rescaling: multiply the Laplacian edge weights by 2 / lambda_max
    scaled_edge_weight = (2.0 * updated_edge_weight) / lambda_max

    assert edge_weight is not None

    if cache is not None:
        cache[cache_key] = updated_edge_index, scaled_edge_weight

    return updated_edge_index, scaled_edge_weight
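A minimal call sketch for the function above, using a toy 3-node graph. The tensor values and the cache dict are illustrative only; the helpers the function relies on (compute_cache_key, get_laplacian, LaplacianMaxEigenvalue) are assumed to be available in the same module.

import tensorflow as tf

# Hypothetical toy graph: 3 nodes, undirected edges 0-1 and 1-2 stored as directed pairs
edge_index = tf.constant([[0, 1, 1, 2],
                          [1, 0, 2, 1]], dtype=tf.int32)
cache = {}  # per-graph cache; a different graph needs its own dict

normed_edge_index, normed_edge_weight = chebynet_norm_edge(
    edge_index,
    num_nodes=3,
    normalization_type="sym",
    use_dynamic_lambda_max=True,   # compute lambda_max instead of assuming 2.0
    cache=cache,
)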
Example #2
def chebynet_norm_edge(edge_index, num_nodes, edge_weight, lambda_max,
                       normalization_type):
    edge_index, edge_weight = remove_self_loop_edge(edge_index, edge_weight)

    updated_edge_index, updated_edge_weight = get_laplacian(
        edge_index, edge_weight, normalization_type, num_nodes)

    scaled_edge_weight = (2.0 * updated_edge_weight) / lambda_max

    assert edge_weight is not None

    return updated_edge_index, scaled_edge_weight
Example #3
    def process(self):

        dataset_str = "cora"
        names = ['x', 'y', 'tx', 'ty', 'allx', 'ally', 'graph']
        objects = []
        for i in range(len(names)):
            data_name = "ind.{}.{}".format(dataset_str, names[i])
            data_path = os.path.join(self.raw_root_path, data_name)
            with open(data_path, 'rb') as f:
                if sys.version_info > (3, 0):
                    objects.append(pickle.load(f, encoding='latin1'))
                else:
                    objects.append(pickle.load(f))

        x, y, tx, ty, allx, ally, graph = tuple(objects)

        with open(os.path.join(self.raw_root_path,
                               "ind.{}.test.index".format(dataset_str)),
                  "r",
                  encoding="utf-8") as f:
            test_idx_reorder = [int(line.strip()) for line in f]
            test_idx_range = np.sort(test_idx_reorder)

        features = sp.vstack((allx, tx)).tolil()
        features[test_idx_reorder, :] = features[test_idx_range, :]
        # adj = nx.adjacency_matrix(nx.from_dict_of_lists(graph))

        labels = np.vstack((ally, ty))
        labels[test_idx_reorder, :] = labels[test_idx_range, :]

        test_index = test_idx_range.tolist()
        train_index = list(range(len(y)))
        valid_index = list(range(len(y), len(y) + 500))

        x = np.array(features.todense()).astype(np.float32)
        # Row-normalize features; empty rows would produce inf, which is reset to 1 below
        inv_sum_x = 1.0 / np.sum(x, axis=-1, keepdims=True)
        inv_sum_x[np.isnan(inv_sum_x)] = 1.0
        inv_sum_x[np.isinf(inv_sum_x)] = 1.0
        x *= inv_sum_x

        edge_index = np.array(nx.from_dict_of_lists(graph).edges).T
        edge_index, _ = remove_self_loop_edge(edge_index)
        edge_index, _ = convert_edge_to_directed(edge_index)
        y = np.argmax(labels, axis=-1).astype(np.int32)

        graph = Graph(x=x, edge_index=edge_index, y=y)

        return graph, (train_index, valid_index, test_index)
Example #4
    def __call__(self, data):
        edge_index = data.edge_index
        edge_weight = data.edge_weight

        edge_index, edge_weight = remove_self_loop_edge(edge_index, edge_weight)

        # Use the self-loop-free edge_index so it stays aligned with edge_weight
        edge_index, edge_weight = get_laplacian(edge_index, data.x.shape[0], edge_weight,
                                                self.normalization)
        # edge_index, edge_weight = add_self_loop_edge(edge_index, data.x.shape[0], edge_weight, fill_weight=-1.)

        L = to_scipy_sparse_matrix(edge_index, edge_weight, data.x.shape[0])

        eig_fn = eigs
        if self.is_undirected and self.normalization != 'rw':
            eig_fn = eigsh

        lambda_max = eig_fn(L, k=1, which='LM', return_eigenvectors=False)
        # eigs may return complex values in the non-symmetric case; keep the real part
        data.lambda_max = float(lambda_max.real)

        return data
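The lambda_max stored on data by this transform is what the Example #2 variant of chebynet_norm_edge expects as an argument. A minimal glue sketch, assuming both snippets come from the same codebase and that data already carries edge_index, edge_weight and x:

normed_edge_index, normed_edge_weight = chebynet_norm_edge(
    data.edge_index,
    data.x.shape[0],       # num_nodes
    data.edge_weight,
    data.lambda_max,       # largest eigenvalue of the normalized Laplacian
    normalization_type="sym",
)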
Example #5
def asap(x,
         edge_index,
         edge_weight,
         node_graph_index,
         attention_gcn_kernel,
         attention_gcn_bias,
         attention_query_kernel,
         attention_query_bias,
         attention_score_kernel,
         attention_score_bias,
         le_conv_self_kernel,
         le_conv_self_bias,
         le_conv_aggr_self_kernel,
         le_conv_aggr_self_bias,
         le_conv_aggr_neighbor_kernel,
         le_conv_aggr_neighbor_bias,
         K=None,
         ratio=None,
         le_conv_activation=tf.nn.sigmoid,
         drop_rate=0.0,
         training=None,
         cache=None):
    """
    Functional API for ASAP: Adaptive Structure Aware Pooling for Learning Hierarchical Graph Representation

    :param x: Tensor, shape: [num_nodes, num_features], node features
    :param edge_index: Tensor, shape: [2, num_edges], edge information
    :param edge_weight: Tensor or None, shape: [num_edges]
    :param node_graph_index: Tensor/NDArray, shape: [num_nodes], graph index for each node
    :param K: Keep top K targets for each source
    :param ratio: Keep num_targets * ratio targets for each source
    :param le_conv_activation: Activation to use for node_score before multiplying node_features with node_score
    :param training: Python boolean indicating whether the layer should behave in
        training mode (adding dropout) or in inference mode (doing nothing).
    :param cache: A dict for caching A' for GCN. Different graphs should not share the same cache dict.
    :return: [pooled_x, pooled_edge_index, pooled_edge_weight, pooled_node_graph_index]
    """

    num_nodes = tf.shape(x)[0]
    # num_graphs = tf.reduce_max(node_graph_index) + 1

    edge_index, edge_weight = remove_self_loop_edge(edge_index, edge_weight)
    edge_index_with_self_loop, edge_weight_with_self_loop = add_self_loop_edge(
        edge_index, num_nodes=num_nodes, edge_weight=edge_weight)

    row_with_self_loop, col_with_self_loop = edge_index_with_self_loop[
        0], edge_index_with_self_loop[1]

    attention_h = gcn(x,
                      edge_index,
                      edge_weight,
                      attention_gcn_kernel,
                      attention_gcn_bias,
                      cache=cache)

    # max_pool -> query
    attention_query = aggregate_neighbors(attention_h,
                                          edge_index_with_self_loop,
                                          None,
                                          mapper=identity_mapper,
                                          reducer=max_reducer,
                                          updater=identity_updater,
                                          num_nodes=num_nodes)

    attention_query = attention_query @ attention_query_kernel + attention_query_bias

    repeated_attention_query = tf.gather(attention_query, row_with_self_loop)
    repeated_attention_h = tf.gather(attention_h, col_with_self_loop)

    attention_score_h = tf.concat(
        [repeated_attention_query, repeated_attention_h], axis=-1)
    attention_score = attention_score_h @ attention_score_kernel + attention_score_bias
    attention_score = tf.nn.leaky_relu(attention_score, alpha=0.2)

    normed_attention_score = segment_softmax(attention_score,
                                             row_with_self_loop, num_nodes)
    if training and drop_rate > 0:
        normed_attention_score = tf.compat.v2.nn.dropout(
            normed_attention_score, rate=drop_rate)

    # nodes are clusters
    cluster_h = aggregate_neighbors(x,
                                    edge_index_with_self_loop,
                                    tf.reshape(normed_attention_score, [-1]),
                                    gcn_mapper,
                                    sum_reducer,
                                    identity_updater,
                                    num_nodes=num_nodes)

    node_score = le_conv(cluster_h,
                         edge_index,
                         edge_weight,
                         le_conv_self_kernel,
                         le_conv_self_bias,
                         le_conv_aggr_self_kernel,
                         le_conv_aggr_self_bias,
                         le_conv_aggr_neighbor_kernel,
                         le_conv_aggr_neighbor_bias,
                         activation=None)

    topk_node_index = topk_pool(node_graph_index, node_score, K=K, ratio=ratio)
    topk_node_score = tf.gather(node_score, topk_node_index)
    if le_conv_activation is not None:
        topk_node_score = le_conv_activation(topk_node_score)

    pooled_x = tf.gather(cluster_h, topk_node_index) * topk_node_score

    num_clusters = tf.shape(topk_node_index)[0]
    # node->cluster
    cluster_reverse_index = tf.cast(tf.fill([num_nodes], -1), tf.int32)
    cluster_reverse_index = tf.tensor_scatter_nd_update(
        cluster_reverse_index, tf.expand_dims(topk_node_index, axis=-1),
        tf.range(num_clusters))

    # row, col = edge_index[0], edge_index[1]
    assign_row = tf.gather(cluster_reverse_index, row_with_self_loop)
    assign_mask = tf.greater_equal(assign_row, 0)

    assign_row = tf.boolean_mask(assign_row, assign_mask)
    assign_col = tf.boolean_mask(col_with_self_loop, assign_mask)
    assign_edge_index = tf.stack([assign_row, assign_col], axis=0)

    assign_edge_weight = tf.boolean_mask(normed_attention_score, assign_mask)
    assign_edge_weight = tf.reshape(assign_edge_weight, [-1])
    assign_edge_weight = tf.stop_gradient(assign_edge_weight)

    # Coarsen in a large BatchGraph.
    _, pooled_edge_index, pooled_edge_weight = cluster_pool(
        None,
        edge_index_with_self_loop,
        edge_weight_with_self_loop,
        assign_edge_index,
        assign_edge_weight,
        num_clusters,
        num_nodes=num_nodes)

    pooled_edge_index, pooled_edge_weight = remove_self_loop_edge(
        pooled_edge_index, pooled_edge_weight)
    pooled_edge_index, pooled_edge_weight = add_self_loop_edge(
        pooled_edge_index, num_clusters, pooled_edge_weight)

    pooled_node_graph_index = tf.gather(node_graph_index, topk_node_index)

    return pooled_x, pooled_edge_index, pooled_edge_weight, pooled_node_graph_index
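A usage sketch for asap. The kernel and bias shapes below are assumptions inferred from how they are used above (the attention GCN projects features to units, while the attention score and LeConv scores are one scalar per node); x, edge_index, edge_weight and node_graph_index are assumed to come from a batched graph, and glorot_dense is a hypothetical helper introduced only for this sketch.

import tensorflow as tf

num_features = 16   # must match x.shape[-1]
units = 16

def glorot_dense(in_dim, out_dim):
    # Glorot-initialized kernel plus zero bias (sketch-only helper)
    init = tf.keras.initializers.GlorotUniform()
    return tf.Variable(init([in_dim, out_dim])), tf.Variable(tf.zeros([out_dim]))

attention_gcn_kernel, attention_gcn_bias = glorot_dense(num_features, units)
attention_query_kernel, attention_query_bias = glorot_dense(units, units)
attention_score_kernel, attention_score_bias = glorot_dense(units * 2, 1)
le_conv_self_kernel, le_conv_self_bias = glorot_dense(num_features, 1)
le_conv_aggr_self_kernel, le_conv_aggr_self_bias = glorot_dense(num_features, 1)
le_conv_aggr_neighbor_kernel, le_conv_aggr_neighbor_bias = glorot_dense(num_features, 1)

pooled_x, pooled_edge_index, pooled_edge_weight, pooled_node_graph_index = asap(
    x, edge_index, edge_weight, node_graph_index,
    attention_gcn_kernel, attention_gcn_bias,
    attention_query_kernel, attention_query_bias,
    attention_score_kernel, attention_score_bias,
    le_conv_self_kernel, le_conv_self_bias,
    le_conv_aggr_self_kernel, le_conv_aggr_self_bias,
    le_conv_aggr_neighbor_kernel, le_conv_aggr_neighbor_bias,
    ratio=0.5,          # keep half of the clusters per graph
    drop_rate=0.1,
    training=True,
    cache=None,         # or a per-graph dict to reuse the GCN normalization
)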
Example #6
    def process(self):

        dataset_str = self.dataset_name
        names = ['x', 'y', 'tx', 'ty', 'allx', 'ally', 'graph']
        objects = []
        for i in range(len(names)):
            data_name = "ind.{}.{}".format(dataset_str, names[i])
            data_path = os.path.join(self.raw_root_path, data_name)
            with open(data_path, 'rb') as f:
                if sys.version_info > (3, 0):
                    objects.append(pickle.load(f, encoding='latin1'))
                else:
                    objects.append(pickle.load(f))

        x, y, tx, ty, allx, ally, graph = tuple(objects)

        with open(os.path.join(self.raw_root_path,
                               "ind.{}.test.index".format(dataset_str)),
                  "r",
                  encoding="utf-8") as f:
            test_idx_reorder = [int(line.strip()) for line in f]
            test_idx_range = np.sort(test_idx_reorder)

        if self.dataset_name == 'citeseer':
            # Fix citeseer dataset (there are some isolated nodes in the graph)
            # Find isolated nodes, add them as zero-vecs into the right position
            test_idx_range_full = list(
                range(min(test_idx_reorder),
                      max(test_idx_reorder) + 1))
            tx_extended = sp.lil_matrix((len(test_idx_range_full), x.shape[1]))
            tx_extended[test_idx_range - min(test_idx_range), :] = tx
            tx = tx_extended
            ty_extended = np.zeros((len(test_idx_range_full), y.shape[1]))
            ty_extended[test_idx_range - min(test_idx_range), :] = ty
            ty = ty_extended

        features = sp.vstack((allx, tx)).tolil()
        features[test_idx_reorder, :] = features[test_idx_range, :]
        # adj = nx.adjacency_matrix(nx.from_dict_of_lists(graph))

        labels = np.vstack((ally, ty))
        labels[test_idx_reorder, :] = labels[test_idx_range, :]

        test_index = test_idx_range.tolist()
        if self.task == "semi_supervised":
            train_index = list(range(len(y)))
            valid_index = list(range(len(y), len(y) + 500))
        else:
            train_index = list(range(len(ally) - 500))
            valid_index = list(range(len(ally) - 500, len(ally)))

        x = np.array(features.todense()).astype(np.float32)
        # Row-normalize features; empty rows would produce inf, which is reset to 1 below
        inv_sum_x = 1.0 / np.sum(x, axis=-1, keepdims=True)
        inv_sum_x[np.isnan(inv_sum_x)] = 1.0
        inv_sum_x[np.isinf(inv_sum_x)] = 1.0
        x *= inv_sum_x

        edge_index = np.array(nx.from_dict_of_lists(graph).edges).T
        edge_index, _ = remove_self_loop_edge(edge_index)
        edge_index, _ = convert_edge_to_directed(edge_index)
        y = np.argmax(labels, axis=-1).astype(np.int32)

        graph = Graph(x=x, edge_index=edge_index, y=y)

        return graph, (train_index, valid_index, test_index)
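A sketch of how the returned values might be consumed. The enclosing dataset class is not shown above, so dataset here is a placeholder for an instance of it.

graph, (train_index, valid_index, test_index) = dataset.process()

# graph.x: [num_nodes, num_features] row-normalized float32 features
# graph.edge_index: [2, num_edges] directed edges without self-loops
# graph.y: [num_nodes] int32 class labels
train_y = graph.y[train_index]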