Beispiel #1
0
def set2set(x, node_graph_index, lstm, num_iterations, training=None):
    """
    Functional API for Set2Set

    :param x: Tensor, shape: [num_nodes, num_features], node features
    :param node_graph_index: Tensor/NDArray, shape: [num_nodes], graph index for each node
    :param lstm: A lstm model.
    :param num_iterations: Number of iterations for attention.
    :param training: Python boolean indicating whether the layer should behave in
        training mode (adding dropout) or in inference mode (doing nothing).
    :return: Graph features, shape: [num_graphs, num_node_features * 2]
    """

    num_graphs = tf.reduce_max(node_graph_index) + 1

    lstm_units = tf.shape(x)[-1]

    h = tf.zeros([num_graphs, lstm_units * 2], dtype=tf.float32)
    initial_state = [tf.zeros([1, lstm_units], dtype=tf.float32), tf.zeros([1, lstm_units], dtype=tf.float32)]

    for _ in range(num_iterations):

        h = tf.expand_dims(h, axis=0)
        h, state_h, state_c = lstm(h, initial_state=initial_state, training=training)
        initial_state = [state_h, state_c]
        h = tf.squeeze(h, axis=0)

        repeated_h = tf.gather(h, node_graph_index)
        # attention
        att_score = tf.reduce_sum(x * repeated_h, axis=-1, keepdims=True)
        normed_att_score = segment_softmax(att_score, node_graph_index, num_graphs)
        att_h = tf.math.unsorted_segment_sum(x * normed_att_score, node_graph_index, num_graphs)
        h = tf.concat([h, att_h], axis=-1)

    return h
Beispiel #2
0
def gat(x,
        edge_index,
        query_kernel,
        query_bias,
        query_activation,
        key_kernel,
        key_bias,
        key_activation,
        kernel,
        bias=None,
        activation=None,
        num_heads=1,
        drop_rate=0.0,
        training=False):

    num_nodes = x.shape[0]

    # self-attention
    edge_index, edge_weight = add_self_loop_edge(edge_index, num_nodes)

    row, col = edge_index

    Q = tf.gather(x, row) @ query_kernel + query_bias
    Q = query_activation(Q)

    K = tf.gather(x, col) @ key_kernel + key_bias
    K = key_activation(K)

    V = x @ kernel

    # xxxxx_ denotes the multi-head style stuff
    Q_ = tf.concat(tf.split(Q, num_heads, axis=-1), axis=0)
    K_ = tf.concat(tf.split(K, num_heads, axis=-1), axis=0)
    V_ = tf.concat(tf.split(V, num_heads, axis=-1), axis=0)
    edge_index_ = tf.concat(
        [edge_index + i * num_nodes for i in range(num_heads)], axis=1)

    att_score_ = tf.reduce_sum(Q_ * K_, axis=-1)
    normed_att_score_ = segment_softmax(att_score_, edge_index_[0],
                                        num_nodes * num_heads)

    if training and drop_rate > 0.0:
        normed_att_score_ = tf.compat.v2.nn.dropout(normed_att_score_,
                                                    drop_rate)

    h_ = aggregate_neighbors(V_, edge_index_, normed_att_score_, gcn_mapper,
                             sum_reducer, identity_updater)

    h = tf.concat(tf.split(h_, num_heads, axis=0), axis=-1)

    if bias is not None:
        h += bias

    if activation is not None:
        h = activation(h)

    return h
Beispiel #3
0
def asap(x,
         edge_index,
         edge_weight,
         node_graph_index,
         attention_gcn_kernel,
         attention_gcn_bias,
         attention_query_kernel,
         attention_query_bias,
         attention_score_kernel,
         attention_score_bias,
         le_conv_self_kernel,
         le_conv_self_bias,
         le_conv_aggr_self_kernel,
         le_conv_aggr_self_bias,
         le_conv_aggr_neighbor_kernel,
         le_conv_aggr_neighbor_bias,
         K=None,
         ratio=None,
         le_conv_activation=tf.nn.sigmoid,
         drop_rate=0.0,
         training=None,
         cache=None):
    """
    Functional API for ASAP: Adaptive Structure Aware Pooling for Learning Hierarchical Graph Representation

    :param x: Tensor, shape: [num_nodes, num_features], node features
    :param edge_index: Tensor, shape: [2, num_edges], edge information
    :param edge_weight: Tensor or None, shape: [num_edges]
    :param node_graph_index: Tensor/NDArray, shape: [num_nodes], graph index for each node
    :param K: Keep top K targets for each source
    :param ratio: Keep num_targets * ratio targets for each source
    :param le_conv_activation: Activation to use for node_score before multiplying node_features with node_score
    :param training: Python boolean indicating whether the layer should behave in
        training mode (adding dropout) or in inference mode (doing nothing).
    :param cache: A dict for caching A' for GCN. Different graph should not share the same cache dict.
    :return: [pooled_x, pooled_edge_index, pooled_edge_weight, pooled_node_graph_index]
    """

    num_nodes = tf.shape(x)[0]
    # num_graphs = tf.reduce_max(node_graph_index) + 1

    edge_index, edge_weight = remove_self_loop_edge(edge_index, edge_weight)
    edge_index_with_self_loop, edge_weight_with_self_loop = add_self_loop_edge(
        edge_index, num_nodes=num_nodes, edge_weight=edge_weight)

    row_with_self_loop, col_with_self_loop = edge_index_with_self_loop[
        0], edge_index_with_self_loop[1]

    attention_h = gcn(x,
                      edge_index,
                      edge_weight,
                      attention_gcn_kernel,
                      attention_gcn_bias,
                      cache=cache)

    # max_pool -> query
    attention_query = aggregate_neighbors(attention_h,
                                          edge_index_with_self_loop,
                                          None,
                                          mapper=identity_mapper,
                                          reducer=max_reducer,
                                          updater=identity_updater,
                                          num_nodes=num_nodes)

    attention_query = attention_query @ attention_query_kernel + attention_query_bias

    repeated_attention_query = tf.gather(attention_query, row_with_self_loop)
    repeated_attention_h = tf.gather(attention_h, col_with_self_loop)

    attention_score_h = tf.concat(
        [repeated_attention_query, repeated_attention_h], axis=-1)
    attention_score = attention_score_h @ attention_score_kernel + attention_score_bias
    attention_score = tf.nn.leaky_relu(attention_score, alpha=0.2)

    normed_attention_score = segment_softmax(attention_score,
                                             row_with_self_loop, num_nodes)
    if training and drop_rate > 0:
        normed_attention_score = tf.compat.v2.nn.dropout(
            normed_attention_score, rate=drop_rate)

    # nodes are clusters
    cluster_h = aggregate_neighbors(x,
                                    edge_index_with_self_loop,
                                    tf.reshape(normed_attention_score, [-1]),
                                    gcn_mapper,
                                    sum_reducer,
                                    identity_updater,
                                    num_nodes=num_nodes)

    node_score = le_conv(cluster_h,
                         edge_index,
                         edge_weight,
                         le_conv_self_kernel,
                         le_conv_self_bias,
                         le_conv_aggr_self_kernel,
                         le_conv_aggr_self_bias,
                         le_conv_aggr_neighbor_kernel,
                         le_conv_aggr_neighbor_bias,
                         activation=None)

    topk_node_index = topk_pool(node_graph_index, node_score, K=K, ratio=ratio)
    topk_node_score = tf.gather(node_score, topk_node_index)
    if le_conv_activation is not None:
        topk_node_score = le_conv_activation(topk_node_score)

    pooled_x = tf.gather(cluster_h, topk_node_index) * topk_node_score

    num_clusters = tf.shape(topk_node_index)[0]
    # node->cluster
    cluster_reverse_index = tf.cast(tf.fill([num_nodes], -1), tf.int32)
    cluster_reverse_index = tf.tensor_scatter_nd_update(
        cluster_reverse_index, tf.expand_dims(topk_node_index, axis=-1),
        tf.range(num_clusters))

    # row, col = edge_index[0], edge_index[1]
    assign_row = tf.gather(cluster_reverse_index, row_with_self_loop)
    assign_mask = tf.greater_equal(assign_row, 0)

    assign_row = tf.boolean_mask(assign_row, assign_mask)
    assign_col = tf.boolean_mask(col_with_self_loop, assign_mask)
    assign_edge_index = tf.stack([assign_row, assign_col], axis=0)

    assign_edge_weight = tf.boolean_mask(normed_attention_score, assign_mask)
    assign_edge_weight = tf.reshape(assign_edge_weight, [-1])
    assign_edge_weight = tf.stop_gradient(assign_edge_weight)

    # Coarsen in a large BatchGraph.
    _, pooled_edge_index, pooled_edge_weight = cluster_pool(
        None,
        edge_index_with_self_loop,
        edge_weight_with_self_loop,
        assign_edge_index,
        assign_edge_weight,
        num_clusters,
        num_nodes=num_nodes)

    pooled_edge_index, pooled_edge_weight = remove_self_loop_edge(
        pooled_edge_index, pooled_edge_weight)
    pooled_edge_index, pooled_edge_weight = add_self_loop_edge(
        pooled_edge_index, num_clusters, pooled_edge_weight)

    pooled_node_graph_index = tf.gather(node_graph_index, topk_node_index)

    return pooled_x, pooled_edge_index, pooled_edge_weight, pooled_node_graph_index
Beispiel #4
0
def gat(x,
        edge_index,
        query_kernel,
        query_bias,
        query_activation,
        key_kernel,
        key_bias,
        key_activation,
        kernel,
        bias=None,
        activation=None,
        num_heads=1,
        drop_rate=0.0,
        training=False):
    """

    :param x: Tensor, shape: [num_nodes, num_features], node features
    :param edge_index: Tensor, shape: [2, num_edges], edge information
    :param query_kernel: Tensor, shape: [num_features, num_query_features], weight for Q in attention
    :param query_bias: Tensor, shape: [num_query_features], bias for Q in attention
    :param query_activation: Activation function for Q in attention.
    :param key_kernel: Tensor, shape: [num_features, num_key_features], weight for K in attention
    :param key_bias: Tensor, shape: [num_key_features], bias for K in attention
    :param key_activation: Activation function for K in attention.
    :param kernel: Tensor, shape: [num_features, num_output_features], weight
    :param bias: Tensor, shape: [num_output_features], bias
    :param activation: Activation function to use.
    :param num_heads: Number of attention heads.
    :param drop_rate: Dropout rate.
    :param training: Python boolean indicating whether the layer should behave in
        training mode (adding dropout) or in inference mode (doing nothing).
    :return: Updated node features (x), shape: [num_nodes, num_output_features]
    """

    num_nodes = x.shape[0]

    # self-attention
    edge_index, edge_weight = add_self_loop_edge(edge_index, num_nodes)

    row, col = edge_index

    Q = query_activation(x @ query_kernel + query_bias)
    Q = tf.gather(Q, row)

    K = key_activation(x @ key_kernel + key_bias)
    K = tf.gather(K, col)

    V = x @ kernel

    # xxxxx_ denotes the multi-head style stuff
    Q_ = tf.concat(tf.split(Q, num_heads, axis=-1), axis=0)
    K_ = tf.concat(tf.split(K, num_heads, axis=-1), axis=0)
    V_ = tf.concat(tf.split(V, num_heads, axis=-1), axis=0)
    edge_index_ = tf.concat(
        [edge_index + i * num_nodes for i in range(num_heads)], axis=1)

    att_score_ = tf.reduce_sum(Q_ * K_, axis=-1)
    normed_att_score_ = segment_softmax(att_score_, edge_index_[0],
                                        num_nodes * num_heads)

    if training and drop_rate > 0.0:
        normed_att_score_ = tf.compat.v2.nn.dropout(normed_att_score_,
                                                    drop_rate)

    h_ = aggregate_neighbors(V_, edge_index_, normed_att_score_, gcn_mapper,
                             sum_reducer, identity_updater)

    h = tf.concat(tf.split(h_, num_heads, axis=0), axis=-1)

    if bias is not None:
        h += bias

    if activation is not None:
        h = activation(h)

    return h
Beispiel #5
0
def gat(x,
        edge_index,
        query_kernel,
        query_bias,
        query_activation,
        key_kernel,
        key_bias,
        key_activation,
        kernel,
        bias=None,
        activation=None,
        num_heads=1,
        split_value_heads=True,
        drop_rate=0.0,
        training=False):
    """

    :param x: Tensor, shape: [num_nodes, num_features], node features
    :param edge_index: Tensor, shape: [2, num_edges], edge information
    :param query_kernel: Tensor, shape: [num_features, num_query_features], weight for Q in attention
    :param query_bias: Tensor, shape: [num_query_features], bias for Q in attention
    :param query_activation: Activation function for Q in attention.
    :param key_kernel: Tensor, shape: [num_features, num_key_features], weight for K in attention
    :param key_bias: Tensor, shape: [num_key_features], bias for K in attention
    :param key_activation: Activation function for K in attention.
    :param kernel: Tensor, shape: [num_features, num_output_features], weight
    :param bias: Tensor, shape: [num_output_features], bias
    :param activation: Activation function to use.
    :param num_heads: Number of attention heads.
    :param split_value_heads: Boolean. If true, split V as value attention heads, and then concatenate them as output.
        Else, num_heads replicas of V are used as value attention heads, and the mean of them are used as output.
    :param drop_rate: Dropout rate.
    :param training: Python boolean indicating whether the layer should behave in
        training mode (adding dropout) or in inference mode (doing nothing).
    :return: Updated node features (x), shape: [num_nodes, num_output_features]
    """

    num_nodes = tf.shape(x)[0]

    # self-attention
    edge_index, edge_weight = add_self_loop_edge(edge_index, num_nodes)

    row, col = edge_index[0], edge_index[1]

    Q = x @ query_kernel + query_bias
    if query_activation is not None:
        Q = query_activation(Q)
    Q = tf.gather(Q, row)

    K = x @ key_kernel + key_bias
    if key_activation is not None:
        K = key_activation(K)
    K = tf.gather(K, col)

    V = x @ kernel

    # xxxxx_ denotes the multi-head style stuff
    Q_ = tf.concat(tf.split(Q, num_heads, axis=-1), axis=0)
    K_ = tf.concat(tf.split(K, num_heads, axis=-1), axis=0)
    # splited queries and keys are modeled as virtual vertices
    qk_edge_index_ = tf.concat(
        [edge_index + i * num_nodes for i in range(num_heads)], axis=1)

    scale = tf.math.sqrt(tf.cast(tf.shape(Q_)[-1], tf.float32))
    att_score_ = tf.reduce_sum(Q_ * K_ / scale, axis=-1)
    normed_att_score_ = segment_softmax(att_score_, qk_edge_index_[0],
                                        num_nodes * num_heads)

    if training and drop_rate > 0.0:
        normed_att_score_ = tf.compat.v2.nn.dropout(normed_att_score_,
                                                    drop_rate)

    if split_value_heads:
        V_ = tf.concat(tf.split(V, num_heads, axis=-1), axis=0)
        edge_index_ = qk_edge_index_
    else:
        V_ = V
        edge_index_ = tf.tile(edge_index, [1, num_heads])

    h_ = aggregate_neighbors(V_, edge_index_, normed_att_score_, gcn_mapper,
                             sum_reducer, identity_updater)

    if split_value_heads:
        h = tf.concat(tf.split(h_, num_heads, axis=0), axis=-1)
    else:
        h = h_ / num_heads

    if bias is not None:
        h += bias

    if activation is not None:
        h = activation(h)

    return h