Example #1
    def init_learning(self):
        # Get window function
        self.feature_window = TensorList(
            [dcf.hann2d(sz) for sz in self.feature_sz])

        # Filter regularization
        self.filter_reg = self.fparams.attribute('filter_reg')

        # Activation function after the projection matrix (phi_1 in the paper)
        projection_activation = getattr(self.params, 'projection_activation',
                                        'none')
        if isinstance(projection_activation, tuple):
            projection_activation, act_param = projection_activation

        if projection_activation == 'none':
            self.projection_activation = lambda x: x
        elif projection_activation == 'relu':
            self.projection_activation = layers.relu
        elif projection_activation == 'elu':
            self.projection_activation = layers.elu
        elif projection_activation == 'mlu':
            # 'mlu' composes an ELU over a leaky ReLU (assuming layers.leaky_relu here)
            self.projection_activation = lambda x: layers.elu(
                layers.leaky_relu(x, 1 / act_param), act_param)
        else:
            raise ValueError('Unknown activation')

        # Activation function after the output scores (phi_2 in the paper)
        response_activation = getattr(self.params, 'response_activation',
                                      'none')
        if isinstance(response_activation, tuple):
            response_activation, act_param = response_activation

        if response_activation == 'none':
            self.response_activation = lambda x: x
        elif response_activation == 'relu':
            self.response_activation = layers.relu
        elif response_activation == 'elu':
            self.response_activation = layers.elu
        elif response_activation == 'mlu':
            # same 'mlu' composition as above (assuming layers.leaky_relu)
            self.response_activation = lambda x: layers.elu(
                layers.leaky_relu(x, 1 / act_param), act_param)
        else:
            raise ValueError('Unknown activation')
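
The 'mlu' branch composes an ELU over a leaky ReLU. A minimal NumPy sketch of that composition (an illustration, not code from the project) shows that positive inputs pass through unchanged while negative inputs saturate near -act_param:

import numpy as np

def elu(x, alpha=1.0):
    # ELU: x for x > 0, alpha * (exp(x) - 1) otherwise
    return np.where(x > 0, x, alpha * (np.exp(x) - 1.0))

def leaky_relu(x, slope):
    # Leaky ReLU: x for x > 0, slope * x otherwise
    return np.where(x > 0, x, slope * x)

def mlu(x, act_param):
    # phi(x) = elu(leaky_relu(x, 1 / act_param), act_param)
    return elu(leaky_relu(x, 1.0 / act_param), act_param)

x = np.array([-10.0, -1.0, 0.0, 1.0, 10.0])
print(mlu(x, act_param=0.05))  # negatives saturate towards -0.05, positives unchanged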
Example #2
    def send_attention(src_feat, dst_feat, edge_feat):
        if edge_feat is None or not edge_feat:
            k_h = L.elu(
                L.reshape(src_feat["k_h"],
                          [-1, num_heads, hidden_size, 1])) + 1
            v_h = dst_feat["v_h"]
        else:
            edge_feat = edge_feat["edge"]
            edge_feat = L.reshape(edge_feat, [-1, num_heads, hidden_size])
            k_h = L.elu(src_feat["k_h"] + edge_feat) + 1
            v_h = dst_feat["v_h"] + edge_feat
        k_h = L.reshape(k_h, [-1, num_heads, hidden_size, 1])

        v_h = L.reshape(v_h, [-1, num_heads, hidden_size, 1])
        sum_kTv = L.matmul(k_h, v_h, transpose_y=True)
        sum_k = L.reshape(k_h, [-1, num_heads * hidden_size])
        sum_kTv = L.reshape(sum_kTv,
                            [-1, num_heads * hidden_size * hidden_size])

        return {"sum_k": sum_k, "sum_kTv": sum_kTv}
Example #3
    def func(self, place):
        shape = [2, 3, 7, 9]
        eps = 1e-6
        alpha = 1.1
        dtype = np.float64

        x = layers.data('x', shape, False, dtype)
        x.persistable = True

        y = layers.elu(x, alpha=alpha)
        x_arr = np.random.uniform(-1, 1, shape).astype(dtype)
        gradient_checker.double_grad_check([x],
                                           y,
                                           x_init=x_arr,
                                           place=place,
                                           eps=eps)
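
double_grad_check numerically verifies second-order gradients of y = elu(x) around x_arr. As a rough first-order illustration of the underlying finite-difference idea (pure NumPy, not the Paddle checker, which extends the same comparison to the gradient of the gradient):

import numpy as np

alpha = 1.1

def elu(x):
    return np.where(x > 0, x, alpha * (np.exp(x) - 1.0))

def elu_grad(x):
    # d elu / dx: 1 for x > 0, alpha * exp(x) otherwise
    return np.where(x > 0, 1.0, alpha * np.exp(x))

x = np.random.uniform(-1, 1, 5)
eps = 1e-6
numeric = (elu(x + eps) - elu(x - eps)) / (2 * eps)  # central difference
print(np.max(np.abs(numeric - elu_grad(x))))         # small, away from the kink at x = 0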
Example #4
    def func(self, place):
        shape = [2, 4, 4, 4]
        eps = 1e-6
        alpha = 0.2
        dtype = np.float64
        SEED = 0

        x = layers.data('x', shape, False, dtype)
        x.persistable = True

        y = layers.elu(x, alpha=alpha)
        np.random.seed(SEED)  # seed the global NumPy RNG so the random input below is reproducible
        x_arr = np.random.uniform(-1, 1, shape).astype(dtype)
        gradient_checker.double_grad_check(
            [x], y, x_init=x_arr, place=place, eps=eps)
        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
        gradient_checker.double_grad_check_for_dygraph(
            self.elu_wrapper, [x], y, x_init=x_arr, place=place)
        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
Example #5
    def forward(self, input, adj):
        """Forward network"""
        h = layers.fc(input, size=self.out_features, num_flatten_dims=2)

        _, N, _ = h.shape
        middle_result1 = layers.expand(layers.matmul(h, self.a1),
                                       expand_times=(1, 1, N))
        middle_result2 = layers.transpose(layers.expand(
            layers.matmul(h, self.a2), expand_times=(1, 1, N)),
                                          perm=[0, 2, 1])
        e = layers.leaky_relu(middle_result1 + middle_result2, self.alpha)
        adj = layers.cast(adj, dtype='int32')
        attention = nn.mask_fill(e, adj == 0.0, -1e9)
        attention = layers.softmax(attention, axis=2)
        attention = layers.dropout(attention, self.dropout)
        h_prime = layers.matmul(attention, h)
        if self.concat:
            return layers.elu(h_prime)
        else:
            return h_prime
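
Filling masked positions with -1e9 before the softmax drives their attention weights to (numerically) zero, so each node only attends to its graph neighbors. A tiny NumPy sketch of that masking trick (illustration only):

import numpy as np

def softmax(x, axis=-1):
    x = x - x.max(axis=axis, keepdims=True)
    e = np.exp(x)
    return e / e.sum(axis=axis, keepdims=True)

e = np.random.randn(4, 4)             # raw attention logits for 4 nodes
adj = np.array([[1, 1, 0, 0],
                [1, 1, 1, 0],
                [0, 1, 1, 1],
                [0, 0, 1, 1]])
masked = np.where(adj == 0, -1e9, e)  # nn.mask_fill(e, adj == 0, -1e9)
attention = softmax(masked, axis=1)   # rows sum to 1, ~zero weight outside the graph
print(attention.round(3))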
Example #6
def graph_linformer(gw,
                    feature,
                    edge_feature,
                    hidden_size,
                    name,
                    num_heads=4,
                    attn_drop=False,
                    concat=True,
                    skip_feat=True,
                    gate=False,
                    norm=True,
                    relu=True,
                    k_hop=2,
                    is_test=False):
    """Implementation of graph Transformer from UniMP

    This is an implementation of the paper Unified Massage Passing Model for Semi-Supervised Classification
    (https://arxiv.org/abs/2009.03509).

    Args:
        name: Granph Transformer layer names.
        
        gw: Graph wrapper object (:code:`StaticGraphWrapper` or :code:`GraphWrapper`)

        feature: A tensor with shape (num_nodes, feature_size).

        hidden_size: The hidden size for graph transformer.

        num_heads: The head number in graph transformer.

        attn_drop: Dropout rate for attention.
        
        edge_feature: A tensor with shape (num_edges, feature_size).
        num_heads: 8
        
        concat: Reshape the output (num_nodes, num_heads, hidden_size) by concat (num_nodes, hidden_size * num_heads) or mean (num_nodes, hidden_size)
        
        skip_feat: Whether use skip connect
        
        gate: Whether add skip_feat and output up with gate weight
        
        norm: Whether use layer_norm for output
        
        relu: Whether use relu activation for output

        is_test: Whether in test phrase.

    Return:
        A tensor with shape (num_nodes, hidden_size * num_heads) or (num_nodes, hidden_size)
    """
    def send_attention(src_feat, dst_feat, edge_feat):
        if edge_feat is None or not edge_feat:
            k_h = L.elu(
                L.reshape(src_feat["k_h"],
                          [-1, num_heads, hidden_size, 1])) + 1
            v_h = dst_feat["v_h"]
        else:
            edge_feat = edge_feat["edge"]
            edge_feat = L.reshape(edge_feat, [-1, num_heads, hidden_size])
            k_h = L.elu(src_feat["k_h"] + edge_feat) + 1
            v_h = dst_feat["v_h"] + edge_feat
        k_h = L.reshape(k_h, [-1, num_heads, hidden_size, 1])

        v_h = L.reshape(v_h, [-1, num_heads, hidden_size, 1])
        sum_kTv = L.matmul(k_h, v_h, transpose_y=True)
        sum_k = L.reshape(k_h, [-1, num_heads * hidden_size])
        sum_kTv = L.reshape(sum_kTv,
                            [-1, num_heads * hidden_size * hidden_size])

        return {"sum_k": sum_k, "sum_kTv": sum_kTv}

    def send_copy(src_feat, dst_feat, edge_feat):
        return src_feat

    def reduce_sum(msg):
        return L.sequence_pool(msg, "sum")

    q = L.elu(
        linear(feature,
               hidden_size * num_heads,
               name=name + '_q_weight',
               init_type='gcn')) + 1
    k = linear(feature,
               hidden_size * num_heads,
               name=name + '_k_weight',
               init_type='gcn')
    v = linear(feature,
               hidden_size * num_heads,
               name=name + '_v_weight',
               init_type='gcn')

    reshape_q = L.reshape(q, [-1, num_heads, 1, hidden_size])
    reshape_k = L.reshape(k, [-1, num_heads, hidden_size])
    reshape_v = L.reshape(v, [-1, num_heads, hidden_size])

    msg = gw.send(send_attention,
                  nfeat_list=[("k_h", reshape_k), ("v_h", reshape_v)],
                  efeat_list=[('edge', edge_feature)])

    sum_k = gw.recv(msg["sum_k"], reduce_sum)
    sum_kTv = gw.recv(msg["sum_kTv"], reduce_sum)

    for i in range(1, k_hop):
        msg = gw.send(send_copy,
                      nfeat_list=[("sum_k", sum_k), ("sum_kTv", sum_kTv)])
        sum_k = gw.recv(msg["sum_k"], reduce_sum)
        sum_kTv = gw.recv(msg["sum_kTv"], reduce_sum)
        # sum_k: [-1, num_heads * hidden_size]
        # sum_kTv: [-1, num_heads * hidden_size * hidden_size]
    sum_k = L.reshape(sum_k, [-1, num_heads, 1, hidden_size])
    sum_kTv = L.reshape(sum_kTv, [-1, num_heads, hidden_size, hidden_size])
    out_feat = L.reshape(L.matmul(reshape_q, sum_kTv),
                         [-1, num_heads, hidden_size]) / L.reduce_sum(
                             reshape_q * sum_k, -1)
    if concat:
        out_feat = L.reshape(out_feat, [-1, num_heads * hidden_size])
    else:
        out_feat = L.reduce_mean(out_feat, dim=1)

    if skip_feat:
        if concat:
            skip_feature = linear(feature,
                                  hidden_size * num_heads,
                                  name=name + '_skip_weight',
                                  init_type='lin')
        else:
            skip_feature = linear(feature,
                                  hidden_size,
                                  name=name + '_skip_weight',
                                  init_type='lin')

        if gate:
            temp_output = L.concat(
                [skip_feature, out_feat, out_feat - skip_feature], axis=-1)
            gate_f = L.sigmoid(
                linear(temp_output,
                       1,
                       name=name + '_gate_weight',
                       init_type='lin'))
            out_feat = skip_feature * gate_f + out_feat * (1 - gate_f)
        else:
            out_feat = skip_feature + out_feat

    if norm:
        out_feat = layer_norm(out_feat, name="ln_%s" % name)

    if relu:
        out_feat = L.relu(out_feat)

    return out_feat
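
Taken together, each head computes a kernelized (linear) attention readout: with phi(x) = elu(x) + 1, the output for a query q is (phi(q) · sum_j phi(k_j) v_j^T) / (phi(q) · sum_j phi(k_j)), where the sums are aggregated over graph neighbors (and further propagated by the k_hop loop). A single-head NumPy sketch of that readout (illustration only, one node and its neighbors):

import numpy as np

hidden_size = 4
num_neighbors = 3

def elu(x, alpha=1.0):
    return np.where(x > 0, x, alpha * (np.exp(x) - 1.0))

q = elu(np.random.randn(hidden_size)) + 1.0                  # phi(q), as in the q projection above
k = elu(np.random.randn(num_neighbors, hidden_size)) + 1.0   # phi(k_j) for each neighbor
v = np.random.randn(num_neighbors, hidden_size)

sum_k = k.sum(axis=0)                                        # recv(msg["sum_k"], reduce_sum)
sum_kTv = (k[:, :, None] * v[:, None, :]).sum(axis=0)        # sum_j phi(k_j) v_j^T

out = q @ sum_kTv / (q @ sum_k)   # matmul(reshape_q, sum_kTv) / reduce_sum(reshape_q * sum_k)
print(out.shape)                  # (hidden_size,)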