    def infer(self, inputs, outputs):
        """Run model inference.

        Only generation is supported for now.
        """
        if self.do_generation:
            return self.generator.inference(self, inputs, outputs)
        else:
            # Per-target-token LM loss.
            tgt_logits = self._calc_logits(outputs["enc_out"], inputs["tgt_idx"])
            tgt_lm_loss = layers.softmax_with_cross_entropy(
                logits=tgt_logits, label=inputs["tgt_label"])
            # Accumulate token losses into a per-example vector:
            # scatter with overwrite=False sums entries that share a batch index.
            lm_loss = layers.fill_constant_batch_size_like(
                outputs["enc_out"], [-1], self.dtype, 0)
            lm_loss = layers.scatter(lm_loss, inputs["tgt_idx"][:, 0], tgt_lm_loss[:, 0], overwrite=False)
            # Count the number of target tokens per example the same way.
            tokens_num = layers.fill_constant_batch_size_like(
                outputs["enc_out"], [-1], self.dtype, 0)
            tgt_tokens_num = layers.fill_constant_batch_size_like(
                tgt_lm_loss, [-1], self.dtype, 1)
            tokens_num = layers.scatter(tokens_num, inputs["tgt_idx"][:, 0], tgt_tokens_num, overwrite=False)
            predictions = {
                "lm_loss": lm_loss,
                "tokens_num": tokens_num,
                "data_id": inputs["data_id"]
            }
            return predictions
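A minimal NumPy sketch (made-up batch of 4, not the model's real tensors) of what the two scatter(..., overwrite=False) calls compute: per-token losses and token counts are accumulated into batch-sized vectors, with repeated batch indices in tgt_idx[:, 0] summed together.

import numpy as np

batch_size = 4
tgt_batch_idx = np.array([0, 0, 1, 3])        # tgt_idx[:, 0]: example id of each target token
tgt_lm_loss = np.array([0.5, 0.7, 0.2, 1.1])  # per-token losses (tgt_lm_loss[:, 0])

lm_loss = np.zeros(batch_size)
np.add.at(lm_loss, tgt_batch_idx, tgt_lm_loss)  # scatter with overwrite=False accumulates
tokens_num = np.zeros(batch_size)
np.add.at(tokens_num, tgt_batch_idx, 1.0)
print(lm_loss)     # [1.2 0.2 0.  1.1]
print(tokens_num)  # [2. 1. 0. 1.]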
Example #2
    def label_embed_input(self, feature):
        label = F.data(name="label", shape=[None, 1], dtype="int64")
        label_idx = F.data(name='label_idx', shape=[None], dtype="int64")
        label = L.reshape(label, shape=[-1])
        label = L.gather(label, label_idx, overwrite=False)

        lay_norm_attr = F.ParamAttr(
            initializer=F.initializer.ConstantInitializer(value=1))
        lay_norm_bias = F.ParamAttr(
            initializer=F.initializer.ConstantInitializer(value=0))
        feature = L.layer_norm(feature,
                               name='layer_norm_feature_input1',
                               param_attr=lay_norm_attr,
                               bias_attr=lay_norm_bias)

        embed_attr = F.ParamAttr(
            initializer=F.initializer.NormalInitializer(loc=0.0, scale=1.0))
        embed = F.embedding(input=label,
                            size=(self.out_size, self.embed_size),
                            param_attr=embed_attr)
        lay_norm_attr = F.ParamAttr(
            initializer=F.initializer.ConstantInitializer(value=1))
        lay_norm_bias = F.ParamAttr(
            initializer=F.initializer.ConstantInitializer(value=0))
        embed = L.layer_norm(embed,
                             name='layer_norm_feature_input2',
                             param_attr=lay_norm_attr,
                             bias_attr=lay_norm_bias)
        embed = L.relu(embed)

        feature_label = L.gather(feature, label_idx, overwrite=False)
        feature_label = feature_label + embed
        feature = L.scatter(feature, label_idx, feature_label, overwrite=True)

        return feature
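The gather / add / scatter(overwrite=True) sequence above only touches the rows named in label_idx; every other node feature passes through unchanged. A small NumPy sketch with assumed shapes:

import numpy as np

feature = np.arange(12, dtype="float32").reshape(4, 3)   # (num_nodes, hidden)
embed = np.ones((2, 3), dtype="float32")                 # label embeddings for the labeled nodes
label_idx = np.array([1, 3])                             # nodes that carry a label

updated = feature[label_idx] + embed                     # L.gather + add
feature[label_idx] = updated                             # L.scatter with overwrite=True
print(feature)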
Example #3
def get_degree(edge, num_nodes):
    init_output = L.fill_constant(shape=[num_nodes], value=0, dtype="float32")
    init_output.stop_gradient = True
    final_output = L.scatter(init_output,
                             edge,
                             L.full_like(edge, 1, dtype="float32"),
                             overwrite=False)
    return final_output
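Equivalently, in NumPy the same degree computation is a bincount over the destination ids, since scatter with overwrite=False accumulates the scattered ones (toy edge list below is an assumption):

import numpy as np

num_nodes = 5
edge_dst = np.array([0, 2, 2, 4, 0, 0])            # destination node id of each edge
degree = np.bincount(edge_dst, minlength=num_nodes).astype("float32")
print(degree)   # [3. 0. 2. 0. 1.]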
Example #4
def sag_pool(gw, feature, ratio, graph_id, dataset, name, activation=L.tanh):
    """Implementation of self-attention graph pooling (SAGPool)

    This is an implementation of the paper SELF-ATTENTION GRAPH POOLING
    (https://arxiv.org/pdf/1904.08082.pdf)

    Args:
        gw: Graph wrapper object.

        feature: A tensor with shape (num_nodes, feature_size).

        ratio: The pooling ratio of nodes we want to select.

        graph_id: The graphs that the nodes belong to. 

        dataset: Used to distinguish the FRANKENSTEIN dataset from other datasets (it selects gcn vs. norm_gcn below).

        name: The name of SAGPool layer.
        
        activation: The activation function.

    Return:
        new_feature: A tensor with shape (num_nodes, feature_size); the features of
                     unselected nodes are masked with zeros.

        ratio_length: The number of selected nodes in each graph.

    """
    if dataset == "FRANKENSTEIN":
        gcn_ = gcn
    else:
        gcn_ = norm_gcn

    score = gcn_(gw=gw,
                 feature=feature,
                 hidden_size=1,
                 activation=None,
                 norm=gw.node_feat["norm"],
                 name=name)
    score = L.squeeze(score, axes=[])
    perm, ratio_length = topk_pool(gw, score, graph_id, ratio)

    mask = L.zeros_like(score)
    mask = L.cast(mask, dtype="float32")
    updates = L.ones_like(perm)
    updates = L.cast(updates, dtype="float32")
    mask = L.scatter(mask, perm, updates)
    new_feature = L.elementwise_mul(feature, mask, axis=0)
    temp_score = activation(score)
    new_feature = L.elementwise_mul(new_feature, temp_score, axis=0)
    return new_feature, ratio_length
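A toy NumPy sketch of the final masking step: perm holds the node indices kept by topk_pool, the scattered 0/1 mask zeroes every unselected row, and the activated score rescales the rows that survive.

import numpy as np

feature = np.ones((4, 3), dtype="float32")
score = np.array([0.9, -0.3, 0.5, 0.1], dtype="float32")
perm = np.array([0, 2])                    # node indices kept by topk_pool

mask = np.zeros_like(score)
mask[perm] = 1.0                           # L.scatter(mask, perm, ones)
new_feature = feature * mask[:, None]      # zero the rows of unselected nodes
new_feature = new_feature * np.tanh(score)[:, None]
print(new_feature)                         # rows 1 and 3 are all zeros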
Example #5
    def label_embed_input(self, feature):
        label = F.data(name="label",
                       shape=[None, self.out_size],
                       dtype="int64")
        label_idx = F.data(name='label_idx', shape=[None], dtype="int64")
        label = L.gather(label, label_idx, overwrite=False)
        label = L.cast(label, dtype="float32")

        label_feat = self.embed_input(label, "label_feat")
        feature_label = L.gather(feature, label_idx, overwrite=False)

        feature_label = feature_label + label_feat
        feature = L.scatter(feature, label_idx, feature_label, overwrite=True)
        return feature
Example #6
def fluid_sequence_scatter(input, index, value):
    """
    args:
        input: 1-level LoDTensor
        index: 1-d tensor of the sequence index
        value: scalar
    return:
        output = input
        output[index + offset] = value
        lod_set(output, input)
    """
    offset = fluid_sequence_get_offset(input)
    offset_index = index + offset
    offset_index.stop_gradient = True
    updates = fluid.layers.fill_constant_batch_size_like(input, shape=input.shape, value=value, dtype=input.dtype)
    output = layers.scatter(input, layers.cast(offset_index, 'int32'), updates)
    return layers.lod_reset(output, input)
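A NumPy sketch of the index arithmetic, assuming fluid_sequence_get_offset (defined elsewhere) returns each sequence's start position from the LoD and that index holds one position per sequence; the scatter then overwrites exactly one row per sequence with the scalar value.

import numpy as np

lod = np.array([0, 3, 5])            # two sequences: rows 0-2 and rows 3-4
flat_input = np.zeros(5, dtype="float32")
index = np.array([1, 0])             # position inside each sequence
offset = lod[:-1]                    # sequence start offsets: [0, 3]
value = 7.0

flat_input[index + offset] = value   # rows 1 and 3 are overwritten with the scalar
print(flat_input)                    # [0. 7. 0. 7. 0.]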
Example #7
    def test_scatter(self):
        program = Program()
        with program_guard(program):
            x = layers.data(name='x',
                            shape=[3, 3],
                            append_batch_size=False,
                            dtype='float32')
            idx = layers.data(name='idx',
                              shape=[2],
                              append_batch_size=False,
                              dtype='int32')
            updates = layers.data(name='updates',
                                  shape=[2, 3],
                                  append_batch_size=False,
                                  dtype='float32')
            out = layers.scatter(input=x, index=idx, updates=updates)
            self.assertIsNotNone(out)
        print(str(program))
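For concrete inputs with the shapes declared in the test, the default overwrite=True scatter simply replaces the addressed rows, e.g. in NumPy (values below are assumptions):

import numpy as np

x = np.zeros((3, 3), dtype="float32")
idx = np.array([2, 0], dtype="int32")
updates = np.array([[1., 1., 1.],
                    [2., 2., 2.]], dtype="float32")

out = x.copy()
out[idx] = updates            # row 2 <- [1, 1, 1], row 0 <- [2, 2, 2]
print(out)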
Example #8
def scatter_max(input, index, updates):
    """Scatter max updates to input by given index.

    Applies the element-wise maximum of sparse updates to input variables.

    Args:
        input: Input tensor to be updated

        index: Slice index

        updates: Must have same type as input.

    Return:
        Same type and shape as input.
    """

    output = L.scatter(input, index, updates, mode='max')
    return output
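A NumPy sketch of scatter-max semantics with toy values: where an index repeats, the output keeps the element-wise maximum of the existing row and all colliding updates.

import numpy as np

base = np.array([[0., 0.], [5., 0.]])
index = np.array([1, 1, 0])
updates = np.array([[1., 9.], [3., 2.], [4., 4.]])

output = base.copy()
np.maximum.at(output, index, updates)      # element-wise max per destination row
print(output)                              # [[4. 4.] [5. 9.]]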
Example #9
def scatter_add(input, index, updates):
    """Scatter add updates to input by given index.

    Adds sparse updates to input variables.

    Args:
        input: Input tensor to be updated

        index: Slice index

        updates: Must have same type as input.

    Return:
        Same type and shape as input.
    """

    output = L.scatter(input, index, updates, overwrite=False)
    return output
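A toy NumPy contrast of the two modes when the index repeats: plain overwrite keeps only one of the colliding updates (the last one written here), while the overwrite=False path this helper uses accumulates them.

import numpy as np

base = np.zeros((2, 2))
index = np.array([0, 0])
updates = np.array([[1., 1.], [2., 2.]])

overwritten = base.copy()
overwritten[index] = updates               # row 0 keeps only one update: [2. 2.]

accumulated = base.copy()
np.add.at(accumulated, index, updates)     # row 0 sums both updates: [3. 3.]
print(overwritten[0], accumulated[0])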
Example #10
    def label_embed_input(self, feature):
        label = F.data(name="label_all", shape=[None, 1], dtype="int64")
        label_idx = F.data(name='label_idx', shape=[None], dtype="int64")
        label = L.reshape(label, shape=[-1])
        #         label = L.index_select(label, label_idx)
        label = L.gather(label, label_idx, overwrite=False)

        embed_attr = F.ParamAttr(
            initializer=F.initializer.NormalInitializer(loc=0.0, scale=1.0))
        embed = F.embedding(input=label,
                            size=(self.out_size, self.embed_size),
                            param_attr=embed_attr)
        #         feature_label = L.index_select(feature, label_idx)
        feature_label = L.gather(feature, label_idx, overwrite=False)
        feature_label = feature_label + embed
        feature = L.scatter(feature, label_idx, feature_label, overwrite=True)

        return feature
Example #11
def fluid_sequence_scatter(input, index, offset, updates):
    """
    args:
        input: 1-level LoDTensor, 'float32' only
        index: 1-d tensor of the sequence index, 'int32' only
        offset: the same shape and dtype as index
        updates: a tensor of shape (len(index),) + input.shape[1:]
    return:
        output = input
        output[index + offset] = updates
        lod_set(output, input)
    """
    # assert input.lod_level == 1, input
    assert index.shape == offset.shape
    assert input.shape[1:] == updates.shape[1:]
    new_index = index + offset
    new_index.stop_gradient = True
    output = layers.scatter(input, new_index, updates)
    return layers.lod_reset(output, input)
Example #12
def recv(dst, uniq_dst, bucketing_index, msg, reduce_function, num_nodes,
         num_edges):
    """Recv message from given msg to dst nodes.
    """
    if reduce_function == "sum":
        if isinstance(msg, dict):
            raise TypeError("The message for build-in function"
                            " should be Tensor not dict.")

        try:
            out_dim = msg.shape[-1]
            init_output = L.fill_constant(shape=[num_nodes, out_dim],
                                          value=0,
                                          dtype=msg.dtype)
            init_output.stop_gradient = False
            empty_msg_flag = L.cast(num_edges > 0, dtype=msg.dtype)
            msg = msg * empty_msg_flag
            output = paddle_helper.scatter_add(init_output, dst, msg)
            return output
        except TypeError as e:
            warnings.warn(
                "scatter_add is not supported with paddle version <= 1.5")

            def sum_func(message):
                return L.sequence_pool(message, "sum")

            reduce_function = sum_func

    bucketed_msg = op.nested_lod_reset(msg, bucketing_index)
    output = reduce_function(bucketed_msg)
    output_dim = output.shape[-1]

    empty_msg_flag = L.cast(num_edges > 0, dtype=output.dtype)
    output = output * empty_msg_flag

    init_output = L.fill_constant(shape=[num_nodes, output_dim],
                                  value=0,
                                  dtype=output.dtype)
    init_output.stop_gradient = True
    final_output = L.scatter(init_output, uniq_dst, output)
    return final_output
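A toy NumPy sketch of the "sum" branch: each edge message is scatter-added into the row of its destination node, which is what paddle_helper.scatter_add does on the zero-initialized output.

import numpy as np

num_nodes, out_dim = 3, 2
dst = np.array([2, 0, 2])                          # destination node of each edge
msg = np.array([[1., 1.], [2., 3.], [4., 5.]])     # one message per edge

output = np.zeros((num_nodes, out_dim), dtype="float32")
np.add.at(output, dst, msg)                        # sum messages per destination node
print(output)   # [[2. 3.] [0. 0.] [5. 6.]]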
Example #13
def topk_pool(gw, score, graph_id, ratio):
    """Implementation of topk pooling, where k means pooling ratio.
    
    Args:
        gw: Graph wrapper object.

        score: The attention score of all nodes, which is used to select 
               important nodes.

        graph_id: The graphs that the nodes belong to.

        ratio: The pooling ratio of nodes we want to select.

    Return:
        perm: The indices of the selected nodes.

        ratio_length: The number of selected nodes in each graph.
    """

    graph_lod = gw.graph_lod
    graph_nodes = gw.num_nodes
    num_graph = gw.num_graph

    num_nodes = L.ones(shape=[graph_nodes], dtype="float32")
    num_nodes = L.lod_reset(num_nodes, graph_lod)
    num_nodes_per_graph = L.sequence_pool(num_nodes, pool_type='sum')
    max_num_nodes = L.reduce_max(num_nodes_per_graph, dim=0)
    max_num_nodes = L.cast(max_num_nodes, dtype="int32")

    index = L.arange(0, gw.num_nodes, dtype="int64")
    offset = L.gather(graph_lod, graph_id, overwrite=False)
    index = (index - offset) + (graph_id * max_num_nodes)
    index.stop_gradient = True

    # padding
    dense_score = L.fill_constant(shape=[num_graph * max_num_nodes],
                                  dtype="float32",
                                  value=-999999)
    index = L.reshape(index, shape=[-1])
    dense_score = L.scatter(dense_score, index, updates=score)
    num_graph = L.cast(num_graph, dtype="int32")
    dense_score = L.reshape(dense_score, shape=[num_graph, max_num_nodes])

    # record the sorted index
    _, sort_index = L.argsort(dense_score, axis=-1, descending=True)

    # recover the index range
    graph_lod = graph_lod[:-1]
    graph_lod = L.reshape(graph_lod, shape=[-1, 1])
    graph_lod = L.cast(graph_lod, dtype="int64")
    sort_index = L.elementwise_add(sort_index, graph_lod, axis=-1)
    sort_index = L.reshape(sort_index, shape=[-1, 1])

    # use sequence_slice to choose selected node index
    pad_lod = L.arange(0, (num_graph + 1) * max_num_nodes,
                       step=max_num_nodes,
                       dtype="int32")
    sort_index = L.lod_reset(sort_index, pad_lod)
    ratio_length = L.ceil(num_nodes_per_graph * ratio)
    ratio_length = L.cast(ratio_length, dtype="int64")
    ratio_length = L.reshape(ratio_length, shape=[-1, 1])
    offset = L.zeros(shape=[num_graph, 1], dtype="int64")
    choose_index = L.sequence_slice(input=sort_index,
                                    offset=offset,
                                    length=ratio_length)

    perm = L.reshape(choose_index, shape=[-1])
    return perm, ratio_length
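A NumPy sketch (two toy graphs of 2 and 3 nodes) of the dense-padding index: each node's within-graph position plus graph_id * max_num_nodes places its score into a fixed-width row per graph, so a single argsort can rank all graphs at once.

import numpy as np

graph_lod = np.array([0, 2, 5])       # graph 0 owns nodes 0-1, graph 1 owns nodes 2-4
graph_id = np.array([0, 0, 1, 1, 1])
max_num_nodes = 3
score = np.array([0.3, 0.9, 0.1, 0.8, 0.5], dtype="float32")

node_index = np.arange(5)
offset = graph_lod[graph_id]                       # L.gather(graph_lod, graph_id)
dense_index = (node_index - offset) + graph_id * max_num_nodes
dense_score = np.full(2 * max_num_nodes, -999999.0, dtype="float32")
dense_score[dense_index] = score                   # L.scatter(dense_score, index, score)
print(dense_score.reshape(2, max_num_nodes))
# row 0: [0.3, 0.9, -999999.], row 1: [0.1, 0.8, 0.5]; argsort per row then ranks each graph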