Example #1
0
    def entropy(self):
        """Compute the Shannon entropy of this distribution, in nats.

        Probabilities are derived from ``self.logits`` by exponentiating and
        normalising along the last axis. The per-row maximum logit is
        subtracted first for numerical stability; this does not change the
        resulting probabilities.

        Returns:
            Tensor: Shannon entropy of the Categorical distribution, reduced
            over the last axis (that axis is kept). The data type is float32.

        Examples:
            .. code-block:: python

                import paddle
                from paddle.distribution import Categorical
                paddle.seed(100) # on CPU device
                x = paddle.rand([6])
                print(x)
                # [0.5535528  0.20714243 0.01162981
                #  0.51577556 0.36369765 0.2609165 ]
                cat = Categorical(x)
                cat.entropy()
                # [1.77528]
        """
        name = self.name + '_entropy'

        # Shift so the largest logit in each row becomes zero.
        row_max = nn.reduce_max(self.logits, dim=-1, keep_dim=True)
        shifted = self.logits - row_max

        exp_shifted = ops.exp(shifted)
        partition = nn.reduce_sum(exp_shifted, dim=-1, keep_dim=True)
        prob = exp_shifted / partition

        # log p_i = shifted_i - log(sum_j exp(shifted_j))
        log_prob = shifted - nn.log(partition)
        neg_entropy = nn.reduce_sum(prob * log_prob, dim=-1, keep_dim=True)

        # Entropy is -sum(p * log p); the final op carries the public name.
        return nn.scale(neg_entropy, scale=-1.0, name=name)
Example #2
0
    def softmax_with_cross_entropy(self, shard_logit, shard_label):
        """Softmax cross-entropy where the softmax statistics are all-reduced.

        The row maximum and the softmax denominator are each computed on this
        rank's logits along dim 1 and then combined across ranks with
        ``collective._c_allreduce`` ('max' and 'sum' respectively), so the
        log-probabilities of this shard are normalised with the combined
        statistics.

        Args:
            shard_logit: logits held by this rank; reductions run along dim 1.
            shard_label: labels passed to ``nn.one_hot`` with depth
                ``self.shard_dim`` and ``allow_out_of_range=True``.
                NOTE(review): presumably already offset to this shard's local
                class range -- confirm against the caller.

        Returns:
            tuple: ``(global_loss, shard_prob)`` where ``global_loss`` is the
            output of ``collective._c_reducescatter`` applied to this rank's
            loss and ``shard_prob`` is the exponential of this shard's
            log-probabilities.
        """
        # Combined row maximum, subtracted before exponentiating.
        local_max = nn.reduce_max(shard_logit, dim=1, keep_dim=True)
        row_max = collective._c_allreduce(local_max,
                                          reduce_type='max',
                                          use_calc_stream=True)
        shifted = nn.elementwise_sub(shard_logit, row_max)

        # Combined softmax denominator.
        exp_shifted = ops.exp(shifted)
        local_denom = nn.reduce_sum(exp_shifted, dim=1, keep_dim=True)
        denom = collective._c_allreduce(local_denom,
                                        reduce_type='sum',
                                        use_calc_stream=True)

        log_denom = nn.log(denom)
        log_prob = shifted - log_denom
        shard_prob = ops.exp(log_prob)

        label_mask = nn.one_hot(shard_label,
                                depth=self.shard_dim,
                                allow_out_of_range=True)
        # Masked entries are 0 and, assuming the all-reduce really sums every
        # rank's denominator, each log-probability is <= 0. The row minimum is
        # therefore the target's log-probability, or 0 when the mask row is
        # all zeros.
        picked_log_prob = nn.reduce_min(log_prob * label_mask,
                                        dim=1,
                                        keep_dim=True)
        local_loss = nn.scale(picked_log_prob, scale=-1.0)
        global_loss = collective._c_reducescatter(local_loss,
                                                  nranks=self.nranks,
                                                  use_calc_stream=True)
        return global_loss, shard_prob
Example #3
0
    def net(self, input, is_infer=False):
        """Build the text/tag matching graph and record its cost and metrics.

        The text ids are embedded, passed through a sequence convolution with
        max pooling and a fully connected layer, and compared by cosine
        similarity with the positive and negative tag embeddings. The cost is
        the mean of ``max(0, margin - cos_pos + cos_neg)``, where ``cos_neg``
        is the largest negative cosine along dim 1.

        Args:
            input: indexable container; items 0, 1 and 2 are used as the text
                ids, the positive tag ids and the negative tag ids.
            is_infer: when truthy, "correct" and "cos_pos" are stored in
                ``self._infer_results``; otherwise in ``self._metrics``.

        Side effects:
            Sets ``self._cost`` to the mean hinge loss. Nothing is returned.
        """

        def _embed(ids, vocab_size, table_name):
            # Look up the embeddings, then squeeze axis 1 of the result.
            looked_up = fluid.embedding(input=ids,
                                        size=[vocab_size, self.emb_dim],
                                        param_attr=table_name)
            return fluid.layers.squeeze(input=looked_up, axes=[1])

        text, pos_tag, neg_tag = input[0], input[1], input[2]

        text_emb = _embed(text, self.vocab_text_size, "text_emb")
        # Positive and negative tags share the "tag_emb" parameter.
        pos_tag_emb = _embed(pos_tag, self.vocab_tag_size, "tag_emb")
        neg_tag_emb = _embed(neg_tag, self.vocab_tag_size, "tag_emb")

        pooled = fluid.nets.sequence_conv_pool(input=text_emb,
                                               num_filters=self.hid_dim,
                                               filter_size=self.win_size,
                                               act="tanh",
                                               pool_type="max",
                                               param_attr="cnn")
        text_hid = fluid.layers.fc(input=pooled,
                                   size=self.emb_dim,
                                   param_attr="text_hid")

        cos_pos = nn.cos_sim(pos_tag_emb, text_hid)

        # Expand the text vector to line up with the negative tags.
        text_hid_expanded = fluid.layers.sequence_expand_as(x=text_hid,
                                                            y=neg_tag_emb)
        cos_neg_flat = nn.cos_sim(neg_tag_emb, text_hid_expanded)
        cos_neg_all = fluid.layers.sequence_reshape(input=cos_neg_flat,
                                                    new_dim=self.neg_size)
        # Keep only the largest negative cosine of each row.
        cos_neg = nn.reduce_max(cos_neg_all, dim=1, keep_dim=True)

        # Hinge loss: max(0, margin - cos_pos + cos_neg).
        margin_col = tensor.fill_constant_batch_size_like(input=cos_pos,
                                                          shape=[-1, 1],
                                                          value=self.margin,
                                                          dtype='float32')
        margin_minus_pos = nn.elementwise_sub(margin_col, cos_pos)
        raw_hinge = nn.elementwise_add(margin_minus_pos, cos_neg)
        zero_col = tensor.fill_constant_batch_size_like(input=raw_hinge,
                                                        shape=[-1, 1],
                                                        value=0.0,
                                                        dtype='float32')
        hinge = nn.elementwise_max(zero_col, raw_hinge)
        avg_cost = nn.mean(hinge)

        # Count the rows where the positive beats the best negative.
        pos_wins = tensor.cast(cf.less_than(cos_neg, cos_pos), dtype='float32')
        correct = nn.reduce_sum(pos_wins)
        self._cost = avg_cost

        outputs = self._infer_results if is_infer else self._metrics
        outputs["correct"] = correct
        outputs["cos_pos"] = cos_pos
Example #4
0
    def kl_divergence(self, other):
        """Compute the KL-divergence from this distribution to ``other``.

        Both sets of logits are shifted by their own row maximum for
        numerical stability and normalised along the last axis. The result is
        ``sum(p * (log p - log q))`` over that axis, where ``p`` comes from
        ``self.logits`` and ``q`` from ``other.logits``.

        Args:
            other (Categorical): instance of Categorical. The data type is
                float32. Its type is checked only outside dygraph mode.

        Returns:
            Tensor: kl-divergence between two Categorical distributions,
            reduced over the last axis (that axis is kept).

        Examples:
            .. code-block:: python

                import paddle
                from paddle.distribution import Categorical
                paddle.seed(100) # on CPU device
                x = paddle.rand([6])
                print(x)
                # [0.5535528  0.20714243 0.01162981
                #  0.51577556 0.36369765 0.2609165 ]
                paddle.seed(200) # on CPU device
                y = paddle.rand([6])
                print(y)
                # [0.77663314 0.90824795 0.15685187
                #  0.04279523 0.34468332 0.7955718 ]
                cat = Categorical(x)
                cat2 = Categorical(y)
                cat.kl_divergence(cat2)
                # [0.071952]
        """
        name = self.name + '_kl_divergence'
        if not in_dygraph_mode():
            check_type(other, 'other', Categorical, 'kl_divergence')

        # Shift each set of logits so its row maximum is zero.
        self_max = nn.reduce_max(self.logits, dim=-1, keep_dim=True)
        self_shifted = self.logits - self_max
        other_max = nn.reduce_max(other.logits, dim=-1, keep_dim=True)
        other_shifted = other.logits - other_max

        self_exp = ops.exp(self_shifted)
        other_exp = ops.exp(other_shifted)
        self_z = nn.reduce_sum(self_exp, dim=-1, keep_dim=True)
        other_z = nn.reduce_sum(other_exp, dim=-1, keep_dim=True)
        prob = self_exp / self_z

        # log p - log q, with each log-probability written as
        # (shifted logit - log partition).
        self_log_prob = self_shifted - nn.log(self_z)
        log_ratio = self_log_prob - other_shifted + nn.log(other_z)

        return nn.reduce_sum(prob * log_ratio,
                             dim=-1,
                             keep_dim=True,
                             name=name)
Example #5
0
def network(vocab_text_size,
            vocab_tag_size,
            emb_dim=10,
            hid_dim=1000,
            win_size=5,
            margin=0.1,
            neg_size=5):
    """Define the text/tag matching network and its hinge loss.

    Three int64 inputs named "text", "pos_tag" and "neg_tag" are declared.
    The text is embedded, run through a sequence convolution with max pooling
    and a fully connected layer, and compared by cosine similarity with the
    positive and negative tag embeddings.

    Args:
        vocab_text_size: first dimension of the "text_emb" embedding table.
        vocab_tag_size: first dimension of the shared "tag_emb" table.
        emb_dim: embedding width; also the output size of the fc layer.
        hid_dim: number of filters of the sequence convolution.
        win_size: filter size of the sequence convolution.
        margin: constant used in the hinge loss.
        neg_size: ``new_dim`` used when reshaping the negative cosines.
            NOTE(review): presumably the number of negative tags per
            sample -- confirm against the data reader.

    Returns:
        tuple: ``(avg_cost, correct, cos_pos)`` -- the mean of
        ``max(0, margin - cos_pos + cos_neg)``, the sum of the float-cast
        ``cos_neg < cos_pos`` comparison, and the positive cosine similarity.
    """

    def _id_input(name):
        # Every input is declared with the same shape, lod level and dtype.
        return io.data(name=name, shape=[1], lod_level=1, dtype='int64')

    def _embed(ids, vocab_size, table_name):
        return nn.embedding(input=ids,
                            size=[vocab_size, emb_dim],
                            param_attr=table_name)

    text = _id_input("text")
    pos_tag = _id_input("pos_tag")
    neg_tag = _id_input("neg_tag")

    text_emb = _embed(text, vocab_text_size, "text_emb")
    # Positive and negative tags share the "tag_emb" parameter.
    pos_tag_emb = _embed(pos_tag, vocab_tag_size, "tag_emb")
    neg_tag_emb = _embed(neg_tag, vocab_tag_size, "tag_emb")

    pooled = fluid.nets.sequence_conv_pool(input=text_emb,
                                           num_filters=hid_dim,
                                           filter_size=win_size,
                                           act="tanh",
                                           pool_type="max",
                                           param_attr="cnn")
    text_hid = fluid.layers.fc(input=pooled,
                               size=emb_dim,
                               param_attr="text_hid")

    cos_pos = nn.cos_sim(pos_tag_emb, text_hid)

    # Expand the text vector to line up with the negative tags.
    text_hid_expanded = fluid.layers.sequence_expand_as(x=text_hid,
                                                        y=neg_tag_emb)
    cos_neg_flat = nn.cos_sim(neg_tag_emb, text_hid_expanded)
    cos_neg_all = fluid.layers.sequence_reshape(input=cos_neg_flat,
                                                new_dim=neg_size)
    # Keep only the largest negative cosine of each row.
    cos_neg = nn.reduce_max(cos_neg_all, dim=1, keep_dim=True)

    # Hinge loss: max(0, margin - cos_pos + cos_neg).
    margin_col = tensor.fill_constant_batch_size_like(input=cos_pos,
                                                      shape=[-1, 1],
                                                      value=margin,
                                                      dtype='float32')
    margin_minus_pos = nn.elementwise_sub(margin_col, cos_pos)
    raw_hinge = nn.elementwise_add(margin_minus_pos, cos_neg)
    zero_col = tensor.fill_constant_batch_size_like(input=raw_hinge,
                                                    shape=[-1, 1],
                                                    value=0.0,
                                                    dtype='float32')
    hinge = nn.elementwise_max(zero_col, raw_hinge)
    avg_cost = nn.mean(hinge)

    # Count the rows where the positive beats the best negative.
    pos_wins = tensor.cast(cf.less_than(cos_neg, cos_pos), dtype='float32')
    correct = nn.reduce_sum(pos_wins)
    return avg_cost, correct, cos_pos