Example #1
    def net(self, input, is_infer=False):
        """ network"""
        text = input[0]
        pos_tag = input[1]
        neg_tag = input[2]

        text_emb = fluid.embedding(input=text,
                                   size=[self.vocab_text_size, self.emb_dim],
                                   param_attr="text_emb")
        text_emb = fluid.layers.squeeze(input=text_emb, axes=[1])
        pos_tag_emb = fluid.embedding(input=pos_tag,
                                      size=[self.vocab_tag_size, self.emb_dim],
                                      param_attr="tag_emb")
        pos_tag_emb = fluid.layers.squeeze(input=pos_tag_emb, axes=[1])
        neg_tag_emb = fluid.embedding(input=neg_tag,
                                      size=[self.vocab_tag_size, self.emb_dim],
                                      param_attr="tag_emb")
        neg_tag_emb = fluid.layers.squeeze(input=neg_tag_emb, axes=[1])

        conv_1d = fluid.nets.sequence_conv_pool(input=text_emb,
                                                num_filters=self.hid_dim,
                                                filter_size=self.win_size,
                                                act="tanh",
                                                pool_type="max",
                                                param_attr="cnn")
        text_hid = fluid.layers.fc(input=conv_1d,
                                   size=self.emb_dim,
                                   param_attr="text_hid")
        cos_pos = nn.cos_sim(pos_tag_emb, text_hid)
        mul_text_hid = fluid.layers.sequence_expand_as(x=text_hid,
                                                       y=neg_tag_emb)
        mul_cos_neg = nn.cos_sim(neg_tag_emb, mul_text_hid)
        cos_neg_all = fluid.layers.sequence_reshape(input=mul_cos_neg,
                                                    new_dim=self.neg_size)
        # choose max negative cosine
        cos_neg = nn.reduce_max(cos_neg_all, dim=1, keep_dim=True)
        # calculate hinge loss
        loss_part1 = nn.elementwise_sub(
            tensor.fill_constant_batch_size_like(input=cos_pos,
                                                 shape=[-1, 1],
                                                 value=self.margin,
                                                 dtype='float32'), cos_pos)
        loss_part2 = nn.elementwise_add(loss_part1, cos_neg)
        loss_part3 = nn.elementwise_max(
            tensor.fill_constant_batch_size_like(input=loss_part2,
                                                 shape=[-1, 1],
                                                 value=0.0,
                                                 dtype='float32'), loss_part2)
        avg_cost = nn.mean(loss_part3)
        less = tensor.cast(cf.less_than(cos_neg, cos_pos), dtype='float32')
        correct = nn.reduce_sum(less)
        self._cost = avg_cost

        if is_infer:
            self._infer_results["correct"] = correct
            self._infer_results["cos_pos"] = cos_pos
        else:
            self._metrics["correct"] = correct
            self._metrics["cos_pos"] = cos_pos
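The hinge loss above is mean(max(0, margin - cos_pos + max_k cos_neg_k)), taking the hardest of the neg_size negative tags. A minimal NumPy sketch of the same arithmetic (illustrative only; the function name and arrays below are hypothetical, not part of the example):

import numpy as np

def pairwise_hinge_loss(cos_pos, cos_neg_all, margin=0.1):
    # cos_pos: (batch, 1) cosine to the positive tag
    # cos_neg_all: (batch, neg_size) cosines to the sampled negative tags
    cos_neg = cos_neg_all.max(axis=1, keepdims=True)  # hardest negative
    return np.maximum(0.0, margin - cos_pos + cos_neg).mean()
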
Example #2
def network(vocab_text_size,
            vocab_tag_size,
            emb_dim=10,
            hid_dim=1000,
            win_size=5,
            margin=0.1,
            neg_size=5):
    """ network definition """
    text = io.data(name="text", shape=[1], lod_level=1, dtype='int64')
    pos_tag = io.data(name="pos_tag", shape=[1], lod_level=1, dtype='int64')
    neg_tag = io.data(name="neg_tag", shape=[1], lod_level=1, dtype='int64')
    text_emb = nn.embedding(input=text,
                            size=[vocab_text_size, emb_dim],
                            param_attr="text_emb")
    pos_tag_emb = nn.embedding(input=pos_tag,
                               size=[vocab_tag_size, emb_dim],
                               param_attr="tag_emb")
    neg_tag_emb = nn.embedding(input=neg_tag,
                               size=[vocab_tag_size, emb_dim],
                               param_attr="tag_emb")

    conv_1d = fluid.nets.sequence_conv_pool(input=text_emb,
                                            num_filters=hid_dim,
                                            filter_size=win_size,
                                            act="tanh",
                                            pool_type="max",
                                            param_attr="cnn")
    text_hid = fluid.layers.fc(input=conv_1d,
                               size=emb_dim,
                               param_attr="text_hid")
    cos_pos = nn.cos_sim(pos_tag_emb, text_hid)
    mul_text_hid = fluid.layers.sequence_expand_as(x=text_hid, y=neg_tag_emb)
    mul_cos_neg = nn.cos_sim(neg_tag_emb, mul_text_hid)
    cos_neg_all = fluid.layers.sequence_reshape(input=mul_cos_neg,
                                                new_dim=neg_size)
    # choose max negative cosine
    cos_neg = nn.reduce_max(cos_neg_all, dim=1, keep_dim=True)
    # calculate hinge loss
    loss_part1 = nn.elementwise_sub(
        tensor.fill_constant_batch_size_like(input=cos_pos,
                                             shape=[-1, 1],
                                             value=margin,
                                             dtype='float32'), cos_pos)
    loss_part2 = nn.elementwise_add(loss_part1, cos_neg)
    loss_part3 = nn.elementwise_max(
        tensor.fill_constant_batch_size_like(input=loss_part2,
                                             shape=[-1, 1],
                                             value=0.0,
                                             dtype='float32'), loss_part2)
    avg_cost = nn.mean(loss_part3)
    less = tensor.cast(cf.less_than(cos_neg, cos_pos), dtype='float32')
    correct = nn.reduce_sum(less)
    return avg_cost, correct, cos_pos
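Assuming the usual fluid static-graph setup of this era, the network() function above could be dropped into a training program roughly as follows (a sketch under those assumptions, not code from the example's repository):

import paddle.fluid as fluid

avg_cost, correct, cos_pos = network(vocab_text_size=100000, vocab_tag_size=100)
optimizer = fluid.optimizer.SGD(learning_rate=0.01)
optimizer.minimize(avg_cost)

exe = fluid.Executor(fluid.CPUPlace())
exe.run(fluid.default_startup_program())
# then feed batches of (text, pos_tag, neg_tag) LoD tensors and fetch avg_cost / correct
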
Example #3
    def train(self):
        user_data = io.data(name="user", shape=[1], dtype="int64", lod_level=1)
        pos_item_data = io.data(name="p_item",
                                shape=[1],
                                dtype="int64",
                                lod_level=1)
        neg_item_data = io.data(name="n_item",
                                shape=[1],
                                dtype="int64",
                                lod_level=1)
        user_emb = nn.embedding(input=user_data,
                                size=self.emb_shape,
                                param_attr="emb.item")
        pos_item_emb = nn.embedding(input=pos_item_data,
                                    size=self.emb_shape,
                                    param_attr="emb.item")
        neg_item_emb = nn.embedding(input=neg_item_data,
                                    size=self.emb_shape,
                                    param_attr="emb.item")
        user_enc = self.user_encoder.forward(user_emb)
        pos_item_enc = self.item_encoder.forward(pos_item_emb)
        neg_item_enc = self.item_encoder.forward(neg_item_emb)
        user_hid = nn.fc(input=user_enc,
                         size=self.hidden_size,
                         param_attr='user.w',
                         bias_attr="user.b")
        pos_item_hid = nn.fc(input=pos_item_enc,
                             size=self.hidden_size,
                             param_attr='item.w',
                             bias_attr="item.b")
        neg_item_hid = nn.fc(input=neg_item_enc,
                             size=self.hidden_size,
                             param_attr='item.w',
                             bias_attr="item.b")
        cos_pos = nn.cos_sim(user_hid, pos_item_hid)
        cos_neg = nn.cos_sim(user_hid, neg_item_hid)
        hinge_loss = self.pairwise_hinge_loss.forward(cos_pos, cos_neg)
        avg_cost = nn.mean(hinge_loss)
        correct = self.get_correct(cos_neg, cos_pos)

        return [user_data, pos_item_data,
                neg_item_data], cos_pos, avg_cost, correct
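The pairwise_hinge_loss.forward and get_correct helpers are not shown here, but Examples #1 and #5 inline the same computation, so a reconstruction with the same fluid layers would plausibly look like this (a sketch inferred from those examples; the margin value is assumed):

def hinge_loss_forward(cos_pos, cos_neg, margin=0.1):
    # max(0, margin - cos_pos + cos_neg), built from fill_constant_batch_size_like
    part1 = nn.elementwise_sub(
        tensor.fill_constant_batch_size_like(input=cos_pos, shape=[-1, 1],
                                             value=margin, dtype='float32'),
        cos_pos)
    part2 = nn.elementwise_add(part1, cos_neg)
    return nn.elementwise_max(
        tensor.fill_constant_batch_size_like(input=part2, shape=[-1, 1],
                                             value=0.0, dtype='float32'),
        part2)

def get_correct(cos_neg, cos_pos):
    # count pairs where the positive item outscores the negative one
    less = tensor.cast(cf.less_than(cos_neg, cos_pos), dtype='float32')
    return nn.reduce_sum(less)
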
Example #4
    def softmax_classify(self,
                         x,
                         label,
                         param_attr=None,
                         use_bias=True,
                         bias_attr=None):
        flatten_dim = reduce(lambda a, b: a * b, x.shape[1:], 1)
        weight, bias = self.create_parameter(dtype=x.dtype,
                                             in_dim=flatten_dim,
                                             param_attr=param_attr,
                                             bias_attr=bias_attr,
                                             use_bias=use_bias)

        x_all = collective._c_allgather(x,
                                        nranks=self.nranks,
                                        use_calc_stream=True)
        label_all = collective._c_allgather(label,
                                            nranks=self.nranks,
                                            use_calc_stream=True)
        label_all.stop_gradient = True

        shard_fc = nn.mul(x_all, weight, x_num_col_dims=1)
        if use_bias:
            shard_fc = nn.elementwise_add(shard_fc, bias)

        shard_label = nn.shard_index(label_all,
                                     index_num=self.nclasses,
                                     nshards=self.nranks,
                                     shard_id=self.rank_id,
                                     ignore_value=-1)
        shard_label.stop_gradient = True

        global_loss, shard_prob = self.softmax_with_cross_entropy(
            shard_fc, shard_label)
        avg_loss = nn.mean(global_loss)

        avg_loss._set_info('shard_logit', shard_fc)
        avg_loss._set_info('shard_prob', shard_prob)
        avg_loss._set_info('shard_label', shard_label)
        avg_loss._set_info('shard_dim', self.shard_dim)

        return avg_loss
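nn.shard_index maps global class ids into local ids for the slice of classes owned by the current rank and marks every other class with ignore_value, which is what lets each rank run softmax_with_cross_entropy on only its shard of the classifier. A rough NumPy illustration of that remapping (the helper is hypothetical, for intuition only):

import numpy as np

def shard_index(labels, index_num, nshards, shard_id, ignore_value=-1):
    shard_size = (index_num + nshards - 1) // nshards  # classes per rank (ceil split assumed)
    lo, hi = shard_id * shard_size, (shard_id + 1) * shard_size
    local = labels - lo
    local[(labels < lo) | (labels >= hi)] = ignore_value
    return local

labels = np.array([0, 3, 7, 9])
print(shard_index(labels, index_num=10, nshards=2, shard_id=1))  # [-1 -1  2  4]
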
Example #5
    def train_net(self):
        # input fields for query, pos_title, neg_title
        q_slots = [
            io.data(name="q%d" % i, shape=[1], lod_level=1, dtype='int64')
            for i in range(len(self.query_encoders))
        ]
        pt_slots = [
            io.data(name="pt%d" % i, shape=[1], lod_level=1, dtype='int64')
            for i in range(len(self.title_encoders))
        ]
        nt_slots = [
            io.data(name="nt%d" % i, shape=[1], lod_level=1, dtype='int64')
            for i in range(len(self.title_encoders))
        ]

        # lookup embedding for each slot
        q_embs = [
            nn.embedding(input=query, size=self.emb_shape, param_attr="emb")
            for query in q_slots
        ]
        pt_embs = [
            nn.embedding(input=title, size=self.emb_shape, param_attr="emb")
            for title in pt_slots
        ]
        nt_embs = [
            nn.embedding(input=title, size=self.emb_shape, param_attr="emb")
            for title in nt_slots
        ]

        # encode each embedding field with encoder
        q_encodes = [
            self.query_encoders[i].forward(emb) for i, emb in enumerate(q_embs)
        ]
        pt_encodes = [
            self.title_encoders[i].forward(emb)
            for i, emb in enumerate(pt_embs)
        ]
        nt_encodes = [
            self.title_encoders[i].forward(emb)
            for i, emb in enumerate(nt_embs)
        ]

        # concat multi view for query, pos_title, neg_title
        q_concat = nn.concat(q_encodes)
        pt_concat = nn.concat(pt_encodes)
        nt_concat = nn.concat(nt_encodes)

        # projection of hidden layer
        q_hid = nn.fc(q_concat,
                      size=self.hidden_size,
                      param_attr='q_fc.w',
                      bias_attr='q_fc.b')
        pt_hid = nn.fc(pt_concat,
                       size=self.hidden_size,
                       param_attr='t_fc.w',
                       bias_attr='t_fc.b')
        nt_hid = nn.fc(nt_concat,
                       size=self.hidden_size,
                       param_attr='t_fc.w',
                       bias_attr='t_fc.b')

        # cosine of hidden layers
        cos_pos = nn.cos_sim(q_hid, pt_hid)
        cos_neg = nn.cos_sim(q_hid, nt_hid)

        # pairwise hinge_loss
        loss_part1 = nn.elementwise_sub(
            tensor.fill_constant_batch_size_like(input=cos_pos,
                                                 shape=[-1, 1],
                                                 value=self.margin,
                                                 dtype='float32'), cos_pos)

        loss_part2 = nn.elementwise_add(loss_part1, cos_neg)

        loss_part3 = nn.elementwise_max(
            tensor.fill_constant_batch_size_like(input=loss_part2,
                                                 shape=[-1, 1],
                                                 value=0.0,
                                                 dtype='float32'), loss_part2)

        avg_cost = nn.mean(loss_part3)
        correct = self.get_correct(cos_neg, cos_pos)

        return q_slots + pt_slots + nt_slots, avg_cost, correct
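The list returned as q_slots + pt_slots + nt_slots fixes the feed order, so a feeder can be built directly from it. A hedged wiring sketch (the model instance, learning rate, and place are assumptions, not from the example):

inputs, avg_cost, correct = model.train_net()
optimizer = fluid.optimizer.Adam(learning_rate=0.001)
optimizer.minimize(avg_cost)

feeder = fluid.DataFeeder(feed_list=inputs, place=fluid.CPUPlace())
exe = fluid.Executor(fluid.CPUPlace())
exe.run(fluid.default_startup_program())
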
Example #6
    def arcface_classify(self,
                         x,
                         label,
                         margin=0.5,
                         logit_scale=64,
                         param_attr=None):
        '''
        reference: ArcFace. https://arxiv.org/abs/1801.07698
        '''
        flatten_dim = reduce(lambda a, b: a * b, x.shape[1:], 1)
        weight, bias = self.create_parameter(dtype=x.dtype,
                                             in_dim=flatten_dim,
                                             param_attr=param_attr,
                                             use_bias=False)

        # normalize x
        x_l2 = ops.sqrt(nn.reduce_sum(ops.square(x), dim=1))
        norm_x = nn.elementwise_div(x, x_l2, axis=0)

        norm_x_all = collective._c_allgather(norm_x,
                                             nranks=self.nranks,
                                             use_calc_stream=True)
        label_all = collective._c_allgather(label,
                                            nranks=self.nranks,
                                            use_calc_stream=True)
        label_all.stop_gradient = True
        shard_label = nn.shard_index(label_all,
                                     index_num=self.nclasses,
                                     nshards=self.nranks,
                                     shard_id=self.rank_id,
                                     ignore_value=-1)
        # TODO check necessary
        shard_label.stop_gradient = True

        # normalize weight
        weight_l2 = ops.sqrt(nn.reduce_sum(ops.square(weight), dim=0))
        norm_weight = nn.elementwise_div(weight, weight_l2, axis=1)

        shard_cos = nn.mul(norm_x_all, norm_weight, x_num_col_dims=1)

        theta = ops.acos(shard_cos)
        margin_cos = ops.cos(theta + margin)

        shard_one_hot = nn.one_hot(shard_label,
                                   depth=self.shard_dim,
                                   allow_out_of_range=True)
        # TODO check necessary
        shard_one_hot.stop_gradient = True

        diff = (margin_cos - shard_cos) * shard_one_hot
        shard_target_cos = shard_cos + diff
        shard_logit = nn.scale(shard_target_cos, scale=logit_scale)

        global_loss, shard_prob = self.softmax_with_cross_entropy(
            shard_logit, shard_label)
        avg_loss = nn.mean(global_loss)

        avg_loss._set_info('shard_logit', shard_logit)
        avg_loss._set_info('shard_prob', shard_prob)
        avg_loss._set_info('shard_label', shard_label)
        avg_loss._set_info('shard_dim', self.shard_dim)

        return avg_loss
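The core ArcFace step above replaces cos(theta) with cos(theta + margin) only for the ground-truth class and then scales by logit_scale. A minimal single-device NumPy sketch of that step, ignoring the sharding and collectives (illustrative only):

import numpy as np

def arcface_logits(cosine, labels, margin=0.5, scale=64.0):
    # cosine: (batch, nclasses) from L2-normalized features and weights
    theta = np.arccos(np.clip(cosine, -1.0, 1.0))
    target = np.cos(theta + margin)             # additive angular margin
    one_hot = np.eye(cosine.shape[1])[labels]   # pick the ground-truth column
    return scale * (cosine + (target - cosine) * one_hot)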