def softmax_with_cross_entropy(self, shard_logit, shard_label):
    # Subtract the global max logit (allreduce-max across shards) for numerical stability.
    shard_max = nn.reduce_max(shard_logit, dim=1, keep_dim=True)
    global_max = collective._c_allreduce(
        shard_max, reduce_type='max', use_calc_stream=True)
    shard_logit_new = nn.elementwise_sub(shard_logit, global_max)

    # Accumulate the softmax denominator over all shards (allreduce-sum).
    shard_exp = ops.exp(shard_logit_new)
    shard_demon = nn.reduce_sum(shard_exp, dim=1, keep_dim=True)
    global_demon = collective._c_allreduce(
        shard_demon, reduce_type='sum', use_calc_stream=True)

    global_log_demon = nn.log(global_demon)
    shard_log_prob = shard_logit_new - global_log_demon
    shard_prob = ops.exp(shard_log_prob)

    # Pick the target log-probability on the shard that owns the label.
    # Labels outside this shard give an all-zero one-hot row, so reduce_min
    # selects the (non-positive) target log-prob only where the label is local.
    shard_one_hot = nn.one_hot(
        shard_label, depth=self.shard_dim, allow_out_of_range=True)
    target_log_prob = nn.reduce_min(
        shard_log_prob * shard_one_hot, dim=1, keep_dim=True)
    shard_loss = nn.scale(target_log_prob, scale=-1.0)
    global_loss = collective._c_reducescatter(
        shard_loss, nranks=self.nranks, use_calc_stream=True)
    return global_loss, shard_prob
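# Hedged illustration only: a single-process NumPy reference (not part of the
# model code) for the sharded softmax cross-entropy above. Concatenating the
# shards stands in for the _c_allreduce collectives; function and array names
# are assumptions.
import numpy as np

def sharded_softmax_xent_reference(shard_logits, labels):
    """shard_logits: list of [batch, shard_dim] arrays, one per rank;
    labels: [batch] class ids into the concatenated class dimension."""
    full_logits = np.concatenate(shard_logits, axis=1)          # gather all shards
    global_max = full_logits.max(axis=1, keepdims=True)         # allreduce(max)
    shifted = full_logits - global_max                          # numerical stability
    global_demon = np.exp(shifted).sum(axis=1, keepdims=True)   # allreduce(sum)
    log_prob = shifted - np.log(global_demon)
    return -log_prob[np.arange(len(labels)), labels]            # per-sample loss

# Example: two "ranks", each holding 3 of 6 classes.
rng = np.random.RandomState(0)
shards = [rng.randn(4, 3), rng.randn(4, 3)]
print(sharded_softmax_xent_reference(shards, np.array([0, 2, 4, 5])))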
def net(self, input, is_infer=False):
    """Network definition."""
    text = input[0]
    pos_tag = input[1]
    neg_tag = input[2]

    # Shared embedding table ("tag_emb") for positive and negative tags.
    text_emb = fluid.embedding(
        input=text, size=[self.vocab_text_size, self.emb_dim],
        param_attr="text_emb")
    text_emb = fluid.layers.squeeze(input=text_emb, axes=[1])
    pos_tag_emb = fluid.embedding(
        input=pos_tag, size=[self.vocab_tag_size, self.emb_dim],
        param_attr="tag_emb")
    pos_tag_emb = fluid.layers.squeeze(input=pos_tag_emb, axes=[1])
    neg_tag_emb = fluid.embedding(
        input=neg_tag, size=[self.vocab_tag_size, self.emb_dim],
        param_attr="tag_emb")
    neg_tag_emb = fluid.layers.squeeze(input=neg_tag_emb, axes=[1])

    # Convolution + max pooling over the text, then project to emb_dim.
    conv_1d = fluid.nets.sequence_conv_pool(
        input=text_emb, num_filters=self.hid_dim, filter_size=self.win_size,
        act="tanh", pool_type="max", param_attr="cnn")
    text_hid = fluid.layers.fc(
        input=conv_1d, size=self.emb_dim, param_attr="text_hid")

    cos_pos = nn.cos_sim(pos_tag_emb, text_hid)
    mul_text_hid = fluid.layers.sequence_expand_as(x=text_hid, y=neg_tag_emb)
    mul_cos_neg = nn.cos_sim(neg_tag_emb, mul_text_hid)
    cos_neg_all = fluid.layers.sequence_reshape(
        input=mul_cos_neg, new_dim=self.neg_size)
    # Choose the max negative cosine.
    cos_neg = nn.reduce_max(cos_neg_all, dim=1, keep_dim=True)

    # Calculate the hinge loss: max(0, margin - cos_pos + cos_neg).
    loss_part1 = nn.elementwise_sub(
        tensor.fill_constant_batch_size_like(
            input=cos_pos, shape=[-1, 1], value=self.margin, dtype='float32'),
        cos_pos)
    loss_part2 = nn.elementwise_add(loss_part1, cos_neg)
    loss_part3 = nn.elementwise_max(
        tensor.fill_constant_batch_size_like(
            input=loss_part2, shape=[-1, 1], value=0.0, dtype='float32'),
        loss_part2)
    avg_cost = nn.mean(loss_part3)

    less = tensor.cast(cf.less_than(cos_neg, cos_pos), dtype='float32')
    correct = nn.reduce_sum(less)
    self._cost = avg_cost

    if is_infer:
        self._infer_results["correct"] = correct
        self._infer_results["cos_pos"] = cos_pos
    else:
        self._metrics["correct"] = correct
        self._metrics["cos_pos"] = cos_pos
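# Hedged illustration only: a small NumPy reference (not part of the model) for
# the scoring and loss path above: cosine similarity against the positive tag,
# the max cosine over neg_size negatives, and the hinge loss
# max(0, margin - cos_pos + cos_neg). Array and function names are assumptions.
import numpy as np

def tagspace_hinge_reference(text_hid, pos_tag_emb, neg_tag_embs, margin=0.1):
    """text_hid, pos_tag_emb: [batch, emb_dim]; neg_tag_embs: [batch, neg_size, emb_dim]."""
    def cos(a, b):
        return (a * b).sum(-1) / (np.linalg.norm(a, axis=-1) * np.linalg.norm(b, axis=-1))
    cos_pos = cos(text_hid, pos_tag_emb)                        # [batch]
    cos_neg = cos(text_hid[:, None, :], neg_tag_embs).max(-1)   # hardest negative, [batch]
    loss = np.maximum(0.0, margin - cos_pos + cos_neg)
    correct = (cos_neg < cos_pos).sum()
    return loss.mean(), correct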
def network(vocab_text_size, vocab_tag_size, emb_dim=10, hid_dim=1000,
            win_size=5, margin=0.1, neg_size=5):
    """Network definition."""
    text = io.data(name="text", shape=[1], lod_level=1, dtype='int64')
    pos_tag = io.data(name="pos_tag", shape=[1], lod_level=1, dtype='int64')
    neg_tag = io.data(name="neg_tag", shape=[1], lod_level=1, dtype='int64')

    # Shared embedding table ("tag_emb") for positive and negative tags.
    text_emb = nn.embedding(
        input=text, size=[vocab_text_size, emb_dim], param_attr="text_emb")
    pos_tag_emb = nn.embedding(
        input=pos_tag, size=[vocab_tag_size, emb_dim], param_attr="tag_emb")
    neg_tag_emb = nn.embedding(
        input=neg_tag, size=[vocab_tag_size, emb_dim], param_attr="tag_emb")

    # Convolution + max pooling over the text, then project to emb_dim.
    conv_1d = fluid.nets.sequence_conv_pool(
        input=text_emb, num_filters=hid_dim, filter_size=win_size,
        act="tanh", pool_type="max", param_attr="cnn")
    text_hid = fluid.layers.fc(input=conv_1d, size=emb_dim, param_attr="text_hid")

    cos_pos = nn.cos_sim(pos_tag_emb, text_hid)
    mul_text_hid = fluid.layers.sequence_expand_as(x=text_hid, y=neg_tag_emb)
    mul_cos_neg = nn.cos_sim(neg_tag_emb, mul_text_hid)
    cos_neg_all = fluid.layers.sequence_reshape(input=mul_cos_neg, new_dim=neg_size)
    # Choose the max negative cosine.
    cos_neg = nn.reduce_max(cos_neg_all, dim=1, keep_dim=True)

    # Calculate the hinge loss: max(0, margin - cos_pos + cos_neg).
    loss_part1 = nn.elementwise_sub(
        tensor.fill_constant_batch_size_like(
            input=cos_pos, shape=[-1, 1], value=margin, dtype='float32'),
        cos_pos)
    loss_part2 = nn.elementwise_add(loss_part1, cos_neg)
    loss_part3 = nn.elementwise_max(
        tensor.fill_constant_batch_size_like(
            input=loss_part2, shape=[-1, 1], value=0.0, dtype='float32'),
        loss_part2)
    avg_cost = nn.mean(loss_part3)

    less = tensor.cast(cf.less_than(cos_neg, cos_pos), dtype='float32')
    correct = nn.reduce_sum(less)
    return avg_cost, correct, cos_pos
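# Hedged usage sketch, not part of the original file: building and initializing
# a program around network() under the PaddlePaddle 1.x static graph, assuming
# io/nn/tensor/cf are the fluid.layers aliases imported by this module. The
# hyper-parameter values are placeholders; per-batch feeds for "text",
# "pos_tag" and "neg_tag" would be LoD tensors (e.g. via fluid.create_lod_tensor).
import paddle.fluid as fluid

main_prog, startup_prog = fluid.Program(), fluid.Program()
with fluid.program_guard(main_prog, startup_prog):
    avg_cost, correct, cos_pos = network(vocab_text_size=100000,
                                         vocab_tag_size=100)
    fluid.optimizer.SGD(learning_rate=0.01).minimize(avg_cost)

exe = fluid.Executor(fluid.CPUPlace())
exe.run(startup_prog)
# exe.run(main_prog, feed={...}, fetch_list=[avg_cost, correct]) per batch.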
def forward(self, pos, neg):
    # Pairwise hinge loss: max(0, margin - pos + neg), element-wise.
    loss_part1 = nn.elementwise_sub(
        tensor.fill_constant_batch_size_like(
            input=pos, shape=[-1, 1], value=self.margin, dtype='float32'),
        pos)
    loss_part2 = nn.elementwise_add(loss_part1, neg)
    loss_part3 = nn.elementwise_max(
        tensor.fill_constant_batch_size_like(
            input=loss_part2, shape=[-1, 1], value=0.0, dtype='float32'),
        loss_part2)
    return loss_part3
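# Hedged illustration only: a quick numeric check of the hinge formula above
# with assumed scores and margin = 0.1:
#   pos = 0.8, neg = 0.3  ->  max(0, 0.1 - 0.8 + 0.3) = 0.0   (pair ranked correctly, no penalty)
#   pos = 0.2, neg = 0.4  ->  max(0, 0.1 - 0.2 + 0.4) = 0.3   (pair mis-ranked, penalized)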
def train_net(self):
    # Input fields for query, pos_title, neg_title.
    q_slots = [
        io.data(name="q%d" % i, shape=[1], lod_level=1, dtype='int64')
        for i in range(len(self.query_encoders))
    ]
    pt_slots = [
        io.data(name="pt%d" % i, shape=[1], lod_level=1, dtype='int64')
        for i in range(len(self.title_encoders))
    ]
    nt_slots = [
        io.data(name="nt%d" % i, shape=[1], lod_level=1, dtype='int64')
        for i in range(len(self.title_encoders))
    ]

    # Look up the shared embedding table ("emb") for each slot.
    q_embs = [
        nn.embedding(input=query, size=self.emb_shape, param_attr="emb")
        for query in q_slots
    ]
    pt_embs = [
        nn.embedding(input=title, size=self.emb_shape, param_attr="emb")
        for title in pt_slots
    ]
    nt_embs = [
        nn.embedding(input=title, size=self.emb_shape, param_attr="emb")
        for title in nt_slots
    ]

    # Encode each embedding field with its encoder.
    q_encodes = [
        self.query_encoders[i].forward(emb) for i, emb in enumerate(q_embs)
    ]
    pt_encodes = [
        self.title_encoders[i].forward(emb) for i, emb in enumerate(pt_embs)
    ]
    nt_encodes = [
        self.title_encoders[i].forward(emb) for i, emb in enumerate(nt_embs)
    ]

    # Concatenate the multi-view encodings for query, pos_title, neg_title.
    q_concat = nn.concat(q_encodes)
    pt_concat = nn.concat(pt_encodes)
    nt_concat = nn.concat(nt_encodes)

    # Hidden-layer projections; positive and negative titles share 't_fc' weights.
    q_hid = nn.fc(q_concat, size=self.hidden_size,
                  param_attr='q_fc.w', bias_attr='q_fc.b')
    pt_hid = nn.fc(pt_concat, size=self.hidden_size,
                   param_attr='t_fc.w', bias_attr='t_fc.b')
    nt_hid = nn.fc(nt_concat, size=self.hidden_size,
                   param_attr='t_fc.w', bias_attr='t_fc.b')

    # Cosine similarity between the query and each title.
    cos_pos = nn.cos_sim(q_hid, pt_hid)
    cos_neg = nn.cos_sim(q_hid, nt_hid)

    # Pairwise hinge loss: max(0, margin - cos_pos + cos_neg).
    loss_part1 = nn.elementwise_sub(
        tensor.fill_constant_batch_size_like(
            input=cos_pos, shape=[-1, 1], value=self.margin, dtype='float32'),
        cos_pos)
    loss_part2 = nn.elementwise_add(loss_part1, cos_neg)
    loss_part3 = nn.elementwise_max(
        tensor.fill_constant_batch_size_like(
            input=loss_part2, shape=[-1, 1], value=0.0, dtype='float32'),
        loss_part2)
    avg_cost = nn.mean(loss_part3)

    correct = self.get_correct(cos_neg, cos_pos)

    return q_slots + pt_slots + nt_slots, avg_cost, correct
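# Hedged illustration only: a NumPy reference (not the repo's code) for the
# multi-view pairwise ranking above. Per-slot encodings are concatenated,
# positive and negative titles share one projection (W_t, b_t here, mirroring
# the shared 't_fc.w'/'t_fc.b' attrs), and the same hinge loss as before is
# applied to the query/title cosines. All names are assumptions.
import numpy as np

def multiview_pairwise_reference(q_views, pt_views, nt_views,
                                 W_q, b_q, W_t, b_t, margin=0.1):
    """Each *_views is a list of [batch, dim_i] arrays, one per view."""
    def cos(a, b):
        return (a * b).sum(-1) / (np.linalg.norm(a, axis=-1) * np.linalg.norm(b, axis=-1))
    q_hid = np.concatenate(q_views, axis=1) @ W_q + b_q
    pt_hid = np.concatenate(pt_views, axis=1) @ W_t + b_t   # shared title weights
    nt_hid = np.concatenate(nt_views, axis=1) @ W_t + b_t
    cos_pos, cos_neg = cos(q_hid, pt_hid), cos(q_hid, nt_hid)
    loss = np.maximum(0.0, margin - cos_pos + cos_neg).mean()
    correct = (cos_neg < cos_pos).sum()
    return loss, correct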