def exist_objs_3(keep, masks, classes, scores, upsampled_size_out, resize_shape, ori_shape):
    """Keep the selected objects, take the top-scoring ones and rescale their masks.

    Args:
        keep: indices of the objects that survived the previous filter;
            flattened to 1-D before being used as a gather index.
        masks: per-object mask probabilities.
        classes: per-object class ids.
        scores: per-object scores.
        upsampled_size_out: output size of the first bilinear resize.
        resize_shape: its first two entries bound the crop along axes 2 and 3.
        ori_shape: its first two entries are the output size of the last resize.

    Returns:
        ``(masks, classes, scores)``; ``masks`` is a float32 tensor holding
        1.0 where the resized probability exceeds ``cfg['mask_thr']``.
    """
    keep_inds = L.reshape(keep, (-1,))
    keep_inds.stop_gradient = True
    kept_masks = L.gather(masks, keep_inds)      # [M4, s4, s4] mask probabilities of M4 objects
    kept_scores = L.gather(scores, keep_inds)    # [M4, ] scores of M4 objects
    kept_classes = L.gather(classes, keep_inds)  # [M4, ] class ids of M4 objects

    # Fifth filter: keep only the cfg['max_per_img'] highest-scoring objects.
    _, order = L.argsort(kept_scores, axis=-1, descending=True)
    top_inds = order[:cfg['max_per_img']]
    top_inds.stop_gradient = True
    top_masks = L.gather(kept_masks, top_inds)      # [M5, s4, s4] mask probabilities of M5 objects
    top_scores = L.gather(kept_scores, top_inds)    # [M5, ] scores of M5 objects
    top_classes = L.gather(kept_classes, top_inds)  # [M5, ] class ids of M5 objects

    # Interpolate to the size of the network input tensor.
    batched = L.unsqueeze(top_masks, axes=[0])
    resized = L.resize_bilinear(
        batched,
        out_shape=upsampled_size_out,
        align_corners=False,
        align_mode=0)

    # Crop away the padded (black) border.
    cropped = L.slice(resized, axes=[2], starts=[0], ends=[resize_shape[0]])
    cropped = L.slice(cropped, axes=[3], starts=[0], ends=[resize_shape[1]])

    # Interpolate to the original image size.
    restored = L.resize_bilinear(
        cropped,
        out_shape=ori_shape[:2],
        align_corners=False,
        align_mode=0)

    # Binarize the masks and drop the leading axis added by unsqueeze.
    binary = L.cast(restored > cfg['mask_thr'], 'float32')
    return binary[0], top_classes, top_scores
def is_finished(alive_log_prob, finished_scores, finished_in_finished):
    """Build the condition variable returned for the decoding loop.

    The returned variable is ``sum(finished_in_finished) < beam_size``.

    Args:
        alive_log_prob: log-probabilities of the alive hypotheses; only the
            first entry along axis 0 is read.
        finished_scores: scores of the finished hypotheses.
        finished_in_finished: 0/1 flags marking which finished slots are
            really finished.

    Returns:
        The ``less_than`` result described above.
    """
    max_out_len = 200
    penalty_base = layers.fill_constant(
        [1], dtype='float32', value=((5.0 + max_out_len) / 6.0))
    max_length_penalty = layers.pow(penalty_base, alpha)

    first_alive = layers.slice(alive_log_prob, starts=[0], ends=[1], axes=[0])
    lower_bound_alive_score = first_alive / max_length_penalty

    # Slots that are not finished are pushed to -INF before taking the minimum.
    lowest_finished_score = finished_scores * finished_in_finished
    lowest_finished_score += (1.0 - finished_in_finished) * -INF
    lowest_finished_score = layers.reduce_min(lowest_finished_score)

    met = layers.less_than(lower_bound_alive_score, lowest_finished_score)
    met = layers.cast(met, 'float32')
    # NOTE(review): bound_is_met is computed but never used in the returned
    # condition; confirm whether it was meant to be combined with it.
    bound_is_met = layers.reduce_sum(met)

    finished_eos_num = layers.reduce_sum(finished_in_finished)
    beam_limit = layers.fill_constant([1], dtype='float32', value=beam_size)
    finish_cond = layers.less_than(finished_eos_num, beam_limit)
    return finish_cond
def decoder_step(gru_unit,
                 cue_gru_unit,
                 step_in,
                 hidden,
                 input_size,
                 hidden_size,
                 memory,
                 memory_mask,
                 knowledge,
                 mask=None):
    """Run one decoder step with attention over memory and a knowledge cue.

    Args:
        gru_unit: callable ``(input, hidden, mask)`` returning
            ``(output, last_hidden)`` for the word input.
        cue_gru_unit: same kind of callable, fed with the knowledge cue.
        step_in: decoder input for this step.
        hidden: previous hidden state; its first slice along axis 0 is used
            as the attention query.
        input_size: unused here; kept for interface compatibility.
        hidden_size: hidden width used by the fully connected layers.
        memory: attention memory.
        memory_mask: mask passed to ``dot_attention``.
        knowledge: knowledge representation for this step.
        mask: optional step mask; when given, the hidden state is only
            updated where the mask is non-zero.

    Returns:
        ``(real_out, new_hidden)``: the attention context concatenated with
        the transposed new hidden state on axis 2, and the new hidden state.
    """
    # Use the first layer of the hidden state as the attention query.
    top_hidden = layers.slice(hidden, axes=[0], starts=[0], ends=[1])
    top_hidden = layers.squeeze(top_hidden, axes=[0])
    top_hidden = layers.unsqueeze(top_hidden, axes=[1])

    weight_memory, _ = dot_attention(top_hidden, memory, memory_mask)

    step_in = layers.unsqueeze(step_in, axes=[1])
    rnn_input_list = [step_in, weight_memory]
    # With an unknown batch dimension, give knowledge the same declared shape
    # as the attention context so the two can be concatenated.
    if weight_memory.shape[0] == -1:
        knowledge_1 = layers.reshape(knowledge, shape=weight_memory.shape)
    else:
        knowledge_1 = knowledge
    cue_input_list = [knowledge_1, weight_memory]
    output_list = [weight_memory]

    rnn_input = layers.concat(rnn_input_list, axis=2)
    rnn_input = layers.squeeze(rnn_input, axes=[1])
    _, rnn_last_hidden = gru_unit(rnn_input, hidden, mask)

    cue_input = layers.concat(cue_input_list, axis=2)
    cue_input = layers.squeeze(cue_input, axes=[1])
    _, cue_rnn_last_hidden = cue_gru_unit(cue_input, hidden, mask)

    h_y = layers.tanh(
        fc(rnn_last_hidden, hidden_size, hidden_size, name="dec_fc1"))
    h_cue = layers.tanh(
        fc(cue_rnn_last_hidden, hidden_size, hidden_size, name="dec_fc2"))

    # Gate k blends the two states: k * h_y + (1 - k) * h_cue.
    concate_y_cue = layers.concat([h_y, h_cue], axis=2)
    k = layers.sigmoid(fc(concate_y_cue, hidden_size * 2, 1, name='dec_fc3'))
    new_hidden = h_y * k - h_cue * (k - 1.0)

    new_hidden_tmp = layers.transpose(new_hidden, perm=[1, 0, 2])
    output_list.append(new_hidden_tmp)
    real_out = layers.concat(output_list, axis=2)

    # Compare against None explicitly: the truth value of a tensor is either
    # always True or an error, never "was a mask supplied".
    if mask is not None:
        mask_tmp = layers.unsqueeze(mask, axes=[0])
        # hidden + (new_hidden - hidden) * mask keeps the old state where mask is 0.
        new_hidden = layers.elementwise_mul((new_hidden - hidden), mask_tmp, axis=0)
        new_hidden += hidden

    return real_out, new_hidden
def _recognition_network(self, token_ids, type_ids, pos_ids, role_ids, recognition_mask):
    """Encode the inputs with an auxiliary mask embedding and produce latent logits.

    Returns:
        ``(logits, checkpoints)`` where ``logits`` has ``self.latent_type_size``
        outputs and ``checkpoints`` is whatever ``self._encode`` returned.
    """
    # One mask-token id per example, shaped [-1, 1, 1].
    mask_id = layers.fill_constant_batch_size_like(
        input=token_ids,
        shape=[-1, 1, 1],
        value=self.mask_id,
        dtype="int64")
    token_emb_attr = fluid.ParamAttr(
        name=self.token_emb_name, initializer=self.param_initializer)
    mask_emb = layers.embedding(
        input=mask_id,
        size=[self.vocab_size, self.emb_size],
        dtype=self.dtype,
        param_attr=token_emb_attr)

    emb_out, n_head_self_attn_mask = self._gen_input(
        token_ids, type_ids, pos_ids, role_ids, recognition_mask,
        aux_emb=mask_emb)
    recognition_out, checkpoints = self._encode(emb_out, n_head_self_attn_mask)

    # Only the first position along axis 1 feeds the classifier.
    first_position = layers.slice(
        input=recognition_out, axes=[1], starts=[0], ends=[1])
    feat_w_attr = fluid.ParamAttr(
        name="recognition_fc.w_0", initializer=self.param_initializer)
    recognition_feat = layers.fc(
        input=first_position,
        size=self.hidden_size,
        act="tanh",
        param_attr=feat_w_attr,
        bias_attr="recognition_fc.b_0")

    # The output projection weight is named after the latent embedding table.
    latent_w_attr = fluid.ParamAttr(
        name=self.latent_emb_name, initializer=self.param_initializer)
    logits = layers.fc(
        input=recognition_feat,
        size=self.latent_type_size,
        param_attr=latent_w_attr,
        bias_attr="recognition_bias")
    return logits, checkpoints
def encoder_1(x_emb,
              vocab_size,
              emb_size,
              init_hidden=None,
              init_cell=None,
              para_name='',
              args=None):
    """Run the two LSTMP encoder layers and return the last layer's output.

    Args:
        x_emb: input embeddings.
        vocab_size: unused here; kept for interface compatibility.
        emb_size: embedding size forwarded to ``lstmp_encoder``.
        init_hidden: optional stacked initial hidden states, one slice per
            layer along axis 0.
        init_cell: optional stacked initial cell states, one slice per layer
            along axis 0.
        para_name: prefix for the per-layer parameter names.
        args: forwarded to ``lstmp_encoder``.

    Returns:
        ``(last_rnn_out, rnn_outs_ori)``: the output of the final layer (with
        the residual connection applied) and the list of raw layer outputs.
    """
    rnn_input = x_emb
    rnn_outs = []
    rnn_outs_ori = []
    num_layers = 2
    # Compare against None explicitly; the truth value of a tensor does not
    # tell whether an initial state was supplied.
    has_init_state = init_hidden is not None and init_cell is not None
    for i in range(num_layers):
        if has_init_state:
            h0 = layers.squeeze(
                layers.slice(init_hidden, axes=[0], starts=[i], ends=[i + 1]),
                axes=[0])
            c0 = layers.squeeze(
                layers.slice(init_cell, axes=[0], starts=[i], ends=[i + 1]),
                axes=[0])
        else:
            h0 = c0 = None
        # NOTE(review): rnn_input is never reassigned inside this loop, so
        # every layer is fed x_emb; confirm this is intended.
        rnn_out, _, _ = lstmp_encoder(
            rnn_input, hidden_size, h0, c0,
            para_name + 'layer{}'.format(i + 1), emb_size, test_mode, args)
        rnn_out_ori = rnn_out
        if i > 0:
            # Residual connection for every layer after the first.
            rnn_out = rnn_out + rnn_input
        rnn_out.stop_gradient = True
        rnn_outs.append(rnn_out)
        rnn_outs_ori.append(rnn_out_ori)
    return rnn_outs[-1], rnn_outs_ori
def test_slice(self):
    """Check that a three-axis slice op can be built on a float32 data layer."""
    slice_axes = [0, 1, 2]
    slice_starts = [1, 0, 2]
    slice_ends = [3, 3, 4]

    prog = Program()
    with program_guard(prog):
        data_in = layers.data(
            name="input", shape=[3, 4, 5, 6], dtype='float32')
        # Building the op without raising is the whole check.
        out = layers.slice(
            data_in, axes=slice_axes, starts=slice_starts, ends=slice_ends)
def encoder_wrapper(x_emb,
                    vocab_size,
                    emb_size,
                    init_hidden=None,
                    init_cell=None,
                    para_name='',
                    args=None):
    """Run the two LSTMP encoder layers and return every layer's output.

    Args:
        x_emb: input embeddings.
        vocab_size: unused here; kept for interface compatibility.
        emb_size: embedding size forwarded to ``lstmp_encoder``.
        init_hidden: optional stacked initial hidden states, one slice per
            layer along axis 0.
        init_cell: optional stacked initial cell states, one slice per layer
            along axis 0.
        para_name: prefix for the per-layer parameter names.
        args: forwarded to ``lstmp_encoder``.

    Returns:
        ``(rnn_outs, rnn_outs_ori)``: per-layer outputs with the residual
        connection applied, and the raw per-layer outputs.
    """
    rnn_input = x_emb
    rnn_outs = []
    rnn_outs_ori = []
    num_layers = 2
    # Compare against None explicitly; the truth value of a tensor does not
    # tell whether an initial state was supplied.
    has_init_state = init_hidden is not None and init_cell is not None
    for i in range(num_layers):
        if has_init_state:
            h0 = layers.squeeze(
                layers.slice(init_hidden, axes=[0], starts=[i], ends=[i + 1]),
                axes=[0])
            c0 = layers.squeeze(
                layers.slice(init_cell, axes=[0], starts=[i], ends=[i + 1]),
                axes=[0])
        else:
            h0 = c0 = None
        # NOTE(review): rnn_input is never reassigned inside this loop, so
        # every layer is fed x_emb; confirm this is intended.
        rnn_out, _, _ = lstmp_encoder(
            rnn_input, hidden_size, h0, c0,
            para_name + 'layer{}'.format(i + 1), emb_size, args)
        rnn_out_ori = rnn_out
        if i > 0:
            # Residual connection for every layer after the first.
            rnn_out = rnn_out + rnn_input
        rnn_out.stop_gradient = True
        rnn_outs.append(rnn_out)
        rnn_outs_ori.append(rnn_out_ori)
    return rnn_outs, rnn_outs_ori
def metrics(self, predictions, label):
    """Build the accuracy, F1 and MRR metrics for ``(qid, logits)`` predictions.

    Returns:
        dict with the keys ``'acc'``, ``'f1'`` and ``'mrr'``.
    """
    qid, logits = predictions

    # Column 1 of the logits is treated as the positive-class score for MRR.
    pos_logits = L.slice(logits, axes=[1], starts=[1], ends=[2])
    mrr = propeller.metrics.Mrr(qid, label, pos_logits)

    # Arg-max class id, with a trailing axis added.
    pred_ids = L.unsqueeze(L.argmax(logits, axis=1), axes=[1])
    f1 = propeller.metrics.F1(label, pred_ids)
    acc = propeller.metrics.Acc(label, pred_ids)
    # An AUC metric was present here but is currently disabled.
    return dict(acc=acc, f1=f1, mrr=mrr)
def _dequeue_and_enqueue(self, keys):
    """Overwrite the queue columns at the current pointer with ``keys``.

    ``keys`` is transposed with ``perm=[1, 0]`` and written into
    ``self.queue`` along axis 1, starting at ``self.queue_ptr``. The pointer
    then advances by the batch size modulo ``self.K``.

    NOTE(review): the write does not wrap around. If ``ptr + batch_size``
    exceeds ``self.K`` the queue grows past ``K`` columns; callers are
    presumably expected to keep ``K`` divisible by the batch size.
    """
    batch_size = keys.shape[0]
    ptr = int(self.queue_ptr)

    # Assemble [columns before ptr] + [new keys] + [columns after the batch],
    # leaving out the parts that would be empty.
    parts = []
    if ptr > 0:
        parts.append(L.slice(self.queue, axes=[1], starts=[0], ends=[ptr]))
    parts.append(L.transpose(keys, perm=[1, 0]))
    if ptr + batch_size < self.K:
        # The end bound lies past the axis length; slice clamps it, so this
        # reads up to the last column.
        parts.append(
            L.slice(
                self.queue,
                axes=[1],
                starts=[ptr + batch_size],
                ends=[self.K + 100]))
    self.queue = L.concat(parts, axis=1)

    # Move the pointer.
    self.queue_ptr = (ptr + batch_size) % self.K
def gru_rnn(input,
            input_size,
            hidden_size,
            init_hidden=None,
            batch_first=False,
            mask=None,
            num_layers=1,
            dropout=0.0,
            name="gru"):
    """Unroll a (multi-layer) GRU over a padded sequence.

    Args:
        input: input sequence; batch-major when ``batch_first`` is True,
            otherwise time-major.
        input_size: input feature size.
        hidden_size: hidden state size.
        init_hidden: initial hidden state used to initialise the RNN memory.
        batch_first: whether ``input`` (and ``mask``) are batch-major.
        mask: optional mask stepped through alongside the input.
        num_layers: number of stacked GRU layers.
        dropout: dropout probability forwarded to ``GRU_unit``.
        name: name prefix for the GRU unit.

    Returns:
        ``(rnn_out, last_hidden)``. ``rnn_out`` is transposed back to
        batch-major when ``batch_first`` is True; ``last_hidden`` is reshaped
        to ``[num_layers, -1, hidden_size]``.
    """
    gru_unit = GRU_unit(
        input_size,
        hidden_size,
        num_layers=num_layers,
        dropout=dropout,
        name=name + "_gru_unit")

    # The RNN steps over axis 0, so batch-major inputs are made time-major.
    if batch_first:
        input = layers.transpose(x=input, perm=[1, 0, 2])
        if mask is not None:
            mask = layers.transpose(mask, perm=[1, 0])

    rnn = PaddingRNN()
    with rnn.step():
        step_in = rnn.step_input(input)
        step_mask = None
        if mask is not None:
            step_mask = rnn.step_input(mask)

        pre_hidden = rnn.memory(init=init_hidden)
        new_hidden, last_hidden = gru_unit(step_in, pre_hidden, step_mask)
        rnn.update_memory(pre_hidden, last_hidden)
        step_in = new_hidden
        rnn.step_output(step_in)
        rnn.step_output(last_hidden)

    rnn_res = rnn()
    rnn_out = rnn_res[0]
    # Take the final time step; the oversized end bound is clamped by slice.
    last_hidden = layers.slice(
        rnn_res[1], axes=[0], starts=[-1], ends=[1000000000])
    last_hidden = layers.reshape(
        last_hidden, shape=[num_layers, -1, hidden_size])

    # The original only bound the returned name in the batch_first branch, so
    # the time-major path raised NameError. Reuse rnn_out for both layouts.
    if batch_first:
        rnn_out = layers.transpose(x=rnn_out, perm=[1, 0, 2])
    return rnn_out, last_hidden
def knowledge_task(enc_output: layers.data,
                   mask_pos_list: List[List[int]],
                   type_list: List[List[str]],
                   entities_size: int,
                   property_size: int,
                   name='knowledge'):
    """Build the knowledge prediction heads for the pre-training stage.

    There are two kinds of masked knowledge slots:
      1. a masked entity ("S"), predicted over ``entities_size`` classes;
      2. a masked property ("P"), predicted over ``property_size`` classes.

    :param enc_output: encoder output; indexed as [batch, position, feature].
    :param mask_pos_list: per example, the masked positions.
    :param type_list: per example, the slot type ('s' or 'p', any case) of
        each masked position. Other types are ignored.
    :param entities_size: output size of the entity head.
    :param property_size: output size of the property head.
    :param name: prefix for the parameter and layer names.
    :return: (entity logits, property logits), each concatenated on axis 0.
    """
    assert len(mask_pos_list) == len(
        type_list
    ), "InputError: Type list must have the same length as mask_pos_list"

    def s_fc(x):
        # Entity head; parameters are shared across calls through fixed names.
        return layers.fc(x,
                         entities_size,
                         param_attr=fluid.ParamAttr(name=name + "_S_w"),
                         bias_attr=fluid.ParamAttr(name=name + "_S_b"),
                         name=name + "_S")

    def p_fc(x):
        # Property head; parameters are shared across calls through fixed names.
        return layers.fc(x,
                         property_size,
                         param_attr=fluid.ParamAttr(name=name + "_P_w"),
                         bias_attr=fluid.ParamAttr(name=name + "_P_b"),
                         name=name + "_P")

    entity_logits = []
    property_logits = []
    for batch_id, (positions, slot_types) in enumerate(
            zip(mask_pos_list, type_list)):
        for mask_pos, mask_type in zip(positions, slot_types):
            # Cut out the single [batch_id, mask_pos, :] feature vector.
            feat = layers.slice(
                enc_output,
                axes=[0, 1, 2],
                starts=[batch_id, mask_pos, 0],
                ends=[batch_id + 1, mask_pos + 1, INT_MAX])
            kind = mask_type.lower()
            if kind == 'p':
                property_logits.append(p_fc(feat))
            elif kind == 's':
                entity_logits.append(s_fc(feat))

    S_output = layers.concat(entity_logits, axis=0)
    P_output = layers.concat(property_logits, axis=0)
    return S_output, P_output
def gru_step(self, input, hidden, mask=None):
    """Advance every stacked GRU layer by one time step.

    Args:
        input: input to the first layer for this step.
        hidden: stacked previous hidden states, one slice per layer along
            axis 0.
        mask: optional step mask; where it is 0 the previous hidden state of
            a layer is kept.

    Returns:
        ``(output, last_hidden)``: the top layer's output (after dropout when
        enabled) and the new hidden states reshaped to
        ``[num_layers, -1, hidden_size]``.
    """
    # Split the stacked hidden state into one 2-D tensor per layer.
    hidden_array = []
    for i in range(self.num_layers):
        hidden_temp = layers.slice(hidden, axes=[0], starts=[i], ends=[i + 1])
        hidden_temp = layers.reshape(hidden_temp, shape=[-1, self.hidden_size])
        hidden_array.append(hidden_temp)

    last_hidden_array = []
    for k in range(self.num_layers):
        trans_input = layers.matmul(input, self.weight_input_array[k])
        trans_input += self.bias_input_array[k]
        trans_hidden = layers.matmul(hidden_array[k], self.weight_hidden_array[k])
        trans_hidden += self.bias_hidden_array[k]

        # Each projection holds three equal chunks used for the reset gate,
        # the input gate and the candidate state, in that order.
        input_array = layers.split(trans_input, num_or_sections=3, dim=-1)
        trans_array = layers.split(trans_hidden, num_or_sections=3, dim=-1)

        reset_gate = layers.sigmoid(input_array[0] + trans_array[0])
        input_gate = layers.sigmoid(input_array[1] + trans_array[1])
        new_gate = layers.tanh(input_array[2] + reset_gate * trans_array[2])

        # Equivalent to (1 - input_gate) * new_gate + input_gate * h_prev.
        new_hidden = new_gate + input_gate * (hidden_array[k] - new_gate)

        # Compare against None explicitly; the truth value of a tensor does
        # not tell whether a mask was supplied.
        if mask is not None:
            neg_mask = layers.fill_constant_batch_size_like(
                input=mask, shape=[1], value=1.0, dtype='float32') - mask
            new_hidden = new_hidden * mask + hidden_array[k] * neg_mask

        last_hidden_array.append(new_hidden)
        input = new_hidden

        if self.dropout and self.dropout > 0.0:
            input = layers.dropout(input, dropout_prob=self.dropout)

    last_hidden = layers.concat(last_hidden_array, 0)
    last_hidden = layers.reshape(
        last_hidden, shape=[self.num_layers, -1, self.hidden_size])

    return input, last_hidden
def fluid_sequence_get_seq_len(lodtensor):
    """Compute the length of every sequence in a LoD tensor.

    args:
        lodtensor: a LoD tensor, e.g. lod = [[0,4,7]]
    return:
        seq_len: lod = [], e.g. data = [4, 3], shape = [-1, 1], dtype int64
    """
    lodtensor_slice = layers.slice(lodtensor, axes=[1], starts=[0], ends=[1])
    # `.shape` is a plain attribute. The original message called it, which
    # turned a failed check into a TypeError instead of an AssertionError.
    assert lodtensor_slice.shape == (-1, 1), (lodtensor_slice.shape)
    # One 1.0 per token, carrying the LoD of the input.
    ones = layers.cast(lodtensor_slice * 0 + 1, 'float32')    # (batch*seq_len, 1)
    ones = layers.lod_reset(ones, lodtensor)
    # Pad with zeros so that summing over time counts the real tokens only.
    ones_padded = fluid_sequence_pad(ones, 0)                 # (batch, max_seq_len, 1)
    ones_padded = layers.squeeze(ones_padded, [2])            # (batch, max_seq_len)
    seq_len = layers.cast(
        layers.reduce_sum(ones_padded, 1, keep_dim=True), 'int64')    # (batch, 1)
    return seq_len
def _calc_bow_logits(self, enc_out, checkpoints, bow_pos):
    """Compute bag-of-words logits over the vocabulary.

    The feature at the first position along axis 1 of ``enc_out`` is
    flattened, gathered at ``bow_pos``, transformed, and projected to
    ``self.vocab_size``. The transformed feature is appended to
    ``checkpoints`` in place.
    """
    first_position = layers.slice(input=enc_out, axes=[1], starts=[0], ends=[1])
    flat_feat = layers.reshape(x=first_position, shape=[-1, self.hidden_size])
    gather_index = layers.cast(x=bow_pos, dtype="int32")
    picked_feat = layers.gather(input=flat_feat, index=gather_index)

    trans_w_attr = fluid.ParamAttr(
        name="bow_trans_fc.w_0", initializer=self.param_initializer)
    trans_b_attr = fluid.ParamAttr(name="bow_trans_fc.b_0")
    bow_trans_feat = layers.fc(
        input=picked_feat,
        size=self.emb_size,
        act=self.hidden_act,
        param_attr=trans_w_attr,
        bias_attr=trans_b_attr)
    bow_trans_feat = pre_process_layer(
        bow_trans_feat, self.post_cls_cmd, name="bow_trans")

    checkpoints.append(bow_trans_feat)

    if not self.weight_sharing:
        # Dedicated output projection; the bias is dropped when cls_bias is off.
        bow_out_bias_attr = fluid.ParamAttr(
            name="bow_out_fc.b_0") if self.cls_bias else False
        out_w_attr = fluid.ParamAttr(
            name="bow_out_fc.w_0", initializer=self.param_initializer)
        return layers.fc(
            input=bow_trans_feat,
            size=self.vocab_size,
            param_attr=out_w_attr,
            bias_attr=bow_out_bias_attr)

    # Weight sharing: reuse the token embedding table as the output projection.
    token_emb = fluid.default_main_program().global_block().var(
        self.token_emb_name)
    fc_out = layers.matmul(x=bow_trans_feat, y=token_emb, transpose_y=True)
    if self.cls_bias:
        fc_out += layers.create_parameter(
            shape=[self.vocab_size],
            dtype=self.dtype,
            attr=fluid.ParamAttr(name="bow_out_fc.b_0"),
            is_bias=True)
    return fc_out
def crop(x, audio_start, audio_length):
    """Crop the upsampled condition to match audio_length.

    The upsampled condition has the same time steps as the whole audio does.
    But since audios are sliced to 0.5 seconds randomly while conditions are
    not, upsampled conditions should also be sliced to exactly match the time
    steps of the audio slice.

    Args:
        x (Variable): shape(B, C, T), dtype float32, the upsampled condition.
        audio_start (Variable): shape(B, ), dtype: int64, the index of the
            starting point.
        audio_length (int): the length of the audio (number of samples it
            contains).

    Returns:
        Variable: shape(B, C, audio_length), cropped condition.
    """
    start_indices = audio_start.numpy()
    cropped = []
    # Every example has its own start offset, so crop them one at a time.
    for example_idx in range(x.shape[0]):
        begin = start_indices[example_idx]
        cropped.append(
            F.slice(
                x[example_idx],
                axes=[1],
                starts=[begin],
                ends=[begin + audio_length]))
    return F.stack(cropped)
def false_fn(array, start, end):
    """Slice ``array`` along axis 0 using ``start`` and ``end`` as the bounds.

    NOTE(review): ``slice`` here is presumably the framework's slice op
    imported at file level; the builtin ``slice`` accepts no keyword
    arguments. Confirm against the imports.
    """
    return slice(array, starts=[start], ends=[end], axes=[0])
def encoder_static(input_embedding, len=3, init_hidden=None, init_cell=None):
    """Unroll a stacked LSTM for ``len`` steps by building every step explicitly.

    Reads ``num_layers``, ``hidden_size``, ``init_scale`` and ``dropout`` from
    the enclosing scope.

    Args:
        input_embedding: input embeddings; split into ``len`` pieces on axis 1.
        len: number of time steps to unroll.
        init_hidden: stacked initial hidden states, one slice per layer.
        init_cell: stacked initial cell states, one slice per layer.

    Returns:
        ``(real_res, last_hidden, last_cell)``.
    """
    gate_weights = []
    gate_biases = []
    hidden_array = []
    cell_array = []

    # Per layer: create the gate parameters and unpack the initial state.
    for layer_idx in range(num_layers):
        layer_weight = layers.create_parameter(
            [hidden_size * 2, hidden_size * 4],
            dtype="float32",
            name="fc_weight1_" + str(layer_idx),
            default_initializer=fluid.initializer.UniformInitializer(
                low=-init_scale, high=init_scale))
        gate_weights.append(layer_weight)
        layer_bias = layers.create_parameter(
            [hidden_size * 4],
            dtype="float32",
            name="fc_bias1_" + str(layer_idx),
            default_initializer=fluid.initializer.Constant(0.0))
        gate_biases.append(layer_bias)

        layer_hidden = layers.slice(
            init_hidden, axes=[0], starts=[layer_idx], ends=[layer_idx + 1])
        layer_cell = layers.slice(
            init_cell, axes=[0], starts=[layer_idx], ends=[layer_idx + 1])
        layer_hidden = layers.reshape(
            layer_hidden, shape=[-1, hidden_size], inplace=True)
        layer_cell = layers.reshape(
            layer_cell, shape=[-1, hidden_size], inplace=True)
        hidden_array.append(layer_hidden)
        cell_array.append(layer_cell)

    step_outputs = []
    sliced_inputs = layers.split(input_embedding, num_or_sections=len, dim=1)

    for step in range(len):
        step_input = layers.reshape(
            sliced_inputs[step], shape=[-1, hidden_size], inplace=True)
        for k in range(num_layers):
            prev_hidden = hidden_array[k]
            prev_cell = cell_array[k]

            # One matmul produces all four gates from [input, hidden].
            joined = layers.concat([step_input, prev_hidden], 1)
            gate_input = layers.matmul(x=joined, y=gate_weights[k])
            gate_input = layers.elementwise_add(gate_input, gate_biases[k])
            gate_i, gate_j, gate_f, gate_o = layers.split(
                gate_input, num_or_sections=4, dim=-1)

            retained = prev_cell * layers.sigmoid(gate_f)
            written = layers.sigmoid(gate_i) * layers.tanh(gate_j)
            new_cell = retained + written
            new_hidden = layers.tanh(new_cell) * layers.sigmoid(gate_o)

            hidden_array[k] = new_hidden
            cell_array[k] = new_cell
            step_input = new_hidden

            if dropout is not None and dropout > 0.0:
                step_input = layers.dropout(
                    step_input,
                    dropout_prob=dropout,
                    dropout_implementation='upscale_in_train')

        step_outputs.append(step_input)

    # Re-stack the final per-layer states with the layer axis first.
    last_hidden = layers.concat(hidden_array, 1)
    last_hidden = layers.reshape(
        last_hidden, shape=[-1, num_layers, hidden_size], inplace=True)
    last_hidden = layers.transpose(x=last_hidden, perm=[1, 0, 2])

    last_cell = layers.concat(cell_array, 1)
    last_cell = layers.reshape(
        last_cell, shape=[-1, num_layers, hidden_size])
    last_cell = layers.transpose(x=last_cell, perm=[1, 0, 2])

    # Stack the step outputs and move the step axis behind the batch axis.
    real_res = layers.concat(step_outputs, 0)
    real_res = layers.reshape(
        real_res, shape=[len, -1, hidden_size], inplace=True)
    real_res = layers.transpose(x=real_res, perm=[1, 0, 2])

    return real_res, last_hidden, last_cell
def forward(self):
    """Build the forward graph and return the weighted sigmoid cross-entropy loss.

    Reads ``(src, dst)`` from ``self.pyreader``, embeds both sides with
    ``split_embedding``, mixes the per-feature embeddings with softmax
    weights taken from the shared "alpha" table, and scores the source
    against every destination. The first destination is labelled positive
    and the remaining ``self.neg_num`` negative. The loss is also stored on
    ``self.loss``.
    """
    src, dst = L.read_file(self.pyreader)

    # Take the entry at index 0 of the last two axes as the node id; the
    # large end bound on axis 0 keeps the whole batch.
    src_id = L.slice(src, [0, 1, 2, 3], [0, 0, 0, 0],
                     [int(math.pow(2, 30)) - 1, 1, 1, 1])
    dst_id = L.slice(dst, [0, 1, 2, 3], [0, 0, 0, 0],
                     [int(math.pow(2, 30)) - 1, self.neg_num + 1, 1, 1])

    if self.is_sparse:
        # sparse mode use 2 dims input.
        src = L.reshape(src, [-1, 1])
        dst = L.reshape(dst, [-1, 1])

    # [b, 1, f, h]
    src_embed = split_embedding(src, self.num_nodes, self.hidden_size,
                                self.embed_init, "weight", self.num_part,
                                self.is_sparse)

    # [b, n+1, f, h]
    dst_embed = split_embedding(dst, self.num_nodes, self.hidden_size,
                                self.embed_init, "weight", self.num_part,
                                self.is_sparse)

    if self.is_sparse:
        # Restore the 4-D layout that was flattened for the sparse lookup.
        src_embed = L.reshape(src_embed,
                              [-1, 1, self.num_featuers, self.hidden_size])
        dst_embed = L.reshape(
            dst_embed,
            [-1, self.neg_num + 1, self.num_featuers, self.hidden_size])

    # Both lookups use the same parameter name, so they share one table.
    # [b, 1, 1, f]
    src_weight = L.softmax(
        L.embedding(src_id, [self.num_nodes, self.num_featuers],
                    param_attr=F.ParamAttr(name="alpha")))
    # [b, n+1, 1, f]
    dst_weight = L.softmax(
        L.embedding(dst_id, [self.num_nodes, self.num_featuers],
                    param_attr=F.ParamAttr(name="alpha")))

    # [b, 1, h]
    src_sum = L.squeeze(L.matmul(src_weight, src_embed), axes=[2])
    # [b, n+1, h]
    dst_sum = L.squeeze(L.matmul(dst_weight, dst_embed), axes=[2])

    logits = L.matmul(src_sum, dst_sum,
                      transpose_y=True)  # [batch_size, 1, neg_num+1]

    # Label 1 for the first destination, 0 for the neg_num others.
    pos_label = L.fill_constant_batch_size_like(logits, [-1, 1, 1], "float32",
                                                1)
    neg_label = L.fill_constant_batch_size_like(logits, [-1, 1, self.neg_num],
                                                "float32", 0)
    label = L.concat([pos_label, neg_label], -1)

    # The positive term is weighted by neg_num, each negative term by 1.
    pos_weight = L.fill_constant_batch_size_like(logits, [-1, 1, 1], "float32",
                                                 self.neg_num)
    neg_weight = L.fill_constant_batch_size_like(logits, [-1, 1, self.neg_num],
                                                 "float32", 1)
    weight = L.concat([pos_weight, neg_weight], -1)

    weight.stop_gradient = True
    label.stop_gradient = True

    loss = L.sigmoid_cross_entropy_with_logits(logits, label)
    loss = loss * weight
    loss = L.reduce_mean(loss)
    # Rescale the mean by (neg_num + 1) / (2 * neg_num).
    loss = loss * ((self.neg_num + 1) / 2 / self.neg_num)
    loss.persistable = True
    self.loss = loss
    return loss
def encoder_static(input_embedding, len=3, init_hidden=None, init_cell=None):
    """Unroll a stacked LSTM for ``len`` steps, using fused ops when available.

    Reads ``num_layers``, ``hidden_size``, ``init_scale`` and ``dropout`` from
    the enclosing scope. Falls back to plain element-wise ops when
    ``fused_elemwise_activation`` cannot be imported.

    Args:
        input_embedding: input embeddings; split into ``len`` pieces on axis 1.
        len: number of time steps to unroll.
        init_hidden: stacked initial hidden states, one slice per layer.
        init_cell: stacked initial cell states, one slice per layer.

    Returns:
        ``(real_res, last_hidden, last_cell)``.
    """
    weight_1_arr = []
    weight_2_arr = []  # never filled in this function
    bias_arr = []
    hidden_array = []
    cell_array = []
    mask_array = []  # never filled in this function
    # Per layer: create the gate parameters and unpack the initial state.
    for i in range(num_layers):
        weight_1 = layers.create_parameter(
            [hidden_size * 2, hidden_size * 4],
            dtype="float32",
            name="fc_weight1_" + str(i),
            default_initializer=fluid.initializer.UniformInitializer(
                low=-init_scale, high=init_scale))
        weight_1_arr.append(weight_1)
        bias_1 = layers.create_parameter(
            [hidden_size * 4],
            dtype="float32",
            name="fc_bias1_" + str(i),
            default_initializer=fluid.initializer.Constant(0.0))
        bias_arr.append(bias_1)

        pre_hidden = layers.slice(
            init_hidden, axes=[0], starts=[i], ends=[i + 1])
        pre_cell = layers.slice(
            init_cell, axes=[0], starts=[i], ends=[i + 1])
        pre_hidden = layers.reshape(
            pre_hidden, shape=[-1, hidden_size], inplace=True)
        pre_cell = layers.reshape(
            pre_cell, shape=[-1, hidden_size], inplace=True)
        hidden_array.append(pre_hidden)
        cell_array.append(pre_cell)

    res = []
    sliced_inputs = layers.split(
        input_embedding, num_or_sections=len, dim=1)

    for index in range(len):
        input = sliced_inputs[index]
        input = layers.reshape(input, shape=[-1, hidden_size], inplace=True)
        for k in range(num_layers):
            pre_hidden = hidden_array[k]
            pre_cell = cell_array[k]
            weight_1 = weight_1_arr[k]
            bias = bias_arr[k]

            # One matmul produces all four gates from [input, hidden].
            nn = layers.concat([input, pre_hidden], 1)
            gate_input = layers.matmul(x=nn, y=weight_1)

            gate_input = layers.elementwise_add(gate_input, bias)
            i, j, f, o = layers.split(gate_input, num_or_sections=4, dim=-1)

            try:
                from paddle.fluid.contrib.layers import fused_elemwise_activation
                # fluid.contrib.layers.fused_elemwise_activation can do a fused
                # operation, like:
                # 1) x + sigmoid(y); x + tanh(y)
                # 2) tanh(x + y)
                # Currently the set of unary operations supported by this fused
                # op is limited; it is planned to be extended to more unary
                # operations, with this kind of fusion done automatically in a
                # future version of paddle.fluid.
                # layers.sigmoid(i) * layers.tanh(j)
                tmp0 = fused_elemwise_activation(
                    x=layers.tanh(j),
                    y=i,
                    functor_list=['elementwise_mul', 'sigmoid'],
                    save_intermediate_out=False)
                # pre_cell * layers.sigmoid(f)
                tmp1 = fused_elemwise_activation(
                    x=pre_cell,
                    y=f,
                    functor_list=['elementwise_mul', 'sigmoid'],
                    save_intermediate_out=False)
                c = tmp0 + tmp1
                # layers.tanh(c) * layers.sigmoid(o)
                m = fused_elemwise_activation(
                    x=layers.tanh(c),
                    y=o,
                    functor_list=['elementwise_mul', 'sigmoid'],
                    save_intermediate_out=False)
            except ImportError:
                # Unfused fallback computing the same cell and hidden state.
                c = pre_cell * layers.sigmoid(f) + layers.sigmoid(
                    i) * layers.tanh(j)
                m = layers.tanh(c) * layers.sigmoid(o)

            hidden_array[k] = m
            cell_array[k] = c
            input = m

            if dropout != None and dropout > 0.0:
                input = layers.dropout(
                    input,
                    dropout_prob=dropout,
                    dropout_implementation='upscale_in_train')

        res.append(input)

    # Re-stack the final per-layer states with the layer axis first.
    last_hidden = layers.concat(hidden_array, 1)
    last_hidden = layers.reshape(
        last_hidden, shape=[-1, num_layers, hidden_size], inplace=True)
    last_hidden = layers.transpose(x=last_hidden, perm=[1, 0, 2])

    last_cell = layers.concat(cell_array, 1)
    last_cell = layers.reshape(
        last_cell, shape=[-1, num_layers, hidden_size])
    last_cell = layers.transpose(x=last_cell, perm=[1, 0, 2])

    # Stack the step outputs and move the step axis behind the batch axis.
    real_res = layers.concat(res, 0)
    real_res = layers.reshape(
        real_res, shape=[len, -1, hidden_size], inplace=True)
    real_res = layers.transpose(x=real_res, perm=[1, 0, 2])

    return real_res, last_hidden, last_cell
def create_model(self, pyreader_name, is_prediction=False):
    """Create the network

    For prediction this delegates to ``self.fast_decode``. Otherwise it
    declares the py_reader inputs, expands the attention biases across
    heads, reshapes the id inputs and calls ``self.build_model``.

    Args:
        pyreader_name: name given to the py_reader.
        is_prediction: when True, return ``self.fast_decode(pyreader_name)``.

    Returns:
        ``(pyreader, graph_vars)`` in training mode.
    """
    if is_prediction:
        return self.fast_decode(pyreader_name)

    pyreader = fluid.layers.py_reader(
        capacity=50,
        shapes=[
            [-1, self.max_para_num, self.max_para_len],  # src_word
            [-1, self.max_para_num, self.max_para_len],  # src_word_pos
            [-1, self.max_para_num],  # src_sent_pos
            [-1, self.max_para_num, self.max_para_len],  # src_words_slf_attn_bias
            [-1, self.max_para_num],  # src_sents_slf_attn_bias
            [-1, self.max_para_num, self.max_para_num],  # graph_attn_bias
            [-1, self.max_tgt_len],  # trg_word
            [-1, self.max_tgt_len],  # trg_pos
            [-1, self.max_tgt_len, self.max_tgt_len],  # trg_slf_attn_bias
            [-1, 1],  # tgt_label
            [-1, 1]
        ],  # label_weights
        dtypes=[
            'int64', 'int64', 'int64', 'float32', 'float32', 'float32',
            'int64', 'int64', 'float32', 'int64', 'float32'
        ],
        lod_levels=[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        name=pyreader_name,
        use_double_buffer=True)

    (src_word, src_word_pos, src_sent_pos, src_words_slf_attn_bias,
     src_sents_slf_attn_bias, graph_attn_bias, trg_word, trg_pos,
     trg_slf_attn_bias, tgt_label, label_weights) = \
        fluid.layers.read_file(pyreader)

    # Insert head and query axes into the word-level bias, then tile them.
    src_words_slf_attn_bias = layers.expand(
        layers.unsqueeze(src_words_slf_attn_bias, axes=[2, 3]),
        expand_times=[1, 1, self._n_head, self.max_para_len, 1])
    src_words_slf_attn_bias.stop_gradient = True

    # Same for the sentence-level bias.
    src_sents_slf_attn_bias = layers.expand(
        layers.unsqueeze(src_sents_slf_attn_bias, axes=[1, 2]),
        expand_times=[1, self._n_head, self.max_para_num, 1])
    src_sents_slf_attn_bias.stop_gradient = True

    # The graph and target self-attention biases only need a head axis.
    graph_attn_bias = layers.expand(layers.unsqueeze(graph_attn_bias, axes=[1]),
                                    expand_times=[1, self._n_head, 1, 1])
    graph_attn_bias.stop_gradient = True

    trg_slf_attn_bias = layers.expand(layers.unsqueeze(trg_slf_attn_bias, axes=[1]),
                                      expand_times=[1, self._n_head, 1, 1])
    trg_slf_attn_bias.stop_gradient = True

    # Target-to-source biases: take one query row of the source bias and
    # repeat it max_tgt_len times.
    tgt_src_words_attn_bias = layers.expand(
        layers.slice(src_words_slf_attn_bias, axes=[3], starts=[0], ends=[1]),
        expand_times=[1, 1, 1, self.max_tgt_len, 1])
    tgt_src_words_attn_bias.stop_gradient = True

    tgt_src_sents_attn_bias = layers.expand(
        layers.slice(src_sents_slf_attn_bias, axes=[2], starts=[0], ends=[1]),
        expand_times=[1, 1, self.max_tgt_len, 1])
    tgt_src_sents_attn_bias.stop_gradient = True

    # Give every id input a trailing axis of size 1.
    src_word = layers.reshape(
        src_word, [-1, self.max_para_num, self.max_para_len, 1])
    src_word_pos = layers.reshape(
        src_word_pos, [-1, self.max_para_num, self.max_para_len, 1])
    src_sent_pos = layers.reshape(src_sent_pos, [-1, self.max_para_num, 1])
    trg_word = layers.reshape(trg_word, [-1, self.max_tgt_len, 1])
    trg_pos = layers.reshape(trg_pos, [-1, self.max_tgt_len, 1])
    tgt_label = layers.reshape(tgt_label, [-1, 1])
    label_weights = layers.reshape(label_weights, [-1, 1])

    enc_input = (src_word, src_word_pos, src_sent_pos, src_words_slf_attn_bias,
                 src_sents_slf_attn_bias, graph_attn_bias)
    dec_input = (trg_word, trg_pos, trg_slf_attn_bias, tgt_src_words_attn_bias,
                 tgt_src_sents_attn_bias, graph_attn_bias)

    graph_vars = self.build_model(enc_input=enc_input,
                                  dec_input=dec_input,
                                  tgt_label=tgt_label,
                                  label_weights=label_weights)

    return pyreader, graph_vars
def gru_attention_infer(self, decoder_boot, max_length, char_num,
                        word_vector_dim, encoded_vector, encoded_proj,
                        decoder_size):
    """Build the inference-time attention GRU decoder (top-1 decoding).

    A While loop runs for at most ``max_length`` steps. Each step attends
    over the encoder outputs, advances the GRU, picks the top-1 character and
    appends it to ``full_ids``.

    Args:
        decoder_boot: initial decoder state.
        max_length: maximum number of decoding steps.
        char_num: output vocabulary size.
        word_vector_dim: embedding size of the previous character.
        encoded_vector: encoder outputs passed to ``self.simple_attention``.
        encoded_proj: projected encoder outputs passed to the attention.
        decoder_size: GRU hidden size.

    Returns:
        ``full_ids``: the ids accumulated across steps; column 0 is the
        constant 1 it was initialised with.
    """
    init_state = decoder_boot
    beam_size = 1
    array_len = layers.fill_constant(
        shape=[1], dtype='int64', value=max_length)
    counter = layers.zeros(shape=[1], dtype='int64', force_cpu=True)

    # fill the first element with init_state
    state_array = layers.create_array('float32')
    layers.array_write(init_state, array=state_array, i=counter)

    # ids, scores as memory
    ids_array = layers.create_array('int64')
    scores_array = layers.create_array('float32')
    rois_shape = layers.shape(init_state)
    # batch + 1 boundaries [0, 1, ..., batch] are used as LoD offsets below.
    batch_size = layers.slice(
        rois_shape, axes=[0], starts=[0], ends=[1]) + 1
    lod_level = layers.range(
        start=0, end=batch_size, step=1, dtype=batch_size.dtype)

    init_ids = layers.fill_constant_batch_size_like(
        input=init_state, shape=[-1, 1], value=0, dtype='int64')
    # The same offsets are set once and appended once, giving two LoD levels.
    init_ids = layers.lod_reset(init_ids, lod_level)
    init_ids = layers.lod_append(init_ids, lod_level)

    init_scores = layers.fill_constant_batch_size_like(
        input=init_state, shape=[-1, 1], value=1, dtype='float32')
    init_scores = layers.lod_reset(init_scores, init_ids)
    layers.array_write(init_ids, array=ids_array, i=counter)
    layers.array_write(init_scores, array=scores_array, i=counter)

    # Accumulator for the decoded ids; starts with one column of 1s.
    full_ids = fluid.layers.fill_constant_batch_size_like(
        input=init_state, shape=[-1, 1], dtype='int64', value=1)

    cond = layers.less_than(x=counter, y=array_len)
    while_op = layers.While(cond=cond)
    with while_op.block():
        pre_ids = layers.array_read(array=ids_array, i=counter)
        pre_state = layers.array_read(array=state_array, i=counter)
        pre_score = layers.array_read(array=scores_array, i=counter)
        pre_ids_emb = layers.embedding(
            input=pre_ids,
            size=[char_num, word_vector_dim],
            dtype='float32')

        context = self.simple_attention(encoded_vector, encoded_proj,
                                        pre_state, decoder_size)

        # expand the recursive_sequence_lengths of pre_state
        # to be the same with pre_score
        pre_state_expanded = layers.sequence_expand(pre_state, pre_score)
        context_expanded = layers.sequence_expand(context, pre_score)

        fc_1 = layers.fc(input=context_expanded,
                         size=decoder_size * 3,
                         bias_attr=False,
                         name="rnn_fc1")

        fc_2 = layers.fc(input=pre_ids_emb,
                         size=decoder_size * 3,
                         bias_attr=False,
                         name="rnn_fc2")

        decoder_inputs = fc_1 + fc_2
        current_state, _, _ = layers.gru_unit(
            input=decoder_inputs,
            hidden=pre_state_expanded,
            size=decoder_size * 3)
        current_state_with_lod = layers.lod_reset(
            x=current_state, y=pre_score)
        # use score to do beam search
        current_score = layers.fc(input=current_state_with_lod,
                                  size=char_num,
                                  bias_attr=True,
                                  act='softmax',
                                  name="rnn_out_fc")
        topk_scores, topk_indices = layers.topk(current_score, k=beam_size)

        # Append this step's ids and write the result back into full_ids.
        new_ids = fluid.layers.concat([full_ids, topk_indices], axis=1)
        fluid.layers.assign(new_ids, full_ids)

        layers.increment(x=counter, value=1, in_place=True)

        # update the memories
        layers.array_write(current_state, array=state_array, i=counter)
        layers.array_write(topk_indices, array=ids_array, i=counter)
        layers.array_write(topk_scores, array=scores_array, i=counter)

        # update the break condition:
        # up to the max length or all candidates of
        # source sentences have ended.
        length_cond = layers.less_than(x=counter, y=array_len)
        finish_cond = layers.logical_not(layers.is_empty(x=topk_indices))
        layers.logical_and(x=length_cond, y=finish_cond, out=cond)

    return full_ids
def convlstm2d_rnn(rnn_input,
                   init_hidden,
                   init_cell,
                   padding,
                   hidden_h,
                   hidden_w,
                   filters,
                   filter_size,
                   drop_out=None,
                   sequence_length=None,
                   name='conv_lstm_2d'):
    """Unroll a ConvLSTM2D cell over a padded sequence.

    Args:
        rnn_input: 5-D input; it is transposed with ``[1, 0, 4, 2, 3]`` to
            (sequence x batch x C x H x W) before stepping.
        init_hidden: initial hidden state, or None for a zero-like memory.
        init_cell: initial cell state, or None for a zero-like memory.
        padding: padding forwarded to ``ConvLSTM2D_unit``.
        hidden_h: height of the hidden state.
        hidden_w: width of the hidden state.
        filters: number of hidden channels.
        filter_size: convolution kernel size.
        drop_out: optional dropout probability (see the note in the body).
        sequence_length: optional per-example lengths used to mask padded
            steps.
        name: unused here; kept for interface compatibility.

    Returns:
        ``(rnn_out, last_hidden)``, both transposed with ``[1, 0, 3, 4, 2]``.
    """
    # transpose : (sequence x batch x C x H x W)
    rnn_input = layers.transpose(rnn_input, [1, 0, 4, 2, 3])

    # Generate the time-major step mask. Optional tensors are compared with
    # None because the truth value of a tensor does not mean "was supplied".
    mask = None
    if sequence_length is not None:
        max_seq_len = layers.shape(rnn_input)[0]
        mask = layers.sequence_mask(
            sequence_length, maxlen=max_seq_len, dtype='float32')
        mask = layers.transpose(mask, [1, 0])

    conv_lstm_2d = ConvLSTM2D_unit(filters, filter_size, padding)

    rnn = PaddingRNN()
    with rnn.step():
        step_in = rnn.step_input(rnn_input)

        if mask is not None:
            step_mask = rnn.step_input(mask)

        if init_hidden is not None and init_cell is not None:
            pre_hidden = rnn.memory(init=init_hidden)
            pre_cell = rnn.memory(init=init_cell)
        else:
            pre_hidden = rnn.memory(
                batch_ref=rnn_input,
                shape=[-1, filters, hidden_h, hidden_w])
            pre_cell = rnn.memory(
                batch_ref=rnn_input,
                shape=[-1, filters, hidden_h, hidden_w])

        real_out, last_hidden, last_cell = conv_lstm_2d(
            step_in, pre_hidden, pre_cell)

        if mask is not None:
            # new * m - old * (m - 1) == new * m + old * (1 - m): padded
            # steps keep the previous state.
            last_hidden = dot(last_hidden, step_mask, axis=0) - dot(
                pre_hidden, (step_mask - 1), axis=0)
            last_cell = dot(last_cell, step_mask, axis=0) - dot(
                pre_cell, (step_mask - 1), axis=0)

        rnn.update_memory(pre_hidden, last_hidden)
        rnn.update_memory(pre_cell, last_cell)

        rnn.step_output(last_hidden)
        rnn.step_output(last_cell)

        # NOTE(review): the dropout result is never registered as a step
        # output or used afterwards; confirm whether it should replace
        # last_hidden in the emitted output.
        step_input = last_hidden
        if drop_out is not None and drop_out > 0.0:
            step_input = layers.dropout(
                step_input,
                dropout_prob=drop_out,
                dropout_implementation='upscale_in_train')

    rnn_res = rnn()
    rnn_out = rnn_res[0]
    # Take the final time step; the oversized end bound is clamped by slice.
    last_hidden = layers.slice(
        rnn_res[1], axes=[0], starts=[-1], ends=[1000000000])

    rnn_out = layers.transpose(rnn_out, [1, 0, 3, 4, 2])
    last_hidden = layers.transpose(last_hidden, [1, 0, 3, 4, 2])

    return rnn_out, last_hidden
def build(self):
    """Build the forward and backward language-model graph.

    Declares the feed variables, runs ``encoder`` once per direction and
    stores on ``self``: ``loss``, ``grad_vars`` / ``grad_vars_name``,
    ``feed_order``, ``last_hidden`` and ``last_cell``.
    """
    args = self.args
    emb_size = args.embed_size
    proj_size = args.embed_size
    hidden_size = args.hidden_size
    batch_size = args.batch_size  # read as in the original; not used below
    num_layers = args.num_layers
    num_steps = args.num_steps  # read as in the original; not used below

    def _token_feed(feed_name):
        # int64 LoD (level 1) feed of shape [1]
        return layers.data(name=feed_name,
                           shape=[1],
                           dtype='int64',
                           lod_level=1)

    def _sample_feed(feed_name, dtype):
        # feed of width n_negative_samples_batch + 1
        return layers.data(name=feed_name,
                           shape=[args.n_negative_samples_batch + 1],
                           dtype=dtype,
                           lod_level=1)

    def _layer_range(stacked, first, last):
        # rows [first, last) along axis 0
        return layers.slice(stacked, axes=[0], starts=[first], ends=[last])

    x_f = _token_feed("x")
    y_f = _token_feed("y")
    x_b = _token_feed("x_r")
    y_b = _token_feed("y_r")

    init_hiddens_ = layers.data(name="init_hiddens",
                                shape=[1],
                                dtype='float32')
    init_cells_ = layers.data(name="init_cells", shape=[1], dtype='float32')

    # Both directions are packed along axis 0: the first num_layers rows
    # are the forward states, the remaining num_layers rows the backward.
    init_hiddens = layers.reshape(init_hiddens_,
                                  shape=[2 * num_layers, -1, proj_size])
    init_cells = layers.reshape(init_cells_,
                                shape=[2 * num_layers, -1, hidden_size])

    init_hidden = _layer_range(init_hiddens, 0, num_layers)
    init_cell = _layer_range(init_cells, 0, num_layers)
    init_hidden_r = _layer_range(init_hiddens, num_layers, 2 * num_layers)
    init_cell_r = _layer_range(init_cells, num_layers, 2 * num_layers)

    custom_samples = None
    custom_samples_r = None
    custom_probabilities = None
    if args.use_custom_samples:
        custom_samples = _sample_feed("custom_samples", 'int64')
        custom_samples_r = _sample_feed("custom_samples_r", 'int64')
        custom_probabilities = _sample_feed("custom_probabilities",
                                            'float32')

    forward, fw_hiddens, fw_hiddens_ori, fw_cells, fw_projs = encoder(
        x_f,
        y_f,
        self.vocab_size,
        emb_size,
        init_hidden,
        init_cell,
        para_name='fw_',
        custom_samples=custom_samples,
        custom_probabilities=custom_probabilities,
        test_mode=self.test_mode,
        args=args)
    backward, bw_hiddens, bw_hiddens_ori, bw_cells, bw_projs = encoder(
        x_b,
        y_b,
        self.vocab_size,
        emb_size,
        init_hidden_r,
        init_cell_r,
        para_name='bw_',
        custom_samples=custom_samples_r,
        custom_probabilities=custom_probabilities,
        test_mode=self.test_mode,
        args=args)

    # The last element of each encoder result list is that direction's loss.
    losses = layers.concat([forward[-1], backward[-1]])
    self.loss = layers.reduce_mean(losses)
    self.loss.persistable = True

    self.grad_vars = [x_f, y_f, x_b, y_b, self.loss]
    self.grad_vars_name = ['x', 'y', 'x_r', 'y_r', 'final_loss']

    fw_vars_name = [
        'x_emb', 'proj', 'loss', 'init_hidden', 'init_cell', 'rnn_out',
        'rnn_out2', 'cell', 'cell2', 'xproj', 'xproj2'
    ]
    bw_vars_name = [
        'x_emb_r', 'proj_r', 'loss_r', 'init_hidden_r', 'init_cell_r',
        'rnn_out_r', 'rnn_out2_r', 'cell_r', 'cell2_r', 'xproj_r',
        'xproj2_r'
    ]
    fw_vars = forward + [init_hidden, init_cell
                         ] + fw_hiddens + fw_cells + fw_projs
    bw_vars = backward + [init_hidden_r, init_cell_r
                          ] + bw_hiddens + bw_cells + bw_projs

    # Interleave forward and backward entries, one pair per name.
    for idx, (fw_name, bw_name) in enumerate(zip(fw_vars_name,
                                                 bw_vars_name)):
        self.grad_vars.append(fw_vars[idx])
        self.grad_vars.append(bw_vars[idx])
        self.grad_vars_name.append(fw_name)
        self.grad_vars_name.append(bw_name)

    self.feed_order = ['x', 'y', 'x_r', 'y_r']
    if args.use_custom_samples:
        self.feed_order += [
            'custom_samples', 'custom_samples_r', 'custom_probabilities'
        ]

    # Final-step states of every layer, forward layers first.
    last_hiddens = []
    for seq_hidden in fw_hiddens_ori + bw_hiddens_ori:
        last_hiddens.append(fluid.layers.sequence_last_step(input=seq_hidden))
    last_cells = []
    for seq_cell in fw_cells + bw_cells:
        last_cells.append(fluid.layers.sequence_last_step(input=seq_cell))

    self.last_hidden = layers.concat(last_hiddens, axis=0)
    self.last_hidden.persistable = True
    self.last_cell = layers.concat(last_cells, axis=0)
    self.last_cell.persistable = True
def knowledge_seq2seq(config):
    """Build the knowledge-grounded seq2seq graph for training or decoding.

    Reads its sizes and switches from ``config`` (embed_size, hidden_size,
    num_layers, bidirectional, batch_size, vocab_size, run_type,
    use_posterior, use_bow, max_len, dropout, beam_size, bos_id, unk_id,
    eos_id, max_dec_len, length_average).

    Returns:
        * ``run_type == "train"``: ``[bow_loss, kl_loss, nll_loss, final_loss]``.
        * ``run_type == "test"``: ``(final_score, final_ids, final_index)``,
          each reshaped to ``[max_decode_len, beam_size * batch_size]``.
        * any other ``run_type``: falls off the end and returns ``None``.

    NOTE(review): the train branch uses ``weight_target``, ``target_query``
    and ``target_att``, which are only assigned when ``config.use_posterior``
    is true, and ``bow_loss``, which is only assigned when ``config.use_bow``
    is true. With either switch off, training raises ``NameError``.
    """
    emb_size = config.embed_size
    hidden_size = config.hidden_size
    input_size = emb_size
    num_layers = config.num_layers
    bi_direc = config.bidirectional
    batch_size = config.batch_size
    vocab_size = config.vocab_size
    run_type = config.run_type

    # Feed variables. The trailing "-->" notes are the original author's
    # mapping remarks, kept as written.
    enc_input = layers.data(name="enc_input", shape=[1], dtype='int64', lod_level=1) #enc_input --> goal
    enc_mask = layers.data(name="enc_mask", shape=[-1, 1], dtype='float32')
    goal_input = layers.data(name="goal_input", shape=[1], dtype='int64', lod_level=1) #goal_input --> x
    cue_input = layers.data(name="cue_input", shape=[1], dtype='int64', lod_level=1) #cue_input --> kg
    #cue_mask = layers.data(name='cue_mask', shape=[-1, 1], dtype='float32')
    memory_mask = layers.data(name='memory_mask', shape=[-1, 1], dtype='float32')
    tar_input = layers.data(name='tar_input', shape=[1], dtype='int64', lod_level=1) #tar_input --> y
    # tar_mask = layers.data(name="tar_mask", shape=[-1, 1], dtype='float32')

    # Each direction of a bidirectional encoder gets half the hidden size.
    rnn_hidden_size = hidden_size
    if bi_direc:
        rnn_hidden_size //= 2

    enc_out, enc_last_hidden = \
        rnn_encoder(enc_input, vocab_size, input_size, rnn_hidden_size, batch_size, num_layers, bi_direc,
                    dropout=0.0, batch_first=True, name="rnn_enc")
    goal_out, goal_last_hidden = \
        rnn_encoder(goal_input, vocab_size, input_size, rnn_hidden_size, batch_size, num_layers, bi_direc,
                    dropout=0.0, batch_first=True, name="rnn_enc1")
    # Fuse the two final encoder states and project them down to
    # rnn_hidden_size * 2 before the "bridge" layer.
    context_goal_out = fluid.layers.concat(
        input=[enc_last_hidden, goal_last_hidden], axis=2)
    context_goal_out = layers.reshape(context_goal_out,
                                      shape=[-1, 1, rnn_hidden_size * 4])
    # context_goal_out = layers.squeeze(context_goal_out, axes=[1])
    context_goal_out = fluid.layers.fc(context_goal_out,
                                       size=rnn_hidden_size * 2,
                                       bias_attr=False)
    context_goal_out = layers.unsqueeze(context_goal_out, axes=[0])
    bridge_out = fc(context_goal_out, hidden_size, hidden_size, name="bridge")
    bridge_out = layers.tanh(bridge_out)

    cue_last_mask = layers.data(name='cue_last_mask', shape=[-1], dtype='float32')
    knowledge_out, knowledge_last_hidden = \
        rnn_encoder(cue_input, vocab_size, input_size, rnn_hidden_size, batch_size, num_layers, bi_direc,
                    dropout=0.0, batch_first=True, last_mask=cue_last_mask, name="knowledge_enc")

    # Prior attention: the bridge output queries the knowledge memory.
    query = layers.slice(bridge_out, axes=[0], starts=[0], ends=[1])
    query = layers.squeeze(query, axes=[0])
    query = layers.unsqueeze(query, axes=[1])
    query = layers.reshape(query, shape=[batch_size, -1, hidden_size])
    cue_memory = layers.slice(knowledge_last_hidden, axes=[0], starts=[0], ends=[1])
    cue_memory = layers.reshape(cue_memory, shape=[batch_size, -1, hidden_size])
    memory_mask = layers.reshape(memory_mask, shape=[batch_size, 1, -1])

    weighted_cue, cue_att = dot_attention(query, cue_memory, mask=memory_mask)

    cue_att = layers.reshape(cue_att, shape=[batch_size, -1])

    knowledge = weighted_cue
    if config.use_posterior:
        print("config.use_posterior", config.use_posterior)
        target_out, target_last_hidden = \
            rnn_encoder(tar_input, vocab_size, input_size, rnn_hidden_size, batch_size, num_layers, bi_direc,
                        dropout=0.0, batch_first=True, name="knowledge_enc1")
        target_goal_out = fluid.layers.concat(
            input=[target_last_hidden, goal_last_hidden], axis=2)
        target_goal_out = layers.reshape(target_goal_out,
                                         shape=[-1, 1, rnn_hidden_size * 4])
        # target_goal_out = layers.squeeze(target_goal_out, axes=[1])
        target_goal_out = fluid.layers.fc(target_goal_out,
                                          size=rnn_hidden_size * 2,
                                          bias_attr=False)
        target_goal_out = layers.unsqueeze(target_goal_out, axes=[0])

        # Posterior attention: the target+goal state queries the same memory.
        # target_query = layers.slice(target_last_hidden, axes=[0], starts=[0], ends=[1])
        target_query = layers.slice(target_goal_out, axes=[0], starts=[0], ends=[1])
        target_query = layers.squeeze(target_query, axes=[0])
        target_query = layers.unsqueeze(target_query, axes=[1])
        target_query = layers.reshape(target_query, shape=[batch_size, -1, hidden_size])

        weight_target, target_att = dot_attention(target_query, cue_memory, mask=memory_mask)
        target_att = layers.reshape(target_att, shape=[batch_size, -1])
        # add to output
        knowledge = weight_target

    enc_memory_mask = layers.data(name="enc_memory_mask", shape=[-1, 1], dtype='float32')
    enc_memory_mask = layers.unsqueeze(enc_memory_mask, axes=[1])
    # decoder init_hidden, enc_memory, enc_mask
    dec_init_hidden = bridge_out
    pad_value = fluid.layers.assign(input=np.array([0.0], dtype='float32'))

    enc_memory, origl_len_1 = layers.sequence_pad(x=enc_out, pad_value=pad_value)
    enc_memory.persistable = True

    gru_unit = GRU_unit(input_size + hidden_size, hidden_size,
                        num_layers=num_layers, dropout=0.0, name="decoder_gru_unit")

    cue_gru_unit = GRU_unit(hidden_size + hidden_size, hidden_size,
                            num_layers=num_layers, dropout=0.0, name="decoder_cue_gru_unit")

    tgt_vocab_size = config.vocab_size
    if run_type == "train":
        if config.use_bow:
            # Bag-of-words loss: one distribution predicted from the
            # knowledge vector, repeated config.max_len times and scored
            # against each target position.
            bow_logits = fc(knowledge, hidden_size, hidden_size, name='bow_fc_1')
            bow_logits = layers.tanh(bow_logits)
            bow_logits = fc(bow_logits, hidden_size, tgt_vocab_size, name='bow_fc_2')
            bow_logits = layers.softmax(bow_logits)

            bow_label = layers.data(name='bow_label', shape=[-1, config.max_len], dtype='int64')
            bow_mask = layers.data(name="bow_mask", shape=[-1, config.max_len], dtype='float32')

            bow_logits = layers.expand(bow_logits, [1, config.max_len, 1])
            bow_logits = layers.reshape(bow_logits, shape=[-1, tgt_vocab_size])
            bow_label = layers.reshape(bow_label, shape=[-1, 1])
            bow_loss = layers.cross_entropy(bow_logits, bow_label, soft_label=False)
            bow_loss = layers.reshape(bow_loss, shape=[-1, config.max_len])

            bow_loss *= bow_mask
            bow_loss = layers.reduce_sum(bow_loss, dim=[1])
            bow_loss = layers.reduce_mean(bow_loss)

        dec_input = layers.data(name="dec_input", shape=[-1, 1, 1], dtype='int64')
        dec_mask = layers.data(name="dec_mask", shape=[-1, 1], dtype='float32')

        # NOTE(review): weight_target and target_query exist only when
        # config.use_posterior is true (see the docstring).
        dec_knowledge = weight_target
        # NOTE(review): knowledge_goal_out is not referenced again in this
        # function, but the fc call below still creates a parameter.
        knowledge_goal_out = fluid.layers.concat(
            input=[dec_knowledge, target_query], axis=2)
        knowledge_goal_out = layers.reshape(knowledge_goal_out,
                                            shape=[-1, 1, rnn_hidden_size * 4])
        # knowledge_goal_out = layers.squeeze(knowledge_goal_out, axes=[1])
        knowledge_goal_out = fluid.layers.fc(knowledge_goal_out,
                                             size=rnn_hidden_size * 2,
                                             bias_attr=False)
        knowledge_goal_out = layers.unsqueeze(knowledge_goal_out, axes=[0])

        decoder_logits = \
            rnn_decoder(gru_unit, cue_gru_unit, dec_input, input_size, hidden_size, num_layers,
                         enc_memory, enc_memory_mask, dec_knowledge, vocab_size,
                         init_hidden=dec_init_hidden, mask=dec_mask, dropout=config.dropout)

        target_label = layers.data(name='target_label', shape=[-1, 1], dtype='int64')
        target_mask = layers.data(name='target_mask', shape=[-1, 1], dtype='float32')

        # Masked negative log-likelihood: summed per row, averaged over rows.
        decoder_logits = layers.reshape(decoder_logits, shape=[-1, tgt_vocab_size])
        target_label = layers.reshape(target_label, shape=[-1, 1])

        nll_loss = layers.cross_entropy(decoder_logits, target_label, soft_label=False)
        nll_loss = layers.reshape(nll_loss, shape=[batch_size, -1])
        nll_loss *= target_mask
        nll_loss = layers.reduce_sum(nll_loss, dim=[1])
        nll_loss = layers.reduce_mean(nll_loss)

        # posterior * (log(posterior) - log(prior)), with 1e-10 added inside
        # each log; no gradient flows through the posterior attention.
        prior_attn = cue_att + 1e-10
        posterior_att = target_att
        posterior_att.stop_gradient = True

        prior_attn = layers.log(prior_attn)

        kl_loss = posterior_att * (layers.log(posterior_att + 1e-10) - prior_attn)
        kl_loss = layers.reduce_mean(kl_loss)

        # The same fed factor scales both the KL and the NLL term.
        kl_and_nll_factor = layers.data(name='kl_and_nll_factor', shape=[1], dtype='float32')
        kl_and_nll_factor = layers.reshape(kl_and_nll_factor, shape=[-1])

        # NOTE(review): bow_loss exists only when config.use_bow is true.
        final_loss = bow_loss + kl_loss * kl_and_nll_factor + nll_loss * kl_and_nll_factor

        return [bow_loss, kl_loss, nll_loss, final_loss]

    elif run_type == "test":
        beam_size = config.beam_size
        batch_size = config.batch_size
        token = layers.fill_constant(shape=[batch_size * beam_size, 1],
                                     value=config.bos_id, dtype='int64')

        token = layers.reshape(token, shape=[-1, 1])
        max_decode_len = config.max_dec_len

        dec_knowledge = knowledge
        INF = 100000000.0

        # Only the first beam of each sample starts at score 0; the others
        # start at -INF.
        init_score_np = np.ones([beam_size * batch_size], dtype='float32') * -INF

        for i in range(batch_size):
            init_score_np[i * beam_size] = 0.0

        pre_score = layers.assign(init_score_np)

        # Row offset of each sample's first beam in the flattened
        # [batch_size * beam_size] layout.
        pos_index_np = np.arange(batch_size).reshape(-1, 1)
        pos_index_np = \
            np.tile(pos_index_np, (1, beam_size)).reshape(-1).astype('int32') * beam_size

        pos_index = layers.assign(pos_index_np)

        id_array = []
        score_array = []
        index_array = []
        # Repeat encoder memory, its mask, the knowledge vector and the
        # initial hidden state once per beam.
        init_enc_memory = layers.expand(enc_memory, [1, beam_size, 1])
        init_enc_memory = layers.reshape(
            init_enc_memory, shape=[batch_size * beam_size, -1, hidden_size])
        init_enc_mask = layers.expand(enc_memory_mask, [1, beam_size, 1])
        init_enc_mask = layers.reshape(init_enc_mask, shape=[batch_size * beam_size, 1, -1])

        dec_knowledge = layers.reshape(dec_knowledge, shape=[-1, 1, hidden_size])
        init_dec_knowledge = layers.expand(dec_knowledge, [1, beam_size, 1])
        init_dec_knowledge = layers.reshape(
            init_dec_knowledge, shape=[batch_size * beam_size, -1, hidden_size])

        dec_init_hidden = layers.expand(dec_init_hidden, [1, 1, beam_size])
        dec_init_hidden = layers.reshape(dec_init_hidden, shape=[1, -1, hidden_size])

        length_average = config.length_average
        UNK = config.unk_id
        EOS = config.eos_id
        # The decoding loop is unrolled in Python: every step adds its own
        # operators to the program.
        for i in range(1, max_decode_len + 1):
            dec_emb = get_embedding(token, input_size, vocab_size)
            dec_out, dec_last_hidden = \
                decoder_step(gru_unit, cue_gru_unit,
                             dec_emb, dec_init_hidden, input_size, hidden_size,
                             init_enc_memory, init_enc_mask, init_dec_knowledge, mask=None)
            output_in_size = hidden_size + hidden_size

            rnnout = layers.dropout(dec_out, dropout_prob=config.dropout, is_test=True)
            rnnout = fc(rnnout, output_in_size, hidden_size, name='dec_out_fc1')
            rnnout = fc(rnnout, hidden_size, vocab_size, name='dec_out_fc2')

            log_softmax_output = log_softmax(rnnout)
            log_softmax_output = layers.squeeze(log_softmax_output, axes=[1])

            # Combine with the previous beam score: a running mean over i
            # steps when length_average is set, a plain sum otherwise.
            if i > 1:
                if length_average:
                    log_softmax_output = layers.elementwise_add(
                        (log_softmax_output / i), (pre_score * (1.0 - 1.0 / i)), axis=0)
                else:
                    log_softmax_output = layers.elementwise_add(
                        log_softmax_output, pre_score, axis=0)
            else:
                log_softmax_output = layers.elementwise_add(log_softmax_output,
                                                            pre_score, axis=0)

            # Top-k over each sample's flattened (beam, vocab) scores:
            # index % vocab is the token, index // vocab the parent beam.
            log_softmax_output = layers.reshape(log_softmax_output, shape=[batch_size, -1])

            topk_score, topk_index = layers.topk(log_softmax_output, k=beam_size)
            topk_score = layers.reshape(topk_score, shape=[-1])
            topk_index = layers.reshape(topk_index, shape=[-1])

            vocab_var = layers.fill_constant([1], dtype='int64', value=vocab_size)
            new_token = topk_index % vocab_var

            index = topk_index // vocab_var
            id_array.append(new_token)
            # The per-sample parent beam index is recorded before the row
            # offset is added.
            index_array.append(index)
            index = index + pos_index

            score_array.append(topk_score)

            # Beams that just produced EOS or UNK get a large negative score
            # for the next step.
            eos_ids = layers.fill_constant([beam_size * batch_size], dtype='int64', value=EOS)
            unk_ids = layers.fill_constant([beam_size * batch_size], dtype='int64', value=UNK)
            eos_eq = layers.cast(layers.equal(new_token, eos_ids), dtype='float32')

            topk_score += eos_eq * -100000000.0

            unk_eq = layers.cast(layers.equal(new_token, unk_ids), dtype='float32')
            topk_score += unk_eq * -100000000.0

            # update
            token = new_token
            pre_score = topk_score
            token = layers.reshape(token, shape=[-1, 1])

            # Reorder every per-beam state to follow the selected parents.
            index = layers.cast(index, dtype='int32')
            dec_last_hidden = layers.squeeze(dec_last_hidden, axes=[0])
            dec_init_hidden = layers.gather(dec_last_hidden, index=index)
            dec_init_hidden = layers.unsqueeze(dec_init_hidden, axes=[0])
            init_enc_memory = layers.gather(init_enc_memory, index)
            init_enc_mask = layers.gather(init_enc_mask, index)
            init_dec_knowledge = layers.gather(init_dec_knowledge, index)

        # Stack the per-step results as [max_decode_len, beam_size * batch_size].
        final_score = layers.concat(score_array, axis=0)
        final_ids = layers.concat(id_array, axis=0)
        final_index = layers.concat(index_array, axis=0)

        final_score = layers.reshape(
            final_score, shape=[max_decode_len, beam_size * batch_size])
        final_ids = layers.reshape(
            final_ids, shape=[max_decode_len, beam_size * batch_size])
        final_index = layers.reshape(
            final_index, shape=[max_decode_len, beam_size * batch_size])

        return final_score, final_ids, final_index
def encoder(x,
            y,
            vocab_size,
            emb_size,
            init_hidden=None,
            init_cell=None,
            para_name='',
            custom_samples=None,
            custom_probabilities=None,
            test_mode=False,
            args=None):
    """Build one direction of the stacked LSTMP language model and its loss.

    Args:
        x: input token ids for the embedding lookup.
        y: target token ids for the loss.
        vocab_size: vocabulary size (embedding rows, softmax width).
        emb_size: embedding width; also passed to ``lstmp_encoder``.
        init_hidden: stacked initial hidden states (layer ``i`` is slice
            ``[i, i + 1)`` on axis 0), or ``None``.
        init_cell: stacked initial cell states, same layout, or ``None``.
        para_name: prefix for the per-layer parameter names.
        custom_samples: accepted but not used in this function.
        custom_probabilities: accepted but not used in this function.
        test_mode: forwarded to ``dropout`` / ``lstmp_encoder``; sampled
            softmax is only used when this is false.
        args: configuration object. Despite the ``None`` default it is
            dereferenced unconditionally (``args.num_layers``, ...), so it
            must be supplied.

    Returns:
        ``([x_emb, projection, loss], rnn_outs, rnn_outs_ori, cells, projs)``
        where the four lists hold one entry per layer.
    """
    x_emb = layers.embedding(
        input=x,
        size=[vocab_size, emb_size],
        dtype='float32',
        is_sparse=False,
        param_attr=fluid.ParamAttr(name='embedding_para'))
    rnn_input = x_emb

    rnn_outs = []
    rnn_outs_ori = []
    cells = []
    projs = []

    # Test for presence explicitly: the initial states are graph variables,
    # and their truth value is not a reliable "was it supplied" test.
    has_init_state = init_hidden is not None and init_cell is not None

    for i in range(args.num_layers):
        rnn_input = dropout(rnn_input, test_mode, args)
        if has_init_state:
            h0 = layers.squeeze(layers.slice(init_hidden,
                                             axes=[0],
                                             starts=[i],
                                             ends=[i + 1]),
                                axes=[0])
            c0 = layers.squeeze(layers.slice(init_cell,
                                             axes=[0],
                                             starts=[i],
                                             ends=[i + 1]),
                                axes=[0])
        else:
            h0 = c0 = None
        rnn_out, cell, input_proj = lstmp_encoder(
            rnn_input, args.hidden_size, h0, c0,
            para_name + 'layer{}'.format(i + 1), emb_size, test_mode, args)
        # Keep the raw layer output before the residual add and dropout.
        rnn_out_ori = rnn_out
        if i > 0:
            # Residual connection for every layer after the first.
            rnn_out = rnn_out + rnn_input
        rnn_out = dropout(rnn_out, test_mode, args)
        cell = dropout(cell, test_mode, args)
        rnn_outs.append(rnn_out)
        rnn_outs_ori.append(rnn_out_ori)
        rnn_input = rnn_out
        cells.append(cell)
        projs.append(input_proj)

    softmax_weight = layers.create_parameter([vocab_size, emb_size],
                                             dtype="float32",
                                             name="softmax_weight")
    softmax_bias = layers.create_parameter([vocab_size],
                                           dtype="float32",
                                           name='softmax_bias')
    projection = layers.matmul(rnn_outs[-1], softmax_weight, transpose_y=True)
    projection = layers.elementwise_add(projection, softmax_bias)
    projection = layers.reshape(projection, shape=[-1, vocab_size])

    if args.sample_softmax and (not test_mode):
        loss = layers.sampled_softmax_with_cross_entropy(
            logits=projection,
            label=y,
            num_samples=args.n_negative_samples_batch,
            seed=args.random_seed)
    else:
        label = layers.one_hot(input=y, depth=vocab_size)
        loss = layers.softmax_with_cross_entropy(logits=projection,
                                                 label=label,
                                                 soft_label=True)
    return [x_emb, projection, loss], rnn_outs, rnn_outs_ori, cells, projs
def padding_rnn(input_embedding, len=3, init_hidden=None, init_cell=None):
    """Run a stacked LSTM over ``input_embedding`` with ``PaddingRNN``.

    ``num_layers``, ``hidden_size``, ``init_scale``, ``dropout`` and
    ``num_steps`` are not parameters; they are read from the enclosing scope.

    Args:
        input_embedding: 3-D input; it is transposed with ``[1, 0, 2]``
            before being fed to the RNN step by step.
        len: not used (the name is kept for interface compatibility).
        init_hidden: stacked initial hidden states; layer ``i`` is slice
            ``[i, i + 1)`` on axis 0. It is sliced unconditionally, so it
            must be supplied despite the ``None`` default.
        init_cell: stacked initial cell states, same layout and caveat.

    Returns:
        ``(real_res, last_hidden, last_cell)``: the top layer's per-step
        output transposed back with ``[1, 0, 2]``, and the step
        ``num_steps - 1`` hidden / cell states of all layers concatenated
        along axis 0.
    """
    weight_1_arr = []
    bias_arr = []
    hidden_array = []
    cell_array = []

    for layer_idx in range(num_layers):
        weight_1 = layers.create_parameter(
            [hidden_size * 2, hidden_size * 4],
            dtype="float32",
            name="fc_weight1_" + str(layer_idx),
            default_initializer=fluid.initializer.UniformInitializer(
                low=-init_scale, high=init_scale))
        weight_1_arr.append(weight_1)
        bias_1 = layers.create_parameter(
            [hidden_size * 4],
            dtype="float32",
            name="fc_bias1_" + str(layer_idx),
            default_initializer=fluid.initializer.Constant(0.0))
        bias_arr.append(bias_1)

        pre_hidden = layers.slice(init_hidden,
                                  axes=[0],
                                  starts=[layer_idx],
                                  ends=[layer_idx + 1])
        pre_cell = layers.slice(init_cell,
                                axes=[0],
                                starts=[layer_idx],
                                ends=[layer_idx + 1])
        pre_hidden = layers.reshape(pre_hidden, shape=[-1, hidden_size])
        pre_cell = layers.reshape(pre_cell, shape=[-1, hidden_size])
        hidden_array.append(pre_hidden)
        cell_array.append(pre_cell)

    input_embedding = layers.transpose(input_embedding, perm=[1, 0, 2])
    rnn = PaddingRNN()

    with rnn.step():
        layer_input = rnn.step_input(input_embedding)
        for k in range(num_layers):
            pre_hidden = rnn.memory(init=hidden_array[k])
            pre_cell = rnn.memory(init=cell_array[k])
            weight_1 = weight_1_arr[k]
            bias = bias_arr[k]

            # One matmul yields all four gates, hidden_size columns each.
            nn = layers.concat([layer_input, pre_hidden], 1)
            gate_input = layers.matmul(x=nn, y=weight_1)
            gate_input = layers.elementwise_add(gate_input, bias)

            gate_i = layers.slice(gate_input,
                                  axes=[1],
                                  starts=[0],
                                  ends=[hidden_size])
            gate_j = layers.slice(gate_input,
                                  axes=[1],
                                  starts=[hidden_size],
                                  ends=[hidden_size * 2])
            gate_f = layers.slice(gate_input,
                                  axes=[1],
                                  starts=[hidden_size * 2],
                                  ends=[hidden_size * 3])
            gate_o = layers.slice(gate_input,
                                  axes=[1],
                                  starts=[hidden_size * 3],
                                  ends=[hidden_size * 4])

            c = pre_cell * layers.sigmoid(gate_f) + layers.sigmoid(
                gate_i) * layers.tanh(gate_j)
            m = layers.tanh(c) * layers.sigmoid(gate_o)

            rnn.update_memory(pre_hidden, m)
            rnn.update_memory(pre_cell, c)

            # Outputs 2*k and 2*k + 1 are layer k's hidden and cell states.
            rnn.step_output(m)
            rnn.step_output(c)

            layer_input = m

            if dropout is not None and dropout > 0.0:
                layer_input = layers.dropout(
                    layer_input,
                    dropout_prob=dropout,
                    dropout_implementation='upscale_in_train')

        # Last output: the top layer's (possibly dropped-out) activation.
        rnn.step_output(layer_input)

    rnnout = rnn()

    last_hidden_array = []
    last_cell_array = []
    real_res = rnnout[-1]
    for layer_idx in range(num_layers):
        m = rnnout[layer_idx * 2]
        c = rnnout[layer_idx * 2 + 1]
        m.stop_gradient = True
        c.stop_gradient = True
        last_h = layers.slice(m,
                              axes=[0],
                              starts=[num_steps - 1],
                              ends=[num_steps])
        last_hidden_array.append(last_h)
        last_c = layers.slice(c,
                              axes=[0],
                              starts=[num_steps - 1],
                              ends=[num_steps])
        last_cell_array.append(last_c)

    real_res = layers.transpose(x=real_res, perm=[1, 0, 2])
    last_hidden = layers.concat(last_hidden_array, 0)
    last_cell = layers.concat(last_cell_array, 0)

    return real_res, last_hidden, last_cell
def call(self, global_img_feat, p_img_feat, embedding_fn, words=None):
    """Decode a caption with a sentinel-gated attention LSTM in a While loop.

    Args:
        global_img_feat: global image feature, concatenated to every word
            embedding and used to create the zero LSTM state.
        p_img_feat: per-region image features ([batch, k, feat] according
            to the inline shape notes).
        embedding_fn: callable mapping word ids to embeddings.
        words: ground-truth word ids, transposed to [seq, batch] and read
            one step at a time when ``self.mode == 'train'``.

    Returns:
        The per-step outputs stacked and transposed to batch-major: word
        predictions in 'train' mode, predicted word ids (cast to float32)
        in 'eval' mode.
    """
    training = self.mode == 'train'
    evaluating = self.mode == 'eval'

    # Image features
    img_feat = layers.fc(p_img_feat,
                         self.hid_size,
                         num_flatten_dims=2,
                         act='tanh')  # [batch, k, hid]
    img_feat_emb = layers.fc(p_img_feat, self.hid_size, num_flatten_dims=2)

    if evaluating:
        # Start every sequence from the configured start token.
        word = layers.fill_constant_batch_size_like(
            global_img_feat, [-1],
            dtype='int64',
            value=config.data['start_idx'])
    else:
        words = layers.transpose(words, [1, 0])  # [seq, batch]
        words.stop_gradient = True

    # LSTM state initialisation
    hid = create_zero_state(global_img_feat)
    cell = create_zero_state(global_img_feat)

    # While-loop bookkeeping
    if training:
        mx = decoder_config['sentence_length'] - 1
    else:
        mx = decoder_config['infer_max_length']
    while_op_output = layers.create_array(
        'int64' if evaluating else 'float32')

    max_step = layers.fill_constant(shape=[1], dtype='int64', value=mx)
    step = layers.fill_constant(shape=[1], dtype='int64', value=0)
    cond = layers.less_than(step, max_step)
    while_op = layers.While(cond)

    with while_op.block():
        if training:
            # Teacher forcing: read the ground-truth word of this step.
            st = layers.cast(step, 'int32')
            word = layers.slice(words, axes=[0], starts=st, ends=st + 1)
            word = layers.squeeze(word, [0])
            word.stop_gradient = True

        word_emb = embedding_fn(word)
        # Using + here might work better?
        xt = layers.concat([word_emb, global_img_feat],
                           axis=-1)  # [batch, feat]
        h, c = layers.lstm_unit(xt,
                                hid,
                                cell,
                                param_attr=fluid.ParamAttr('lstm_w'),
                                bias_attr=fluid.ParamAttr('lstm_b'))

        # Sentinel: a gate computed from the input and the previous hidden
        # state, applied to tanh of the new cell state.
        p_word_emb = layers.fc(xt, size=self.hid_size)
        p_hidden = layers.fc(hid, size=self.hid_size)
        gate_logits = p_word_emb + p_hidden
        sentinel_gate = layers.sigmoid(gate_logits)  # [batch, hidden]
        cell_act = layers.tanh(c)
        sentinel = layers.elementwise_mul(sentinel_gate,
                                          cell_act)  # [batch, hidden]

        # Write the new state back for the next iteration.
        layers.assign(h, hid)
        layers.assign(c, cell)

        k = layers.shape(p_img_feat)[1]
        p_hid = layers.fc(h, self.hid_size, act='tanh')

        # Attention: scores over the k regions plus the sentinel slot.
        hid_emb = layers.fc(p_hid, self.hid_size)  # [batch, hidden]
        hid_emb_3d = layers.unsqueeze(hid_emb, 1)
        exp_hid_emb = layers.expand(hid_emb_3d,
                                    [1, k + 1, 1])  # [batch, k+1, hidden]
        sentinel_proj = layers.fc(sentinel, self.hid_size)
        sentinel_emb = layers.unsqueeze(sentinel_proj,
                                        axes=1)  # [batch, 1, hidden]
        feat_emb = layers.concat([img_feat_emb, sentinel_emb],
                                 axis=1)  # [batch, k+1, hidden]
        z = layers.tanh(feat_emb + exp_hid_emb)
        alpha = layers.fc(z, size=1, num_flatten_dims=2,
                          act='softmax')  # [batch, k+1, 1]

        # Context vector
        sentinel_slot = layers.unsqueeze(sentinel, axes=1)
        context = layers.concat([img_feat, sentinel_slot],
                                axis=1)  # [batch, k+1, hidden]
        context = layers.elementwise_mul(context, alpha, axis=0)
        context = layers.reduce_mean(context, dim=1)  # [batch, hidden]

        out = layers.fc(context + p_hid, self.hid_size, act='tanh')

        word_pred = weight_tying_fc(out)  # [batch, vocab]

        if evaluating:
            # Greedy decoding: feed the arg-max word to the next step.
            next_word = layers.argmax(word_pred, axis=-1)
            layers.assign(next_word, word)
            next_word = layers.cast(next_word, 'float32')
            layers.array_write(next_word, step, array=while_op_output)
        else:
            layers.array_write(word_pred, step, array=while_op_output)

        layers.increment(step)
        layers.less_than(step, max_step, cond=cond)

    # Stack the per-step outputs (time-major) and move batch to the front.
    stacked = layers.tensor_array_to_tensor(while_op_output,
                                            axis=0,
                                            use_stack=True)
    output_time_major = stacked[0]
    batch_major_perm = [1, 0, 2] if training else [1, 0]
    output = layers.transpose(output_time_major, batch_major_perm)
    return output
def fast_decode(self, pyreader_name):
    """Build the inference graph: encode once, then beam-search decode.

    Args:
        pyreader_name: name given to the ``py_reader`` that feeds the graph.

    Returns:
        ``(pyreader, graph_vars)`` where ``graph_vars`` maps
        ``"finished_ids"``, ``"finished_scores"`` and ``"data_ids"`` to
        variables that have been marked persistable.
    """
    pyreader = fluid.layers.py_reader(
        capacity=50,
        shapes=[
            [-1, self.max_para_num, self.max_para_len],  # src_word
            [-1, self.max_para_num, self.max_para_len],  # src_word_pos
            [-1, self.max_para_num],  # src_sent_pos
            [-1, self.max_para_num, self.max_para_len],  # src_words_slf_attn_bias
            [-1, self.max_para_num],  # src_sents_slf_attn_bias
            [-1, self.max_para_num, self.max_para_num],  # graph_attn_bias
            [-1, 1],  # start_tokens
            [-1, 1],  # init_scores
            [-1],  # parent_idx
            [-1, 1]
        ],  # data_ids
        dtypes=[
            'int64', 'int64', 'int64', 'float32', 'float32', 'float32',
            'int64', 'float32', 'int64', 'int64'
        ],
        lod_levels=[0, 0, 0, 0, 0, 0, 2, 2, 0, 0],
        name=pyreader_name,
        use_double_buffer=True)

    (src_word, src_word_pos, src_sent_pos, src_words_slf_attn_bias,
     src_sents_slf_attn_bias, graph_attn_bias, start_tokens,
     init_scores, parent_idx, data_ids) = \
        fluid.layers.read_file(pyreader)

    # Broadcast the padding biases to per-head attention shapes:
    #   words: [B, P, L]  -> [B, P, n_head, L, L]
    #   sents: [B, P]     -> [B, n_head, P, P]
    #   graph: [B, P, P]  -> [B, n_head, P, P]
    # (P = max_para_num, L = max_para_len)
    src_words_slf_attn_bias = layers.expand(
        layers.unsqueeze(src_words_slf_attn_bias, axes=[2, 3]),
        expand_times=[1, 1, self._n_head, self.max_para_len, 1])
    src_words_slf_attn_bias.stop_gradient = True

    src_sents_slf_attn_bias = layers.expand(
        layers.unsqueeze(src_sents_slf_attn_bias, axes=[1, 2]),
        expand_times=[1, self._n_head, self.max_para_num, 1])
    src_sents_slf_attn_bias.stop_gradient = True

    graph_attn_bias = layers.expand(layers.unsqueeze(graph_attn_bias, axes=[1]),
                                    expand_times=[1, self._n_head, 1, 1])
    graph_attn_bias.stop_gradient = True

    # The decoder attends from one target position at a time, so only a
    # single query row of each self-attention bias is kept.
    tgt_src_words_attn_bias = layers.slice(src_words_slf_attn_bias, axes=[3],
                                           starts=[0], ends=[1])
    tgt_src_words_attn_bias.stop_gradient = True

    tgt_src_sents_attn_bias = layers.slice(src_sents_slf_attn_bias, axes=[2],
                                           starts=[0], ends=[1])
    tgt_src_sents_attn_bias.stop_gradient = True

    # Add a trailing unit dimension to the id / position inputs.
    src_word = layers.reshape(
        src_word, [-1, self.max_para_num, self.max_para_len, 1])
    src_word_pos = layers.reshape(
        src_word_pos, [-1, self.max_para_num, self.max_para_len, 1])
    src_sent_pos = layers.reshape(src_sent_pos, [-1, self.max_para_num, 1])

    enc_input = (src_word, src_word_pos, src_sent_pos, src_words_slf_attn_bias,
                 src_sents_slf_attn_bias, graph_attn_bias)
    enc_words_output, enc_sents_output = self.encode(enc_input=enc_input)

    def beam_search():
        """Run the While-loop beam search; return finished ids and scores."""
        max_len = layers.fill_constant(shape=[1],
                                       dtype=start_tokens.dtype,
                                       value=self.max_out_len,
                                       force_cpu=True)
        min_len = layers.fill_constant(shape=[1],
                                       dtype=start_tokens.dtype,
                                       value=self.min_out_len)
        neg_inf = layers.fill_constant(shape=[1],
                                       dtype='float32',
                                       value=-INF)
        step_idx = layers.fill_constant(shape=[1],
                                        dtype=start_tokens.dtype,
                                        value=0,
                                        force_cpu=True)
        # step_idx + 1, kept as its own counter for the current-step
        # length penalty.
        step_next_idx = layers.fill_constant(shape=[1],
                                             dtype=start_tokens.dtype,
                                             value=1,
                                             force_cpu=True)

        cond = layers.less_than(x=step_idx, y=max_len)  # default force_cpu=True
        while_op = layers.While(cond)
        # array states will be stored for each step.
        ids = layers.array_write(layers.reshape(start_tokens, (-1, 1)), step_idx)
        scores = layers.array_write(init_scores, step_idx)
        # cell states will be overwritten at each step.
        # caches contains states of history steps in decoder self-attention
        # and static encoder output projections in encoder-decoder attention
        # to reduce redundant computation.
        caches = [
            {
                "k":  # for self attention
                    layers.fill_constant_batch_size_like(
                        input=start_tokens,
                        shape=[-1, self._n_head, 0, self._emb_size // self._n_head],
                        dtype=enc_words_output.dtype,
                        value=0),
                "v":  # for self attention
                    layers.fill_constant_batch_size_like(
                        input=start_tokens,
                        shape=[-1, self._n_head, 0, self._emb_size // self._n_head],
                        dtype=enc_words_output.dtype,
                        value=0),
                "static_k_word":  # for encoder-decoder attention
                    layers.create_tensor(dtype=enc_words_output.dtype),
                "static_v_word":  # for encoder-decoder attention
                    layers.create_tensor(dtype=enc_words_output.dtype),
                "static_k_sent":  # for encoder-decoder attention
                    layers.create_tensor(dtype=enc_sents_output.dtype),
                "static_v_sent":  # for encoder-decoder attention
                    layers.create_tensor(dtype=enc_sents_output.dtype)
            } for i in range(self._dec_n_layer)
        ]

        trigram_blocking = TrigramBlocking(start_tokens, self.tokenizer,
                                           use_fp16=self._use_fp16, beam_size=self.beam_size)

        with while_op.block():
            pre_ids = layers.array_read(array=ids, i=step_idx)
            pre_ids = layers.reshape(pre_ids, (-1, 1, 1), inplace=True)
            # Since beam_search_op doesn't enforce pre_ids' shape, we can do
            # inplace reshape here which actually change the shape of pre_ids.
            # pre_ids = layers.reshape(pre_ids, (-1, 1, 1), inplace=True)
            pre_scores = layers.array_read(array=scores, i=step_idx)
            # gather cell states corresponding to selected parent
            pre_src_words_attn_bias = layers.gather(
                tgt_src_words_attn_bias, index=parent_idx)
            pre_src_sents_attn_bias = layers.gather(
                tgt_src_sents_attn_bias, index=parent_idx)
            pre_graph_attn_bias = layers.gather(graph_attn_bias, index=parent_idx)

            # Position input: a [-1, 1, 1] tensor of ones multiplied by the
            # current step index.
            pre_pos = layers.elementwise_mul(
                x=layers.fill_constant_batch_size_like(
                    input=pre_src_sents_attn_bias,  # can't use lod tensor here
                    value=1,
                    shape=[-1, 1, 1],
                    dtype=pre_ids.dtype),
                y=step_idx,
                axis=0)

            logits = self.decode(
                dec_input=(pre_ids, pre_pos, None, pre_src_words_attn_bias,
                           pre_src_sents_attn_bias, pre_graph_attn_bias),
                enc_words_output=enc_words_output,
                enc_sents_output=enc_sents_output,
                caches=caches,
                gather_idx=parent_idx)

            # prevent generating end token if length less than min_out_len:
            # -INF is added to the EOS logit while step_idx < min_len.
            eos_index = layers.fill_constant(
                shape=[layers.shape(logits)[0]],
                dtype='int64',
                value=self.eos_idx)
            eos_index = fluid.one_hot(eos_index, depth=self.voc_size)
            less_cond = layers.cast(layers.less_than(x=step_idx, y=min_len), dtype='float32')
            less_val = layers.elementwise_mul(less_cond, neg_inf)
            eos_val = layers.elementwise_mul(eos_index, less_val, axis=0)
            revised_logits = layers.elementwise_add(logits, eos_val, axis=0)

            # topK reduction across beams, also contain special handle of
            # end beams and end sentences(batch reduction)
            topk_scores, topk_indices = layers.topk(
                input=layers.softmax(revised_logits), k=self.beam_size)

            # Roll back the previous scores' length penalty.
            # The stored previous scores already include the length penalty
            # of their own time step, so it is multiplied back out here
            # before this step's penalty is applied. As a consequence the
            # penalised score is what gets stored in `scores`, while the
            # accumulation itself uses the un-penalised score.
            # -> safe for step_idx == 0 (initialization state), because previous-score == 0
            pre_timestep_length_penalty = fluid.layers.pow(
                ((5.0 + fluid.layers.cast(step_idx, pre_scores.dtype)) / 6.0),
                self.len_penalty)
            pre_scores_wo_len_penalty = fluid.layers.elementwise_mul(
                pre_scores, pre_timestep_length_penalty)

            # calc trigram-blocking delta scores for current alive sequence
            if self.block_trigram:
                trigram_blocking.update_seq(pre_ids, parent_idx)
                trigram_blocking.expand_cand_seq(topk_indices)
                fluid.layers.py_func(
                    func=trigram_blocking.blocking_forward,
                    x=[
                        trigram_blocking.cand_seq,
                        trigram_blocking.id2is_full_token
                    ],
                    out=trigram_blocking.delta_score_out,
                    backward_func=None)
                # NOTE(review): this Print op is part of the graph and runs
                # on every decoding step - looks like leftover debugging
                # output; confirm it is wanted.
                layers.Print(trigram_blocking.delta_score_out,
                             summarize=100,
                             message="trigram_blocking.delta_score_out")
                pre_scores_wo_len_penalty = fluid.layers.elementwise_add(
                    x=trigram_blocking.delta_score_out,
                    y=pre_scores_wo_len_penalty,
                    axis=0)
            # => [N, topk]
            accu_scores = layers.elementwise_add(
                x=layers.log(topk_scores), y=pre_scores_wo_len_penalty, axis=0)

            # Apply this step's penalty: ((5 + step_next_idx) / 6) ** len_penalty.
            cur_timestep_length_penalty = layers.pow(
                ((5.0 + layers.cast(step_next_idx, accu_scores.dtype)) / 6.0),
                self.len_penalty)
            curr_scores = layers.elementwise_div(
                accu_scores, cur_timestep_length_penalty)

            # beam_search op uses lod to differentiate branches.
            curr_scores = layers.lod_reset(curr_scores, pre_ids)
            topk_indices = layers.lod_reset(topk_indices, pre_ids)
            selected_ids, selected_scores, gather_idx = layers.beam_search(
                pre_ids=pre_ids,
                pre_scores=pre_scores,
                ids=topk_indices,
                scores=curr_scores,
                beam_size=self.beam_size,
                end_id=self.eos_idx,
                return_parent_idx=True)

            layers.increment(x=step_idx, value=1.0, in_place=True)
            layers.increment(x=step_next_idx, value=1.0, in_place=True)
            # cell states(caches) have been updated in wrap_decoder,
            # only need to update beam search states here.
            layers.array_write(selected_ids, i=step_idx, array=ids)
            layers.array_write(selected_scores, i=step_idx, array=scores)
            # Carry the parent indices and the gathered biases over to the
            # next iteration by writing them back into the loop variables.
            layers.assign(gather_idx, parent_idx)
            layers.assign(pre_src_words_attn_bias, tgt_src_words_attn_bias)
            layers.assign(pre_src_sents_attn_bias, tgt_src_sents_attn_bias)
            layers.assign(pre_graph_attn_bias, graph_attn_bias)

            # Continue while below max length and some beam is still alive.
            length_cond = layers.less_than(x=step_idx, y=max_len)
            finish_cond = layers.logical_not(
                layers.is_empty(x=selected_ids))
            layers.logical_and(x=length_cond, y=finish_cond, out=cond)

        finished_ids, finished_scores = layers.beam_search_decode(
            ids, scores, beam_size=self.beam_size, end_id=self.eos_idx)

        return finished_ids, finished_scores

    finished_ids, finished_scores = beam_search()

    graph_vars = {
        "finished_ids": finished_ids,
        "finished_scores": finished_scores,
        "data_ids": data_ids
    }

    # Mark the outputs persistable.
    for k, v in graph_vars.items():
        v.persistable = True

    return pyreader, graph_vars
def __call__(self, location, confidence, gt_box, gt_label, landmark_predict,
             lmk_label, lmk_ignore_flag, prior_box, prior_box_var=None):
    """Build the SSD loss graph with an extra landmark regression term.

    The graph is assembled in five stages: match prior boxes to ground
    truth, compute a preliminary confidence loss, mine hard negatives
    with the `mine_hard_examples` op, assign box / label / landmark
    targets, and combine the weighted loss terms.

    Args:
        location: Predicted box regression output; flattened from axis 2
            before the smooth-L1 loss.
        confidence: Predicted class scores. Its static `shape` must unpack
            into exactly three entries.
        gt_box: Ground-truth boxes, used for IoU matching and box encoding.
        gt_label: Ground-truth labels; reshaped to a trailing `[-1, 1]`.
        landmark_predict: Predicted landmark regression output.
        lmk_label: Ground-truth landmarks, passed to `self.decode_lmk`.
        lmk_ignore_flag: Per-ground-truth flag multiplied into the landmark
            weight (presumably 0 disables the landmark term for that
            object -- confirm against the data reader).
        prior_box: Prior (anchor) boxes.
        prior_box_var: Optional prior box variances, forwarded to
            `box_coder` and `self.decode_lmk`.

    Returns:
        The summed loss after reshaping with the first two dims of
        `confidence` and reducing along dim 1 with `keep_dim=True`. When
        `self.normalize` is truthy it is divided by
        `reduce_sum(location weights) + 1`.
    """

    def _flatten_2d(tensor_var):
        return layers.flatten(x=tensor_var, axis=2)

    def _to_column(tensor_var):
        # Keep every leading dim (a 0 entry in the target shape) and fold
        # the remainder into a trailing [-1, 1].
        leading_dims = (0, ) * (len(tensor_var.shape) - 1)
        return layers.reshape(x=tensor_var, shape=leading_dims + (-1, 1))

    op_helper = LayerHelper('ssd_loss')
    # Only 'max_negative' mining is supported here. `sample_size` is the
    # cap on negative boxes used by the 'hard_example' mining type, so it
    # stays unset.
    mining_type = 'max_negative'
    sample_size = None

    # The unpack doubles as a check that the static shape has three dims.
    _batch, _priors, _classes = confidence.shape
    conf_dims = layers.shape(confidence)

    # 1. Match ground-truth boxes to prior boxes.
    # 1.1 IoU similarity between ground-truth boxes and prior boxes.
    overlaps = iou_similarity(x=gt_box, y=prior_box)
    # 1.2 Bipartite matching on the IoU matrix.
    match_idx, match_dist = bipartite_match(
        overlaps, self.match_type, self.overlap_threshold)

    # 2. Preliminary confidence loss, used only to rank hard examples.
    # 2.1 Target labels from the matched indices.
    label_col = _to_column(gt_label)
    label_col.stop_gradient = True
    mining_label, _ = target_assign(
        label_col, match_idx, mismatch_value=self.background_label)
    # 2.2 Confidence loss on 2-D tensors.
    conf_2d = _flatten_2d(confidence)
    mining_label = tensor.cast(x=mining_label, dtype='int64')
    mining_label = _flatten_2d(mining_label)
    mining_label.stop_gradient = True
    mining_loss = layers.softmax_with_cross_entropy(conf_2d, mining_label)

    # 3. Mine hard examples.
    # The first two runtime dims of `confidence` restore the 2-D layout.
    batch_prior_shape = layers.slice(
        conf_dims, axes=[0], starts=[0], ends=[2])
    batch_prior_shape.stop_gradient = True
    mining_loss = layers.reshape(
        x=mining_loss, shape=(-1, 0), actual_shape=batch_prior_shape)
    mining_loss.stop_gradient = True

    neg_idx = op_helper.create_variable_for_type_inference(dtype='int32')
    refined_match_idx = op_helper.create_variable_for_type_inference(
        dtype=match_idx.dtype)
    mining_inputs = {
        'ClsLoss': mining_loss,
        'LocLoss': None,
        'MatchIndices': match_idx,
        'MatchDist': match_dist,
    }
    mining_outputs = {
        'NegIndices': neg_idx,
        'UpdatedMatchIndices': refined_match_idx
    }
    mining_attrs = {
        'neg_pos_ratio': self.neg_pos_ratio,
        'neg_dist_threshold': self.neg_overlap,
        'mining_type': mining_type,
        'sample_size': sample_size,
    }
    op_helper.append_op(
        type='mine_hard_examples',
        inputs=mining_inputs,
        outputs=mining_outputs,
        attrs=mining_attrs)

    # 4. Assign classification, regression and landmark targets.
    # 4.1 Encode ground-truth boxes relative to the prior boxes.
    encoded_boxes = box_coder(
        prior_box=prior_box,
        prior_box_var=prior_box_var,
        target_box=gt_box,
        code_type='encode_center_size')
    # 4.2 Regression targets.
    box_target, box_weight = target_assign(
        encoded_boxes,
        refined_match_idx,
        mismatch_value=self.background_label)
    # 4.3 Classification targets, including the mined negatives.
    cls_target, cls_weight = target_assign(
        label_col,
        refined_match_idx,
        negative_indices=neg_idx,
        mismatch_value=self.background_label)
    # Scale the box weight by the assigned label value.
    box_weight = box_weight * cls_target

    # 4.4 Landmark targets; the ignore flag scales the landmark weight.
    encoded_lmk = self.decode_lmk(lmk_label, prior_box, prior_box_var)
    lmk_target, lmk_weight = target_assign(
        encoded_lmk,
        refined_match_idx,
        mismatch_value=self.background_label)
    ignore_col = _to_column(lmk_ignore_flag)
    ignore_target, unused_weight = target_assign(
        ignore_col,
        refined_match_idx,
        mismatch_value=self.background_label)
    lmk_weight = lmk_weight * ignore_target

    lmk_pred_2d = _flatten_2d(landmark_predict)
    lmk_target = _flatten_2d(lmk_target)
    lmk_weight = _flatten_2d(lmk_weight)
    lmk_loss = layers.smooth_l1(lmk_pred_2d, lmk_target)
    lmk_loss = lmk_loss * lmk_weight
    # Targets and weights carry no gradient.
    for frozen in (lmk_target, lmk_weight, ignore_target, unused_weight):
        frozen.stop_gradient = True

    # 5. Compute the loss.
    # 5.1 Confidence loss against the final classification targets.
    cls_target = _flatten_2d(cls_target)
    cls_target = tensor.cast(x=cls_target, dtype='int64')
    conf_loss = layers.softmax_with_cross_entropy(conf_2d, cls_target)
    cls_weight = _flatten_2d(cls_weight)
    conf_loss = conf_loss * cls_weight
    cls_target.stop_gradient = True
    cls_weight.stop_gradient = True

    # 5.2 Box regression loss.
    loc_2d = _flatten_2d(location)
    box_target = _flatten_2d(box_target)
    loc_loss = layers.smooth_l1(loc_2d, box_target)
    box_weight = _flatten_2d(box_weight)
    loc_loss = loc_loss * box_weight
    box_target.stop_gradient = True
    box_weight.stop_gradient = True

    # 5.3 Weighted sum; the landmark term uses a fixed 0.4 weight.
    total = (self.conf_loss_weight * conf_loss +
             self.loc_loss_weight * loc_loss)
    total = total + 0.4 * lmk_loss
    # Reshape to [N, Np] (batch size x prior box count), then sum per row.
    total = layers.reshape(
        x=total, shape=(-1, 0), actual_shape=batch_prior_shape)
    total = layers.reduce_sum(total, dim=1, keep_dim=True)
    if not self.normalize:
        return total
    # The +1 keeps the divisor non-zero when no prior has a box weight.
    return total / (layers.reduce_sum(box_weight) + 1)