def is_finished(self, step_idx, source_length, alive_log_probs, finished_scores,
                finished_in_finished):
    """Return the while-loop condition: keep decoding while early stopping has not
    triggered (the lowest finished score does not yet dominate the best alive score
    for every batch) and step_idx is still below source_length + 50."""
    # Length-normalization factor applied to the alive log-probs.
    base_1 = layers.cast(source_length, 'float32') + 55.0
    base_1 /= 6.0
    max_length_penalty = layers.pow(base_1, self.alpha)

    # Best achievable (length-normalized) score among the alive beams.
    flat_alive_log_probs = layers.reshape(alive_log_probs, [-1])
    lower_bound_alive_scores_1 = layers.gather(flat_alive_log_probs, [self.get_alive_index])
    lower_bound_alive_scores = lower_bound_alive_scores_1 / max_length_penalty

    # Worst score among the beams that actually finished; batches with no finished
    # beam get -INF so they never trigger early stopping.
    lowest_score_of_finished_in_finish = layers.reduce_min(
        finished_scores * finished_in_finished, dim=1)
    finished_in_finished = layers.cast(finished_in_finished, 'bool')
    lowest_score_of_finished_in_finish += \
        (1.0 - layers.cast(layers.reduce_any(finished_in_finished, 1), 'float32')) * -INF

    bound_is_met = layers.reduce_all(
        layers.greater_than(lowest_score_of_finished_in_finish, lower_bound_alive_scores))

    decode_length = source_length + 50
    length_cond = layers.less_than(x=step_idx, y=decode_length)
    return layers.logical_and(x=layers.logical_not(bound_is_met), y=length_cond)
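For orientation, the penalty above is simply ((source_length + 55) / 6) ** self.alpha applied as a divisor to the alive log-probs. A tiny stand-alone check of that arithmetic; alpha = 0.6 and the sample values are assumptions for illustration only:

# Stand-alone check of the length-penalty arithmetic above.
# alpha = 0.6 is an assumed value; the model uses self.alpha.
source_length = 25.0
alpha = 0.6
max_length_penalty = ((source_length + 55.0) / 6.0) ** alpha
print(max_length_penalty)          # ~4.73
normalized_score = -8.0 / max_length_penalty  # how an alive log-prob of -8.0 would be rescaled
print(normalized_score)            # ~-1.69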
def reduce_compare(x, op_str, y):
    """Collapse an element-wise comparison between tensors into a single boolean."""
    element_wise_result = eval("x " + op_str + " y")
    if op_str == "!=":
        # Two tensors differ if any element differs.
        return reduce_any(element_wise_result)
    elif op_str in ("is", "is not", "in", "not in"):
        # Identity/membership results are returned as-is.
        return element_wise_result
    else:
        # ==, <, <=, >, >= hold only if they hold for every element.
        return reduce_all(element_wise_result)
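A minimal sketch of what this collapsing does, using paddle.fluid's public reduce_any / reduce_all in dygraph mode with explicit comparison ops instead of the eval'd operators; the inputs are made up for illustration:

import numpy as np
import paddle.fluid as fluid
from paddle.fluid import layers

with fluid.dygraph.guard():
    x = fluid.dygraph.to_variable(np.array([1, 2, 3], dtype='int64'))
    y = fluid.dygraph.to_variable(np.array([1, 2, 4], dtype='int64'))
    # "!=" reduces with any(): the tensors differ because at least one element differs.
    print(layers.reduce_any(layers.not_equal(x, y)).numpy())  # -> True
    # "==" reduces with all(): the tensors are equal only if every element matches.
    print(layers.reduce_all(layers.equal(x, y)).numpy())      # -> False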
def forward(self, x):
    """Forward network"""
    # A position is kept if any id along the last axis differs from pad_index.
    mask = layers.reduce_any(x != self.pad_index, -1)
    lens = nn.reduce_sum(mask, -1)
    # Drop padded positions, run the transformer, then re-pad per sample.
    masked_x = nn.masked_select(x, mask)
    h, _ = self.transformer(masked_x)
    feat_embed = nn.pad_sequence_paddle(
        layers.split(h, lens.numpy().tolist(), dim=0), self.pad_index)
    return feat_embed
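The first three lines implement a common pad-masking pattern: reduce_any over the last axis marks positions that contain at least one non-pad id, and summing that mask gives per-sample lengths. A small sketch of the pattern using only the public fluid API (the project's nn helpers are replaced by plain layers calls, and pad_index = 0 is assumed):

import numpy as np
import paddle.fluid as fluid
from paddle.fluid import layers

pad_index = 0  # assumed for illustration
with fluid.dygraph.guard():
    # One sentence with three word slots, each holding up to three char ids.
    x_np = np.array([[[3, 5, 0], [7, 0, 0], [0, 0, 0]]], dtype='int64')
    x = fluid.dygraph.to_variable(x_np)
    pad = fluid.dygraph.to_variable(np.full(x_np.shape, pad_index, dtype='int64'))
    # A slot is real if any of its ids differs from pad_index.
    mask = layers.reduce_any(layers.not_equal(x, pad), -1)    # [1, 3] bool
    lens = layers.reduce_sum(layers.cast(mask, 'int64'), -1)  # words per sentence
    print(mask.numpy())  # [[ True  True False]]
    print(lens.numpy())  # [2]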
def _push_to_stack(gmr_desc, gmr_pos, gmr_lens, gmr_stack_info):
    """Push grammar ids in gmr_desc, from gmr_pos up to gmr_lens, onto gmr_stack,
    and update step_gmr_pos.

    Args:
        gmr_desc (TYPE): NULL
        gmr_pos (TYPE): NULL
        gmr_lens (TYPE): NULL
        gmr_stack_info (tuple): [in/out] (gmr_stack, gmr_stack_pos)

    Returns:
        tuple (gmr_stack, gmr_stack_pos)

    Raises:
        NULL
    """
    gmr_stack, gmr_stack_pos = gmr_stack_info
    # Only samples with a non-empty grammar rule take a step.
    mv_step = layers.cast(
        layers.greater_than(gmr_lens, layers.zeros_like(gmr_lens)), dtype=gmr_lens.dtype)
    gmr_mv_pos = layers.elementwise_sub(gmr_lens, mv_step)

    cond = layers.reduce_any(layers.greater_than(gmr_mv_pos, gmr_pos))
    while_op = layers.While(cond)
    with while_op.block():
        gmr_ids = nn_utils.batch_gather(gmr_desc, gmr_mv_pos)
        gmr_stack_tmp, gmr_stack_pos_tmp = data_structure.Stack.push(
            gmr_stack_info, gmr_ids, in_place=False)

        mv_cond = layers.greater_than(gmr_mv_pos, gmr_pos)
        gmr_mv_pos_tmp = fluider.elementwise_sub(gmr_mv_pos, mv_cond, force=True)
        new_gmr_stack, new_gmr_stack_pos = nn_utils.ifelse(
            mv_cond, [gmr_stack_tmp, gmr_stack_pos_tmp], [gmr_stack, gmr_stack_pos])

        layers.utils.map_structure(
            layers.assign, [new_gmr_stack, new_gmr_stack_pos], [gmr_stack, gmr_stack_pos])
        layers.assign(gmr_mv_pos_tmp, gmr_mv_pos)
        # Loop while any sample still has grammar ids left to push.
        layers.assign(
            layers.reduce_any(layers.greater_than(gmr_mv_pos, gmr_pos)), cond)
    return gmr_stack, gmr_stack_pos
def forward(self, x):
    """Forward network"""
    # A position is kept if any id along the last axis differs from pad_index.
    mask = layers.reduce_any(x != self.pad_index, -1)
    lens = nn.reduce_sum(mask, -1)
    masked_x = nn.masked_select(x, mask)
    char_mask = masked_x != self.pad_index
    # Embed the chars, run the LSTM, and concatenate the final hidden states
    # along the feature axis.
    emb = self.embed(masked_x)
    _, (h, _) = self.lstm(emb, char_mask, self.pad_index)
    h = layers.concat(layers.unstack(h), axis=-1)
    # Re-pad the per-sample slices back into a batch.
    feat_embed = nn.pad_sequence_paddle(
        layers.split(h, lens.numpy().tolist(), dim=0), self.pad_index)
    return feat_embed
def beam_search(enc_output, enc_bias, source_length):
    """beam_search"""
    max_len = layers.fill_constant(shape=[1], dtype='int64', value=max_out_len)
    step_idx = layers.fill_constant(shape=[1], dtype='int64', value=0)
    cond = layers.less_than(x=step_idx, y=max_len)
    while_op = layers.While(cond)

    caches_batch_size = batch_size * beam_size
    init_score = np.zeros([1, beam_size]).astype('float32')
    init_score[:, 1:] = -INF
    initial_log_probs = layers.assign(init_score)
    alive_log_probs = layers.expand(initial_log_probs, [batch_size, 1])

    # alive seq [batch_size, beam_size, 1]
    initial_ids = layers.zeros([batch_size, 1, 1], 'float32')
    alive_seq = layers.expand(initial_ids, [1, beam_size, 1])
    alive_seq = layers.cast(alive_seq, 'int64')

    # Tile the encoder output and attention bias for every beam.
    enc_output = layers.unsqueeze(enc_output, axes=[1])
    enc_output = layers.expand(enc_output, [1, beam_size, 1, 1])
    enc_output = layers.reshape(enc_output, [caches_batch_size, -1, d_model])

    tgt_src_attn_bias = layers.unsqueeze(enc_bias, axes=[1])
    tgt_src_attn_bias = layers.expand(tgt_src_attn_bias, [1, beam_size, n_head, 1, 1])
    enc_bias_shape = layers.shape(tgt_src_attn_bias)
    tgt_src_attn_bias = layers.reshape(
        tgt_src_attn_bias,
        [-1, enc_bias_shape[2], enc_bias_shape[3], enc_bias_shape[4]])

    beam_search = BeamSearch(beam_size, batch_size, decode_alpha, trg_vocab_size, d_model)

    # Empty key/value caches for every decoder layer.
    caches = [{
        "k": layers.fill_constant(
            shape=[caches_batch_size, 0, d_model], dtype=enc_output.dtype, value=0),
        "v": layers.fill_constant(
            shape=[caches_batch_size, 0, d_model], dtype=enc_output.dtype, value=0)
    } for i in range(n_layer)]

    finished_seq = layers.zeros_like(alive_seq)
    finished_scores = layers.fill_constant(
        [batch_size, beam_size], dtype='float32', value=-INF)
    finished_flags = layers.fill_constant(
        [batch_size, beam_size], dtype='float32', value=0)

    with while_op.block():
        pos = layers.fill_constant([caches_batch_size, 1, 1], dtype='int64', value=1)
        pos = layers.elementwise_mul(pos, step_idx, axis=0)

        # Feed only the last generated token to the decoder at each step.
        alive_seq_1 = layers.reshape(alive_seq, [caches_batch_size, -1])
        alive_seq_2 = alive_seq_1[:, -1:]
        alive_seq_2 = layers.unsqueeze(alive_seq_2, axes=[1])

        logits = wrap_decoder(
            trg_vocab_size, max_in_len, n_layer, n_head, d_key, d_value, d_model,
            d_inner_hid, prepostprocess_dropout, attention_dropout, relu_dropout,
            preprocess_cmd, postprocess_cmd, weight_sharing, embedding_sharing,
            dec_inputs=(alive_seq_2, alive_seq_2, pos, None, tgt_src_attn_bias),
            enc_output=enc_output, caches=caches, is_train=False,
            params_type=params_type)

        alive_seq_2, alive_log_probs_2, finished_seq_2, finished_scores_2, finished_flags_2, caches_2 = \
            beam_search.inner_func(step_idx, logits, alive_seq_1, alive_log_probs,
                                   finished_seq, finished_scores, finished_flags,
                                   caches, enc_output, tgt_src_attn_bias)

        layers.increment(x=step_idx, value=1.0, in_place=True)
        finish_cond = beam_search.is_finished(step_idx, source_length, alive_log_probs_2,
                                              finished_scores_2, finished_flags_2)

        # Write the updated beam state back into the loop variables.
        layers.assign(alive_seq_2, alive_seq)
        layers.assign(alive_log_probs_2, alive_log_probs)
        layers.assign(finished_seq_2, finished_seq)
        layers.assign(finished_scores_2, finished_scores)
        layers.assign(finished_flags_2, finished_flags)
        for i in range(len(caches_2)):
            layers.assign(caches_2[i]["k"], caches[i]["k"])
            layers.assign(caches_2[i]["v"], caches[i]["v"])

        layers.logical_and(x=cond, y=finish_cond, out=cond)

    # For batches where every beam finished, keep the finished beams and scores;
    # otherwise fall back to the alive beams.
    finished_flags = layers.reduce_sum(finished_flags, dim=1, keep_dim=True) / beam_size
    finished_flags = layers.cast(finished_flags, 'bool')
    mask = layers.cast(
        layers.reduce_any(input=finished_flags, dim=1, keep_dim=True), 'float32')
    mask = layers.expand(mask, [1, beam_size])
    mask2 = 1.0 - mask

    finished_seq = layers.cast(finished_seq, 'float32')
    alive_seq = layers.cast(alive_seq, 'float32')
    finished_seq = layers.elementwise_mul(finished_seq, mask, axis=0) + \
        layers.elementwise_mul(alive_seq, mask2, axis=0)
    finished_seq = layers.cast(finished_seq, 'int32')
    finished_scores = layers.elementwise_mul(finished_scores, mask, axis=0) + \
        layers.elementwise_mul(alive_log_probs, mask2)

    finished_seq.persistable = True
    finished_scores.persistable = True
    return finished_seq, finished_scores
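The tail of the function blends finished and alive beams per batch with a 0/1 mask. A hedged numpy sketch of that blend; shapes and values are invented for illustration:

import numpy as np

beam_size = 2
finished_seq = np.array([[[4, 5], [6, 7]]], dtype='float32')  # [batch, beam, len]
alive_seq    = np.array([[[1, 2], [3, 0]]], dtype='float32')
mask  = np.array([[1.0, 1.0]], dtype='float32')  # 1.0 -> this batch's beams all finished
mask2 = 1.0 - mask
# Equivalent of elementwise_mul(..., mask, axis=0): broadcast the [batch, beam]
# mask over the length dimension.
selected = finished_seq * mask[:, :, None] + alive_seq * mask2[:, :, None]
print(selected.astype('int32'))  # [[[4 5] [6 7]]] -- the finished beams are kept here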