def is_finished(self, step_idx, source_length, alive_log_probs, finished_scores, finished_in_finished):
        """Decide whether beam-search decoding should continue.

        NOTE(review): despite the name, the returned bool tensor is the
        *continue* condition — True while the early-stop bound is not met
        AND the step budget (source_length + 50) is not exhausted.

        Args:
            step_idx: current decode step, int64 tensor of shape [1].
            source_length: source sequence length tensor.
            alive_log_probs: log-probabilities of the alive beams.
            finished_scores: scores of finished hypotheses.
            finished_in_finished: 0/1 float flags marking which finished
                slots actually hold a finished hypothesis.

        Returns:
            bool tensor — True if another decode step should run.
        """
        # Maximal length penalty: (source_length + 55) / 6 == 
        # ((5 + max_decode_len) / 6) with max_decode_len = source_length + 50
        # (matching decode_length below), raised to self.alpha.
        base_1 = layers.cast(source_length, 'float32') + 55.0
        base_1 /= 6.0
        max_length_penalty = layers.pow(base_1, self.alpha)

        # Most optimistic normalized score an alive beam could still reach.
        # self.get_alive_index presumably selects the best alive entry —
        # TODO confirm against its definition.
        flat_alive_log_probs = layers.reshape(alive_log_probs, [-1])
        lower_bound_alive_scores_1 = layers.gather(flat_alive_log_probs, [self.get_alive_index])
        
        lower_bound_alive_scores = lower_bound_alive_scores_1 / max_length_penalty
        
        # Worst finished score per batch entry; multiplying by the 0/1
        # flags zeroes out slots with no finished hypothesis.
        lowest_score_of_finished_in_finish = layers.reduce_min(finished_scores * finished_in_finished, dim=1)

        finished_in_finished = layers.cast(finished_in_finished, 'bool')
        # Batch entries with no finished hypothesis at all get -INF so they
        # can never satisfy the early-stop bound below.
        lowest_score_of_finished_in_finish += \
                        ((1.0 - layers.cast(layers.reduce_any(finished_in_finished, 1), 'float32')) * -INF)
        
        # Early stop once, for every batch entry, the worst finished score
        # already beats the best score any alive beam could achieve.
        bound_is_met = layers.reduce_all(layers.greater_than(lowest_score_of_finished_in_finish, 
                                                             lower_bound_alive_scores))

        # Hard cap on the number of decode steps.
        decode_length = source_length + 50
        length_cond = layers.less_than(x=step_idx, y=decode_length)

        return layers.logical_and(x=layers.logical_not(bound_is_met), y=length_cond)
# ---- Example #2 (score: 0) — snippet separator ----
 def reduce_compare(x, op_str, y):
     element_wise_result = eval("x " + op_str + " y")
     if op_str == "!=":
         return reduce_any(element_wise_result)
     elif op_str == "is" or op_str == "is not" or op_str == "in" or op_str == "not in":
         return element_wise_result
     else:
         return reduce_all(element_wise_result)
# ---- Example #3 (score: 0) — snippet separator ----
 def forward(self, x):
     """Encode sub-token ids into per-token feature embeddings.

     Pads are dropped before running the transformer and the outputs are
     re-padded into a batch afterwards.

     Args:
         x: id tensor; positions equal to self.pad_index are padding.
            Assumes the last axis holds the sub-tokens of one token —
            TODO confirm against the caller.

     Returns:
         Padded batch of transformer features, grouped per sample.
     """
     # True where at least one id along the last axis is non-pad.
     mask = layers.reduce_any(x != self.pad_index, -1)
     # Number of valid positions per sample (used to split back below).
     lens = nn.reduce_sum(mask, -1)
     # Flatten: keep only the valid entries across the whole batch.
     masked_x = nn.masked_select(x, mask)
     h, _ = self.transformer(masked_x)
     # Split the flat features back per sample and re-pad into a batch.
     feat_embed = nn.pad_sequence_paddle(
         layers.split(h, lens.numpy().tolist(), dim=0), self.pad_index)
     return feat_embed
# ---- Example #4 (score: 0) — snippet separator ----
def _push_to_stack(gmr_desc, gmr_pos, gmr_lens, gmr_stack_info):
    """Push the grammar ids of gmr_desc in positions [gmr_pos, gmr_lens)
    onto the grammar stack, and update the stack position.

    The ids are pushed back-to-front: the loop starts at index
    gmr_lens - 1 and walks down toward gmr_pos, so the id at gmr_pos ends
    up on top of the stack.

    Args:
        gmr_desc (TYPE): grammar description to gather ids from.
        gmr_pos (TYPE): first position (inclusive) to push, per sample.
        gmr_lens (TYPE): end position (exclusive) to push, per sample.
        gmr_stack_info (tuple): [in/out] (gmr_stack, gmr_stack_pos)

    Returns: tuple (gmr_stack, gmr_stack_pos)

    Raises: NULL
    """
    gmr_stack, gmr_stack_pos = gmr_stack_info
    # 1 for samples that have anything to push (gmr_lens > 0), else 0;
    # keeps empty samples from producing a negative start index.
    mv_step = layers.cast(layers.greater_than(gmr_lens,
                                              layers.zeros_like(gmr_lens)),
                          dtype=gmr_lens.dtype)
    # Start cursor: last valid index (gmr_lens - 1) for non-empty samples.
    gmr_mv_pos = layers.elementwise_sub(gmr_lens, mv_step)

    # Loop while ANY sample's cursor is still past its start position.
    cond = layers.reduce_any(layers.greater_than(gmr_mv_pos, gmr_pos))
    while_op = layers.While(cond)
    with while_op.block():
        # Gather the id under each sample's cursor and push tentatively.
        gmr_ids = nn_utils.batch_gather(gmr_desc, gmr_mv_pos)
        gmr_stack_tmp, gmr_stack_pos_tmp = data_structure.Stack.push(
            gmr_stack_info, gmr_ids, in_place=False)

        # Per-sample gate: only samples whose cursor is still moving take
        # the pushed stack; finished samples keep their current stack.
        mv_cond = layers.greater_than(gmr_mv_pos, gmr_pos)
        # Decrement the cursor only where mv_cond holds (force=True
        # presumably casts the bool mask for the subtraction — confirm
        # against fluider.elementwise_sub).
        gmr_mv_pos_tmp = fluider.elementwise_sub(gmr_mv_pos,
                                                 mv_cond,
                                                 force=True)
        new_gmr_stack, new_gmr_stack_pos = nn_utils.ifelse(
            mv_cond, [gmr_stack_tmp, gmr_stack_pos_tmp],
            [gmr_stack, gmr_stack_pos])
        # Write the selected state back into the loop variables in place.
        layers.utils.map_structure(layers.assign,
                                   [new_gmr_stack, new_gmr_stack_pos],
                                   [gmr_stack, gmr_stack_pos])
        layers.assign(gmr_mv_pos_tmp, gmr_mv_pos)
        # Recompute the while condition from the updated cursors.
        layers.assign(
            layers.reduce_any(layers.greater_than(gmr_mv_pos, gmr_pos)), cond)
    return gmr_stack, gmr_stack_pos
# ---- Example #5 (score: 0) — snippet separator ----
    def forward(self, x):
        """Encode character ids into per-token feature embeddings.

        Pads are dropped before the LSTM and the outputs are re-padded
        into a batch afterwards.

        Args:
            x: id tensor; positions equal to self.pad_index are padding.
               Assumes the last axis holds the characters of one token —
               TODO confirm against the caller.

        Returns:
            Padded batch of LSTM-derived features, grouped per sample.
        """
        # True where at least one id along the last axis is non-pad.
        mask = layers.reduce_any(x != self.pad_index, -1)
        # Number of valid positions per sample (used to split back below).
        lens = nn.reduce_sum(mask, -1)
        # Flatten: keep only the valid entries across the whole batch.
        masked_x = nn.masked_select(x, mask)
        char_mask = masked_x != self.pad_index
        emb = self.embed(masked_x)

        # Final hidden state h; unstack + concat merges its leading axis
        # (presumably directions/layers — confirm) into the feature axis.
        _, (h, _) = self.lstm(emb, char_mask, self.pad_index)
        h = layers.concat(layers.unstack(h), axis=-1)
        # Split the flat features back per sample and re-pad into a batch.
        feat_embed = nn.pad_sequence_paddle(
            layers.split(h, lens.numpy().tolist(), dim=0), self.pad_index)
        return feat_embed
# ---- Example #6 (score: 0) — snippet separator ----
    def beam_search(enc_output, enc_bias, source_length):
        """Run beam-search decoding over the encoder output.

        Closes over batch_size, beam_size, max_out_len, decode_alpha,
        n_layer, n_head, d_model and the other model hyper-parameters
        from the enclosing scope.

        Args:
            enc_output: encoder hidden states.
            enc_bias: encoder attention bias.
            source_length: lengths of the source sequences.

        Returns:
            tuple (finished_seq, finished_scores): decoded token ids
            (int32) and their scores, both marked persistable.
        """
        max_len = layers.fill_constant(
            shape=[1], dtype='int64', value=max_out_len)
        step_idx = layers.fill_constant(
            shape=[1], dtype='int64', value=0)
        cond = layers.less_than(x=step_idx, y=max_len)
        while_op = layers.While(cond)

        caches_batch_size = batch_size * beam_size
        # Only beam 0 starts at score 0; the rest get -INF so the first
        # expansion cannot produce duplicate hypotheses.
        init_score = np.zeros([1, beam_size]).astype('float32')
        init_score[:, 1:] = -INF
        initial_log_probs = layers.assign(init_score)

        alive_log_probs = layers.expand(initial_log_probs, [batch_size, 1])
        # alive seq [batch_size, beam_size, 1]
        initial_ids = layers.zeros([batch_size, 1, 1], 'float32')
        alive_seq = layers.expand(initial_ids, [1, beam_size, 1])
        alive_seq = layers.cast(alive_seq, 'int64')

        # Tile encoder state and attention bias per beam, folding the beam
        # axis into the batch axis so the decoder caches line up.
        enc_output = layers.unsqueeze(enc_output, axes=[1])
        enc_output = layers.expand(enc_output, [1, beam_size, 1, 1])
        enc_output = layers.reshape(enc_output, [caches_batch_size, -1, d_model])

        tgt_src_attn_bias = layers.unsqueeze(enc_bias, axes=[1])
        tgt_src_attn_bias = layers.expand(tgt_src_attn_bias, [1, beam_size, n_head, 1, 1])
        enc_bias_shape = layers.shape(tgt_src_attn_bias)
        tgt_src_attn_bias = layers.reshape(tgt_src_attn_bias, [-1, enc_bias_shape[2],
                enc_bias_shape[3], enc_bias_shape[4]])

        beam_search = BeamSearch(beam_size, batch_size, decode_alpha, trg_vocab_size, d_model)

        # Length-0 K/V caches, one pair per decoder layer; they grow as
        # decoding proceeds.
        caches = [{
            "k": layers.fill_constant(
                shape=[caches_batch_size, 0, d_model],
                dtype=enc_output.dtype,
                value=0),
            "v": layers.fill_constant(
                shape=[caches_batch_size, 0, d_model],
                dtype=enc_output.dtype,
                value=0)
        } for i in range(n_layer)]

        finished_seq = layers.zeros_like(alive_seq)
        finished_scores = layers.fill_constant([batch_size, beam_size],
                                                dtype='float32', value=-INF)
        finished_flags = layers.fill_constant([batch_size, beam_size],
                                                dtype='float32', value=0)

        with while_op.block():
            pos = layers.fill_constant([caches_batch_size, 1, 1], dtype='int64', value=1)
            pos = layers.elementwise_mul(pos, step_idx, axis=0)

            # Feed only the last generated token; history is in the caches.
            alive_seq_1 = layers.reshape(alive_seq, [caches_batch_size, -1])
            alive_seq_2 = alive_seq_1[:, -1:]
            alive_seq_2 = layers.unsqueeze(alive_seq_2, axes=[1])

            logits = wrap_decoder(
                trg_vocab_size, max_in_len, n_layer, n_head, d_key,
                d_value, d_model, d_inner_hid, prepostprocess_dropout,
                attention_dropout, relu_dropout, preprocess_cmd,
                postprocess_cmd, weight_sharing, embedding_sharing,
                dec_inputs=(alive_seq_2, alive_seq_2, pos, None, tgt_src_attn_bias),
                enc_output=enc_output, caches=caches, is_train=False, params_type=params_type)

            alive_seq_2, alive_log_probs_2, finished_seq_2, finished_scores_2, finished_flags_2, caches_2 = \
                    beam_search.inner_func(step_idx, logits, alive_seq_1, alive_log_probs, finished_seq,
                                           finished_scores, finished_flags, caches, enc_output,
                                           tgt_src_attn_bias)

            layers.increment(x=step_idx, value=1.0, in_place=True)
            finish_cond = beam_search.is_finished(step_idx, source_length, alive_log_probs_2,
                                                  finished_scores_2, finished_flags_2)

            # Write this step's state back into the loop variables in place.
            layers.assign(alive_seq_2, alive_seq)
            layers.assign(alive_log_probs_2, alive_log_probs)
            layers.assign(finished_seq_2, finished_seq)
            layers.assign(finished_scores_2, finished_scores)
            layers.assign(finished_flags_2, finished_flags)

            # BUGFIX: was `xrange`, which only exists on Python 2 and raises
            # NameError on Python 3; `range` behaves identically here.
            for i in range(len(caches_2)):
                layers.assign(caches_2[i]["k"], caches[i]["k"])
                layers.assign(caches_2[i]["v"], caches[i]["v"])

            layers.logical_and(x=cond, y=finish_cond, out=cond)

        # Per-batch mask: 1.0 where at least one beam finished; batches
        # with no finished beam fall back to the alive beams below.
        finished_flags = layers.reduce_sum(finished_flags, dim=1, keep_dim=True) / beam_size
        finished_flags = layers.cast(finished_flags, 'bool')
        mask = layers.cast(layers.reduce_any(input=finished_flags, dim=1, keep_dim=True), 'float32')
        mask = layers.expand(mask, [1, beam_size])

        mask2 = 1.0 - mask
        finished_seq = layers.cast(finished_seq, 'float32')
        alive_seq = layers.cast(alive_seq, 'float32')

        # Blend: finished sequences/scores where available, alive otherwise.
        finished_seq = layers.elementwise_mul(finished_seq, mask, axis=0) + \
                        layers.elementwise_mul(alive_seq, mask2, axis=0)
        finished_seq = layers.cast(finished_seq, 'int32')
        finished_scores = layers.elementwise_mul(finished_scores, mask, axis=0) + \
                            layers.elementwise_mul(alive_log_probs, mask2)
        # Keep the outputs alive across executor runs.
        finished_seq.persistable = True
        finished_scores.persistable = True

        return finished_seq, finished_scores