Example #1
def _process_type_leaf(condition, decoder, grammar_stack, next_inputs,
                       finished):
    """Process when output type is LEAF

    Args:
        condition (TYPE): NULL
        decoder (TYPE): NULL
        grammar_stack (StackData): (gmr_stack_data, gmr_stack_pos)
        next_inputs (DecoderInputsWrapper): (input_var, action, grammar_mask)
        finished (TYPE): NULL

    Returns: None

    Raises: NULL
    """
    ## pop stack
    next_output, valid_pos, gmr_stack_tmp = data_structure.Stack.pop(
        grammar_stack, mask=True, in_place=False)
    valid_pos = fluider.squeeze(valid_pos, [1])

    ## update next grammar mask
    next_actions = layers.elementwise_mul(decoder.grammar_action(next_output),
                                          layers.cast(
                                              valid_pos,
                                              dtype=next_inputs.action.dtype),
                                          axis=0)
    next_gmr_mask = layers.elementwise_mul(
        decoder.grammar_mask(next_output),
        layers.cast(valid_pos, dtype=next_inputs.gmr_mask.dtype),
        axis=0)

    ## save result, while condition is True
    new_gmr_stack_data, new_gmr_stack_pos, new_actions, new_gmr_mask = nn_utils.ifelse(
        condition,
        [gmr_stack_tmp.data, gmr_stack_tmp.pos, next_actions, next_gmr_mask], [
            grammar_stack.data, grammar_stack.pos, next_inputs.action,
            next_inputs.gmr_mask
        ])

    layers.utils.map_structure(
        layers.assign,
        [new_gmr_stack_data, new_gmr_stack_pos, new_actions, new_gmr_mask], [
            grammar_stack.data, grammar_stack.pos, next_inputs.action,
            next_inputs.gmr_mask
        ])
    layers.logical_or(finished,
                      layers.logical_and(condition,
                                         layers.logical_not(valid_pos)),
                      out=finished)
Example #2
def get_face_mask(densepose_map):
    """
    Obtain a mask of face regions from a DensePose part map.

    Args:
        densepose_map (3D or 4D tensor): values scaled so that (x / 2 + 0.5) * 24 gives part indices in [0, 25)
    """
    need_reshape = len(densepose_map.shape) == 4
    if need_reshape:
        bo, t, h, w = densepose_map.shape
        densepose_map = L.reshape(densepose_map, (-1, h, w))

    b, h, w = densepose_map.shape
    part_map = (densepose_map / 2 + 0.5) * 24
    assert L.reduce_all((part_map >= 0)) and L.reduce_all((part_map < 25))

    mask = dg.to_variable(np.zeros((b, h, w)).astype('bool'))

    for j in [23, 24]:
        mask = L.logical_or(
            mask, L.logical_and((part_map > j - 0.1), (part_map < j + 0.1)))

    if need_reshape:
        mask = L.reshape(mask, (bo, t, h, w))

    return P.cast(mask, "float32")
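A minimal dygraph usage sketch for get_face_mask (not part of the original example; it assumes the example's own aliases np/L/dg/P are already imported, and feeds a fake DensePose map scaled so that part indices land in [0, 25)):

import numpy as np
import paddle.fluid.dygraph as dg

with dg.guard():
    # fake 4D DensePose map: batch=2, time=3, 16x16 pixels, values in [-1, 1)
    fake_map = np.random.uniform(-1.0, 0.999, size=(2, 3, 16, 16)).astype("float32")
    face_mask = get_face_mask(dg.to_variable(fake_map))
    print(face_mask.shape)  # [2, 3, 16, 16]; float32 mask, 1.0 where face parts 23/24 appear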
Example #3
    def _grammar_step(self, logits, next_cell_states, decode_states, actions, gmr_mask):
        """跟进文法约束完成一步解码逻辑

        Args:
            logits (Variable): shape = [batch_size, beam_size, vocab_size]
            next_cell_states (Variable): NULL
            decode_states (StateWrapper): NULL

        Returns: TODO

        Raises: NULL

        """
        # decode token logits that conform to the grammar rules
        logits, valid_table_mask = self._output_layer(logits, actions, gmr_mask, decode_states.valid_table_mask)

        # initialize vocab size
        self._vocab_size = logits.shape[-1]
        self._vocab_size_tensor = layers.fill_constant(shape=[1], dtype='int64', value=logits.shape[-1])

        # compute log probs and mask out the finished beams
        step_log_probs = layers.log(layers.softmax(logits))
        step_log_probs = self._mask_finished_probs(step_log_probs, decode_states.finished)

        scores = layers.reshape(step_log_probs, [-1, self._beam_size * self._vocab_size])
        topk_scores, topk_indices = layers.topk(input=scores, k=self._beam_size)
        topk_scores = layers.reshape(topk_scores, shape=[-1])
        topk_indices = layers.reshape(topk_indices, shape=[-1])

        # beam index that each top-k entry comes from
        beam_indices = layers.elementwise_floordiv(topk_indices, self._vocab_size_tensor)
        # token id that each top-k entry corresponds to
        token_indices = layers.elementwise_mod(topk_indices, self._vocab_size_tensor)

        # re-gather step_log_probs according to where each top-k entry came from
        next_log_probs = nn_utils.batch_gather(
                layers.reshape(step_log_probs, [-1, self._beam_size * self._vocab_size]),
                topk_indices)
        def _beam_gather(x, beam_indices):
            """reshape x to beam dim, and gather each beam_indices
            Args:
                x (TYPE): NULL
            Returns: Variable
            """
            x = self.split_batch_beams(x)
            return nn_utils.batch_gather(x, beam_indices)
        next_cell_states = layers.utils.map_structure(lambda x: _beam_gather(x, beam_indices),
                                                      next_cell_states)
        next_finished = _beam_gather(decode_states.finished, beam_indices)
        next_lens = _beam_gather(decode_states.lengths, beam_indices)

        next_lens = layers.elementwise_add(next_lens,
                layers.cast(layers.logical_not(next_finished), next_lens.dtype))
        next_finished = layers.logical_or(next_finished,
                layers.equal(token_indices, self._end_token_tensor))

        decode_output = OutputWrapper(topk_scores, token_indices, beam_indices)
        decode_states = StateWrapper(next_cell_states, next_log_probs, next_finished, next_lens, valid_table_mask)

        return decode_output, decode_states
Example #4
    def simple_net(self):
        d0 = layers.data(
            "d0", shape=[10], append_batch_size=False, dtype='float32')
        d1 = layers.data(
            "d1", shape=[10], append_batch_size=False, dtype='float32')
        d2 = layers.data(
            "d2", shape=[10], append_batch_size=False, dtype='float32')
        # fill_constant npu op doesn't support int64
        i = layers.zeros(shape=[1], dtype='int32')
        i = layers.cast(i, 'int64')
        i.stop_gradient = True
        init = layers.zeros(shape=[10], dtype='float32')
        mem_array = layers.array_write(x=init, i=i)
        data_array = layers.array_write(x=d0, i=i)
        i = layers.increment(i)
        layers.array_write(d1, i, array=data_array)
        i = layers.increment(i)
        layers.array_write(d2, i, array=data_array)
        i = layers.zeros(shape=[1], dtype='int32')
        i = layers.cast(i, 'int64')
        i.stop_gradient = True
        array_len = layers.fill_constant(shape=[1], dtype='int32', value=5)
        array_len = layers.cast(array_len, 'int64')
        array_len.stop_gradient = True
        cond = layers.ones(shape=[1], dtype='int32')
        cond = layers.cast(cond, 'bool')
        j = layers.fill_constant(shape=[1], dtype='int32', value=1)
        j = layers.cast(j, 'int64')
        j.stop_gradient = True
        array_len2 = layers.fill_constant(shape=[1], dtype='int32', value=3)
        array_len2 = layers.cast(array_len2, 'int64')
        array_len2.stop_gradient = True
        cond2 = layers.logical_or(x=j, y=array_len2)
        cond2 = layers.ones(shape=[1], dtype='int32')
        cond2 = layers.cast(cond2, 'bool')
        while_op = layers.While(cond=cond)
        while_op2 = layers.While(cond=cond2)
        with while_op.block():
            d = layers.array_read(array=data_array, i=i)
            prev = layers.array_read(array=mem_array, i=i)
            result = layers.sums(input=[d, prev])

            i = layers.increment(x=i, in_place=True)
            layers.array_write(result, i=i, array=mem_array)
            layers.less_than(x=i, y=array_len, cond=cond)

            with while_op2.block():
                d2 = layers.array_read(array=data_array, i=j)
                prev2 = layers.array_read(array=mem_array, i=j)
                result2 = layers.sums(input=[d2, prev2])

                j = layers.increment(x=j, in_place=True)
                layers.array_write(result2, i=j, array=mem_array)
                layers.less_than(x=j, y=array_len2, cond=cond2)
        sum_result = layers.array_read(array=mem_array, i=j)
        loss = layers.mean(sum_result)
        return loss, sum_result
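A hedged sketch of how simple_net might be executed in static-graph mode (the executor scaffolding and the test_case handle below are assumptions, not part of the original example):

import numpy as np
import paddle.fluid as fluid

def _run_simple_net(test_case):
    """Build the program via test_case.simple_net() and run it once on CPU."""
    main_prog, startup_prog = fluid.Program(), fluid.Program()
    with fluid.program_guard(main_prog, startup_prog):
        loss, sum_result = test_case.simple_net()  # builds the two nested While loops
    exe = fluid.Executor(fluid.CPUPlace())
    exe.run(startup_prog)
    feed = {name: np.random.random(10).astype("float32") for name in ("d0", "d1", "d2")}
    return exe.run(main_prog, feed=feed, fetch_list=[sum_result])[0]  # numpy array of shape (10,)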
Example #5
    def logical_or(cls, x, y, *args, out=None, name=None):
        """wrapper of paddle.fluid.layers.logical_or

        Args:
            x (Variable): NULL
            y (Variable): NULL
            *args (TYPE): NULL
            out (TYPE): Default is None
            name (TYPE): Default is None

        Returns: TODO

        Raises: NULL

        """
        tmp = layers.logical_or(x, y, out=out, name=name)
        for var in args:
            tmp = layers.logical_or(tmp, var, out=out, name=name)
        return tmp
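A hypothetical dygraph usage sketch of the variadic wrapper above; `LayersWrapper` is only a stand-in for whatever class actually hosts this classmethod:

import numpy as np
import paddle.fluid.dygraph as dg

with dg.guard():
    a = dg.to_variable(np.array([True, False, False]))
    b = dg.to_variable(np.array([False, False, True]))
    c = dg.to_variable(np.array([False, False, False]))
    # chained OR over all operands, equivalent to logical_or(logical_or(a, b), c)
    combined = LayersWrapper.logical_or(a, b, c)
    print(combined.numpy())  # [ True False  True]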
Example #6
def get_attention_mask(mask, nhead):
    # mask: [bs, L] -> attn_mask: [bs, nhead, L, L]
    bs, l = mask.shape
    row_mask = L.expand(L.unsqueeze(mask, [2]), (1, 1, l)) # [bs, L, L]
    col_mask = L.expand(L.unsqueeze(mask, [1]), (1, l, 1)) # [bs, L, L]
    mask = L.logical_or(row_mask, col_mask)
    attn_mask = L.zeros([bs, l, l], dtype="float32")
    attn_mask = attn_mask.numpy()
    mask = mask.numpy()
    attn_mask[mask] = -1e8
    attn_mask = dg.to_variable(attn_mask)
    attn_mask = L.expand(L.unsqueeze(attn_mask, [1]), (1, nhead, 1, 1)) # [bs, nhead, L1, L2]
    return attn_mask
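A minimal dygraph usage sketch for get_attention_mask (imports and the toy padding mask are assumptions; True entries in the mask mark padded tokens, and any row or column touching padding gets a -1e8 bias):

import numpy as np
import paddle.fluid.dygraph as dg

with dg.guard():
    pad_mask = np.array([[False, False, True],
                         [False, True,  True]])  # [bs=2, L=3], True = padded position
    attn_mask = get_attention_mask(dg.to_variable(pad_mask), nhead=4)
    print(attn_mask.shape)  # [2, 4, 3, 3]; entries touching padding are -1e8, the rest 0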
Example #7
def _check_finished(decoder, next_inputs, finished, outputs_array):
    """check finished instance by next_inputs.action, and
    update finished tag and write END to outputs

    Args:
        decoder (TYPE): NULL
        next_inputs (TYPE): NULL
        finished (TYPE): NULL
        outputs_array (TYPE): NULL

    Returns: TODO

    Raises: NULL
    """
    act_stop = tensor.fill_constant_batch_size_like(
        next_inputs.action,
        shape=next_inputs.action.shape,
        value=decoder._grammar.ACTION_STOP,
        dtype='int64')
    new_finished = layers.logical_and(
        layers.equal(next_inputs.action, act_stop),
        layers.logical_not(finished))

    end_token_id = tensor.fill_constant_batch_size_like(
        outputs_array.data,
        shape=[-1],
        value=decoder._grammar.END,
        dtype=outputs_array.data.dtype)
    out_data_tmp, out_pos_tmp = data_structure.Array.push(outputs_array,
                                                          end_token_id,
                                                          in_place=False)
    new_data, new_pos = nn_utils.ifelse(
        new_finished, [out_data_tmp, out_pos_tmp],
        [outputs_array.data, outputs_array.pos])

    layers.assign(new_data, outputs_array.data)
    layers.assign(new_pos, outputs_array.pos)
    layers.logical_or(finished, new_finished, out=finished)
Example #8
    def beam_search(self,
                    src_word,
                    src_pos,
                    src_slf_attn_bias,
                    trg_word,
                    trg_src_attn_bias,
                    bos_id=0,
                    eos_id=1,
                    beam_size=4,
                    max_len=256):
        def expand_to_beam_size(tensor, beam_size):
            tensor = layers.reshape(tensor,
                                    [tensor.shape[0], 1] + tensor.shape[1:])
            tile_dims = [1] * len(tensor.shape)
            tile_dims[1] = beam_size
            return layers.expand(tensor, tile_dims)

        def merge_batch_beams(tensor):
            return layers.reshape(tensor, [tensor.shape[0] * tensor.shape[1]] +
                                  tensor.shape[2:])

        def split_batch_beams(tensor):
            return fluid.layers.reshape(tensor,
                                        shape=[-1, beam_size] +
                                        list(tensor.shape[1:]))

        def mask_probs(probs, finished, noend_mask_tensor):
            # TODO: use where_op
            finished = layers.cast(finished, dtype=probs.dtype)
            probs = layers.elementwise_mul(layers.expand(
                layers.unsqueeze(finished, [2]), [1, 1, self.trg_vocab_size]),
                                           noend_mask_tensor,
                                           axis=-1) - layers.elementwise_mul(
                                               probs, (finished - 1), axis=0)
            return probs

        def gather(x, indices, batch_pos):
            topk_coordinates = fluid.layers.stack([batch_pos, indices], axis=2)
            return layers.gather_nd(x, topk_coordinates)

        # run encoder
        enc_output = self.encoder(src_word, src_pos, src_slf_attn_bias)

        # constant number
        inf = float(1. * 1e7)
        batch_size = enc_output.shape[0]
        max_len = (enc_output.shape[1] + 20) if max_len is None else max_len
        vocab_size_tensor = layers.fill_constant(shape=[1],
                                                 dtype="int64",
                                                 value=self.trg_vocab_size)
        end_token_tensor = to_variable(
            np.full([batch_size, beam_size], eos_id, dtype="int64"))
        noend_array = [-inf] * self.trg_vocab_size
        noend_array[eos_id] = 0
        noend_mask_tensor = to_variable(np.array(noend_array, dtype="float32"))
        batch_pos = layers.expand(
            layers.unsqueeze(
                to_variable(np.arange(0, batch_size, 1, dtype="int64")), [1]),
            [1, beam_size])

        predict_ids = []
        parent_ids = []
        ### initialize states of beam search ###
        log_probs = to_variable(
            np.array([[0.] + [-inf] * (beam_size - 1)] * batch_size,
                     dtype="float32"))
        finished = to_variable(
            np.full([batch_size, beam_size], 0, dtype="bool"))
        ### initialize inputs and states of transformer decoder ###
        ## init inputs for decoder, shaped `[batch_size*beam_size, ...]`
        trg_word = layers.fill_constant(shape=[batch_size * beam_size, 1],
                                        dtype="int64",
                                        value=bos_id)
        trg_pos = layers.zeros_like(trg_word)
        trg_src_attn_bias = merge_batch_beams(
            expand_to_beam_size(trg_src_attn_bias, beam_size))
        enc_output = merge_batch_beams(
            expand_to_beam_size(enc_output, beam_size))
        ## init states (caches) for transformer, need to be updated according to selected beam
        caches = [{
            "k":
            layers.fill_constant(
                shape=[batch_size * beam_size, self.n_head, 0, self.d_key],
                dtype=enc_output.dtype,
                value=0),
            "v":
            layers.fill_constant(
                shape=[batch_size * beam_size, self.n_head, 0, self.d_value],
                dtype=enc_output.dtype,
                value=0),
        } for i in range(self.n_layer)]

        for i in range(max_len):
            trg_pos = layers.fill_constant(shape=trg_word.shape,
                                           dtype="int64",
                                           value=i)
            caches = map_structure(  # can not be reshaped since the 0 size
                lambda x: x if i == 0 else merge_batch_beams(x), caches)
            logits = self.decoder(trg_word, trg_pos, None, trg_src_attn_bias,
                                  enc_output, caches)
            caches = map_structure(split_batch_beams, caches)
            step_log_probs = split_batch_beams(
                fluid.layers.log(fluid.layers.softmax(logits)))
            step_log_probs = mask_probs(step_log_probs, finished,
                                        noend_mask_tensor)
            log_probs = layers.elementwise_add(x=step_log_probs,
                                               y=log_probs,
                                               axis=0)
            log_probs = layers.reshape(log_probs,
                                       [-1, beam_size * self.trg_vocab_size])
            scores = log_probs
            topk_scores, topk_indices = fluid.layers.topk(input=scores,
                                                          k=beam_size)
            beam_indices = fluid.layers.elementwise_floordiv(
                topk_indices, vocab_size_tensor)
            token_indices = fluid.layers.elementwise_mod(
                topk_indices, vocab_size_tensor)

            # update states
            caches = map_structure(
                lambda x: gather(x, beam_indices, batch_pos), caches)
            log_probs = gather(log_probs, topk_indices, batch_pos)
            finished = gather(finished, beam_indices, batch_pos)
            finished = layers.logical_or(
                finished, layers.equal(token_indices, end_token_tensor))
            trg_word = layers.reshape(token_indices, [-1, 1])

            predict_ids.append(token_indices)
            parent_ids.append(beam_indices)

            if layers.reduce_all(finished).numpy():
                break

        predict_ids = layers.stack(predict_ids, axis=0)
        parent_ids = layers.stack(parent_ids, axis=0)
        finished_seq = layers.transpose(
            layers.gather_tree(predict_ids, parent_ids), [1, 2, 0])
        finished_scores = topk_scores

        return finished_seq, finished_scores
Example #9
def _run_paddle_logical_or(x, y):
    x = cast_bool_if_necessary(x)
    y = cast_bool_if_necessary(y)
    return logical_or(x, y)
Example #10
    def forward(self, x, y):
        # x and y are one frame apart
        u1 = zeros_like(x)
        u2 = zeros_like(x)
        l_t = self.l * self.t
        taut = self.a / self.t

        grad2_x = self.conv_img_grad(y)
        # grad2_x[:, :, :, 0] = 0.5 * (x[:, :, :, 1] - x[:, :, :, 0])
        # grad2_x[:, :, :, -1] = 0.5 * (x[:, :, :, -1] - x[:, :, :, -2])

        grad2_y = self.conv_img_grad2(y)
        # grad2_y[:, :, 0, :] = 0.5 * (x[:, :, 1, :] - x[:, :, 0, :])
        # grad2_y[:, :, -1, :] = 0.5 * (x[:, :, -1, :] - x[:, :, -2, :])

        p11 = zeros_like(x)
        p12 = zeros_like(x)
        p21 = zeros_like(x)
        p22 = zeros_like(x)

        gsqx = grad2_x**2
        gsqy = grad2_y**2
        grad = gsqx + gsqy + 1e-12

        rho_c = y - grad2_x * u1 - grad2_y * u2 - x

        for i in range(self.n_iter):
            rho = rho_c + grad2_x * u1 + grad2_y * u2 + 1e-12

            v1 = zeros_like(x)
            v2 = zeros_like(x)
            mask1 = rho < -l_t * grad
            mask2 = rho > l_t * grad
            mask3 = logical_and(logical_not(logical_or(mask1, mask2)),
                                (grad > 1e-12))
            mask1 = cast(mask1, dtype='float32')
            mask2 = cast(mask2, dtype='float32')
            mask3 = cast(mask3, dtype='float32')
            mask1.stop_gradient = True
            mask2.stop_gradient = True
            mask3.stop_gradient = True

            # v1 = v1 + l_t * grad2_x * mask1 - l_t * grad2_x * mask2 - (rho / grad) * grad2_x * mask3
            # v2 = v2 + l_t * grad2_y * mask1 - l_t * grad2_y * mask2 - (rho / grad) * grad2_y * mask3
            v1 = elementwise_add(
                u1,
                elementwise_add(
                    elementwise_mul(l_t * grad2_x, mask1),
                    elementwise_add(
                        elementwise_mul(-l_t * grad2_x, mask2),
                        elementwise_mul(-elementwise_div(rho, grad),
                                        elementwise_mul(grad2_x, mask3)))))
            v2 = elementwise_add(
                u2,
                elementwise_add(
                    elementwise_mul(l_t * grad2_y, mask1),
                    elementwise_add(
                        elementwise_mul(-l_t * grad2_y, mask2),
                        elementwise_mul(-elementwise_div(rho, grad),
                                        elementwise_mul(grad2_y, mask3)))))

            del rho
            del mask1
            del mask2
            del mask3

            v1 += u1
            v2 += u2

            u1 = v1 + self.t * self.divergence(p11, p12)
            u2 = v2 + self.t * self.divergence(p21, p22)
            del v1
            del v2
            u1 = u1
            u2 = u2

            u1x, u1y = self.forward_grad(u1)
            u2x, u2y = self.forward_grad(u2)

            p11 = (p11 + taut * u1x) / (1. +
                                        taut * sqrt(u1x**2 + u1y**2 + 1e-12))
            p12 = (p12 + taut * u1y) / (1. +
                                        taut * sqrt(u1x**2 + u1y**2 + 1e-12))
            p21 = (p21 + taut * u2x) / (1. +
                                        taut * sqrt(u2x**2 + u2y**2 + 1e-12))
            p22 = (p22 + taut * u2y) / (1. +
                                        taut * sqrt(u2x**2 + u2y**2 + 1e-12))
            del u1x
            del u1y
            del u2x
            del u2y

        return u1, u2
Example #11
def grammar_output(inputs,
                   actions,
                   gmr_mask,
                   last_col2tbl_mask,
                   decode_vocab,
                   grammar,
                   name=None,
                   column2table=None):
    """output logits according to grammar

    Args:
        inputs (Variable): shape = [batch_size, max_len, hidden_size]. infer 阶段 max_len 恒为1
        actions (Variable): shape = [batch_size, max_len]. infer 阶段 max_len 恒为1
        gmr_mask (Variable): shape = [batch_size, max_len, grammar_size]. infer 阶段 max_len 恒为1
        last_col2tbl_mask (Variable): shape = [batch_size, max_len, max_table]. 解码过程中,上一个step为column时,其对应的 table mask
        decode_vocab (DecoderDynamicVocab): (table, table_len, column, column_len, value, value_len, column2table_mask).
                                            这里的column2table_mask是跟column一一对应的table mask。
        gramamr (Grammar): NULL
        name (str): Variable 的 name 前缀。用于多次调用时的参数共享。默认为 None,表示参数不会共享。

    Returns: (Variable, Variable)
        output: 词表输出概率
        valid_table_mask: 只在预测阶段有效

    Raises: NULL
    """
    batch_size = layers.shape(inputs)[0]
    max_len = inputs.shape[1]
    vocab_size = grammar.vocab_size

    action_shape = [batch_size, max_len]
    act_apply_rule = tensor.fill_constant(shape=action_shape,
                                          value=grammar.ACTION_APPLY,
                                          dtype='int64')
    act_stop = tensor.fill_constant(shape=action_shape,
                                    value=grammar.ACTION_STOP,
                                    dtype='int64')
    act_select_t = tensor.fill_constant(shape=action_shape,
                                        value=grammar.ACTION_SELECT_T,
                                        dtype='int64')
    act_select_c = tensor.fill_constant(shape=action_shape,
                                        value=grammar.ACTION_SELECT_C,
                                        dtype='int64')
    act_select_v = tensor.fill_constant(shape=action_shape,
                                        value=grammar.ACTION_SELECT_V,
                                        dtype='int64')
    cond_apply_rule = layers.logical_or(layers.equal(actions, act_apply_rule),
                                        layers.equal(actions, act_stop))
    cond_select_t = layers.equal(actions, act_select_t)
    cond_select_c = layers.equal(actions, act_select_c)
    cond_select_v = layers.equal(actions, act_select_v)

    # expand vocab to [-1, max_len, ...]
    if max_len == 1:
        expand_to_seq_len = lambda x: layers.unsqueeze(x, [1])
    else:
        expand_to_seq_len = lambda x: layers.expand(layers.unsqueeze(
            x, [1]), [1, max_len] + [1] * (len(x.shape) - 1))
    table_enc = expand_to_seq_len(decode_vocab.table)
    table_len = expand_to_seq_len(decode_vocab.table_len)
    column_enc = expand_to_seq_len(decode_vocab.column)
    column_len = expand_to_seq_len(decode_vocab.column_len)
    value_enc = expand_to_seq_len(decode_vocab.value)
    value_len = expand_to_seq_len(decode_vocab.value_len)
    column2table_mask = expand_to_seq_len(decode_vocab.column2table_mask)

    # merge batch & seq_len dim
    inputs = nn_utils.merge_first_ndim(inputs, n=2)
    actions = nn_utils.merge_first_ndim(actions, n=2)
    gmr_mask = nn_utils.merge_first_ndim(gmr_mask, n=2)
    last_col2tbl_mask = nn_utils.merge_first_ndim(last_col2tbl_mask, n=2)
    table_enc = nn_utils.merge_first_ndim(table_enc, n=2)
    table_len = nn_utils.merge_first_ndim(table_len, n=2)
    column_enc = nn_utils.merge_first_ndim(column_enc, n=2)
    column_len = nn_utils.merge_first_ndim(column_len, n=2)
    value_enc = nn_utils.merge_first_ndim(value_enc, n=2)
    value_len = nn_utils.merge_first_ndim(value_len, n=2)
    column2table_mask = nn_utils.merge_first_ndim(column2table_mask, n=2)
    cond_apply_rule = nn_utils.merge_first_ndim(cond_apply_rule, n=2)
    cond_select_t = nn_utils.merge_first_ndim(cond_select_t, n=2)
    cond_select_c = nn_utils.merge_first_ndim(cond_select_c, n=2)
    cond_select_v = nn_utils.merge_first_ndim(cond_select_v, n=2)

    t_ptr_net = models.PointerNetwork(score_type="affine",
                                      name='gmr_output_t_ptr')
    c_ptr_net = models.PointerNetwork(score_type="affine",
                                      name='gmr_output_c_ptr')
    v_ptr_net = models.PointerNetwork(score_type="affine",
                                      name='gmr_output_v_ptr')

    ## core processing logic ##
    apply_rule_output = _apply_rule(cond_apply_rule,
                                    inputs,
                                    gmr_mask,
                                    grammar,
                                    name=name)
    select_t_output = \
            _select_table(cond_select_t, inputs, table_enc, table_len, last_col2tbl_mask, t_ptr_net, grammar)
    select_c_output, valid_table_mask = \
            _select_column(cond_select_c, inputs, column_enc, column_len, c_ptr_net, grammar, column2table_mask)
    select_v_output = _select_value(cond_select_v, inputs, value_enc,
                                    value_len, v_ptr_net, grammar)

    output = fluider.elementwise_add(apply_rule_output,
                                     select_t_output,
                                     select_c_output,
                                     select_v_output,
                                     axis=0)
    output = layers.reshape(output, shape=[batch_size, max_len, vocab_size])
    return output, valid_table_mask
Example #12
    def _greedy_search(self,
                       src_word,
                       src_pos,
                       src_slf_attn_bias,
                       trg_word,
                       trg_src_attn_bias,
                       bos_id=0,
                       eos_id=1,
                       max_len=256):
        # run encoder
        enc_output = self.encoder(src_word, src_pos, src_slf_attn_bias)

        # constant number
        batch_size = enc_output.shape[0]
        max_len = (enc_output.shape[1] + 20) if max_len is None else max_len
        end_token_tensor = layers.fill_constant(shape=[batch_size, 1],
                                                dtype="int64",
                                                value=eos_id)

        predict_ids = []
        log_probs = layers.fill_constant(shape=[batch_size, 1],
                                         dtype="float32",
                                         value=0)
        trg_word = layers.fill_constant(shape=[batch_size, 1],
                                        dtype="int64",
                                        value=bos_id)
        finished = layers.fill_constant(shape=[batch_size, 1],
                                        dtype="bool",
                                        value=0)

        ## init states (caches) for transformer
        caches = [{
            "k":
            layers.fill_constant(shape=[batch_size, self.n_head, 0, self.d_key],
                                 dtype=enc_output.dtype,
                                 value=0),
            "v":
            layers.fill_constant(
                shape=[batch_size, self.n_head, 0, self.d_value],
                dtype=enc_output.dtype,
                value=0),
        } for i in range(self.n_layer)]

        for i in range(max_len):
            trg_pos = layers.fill_constant(shape=trg_word.shape,
                                           dtype="int64",
                                           value=i)
            logits = self.decoder(trg_word, trg_pos, None, trg_src_attn_bias,
                                  enc_output, caches)
            step_log_probs = layers.log(layers.softmax(logits))
            log_probs = layers.elementwise_add(x=step_log_probs,
                                               y=log_probs,
                                               axis=0)
            scores = log_probs
            topk_scores, topk_indices = layers.topk(input=scores, k=1)

            finished = layers.logical_or(
                finished, layers.equal(topk_indices, end_token_tensor))
            trg_word = topk_indices
            log_probs = topk_scores

            predict_ids.append(topk_indices)

            if layers.reduce_all(finished).numpy():
                break

        predict_ids = layers.stack(predict_ids, axis=0)
        finished_seq = layers.transpose(predict_ids, [1, 2, 0])
        finished_scores = topk_scores

        return finished_seq, finished_scores