Example #1
    def _to_tensor(self, *args):
        """
        Argument convert args to Tensor

        Args:
            value (float, list, numpy.ndarray, Tensor)
        Returns:
            Tensor of args.
        """
        numpy_args = []
        variable_args = []
        tmp = 0.

        for arg in args:
            if isinstance(arg, float):
                arg = [arg]
            if not isinstance(arg, (list, tuple, np.ndarray, tensor.Variable)):
                raise TypeError(
                    "Type of input args must be float, list, numpy.ndarray or Tensor, but received type {}".
                    format(type(arg)))

            arg_np = np.array(arg)
            arg_dtype = arg_np.dtype
            if str(arg_dtype) != 'float32':
                if str(arg_dtype) != 'float64':
                    # "assign" op doesn't support float64. if dtype is float64, float32 variable will be generated
                    #  and converted to float64 later using "cast".
                    warnings.warn(
                        "data type of argument only supports float32 and float64, your argument will be converted to float32."
                    )
                arg_np = arg_np.astype('float32')
            # tmp is used to support broadcasting: summing all the args yields
            # an array with their common (mixed) broadcast shape.
            tmp = tmp + arg_np
            numpy_args.append(arg_np)

        dtype = tmp.dtype
        for arg in numpy_args:
            arg_broadcasted, _ = np.broadcast_arrays(arg, tmp)
            arg_variable = tensor.create_tensor(dtype=dtype)
            tensor.assign(arg_broadcasted, arg_variable)
            variable_args.append(arg_variable)

        return tuple(variable_args)
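
The shape-mixing trick above is easiest to see outside of Paddle. Below is a minimal numpy-only sketch of the same idea (the helper name `broadcast_all` is illustrative, not part of the original code): summing all arguments produces an array with the common broadcast shape, and each argument is then expanded to that shape.

    import numpy as np

    def broadcast_all(*args):
        # mirror _to_tensor's trick: the running sum accumulates the
        # common broadcast shape of every argument
        arrays = [np.asarray([a] if isinstance(a, float) else a,
                             dtype='float32') for a in args]
        tmp = sum(arrays, 0.)
        # expand each argument to the mixed shape
        return tuple(np.broadcast_arrays(a, tmp)[0] for a in arrays)

    loc, scale = broadcast_all(0., [1., 2., 3.])
    print(loc.shape, scale.shape)  # (3,) (3,)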
Example #2
def lr_warmup(learning_rate, warmup_steps, total_step, multiplier,
              step_each_epoch):
    """Linear warmup from `learning_rate` to `learning_rate * multiplier`
    over `warmup_steps` steps, followed by cosine decay towards zero.
    """
    with default_main_program()._lr_schedule_guard():
        lr = tensor.create_global_var(shape=[1],
                                      value=0.0,
                                      dtype='float32',
                                      persistable=True,
                                      name='learning_rate_warmup')
        global_step = _decay_step_counter()

        with control_flow.Switch() as switch:
            with switch.case(global_step <= warmup_steps):
                decay_lr = learning_rate * (
                    (multiplier - 1.) * global_step / warmup_steps + 1.)
                tensor.assign(decay_lr, lr)
            with switch.default():
                learning_rate = learning_rate * multiplier
                #cur_epoch = ops.floor(global_step/step_each_epoch)
                decay_lr = learning_rate * 0.5 * (ops.cos(
                    (global_step - warmup_steps) * math.pi / (total_step)) + 1)
                tensor.assign(decay_lr, lr)

    return lr
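
For intuition, the schedule can be evaluated eagerly in plain Python. The sketch below (assuming nothing beyond the formulas already in the code; `lr_at` is a hypothetical helper) reproduces the two branches of the Switch:

    import math

    def lr_at(step, base_lr, warmup_steps, total_step, multiplier):
        if step <= warmup_steps:
            # linear warmup from base_lr to base_lr * multiplier
            return base_lr * ((multiplier - 1.) * step / warmup_steps + 1.)
        # cosine decay of the warmed-up rate towards zero
        peak = base_lr * multiplier
        return peak * 0.5 * (math.cos(
            (step - warmup_steps) * math.pi / total_step) + 1)

    for s in (0, 50, 100, 500):
        print(s, lr_at(s, base_lr=0.1, warmup_steps=100,
                       total_step=1000, multiplier=2.0))
    # 0.1, 0.15, 0.2, ~0.131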
Example #3
    def _lf_embedder(self, tokens, token_lens=None):
        """lf embedder.

        Args:
            tokens (Variable): [batch_size, seq_len]
            token_lens (Variable): Default is None.

        Returns:
            Variable: [batch_size, seq_len, lf_emb_size + lf_name_emb_size]
            embeddings of the logical-form tokens.

        Raises: NULL

        """
        self._batch_size = layers.shape(self.question_encoding)[0]

        ##  Grammar Rule Embedding
        self._grammar_vocab = tensor.cast(tensor.assign(
            self.grammar.gmr_vocab.astype(np.int32)),
                                          dtype='int64')
        self._grammar_emb = fluid.embedding(
            input=self._grammar_vocab,
            size=[self.grammar.grammar_size, self.lf_emb_size],
            dtype='float32',
            is_sparse=False,
            param_attr=fluid.ParamAttr(name="lf_embedding",
                                       initializer=nn_utils.uniform(
                                           self.init_scale)))

        batch_emb_lookup_grammar = layers.expand(
            layers.unsqueeze(self._grammar_emb, [0]), [self._batch_size, 1, 1])

        def _table_to_lf_input(ori_encoding):
            """trans ori_encoding to size of lf_embedding
            """
            output = layers.fc(input=ori_encoding,
                               size=self.lf_emb_size,
                               num_flatten_dims=2,
                               **nn_utils.param_attr('fc_table2lf_input',
                                                     self.init_scale,
                                                     need_bias=False))
            return output

        batch_emb_lookup_all = tensor.concat([
            batch_emb_lookup_grammar,
            _table_to_lf_input(self.tname_encoding),
            _table_to_lf_input(self.cname_encoding),
            _table_to_lf_input(self.value_encoding)
        ],
                                             axis=1)
        lf_embedding = nn_utils.batch_gather_2d(batch_emb_lookup_all, tokens)

        ## Grammar Rule type Embedding
        self._grammar2name = layers.cast(layers.assign(
            self.grammar.gmr2name_arr.astype(np.int32)),
                                         dtype='int64')
        lf_name = layers.reshape(layers.gather(
            self._grammar2name, layers.reshape(tokens, shape=[-1])),
                                 shape=tokens.shape)
        lf_name.stop_gradient = True
        lf_name_emb = fluid.embedding(
            input=lf_name,
            size=[self.grammar.name_size, self.lf_name_emb_size],
            dtype='float32',
            is_sparse=False,
            param_attr=fluid.ParamAttr(name="lf_name_embedding",
                                       initializer=nn_utils.uniform(
                                           self.init_scale)))

        output = layers.concat([lf_embedding, lf_name_emb], axis=-1)
        if token_lens is not None:
            mask = layers.sequence_mask(token_lens,
                                        maxlen=layers.shape(tokens)[1],
                                        dtype='float32')
            output = layers.elementwise_mul(output, mask, axis=0)
        return output
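
`nn_utils.batch_gather_2d` is a project-local helper; from its use here it gathers, for every batch row, the lookup-table rows indexed by `tokens`. A minimal numpy sketch of that behavior (an assumption about the helper, not its actual implementation):

    import numpy as np

    def batch_gather_2d(table, index):
        # table: [batch_size, num_entries, emb_size]
        # index: [batch_size, seq_len], integer ids into axis 1
        batch_idx = np.arange(table.shape[0])[:, None]  # [batch_size, 1]
        return table[batch_idx, index]                  # [batch_size, seq_len, emb_size]

    table = np.arange(2 * 4 * 3, dtype='float32').reshape(2, 4, 3)
    tokens = np.array([[0, 2], [3, 1]])
    print(batch_gather_2d(table, tokens).shape)  # (2, 2, 3)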
Example #4
def decode_with_grammar(decoder, inits, decode_vocab, max_step_num, **kwargs):
    """A modification of paddle.fluid.layers.dynamic_decode(...).
    Dynamic decoding performs :code:`decoder.step()` repeatedly until the returned
    Tensor indicating finished status contains all True values or the number of
    decoding step reachs to :attr:`max_step_num`.
    :code:`decoder.initialize()` would be called once before the decoding loop.
    If the `decoder` has implemented `finalize` method, :code:`decoder.finalize()`
    would be called once after the decoding loop.

    Args:
        decoder(Decoder): An instance of `Decoder`.
        inits(tuple): Argument passed to `decoder.initialize`. 
        decode_vocab(DecoderDynamicVocab): namedtuple(table table_len column column_len value value_len)
        max_step_num(int): The maximum number of steps.
        **kwargs: Additional keyword arguments. Arguments passed to `decoder.step`. 

    Returns:
        tuple: A tuple( :code:`(final_outputs, final_states)` ) including the final \
            outputs and states, both are Tensor or nested structure of Tensor. \
            `final_outputs` has the same structure and data types as \
            :code:`decoder.output_dtype` , and each Tensor in `final_outputs` \
            is the stacked outputs of all decoding steps, which might be revised \
            by :code:`decoder.finalize` . `final_states` is the counterpart of \
            the initial states returned by :code:`decoder.initialize` at the \
            last time step, and thus has the same structure, with tensors of \
            the same shapes and data types.
    """
    step_cnt = tensor.fill_constant(shape=[1], dtype="int64", value=1)
    max_step_num_tensor = tensor.fill_constant(shape=[1],
                                               dtype="int64",
                                               value=max_step_num - 2)

    # shape = [batch_size, beam_size, ...]
    initial_inputs, initial_states, initial_finished = decoder.initialize(
        inits, decode_vocab)
    global_inputs, global_states, global_finished = (initial_inputs,
                                                     initial_states,
                                                     initial_finished)
    inputs = initial_inputs
    states = initial_states

    # buffers for the decoding outputs
    outputs_arr_data = tensor.fill_constant_batch_size_like(
        inputs.input,
        shape=[-1, decoder.beam_size, max_step_num],
        dtype=decoder.output_dtype.predicted_ids,
        value=0)
    outputs_arr_pos = tensor.fill_constant_batch_size_like(
        inputs.input, shape=[-1, decoder.beam_size, 1], dtype='int64', value=0)
    outputs_array = data_structure.ArrayData(
        decoder.merge_batch_beams(outputs_arr_data),
        decoder.merge_batch_beams(outputs_arr_pos))

    sequence_lengths = tensor.cast(tensor.zeros_like(initial_finished),
                                   "int64")

    # constraint data structures for grammar-guided decoding
    grammar_stack_dat = tensor.fill_constant_batch_size_like(
        inputs.input,
        shape=[-1, decoder.beam_size, max_step_num * STACK_EXPAND_TIMES],
        dtype='int64',
        value=0)
    grammar_stack_pos = tensor.fill_constant_batch_size_like(
        inputs.input, shape=[-1, decoder.beam_size, 1], dtype='int64', value=0)
    grammar_stack = data_structure.StackData(
        decoder.merge_batch_beams(grammar_stack_dat),
        decoder.merge_batch_beams(grammar_stack_pos))

    ############    decode in a loop until every sample is finished    ############
    #   finished is determined by global_finished/next_finished && max_step_num
    cond = layers.logical_not((layers.reduce_all(initial_finished)))
    while_op = layers.While(cond)
    with while_op.block():
        # step_outputs --> OutputWrapper
        # next_states  --> StateWrapper
        # next_inputs  --> DecoderInputsWrapper
        step_outputs, next_states, next_inputs = decoder.step(
            inputs, states, **kwargs)
        predicted_ids = step_outputs.predicted_ids
        _save_predict_output(outputs_array, predicted_ids,
                             next_states.finished)

        pred_gmr_type = decoder.grammar_type(predicted_ids)
        cond_type_leaf = layers.equal(pred_gmr_type, decoder.GMR_TYPE.LEAF)
        cond_type_midd = layers.equal(pred_gmr_type, decoder.GMR_TYPE.MID)

        _process_type_leaf(cond_type_leaf, decoder, grammar_stack, next_inputs,
                           next_states.finished)
        _process_type_midd(cond_type_midd, decoder, grammar_stack, next_inputs,
                           predicted_ids)

        ##next_sequence_lengths = layers.elementwise_add(sequence_lengths,
        ##                        tensor.cast(layers.logical_not(global_finished), sequence_lengths.dtype))

        _check_finished(decoder, next_inputs, next_states.finished,
                        outputs_array)

        layers.utils.map_structure(tensor.assign, next_inputs, global_inputs)
        layers.utils.map_structure(tensor.assign, next_states, global_states)
        tensor.assign(next_states.finished, global_finished)
        ##tensor.assign(next_sequence_lengths, sequence_lengths)

        # update the loop condition
        layers.increment(x=step_cnt, value=1.0, in_place=True)
        layers.logical_and(
            layers.logical_not(layers.reduce_all(next_states.finished)),
            layers.less_equal(step_cnt, max_step_num_tensor), cond)

    final_outputs = outputs_array.data
    final_states = global_states

    final_outputs, final_states = decoder.finalize(final_outputs,
                                                   global_states,
                                                   sequence_lengths)

    return final_outputs, final_states
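
Stripped of the static-graph plumbing (While, assign, fill_constant_batch_size_like), the control flow above reduces to an ordinary decode loop. The following eager sketch illustrates the pattern with a toy decoder (all names here are illustrative; the real decoder is batched and tracks per-sample finished flags):

    from collections import namedtuple

    StepOutput = namedtuple('StepOutput', 'predicted_ids')
    State = namedtuple('State', 'step finished')

    class ToyDecoder:
        """Emits 1, 2, 3 and then reports finished."""
        def initialize(self, inits, decode_vocab):
            return 0, State(step=0, finished=False)
        def step(self, inputs, states):
            nxt = states.step + 1
            return StepOutput(nxt), State(step=nxt, finished=nxt >= 3), nxt
        def finalize(self, outputs, states):
            return outputs, states

    def dynamic_decode_eager(decoder, inits=None, decode_vocab=None, max_step_num=10):
        # eager outline of the while_op graph built above
        inputs, states = decoder.initialize(inits, decode_vocab)
        outputs, step_cnt = [], 1
        while not states.finished and step_cnt <= max_step_num - 2:
            step_outputs, states, inputs = decoder.step(inputs, states)
            outputs.append(step_outputs.predicted_ids)
            step_cnt += 1
        return decoder.finalize(outputs, states)

    print(dynamic_decode_eager(ToyDecoder()))
    # ([1, 2, 3], State(step=3, finished=True))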