def _to_tensor(self, *args):
    """Convert the input arguments to Tensors.

    Args:
        *args (float|list|numpy.ndarray|Tensor): values to convert.

    Returns:
        tuple: Tensors converted (and broadcast to a common shape) from args.
    """
    numpy_args = []
    variable_args = []
    tmp = 0.

    for arg in args:
        if isinstance(arg, float):
            arg = [arg]
        if not isinstance(arg, (list, tuple, np.ndarray, tensor.Variable)):
            raise TypeError(
                "Type of input args must be float, list, numpy.ndarray or Tensor, but received type {}"
                .format(type(arg)))

        arg_np = np.array(arg)
        arg_dtype = arg_np.dtype
        if str(arg_dtype) != 'float32':
            if str(arg_dtype) != 'float64':
                # The "assign" op doesn't support float64. If dtype is float64, a float32
                # variable is generated here and converted to float64 later using "cast".
                warnings.warn(
                    "The data type of the argument only supports float32 and float64; "
                    "the argument will be converted to float32.")
            arg_np = arg_np.astype('float32')
        # tmp is used to support broadcast: summing all args yields the mixed (broadcast) shape.
        tmp = tmp + arg_np
        numpy_args.append(arg_np)

    dtype = tmp.dtype
    for arg in numpy_args:
        arg_broadcasted, _ = np.broadcast_arrays(arg, tmp)
        arg_variable = tensor.create_tensor(dtype=dtype)
        tensor.assign(arg_broadcasted, arg_variable)
        variable_args.append(arg_variable)

    return tuple(variable_args)
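# A minimal NumPy-only sketch (the names `loc`/`scale` are illustrative, not part
# of the code above) of the broadcast trick used in `_to_tensor`: summing all
# arguments into `tmp` yields an array whose shape is the common broadcast shape,
# so each argument can then be expanded to that shape with np.broadcast_arrays.
import numpy as np

loc = np.array([0.0, 1.0], dtype='float32')                # shape (2,)
scale = np.array([[1.0], [2.0], [3.0]], dtype='float32')   # shape (3, 1)

tmp = 0. + loc + scale                                     # shape (3, 2), the mixed shape
loc_b, _ = np.broadcast_arrays(loc, tmp)                   # loc broadcast to (3, 2)
scale_b, _ = np.broadcast_arrays(scale, tmp)
print(loc_b.shape, scale_b.shape)                          # (3, 2) (3, 2)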
def lr_warmup(learning_rate, warmup_steps, total_step, multiplier,
              step_each_epoch):
    """Linear warmup followed by cosine decay.

    For the first `warmup_steps` steps, the learning rate grows linearly from
    `learning_rate` to `learning_rate * multiplier`; afterwards it follows a
    cosine decay over `total_step` steps. (`step_each_epoch` is currently unused.)
    """
    with default_main_program()._lr_schedule_guard():
        lr = tensor.create_global_var(shape=[1],
                                      value=0.0,
                                      dtype='float32',
                                      persistable=True,
                                      name='learning_rate_warmup')

        global_step = _decay_step_counter()

        with control_flow.Switch() as switch:
            with switch.case(global_step <= warmup_steps):
                decay_lr = learning_rate * (
                    (multiplier - 1.) * global_step / warmup_steps + 1.)
                tensor.assign(decay_lr, lr)
            with switch.default():
                learning_rate = learning_rate * multiplier
                # cur_epoch = ops.floor(global_step / step_each_epoch)
                decay_lr = learning_rate * 0.5 * (ops.cos(
                    (global_step - warmup_steps) * math.pi / total_step) + 1)
                tensor.assign(decay_lr, lr)

        return lr
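# A hedged usage sketch: build the warmup + cosine-decay schedule and hand the
# resulting learning-rate variable to a fluid optimizer. The hyper-parameter
# values below are illustrative only, not taken from the project.
import paddle.fluid as fluid

lr = lr_warmup(learning_rate=0.1,
               warmup_steps=500,
               total_step=90000,
               multiplier=16.0,
               step_each_epoch=1000)
optimizer = fluid.optimizer.Momentum(learning_rate=lr, momentum=0.9)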
def _lf_embedder(self, tokens, token_lens=None):
    """Logical-form (lf) token embedder.

    Args:
        tokens (Variable): [batch_size, seq_len]
        token_lens (Variable): Default is None.

    Returns:
        Variable: [batch_size, seq_len, lf_emb_size + lf_name_emb_size]
    """
    self._batch_size = layers.shape(self.question_encoding)[0]

    ## Grammar Rule Embedding
    self._grammar_vocab = tensor.cast(tensor.assign(
        self.grammar.gmr_vocab.astype(np.int32)),
                                      dtype='int64')
    self._grammar_emb = fluid.embedding(
        input=self._grammar_vocab,
        size=[self.grammar.grammar_size, self.lf_emb_size],
        dtype='float32',
        is_sparse=False,
        param_attr=fluid.ParamAttr(name="lf_embedding",
                                   initializer=nn_utils.uniform(
                                       self.init_scale)))

    batch_emb_lookup_grammar = layers.expand(
        layers.unsqueeze(self._grammar_emb, [0]), [self._batch_size, 1, 1])

    def _table_to_lf_input(ori_encoding):
        """Project ori_encoding to the lf embedding size."""
        output = layers.fc(input=ori_encoding,
                           size=self.lf_emb_size,
                           num_flatten_dims=2,
                           **nn_utils.param_attr('fc_table2lf_input',
                                                 self.init_scale,
                                                 need_bias=False))
        return output

    batch_emb_lookup_all = tensor.concat([
        batch_emb_lookup_grammar,
        _table_to_lf_input(self.tname_encoding),
        _table_to_lf_input(self.cname_encoding),
        _table_to_lf_input(self.value_encoding)
    ], axis=1)
    lf_embedding = nn_utils.batch_gather_2d(batch_emb_lookup_all, tokens)

    ## Grammar Rule Type Embedding
    self._grammar2name = layers.cast(layers.assign(
        self.grammar.gmr2name_arr.astype(np.int32)),
                                     dtype='int64')
    lf_name = layers.reshape(layers.gather(
        self._grammar2name, layers.reshape(tokens, shape=[-1])),
                             shape=tokens.shape)
    lf_name.stop_gradient = True
    lf_name_emb = fluid.embedding(
        input=lf_name,
        size=[self.grammar.name_size, self.lf_name_emb_size],
        dtype='float32',
        is_sparse=False,
        param_attr=fluid.ParamAttr(name="lf_name_embedding",
                                   initializer=nn_utils.uniform(
                                       self.init_scale)))

    output = layers.concat([lf_embedding, lf_name_emb], axis=-1)
    if token_lens is not None:
        mask = layers.sequence_mask(token_lens,
                                    maxlen=layers.shape(tokens)[1],
                                    dtype='float32')
        output = layers.elementwise_mul(output, mask, axis=0)
    return output
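# An illustrative NumPy sketch (not the project API) of what
# `nn_utils.batch_gather_2d(batch_emb_lookup_all, tokens)` is assumed to compute
# above: for every example b and position t,
#     lf_embedding[b, t] = batch_emb_lookup_all[b, tokens[b, t]].
import numpy as np

emb = np.random.rand(2, 5, 4)            # [batch_size, lookup_size, emb_size]
tokens = np.array([[0, 3], [1, 4]])      # [batch_size, seq_len]
gathered = np.take_along_axis(emb, tokens[:, :, None], axis=1)
print(gathered.shape)                    # (2, 2, 4)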
def decode_with_grammar(decoder, inits, decode_vocab, max_step_num, **kwargs):
    """A modification of paddle.fluid.layers.dynamic_decode(...).

    Dynamic decoding performs :code:`decoder.step()` repeatedly until the returned
    Tensor indicating finished status contains all True values or the number of
    decoding steps reaches :attr:`max_step_num`.

    :code:`decoder.initialize()` would be called once before the decoding loop.
    If the `decoder` has implemented the `finalize` method, :code:`decoder.finalize()`
    would be called once after the decoding loop.

    Args:
        decoder (Decoder): An instance of `Decoder`.
        inits (tuple): Argument passed to `decoder.initialize`.
        decode_vocab (DecoderDynamicVocab): namedtuple(table, table_len, column,
            column_len, value, value_len).
        max_step_num (int): The maximum number of steps.
        **kwargs: Additional keyword arguments. Arguments passed to `decoder.step`.

    Returns:
        tuple: A tuple( :code:`(final_outputs, final_states)` ) including the final \
            outputs and states, both are Tensor or nested structure of Tensor. \
            `final_outputs` has the same structure and data types as \
            :code:`decoder.output_dtype` , and each Tensor in `final_outputs` \
            is the stacking of all decoding steps' outputs, which might be revised \
            by :code:`decoder.finalize` . `final_states` is the counterpart \
            at last time step of initial states returned by :code:`decoder.initialize` , \
            thus has the same structure with it and has tensors with same shapes \
            and data types.
    """
    step_cnt = tensor.fill_constant(shape=[1], dtype="int64", value=1)
    max_step_num_tensor = tensor.fill_constant(shape=[1],
                                               dtype="int64",
                                               value=max_step_num - 2)

    # shape = [batch_size, beam_size, ...]
    initial_inputs, initial_states, initial_finished = decoder.initialize(
        inits, decode_vocab)
    global_inputs, global_states, global_finished = (initial_inputs,
                                                     initial_states,
                                                     initial_finished)
    inputs = initial_inputs
    states = initial_states

    # Buffers that hold the decoding outputs.
    outputs_arr_data = tensor.fill_constant_batch_size_like(
        inputs.input,
        shape=[-1, decoder.beam_size, max_step_num],
        dtype=decoder.output_dtype.predicted_ids,
        value=0)
    outputs_arr_pos = tensor.fill_constant_batch_size_like(
        inputs.input, shape=[-1, decoder.beam_size, 1], dtype='int64', value=0)
    outputs_array = data_structure.ArrayData(
        decoder.merge_batch_beams(outputs_arr_data),
        decoder.merge_batch_beams(outputs_arr_pos))

    sequence_lengths = tensor.cast(tensor.zeros_like(initial_finished), "int64")

    # Data structures that constrain decoding to follow the grammar.
    grammar_stack_dat = tensor.fill_constant_batch_size_like(
        inputs.input,
        shape=[-1, decoder.beam_size, max_step_num * STACK_EXPAND_TIMES],
        dtype='int64',
        value=0)
    grammar_stack_pos = tensor.fill_constant_batch_size_like(
        inputs.input, shape=[-1, decoder.beam_size, 1], dtype='int64', value=0)
    grammar_stack = data_structure.StackData(
        decoder.merge_batch_beams(grammar_stack_dat),
        decoder.merge_batch_beams(grammar_stack_pos))

    ############ Decode in a loop until every sequence is finished ############
    # Termination is decided from global_finished/next_finished and max_step_num.
    cond = layers.logical_not((layers.reduce_all(initial_finished)))
    while_op = layers.While(cond)
    with while_op.block():
        # step_outputs --> OutputWrapper
        # next_states  --> StateWrapper
        # next_inputs  --> DecoderInputsWrapper
        step_outputs, next_states, next_inputs = decoder.step(
            inputs, states, **kwargs)
        predicted_ids = step_outputs.predicted_ids
        _save_predict_output(outputs_array, predicted_ids,
                             next_states.finished)

        pred_gmr_type = decoder.grammar_type(predicted_ids)
        cond_type_leaf = layers.equal(pred_gmr_type, decoder.GMR_TYPE.LEAF)
        cond_type_midd = layers.equal(pred_gmr_type, decoder.GMR_TYPE.MID)

        _process_type_leaf(cond_type_leaf, decoder, grammar_stack, next_inputs,
                           next_states.finished)
        _process_type_midd(cond_type_midd, decoder, grammar_stack, next_inputs,
                           predicted_ids)

        ##next_sequence_lengths = layers.elementwise_add(sequence_lengths,
        ##        tensor.cast(layers.logical_not(global_finished), sequence_lengths.dtype))

        _check_finished(decoder, next_inputs, next_states.finished,
                        outputs_array)

        layers.utils.map_structure(tensor.assign, next_inputs, global_inputs)
        layers.utils.map_structure(tensor.assign, next_states, global_states)
        tensor.assign(next_states.finished, global_finished)
        ##tensor.assign(next_sequence_lengths, sequence_lengths)

        # Update the loop condition.
        layers.increment(x=step_cnt, value=1.0, in_place=True)
        layers.logical_and(
            layers.logical_not(layers.reduce_all(next_states.finished)),
            layers.less_equal(step_cnt, max_step_num_tensor), cond)

    final_outputs = outputs_array.data
    final_states = global_states
    final_outputs, final_states = decoder.finalize(final_outputs,
                                                   global_states,
                                                   sequence_lengths)

    return final_outputs, final_states
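# A self-contained sketch of the fluid static-graph while-loop pattern used in
# decode_with_grammar: the loop body recomputes the condition variable in place
# to decide when to stop. Assumes PaddlePaddle 1.x; the counter and limit below
# are illustrative only.
import paddle.fluid as fluid
import paddle.fluid.layers as layers

i = layers.fill_constant(shape=[1], dtype='int64', value=0)
limit = layers.fill_constant(shape=[1], dtype='int64', value=5)
cond = layers.less_than(x=i, y=limit)
while_op = layers.While(cond=cond)
with while_op.block():
    layers.increment(x=i, value=1, in_place=True)
    layers.less_than(x=i, y=limit, cond=cond)   # update the loop condition

exe = fluid.Executor(fluid.CPUPlace())
exe.run(fluid.default_startup_program())
print(exe.run(fluid.default_main_program(), fetch_list=[i]))   # [array([5])]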