def get_variable_info(var):
    """Extract a dict of metadata describing a fluid Variable.

    Args:
        var: a ``fluid.framework.Variable`` (or ``Parameter``) instance.

    Returns:
        dict: the variable's name/type/flags, plus ``dtype``/``lod_level``/
        ``shape`` when the variable type exposes them, and parameter-specific
        attributes when ``var`` is a ``Parameter``.

    Raises:
        TypeError: if ``var`` is not a ``fluid.framework.Variable``.
    """
    if not isinstance(var, fluid.framework.Variable):
        raise TypeError("var should be an instance of fluid.framework.Variable")
    var_info = {
        'name': var.name,
        'stop_gradient': var.stop_gradient,
        'is_data': var.is_data,
        'error_clip': var.error_clip,
        'type': var.type
    }
    try:
        var_info['dtype'] = convert_dtype_to_string(var.dtype)
        var_info['lod_level'] = var.lod_level
        var_info['shape'] = var.shape
    except Exception:
        # Best-effort: some variable types do not expose dtype/lod_level/shape
        # and raise on access. Narrowed from a bare `except:` so that
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        pass
    if isinstance(var, fluid.framework.Parameter):
        var_info['trainable'] = var.trainable
        var_info['optimize_attr'] = var.optimize_attr
        var_info['regularizer'] = var.regularizer
        if not version_compare(paddle.__version__, '1.8'):
            # These Parameter attributes only exist on paddle < 1.8.
            var_info['gradient_clip_attr'] = var.gradient_clip_attr
            var_info['do_model_average'] = var.do_model_average
    else:
        var_info['persistable'] = var.persistable
    return var_info
def _check_paddle_version(self):
    """Check the module's recorded paddle version against the installed one.

    Returns:
        bool: True when the local PaddlePaddle is at least as new as the
        version the Module was generated with; False (after logging a
        warning) otherwise.
    """
    local_version = paddle.__version__
    if not version_compare(self.paddle_version, local_version):
        return True
    # Module was built by a newer paddle than the one installed locally.
    logger.warning(
        "This Module is generated by the PaddlePaddle with version %s, and the local PaddlePaddle version is %s, which may cause serious incompatible bug. Please upgrade PaddlePaddle to the latest version."
        % (self.paddle_version, local_version))
    return False
def forward(self, input_ids, position_ids, segment_ids, input_mask):
    """Run the wrapped static-model runner on one batch of transformer inputs.

    Returns:
        dict: ``pooled_output`` and ``sequence_output`` tensors produced by
        ``self.model_runner``.

    Raises:
        RuntimeError: when the local paddle version is below 1.8, since the
        runner only works in dynamic graph mode.
    """
    if not version_compare(paddle.__version__, '1.8'):
        raise RuntimeError(
            '{} only support dynamic graph mode in paddle >= 1.8'.format(
                self.name))
    outputs = self.model_runner(input_ids, position_ids, segment_ids,
                                input_mask)
    pooled, sequence = outputs
    return {'pooled_output': pooled, 'sequence_output': sequence}
def _build_net(self):
    """Build the sequence-labeling head on top of ``self.feature``.

    Two variants, selected by ``self.add_crf``:
      - CRF head: unpad features, linear emission layer, CRF decoding.
      - Plain head: per-token FC + softmax, argmax for inference.

    Returns:
        list: the graph's output variables (decoded labels for the CRF
        branch, softmax logits otherwise).
    """
    # seq_len placeholder shape differs across paddle versions
    # (paddle >= 1.6 expects [-1]).
    if version_compare(paddle.__version__, "1.6"):
        self.seq_len = fluid.layers.data(
            name="seq_len", shape=[-1], dtype='int64')
    else:
        self.seq_len = fluid.layers.data(
            name="seq_len", shape=[1], dtype='int64')
    seq_len = fluid.layers.assign(self.seq_len)
    if self.add_crf:
        # Strip padding so the CRF only sees real tokens.
        unpad_feature = fluid.layers.sequence_unpad(
            self.feature, length=self.seq_len)
        # Emission scores for each token over the label set.
        self.emission = fluid.layers.fc(
            size=self.num_classes,
            input=unpad_feature,
            param_attr=fluid.ParamAttr(
                initializer=fluid.initializer.Uniform(low=-0.1, high=0.1),
                regularizer=fluid.regularizer.L2DecayRegularizer(
                    regularization_coeff=1e-4)))
        size = self.emission.shape[1]
        # CRF transition parameter; 'crfw' is a graph-global name shared
        # with crf_decoding below.
        fluid.layers.create_parameter(
            shape=[size + 2, size], dtype=self.emission.dtype, name='crfw')
        self.ret_infers = fluid.layers.crf_decoding(
            input=self.emission, param_attr=fluid.ParamAttr(name='crfw'))
        ret_infers = fluid.layers.assign(self.ret_infers)
        return [ret_infers]
    else:
        # Per-token classifier over the (batch, seq, hidden) feature.
        self.logits = fluid.layers.fc(
            input=self.feature,
            size=self.num_classes,
            num_flatten_dims=2,
            param_attr=fluid.ParamAttr(
                name="cls_seq_label_out_w",
                initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
            bias_attr=fluid.ParamAttr(
                name="cls_seq_label_out_b",
                initializer=fluid.initializer.Constant(0.)))
        # Predicted label id per token, reshaped to a column vector.
        self.ret_infers = fluid.layers.reshape(
            x=fluid.layers.argmax(self.logits, axis=2), shape=[-1, 1])
        ret_infers = fluid.layers.assign(self.ret_infers)
        logits = self.logits
        logits = fluid.layers.flatten(logits, axis=2)
        logits = fluid.layers.softmax(logits)
        self.num_labels = logits.shape[1]
        return [logits]
def predict(self,
            data,
            load_best_model=True,
            return_result=False,
            accelerate_mode=True):
    """Run inference over ``data``.

    Args:
        data (list): samples to predict on.
        load_best_model (bool): restore the best checkpoint before predicting.
        return_result (bool): post-process run states into a readable result
            instead of returning them raw.
        accelerate_mode (bool): use the high-performance predictor when the
            installed paddle supports it.

    Returns:
        RunState: the running result of predict phase (or the post-processed
        result when ``return_result`` is True).
    """
    # The accelerated predictor requires paddle >= 1.6.2; fall back quietly.
    if not version_compare(paddle.__version__, "1.6.2") and accelerate_mode:
        logger.warning(
            "Fail to open predict accelerate mode as it does not support paddle < 1.6.2. Please update PaddlePaddle."
        )
        accelerate_mode = False
    self.accelerate_mode = accelerate_mode

    with self.phase_guard(phase="predict"):
        self._predict_data = data
        self._predict_start_event()

        if not load_best_model:
            self.init_if_necessary()
        else:
            self.init_if_load_best_model()

        if self.accelerate_mode:
            # Lazily build the high-performance predictor on first use.
            if not self._predictor:
                self._predictor = self._create_predictor()
            states = self._run_with_predictor()
        else:
            states = self._run()

        self._predict_end_event(states)
        self._predict_data = None

    if not return_result:
        return states
    return self._postprocessing(states)
def __init__(self,
             name=None,
             directory=None,
             module_dir=None,
             version=None,
             max_seq_len=128,
             **kwargs):
    # Initialize a transformer module and, on paddle >= 1.8, export the
    # static program and wrap it in a dygraph StaticModelRunner.
    #
    # NOTE(review): the flattened source is ambiguous between
    # `return` + unconditional super().__init__ and
    # `return super().__init__(...)`; reconstructed as the former since the
    # setup below (self.context, self.model_runner) requires base-class
    # initialization -- confirm against upstream.
    if not directory:
        return
    super(TransformerModule, self).__init__(
        name=name,
        directory=directory,
        module_dir=module_dir,
        version=version,
        **kwargs)
    # Maximum sequence length fed to the transformer graph.
    self.max_seq_len = max_seq_len
    if version_compare(paddle.__version__, '1.8'):
        # Save the static inference program to a temp dir, then reload it as
        # a dygraph-callable runner so forward() can use dynamic graph mode.
        with tmp_dir() as _dir:
            input_dict, output_dict, program = self.context(
                max_seq_len=max_seq_len)
            fluid.io.save_inference_model(
                dirname=_dir,
                main_program=program,
                feeded_var_names=[
                    input_dict['input_ids'].name,
                    input_dict['position_ids'].name,
                    input_dict['segment_ids'].name,
                    input_dict['input_mask'].name
                ],
                target_vars=[
                    output_dict["pooled_output"],
                    output_dict["sequence_output"]
                ],
                executor=fluid.Executor(fluid.CPUPlace()))
            with fluid.dygraph.guard():
                self.model_runner = fluid.dygraph.StaticModelRunner(_dir)
def _build_net(self):
    """Build a sequence-labeling head with stacked BiGRU layers.

    Like the plain sequence-label head, but (when ``self.add_crf`` is set)
    the unpadded features first pass through ``bigru_num`` bidirectional GRU
    layers before the CRF emission/decoding layers.

    Returns:
        list: decoded labels for the CRF branch, softmax logits otherwise.
    """
    self.seq_len = fluid.layers.data(
        name="seq_len", shape=[1], dtype='int64', lod_level=0)
    # paddle >= 1.6 needs the length tensor squeezed to rank 1.
    if version_compare(paddle.__version__, "1.6"):
        self.seq_len_used = fluid.layers.squeeze(self.seq_len, axes=[1])
    else:
        self.seq_len_used = self.seq_len

    # GRU-layer hyperparameters (comment translated from Chinese:
    # "code related to adding the GRU layers").
    grnn_hidden_dim = 256  # 768
    crf_lr = 0.2  # NOTE(review): unused in this function -- confirm intent.
    bigru_num = 2
    init_bound = 0.1

    def _bigru_layer(input_feature):
        """define the bidirectional gru layer """
        # Forward direction: FC projection (3x hidden for GRU gates) + GRU.
        pre_gru = fluid.layers.fc(
            input=input_feature,
            size=grnn_hidden_dim * 3,
            param_attr=fluid.ParamAttr(
                initializer=fluid.initializer.Uniform(
                    low=-init_bound, high=init_bound),
                regularizer=fluid.regularizer.L2DecayRegularizer(
                    regularization_coeff=1e-4)))
        gru = fluid.layers.dynamic_gru(
            input=pre_gru,
            size=grnn_hidden_dim,
            param_attr=fluid.ParamAttr(
                initializer=fluid.initializer.Uniform(
                    low=-init_bound, high=init_bound),
                regularizer=fluid.regularizer.L2DecayRegularizer(
                    regularization_coeff=1e-4)))
        # Reverse direction: same projection + reversed GRU.
        pre_gru_r = fluid.layers.fc(
            input=input_feature,
            size=grnn_hidden_dim * 3,
            param_attr=fluid.ParamAttr(
                initializer=fluid.initializer.Uniform(
                    low=-init_bound, high=init_bound),
                regularizer=fluid.regularizer.L2DecayRegularizer(
                    regularization_coeff=1e-4)))
        gru_r = fluid.layers.dynamic_gru(
            input=pre_gru_r,
            size=grnn_hidden_dim,
            is_reverse=True,
            param_attr=fluid.ParamAttr(
                initializer=fluid.initializer.Uniform(
                    low=-init_bound, high=init_bound),
                regularizer=fluid.regularizer.L2DecayRegularizer(
                    regularization_coeff=1e-4)))
        # Concatenate both directions along the feature axis.
        bi_merge = fluid.layers.concat(input=[gru, gru_r], axis=1)
        return bi_merge

    if self.add_crf:
        # Strip padding so GRU/CRF only see real tokens.
        unpad_feature = fluid.layers.sequence_unpad(
            self.feature, length=self.seq_len_used)
        # Stack bigru_num BiGRU layers (translated: "code related to adding
        # the GRU layers").
        input_feature = unpad_feature
        for i in range(bigru_num):
            bigru_output = _bigru_layer(input_feature)
            input_feature = bigru_output
        unpad_feature = input_feature
        # Emission scores for the CRF over the label set.
        self.emission = fluid.layers.fc(
            size=self.num_classes,
            input=unpad_feature,
            param_attr=fluid.ParamAttr(
                initializer=fluid.initializer.Uniform(low=-0.1, high=0.1),
                regularizer=fluid.regularizer.L2DecayRegularizer(
                    regularization_coeff=1e-4)))
        size = self.emission.shape[1]
        # CRF transition parameter; 'crfw' is shared by name with
        # crf_decoding below.
        fluid.layers.create_parameter(
            shape=[size + 2, size], dtype=self.emission.dtype, name='crfw')
        self.ret_infers = fluid.layers.crf_decoding(
            input=self.emission, param_attr=fluid.ParamAttr(name='crfw'))
        ret_infers = fluid.layers.assign(self.ret_infers)
        return [ret_infers]
    else:
        # Per-token classifier without CRF.
        self.logits = fluid.layers.fc(
            input=self.feature,
            size=self.num_classes,
            num_flatten_dims=2,
            param_attr=fluid.ParamAttr(
                name="cls_seq_label_out_w",
                initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
            bias_attr=fluid.ParamAttr(
                name="cls_seq_label_out_b",
                initializer=fluid.initializer.Constant(0.)))
        # NOTE(review): unlike the sibling _build_net, this branch does not
        # assign() self.ret_infers into a local -- confirm whether that
        # difference is intentional.
        self.ret_infers = fluid.layers.reshape(
            x=fluid.layers.argmax(self.logits, axis=2), shape=[-1, 1])
        logits = self.logits
        logits = fluid.layers.flatten(logits, axis=2)
        logits = fluid.layers.softmax(logits)
        self.num_labels = logits.shape[1]
        return [logits]