def ernie_encoder_with_paddle_hub(ernie_inputs, max_seq_len):
    """Wire a PaddleHub ERNIE module into the current main program and
    expose its sentence/token embeddings.

    Args:
        ernie_inputs: dict of feed variables with keys "src_ids",
            "sent_ids", "pos_ids", "input_mask" and "seq_lens".
        max_seq_len: maximum sequence length passed to the ERNIE context.

    Returns:
        dict with persistable variables "sentence_embeddings" (pooled
        [CLS] features) and "token_embeddings" (unpadded sequence output).
    """
    ernie_module = hub.Module(name="ernie")
    module_inputs, module_outputs, module_program = ernie_module.context(
        trainable=True, max_seq_len=max_seq_len, learning_rate=1)

    # Map the module's expected input variables onto our feed variables.
    feed_mapping = {
        module_inputs["input_ids"].name: ernie_inputs["src_ids"],
        module_inputs["segment_ids"].name: ernie_inputs["sent_ids"],
        module_inputs["position_ids"].name: ernie_inputs["pos_ids"],
        module_inputs["input_mask"].name: ernie_inputs["input_mask"],
    }
    hub.connect_program(
        pre_program=fluid.default_main_program(),
        next_program=module_program,
        input_dict=feed_mapping,
        inplace=True)

    # Strip padding from the per-token encoder output using true lengths.
    token_embeddings = fluid.layers.sequence_unpad(
        module_outputs["sequence_output"], length=ernie_inputs["seq_lens"])

    result = {
        "sentence_embeddings": module_outputs["pooled_output"],
        "token_embeddings": token_embeddings,
    }
    # Keep the embedding variables alive so they can be fetched after run().
    for variable in result.values():
        variable.persistable = True
    return result
def create_model(args, pyreader_name, is_prediction=False):
    """Build an ERNIE-based text classification model.

    Args:
        args: parsed arguments; uses `max_seq_len`, `num_labels`,
            `use_fp16` and `loss_scaling`.
        pyreader_name: name for the ERNIE pyreader.
        is_prediction: kept for interface compatibility; currently unused
            in this body — TODO(review): confirm whether a prediction-only
            graph (no loss/accuracy) was intended.

    Returns:
        (pyreader, graph_vars) where graph_vars holds loss, probs,
        accuracy, labels, num_seqs and qids, all persistable.
    """
    pyreader, ernie_inputs, labels = ernie_pyreader(pyreader_name,
                                                    args.max_seq_len)
    module = hub.Module(name="ernie")
    # BUG FIX: `trainable` was passed as the string "True". Any non-empty
    # string is truthy, so even "False" would have enabled training.
    # Pass a real boolean, consistent with ernie_encoder_with_paddle_hub.
    inputs, outputs, program = module.context(
        trainable=True, max_seq_len=args.max_seq_len)

    # Connect our feed variables to the module's expected inputs.
    input_dict = {
        inputs["input_ids"].name: ernie_inputs["src_ids"],
        inputs["position_ids"].name: ernie_inputs["pos_ids"],
        inputs["segment_ids"].name: ernie_inputs["sent_ids"],
        inputs["input_mask"].name: ernie_inputs["input_mask"],
    }
    hub.connect_program(
        pre_program=fluid.default_main_program(),
        next_program=program,
        input_dict=input_dict)

    # Classification head on the pooled [CLS] representation.
    cls_feats = fluid.layers.dropout(
        x=outputs["pooled_output"],
        dropout_prob=0.1,
        dropout_implementation="upscale_in_train")
    logits = fluid.layers.fc(
        input=cls_feats,
        size=args.num_labels,
        param_attr=fluid.ParamAttr(
            name="cls_out_w",
            initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
        bias_attr=fluid.ParamAttr(
            name="cls_out_b",
            initializer=fluid.initializer.Constant(0.)))

    ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
        logits=logits, label=labels, return_softmax=True)
    loss = fluid.layers.mean(x=ce_loss)

    # Loss scaling for fp16 training to avoid gradient underflow.
    if args.use_fp16 and args.loss_scaling > 1.0:
        loss *= args.loss_scaling

    num_seqs = fluid.layers.create_tensor(dtype='int64')
    accuracy = fluid.layers.accuracy(input=probs, label=labels,
                                     total=num_seqs)

    graph_vars = {
        "loss": loss,
        "probs": probs,
        "accuracy": accuracy,
        "labels": labels,
        "num_seqs": num_seqs,
        "qids": ernie_inputs["qids"]
    }
    # Keep fetch targets alive across executor runs.
    for k, v in graph_vars.items():
        v.persistable = True

    return pyreader, graph_vars
def _build_env(self):
    """Build the training/evaluation environment for the current phase.

    Clones the base main program, constructs the net (plus labels, loss
    and metrics for train/test phases), optionally rewires feeds through
    a py_reader, applies the training strategy, and compiles the program
    for data-parallel execution. Idempotent: returns immediately if the
    environment was already initialized.
    """
    # NOTE(review): 'is_inititalized' is misspelled project-wide; kept
    # as-is for compatibility with the rest of the class.
    if self.env.is_inititalized:
        return
    self._build_env_start_event()
    self.env.is_inititalized = True
    # Fresh trainable copy of the base program for this phase.
    self.env.main_program = clone_program(self._base_main_program, for_test=False)
    self.env.startup_program = fluid.Program()
    with fluid.program_guard(self.env.main_program, self._base_startup_program):
        # Unique-name guard keeps parameter names consistent across phases.
        with fluid.unique_name.guard(self.env.UNG):
            self.env.outputs = self._build_net()
            if self.is_train_phase or self.is_test_phase:
                self.env.labels = self._add_label()
                self.env.loss = self._add_loss()
                self.env.metrics = self._add_metrics()
    if self.is_predict_phase or self.is_test_phase:
        # Re-clone in inference mode and mark ops as test-time.
        self.env.main_program = clone_program(self.env.main_program, for_test=True)
        hub.common.paddle_helper.set_op_attr(self.env.main_program, is_test=True)
    if self.config.use_pyreader:
        # Build a py_reader in a fresh program, then splice the main
        # program onto its outputs via connect_program.
        t_program = fluid.Program()
        with fluid.program_guard(t_program, self.env.startup_program):
            self.env.py_reader = fluid.layers.py_reader(
                capacity=64,
                shapes=[var.shape for var in self.feed_var_list],
                dtypes=[dtype_map[var.dtype] for var in self.feed_var_list],
                lod_levels=[var.lod_level for var in self.feed_var_list],
                use_double_buffer=False)
            feed_var_list = self.feed_var_list
            py_vars = fluid.layers.read_file(self.env.py_reader)
            py_vars = to_list(py_vars)
            # Positional mapping: i-th feed variable <- i-th reader output.
            input_dict = {
                feed_var_list[index].name: py_var
                for index, py_var in enumerate(py_vars)
            }
            hub.connect_program(
                pre_program=t_program,
                next_program=self.env.main_program,
                input_dict=input_dict,
                need_log=False)
        self.env.main_program = t_program
        if not self.is_predict_phase:
            # Re-resolve loss/metrics against the new (connected) program,
            # since connect_program produced fresh variable objects.
            self.env.loss = self.env.main_program.global_block().vars[
                self.env.loss.name]
            metrics_name = [var.name for var in self.env.metrics]
            self.env.metrics = [
                self.env.main_program.global_block().vars[name]
                for name in metrics_name
            ]
        outputs_name = [var.name for var in self.env.outputs]
        self.env.outputs = [
            self.env.main_program.global_block().vars[name]
            for name in outputs_name
        ]
    if self.config.enable_memory_optim:
        # memory_optimize may recycle variables; pin all fetch targets.
        for var_name in self.fetch_list:
            var = self.env.main_program.global_block().vars[var_name]
            var.persistable = True
    if self.is_train_phase:
        # Let the strategy (optimizer/lr schedule) add its ops.
        with fluid.program_guard(self.env.main_program, self._base_startup_program):
            with fluid.unique_name.guard(self.env.UNG):
                self.config.strategy.execute(self.loss, self._base_data_reader, self.config)
    if self.is_train_phase:
        loss_name = self.env.loss.name
        share_vars_from = None
    else:
        loss_name = None
        # Non-train phases reuse parameters from the first compiled program.
        if self._base_compiled_program is None:
            share_vars_from = None
        else:
            share_vars_from = self._base_compiled_program
    if not self.config.use_data_parallel:
        if self.config.enable_memory_optim:
            fluid.memory_optimize(self.env.main_program)
        self.env.main_program_compiled = None
    else:
        self.env.main_program_compiled = fluid.CompiledProgram(
            self.env.main_program).with_data_parallel(
                loss_name=loss_name,
                share_vars_from=share_vars_from,
                build_strategy=self.build_strategy)
    # Remember the first compiled program so later phases can share vars.
    if self._base_compiled_program is None:
        self._base_compiled_program = self.env.main_program_compiled
    self.exe.run(self.env.startup_program)
    self._build_env_end_event()
def _build_env(self):
    """Build the training/evaluation environment for the current phase
    (paddle-fluid 1.5/1.6 variant).

    Differs from the sibling variant by: skipping the for_test re-clone
    (known EnforceNotMet issue, see TODO below), unconditionally sharing
    vars from the base compiled program, stripping root log handlers to
    avoid duplicate output, and capturing the scheduled learning rate and
    max train steps from the strategy.
    """
    # NOTE(review): 'is_inititalized' is misspelled project-wide; kept
    # as-is for compatibility with the rest of the class.
    if self.env.is_inititalized:
        return
    self._build_env_start_event()
    self.env.is_inititalized = True
    self.env.main_program = clone_program(self.base_main_program, for_test=False)
    self.env.startup_program = fluid.Program()
    with fluid.program_guard(self.env.main_program, self._base_startup_program):
        with fluid.unique_name.guard(self.env.UNG):
            self.env.outputs = self._build_net()
            if self.is_train_phase or self.is_test_phase:
                self.env.labels = self._add_label()
                self.env.loss = self._add_loss()
                self.env.metrics = self._add_metrics()
    if self.is_predict_phase or self.is_test_phase:
        # TODO: re-enabling the for_test clone raises:
        # paddle.fluid.core_avx.EnforceNotMet: Getting 'tensor_desc' is not supported by the type of var kCUDNNFwdAlgoCache. at
        # self.env.main_program = clone_program(
        #     self.env.main_program, for_test=True)
        hub.common.paddle_helper.set_op_attr(self.env.main_program, is_test=True)
    if self.config.use_pyreader:
        # Build a py_reader in a fresh program, then splice the main
        # program onto its outputs via connect_program.
        t_program = fluid.Program()
        with fluid.program_guard(t_program, self.env.startup_program):
            self.env.py_reader = fluid.layers.py_reader(
                capacity=64,
                shapes=[var.shape for var in self.feed_var_list],
                dtypes=[dtype_map[var.dtype] for var in self.feed_var_list],
                lod_levels=[var.lod_level for var in self.feed_var_list],
                use_double_buffer=False)
            feed_var_list = self.feed_var_list
            py_vars = fluid.layers.read_file(self.env.py_reader)
            py_vars = to_list(py_vars)
            # Positional mapping: i-th feed variable <- i-th reader output.
            input_dict = {
                feed_var_list[index].name: py_var
                for index, py_var in enumerate(py_vars)
            }
            hub.connect_program(
                pre_program=t_program,
                next_program=self.env.main_program,
                input_dict=input_dict,
                need_log=False)
        self.env.main_program = t_program
        if not self.is_predict_phase:
            # Re-resolve loss/metrics against the new (connected) program,
            # since connect_program produced fresh variable objects.
            self.env.loss = self.env.main_program.global_block().vars[
                self.env.loss.name]
            metrics_name = [var.name for var in self.env.metrics]
            self.env.metrics = [
                self.env.main_program.global_block().vars[name]
                for name in metrics_name
            ]
        outputs_name = [var.name for var in self.env.outputs]
        self.env.outputs = [
            self.env.main_program.global_block().vars[name]
            for name in outputs_name
        ]
    if self.config.enable_memory_optim:
        # Pin all fetch targets so they survive any memory optimization.
        for var_name in self.fetch_list:
            var = self.env.main_program.global_block().vars[var_name]
            var.persistable = True
    # Remove root log handlers so messages are not printed twice
    # (paddle-fluid 1.6 installs its own handler on the root logger).
    for handler in logging.root.handlers[:]:
        logging.root.removeHandler(handler)
    if self.is_train_phase:
        # Let the strategy add optimizer ops; capture its lr schedule
        # and the computed maximum number of training steps.
        with fluid.program_guard(self.env.main_program, self._base_startup_program):
            with fluid.unique_name.guard(self.env.UNG):
                self.scheduled_lr, self.max_train_steps = self.config.strategy.execute(
                    self.loss, self._base_data_reader, self.config, self.device_count)
    if self.is_train_phase:
        loss_name = self.env.loss.name
    else:
        loss_name = None
    # NOTE(review): unlike the sibling variant, vars are always shared
    # from self._base_compiled_program (may be None on the first build).
    share_vars_from = self._base_compiled_program
    if not self.config.use_data_parallel:
        self.env.main_program_compiled = None
    else:
        self.env.main_program_compiled = fluid.CompiledProgram(
            self.env.main_program).with_data_parallel(
                loss_name=loss_name,
                share_vars_from=share_vars_from,
                build_strategy=self.build_strategy)
    self.exe.run(self.env.startup_program)
    # Remove root log handlers again: the startup run may re-register
    # them (paddle-fluid 1.5 behavior).
    for handler in logging.root.handlers[:]:
        logging.root.removeHandler(handler)
    self._build_env_end_event()