def init_model(args, init_from_params):
    args.init_from_params = init_from_params

    # Define model
    transformer = SimultaneousTransformerDemo(
        args.src_vocab_size, args.trg_vocab_size, args.max_length + 1,
        args.n_layer, args.n_head, args.d_model, args.d_inner_hid,
        args.dropout, args.weight_sharing, args.bos_idx, args.eos_idx,
        args.waitk)

    # Load the trained model
    assert args.init_from_params, (
        "Please set init_from_params to load the infer model.")
    model_dict = paddle.load(
        os.path.join(args.init_from_params, "transformer.pdparams"))

    # To avoid a longer length than training, reset the size of position
    # encoding to max_length
    model_dict["src_pos_embedding.pos_encoder.weight"] = position_encoding_init(
        args.max_length + 1, args.d_model)
    model_dict["trg_pos_embedding.pos_encoder.weight"] = position_encoding_init(
        args.max_length + 1, args.d_model)
    transformer.load_dict(model_dict)
    return transformer

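# All of the loaders in this section rebuild the position table via
# position_encoding_init. A minimal sketch of the standard sinusoidal
# formulation it is expected to compute (the exact PaddleNLP implementation
# may differ in detail; assumes an even d_model):
import numpy as np

def sinusoid_position_encoding(max_length, d_model):
    # Even columns hold sin, odd columns hold cos, each scaled by a
    # geometric progression of wavelengths.
    position = np.arange(max_length)[:, None]                      # [L, 1]
    div_term = np.power(10000.0, np.arange(0, d_model, 2) / d_model)
    table = np.zeros([max_length, d_model], dtype="float32")
    table[:, 0::2] = np.sin(position / div_term)
    table[:, 1::2] = np.cos(position / div_term)
    return table
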
def do_predict(args):
    place = "gpu:0" if args.use_gpu else "cpu"
    paddle.set_device(place)

    # Define data loader
    test_loader, to_tokens = create_infer_loader(args)

    # Define model
    transformer = InferTransformerModel(
        src_vocab_size=args.src_vocab_size,
        trg_vocab_size=args.trg_vocab_size,
        max_length=args.max_length + 1,
        n_layer=args.n_layer,
        n_head=args.n_head,
        d_model=args.d_model,
        d_inner_hid=args.d_inner_hid,
        dropout=args.dropout,
        weight_sharing=args.weight_sharing,
        bos_id=args.bos_idx,
        eos_id=args.eos_idx,
        beam_size=args.beam_size,
        max_out_len=args.max_out_len)

    # Load the trained model. NOTE: the checkpoint directory is hardcoded
    # here instead of being read from args.init_from_params.
    init_from_params = 'trained_models/step_final'
    model_dict = paddle.load(
        os.path.join(init_from_params, "transformer.pdparams"))

    # To avoid a longer length than training, reset the size of position
    # encoding to max_length
    model_dict["encoder.pos_encoder.weight"] = position_encoding_init(
        args.max_length + 1, args.d_model)
    model_dict["decoder.pos_encoder.weight"] = position_encoding_init(
        args.max_length + 1, args.d_model)
    transformer.load_dict(model_dict)

    # Set evaluate mode
    transformer.eval()

    with open(args.output_file, "w") as f:
        with paddle.no_grad():
            for (src_word, ) in test_loader:
                finished_seq = transformer(src_word=src_word)
                finished_seq = finished_seq.numpy().transpose([0, 2, 1])
                for ins in finished_seq:
                    for beam_idx, beam in enumerate(ins):
                        if beam_idx >= args.n_best:
                            break
                        id_list = post_process_seq(beam, args.bos_idx,
                                                   args.eos_idx)
                        word_list = to_tokens(id_list)
                        sequence = " ".join(word_list) + "\n"
                        f.write(sequence)

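# post_process_seq comes from the example's helper module. A sketch of the
# behavior it is expected to provide, based on the PaddleNLP examples:
# truncate at the first eos and drop bos/eos from the output ids. The
# function below is a hypothetical stand-in, not the library's definition.
def post_process_seq_sketch(seq, bos_idx, eos_idx):
    eos_pos = len(seq) - 1
    for i, idx in enumerate(seq):
        if idx == eos_idx:
            eos_pos = i
            break
    return [idx for idx in seq[:eos_pos + 1]
            if idx != bos_idx and idx != eos_idx]
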
def export_params(self, init_from_params, place):
    # Load the trained model
    assert init_from_params, (
        "Please set init_from_params to load the infer model.")
    model_dict = paddle.load(init_from_params, return_numpy=True)

    # To set weight[padding_idx] to 0.
    model_dict["trg_word_embedding.word_embedding.weight"][
        self.bos_id] = [0] * self.d_model

    # Dealing with weight sharing.
    if self.weight_sharing:
        model_dict["decoding_linear.weight"] = np.transpose(
            model_dict["trg_word_embedding.word_embedding.weight"])
    else:
        model_dict["decoding_linear.weight"] = model_dict["linear.weight"]

    # NOTE: the data type of the embedding bias for logits differs between
    # decoding with beam search and top-k/top-p sampling in Faster
    # Transformer when using float16.
    # NOTE: this changed in FasterTransformer v4.0; update accordingly
    # after upgrading to FT-4.0.
    bias_dtype = "float32"
    if self.use_fp16_decoding and not self.decoding_strategy.startswith(
            "beam_search"):
        bias_dtype = "float16"
    model_dict["decoding_linear.bias"] = np.zeros(
        [self.trg_vocab_size], dtype=bias_dtype)

    # To avoid a longer length than training, reset the size of position
    # encoding to max_length
    model_dict["encoder.pos_encoder.weight"] = position_encoding_init(
        self.max_length, self.d_model)
    model_dict["decoder.pos_encoder.weight"] = position_encoding_init(
        self.max_length, self.d_model)

    if self.use_fp16_decoding:
        for item in self.state_dict():
            if "decoder" in item:
                model_dict[item] = np.float16(model_dict[item])
        model_dict["decoding_linear.weight"] = np.float16(
            model_dict["decoding_linear.weight"])
        model_dict["trg_word_embedding.word_embedding.weight"] = np.float16(
            model_dict["trg_word_embedding.word_embedding.weight"])
        model_dict["trg_pos_embedding.pos_encoder.weight"] = np.float16(
            model_dict["trg_pos_embedding.pos_encoder.weight"])

    # Copy the numpy weights into the corresponding static-graph tensors.
    for item in self.state_dict():
        param = self
        attr_list = item.split(".")
        for attr in attr_list:
            param = getattr(param, attr)
        param_name = param.name
        var = paddle.static.global_scope().find_var(param_name).get_tensor()
        var.set(model_dict[item], place)

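# In the weight-sharing branch above, the output projection reuses the
# target embedding matrix E of shape [trg_vocab_size, d_model], so logits
# are computed as h @ E.T. A toy shape check with hypothetical sizes:
import numpy as np

E = np.random.rand(10, 4)        # hypothetical [trg_vocab_size, d_model]
W = np.transpose(E)              # decoding_linear.weight, [d_model, trg_vocab_size]
h = np.random.rand(2, 4)         # two decoder hidden states
assert (h @ W).shape == (2, 10)  # logits over the target vocabulary
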
def __init__(self,
             max_length: int = 256,
             max_out_len: int = 256,
             beam_size: int = 5):
    super(MTTransformer, self).__init__()
    bpe_codes_file = os.path.join(MODULE_HOME, 'transformer_zh_en', 'assets',
                                  '2M.zh2en.dict4bpe.zh')
    src_vocab_file = os.path.join(MODULE_HOME, 'transformer_zh_en', 'assets',
                                  'vocab.zh')
    trg_vocab_file = os.path.join(MODULE_HOME, 'transformer_zh_en', 'assets',
                                  'vocab.en')
    checkpoint = os.path.join(MODULE_HOME, 'transformer_zh_en', 'assets',
                              'transformer.pdparams')

    self.max_length = max_length
    self.beam_size = beam_size
    self.tokenizer = MTTokenizer(bpe_codes_file=bpe_codes_file,
                                 lang_src=self.lang_config['source'],
                                 lang_trg=self.lang_config['target'])
    self.src_vocab = Vocab.load_vocabulary(
        filepath=src_vocab_file,
        unk_token=self.vocab_config['unk_token'],
        bos_token=self.vocab_config['bos_token'],
        eos_token=self.vocab_config['eos_token'])
    self.trg_vocab = Vocab.load_vocabulary(
        filepath=trg_vocab_file,
        unk_token=self.vocab_config['unk_token'],
        bos_token=self.vocab_config['bos_token'],
        eos_token=self.vocab_config['eos_token'])

    # Round both vocabulary sizes up to the nearest multiple of pad_factor.
    self.src_vocab_size = (len(self.src_vocab) + self.vocab_config['pad_factor'] - 1) \
        // self.vocab_config['pad_factor'] * self.vocab_config['pad_factor']
    self.trg_vocab_size = (len(self.trg_vocab) + self.vocab_config['pad_factor'] - 1) \
        // self.vocab_config['pad_factor'] * self.vocab_config['pad_factor']

    self.transformer = InferTransformerModel(
        src_vocab_size=self.src_vocab_size,
        trg_vocab_size=self.trg_vocab_size,
        bos_id=self.vocab_config['bos_id'],
        eos_id=self.vocab_config['eos_id'],
        max_length=self.max_length + 1,
        max_out_len=max_out_len,
        beam_size=self.beam_size,
        **self.model_config)

    state_dict = paddle.load(checkpoint)

    # To avoid a longer length than training, reset the size of position
    # encoding to max_length
    state_dict["encoder.pos_encoder.weight"] = position_encoding_init(
        self.max_length + 1, self.model_config['d_model'])
    state_dict["decoder.pos_encoder.weight"] = position_encoding_init(
        self.max_length + 1, self.model_config['d_model'])
    self.transformer.set_state_dict(state_dict)

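# The (len + pad_factor - 1) // pad_factor * pad_factor expression above is
# a ceiling-to-multiple: it rounds each vocabulary size up to the nearest
# multiple of pad_factor so embedding shapes stay aligned. A worked example
# with a hypothetical pad_factor and vocabulary length:
pad_factor = 8
vocab_len = 20007
padded = (vocab_len + pad_factor - 1) // pad_factor * pad_factor
assert padded == 20008  # the next multiple of 8 at or above 20007
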
def do_export(args):
    # Adapt vocabulary size
    reader.adapt_vocab_size(args)

    # Define model
    transformer = InferTransformerModel(
        src_vocab_size=args.src_vocab_size,
        trg_vocab_size=args.trg_vocab_size,
        max_length=args.max_length + 1,
        num_encoder_layers=args.n_layer,
        num_decoder_layers=args.n_layer,
        n_head=args.n_head,
        d_model=args.d_model,
        d_inner_hid=args.d_inner_hid,
        dropout=args.dropout,
        weight_sharing=args.weight_sharing,
        bos_id=args.bos_idx,
        eos_id=args.eos_idx,
        beam_size=args.beam_size,
        max_out_len=args.max_out_len)

    # Load the trained model
    assert args.init_from_params, (
        "Please set init_from_params to load the infer model.")
    model_dict = paddle.load(
        os.path.join(args.init_from_params, "transformer.pdparams"))

    # To avoid a longer length than training, reset the size of position
    # encoding to max_length
    model_dict["encoder.pos_encoder.weight"] = position_encoding_init(
        args.max_length + 1, args.d_model)
    model_dict["decoder.pos_encoder.weight"] = position_encoding_init(
        args.max_length + 1, args.d_model)
    transformer.load_dict(model_dict)

    # Set evaluate mode
    transformer.eval()

    # Convert dygraph model to static graph model
    transformer = paddle.jit.to_static(
        transformer,
        input_spec=[
            # src_word
            paddle.static.InputSpec(shape=[None, None], dtype="int64")
        ])

    # Save converted static graph model
    paddle.jit.save(transformer,
                    os.path.join(args.inference_model_dir, "transformer"))
    logger.info("Transformer has been saved to {}".format(
        args.inference_model_dir))

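# A minimal sketch of running the exported static graph with the Paddle
# inference API. The file names follow from the "transformer" prefix passed
# to paddle.jit.save above; the model directory and token ids are
# hypothetical.
import os
import numpy as np
import paddle.inference as paddle_infer

config = paddle_infer.Config(
    os.path.join("inference_model_dir", "transformer.pdmodel"),
    os.path.join("inference_model_dir", "transformer.pdiparams"))
predictor = paddle_infer.create_predictor(config)

input_handle = predictor.get_input_handle(predictor.get_input_names()[0])
input_handle.copy_from_cpu(np.array([[5, 8, 13, 1]], dtype="int64"))
predictor.run()
output_handle = predictor.get_output_handle(predictor.get_output_names()[0])
finished_seq = output_handle.copy_to_cpu()
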
def load(self, init_from_params):
    # Load the trained model
    assert init_from_params, (
        "Please set init_from_params to load the infer model.")
    model_dict = paddle.load(init_from_params, return_numpy=True)

    # To set weight[padding_idx] to 0.
    model_dict["trg_word_embedding.word_embedding.weight"][
        self.bos_id] = [0] * self.d_model

    # Dealing with weight sharing.
    if self.weight_sharing:
        model_dict["decoding_linear.weight"] = np.transpose(
            model_dict["trg_word_embedding.word_embedding.weight"])
    else:
        model_dict["decoding_linear.weight"] = model_dict["linear.weight"]

    # NOTE: the data type of the embedding bias for logits differs between
    # decoding with beam search and top-k/top-p sampling in Faster
    # Transformer when using float16.
    bias_dtype = "float32"
    if self.use_fp16_decoding and self.decoding_strategy != "beam_search":
        bias_dtype = "float16"
    model_dict["decoding_linear.bias"] = np.zeros(
        [self.trg_vocab_size], dtype=bias_dtype)

    # To avoid a longer length than training, reset the size of position
    # encoding to max_length
    model_dict["encoder.pos_encoder.weight"] = position_encoding_init(
        self.max_length, self.d_model)
    model_dict["decoder.pos_encoder.weight"] = position_encoding_init(
        self.max_length, self.d_model)

    if self.use_fp16_decoding:
        for item in self.state_dict():
            if "decoder" in item:
                model_dict[item] = np.float16(model_dict[item])
        model_dict["decoding_linear.weight"] = np.float16(
            model_dict["decoding_linear.weight"])
        model_dict["trg_word_embedding.word_embedding.weight"] = np.float16(
            model_dict["trg_word_embedding.word_embedding.weight"])
        model_dict["trg_pos_embedding.pos_encoder.weight"] = np.float16(
            model_dict["trg_pos_embedding.pos_encoder.weight"])

    self.load_dict(model_dict)

def __init__(self, max_length=256, max_out_len=256):
    super(STTransformer, self).__init__()
    bpe_codes_fpath = os.path.join(MODULE_HOME, "transformer_nist_wait_all",
                                   "assets", "2M.zh2en.dict4bpe.zh")
    src_vocab_fpath = os.path.join(MODULE_HOME, "transformer_nist_wait_all",
                                   "assets", "nist.20k.zh.vocab")
    trg_vocab_fpath = os.path.join(MODULE_HOME, "transformer_nist_wait_all",
                                   "assets", "nist.10k.en.vocab")
    params_fpath = os.path.join(MODULE_HOME, "transformer_nist_wait_all",
                                "assets", "transformer.pdparams")

    self.max_length = max_length
    self.max_out_len = max_out_len
    self.tokenizer = STACLTokenizer(bpe_codes_fpath, src_vocab_fpath,
                                    trg_vocab_fpath)
    src_vocab_size = self.tokenizer.src_vocab_size
    trg_vocab_size = self.tokenizer.trg_vocab_size
    self.transformer = SimultaneousTransformer(
        src_vocab_size,
        trg_vocab_size,
        max_length=self.max_length,
        n_layer=self.model_config['n_layer'],
        n_head=self.model_config['n_head'],
        d_model=self.model_config['d_model'])

    model_dict = paddle.load(params_fpath)

    # To avoid a longer length than training, reset the size of position
    # encoding to max_length
    model_dict["src_pos_embedding.pos_encoder.weight"] = position_encoding_init(
        self.max_length + 1, self.model_config['d_model'])
    model_dict["trg_pos_embedding.pos_encoder.weight"] = position_encoding_init(
        self.max_length + 1, self.model_config['d_model'])
    self.transformer.load_dict(model_dict)

def load(self, init_from_params):
    # Load the trained model
    assert init_from_params, (
        "Please set init_from_params to load the infer model.")
    model_dict = paddle.load(init_from_params)

    # To set weight[padding_idx] to 0.
    model_dict["trg_word_embedding.word_embedding.weight"][
        self.bos_id] = [0] * self.d_model

    # Dealing with weight sharing. The bias is zero-initialized either way.
    if self.weight_sharing:
        model_dict["decoding_linear.weight"] = np.transpose(
            model_dict["trg_word_embedding.word_embedding.weight"])
    else:
        model_dict["decoding_linear.weight"] = model_dict["linear.weight"]
    model_dict["decoding_linear.bias"] = np.zeros(
        [self.trg_vocab_size], dtype="float32")

    # To avoid a longer length than training, reset the size of position
    # encoding to max_length
    model_dict["encoder.pos_encoder.weight"] = position_encoding_init(
        self.max_length, self.d_model)
    model_dict["decoder.pos_encoder.weight"] = position_encoding_init(
        self.max_length, self.d_model)

    if self.use_fp16_decoding:
        for item in self.state_dict():
            if "decoder" in item:
                model_dict[item] = np.float16(model_dict[item])
        model_dict["decoding_linear.weight"] = np.float16(
            model_dict["decoding_linear.weight"])

    self.load_dict(model_dict)

def load(self, init_from_params):
    # Load the trained model
    assert init_from_params, (
        "Please set init_from_params to load the infer model.")
    model_dict = paddle.load(init_from_params, return_numpy=True)

    # To set weight[padding_idx] to 0.
    model_dict["trg_word_embedding.word_embedding.weight"][
        self.bos_id] = [0] * self.d_model

    # To avoid a longer length than training, reset the size of position
    # encoding to max_length
    model_dict["encoder.pos_encoder.weight"] = position_encoding_init(
        self.max_length, self.d_model)
    model_dict["decoder.pos_encoder.weight"] = position_encoding_init(
        self.max_length, self.d_model)

    if self.use_fp16_decoder:
        for item in self.state_dict():
            if "decoder.layers" in item:
                model_dict[item] = np.float16(model_dict[item])

    self.load_dict(model_dict)

def do_predict(args):
    if args.device == 'gpu':
        place = "gpu:0"
    elif args.device == 'xpu':
        place = "xpu:0"
    elif args.device == 'cpu':
        place = "cpu"
    paddle.set_device(place)

    # Define data loader
    test_loader, to_tokens = reader.create_infer_loader(args)

    # Define model
    transformer = SimultaneousTransformer(
        args.src_vocab_size, args.trg_vocab_size, args.max_length + 1,
        args.n_layer, args.n_head, args.d_model, args.d_inner_hid,
        args.dropout, args.weight_sharing, args.bos_idx, args.eos_idx,
        args.waitk)

    # Load the trained model
    assert args.init_from_params, (
        "Please set init_from_params to load the infer model.")
    model_dict = paddle.load(
        os.path.join(args.init_from_params, "transformer.pdparams"))

    # To avoid a longer length than training, reset the size of position
    # encoding to max_length
    model_dict["src_pos_embedding.pos_encoder.weight"] = position_encoding_init(
        args.max_length + 1, args.d_model)
    model_dict["trg_pos_embedding.pos_encoder.weight"] = position_encoding_init(
        args.max_length + 1, args.d_model)
    transformer.load_dict(model_dict)

    # Set evaluate mode
    transformer.eval()

    with open(args.output_file, "w", encoding='utf8') as f:
        with paddle.no_grad():
            for (src_word, ) in test_loader:
                finished_seq, finished_scores = transformer.greedy_search(
                    src_word, max_len=args.max_out_len, waitk=args.waitk)
                finished_seq = finished_seq.numpy()
                finished_scores = finished_scores.numpy()
                for ins in finished_seq:
                    for beam_idx, beam in enumerate(ins):
                        if beam_idx >= args.n_best:
                            break
                        id_list = post_process_seq(beam, args.bos_idx,
                                                   args.eos_idx)
                        word_list = to_tokens(id_list)
                        sequence = ' '.join(word_list) + "\n"
                        f.write(sequence)

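# The waitk argument implements the STACL wait-k policy: the decoder first
# reads k source tokens, then alternates one write with one read until the
# source is exhausted, and keeps writing until eos afterwards. A hypothetical
# illustration of the schedule (not the greedy_search internals):
k, src_len = 3, 6
schedule = ["READ"] * k
for _ in range(src_len - k):
    schedule += ["WRITE", "READ"]
schedule.append("WRITE ...")  # finish decoding on the full source prefix
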