Example #1
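These snippets are excerpts from larger files, so their imports live elsewhere. A minimal import header they all rely on might look like the sketch below; the exact module paths for position_encoding_init, the model classes, and the reader helpers are assumptions and vary by project.

import os

import numpy as np
import paddle
# position_encoding_init, the Transformer model classes, and the data
# loaders come from the surrounding project (e.g. PaddleNLP); the exact
# import paths differ between the examples below.
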
def init_model(args, init_from_params):
    # Define model
    args.init_from_params = init_from_params
    transformer = SimultaneousTransformerDemo(
        args.src_vocab_size, args.trg_vocab_size, args.max_length + 1,
        args.n_layer, args.n_head, args.d_model, args.d_inner_hid,
        args.dropout, args.weight_sharing, args.bos_idx, args.eos_idx,
        args.waitk)

    # Load the trained model
    assert args.init_from_params, (
        "Please set init_from_params to load the infer model.")

    model_dict = paddle.load(
        os.path.join(args.init_from_params, "transformer.pdparams"))

    # To avoid a longer length than training, reset the size of position
    # encoding to max_length
    model_dict[
        "src_pos_embedding.pos_encoder.weight"] = position_encoding_init(
            args.max_length + 1, args.d_model)
    model_dict[
        "trg_pos_embedding.pos_encoder.weight"] = position_encoding_init(
            args.max_length + 1, args.d_model)

    transformer.load_dict(model_dict)
    return transformer
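
Every example resets the position-encoding table via position_encoding_init, which the snippets never define. A minimal sketch of the standard sinusoidal encoding (Vaswani et al., 2017) that such a helper typically computes, assuming an even d_pos_vec, is:

def position_encoding_init(n_position, d_pos_vec):
    # PE(pos, 2i)   = sin(pos / 10000^(2i / d))
    # PE(pos, 2i+1) = cos(pos / 10000^(2i / d))
    position = np.arange(n_position)[:, None]  # [n_position, 1]
    div_term = np.power(10000.0, np.arange(0, d_pos_vec, 2) / d_pos_vec)
    pe = np.zeros([n_position, d_pos_vec], dtype="float32")
    pe[:, 0::2] = np.sin(position / div_term)
    pe[:, 1::2] = np.cos(position / div_term)
    return pe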
Example #2
def do_predict(args):
    if args.use_gpu:
        place = "gpu:0"
    else:
        place = "cpu"

    paddle.set_device(place)

    # Define data loader
    test_loader, to_tokens = create_infer_loader(args)

    # Define model
    transformer = InferTransformerModel(src_vocab_size=args.src_vocab_size,
                                        trg_vocab_size=args.trg_vocab_size,
                                        max_length=args.max_length + 1,
                                        n_layer=args.n_layer,
                                        n_head=args.n_head,
                                        d_model=args.d_model,
                                        d_inner_hid=args.d_inner_hid,
                                        dropout=args.dropout,
                                        weight_sharing=args.weight_sharing,
                                        bos_id=args.bos_idx,
                                        eos_id=args.eos_idx,
                                        beam_size=args.beam_size,
                                        max_out_len=args.max_out_len)

    # Load the trained model. The checkpoint directory is hardcoded here
    # rather than taken from args.init_from_params.
    init_from_params = 'trained_models/step_final'
    model_dict = paddle.load(
        os.path.join(init_from_params, "transformer.pdparams"))

    # To avoid a longer length than training, reset the size of position
    # encoding to max_length
    model_dict["encoder.pos_encoder.weight"] = position_encoding_init(
        args.max_length + 1, args.d_model)
    model_dict["decoder.pos_encoder.weight"] = position_encoding_init(
        args.max_length + 1, args.d_model)
    transformer.load_dict(model_dict)

    # Set evaluate mode
    transformer.eval()

    f = open(args.output_file, "w")
    with paddle.no_grad():
        for (src_word, ) in test_loader:
            finished_seq = transformer(src_word=src_word)
            finished_seq = finished_seq.numpy().transpose([0, 2, 1])
            for ins in finished_seq:
                for beam_idx, beam in enumerate(ins):
                    if beam_idx >= args.n_best:
                        break
                    id_list = post_process_seq(beam, args.bos_idx,
                                               args.eos_idx)
                    word_list = to_tokens(id_list)
                    sequence = " ".join(word_list) + "\n"
                    f.write(sequence)
    f.close()
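
post_process_seq is used above but not defined in the snippet. A plausible implementation, consistent with how the loop consumes it (truncate at the first eos and strip the bos/eos ids), would be:

def post_process_seq(seq, bos_idx, eos_idx):
    # Drop everything from the first eos token onward, then remove
    # bos ids so only real word ids remain.
    eos_pos = len(seq)
    for i, idx in enumerate(seq):
        if idx == eos_idx:
            eos_pos = i
            break
    return [idx for idx in seq[:eos_pos] if idx != bos_idx]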
Example #3
    def export_params(self, init_from_params, place):
        # Load the trained model
        assert init_from_params, (
            "Please set init_from_params to load the infer model.")

        model_dict = paddle.load(init_from_params, return_numpy=True)

        # To set weight[padding_idx] to 0.
        model_dict["trg_word_embedding.word_embedding.weight"][
            self.bos_id] = [0] * self.d_model

        # Dealing with weight sharing.
        if self.weight_sharing:
            model_dict["decoding_linear.weight"] = np.transpose(
                model_dict["trg_word_embedding.word_embedding.weight"])
        else:
            model_dict["decoding_linear.weight"] = model_dict["linear.weight"]
        # NOTE: the data type of the embedding bias for logits is different
        # between decoding with beam search and top-k/top-p sampling in
        # Faster Transformer when using float16.
        # NOTE: This changed in FasterTransformer V4.0; update accordingly
        # after upgrading to FT-4.0.
        bias_dtype = "float32"
        if self.use_fp16_decoding and not self.decoding_strategy.startswith(
                "beam_search"):
            bias_dtype = "float16"
        model_dict["decoding_linear.bias"] = np.zeros([self.trg_vocab_size],
                                                      dtype=bias_dtype)

        # To avoid a longer length than training, reset the size of position
        # encoding to max_length
        model_dict["encoder.pos_encoder.weight"] = position_encoding_init(
            self.max_length, self.d_model)
        model_dict["decoder.pos_encoder.weight"] = position_encoding_init(
            self.max_length, self.d_model)

        if self.use_fp16_decoding:
            for item in self.state_dict():
                if "decoder" in item:
                    model_dict[item] = np.float16(model_dict[item])
            model_dict["decoding_linear.weight"] = np.float16(
                model_dict["decoding_linear.weight"])
            model_dict[
                "trg_word_embedding.word_embedding.weight"] = np.float16(
                    model_dict["trg_word_embedding.word_embedding.weight"])
            model_dict["trg_pos_embedding.pos_encoder.weight"] = np.float16(
                model_dict["trg_pos_embedding.pos_encoder.weight"])

        for item in self.state_dict():
            param = self
            attr_list = item.split(".")
            for attr in attr_list:
                param = getattr(param, attr)
            param_name = param.name
            var = paddle.static.global_scope().find_var(
                param_name).get_tensor()
            var.set(model_dict[item], place)
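
A hypothetical call site for export_params, assuming the model has already been built under a static graph so that its parameters exist in paddle.static.global_scope() (the checkpoint path and the ft_model name are illustrative):

place = paddle.CUDAPlace(0) if paddle.is_compiled_with_cuda() \
    else paddle.CPUPlace()
ft_model.export_params(
    init_from_params="trained_models/step_final/transformer.pdparams",
    place=place)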
Example #4
    def __init__(self,
                 max_length: int = 256,
                 max_out_len: int = 256,
                 beam_size: int = 5):
        super(MTTransformer, self).__init__()
        bpe_codes_file = os.path.join(MODULE_HOME, 'transformer_zh_en',
                                      'assets', '2M.zh2en.dict4bpe.zh')
        src_vocab_file = os.path.join(MODULE_HOME, 'transformer_zh_en',
                                      'assets', 'vocab.zh')
        trg_vocab_file = os.path.join(MODULE_HOME, 'transformer_zh_en',
                                      'assets', 'vocab.en')
        checkpoint = os.path.join(MODULE_HOME, 'transformer_zh_en', 'assets',
                                  'transformer.pdparams')

        self.max_length = max_length
        self.beam_size = beam_size
        self.tokenizer = MTTokenizer(bpe_codes_file=bpe_codes_file,
                                     lang_src=self.lang_config['source'],
                                     lang_trg=self.lang_config['target'])
        self.src_vocab = Vocab.load_vocabulary(
            filepath=src_vocab_file,
            unk_token=self.vocab_config['unk_token'],
            bos_token=self.vocab_config['bos_token'],
            eos_token=self.vocab_config['eos_token'])
        self.trg_vocab = Vocab.load_vocabulary(
            filepath=trg_vocab_file,
            unk_token=self.vocab_config['unk_token'],
            bos_token=self.vocab_config['bos_token'],
            eos_token=self.vocab_config['eos_token'])
        self.src_vocab_size = (len(self.src_vocab) + self.vocab_config['pad_factor'] - 1) \
            // self.vocab_config['pad_factor'] * self.vocab_config['pad_factor']
        self.trg_vocab_size = (len(self.trg_vocab) + self.vocab_config['pad_factor'] - 1) \
            // self.vocab_config['pad_factor'] * self.vocab_config['pad_factor']
        self.transformer = InferTransformerModel(
            src_vocab_size=self.src_vocab_size,
            trg_vocab_size=self.trg_vocab_size,
            bos_id=self.vocab_config['bos_id'],
            eos_id=self.vocab_config['eos_id'],
            max_length=self.max_length + 1,
            max_out_len=max_out_len,
            beam_size=self.beam_size,
            **self.model_config)

        state_dict = paddle.load(checkpoint)

        # To avoid a longer length than training, reset the size of position
        # encoding to max_length
        state_dict["encoder.pos_encoder.weight"] = position_encoding_init(
            self.max_length + 1, self.model_config['d_model'])
        state_dict["decoder.pos_encoder.weight"] = position_encoding_init(
            self.max_length + 1, self.model_config['d_model'])

        self.transformer.set_state_dict(state_dict)
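
The src_vocab_size/trg_vocab_size expressions above round each vocabulary size up to the nearest multiple of pad_factor (padded sizes help alignment in fused kernels). A small worked example, with pad_factor=8 as an illustrative value:

pad_factor = 8  # illustrative; the real value comes from vocab_config
for n in (30000, 30001):
    padded = (n + pad_factor - 1) // pad_factor * pad_factor
    print(n, "->", padded)  # 30000 -> 30000, 30001 -> 30008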
Example #5
def do_export(args):
    # Adapt vocabulary size
    reader.adapt_vocab_size(args)
    # Define model
    transformer = InferTransformerModel(
        src_vocab_size=args.src_vocab_size,
        trg_vocab_size=args.trg_vocab_size,
        max_length=args.max_length + 1,
        num_encoder_layers=args.n_layer,
        num_decoder_layers=args.n_layer,
        n_head=args.n_head,
        d_model=args.d_model,
        d_inner_hid=args.d_inner_hid,
        dropout=args.dropout,
        weight_sharing=args.weight_sharing,
        bos_id=args.bos_idx,
        eos_id=args.eos_idx,
        beam_size=args.beam_size,
        max_out_len=args.max_out_len)

    # Load the trained model
    assert args.init_from_params, (
        "Please set init_from_params to load the infer model.")

    model_dict = paddle.load(
        os.path.join(args.init_from_params, "transformer.pdparams"))

    # To avoid a longer length than training, reset the size of position
    # encoding to max_length
    model_dict["encoder.pos_encoder.weight"] = position_encoding_init(
        args.max_length + 1, args.d_model)
    model_dict["decoder.pos_encoder.weight"] = position_encoding_init(
        args.max_length + 1, args.d_model)
    transformer.load_dict(model_dict)
    # Set evaluate mode
    transformer.eval()

    # Convert dygraph model to static graph model 
    transformer = paddle.jit.to_static(
        transformer,
        input_spec=[
            # src_word
            paddle.static.InputSpec(
                shape=[None, None], dtype="int64")
        ])

    # Save converted static graph model
    paddle.jit.save(transformer,
                    os.path.join(args.inference_model_dir, "transformer"))
    logger.info("Transformer has been saved to {}".format(
        args.inference_model_dir))
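
To sanity-check the export, the saved static-graph model can be loaded back with paddle.jit.load. A minimal sketch (the dummy input shape is an assumption):

loaded = paddle.jit.load(
    os.path.join(args.inference_model_dir, "transformer"))
loaded.eval()
# Dummy batch of token ids just to exercise the graph.
src_word = paddle.randint(
    0, args.src_vocab_size, shape=[1, 10], dtype="int64")
finished_seq = loaded(src_word)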
Example #6
    def load(self, init_from_params):
        # Load the trained model
        assert init_from_params, (
            "Please set init_from_params to load the infer model.")

        model_dict = paddle.load(init_from_params, return_numpy=True)

        # To set weight[padding_idx] to 0.
        model_dict["trg_word_embedding.word_embedding.weight"][
            self.bos_id] = [0] * self.d_model

        # Dealing with weight sharing.
        if self.weight_sharing:
            model_dict["decoding_linear.weight"] = np.transpose(
                model_dict["trg_word_embedding.word_embedding.weight"])
        else:
            model_dict["decoding_linear.weight"] = model_dict["linear.weight"]
        # NOTE: the data type of the embedding bias for logits is different
        # between decoding with beam search and top-k/top-p sampling in
        # Faster Transformer when using float16.
        bias_dtype = "float32"
        if self.use_fp16_decoding and self.decoding_strategy != "beam_search":
            bias_dtype = "float16"
        model_dict["decoding_linear.bias"] = np.zeros([self.trg_vocab_size],
                                                      dtype=bias_dtype)

        # To avoid a longer length than training, reset the size of position
        # encoding to max_length
        model_dict["encoder.pos_encoder.weight"] = position_encoding_init(
            self.max_length, self.d_model)
        model_dict["decoder.pos_encoder.weight"] = position_encoding_init(
            self.max_length, self.d_model)

        if self.use_fp16_decoding:
            for item in self.state_dict():
                if "decoder" in item:
                    model_dict[item] = np.float16(model_dict[item])
            model_dict["decoding_linear.weight"] = np.float16(
                model_dict["decoding_linear.weight"])
            model_dict[
                "trg_word_embedding.word_embedding.weight"] = np.float16(
                    model_dict["trg_word_embedding.word_embedding.weight"])
            model_dict["trg_pos_embedding.pos_encoder.weight"] = np.float16(
                model_dict["trg_pos_embedding.pos_encoder.weight"])

        self.load_dict(model_dict)
Example #7
File: module.py Project: houj04/PaddleHub
    def __init__(
        self,
        max_length=256,
        max_out_len=256,
    ):
        super(STTransformer, self).__init__()
        bpe_codes_fpath = os.path.join(MODULE_HOME,
                                       "transformer_nist_wait_all", "assets",
                                       "2M.zh2en.dict4bpe.zh")
        src_vocab_fpath = os.path.join(MODULE_HOME,
                                       "transformer_nist_wait_all", "assets",
                                       "nist.20k.zh.vocab")
        trg_vocab_fpath = os.path.join(MODULE_HOME,
                                       "transformer_nist_wait_all", "assets",
                                       "nist.10k.en.vocab")
        params_fpath = os.path.join(MODULE_HOME, "transformer_nist_wait_all",
                                    "assets", "transformer.pdparams")
        self.max_length = max_length
        self.max_out_len = max_out_len
        self.tokenizer = STACLTokenizer(
            bpe_codes_fpath,
            src_vocab_fpath,
            trg_vocab_fpath,
        )
        src_vocab_size = self.tokenizer.src_vocab_size
        trg_vocab_size = self.tokenizer.trg_vocab_size
        self.transformer = SimultaneousTransformer(
            src_vocab_size,
            trg_vocab_size,
            max_length=self.max_length,
            n_layer=self.model_config['n_layer'],
            n_head=self.model_config['n_head'],
            d_model=self.model_config['d_model'],
        )
        model_dict = paddle.load(params_fpath)
        # To avoid a longer length than training, reset the size of position
        # encoding to max_length
        model_dict[
            "src_pos_embedding.pos_encoder.weight"] = position_encoding_init(
                self.max_length + 1, self.model_config['d_model'])
        model_dict[
            "trg_pos_embedding.pos_encoder.weight"] = position_encoding_init(
                self.max_length + 1, self.model_config['d_model'])
        self.transformer.load_dict(model_dict)
Example #8
    def load(self, init_from_params):
        # Load the trained model
        assert init_from_params, (
            "Please set init_from_params to load the infer model.")

        model_dict = paddle.load(init_from_params)

        # To set weight[padding_idx] to 0.
        model_dict["trg_word_embedding.word_embedding.weight"][
            self.bos_id] = [0] * self.d_model

        # Dealing with weight sharing. 
        if self.weight_sharing:
            model_dict["decoding_linear.weight"] = np.transpose(model_dict[
                "trg_word_embedding.word_embedding.weight"])
            model_dict["decoding_linear.bias"] = np.zeros(
                [self.trg_vocab_size], dtype="float32")
        else:
            model_dict["decoding_linear.weight"] = model_dict["linear.weight"]
            model_dict["decoding_linear.bias"] = np.zeros(
                [self.trg_vocab_size], dtype="float32")

        # To avoid a longer length than training, reset the size of position
        # encoding to max_length
        model_dict["encoder.pos_encoder.weight"] = position_encoding_init(
            self.max_length, self.d_model)
        model_dict["decoder.pos_encoder.weight"] = position_encoding_init(
            self.max_length, self.d_model)

        if self.use_fp16_decoding:
            for item in self.state_dict():
                if "decoder" in item:
                    model_dict[item] = np.float16(model_dict[item])
            model_dict["decoding_linear.weight"] = np.float16(model_dict[
                "decoding_linear.weight"])

        self.load_dict(model_dict)
Example #9
    def load(self, init_from_params):
        # Load the trained model
        assert init_from_params, (
            "Please set init_from_params to load the infer model.")

        model_dict = paddle.load(init_from_params, return_numpy=True)

        # To set weight[padding_idx] to 0.
        model_dict["trg_word_embedding.word_embedding.weight"][
            self.bos_id] = [0] * self.d_model

        # To avoid a longer length than training, reset the size of position
        # encoding to max_length
        model_dict["encoder.pos_encoder.weight"] = position_encoding_init(
            self.max_length, self.d_model)
        model_dict["decoder.pos_encoder.weight"] = position_encoding_init(
            self.max_length, self.d_model)

        if self.use_fp16_decoder:
            for item in self.state_dict():
                if "decoder.layers" in item:
                    model_dict[item] = np.float16(model_dict[item])

        self.load_dict(model_dict)
Example #10
def do_predict(args):
    if args.device == 'gpu':
        place = "gpu:0"
    elif args.device == 'xpu':
        place = "xpu:0"
    elif args.device == 'cpu':
        place = "cpu"
    else:
        raise ValueError("Unsupported device: {}".format(args.device))

    paddle.set_device(place)

    # Define data loader
    test_loader, to_tokens = reader.create_infer_loader(args)

    # Define model
    transformer = SimultaneousTransformer(
        args.src_vocab_size, args.trg_vocab_size, args.max_length + 1,
        args.n_layer, args.n_head, args.d_model, args.d_inner_hid,
        args.dropout, args.weight_sharing, args.bos_idx, args.eos_idx,
        args.waitk)

    # Load the trained model
    assert args.init_from_params, (
        "Please set init_from_params to load the infer model.")

    model_dict = paddle.load(
        os.path.join(args.init_from_params, "transformer.pdparams"))

    # To avoid a longer length than training, reset the size of position
    # encoding to max_length
    model_dict[
        "src_pos_embedding.pos_encoder.weight"] = position_encoding_init(
            args.max_length + 1, args.d_model)
    model_dict[
        "trg_pos_embedding.pos_encoder.weight"] = position_encoding_init(
            args.max_length + 1, args.d_model)

    transformer.load_dict(model_dict)

    # Set evaluate mode
    transformer.eval()

    f = open(args.output_file, "w", encoding='utf8')

    with paddle.no_grad():
        for input_data in test_loader:
            (src_word, ) = input_data

            finished_seq, finished_scores = transformer.greedy_search(
                src_word, max_len=args.max_out_len, waitk=args.waitk)
            finished_seq = finished_seq.numpy()
            finished_scores = finished_scores.numpy()
            for ins in finished_seq:
                for beam_idx, beam in enumerate(ins):
                    if beam_idx >= args.n_best:
                        break
                    id_list = post_process_seq(beam, args.bos_idx,
                                               args.eos_idx)
                    word_list = to_tokens(id_list)
                    sequence = ' '.join(word_list) + "\n"
                    f.write(sequence)
    f.close()