Exemple #1
0
def do_predict(args):
    place = "gpu"
    place = paddle.set_device(place)
    reader.adapt_vocab_size(args)

    # Define model
    transformer = FasterTransformer(
        src_vocab_size=args.src_vocab_size,
        trg_vocab_size=args.trg_vocab_size,
        max_length=args.max_length + 1,
        num_encoder_layers=args.n_layer,
        num_decoder_layers=args.n_layer,
        n_head=args.n_head,
        d_model=args.d_model,
        d_inner_hid=args.d_inner_hid,
        dropout=args.dropout,
        weight_sharing=args.weight_sharing,
        bos_id=args.bos_idx,
        eos_id=args.eos_idx,
        decoding_strategy=args.decoding_strategy,
        beam_size=args.beam_size,
        max_out_len=args.max_out_len,
        decoding_lib=args.decoding_lib,
        use_fp16_decoding=args.use_fp16_decoding,
        enable_faster_encoder=args.enable_faster_encoder,
        use_fp16_encoder=args.use_fp16_encoder,
        rel_len=args.use_rel_len,
        alpha=args.alpha)

    # Set evaluate mode
    transformer.eval()

    # Load checkpoint.
    transformer.load(init_from_params=os.path.join(args.init_from_params,
                                                   "transformer.pdparams"))

    # Convert dygraph model to static graph model
    transformer = paddle.jit.to_static(
        transformer,
        input_spec=[
            # src_word
            paddle.static.InputSpec(shape=[None, None], dtype="int64"),
            # trg_word
            # Support exporting model which support force decoding
            # NOTE: Data type MUST be int32 !
            # paddle.static.InputSpec(
            #     shape=[None, None], dtype="int32")
        ])

    # Save converted static graph model
    paddle.jit.save(transformer,
                    os.path.join(args.inference_model_dir, "transformer"))
    logger.info("Transformer has been saved to {}".format(
        args.inference_model_dir))
def do_predict(args):
    paddle.enable_static()
    place = "gpu"
    place = paddle.set_device(place)
    reader.adapt_vocab_size(args)

    test_program = paddle.static.Program()
    startup_program = paddle.static.Program()
    with paddle.static.program_guard(test_program, startup_program):
        src_word = paddle.static.data(name="src_word",
                                      shape=[None, None],
                                      dtype="int64")

        # Define model
        transformer = FasterTransformer(
            src_vocab_size=args.src_vocab_size,
            trg_vocab_size=args.trg_vocab_size,
            max_length=args.max_length + 1,
            num_encoder_layers=args.n_layer,
            num_decoder_layers=args.n_layer,
            n_head=args.n_head,
            d_model=args.d_model,
            d_inner_hid=args.d_inner_hid,
            dropout=args.dropout,
            weight_sharing=args.weight_sharing,
            bos_id=args.bos_idx,
            eos_id=args.eos_idx,
            decoding_strategy=args.decoding_strategy,
            beam_size=args.beam_size,
            max_out_len=args.max_out_len,
            decoding_lib=args.decoding_lib,
            use_fp16_decoding=args.use_fp16_decoding,
            rel_len=args.use_rel_len,
            alpha=args.alpha)

        finished_seq = transformer(src_word=src_word)

    test_program = test_program.clone(for_test=True)

    exe = paddle.static.Executor(place)
    exe.run(startup_program)

    # Load checkpoint.
    transformer.export_params(init_from_params=os.path.join(
        args.init_from_params, "transformer.pdparams"),
                              place=place)

    paddle.static.save_inference_model(os.path.join(args.inference_model_dir,
                                                    "transformer"),
                                       feed_vars=src_word,
                                       fetch_vars=finished_seq,
                                       executor=exe,
                                       program=test_program)
def do_export(args):
    # Adapt vocabulary size
    reader.adapt_vocab_size(args)
    # Define model
    transformer = InferTransformerModel(
        src_vocab_size=args.src_vocab_size,
        trg_vocab_size=args.trg_vocab_size,
        max_length=args.max_length + 1,
        num_encoder_layers=args.n_layer,
        num_decoder_layers=args.n_layer,
        n_head=args.n_head,
        d_model=args.d_model,
        d_inner_hid=args.d_inner_hid,
        dropout=args.dropout,
        weight_sharing=args.weight_sharing,
        bos_id=args.bos_idx,
        eos_id=args.eos_idx,
        beam_size=args.beam_size,
        max_out_len=args.max_out_len)

    # Load the trained model
    assert args.init_from_params, (
        "Please set init_from_params to load the infer model.")

    model_dict = paddle.load(
        os.path.join(args.init_from_params, "transformer.pdparams"))

    # To avoid a longer length than training, reset the size of position
    # encoding to max_length
    model_dict["encoder.pos_encoder.weight"] = position_encoding_init(
        args.max_length + 1, args.d_model)
    model_dict["decoder.pos_encoder.weight"] = position_encoding_init(
        args.max_length + 1, args.d_model)
    transformer.load_dict(model_dict)
    # Set evaluate mode
    transformer.eval()

    # Convert dygraph model to static graph model 
    transformer = paddle.jit.to_static(
        transformer,
        input_spec=[
            # src_word
            paddle.static.InputSpec(
                shape=[None, None], dtype="int64")
        ])

    # Save converted static graph model
    paddle.jit.save(transformer,
                    os.path.join(args.inference_model_dir, "transformer"))
    logger.info("Transformer has been saved to {}".format(
        args.inference_model_dir))