Example #1
def do_predict(args):
    if args.use_gpu:
        place = "gpu:0"
    else:
        place = "cpu"

    paddle.set_device(place)

    # Define data loader
    test_loader, to_tokens = reader.create_infer_loader(args)

    # Define model
    transformer = InferTransformerModel(src_vocab_size=args.src_vocab_size,
                                        trg_vocab_size=args.trg_vocab_size,
                                        max_length=args.max_length + 1,
                                        n_layer=args.n_layer,
                                        n_head=args.n_head,
                                        d_model=args.d_model,
                                        d_inner_hid=args.d_inner_hid,
                                        dropout=args.dropout,
                                        weight_sharing=args.weight_sharing,
                                        bos_id=args.bos_idx,
                                        eos_id=args.eos_idx,
                                        beam_size=args.beam_size,
                                        max_out_len=args.max_out_len)

    # Load the trained model
    assert args.init_from_params, (
        "Please set init_from_params to load the infer model.")

    model_dict = paddle.load(
        os.path.join(args.init_from_params, "transformer.pdparams"))

    # In case the inference max_length differs from the one used in training,
    # reset the position encoding table to the current max_length
    model_dict["encoder.pos_encoder.weight"] = position_encoding_init(
        args.max_length + 1, args.d_model)
    model_dict["decoder.pos_encoder.weight"] = position_encoding_init(
        args.max_length + 1, args.d_model)
    transformer.load_dict(model_dict)

    # Set evaluate mode
    transformer.eval()

    f = open(args.output_file, "w")
    with paddle.no_grad():
        for (src_word, ) in test_loader:
            finished_seq = transformer(src_word=src_word)
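            # Rearrange the predicted ids from [batch_size, seq_len, beam_size]
            # to [batch_size, beam_size, seq_len] so each beam is a contiguous
            # id sequence for the post-processing below.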
            finished_seq = finished_seq.numpy().transpose([0, 2, 1])
            for ins in finished_seq:
                for beam_idx, beam in enumerate(ins):
                    if beam_idx >= args.n_best:
                        break
                    id_list = post_process_seq(beam, args.bos_idx,
                                               args.eos_idx)
                    word_list = to_tokens(id_list)
                    sequence = " ".join(word_list) + "\n"
                    f.write(sequence)
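
Most of the examples on this page call a post_process_seq helper that is defined elsewhere in the same scripts. The sketch below is only an illustration of what such a helper is assumed to do (its real signature may differ): cut each beam at the first eos id and drop the bos/eos markers before the ids are mapped back to tokens.

def post_process_seq(seq, bos_idx, eos_idx, output_bos=False, output_eos=False):
    # Cut the decoded sequence at the first eos id (keep everything if none).
    eos_pos = len(seq) - 1
    for i, idx in enumerate(seq):
        if idx == eos_idx:
            eos_pos = i
            break
    # Drop bos/eos ids unless the caller explicitly asks to keep them.
    return [
        idx for idx in seq[:eos_pos + 1]
        if (output_bos or idx != bos_idx) and (output_eos or idx != eos_idx)
    ]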
Example #2
def do_inference(args):
    # Define data loader
    test_loader, to_tokens = reader.create_infer_loader(args, True)

    predictor = Predictor.create_predictor(args=args,
                                           profile=args.profile,
                                           model_name=args.model_name)
    sequence_outputs = predictor.predict(test_loader, to_tokens, args.n_best,
                                         args.bos_idx, args.eos_idx)

    f = open(args.output_file, "w")
    for target in sequence_outputs:
        for sequence in target:
            f.write(sequence + "\n")
    f.close()
Example #3
def do_predict(args):
    place = "gpu"
    paddle.set_device(place)

    # Define data loader
    test_loader, to_tokens = reader.create_infer_loader(args)

    # Define model
    transformer = FasterTransformer(
        src_vocab_size=args.src_vocab_size,
        trg_vocab_size=args.trg_vocab_size,
        max_length=args.max_length + 1,
        n_layer=args.n_layer,
        n_head=args.n_head,
        d_model=args.d_model,
        d_inner_hid=args.d_inner_hid,
        dropout=args.dropout,
        weight_sharing=args.weight_sharing,
        bos_id=args.bos_idx,
        eos_id=args.eos_idx,
        decoding_strategy="beam_search",
        beam_size=args.beam_size,
        max_out_len=args.max_out_len,
        decoding_lib=args.decoding_lib,
        use_fp16_decoding=args.use_fp16_decoding)

    # Set evaluate mode
    transformer.eval()

    # Load checkpoint.
    transformer.load(init_from_params=os.path.join(args.init_from_params,
                                                   "transformer.pdparams"))

    f = open(args.output_file, "w")
    with paddle.no_grad():
        for (src_word, ) in test_loader:
            finished_seq = transformer(src_word=src_word)
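            # FasterTransformer beam search emits time-major output, i.e.
            # [seq_len, batch_size, beam_size]; rearrange it to
            # [batch_size, beam_size, seq_len] before post-processing.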
            finished_seq = finished_seq.numpy().transpose([1, 2, 0])
            for ins in finished_seq:
                for beam_idx, beam in enumerate(ins):
                    if beam_idx >= args.n_best:
                        break
                    id_list = post_process_seq(beam, args.bos_idx, args.eos_idx)
                    word_list = to_tokens(id_list)
                    sequence = " ".join(word_list) + "\n"
                    f.write(sequence)
Example #4
def do_inference(args):
    # Define data loader
    test_loader, to_tokens = reader.create_infer_loader(args)

    predictor = Predictor.create_predictor(args)
    sequence_outputs = predictor.predict(test_loader)

    f = open(args.output_file, "w")
    for finished_sequence in sequence_outputs:
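        # The first element of each batch's outputs is assumed to hold the
        # predicted ids with shape [batch_size, seq_len, beam_size]; swap the
        # last two axes so the loops below run per instance, then per beam.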
        finished_sequence = finished_sequence[0].transpose([0, 2, 1])
        for ins in finished_sequence:
            for beam_idx, beam in enumerate(ins):
                if beam_idx >= args.n_best:
                    break
                id_list = post_process_seq(beam, args.bos_idx, args.eos_idx)
                word_list = to_tokens(id_list)
                sequence = " ".join(word_list) + "\n"
                f.write(sequence)
Example #5
def do_predict(args):
    paddle.enable_static()
    if args.use_gpu:
        place = paddle.set_device("gpu:0")
    else:
        place = paddle.set_device("cpu")

    # Define data loader
    test_loader, to_tokens = reader.create_infer_loader(args)

    test_program = paddle.static.Program()
    startup_program = paddle.static.Program()
    with paddle.static.program_guard(test_program, startup_program):
        src_word = paddle.static.data(
            name="src_word", shape=[None, None], dtype="int64")

        # Define model
        transformer = InferTransformerModel(
            src_vocab_size=args.src_vocab_size,
            trg_vocab_size=args.trg_vocab_size,
            max_length=args.max_length + 1,
            n_layer=args.n_layer,
            n_head=args.n_head,
            d_model=args.d_model,
            d_inner_hid=args.d_inner_hid,
            dropout=args.dropout,
            weight_sharing=args.weight_sharing,
            bos_id=args.bos_idx,
            eos_id=args.eos_idx,
            beam_size=args.beam_size,
            max_out_len=args.max_out_len)

        finished_seq = transformer(src_word=src_word)

    test_program = test_program.clone(for_test=True)

    exe = paddle.static.Executor(place)
    exe.run(startup_program)

    assert (
        args.init_from_params), "must set init_from_params to load parameters"
    paddle.static.load(test_program,
                       os.path.join(args.init_from_params, "transformer"), exe)
    print("finish initing model from params from %s" % (args.init_from_params))

    # cast weights from fp16 to fp32 after loading
    if args.use_pure_fp16:
        cast_parameters_to_fp32(place, test_program)

    f = open(args.output_file, "w")
    for data in test_loader:
        finished_sequence, = exe.run(test_program,
                                     feed={'src_word': data[0]},
                                     fetch_list=finished_seq.name)
        finished_sequence = finished_sequence.transpose([0, 2, 1])
        for ins in finished_sequence:
            for beam_idx, beam in enumerate(ins):
                if beam_idx >= args.n_best:
                    break
                id_list = post_process_seq(beam, args.bos_idx, args.eos_idx)
                word_list = to_tokens(id_list)
                sequence = " ".join(word_list) + "\n"
                f.write(sequence)

    paddle.disable_static()
Example #6
def do_predict(args):
    if args.device == 'gpu':
        place = "gpu:0"
    elif args.device == 'xpu':
        place = "xpu:0"
    elif args.device == 'cpu':
        place = "cpu"

    paddle.set_device(place)

    # Define data loader
    test_loader, to_tokens = reader.create_infer_loader(args)

    # Define model
    transformer = SimultaneousTransformer(
        args.src_vocab_size, args.trg_vocab_size, args.max_length + 1,
        args.n_layer, args.n_head, args.d_model, args.d_inner_hid,
        args.dropout, args.weight_sharing, args.bos_idx, args.eos_idx,
        args.waitk)

    # Load the trained model
    assert args.init_from_params, (
        "Please set init_from_params to load the infer model.")

    model_dict = paddle.load(
        os.path.join(args.init_from_params, "transformer.pdparams"))

    # In case the inference max_length differs from the one used in training,
    # reset the position encoding table to the current max_length
    model_dict[
        "src_pos_embedding.pos_encoder.weight"] = position_encoding_init(
            args.max_length + 1, args.d_model)
    model_dict[
        "trg_pos_embedding.pos_encoder.weight"] = position_encoding_init(
            args.max_length + 1, args.d_model)

    transformer.load_dict(model_dict)

    # Set evaluate mode
    transformer.eval()

    f = open(args.output_file, "w", encoding='utf8')

    with paddle.no_grad():
        for input_data in test_loader:
            (src_word, ) = input_data

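            # Decode with the wait-k policy: the decoder is assumed to start
            # generating after the first args.waitk source tokens have been
            # read, then alternates between reading and writing.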
            finished_seq, finished_scores = transformer.greedy_search(
                src_word, max_len=args.max_out_len, waitk=args.waitk)
            finished_seq = finished_seq.numpy()
            finished_scores = finished_scores.numpy()
            for idx, ins in enumerate(finished_seq):
                for beam_idx, beam in enumerate(ins):
                    if beam_idx >= args.n_best:
                        break
                    id_list = post_process_seq(beam, args.bos_idx,
                                               args.eos_idx)
                    word_list = to_tokens(id_list)
                    sequence = ' '.join(word_list) + "\n"
                    f.write(sequence)
    f.close()
Example #7
def do_predict(args):
    if args.device == "gpu":
        place = "gpu"
    else:
        place = "cpu"

    paddle.set_device(place)

    # Define data loader
    test_loader, to_tokens = reader.create_infer_loader(args)

    # Define model
    # `TransformerGenerator` automatically chooses between `FasterTransformer`
    # (with JIT building) and the slower `InferTransformerModel`.
    transformer = TransformerGenerator(
        src_vocab_size=args.src_vocab_size,
        trg_vocab_size=args.trg_vocab_size,
        max_length=args.max_length + 1,
        num_encoder_layers=args.n_layer,
        num_decoder_layers=args.n_layer,
        n_head=args.n_head,
        d_model=args.d_model,
        d_inner_hid=args.d_inner_hid,
        dropout=args.dropout,
        weight_sharing=args.weight_sharing,
        bos_id=args.bos_idx,
        eos_id=args.eos_idx,
        beam_size=args.beam_size,
        max_out_len=args.max_out_len,
        use_ft=not args.without_ft,
        beam_search_version=args.beam_search_version,
        rel_len=args.use_rel_len,  # only works when using FT or beam search v2
        alpha=args.alpha,  # only works when using beam search v2
        use_fp16_decoding=False)  # only works when using FT

    # Load the trained model
    assert args.init_from_params, (
        "Please set init_from_params to load the infer model.")

    transformer.load(
        os.path.join(args.init_from_params, "transformer.pdparams"))

    # Set evaluate mode
    transformer.eval()

    f = open(args.output_file, "w", encoding="utf-8")
    with paddle.no_grad():
        for (src_word, ) in test_loader:
            # When the `output_time_major` argument is `True` for TransformerGenerator,
            # the shape of finished_seq is `[seq_len, batch_size, beam_size]`
            # for beam search v1 or `[seq_len, batch_size, beam_size * 2]` for
            # beam search v2.
            finished_seq = transformer(src_word=src_word)
            finished_seq = finished_seq.numpy().transpose([1, 2, 0])
            for ins in finished_seq:
                for beam_idx, beam in enumerate(ins):
                    if beam_idx >= args.n_best:
                        break
                    id_list = post_process_seq(beam, args.bos_idx,
                                               args.eos_idx)
                    word_list = to_tokens(id_list)
                    sequence = " ".join(word_list) + "\n"
                    f.write(sequence)
Example #8
def do_predict(args):
    place = "gpu"
    place = paddle.set_device(place)

    # Define data loader
    # NOTE: Data yielded by the DataLoader may be on CUDAPinnedPlace, but the
    # custom op does not support CUDAPinnedPlace, so pinned memory is disabled
    # in the DataLoader.
    paddle.fluid.reader.use_pinned_memory(False)
    test_loader, to_tokens = reader.create_infer_loader(args)

    # Define model
    transformer = FasterTransformer(
        src_vocab_size=args.src_vocab_size,
        trg_vocab_size=args.trg_vocab_size,
        max_length=args.max_length + 1,
        num_encoder_layers=args.n_layer,
        num_decoder_layers=args.n_layer,
        n_head=args.n_head,
        d_model=args.d_model,
        d_inner_hid=args.d_inner_hid,
        dropout=args.dropout,
        weight_sharing=args.weight_sharing,
        bos_id=args.bos_idx,
        eos_id=args.eos_idx,
        decoding_strategy=args.decoding_strategy,
        beam_size=args.beam_size,
        max_out_len=args.max_out_len,
        diversity_rate=args.diversity_rate,
        decoding_lib=args.decoding_lib,
        use_fp16_decoding=args.use_fp16_decoding,
        enable_faster_encoder=args.enable_faster_encoder,
        use_fp16_encoder=args.use_fp16_encoder)

    # Set evaluate mode
    transformer.eval()

    # Load checkpoint.
    transformer.load(init_from_params=os.path.join(args.init_from_params,
                                                   "transformer.pdparams"))

    f = open(args.output_file, "w")
    with paddle.no_grad():
        if args.profile:
            import time
            start = time.time()
        for (src_word, ) in test_loader:
            finished_seq = transformer(src_word=src_word)
            if not args.profile:
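                # Beam search output is time-major ([seq_len, batch_size,
                # beam_size]) and is rearranged to [batch_size, beam_size,
                # seq_len]; sampling output is [seq_len, batch_size] and gets
                # a singleton beam axis so the same loops handle both cases.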
                if args.decoding_strategy == "beam_search" or args.decoding_strategy == "beam_search_v2":
                    finished_seq = finished_seq.numpy().transpose([1, 2, 0])
                elif args.decoding_strategy == "topk_sampling" or args.decoding_strategy == "topp_sampling":
                    finished_seq = np.expand_dims(
                        finished_seq.numpy().transpose([1, 0]), axis=1)
                for ins in finished_seq:
                    for beam_idx, beam in enumerate(ins):
                        if beam_idx >= args.n_best:
                            break
                        id_list = post_process_seq(beam, args.bos_idx,
                                                   args.eos_idx)
                        word_list = to_tokens(id_list)
                        sequence = " ".join(word_list) + "\n"
                        f.write(sequence)
        if args.profile:
            if args.decoding_strategy == "beam_search" or args.decoding_strategy == "beam_search_v2":
                logger.info(
                    "Setting info: batch size: {}, beam size: {}, use fp16: {}. ".
                    format(args.infer_batch_size, args.beam_size,
                           args.use_fp16_decoding))
            elif args.decoding_strategy == "topk_sampling":
                logger.info(
                    "Setting info: batch size: {}, topk: {}, use fp16: {}. ".
                    format(args.infer_batch_size, args.topk,
                           args.use_fp16_decoding))
            elif args.decoding_strategy == "topp_sampling":
                logger.info(
                    "Setting info: batch size: {}, topp: {}, use fp16: {}. ".
                    format(args.infer_batch_size, args.topp,
                           args.use_fp16_decoding))
            paddle.fluid.core._cuda_synchronize(place)
            logger.info("Average time latency is {} ms/batch. ".format((
                time.time() - start) / len(test_loader) * 1000))