# Example #1
def predict_static(args, batch_generator):
    """Run Transformer beam-search inference with the static-graph (Program) API.

    Builds an inference ``fluid.Program``, loads trained parameters from
    ``args.save_static_model_path``, then drives the data loader through the
    executor, logging the first sequence id/score and throughput every
    ``args.print_step`` steps until ``STEP_NUM`` batches have run.

    Args:
        args: Namespace carrying model hyper-parameters (vocab sizes, layer
            counts, head/hidden dims, dropout rates), beam-search settings
            (``beam_size``, ``max_out_len``, ``bos_idx``, ``eos_idx``),
            logging cadence (``print_step``) and the model path.
        batch_generator: Generator yielding feed batches for the DataLoader.

    Returns:
        Tuple ``(seq_ids, seq_scores)`` — numpy arrays fetched from the
        last executed batch.
    """
    test_prog = fluid.Program()
    with fluid.program_guard(test_prog):
        # Fix seeds so graph construction and decoding are reproducible.
        paddle.manual_seed(SEED)
        paddle.framework.random._manual_program_seed(SEED)

        # Declare input slots (name/shape/dtype) for both the encoder and
        # the fast decoder, as described by the project's util module.
        input_field_names = util.encoder_data_input_fields + util.fast_decoder_data_input_fields
        input_descs = util.get_input_descs(args, 'test')
        input_slots = [{
            "name": name,
            "shape": input_descs[name][0],
            "dtype": input_descs[name][1]
        } for name in input_field_names]

        input_field = util.InputField(input_slots)
        feed_list = input_field.feed_list
        loader = fluid.io.DataLoader.from_generator(
            feed_list=feed_list, capacity=10)

        # Build the model graph with hyper-parameters from `args`.
        transformer = Transformer(
            args.src_vocab_size, args.trg_vocab_size, args.max_length + 1,
            args.n_layer, args.n_head, args.d_key, args.d_value, args.d_model,
            args.d_inner_hid, args.prepostprocess_dropout,
            args.attention_dropout, args.relu_dropout, args.preprocess_cmd,
            args.postprocess_cmd, args.weight_sharing, args.bos_idx,
            args.eos_idx)

        out_ids, out_scores = transformer.beam_search(
            *feed_list,
            bos_id=args.bos_idx,
            eos_id=args.eos_idx,
            beam_size=args.beam_size,
            max_len=args.max_out_len)

    # This is used here to set dropout to the test mode.
    test_prog = test_prog.clone(for_test=True)

    # define the executor and load the trained parameters
    exe = fluid.Executor(place)

    util.load(test_prog,
              os.path.join(args.save_static_model_path, "transformer"), exe)

    loader.set_batch_generator(batch_generator, places=place)

    step_idx = 0
    speed_list = []
    # NOTE(review): assumes the loader yields at least one batch; otherwise
    # `seq_ids`/`seq_scores` are unbound at the return below.
    for feed_dict in loader:
        seq_ids, seq_scores = exe.run(
            test_prog,
            feed=feed_dict,
            fetch_list=[out_ids.name, out_scores.name],
            return_numpy=True)
        if step_idx % args.print_step == 0:
            if step_idx == 0:
                # Fixed: dropped the stray trailing comma in the message so
                # it matches the step>0 message and the dygraph counterpart.
                logging.info(
                    "Static Predict: step_idx: %d, 1st seq_id: %d, 1st seq_score: %.2f"
                    % (step_idx, seq_ids[0][0][0], seq_scores[0][0]))
                avg_batch_time = time.time()
            else:
                # Throughput over the last `print_step` steps.
                speed = args.print_step / (time.time() - avg_batch_time)
                speed_list.append(speed)
                logging.info(
                    "Static Predict: step_idx: %d, 1st seq_id: %d, 1st seq_score: %.2f, speed: %.3f steps/s"
                    % (step_idx, seq_ids[0][0][0], seq_scores[0][0], speed))
                avg_batch_time = time.time()

        step_idx += 1
        if step_idx == STEP_NUM:
            break
    logging.info("Static Predict:  avg_speed: %.4f steps/s" %
                 (np.mean(speed_list)))

    return seq_ids, seq_scores
# Example #2
def predict_dygraph(args, batch_generator):
    """Run Transformer beam-search inference in dygraph (imperative) mode.

    Restores trained weights from ``args.save_dygraph_model_path``, resizes
    the position-encoding tables to ``args.max_length + 1``, and decodes
    batches from the loader, logging the first sequence id/score and
    throughput every ``args.print_step`` steps until ``STEP_NUM`` batches.

    Args:
        args: Namespace carrying model hyper-parameters, beam-search
            settings, logging cadence and the model path.
        batch_generator: Generator yielding input batches for the loader.

    Returns:
        Tuple ``(seq_ids, seq_scores)`` — numpy arrays from the last batch.
    """
    with fluid.dygraph.guard(place):
        # Fix seeds for reproducible decoding.
        paddle.manual_seed(SEED)
        paddle.framework.random._manual_program_seed(SEED)

        # Data loader fed by the caller-supplied generator.
        data_loader = fluid.io.DataLoader.from_generator(capacity=10)
        data_loader.set_batch_generator(batch_generator, places=place)

        # Build the model with hyper-parameters from `args`.
        model = Transformer(
            args.src_vocab_size, args.trg_vocab_size, args.max_length + 1,
            args.n_layer, args.n_head, args.d_key, args.d_value, args.d_model,
            args.d_inner_hid, args.prepostprocess_dropout,
            args.attention_dropout, args.relu_dropout, args.preprocess_cmd,
            args.postprocess_cmd, args.weight_sharing, args.bos_idx,
            args.eos_idx)

        # Restore trained weights, then rebuild both position-encoding
        # tables so decoding may run longer than the training max length.
        state_dict, _ = util.load_dygraph(
            os.path.join(args.save_dygraph_model_path, "transformer"))
        for key in ("encoder.pos_encoder.weight",
                    "decoder.pos_encoder.weight"):
            state_dict[key] = position_encoding_init(args.max_length + 1,
                                                     args.d_model)
        model.load_dict(state_dict)

        # Evaluation mode: disables dropout during decoding.
        model.eval()

        step = 0
        speeds = []
        for batch in data_loader():
            # `batch` holds (src_word, src_pos, src_slf_attn_bias, trg_word,
            # trg_src_attn_bias), in exactly the order beam_search expects.
            out_ids, out_scores = model.beam_search(
                *batch,
                bos_id=args.bos_idx,
                eos_id=args.eos_idx,
                beam_size=args.beam_size,
                max_len=args.max_out_len)
            seq_ids = out_ids.numpy()
            seq_scores = out_scores.numpy()
            if step % args.print_step == 0:
                if step == 0:
                    logging.info(
                        "Dygraph Predict: step_idx: %d, 1st seq_id: %d, 1st seq_score: %.2f"
                        % (step, seq_ids[0][0][0], seq_scores[0][0]))
                else:
                    # Throughput over the last `print_step` steps.
                    speed = args.print_step / (time.time() - tick)
                    speeds.append(speed)
                    logging.info(
                        "Dygraph Predict: step_idx: %d, 1st seq_id: %d, 1st seq_score: %.2f, speed: %.3f steps/s"
                        % (step, seq_ids[0][0][0], seq_scores[0][0], speed))
                # Restart the timing window after every report.
                tick = time.time()

            step += 1
            if step == STEP_NUM:
                break
        logging.info("Dygraph Predict:  avg_speed: %.4f steps/s" %
                     (np.mean(speeds)))
        return seq_ids, seq_scores