Ejemplo n.º 1
0
def main(args):
    place = set_device(args.device)
    fluid.enable_dygraph(place) if args.dynamic else None

    inputs = [
        Input(
            [None, None], 'int64', name='words'), Input(
                [None], 'int64', name='length')
    ]

    dataset = LacDataset(args)
    predict_dataset = LacDataLoader(args, place, phase="predict")

    vocab_size = dataset.vocab_size
    num_labels = dataset.num_labels
    model = SeqTagging(args, vocab_size, num_labels, mode="predict")

    model.mode = "test"
    model.prepare(inputs=inputs)

    model.load(args.init_from_checkpoint, skip_mismatch=True)

    f = open(args.output_file, "wb")
    for data in predict_dataset.dataloader:
        if len(data) == 1:
            input_data = data[0]
        else:
            input_data = data
        results, length = model.test_batch(inputs=flatten(input_data))
        for i in range(len(results)):
            word_len = length[i]
            word_ids = results[i][:word_len]
            tags = [dataset.id2label_dict[str(id)] for id in word_ids]
            f.write("\002".join(tags) + "\n")
Ejemplo n.º 2
0
Archivo: eval.py Proyecto: swtkiwi/hapi
def main(args):
    place = set_device(args.device)
    fluid.enable_dygraph(place) if args.dynamic else None

    inputs = [
        Input([None, None], 'int64', name='words'),
        Input([None], 'int64', name='length'),
        Input([None, None], 'int64', name='target')
    ]
    labels = [Input([None, None], 'int64', name='labels')]

    dataset = LacDataset(args)
    eval_dataset = LacDataLoader(args, place, phase="test")

    vocab_size = dataset.vocab_size
    num_labels = dataset.num_labels
    model = SeqTagging(args, vocab_size, num_labels, mode="test")

    model.mode = "test"
    model.prepare(metrics=ChunkEval(num_labels),
                  inputs=inputs,
                  labels=labels,
                  device=place)
    model.load(args.init_from_checkpoint, skip_mismatch=True)

    eval_result = model.evaluate(eval_dataset.dataloader,
                                 batch_size=args.batch_size)
    print("precison: %.5f" % (eval_result["precision"][0]))
    print("recall: %.5f" % (eval_result["recall"][0]))
    print("F1: %.5f" % (eval_result["F1"][0]))
Ejemplo n.º 3
0
def main(args):
    place = set_device(args.device)
    fluid.enable_dygraph(place) if args.dynamic else None

    inputs = [
        Input(
            [None, None], 'int64', name='words'), Input(
                [None], 'int64', name='length'), Input(
                    [None, None], 'int64', name='target')
    ]

    labels = [Input([None, None], 'int64', name='labels')]

    feed_list = None if args.dynamic else [
        x.forward() for x in inputs + labels
    ]

    dataset = LacDataset(args)
    train_dataset = LacDataLoader(args, place, phase="train")

    vocab_size = dataset.vocab_size
    num_labels = dataset.num_labels
    model = SeqTagging(args, vocab_size, num_labels, mode="train")

    optim = AdamOptimizer(
        learning_rate=args.base_learning_rate,
        parameter_list=model.parameters())

    model.prepare(
        optim,
        LacLoss(),
        ChunkEval(num_labels),
        inputs=inputs,
        labels=labels,
        device=args.device)

    if args.init_from_checkpoint:
        model.load(args.init_from_checkpoint)

    if args.init_from_pretrain_model:
        model.load(args.init_from_pretrain_model, reset_optimizer=True)

    model.fit(train_dataset.dataloader,
              epochs=args.epoch,
              batch_size=args.batch_size,
              eval_freq=args.eval_freq,
              save_freq=args.save_freq,
              save_dir=args.save_dir)