Example 1
0
                                  unk_token='[UNK]',
                                  pad_token='[PAD]')
    # Wrap the vocab in a jieba-based word-level tokenizer for Chinese text.
    tokenizer = JiebaTokenizer(vocab)
    # Maps the model's integer class ids to human-readable labels.
    label_map = {0: 'dissimilar', 1: 'similar'}

    # Constructs the network.
    model = SimNet(network=args.network,
                   vocab_size=len(vocab),
                   num_classes=len(label_map))

    # Loads model parameters.
    state_dict = paddle.load(args.params_path)
    model.set_dict(state_dict)
    print("Loaded parameters from %s" % args.params_path)

    # First pre-process the prediction data, then run prediction.
    # Each item is a [query, title] pair of Chinese sentences to compare.
    data = [
        ['世界上什么东西最小', '世界上什么东西最小?'],
        ['光眼睛大就好看吗', '眼睛好看吗?'],
        ['小蝌蚪找妈妈怎么样', '小蝌蚪找妈妈是谁画的'],
    ]
    examples = preprocess_prediction_data(data, tokenizer)
    results = predict(model,
                      examples,
                      label_map=label_map,
                      batch_size=args.batch_size,
                      # Pad batches with the '[PAD]' token id; falls back to 0
                      # if '[PAD]' is missing from the vocab.
                      pad_token_id=vocab.token_to_idx.get('[PAD]', 0))

    # Print each input pair alongside its predicted label.
    for idx, text in enumerate(data):
        print('Data: {} \t Label: {}'.format(text, results[idx]))
Example 2
0
    # Loads vocab mapping tokens to integer ids.
    vocab = load_vocab(args.vocab_path)
    # Maps the model's integer class ids to human-readable sentiment labels.
    label_map = {0: 'negative', 1: 'positive'}

    # Constructs the network.
    model = ppnlp.models.Senta(network=args.network,
                               vocab_size=len(vocab),
                               num_classes=len(label_map))

    # Loads model parameters.
    state_dict = paddle.load(args.params_path)
    model.set_dict(state_dict)
    print("Loaded parameters from %s" % args.params_path)

    # First pre-process the prediction data, then run prediction.
    # Each item is a Chinese review sentence to classify.
    data = [
        '这个宾馆比较陈旧了,特价的房间也很一般。总体来说一般',
        '怀着十分激动的心情放映,可是看着看着发现,在放映完毕后,出现一集米老鼠的动画片',
        '作为老的四星酒店,房间依然很整洁,相当不错。机场接机服务很好,可以在车上办理入住手续,节省时间。',
    ]
    examples = preprocess_prediction_data(data, vocab)

    results = predict(model,
                      examples,
                      label_map=label_map,
                      batch_size=args.batch_size,
                      # generate_batch collates examples into model-ready
                      # batches; defined elsewhere in this script.
                      collate_fn=generate_batch)

    # Print each input sentence alongside its predicted label.
    for idx, text in enumerate(data):
        print('Data: {} \t Label: {}'.format(text, results[idx]))