Example #1
import json
from os import path

# Vocabulary, Preprocessor, BiLSTMCRF, and tokenize are assumed to come from
# the surrounding project; their import lines are not shown in the source.
def predict(name, command):
    command = command.lower()

    label_path = path.join(path.dirname(path.realpath(__file__)), "intents",
                           "config", "labels", "%s_labels.json" % name)
    with open(label_path, encoding="utf8") as f:
        labels = json.load(f)

    word_vocab = Vocabulary()
    word_vocab.load("%s_word_vocab.json" % name)

    # Character-level vocabulary, used for the char embedding
    char_vocab = Vocabulary()
    char_vocab.load("%s_char_vocab.json" % name)

    idx2label = dict((idx, label) for idx, label in enumerate(labels))

    preprocessor = Preprocessor(word_vocab, None, char_vocab)
    model = BiLSTMCRF(labels, len(word_vocab), len(char_vocab))
    model.load_weights('intents/config/weights/%s.hdf5' % name)

    sentence = tokenize(command)
    features = preprocessor.transform([sentence])

    # Keras predict returns per-token label scores for the one-sentence batch;
    # argmax over the label axis recovers tag indices (assumed output shape).
    p = model.predict(features)[0].argmax(axis=-1)
    predicted_labels = [idx2label[int(idx)] for idx in p]

    for word, label in zip(sentence, predicted_labels):
        print('%s: %s' % (word, label))
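
A minimal usage sketch; the intent name "weather" and the command string are
hypothetical placeholders for whatever the project actually trained:

if __name__ == "__main__":
    # Prints one "word: label" line per token of the tokenized command.
    predict("weather", "what is the weather in seoul tomorrow")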
Example #2
import tensorflow as tf
from tensorflow_addons.text import crf as tfa_crf

# BiLSTMCRF, open_file, input_text, tag_finetune, and seg_text are assumed to
# come from the surrounding project; their import lines are not shown here.
def single_predict():

    vocab_size = 4688
    embed_size = 128
    units = 64
    num_tags = 4

    _, _, char_index_dict, index_char_dict = open_file("./data/data.txt")

    optimizer = tf.keras.optimizers.Adagrad(learning_rate=0.1)

    my_model = BiLSTMCRF(vocab_size, embed_size, units, num_tags)
    ckpt = tf.train.Checkpoint(optimizer=optimizer, my_model=my_model)
    ckpt.restore(tf.train.latest_checkpoint("./save_checkpoint/"))

    text = input_text()

    char_index_list = [char_index_dict.get(char, 0) for char in text]

    tag_list = ['b', 'm', 'e', 's']  # BMES word-segmentation tags

    inputs = tf.keras.preprocessing.sequence.pad_sequences([char_index_list],
                                                           padding='post')

    # predict returns NumPy arrays: per-token logits and the real sequence length
    logits, inputs_length = my_model.predict(inputs)

    # viterbi_decode finds the highest-scoring tag path under the CRF transitions
    path, _ = tfa_crf.viterbi_decode(logits[0], my_model.transition_params)

    path_list = [tag_list[index] for index in path]
    new_path_list = tag_finetune(path_list)

    # Report how much of the predicted path tag_finetune left unchanged.
    print("Tag agreement rate: %.2f%%" %
          (100 * sum([i1 == i2 for i1, i2 in zip(path_list, new_path_list)]) /
           len(path_list)))

    seg_text(text, new_path_list)
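
seg_text is project-local, so its behavior is not shown; a minimal sketch of
the BMES decoding it implies (hypothetical helper, not the project's code):

def seg_text_sketch(text, tags):
    # 'b' begins a word, 'm' continues it, 'e' ends it, 's' is a one-char word.
    words, buf = [], ""
    for char, tag in zip(text, tags):
        buf += char
        if tag in ("e", "s"):
            words.append(buf)
            buf = ""
    if buf:  # flush a trailing partial word if the path ends mid-word
        words.append(buf)
    print(" / ".join(words))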
Example #3
import json

import numpy as np
import tensorflow as tf

# read_vocab, BiLSTMCRF, and format_result are assumed to come from the
# surrounding project; their import lines are not shown in the source.
def predict(text, config, params, is_export=False):
    """模型预测。"""
    # 读取词典
    vocab2id, id2vocab = read_vocab(config["vocab_file"])
    tag2id, id2tag = read_vocab(config["tag_file"])

    # Build the model and restore the trained weights
    model = BiLSTMCRF(hidden_num=params["hidden_num"],
                      vocab_size=len(vocab2id),
                      label_size=len(tag2id),
                      embedding_size=params["embedding_size"])
    model.load_weights(config["ckpt_path"])

    # Preprocess: map characters to ids and pad to a fixed length
    dataset = tf.keras.preprocessing.sequence.pad_sequences(
        [[vocab2id.get(char, 0) for char in text]],
        padding='post',
        maxlen=params["maxlen"])

    # Run the model and decode the most likely tag per token
    result = model.predict(dataset)[0]
    result = np.argmax(result, axis=-1)
    result = [id2tag[i] for i in result]
    print(result)
    # Group the per-character tags into entities
    entities_result = format_result(list(text), result)
    print(json.dumps(entities_result, indent=4, ensure_ascii=False))

    if is_export:
        # Export the model for serving
        tf.keras.models.save_model(model,
                                   config["export_dir"],
                                   overwrite=True,
                                   include_optimizer=True,
                                   save_format=None,
                                   options=None)
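
format_result is also project-local; a rough sketch of the BIO-style grouping
it implies (hypothetical helper, assuming tags like "B-PER"/"I-PER"/"O"):

def format_result_sketch(chars, tags):
    entities, entity = [], None
    for idx, (char, tag) in enumerate(zip(chars, tags)):
        if tag.startswith("B-"):      # a "B-" tag opens a new entity span
            entity = {"begin": idx, "words": char, "type": tag[2:]}
            entities.append(entity)
        elif tag.startswith("I-") and entity is not None:
            entity["words"] += char   # an "I-" tag extends the open span
        else:
            entity = None             # "O" (or a stray "I-") closes the span
    return entities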
Example #4
import json

import numpy as np
import tensorflow as tf

# As in Example #3, read_vocab, BiLSTMCRF, and format_result are assumed to be
# project-local helpers.
def predict(text, config, params):
    """模型预测。"""
    # 读取词典
    vocab2id, id2vocab = read_vocab(config["vocab_file"])
    tag2id, id2tag = read_vocab(config["tag_file"])

    # Build the model and restore the trained weights
    model = BiLSTMCRF(
        hidden_num=params["hidden_num"], vocab_size=len(vocab2id),
        label_size=len(tag2id), embedding_size=params["embedding_size"])
    model.load_weights(config["ckpt_path"])

    # Preprocess: map characters to ids and pad
    dataset = tf.keras.preprocessing.sequence.pad_sequences(
        [[vocab2id.get(char, 0) for char in text]], padding='post')

    # Run the model and decode the most likely tag per token
    result = model.predict(dataset)[0]
    result = np.argmax(result, axis=-1)
    result = [id2tag[i] for i in result]
    print(result)
    # Group the per-character tags into entities
    entities_result = format_result(list(text), result)
    print(json.dumps(entities_result, indent=4, ensure_ascii=False))
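
A minimal invocation sketch; every path, hyperparameter value, and the input
sentence are placeholders chosen only to match the keys this function reads:

config = {"vocab_file": "data/vocab.txt",
          "tag_file": "data/tags.txt",
          "ckpt_path": "checkpoints/bilstm_crf"}
params = {"hidden_num": 512, "embedding_size": 300}
# The input should be text in the language the checkpoint was trained on.
predict("a sample sentence to tag", config, params)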