def predict(name, command):
    """Tag each token of *command* with the intent model called *name*.

    Loads the label set, word/char vocabularies, and trained BiLSTM-CRF
    weights for the given model name, runs prediction on the lower-cased,
    tokenized command, and prints one ``word: label`` line per token.
    """
    command = command.lower()
    # Label list is stored alongside this module under intents/config/labels.
    label_path = path.join(path.dirname(path.realpath(__file__)), "intents", "config", "labels", "%s_labels.json" % name)
    with open(label_path, encoding="utf8") as fh:
        labels = json.load(fh)
    # Word-level vocabulary.
    word_vocab = Vocabulary()
    word_vocab.load("%s_word_vocab.json" % name)
    # Character-level vocabulary (char embedding input).
    char_vocab = Vocabulary()
    char_vocab.load("%s_char_vocab.json" % name)
    # Map prediction indices back to label strings.
    idx2label = dict(enumerate(labels))
    preprocessor = Preprocessor(word_vocab, None, char_vocab)
    model = BiLSTMCRF(labels, len(word_vocab), len(char_vocab))
    model.load_weights('intents/config/weights/%s.hdf5' % name)
    sentence = tokenize(command)
    features = preprocessor.transform([sentence])
    predictions = model.predict(features)
    predicted_labels = [idx2label[pred] for pred in predictions]
    for word, label in zip(sentence, predicted_labels):
        print('%s: %s' % (word, label))
def predict(text, config, params, is_export=False):
    """Run char-level BiLSTM-CRF prediction on *text* and print the result.

    Rebuilds the model from *params*, restores weights from
    ``config["ckpt_path"]``, tags each character of *text*, prints the raw
    tag sequence and the formatted entities as JSON. When *is_export* is
    true, additionally saves the model to ``config["export_dir"]``.
    """
    # Load vocabulary and tag dictionaries (token<->id in both directions).
    vocab2id, id2vocab = read_vocab(config["vocab_file"])
    tag2id, id2tag = read_vocab(config["tag_file"])
    # Rebuild the model architecture and restore trained weights.
    model = BiLSTMCRF(
        hidden_num=params["hidden_num"],
        vocab_size=len(vocab2id),
        label_size=len(tag2id),
        embedding_size=params["embedding_size"])
    model.load_weights(config["ckpt_path"])
    # Encode characters to ids (unknown chars -> 0) and pad to maxlen.
    char_ids = [vocab2id.get(char, 0) for char in text]
    dataset = tf.keras.preprocessing.sequence.pad_sequences(
        [char_ids], padding='post', maxlen=params["maxlen"])
    # Predict, take the argmax tag id per position, map ids back to tags.
    scores = model.predict(dataset)[0]
    tag_ids = np.argmax(scores, axis=-1)
    result = [id2tag[i] for i in tag_ids]
    print(result)
    # Convert the tag sequence into structured entities and dump as JSON.
    entities_result = format_result(list(text), result)
    print(json.dumps(entities_result, indent=4, ensure_ascii=False))
    if is_export:
        # Export the full model (including optimizer state) for serving.
        tf.keras.models.save_model(
            model, config["export_dir"], overwrite=True,
            include_optimizer=True, save_format=None, options=None)
def predict(text, config, params):
    """Run char-level BiLSTM-CRF prediction on *text* and print the result.

    Rebuilds the model from *params*, restores weights from
    ``config["ckpt_path"]``, tags each character of *text*, then prints the
    raw tag sequence and the formatted entities as JSON.
    """
    # Load vocabulary and tag dictionaries (token<->id in both directions).
    vocab2id, id2vocab = read_vocab(config["vocab_file"])
    tag2id, id2tag = read_vocab(config["tag_file"])
    # Rebuild the model architecture and restore trained weights.
    model = BiLSTMCRF(
        hidden_num=params["hidden_num"],
        vocab_size=len(vocab2id),
        label_size=len(tag2id),
        embedding_size=params["embedding_size"])
    model.load_weights(config["ckpt_path"])
    # Encode characters to ids (unknown chars -> 0); pad to batch shape.
    char_ids = [vocab2id.get(char, 0) for char in text]
    dataset = tf.keras.preprocessing.sequence.pad_sequences(
        [char_ids], padding='post')
    # Predict, take the argmax tag id per position, map ids back to tags.
    scores = model.predict(dataset)[0]
    tag_ids = np.argmax(scores, axis=-1)
    result = [id2tag[i] for i in tag_ids]
    print(result)
    # Convert the tag sequence into structured entities and dump as JSON.
    entities_result = format_result(list(text), result)
    print(json.dumps(entities_result, indent=4, ensure_ascii=False))