Beispiel #1
0
class ComplexMatch:
    """Match a text against the known complex-QA questions.

    The candidate ``(qa_type, question)`` pairs are read from
    ``complex_qa.xlsx`` and every candidate question is scored against the
    input text by the predictor built from ``Path.relation_match_model``.
    """

    def __init__(self):
        self.data, self.questions = self.load_complex_qa()
        self.predictor = Predictor(Path.relation_match_model)

    def predict(self, text, threshold=0.97):
        """Return the best-matching QA type for ``text`` and its confidence.

        Args:
            text: Input handed to the predictor as ``text_a_s``.
            threshold: Minimum confidence the best candidate must reach for
                its QA type to be reported. Defaults to 0.97.

        Returns:
            A ``(qa_type, confidence)`` tuple. ``qa_type`` is ``''`` when the
            best confidence is below ``threshold``. When there is nothing to
            score (no candidates, or the predictor returned no results) the
            result is ``('', 0.0)``.
        """
        predictions = self.predictor.predict_texts(text_a_s=text,
                                                   text_b_s=self.questions)
        scored = []
        for (qa_type, question), (label, confidence) in zip(self.data, predictions):
            # Presumably the confidence refers to the predicted label, so a
            # 'No' prediction is flipped into a score for "matches".
            if label == 'No':
                confidence = 1 - confidence
            scored.append((qa_type, question, confidence))

        # Without candidates there is no best match; report "no match"
        # instead of failing on the index lookup below.
        if not scored:
            return '', 0.0

        top_5_result = sorted(scored, key=lambda x: x[2], reverse=True)[:5]
        for qa_type, question, confidence in top_5_result:
            logger.debug('{} {} {}'.format(qa_type, question, confidence))

        top_qa_type, _, top_confidence = top_5_result[0]
        if top_confidence < threshold:
            top_qa_type = ''
        return top_qa_type, top_confidence

    def load_complex_qa(self):
        """Read the candidate pairs from ``complex_qa.xlsx`` in ``Path.data_path``.

        Returns:
            A ``(data, questions)`` tuple: ``data`` is the list of
            ``(qa_type, question)`` pairs taken from the sheet's ``qa_type``
            and ``question`` columns, and ``questions`` is the list of the
            question values in the same order.
        """
        df = pd.read_excel(os.path.join(Path.data_path, 'complex_qa.xlsx'))
        data = list(zip(df['qa_type'], df['question']))
        questions = [question for _, question in data]
        return data, questions
Beispiel #2
0
class RelationClassifier:
    """
    Relation classification: classifies the text with a BERT text classifier.
    """
    def __init__(self):
        model_location = Path.relation_classifier_model
        self.predictor = Predictor(model_location)

    def predict(self, text):
        """Classify ``text`` and return ``(label, confidence)``.

        The label is replaced by ``''`` when the confidence is below 0.6;
        the confidence is returned unchanged either way.
        """
        prediction = self.predictor.predict_text(text_a=text)
        label, confidence = prediction
        # Log the raw prediction together with whether it clears the cut-off.
        logger.debug('{} {} {}'.format(label, confidence, confidence >= 0.6))
        return ('' if confidence < 0.6 else label), confidence
Beispiel #3
0
class RelationMatch:
    """
    Relation matching, done with a BERT text-similarity approach.

    Every predicate phrase loaded from ``config.txt`` is scored against the
    input text by the predictor built from ``Path.relation_match_model``.
    """
    def __init__(self):
        self.predicates = self.load_predictes()
        self.predictor = Predictor(Path.relation_match_model)

    def predict(self, text, threshold=0.97):
        """Return the best-matching predicate for ``text`` and its confidence.

        Args:
            text: Input handed to the predictor as ``text_a_s``.
            threshold: Minimum confidence the best candidate must reach for
                the predicate to be reported. Defaults to 0.97.

        Returns:
            A ``(predicate, confidence)`` tuple. ``predicate`` is ``''`` when
            the best confidence is below ``threshold``. When there is nothing
            to score (no predicates, or the predictor returned no results)
            the result is ``('', 0.0)``.
        """
        predictions = self.predictor.predict_texts(text_a_s=text, text_b_s=self.predicates)
        scored = []
        for predicate, (label, confidence) in zip(self.predicates, predictions):
            # Presumably the confidence refers to the predicted label, so a
            # 'No' prediction is flipped into a score for "matches".
            if label == 'No':
                confidence = 1 - confidence
            scored.append((predicate, confidence))

        # Without candidates there is no best match; report "no match"
        # instead of failing on the index lookup below.
        if not scored:
            return '', 0.0

        top_5_result = sorted(scored, key=lambda x: x[1], reverse=True)[:5]
        for predicate, confidence in top_5_result:
            logger.debug('{} {}'.format(predicate, confidence))

        top_label, top_confidence = top_5_result[0]
        if top_confidence < threshold:
            top_label = ''
        return top_label, top_confidence

    def load_predictes(self):
        """Read the predicate phrases from ``config.txt`` in ``Path.dictionary``.

        Each non-empty line is split on tabs; the first field (bound to
        ``iri`` in the unpacking) is skipped and every remaining field is
        collected, in file order.

        Returns:
            The flat list of collected fields.
        """
        predicates = []
        config_path = os.path.join(Path.dictionary, 'config.txt')
        # NOTE(review): the file is opened with the platform default
        # encoding; confirm the encoding of config.txt before pinning one.
        with open(config_path, 'r') as r_f:
            for line in r_f:
                line = line.rstrip('\n')
                if not line:
                    continue
                _iri, *words = line.split('\t')
                predicates.extend(words)
        return predicates
Beispiel #4
0
import os
import sys
import pandas as pd
from sklearn import metrics

base_path = os.path.join(os.path.dirname(__file__), '..')
sys.path.append(base_path)
from model.text_classification import Predictor

# Evaluate the Predictor loaded from carbot_data/model/kbqapc on the dev
# examples found under data/train/kbqa/predicate_classification/.
model_dir = os.path.join(base_path, 'carbot_data/model/kbqapc')
predictor = Predictor(model_dir)

dev_dir = os.path.join(base_path, 'data/train/kbqa/predicate_classification/')
examples = predictor.processor.get_dev_examples(dev_dir)

# One record per dev example: the text, its gold label, the predicted label
# and a '是' (yes) / '否' (no) flag telling whether the two labels agree.
data_df = []
for example in examples:
    pred, confidence = predictor.predict_text(example.text_a, example.text_b)
    text, true = example.text_a, example.label
    verdict = '是' if true == pred else '否'
    data_df.append({'text': text, 'true': true, 'pred': pred, '是否正确': verdict})

report_columns = ['text', 'true', 'pred', '是否正确']
df = pd.DataFrame(data_df, columns=report_columns)
trues, preds = df['true'], df['pred']

# Score only the labels that occur in the gold column, in sorted order.
labels = sorted(set(trues))

# average=None yields one precision / recall / F1 / support entry per label.
per_label_scores = metrics.precision_recall_fscore_support(
    y_true=trues, y_pred=preds, labels=labels, average=None)
precision, recall, f1, num = per_label_scores
precision_total, recall_total, f1_total, num_total = metrics.precision_recall_fscore_support(y_true=trues, y_pred=preds,
Beispiel #5
0
 def __init__(self):
     """Load the predicates first, then build the relation-match predictor."""
     loaded_predicates = self.load_predictes()
     self.predicates = loaded_predicates
     model_location = Path.relation_match_model
     self.predictor = Predictor(model_location)
Beispiel #6
0
 def __init__(self):
     """Build the predictor from ``Path.relation_classifier_model``."""
     model_location = Path.relation_classifier_model
     self.predictor = Predictor(model_location)
Beispiel #7
0
 def __init__(self):
     """Load the complex-QA data first, then build the relation-match predictor."""
     loaded = self.load_complex_qa()
     self.data, self.questions = loaded
     model_location = Path.relation_match_model
     self.predictor = Predictor(model_location)