Example #1
0
def get_ans(query1, depart):
    """Return the top-5 candidate diseases for a symptom query.

    query1: free-text symptom description (Korean).
    depart: medical department used to pre-filter candidates; the literal
        "없음" ("none") means no department was given, in which case the
        departments are predicted from the query via ``classification_``.

    Returns a DataFrame holding the first three CSV columns of the top-5
    BM25 matches plus a "score" column of percentage scores.
    """
    df = pd.read_csv("./data/total_disease2_count_FINAL.csv", encoding='cp949').fillna('Null')
    df.rename(columns={'symptoms': 'content'}, inplace=True)

    # Restrict candidates to the given department, or to departments
    # predicted from the query itself when none was supplied.
    if depart != "없음":
        df = df[df["subject"].isin([depart])].reset_index()
    else:
        df = df[df["subject"].isin(classification_(query1))].reset_index()

    retriever = BM25Retriever(lowercase=False,
                              tokenizer=stop_word
                              )
    retriever.fit(df)

    query1 = " ".join(stop_word(query1))
    rp = retriever.predict(query1)
    # Weight each raw BM25 score by the disease's occurrence count.
    for i in rp:
        rp[i] = float(rp[i]) * df.iloc[i]['count']

    rp = OrderedDict(sorted(rp.items(), key=lambda x: x[1], reverse=True))

    top_5 = list(rp.keys())[:5]
    list_score = list(map(float, rp.values()))[:5]

    if list_score and list_score[0] != 0:
        # BUG FIX: the original rounded the ratio to 2 decimals *before*
        # scaling by 100, quantizing every percentage to a whole percent.
        # Round after scaling to keep the intended 2-decimal precision.
        total = sum(list_score)
        percent_score = [round(s / total * 100, 2) for s in list_score]
    else:
        # No retrieval signal: fall back to a uniform split. Sized by the
        # actual number of candidates (the original hard-coded 5, which
        # breaks when fewer than 5 rows survive the department filter).
        percent_score = [100 / len(top_5) for _ in top_5] if top_5 else []

    top_5_df = df.iloc[top_5, [0, 1, 2]]
    top_5_df["score"] = percent_score
    return top_5_df
Example #2
0
	
	
def Pos_extract(Data) :
    """Extract nouns and verbs from an ETRI morphological-analysis response.

    Data: response object whose ``.data`` attribute holds UTF-8-encoded JSON
        bytes with the ETRI ``return_object.sentence[*].morp`` structure.

    Returns the lemmas of all common nouns (NNG), proper nouns (NNP) and
    verbs (VV), joined into one space-separated string, in document order.
    """
    lemmas = []
    sentences = json.loads(str(Data.data, "utf-8"))['return_object']['sentence']
    # FIX: the original used `i` for BOTH the sentence and the morpheme loop,
    # shadowing the outer index. Iterate the structures directly instead.
    for sentence in sentences:
        for morp in dict(sentence)['morp']:
            if morp['type'] in ('NNG', 'NNP', 'VV'):
                lemmas.append(morp['lemma'])
    return " ".join(lemmas)

# Load the QA corpus; 'paragraphs' is stored as a stringified list in the
# CSV, so literal_eval converts it back into a real Python list per row.
df = pd.read_csv('data/bnpp_newsroom_v1.1/jungchat_result_191031.csv',converters={'paragraphs': literal_eval})


# Two BM25 retriever instances with identical settings; presumably
# `retriever_temp` is fitted later for sentence-level scoring — TODO confirm.
retriever = BM25Retriever(ngram_range=(1, 2), max_df=1.00,min_df=1, stop_words=None)
retriever_temp= BM25Retriever(ngram_range=(1, 2), max_df=1.00,min_df=1, stop_words=None)
#retriever_doc= BM25Retriever(ngram_range=(1, 2), max_df=1.00,min_df=1, stop_words=None)
# Index the full (unfiltered) corpus for document-level retrieval.
retriever.fit(df)

# Drop paragraphs shorter than 10 characters from the working DataFrame.
df = filter_paragraphs(df,min_length=10)

# BERT reader fine-tuned on KorQuAD (CPU build of the weights).
cdqa_pipeline = QAPipeline(reader='models/bert_qa_korquad_vCPU.joblib')


best_idx_scores=''

# Interactive console loop: read queries until the user types 'quit'.
# NOTE(review): `while 100:` is just an always-true condition; the loop body
# may continue beyond this chunk.
while 100:
    query=input('입력창:')
    if query=='quit':
        break
from cdqa.utils.evaluation import f1_score, exact_match_score

# Evaluation dataset; 'paragraphs' is stored as a stringified list.
df = pd.read_csv('./data/data_augmentation.csv', converters={'paragraphs': literal_eval},encoding='utf-8')
# Redefine 'paragraphs': prepend the document title to each paragraph list
# (keep the original under 'paragraphs_old').
df['paragraphs_old'] = df['paragraphs']
df['paragraphs'] = df.apply(lambda row: [row['title']] + row['paragraphs_old'], axis=1).copy()

# Sample 100 rows for evaluation; fixed seed keeps the sample reproducible.
data = pd.read_csv('./data/data_augmentation.csv',encoding='utf-8')
data_sampling = data.sample(100,random_state=66)

from cdqa.retriever import TfidfRetriever, BM25Retriever
# Full QA pipeline: BM25 document retrieval + BERT reader, moved to GPU.
cdqa_pipeline = QAPipeline(reader='bert_qa_multi_epoch3.joblib', retrieve_by_doc=True,retriever='bm25')
cdqa_pipeline.fit_retriever(df=df)
cdqa_pipeline.cuda()
# Standalone top-5 retriever used for retrieval-only evaluation.
retriever = BM25Retriever(ngram_range=(1,2), max_df=0.8, min_df=3, stop_words=None,lowercase=True, top_n=5)
retriever.fit(df=df)
def f1(dataframe, dataframe2):
    """Run retrieval over the first 100 questions and collect predictions.

    dataframe: unused; kept for backward compatibility with existing callers.
    dataframe2: DataFrame whose column index 2 holds the question text.

    Uses the module-level ``retriever`` and ``df``. For each question, takes
    the best-scoring document and returns its second paragraph (the first
    real paragraph after the prepended title) with non-breaking spaces
    stripped.

    Returns the list of 100 predicted answer paragraphs.
    """
    answer_list = []
    # FIX: replaced the manual `while number < 100` counter (and the dead
    # `exact_number` local) with an idiomatic for-range loop.
    for number in range(100):
        question = dataframe2.iloc[number, 2]  # question text
        best_idx_scores = retriever.predict(question)
        prediction = (
            df.loc[best_idx_scores.keys()]['paragraphs']
            .apply(lambda x: x[1])
            .tolist()[0]
            .replace(u'\xa0', u'')
        )
        answer_list.append(prediction)
    return answer_list
from flask import Flask, request, jsonify
from ast import literal_eval
import pandas as pd
from cdqa.utils.filters import filter_paragraphs
from cdqa.utils.download import download_model, download_bnpp_data
from cdqa.pipeline.cdqa_sklearn import QAPipeline
from cdqa.retriever import BM25Retriever
from ETRI import *
import time
from khaiii_def import *
# Flask application serving the chatbot webhook.
app = Flask(__name__)
# QA corpus; 'paragraphs' is stored as a stringified list in the CSV.
df = pd.read_csv('jungchat_result_191102.csv',converters={'paragraphs': literal_eval})
cdqa_pipeline = QAPipeline(reader='bert_qa_korquad_vCPU.joblib')# load the BERT reader model
retriever = BM25Retriever(ngram_range=(1, 2), max_df=1.00,min_df=1, stop_words=None)# retriever for document-level similarity
retriever_temp= BM25Retriever(ngram_range=(1, 2), max_df=1.00,min_df=1, stop_words=None)# retriever for sentence-level similarity
retriever.fit(df)# index the contents of every document
df = filter_paragraphs(df)
best_idx_scores = ''

def text_tranform(text) :
    """Turn a comma-separated string into one item per line.

    Every ", " separator in *text* becomes a newline; the items
    themselves are left untouched.
    """
    return text.replace(', ', '\n')

def make_query(text) :
    dataSend = {
          "version": "2.0",
          "template": {
             "outputs": [{
                    "simpleText":{
                       "text" : text}
               }]
           }