def create_answers(deck_id, query):
    decks_collection = db["decks"]
    slides_collection = db["slides"]
    target_deck = decks_collection.find_one({"_id": deck_id})
    author_id = target_deck["user"]
    author_slides = slides_collection.find({"user": author_id})

    df = pd.DataFrame()

    for slide in author_slides:
        revision = slide["revisions"][-1]
        usages = revision["usage"]
        for usage in usages:
            if usage["id"] == deck_id:
                content = html2text.html2text(revision["content"])
                paragraphs = content.split('\n\n')
                df = df.append(
                    {
                        "date": revision["timestamp"],
                        "title": revision["title"],
                        "category": "Information",
                        "link": "",
                        "abstract": "",
                        "paragraphs": paragraphs,
                        "revision_id": revision["id"],
                        "slide_id": slide["_id"]
                    },
                    ignore_index=True)
                break

    download_model(model='bert-squad_1.1', dir='./models')

    # df = filter_paragraphs(df)
    cdqa_pipeline = QAPipeline(reader='models/bert_qa.joblib',
                               max_df=0.95,
                               min_df=3)
    cdqa_pipeline.fit_retriever(df)

    predictions = cdqa_pipeline.predict(
        query, n_predictions=5)  #retriever_score_weight=0.99

    answers = []
    for prediction in predictions:
        row = df.loc[df["title"] == prediction[1]].iloc[0]
        answers.append({
            "slide_id": int(row["slide_id"]),
            "revision_id": int(row["revision_id"]),
            "answer": prediction[0],
            "title": prediction[1],
            "paragraph": prediction[2],
            "score": prediction[3]
        })

    return answers
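# Note: DataFrame.append, used in the loop above, was deprecated in pandas 1.4
# and removed in pandas 2.0. A minimal sketch of the equivalent row accumulation
# on newer pandas (collect dicts, build the frame once):
def _rows_to_frame_sketch(rows):
    # `rows` is a list of dicts like the ones passed to df.append above.
    return pd.DataFrame(rows)  # replaces repeated df.append(..., ignore_index=True)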
def get_distilbert_model():
    if not os.path.exists('./models'):
        os.makedirs('./models')
    if not os.path.exists('./models/distilbert_qa.joblib'):
        download_model(model="{}-squad_1.1".format('distilbert'),
                       dir='./models')
    return QAPipeline(reader='./models/distilbert_qa.joblib',
                      max_df=1.0,
                      min_df=1)
Example no. 3
def question(text, query):
    from nltk.tokenize import sent_tokenize  # the original excerpt omits this import

    print(text)
    test = []
    for i in sent_tokenize(text):
        if len(i) > 2:
            test.append(i)

    n = 4
    # using list comprehension
    final = [test[i * n:(i + 1) * n] for i in range((len(test) + n - 1) // n)]
    title_s = []
    for j in range(len(final)):
        title_s.append(f'Title{j}')

    data = [title_s, final]
    df3 = pd.DataFrame(data=data)
    df3 = df3.transpose()
    df3.columns = ['title', 'paragraphs']
    print(df3)
    #st.text('Hold on this will take some time')

    from ast import literal_eval

    from cdqa.utils.filters import filter_paragraphs
    from cdqa.utils.download import download_model, download_bnpp_data
    from cdqa.pipeline.cdqa_sklearn import QAPipeline

    # Download data and models
    #download_bnpp_data(dir='./data/bnpp_newsroom_v1.1/')
    #download_model(model='bert-squad_1.1', dir='./models')

    # Loading data and filtering / preprocessing the documents
    # (note: this demo corpus is loaded but never used; the pipeline below is fitted on df3)
    df = pd.read_csv(
        'D:/devjams/Machine-Learning-Web-Apps-master/NLPIffy_NLP_Based_SpaCy_Flask_App&_API/cdQA/data/bnpp_newsroom_v1.1/bnpp_newsroom-v1.1.csv',
        converters={'paragraphs': literal_eval})
    df = filter_paragraphs(df)
    #st.text('Please Wait. We are looking for the answer to your question')
    # Loading QAPipeline with CPU version of BERT Reader pretrained on SQuAD 1.1
    cdqa_pipeline = QAPipeline(
        reader='D:/devjams/Machine-Learning-Web-Apps-master/NLPIffy_NLP_Based_SpaCy_Flask_App&_API/bert_qa_vGPU-sklearn.joblib'
    )

    # Fitting the retriever to the list of documents in the dataframe
    cdqa_pipeline.fit_retriever(df3)
    print(query)
    #st.text('Almost done.......')
    #query = 'Intellectual Property Rights'
    try:
        prediction = cdqa_pipeline.predict(query)
    except Exception as e:
        print(e)
        return None
    #st.text(prediction[2])
    return prediction[2]
Example no. 4
def find_answer(question):
    # Set your path to pdf directory
    df = pdf_converter(directory_path='pdf_folder/')
    cdqa_pipeline = QAPipeline(reader='models/bert_qa.joblib')
    cdqa_pipeline.fit_retriever(df)
    query = question + '?'
    prediction = cdqa_pipeline.predict(query)

    # print('query: {}\n'.format(query))
    # print('answer: {}\n'.format(prediction[0]))
    # print('title: {}\n'.format(prediction[1]))
    # print('paragraph: {}\n'.format(prediction[2]))
    return prediction[0]
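# Imports assumed by find_answer above (omitted from the excerpt):
from cdqa.utils.converters import pdf_converter
from cdqa.pipeline.cdqa_sklearn import QAPipeline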
Example no. 5
def search_view(request):
    if request.method == 'POST':
        question = request.POST.get('question')
        for idx, url in enumerate(
                search(question, tld="com", num=10, stop=3, pause=2)):
            crawl_result(url, idx)
        # change path to pdfs folder
        df = pdf_converter(directory_path='/path/to/pdfs')
        cdqa_pipeline = QAPipeline(reader='models/bert_qa.joblib')
        cdqa_pipeline.fit_retriever(df)
        prediction = cdqa_pipeline.predict(question)
        data = {'answer': prediction[0]}
        return JsonResponse(data)
    return render(request, 'search.html')
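# Imports assumed by search_view (omitted from the excerpt); search() with the
# tld/num/stop/pause keywords matches the old `googlesearch` package, and
# crawl_result is a project-specific helper that saves each result page locally:
from django.http import JsonResponse
from django.shortcuts import render
from googlesearch import search
from cdqa.utils.converters import pdf_converter
from cdqa.pipeline.cdqa_sklearn import QAPipeline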
Example no. 6
def execute_pipeline(query):
    df = pd.read_csv('data/bnpp_newsroom_v1.1/bnpp_newsroom-v1.1.csv',
                     converters={'paragraphs': literal_eval})
    df = filter_paragraphs(df)

    cdqa_pipeline = QAPipeline(
        reader='models/bert_qa_vCPU-sklearn.joblib')
    cdqa_pipeline.fit(X=df)
    cdqa_pipeline.reader.output_dir = None

    prediction = cdqa_pipeline.predict(X=query)

    result = (prediction[0], prediction[1])

    return result
Example no. 7
def execute_pipeline(query):
    download_bnpp_data('./data/bnpp_newsroom_v1.1/')
    download_model('bert-squad_1.1', dir='./models')
    df = pd.read_csv('./data/bnpp_newsroom_v1.1/bnpp_newsroom-v1.1.csv',
                     converters={'paragraphs': literal_eval})
    df = filter_paragraphs(df)

    cdqa_pipeline = QAPipeline(reader='models/bert_qa_vCPU-sklearn.joblib')
    cdqa_pipeline.fit_retriever(X=df)

    prediction = cdqa_pipeline.predict(X=query)

    result = (prediction[0], prediction[1])

    return result
Example no. 8
def test_evaluate_pipeline():

    download_bnpp_data("./data/bnpp_newsroom_v1.1/")
    download_model("bert-squad_1.1", dir="./models")
    df = pd.read_csv(
        "./data/bnpp_newsroom_v1.1/bnpp_newsroom-v1.1.csv",
        converters={"paragraphs": literal_eval},
    )
    df = filter_paragraphs(df)

    test_data = {
        "data": [
            {
                "title": "BNP Paribas’ commitment to universities and schools",
                "paragraphs": [
                    {
                        "context": "Since January 2016, BNP Paribas has offered an Excellence Program targeting new Master’s level graduates (BAC+5) who show high potential. The aid program lasts 18 months and comprises three assignments of six months each. It serves as a strong career accelerator that enables participants to access high-level management positions at a faster rate. The program allows participants to discover the BNP Paribas Group and its various entities in France and abroad, build an internal and external network by working on different assignments and receive personalized assistance from a mentor and coaching firm at every step along the way.",
                        "qas": [
                            {
                                "answers": [
                                    {"answer_start": 6, "text": "January 2016"},
                                    {"answer_start": 6, "text": "January 2016"},
                                    {"answer_start": 6, "text": "January 2016"},
                                ],
                                "question": "Since when does the Excellence Program of BNP Paribas exist?",
                                "id": "56be4db0acb8001400a502ec",
                            }
                        ],
                    }
                ],
            }
        ],
        "version": "1.1",
    }

    with open("./test_data.json", "w") as f:
        json.dump(test_data, f)

    cdqa_pipeline = QAPipeline(reader="./models/bert_qa_vCPU-sklearn.joblib", n_jobs=-1)
    cdqa_pipeline.fit_retriever(X=df)

    eval_dict = evaluate_pipeline(cdqa_pipeline, "./test_data.json", output_dir=None)

    assert eval_dict["exact_match"] > 0.8

    assert eval_dict["f1"] > 0.8
Example no. 9
def ask():
    name = request.form['btn-input']

    #print(name)
    f = open('current.txt')
    file1 = f.read().rstrip()
    f.close()
    cdqa_pipeline = QAPipeline(reader='models/bert_qa_vCPU-sklearn.joblib')
    row = file_open(file1)
    df = pd.DataFrame(row)
    df = df.T
    df.columns = ['title', 'paragraphs']
    #print(df.head())
    # Fitting the retriever to the list of documents in the dataframe
    cdqa_pipeline.fit_retriever(df)
    prediction = cdqa_pipeline.predict(name)
    ret = [name, prediction[0], prediction[1], prediction[2]]
    speech = ret[1] + "\n\nRelated Paragraph: " + ret[3]
    print('This is error output', speech)
    #return speech
    return render_template('index.html', value1=name, value2=speech)
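# file_open is a project-specific helper that is not shown. Judging from how its
# return value is used above (transposed into a one-row title/paragraphs frame),
# a hypothetical reconstruction could be:
def file_open(path):
    # Hypothetical: read one document and return [title, list-of-paragraphs].
    with open(path, encoding='utf-8') as f:
        text = f.read()
    return [path, text.split('\n\n')]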
Example no. 10
def execute_pipeline(query, n_predictions=None):
    download_bnpp_data("./data/bnpp_newsroom_v1.1/")
    download_model("bert-squad_1.1", dir="./models")
    df = pd.read_csv(
        "./data/bnpp_newsroom_v1.1/bnpp_newsroom-v1.1.csv",
        converters={"paragraphs": literal_eval},
    )
    df = filter_paragraphs(df)

    cdqa_pipeline = QAPipeline(reader="models/bert_qa_vCPU-sklearn.joblib")
    cdqa_pipeline.fit_retriever(X=df)
    if n_predictions is not None:
        predictions = cdqa_pipeline.predict(X=query,
                                            n_predictions=n_predictions)
        result = []

        for answer, title, paragraph in predictions:
            prediction = (answer, title)
            result.append(prediction)
        return result
    else:
        prediction = cdqa_pipeline.predict(X=query)
        result = (prediction[0], prediction[1])
        return result
Example no. 11
# coding: utf-8
import os
import pandas as pd
from ast import literal_eval
import cdqa
from cdqa.utils.filters import filter_paragraphs
from cdqa.pipeline.cdqa_sklearn import QAPipeline

df = pd.read_csv('/home/ubuntu/data/bnpp_newsroom_v1.1/bnpp_newsroom-v1.1.csv',
                 converters={'paragraphs': literal_eval})
df = filter_paragraphs(df)

df['title'] = df['category']

cdqa_pipeline = QAPipeline(
    reader='/home/ubuntu/data/bert_qa_vCPU-sklearn.joblib')
cdqa_pipeline.fit(X=df)

print('At result')


class QAModule():
    def __init__(self):
        self.query = 'Since when does the Excellence Program of BNP Paribas exist?'

    def getAnswer(self, query):
        prediction = cdqa_pipeline.predict(X=query)
        return prediction
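# A minimal driver for QAModule (hypothetical; not part of the original file):
if __name__ == '__main__':
    qa = QAModule()
    prediction = qa.getAnswer(qa.query)
    print(prediction[0])  # the extracted answer span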


class SentimentModule():
Example no. 12
#df = pd.read_csv('data/my_data/homework.csv', converters={'paragraphs': literal_eval})
#df = filter_paragraphs(df)

df = pd.DataFrame(columns=['title', 'paragraphs'])
paragraphs = input("Text to Analyze:\n").split('\n')
df = df.append({
    'title': 'Inputted Data',
    'paragraphs': paragraphs
}, ignore_index=True)

print(df)

cdqa_pipeline = QAPipeline(reader='models/bert_qa.joblib',
                           min_df=1,
                           max_df=1000)

cdqa_pipeline.fit_retriever(df=df)

while True:
    query = input('> ')
    prediction = cdqa_pipeline.predict(query=query)

    #if prediction[3] < -2:
    #    print("cdQA: Sorry, I don't know.")
    #else:
    #print('query: {}\n'.format(query))
    print('cdQA: {}'.format(prediction[0]))
    #print('title: {}\n'.format(prediction[1]))
    #print('paragraph: {}\n'.format(prediction[2]))
Example no. 13
import pandas as pd
from ast import literal_eval

from cdqa.utils.filters import filter_paragraphs
from cdqa.utils.download import download_model, download_bnpp_data
from cdqa.pipeline.cdqa_sklearn import QAPipeline

# Download data and models
#download_bnpp_data(dir='./data/bnpp_newsroom_v1.1/')
#download_model(model='bert-squad_1.1', dir='./models')

# Loading data and filtering / preprocessing the documents
df = pd.read_csv('data/bnpp_newsroom_v1.1/bnpp_newsroom-v1.1.csv',
                 converters={'paragraphs': literal_eval})
df = filter_paragraphs(df)

# Loading QAPipeline with CPU version of BERT Reader pretrained on SQuAD 1.1
cdqa_pipeline = QAPipeline(reader='models/bert_qa_vCPU-sklearn.joblib')

# Fitting the retriever to the list of documents in the dataframe
_ = cdqa_pipeline.fit_retriever(df)

# Sending a question to the pipeline and getting prediction
query = 'Since when does the Excellence Program of BNP Paribas exist?'
prediction = cdqa_pipeline.predict(query)

print('query: {}\n'.format(query))
print('answer: {}\n'.format(prediction[0]))
print('title: {}\n'.format(prediction[1]))
print('paragraph: {}\n'.format(prediction[2]))
Example no. 14
        else:
            default = "The use of artificial intelligence in public action is often identified as an opportunity to interrogate documentary texts and to create automatic question / answer tools for users. Querying natural language work code, providing a conversational agent for a given service, developing high-performance search engines, improving knowledge management, all activities that require quality training data corpus to develop question and answer algorithms. Today, there are no public and open French training data sets that would train these algorithms. The ambition of the PIAF project is to build this set of Francophone data for AI in an open and contributive way."
            default_query = 'What is the aim of PIAF?'

        para = st.text_area('Ecrivez ici le paragraphe source', default)
        df = pd.DataFrame([[0, 'My paragraph', [para]]],
                          columns=['id', 'title', 'paragraphs'])

    ### MODEL TRAINING SECTION ###

    s1 = time.time()

    if "Français" not in langu:
        download_model(model='bert-squad_1.1', dir='./models')
        cdqa_pipeline = QAPipeline(reader='models/bert_qa.joblib',
                                   max_df=1.0,
                                   min_df=1)
    else:
        cdqa_pipeline = QAPipeline(reader='models/bert_qa_fr.joblib',
                                   max_df=1.0,
                                   min_df=1)

    # cdqa_pipeline.cuda()
    t1 = time.time() - s1

    s2 = time.time()
    # Fitting the retriever to the list of documents in the dataframe
    cdqa_pipeline.fit_retriever(df)
    t2 = time.time() - s2

    # Querying and displaying
Example no. 15
from flask import Flask, request, jsonify
from ast import literal_eval
import pandas as pd
from cdqa.utils.filters import filter_paragraphs
from cdqa.utils.download import download_model, download_bnpp_data
from cdqa.pipeline.cdqa_sklearn import QAPipeline
from cdqa.retriever import BM25Retriever
from ETRI import *
import time
from khaiii_def import *
app = Flask(__name__)
df = pd.read_csv('jungchat_result_191102.csv', converters={'paragraphs': literal_eval})
cdqa_pipeline = QAPipeline(reader='bert_qa_korquad_vCPU.joblib')  # load the model
retriever = BM25Retriever(ngram_range=(1, 2), max_df=1.00, min_df=1, stop_words=None)  # retriever for document-level similarity
retriever_temp = BM25Retriever(ngram_range=(1, 2), max_df=1.00, min_df=1, stop_words=None)  # retriever for sentence-level similarity
retriever.fit(df)  # fit on the content of every document
df = filter_paragraphs(df)
best_idx_scores = ''

def text_tranform(text):
    return '\n'.join(text.split(', '))


def make_query(text):
    dataSend = {
          "version": "2.0",
          "template": {
             "outputs": [{
                    "simpleText":{
                       "text" : text}
               }]
           }
Example no. 16
File: qa_index.py Project: viqee/qa
cfgs = reader.read()  # `reader` is a file handle opened earlier in the original file (not shown)
configs = json.loads(cfgs)

question = configs['question']

data_directory = '/data/'
models_directory = '/models/'

# download_squad(dir = './' + data_directory)
download_bnpp_data(dir = './' + data_directory)
# download_model('distilbert-squad_1.1', dir = './' + models_directory)
download_model('bert-squad_1.1', dir = './' + models_directory)

df = pandas.read_csv(data_directory + '/bnpp_paribas/-??-.csv',
                     converters = {'paragraphs': ast.literal_eval})
df = filter_paragraphs(df)
cdqa_pipeline = QAPipeline(reader = models_directory + '/bert_qa/bert_qa.joblib')
cdqa_pipeline.fit_retriever(df)
# cdqa_pipeline.fit_reader('path to squad like dataset . json')
prediction = cdqa_pipeline.predict(question)  # optionally pass n_predictions=<number of predictions>
# cdqa_pipeline.dump_reader('path to save . joblib') # save reader model

query = 'query: {}\n'.format(question)
answer = 'answer: {}\n'.format(prediction[0])
title = 'title: {}\n'.format(prediction[1])
paragraph = 'paragraph: {}\n'.format(prediction[2])

result = '\n'.join([query, answer, title, paragraph])

notify2.init('question answer')
notif = notify2.Notification('qa', result)
# notif.set_urgency(notify2.URGENCY_CRITICAL)
Example no. 17
import os
from ast import literal_eval

import pandas as pd
from flask import Flask, request, jsonify
from flask_cors import CORS

from cdqa.utils.filters import filter_paragraphs
from cdqa.pipeline.cdqa_sklearn import QAPipeline

app = Flask(__name__)
CORS(app)

dataset_path = os.environ["dataset_path"]
reader_path = os.environ["reader_path"]

df = pd.read_csv(dataset_path, converters={"paragraphs": literal_eval})
df = filter_paragraphs(df)

cdqa_pipeline = QAPipeline(reader=reader_path)
cdqa_pipeline.fit(X=df)


@app.route("/api", methods=["GET"])
def api():

    query = request.args.get("query")
    prediction = cdqa_pipeline.predict(X=query)

    return jsonify(query=query,
                   answer=prediction[0],
                   title=prediction[1],
                   paragraph=prediction[2])
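# A minimal client sketch for the /api route above (assumes the service runs
# locally on port 5000; requires the `requests` package):
import requests

resp = requests.get('http://localhost:5000/api',
                    params={'query': 'Since when does the Excellence Program of BNP Paribas exist?'})
print(resp.json()['answer'])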
Example no. 18
import requests
from flask import Flask, request, Response
from flask_ngrok import run_with_ngrok
import pandas as pd
from ast import literal_eval
from cdqa.utils.filters import filter_paragraphs
from cdqa.utils.download import download_model, download_bnpp_data
from cdqa.pipeline.cdqa_sklearn import QAPipeline

API_KEY = '936714777:AAGFCBbeOAClrTsgmMMOsYG3HkaV7Ck5p-w'

app = Flask(__name__)
run_with_ngrok(app)
df = pd.read_csv('data/bnpp_newsroom_v1.1/jungchat_result.csv',
                 converters={'paragraphs': literal_eval})
cdqa_pipeline = QAPipeline(reader='models/bert_qa_korquad_vCPU.joblib')
cdqa_pipeline.fit_retriever(df)


def parse_message(message):
    chat_id = message['message']['chat']['id']
    msg = message['message']['text']

    return chat_id, msg


def send_message(chat_id, query):
    url = 'https://api.telegram.org/bot{token}/sendMessage'.format(
        token=API_KEY)
    # bundle the variables into a dictionary
Example no. 19
import pandas as pd
import streamlit as st
from ast import literal_eval
from cdqa.utils.filters import filter_paragraphs
from cdqa.utils.download import download_squad, download_model
from cdqa.pipeline.cdqa_sklearn import QAPipeline
from nltk import tokenize

# `directory` is defined earlier in the original file (not shown here)
download_squad(dir=directory)
download_model('bert-squad_1.1', dir=directory)
download_model('distilbert-squad_1.1', dir=directory)


def load_from_csv(file):
    df = pd.read_csv(file)
    df = df.rename(str.lower, axis='columns')
    df['paragraphs'] = df['paragraphs'].apply(
        lambda x: x.replace("'s", " "
                            "s").replace("\n", " "))
    df['paragraphs'] = df['paragraphs'].apply(
        lambda x: tokenize.sent_tokenize(x))
    return df


df = load_from_csv('./data/test.csv')
#make sure bert_qa.joblib is in the same directory (cdQA); if not, move it here from data
cdqa_pipeline = QAPipeline(reader='bert_qa.joblib')
cdqa_pipeline.fit_retriever(df=df)

query = st.text_area('enter message', 'type')
if st.button('analyze'):
    message = cdqa_pipeline.predict(query=query, n_predictions=2)
    st.success(message)
Example no. 20
import pandas as pd
from ast import literal_eval

from cdqa.utils.filters import filter_paragraphs
from cdqa.utils.download import download_model, download_bnpp_data
from cdqa.pipeline.cdqa_sklearn import QAPipeline

# Download data and models
download_bnpp_data(dir='./data/bnpp_newsroom_v1.1/')
# download_model(model='bert-squad_1.1', dir='./models')

# Loading data and filtering / preprocessing the documents
# df = pd.read_csv('data/bnpp_newsroom_v1.1/bnpp_newsroom-v1.1.csv', converters={'paragraphs': literal_eval})
df = pd.read_csv('data/bnpp_newsroom_v1.1/custom_tax_jlb.csv',
                 converters={'paragraphs': literal_eval})
df = filter_paragraphs(df)

# Loading QAPipeline with CPU version of BERT Reader pretrained on SQuAD 1.1
# cdqa_pipeline = QAPipeline(reader='models/bert_qa_vCPU-sklearn.joblib')
cdqa_pipeline = QAPipeline(reader='models/bert_qa.joblib')

# Fitting the retriever to the list of documents in the dataframe
# cdqa_pipeline.fit_retriever(X=df)
cdqa_pipeline.fit_retriever(df=df)

# Sending a question to the pipeline and getting prediction
# query = 'Since when does the Excellence Program of BNP Paribas exist?'
# query = 'Who should investors  consult with prior to investing?'
# query = 'Who do custom animal farmers need to consult with before buying fertilizer?'
queries = [
    'Who do custom animal farmers need to consult with before buying fertilizer?',
    'do I qualify for an automatic extension of time to file without filing Form 4868?',
    'Did the coronavirus pandemic extend the deadline to pay taxes?',
    'What is the new tax deadline?', 'What is the tax payer advocate service?',
    'What is the job of the taxpayer advocate service?',
Example no. 21
import pandas as pd
from ast import literal_eval
from cdqa.pipeline.cdqa_sklearn import QAPipeline
from rasa_sdk import Action

# read the csv file
df = pd.read_csv(
    '/Users/ashutoshvishnoi/Data_Science/intern_2/products/BankCurrupcy/qa_system/sample_data2/'
    'answs.csv',
    converters={'paragraphs': literal_eval})

# Load the bert qa model
cdqa_pipeline = QAPipeline(
    reader='/Users/ashutoshvishnoi/Data_Science/intern_2/products/BankCurrupcy/'
    'qa_system/models/bert_qa.joblib')

ques_dict = []

cdqa_pipeline.fit_retriever(df)
print('-----Model loaded successfully and fit successfully----')


class ActionGetNewst(Action):
    def name(self):
        return 'action_get_bertAns'

    def run(self, dispatcher, tracker, domain):
        query = tracker.latest_message['text']
        prediction = cdqa_pipeline.predict(query, n_predictions=3)

        # dispatcher.utter_message('query: {}\n'.format(query))
Example no. 22
#11915052	Kapil Bindal

import pandas as pd
from ast import literal_eval

from cdqa.utils.filters import filter_paragraphs
from cdqa.utils.download import download_model, download_bnpp_data
from cdqa.pipeline.cdqa_sklearn import QAPipeline

#read the cleaned dataset and just take question and context for our model
df = pd.read_csv('data/dataset_collected.csv', usecols=['question', 'context'])

#convert paragraphs to a list
df['paragraphs'] = df[df.columns[1:]].apply(
    lambda x: x.dropna().values.tolist(), axis=1)

df.rename(columns={"question": "title"}, inplace=True)
df.drop(columns='context', inplace=True)
df.to_csv('df_corona.csv', index=False)

#use a lighter pipeline model to build the pipeline on top of it
cdqa_pipeline = QAPipeline(reader='models/model.joblib')
cdqa_pipeline.fit_retriever(df=df)

query = "Can I travel ?"
prediction = cdqa_pipeline.predict(query=query)

print('Query : {}\n'.format(query))
print('Answer from Bot: {}\n'.format(prediction[0]))
print('Matched to Question : {}\n'.format(prediction[1]))
print('Paragraph pickup from : {}\n'.format(prediction[2]))
Example no. 23
reader.fit(X=(train_examples, train_features))

# Output fine-tuned model
reader.model.to('cpu')
reader.device = torch.device('cpu')
joblib.dump(reader, os.path.join(reader.output_dir, 'bert_tim_qa_vCPU.joblib'))

#%% [markdown]
# ### Training

#%%
from cdqa.pipeline.cdqa_sklearn import QAPipeline

# Load standard model
cdqa_pipeline = QAPipeline(reader='./cdqa/bert_qa_vCPU-sklearn.joblib', max_answer_length=60)
cdqa_pipeline.fit_retriever(X=df_X)


#%%
# Evaluate QnA system
from cdqa.utils.evaluation import evaluate_pipeline
evaluate_pipeline(cdqa_pipeline, 'cdqa-v1.1-tim_qna.json')

# Standard pre trained model: {'exact_match': 0.0, 'f1': 5.025362668068075}
# Fine-tuned model: {'exact_match': 0.0, 'f1': 5.684362620078064}

#%% [markdown]
# ### Inference

#%%