def create_answers(deck_id, query):
    decks_collection = db["decks"]
    slides_collection = db["slides"]
    target_deck = decks_collection.find_one({"_id": deck_id})
    author_id = target_deck["user"]
    author_slides = slides_collection.find({"user": author_id})

    df = pd.DataFrame()

    for slide in author_slides:
        revision = slide["revisions"][-1]
        usages = revision["usage"]
        for usage in usages:
            if usage["id"] == deck_id:
                content = html2text.html2text(revision["content"])
                paragraphs = content.split('\n\n')
                df = df.append(
                    {
                        "date": revision["timestamp"],
                        "title": revision["title"],
                        "category": "Information",
                        "link": "",
                        "abstract": "",
                        "paragraphs": paragraphs,
                        "revision_id": revision["id"],
                        "slide_id": slide["_id"]
                    },
                    ignore_index=True)
                break

    download_model(model='bert-squad_1.1', dir='./models')

    # df = filter_paragraphs(df)
    cdqa_pipeline = QAPipeline(reader='models/bert_qa.joblib',
                               max_df=0.95,
                               min_df=3)
    cdqa_pipeline.fit_retriever(df)

    predictions = cdqa_pipeline.predict(
        query, n_predictions=5)  #retriever_score_weight=0.99

    answers = []
    for prediction in predictions:
        row = df.loc[df["title"] == prediction[1]].iloc[0]
        answers.append({
            "slide_id": int(row["slide_id"]),
            "revision_id": int(row["revision_id"]),
            "answer": prediction[0],
            "title": prediction[1],
            "paragraph": prediction[2],
            "score": prediction[3]
        })

    return answers
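# Note: DataFrame.append, used in the loop above, was deprecated in pandas 1.4
# and removed in pandas 2.0. A minimal sketch of the equivalent row accumulation
# on newer pandas (collect dicts, build the frame once):
def _rows_to_frame_sketch(rows):
    # `rows` is a list of dicts like the ones passed to df.append above.
    return pd.DataFrame(rows)  # replaces repeated df.append(..., ignore_index=True)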
def get_distilbert_model():
    if not os.path.exists('./models'):
        os.makedirs('./models')
    if not os.path.exists('./models/distilbert_qa.joblib'):
        download_model(model="{}-squad_1.1".format('distilbert'),
                       dir='./models')
    return QAPipeline(reader='./models/distilbert_qa.joblib',
                      max_df=1.0,
                      min_df=1)
Example no. 3
def question(text, query):
    from nltk.tokenize import sent_tokenize  # the original excerpt omits this import

    print(text)
    test = []
    for i in sent_tokenize(text):
        if len(i) > 2:
            test.append(i)

    n = 4
    # using list comprehension
    final = [test[i * n:(i + 1) * n] for i in range((len(test) + n - 1) // n)]
    title_s = []
    for j in range(len(final)):
        title_s.append(f'Title{j}')

    data = [title_s, final]
    df3 = pd.DataFrame(data=data)
    df3 = df3.transpose()
    df3.columns = ['title', 'paragraphs']
    print(df3)
    #st.text('Hold on this will take some time')

    from ast import literal_eval

    from cdqa.utils.filters import filter_paragraphs
    from cdqa.utils.download import download_model, download_bnpp_data
    from cdqa.pipeline.cdqa_sklearn import QAPipeline

    # Download data and models
    #download_bnpp_data(dir='./data/bnpp_newsroom_v1.1/')
    #download_model(model='bert-squad_1.1', dir='./models')

    # Loading data and filtering / preprocessing the documents
    # (note: this demo corpus is loaded but never used; the pipeline below is fitted on df3)
    df = pd.read_csv(
        'D:/devjams/Machine-Learning-Web-Apps-master/NLPIffy_NLP_Based_SpaCy_Flask_App&_API/cdQA/data/bnpp_newsroom_v1.1/bnpp_newsroom-v1.1.csv',
        converters={'paragraphs': literal_eval})
    df = filter_paragraphs(df)
    #st.text('Please Wait. We are looking for the answer to your question')
    # Loading QAPipeline with CPU version of BERT Reader pretrained on SQuAD 1.1
    cdqa_pipeline = QAPipeline(
        reader='D:/devjams/Machine-Learning-Web-Apps-master/NLPIffy_NLP_Based_SpaCy_Flask_App&_API/bert_qa_vGPU-sklearn.joblib'
    )

    # Fitting the retriever to the list of documents in the dataframe
    cdqa_pipeline.fit_retriever(df3)
    print(query)
    #st.text('Almost done.......')
    #query = 'Intellectual Property Rights'
    try:
        prediction = cdqa_pipeline.predict(query)
    except Exception as e:
        print(e)
        return None
    #st.text(prediction[2])
    return prediction[2]
Example no. 4
def find_answer(question):
    # Set your path to pdf directory
    df = pdf_converter(directory_path='pdf_folder/')
    cdqa_pipeline = QAPipeline(reader='models/bert_qa.joblib')
    cdqa_pipeline.fit_retriever(df)
    query = question + '?'
    prediction = cdqa_pipeline.predict(query)

    # print('query: {}\n'.format(query))
    # print('answer: {}\n'.format(prediction[0]))
    # print('title: {}\n'.format(prediction[1]))
    # print('paragraph: {}\n'.format(prediction[2]))
    return prediction[0]
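# Imports assumed by find_answer above (omitted from the excerpt):
from cdqa.utils.converters import pdf_converter
from cdqa.pipeline.cdqa_sklearn import QAPipeline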
Example no. 5
def search_view(request):
    if request.method == 'POST':
        question = request.POST.get('question')
        for idx, url in enumerate(
                search(question, tld="com", num=10, stop=3, pause=2)):
            crawl_result(url, idx)
        # change path to pdfs folder
        df = pdf_converter(directory_path='/path/to/pdfs')
        cdqa_pipeline = QAPipeline(reader='models/bert_qa.joblib')
        cdqa_pipeline.fit_retriever(df)
        prediction = cdqa_pipeline.predict(question)
        data = {'answer': prediction[0]}
        return JsonResponse(data)
    return render(request, 'search.html')
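# Imports assumed by search_view (omitted from the excerpt); search() with the
# tld/num/stop/pause keywords matches the old `googlesearch` package, and
# crawl_result is a project-specific helper that saves each result page locally:
from django.http import JsonResponse
from django.shortcuts import render
from googlesearch import search
from cdqa.utils.converters import pdf_converter
from cdqa.pipeline.cdqa_sklearn import QAPipeline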
Example no. 6
def execute_pipeline(query):
    df = pd.read_csv('data/bnpp_newsroom_v1.1/bnpp_newsroom-v1.1.csv',
                     converters={'paragraphs': literal_eval})
    df = filter_paragraphs(df)

    cdqa_pipeline = QAPipeline(
        reader='models/bert_qa_vCPU-sklearn.joblib')
    cdqa_pipeline.fit(X=df)
    cdqa_pipeline.reader.output_dir = None

    prediction = cdqa_pipeline.predict(X=query)

    result = (prediction[0], prediction[1])

    return result
Example no. 7
def execute_pipeline(query):
    download_bnpp_data('./data/bnpp_newsroom_v1.1/')
    download_model('bert-squad_1.1', dir='./models')
    df = pd.read_csv('./data/bnpp_newsroom_v1.1/bnpp_newsroom-v1.1.csv',
                     converters={'paragraphs': literal_eval})
    df = filter_paragraphs(df)

    cdqa_pipeline = QAPipeline(reader='models/bert_qa_vCPU-sklearn.joblib')
    cdqa_pipeline.fit_retriever(X=df)

    prediction = cdqa_pipeline.predict(X=query)

    result = (prediction[0], prediction[1])

    return result
Example no. 8
def test_evaluate_pipeline():

    download_bnpp_data("./data/bnpp_newsroom_v1.1/")
    download_model("bert-squad_1.1", dir="./models")
    df = pd.read_csv(
        "./data/bnpp_newsroom_v1.1/bnpp_newsroom-v1.1.csv",
        converters={"paragraphs": literal_eval},
    )
    df = filter_paragraphs(df)

    test_data = {
        "data": [
            {
                "title": "BNP Paribas’ commitment to universities and schools",
                "paragraphs": [
                    {
                        "context": "Since January 2016, BNP Paribas has offered an Excellence Program targeting new Master’s level graduates (BAC+5) who show high potential. The aid program lasts 18 months and comprises three assignments of six months each. It serves as a strong career accelerator that enables participants to access high-level management positions at a faster rate. The program allows participants to discover the BNP Paribas Group and its various entities in France and abroad, build an internal and external network by working on different assignments and receive personalized assistance from a mentor and coaching firm at every step along the way.",
                        "qas": [
                            {
                                "answers": [
                                    {"answer_start": 6, "text": "January 2016"},
                                    {"answer_start": 6, "text": "January 2016"},
                                    {"answer_start": 6, "text": "January 2016"},
                                ],
                                "question": "Since when does the Excellence Program of BNP Paribas exist?",
                                "id": "56be4db0acb8001400a502ec",
                            }
                        ],
                    }
                ],
            }
        ],
        "version": "1.1",
    }

    with open("./test_data.json", "w") as f:
        json.dump(test_data, f)

    cdqa_pipeline = QAPipeline(reader="./models/bert_qa_vCPU-sklearn.joblib", n_jobs=-1)
    cdqa_pipeline.fit_retriever(X=df)

    eval_dict = evaluate_pipeline(cdqa_pipeline, "./test_data.json", output_dir=None)

    assert eval_dict["exact_match"] > 0.8

    assert eval_dict["f1"] > 0.8
Example no. 9
def ask():
    name = request.form['btn-input']

    #print(name)
    f = open('current.txt')
    file1 = f.read().rstrip()
    f.close()
    cdqa_pipeline = QAPipeline(reader='models/bert_qa_vCPU-sklearn.joblib')
    row = file_open(file1)
    df = pd.DataFrame(row)
    df = df.T
    df.columns = ['title', 'paragraphs']
    #print(df.head())
    # Fitting the retriever to the list of documents in the dataframe
    cdqa_pipeline.fit_retriever(df)
    prediction = cdqa_pipeline.predict(name)
    ret = [name, prediction[0], prediction[1], prediction[2]]
    speech = ret[1] + "\n\nRelated Paragraph: " + ret[3]
    print('This is error output', speech)
    #return speech
    return render_template('index.html', value1=name, value2=speech)
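# file_open is a project-specific helper that is not shown. Judging from how its
# return value is used above (transposed into a one-row title/paragraphs frame),
# a hypothetical reconstruction could be:
def file_open(path):
    # Hypothetical: read one document and return [title, list-of-paragraphs].
    with open(path, encoding='utf-8') as f:
        text = f.read()
    return [path, text.split('\n\n')]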
Example no. 10
def execute_pipeline(query, n_predictions=None):
    download_bnpp_data("./data/bnpp_newsroom_v1.1/")
    download_model("bert-squad_1.1", dir="./models")
    df = pd.read_csv(
        "./data/bnpp_newsroom_v1.1/bnpp_newsroom-v1.1.csv",
        converters={"paragraphs": literal_eval},
    )
    df = filter_paragraphs(df)

    cdqa_pipeline = QAPipeline(reader="models/bert_qa_vCPU-sklearn.joblib")
    cdqa_pipeline.fit_retriever(X=df)
    if n_predictions is not None:
        predictions = cdqa_pipeline.predict(X=query,
                                            n_predictions=n_predictions)
        result = []

        for answer, title, paragraph in predictions:
            prediction = (answer, title)
            result.append(prediction)
        return result
    else:
        prediction = cdqa_pipeline.predict(X=query)
        result = (prediction[0], prediction[1])
        return result
Example no. 11
# coding: utf-8
import os
import pandas as pd
from ast import literal_eval
import cdqa
from cdqa.utils.filters import filter_paragraphs
from cdqa.pipeline.cdqa_sklearn import QAPipeline

df = pd.read_csv('/home/ubuntu/data/bnpp_newsroom_v1.1/bnpp_newsroom-v1.1.csv',
                 converters={'paragraphs': literal_eval})
df = filter_paragraphs(df)

df['title'] = df['category']

cdqa_pipeline = QAPipeline(
    reader='/home/ubuntu/data/bert_qa_vCPU-sklearn.joblib')
cdqa_pipeline.fit(X=df)

print('At result')


class QAModule():
    def __init__(self):
        self.query = 'Since when does the Excellence Program of BNP Paribas exist?'

    def getAnswer(self, query):
        prediction = cdqa_pipeline.predict(X=query)
        return prediction
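# A minimal driver for QAModule (hypothetical; not part of the original file):
if __name__ == '__main__':
    qa = QAModule()
    prediction = qa.getAnswer(qa.query)
    print(prediction[0])  # the extracted answer span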


class SentimentModule():
Example no. 12
#df = pd.read_csv('data/my_data/homework.csv', converters={'paragraphs': literal_eval})
#df = filter_paragraphs(df)

df = pd.DataFrame(columns=['title', 'paragraphs'])
paragraphs = input("Text to Analyze:\n").split('\n')
df = df.append({
    'title': 'Inputted Data',
    'paragraphs': paragraphs
}, ignore_index=True)

print(df)

cdqa_pipeline = QAPipeline(reader='models/bert_qa.joblib',
                           min_df=1,
                           max_df=1000)

cdqa_pipeline.fit_retriever(df=df)

while True:
    query = input('> ')
    prediction = cdqa_pipeline.predict(query=query)

    #if prediction[3] < -2:
    #    print("cdQA: Sorry, I don't know.")
    #else:
    #print('query: {}\n'.format(query))
    print('cdQA: {}'.format(prediction[0]))
    #print('title: {}\n'.format(prediction[1]))
    #print('paragraph: {}\n'.format(prediction[2]))
Example no. 13
import pandas as pd
from ast import literal_eval

from cdqa.utils.filters import filter_paragraphs
from cdqa.utils.download import download_model, download_bnpp_data
from cdqa.pipeline.cdqa_sklearn import QAPipeline

# Download data and models
#download_bnpp_data(dir='./data/bnpp_newsroom_v1.1/')
#download_model(model='bert-squad_1.1', dir='./models')

# Loading data and filtering / preprocessing the documents
df = pd.read_csv('data/bnpp_newsroom_v1.1/bnpp_newsroom-v1.1.csv',
                 converters={'paragraphs': literal_eval})
df = filter_paragraphs(df)

# Loading QAPipeline with CPU version of BERT Reader pretrained on SQuAD 1.1
cdqa_pipeline = QAPipeline(reader='models/bert_qa_vCPU-sklearn.joblib')

# Fitting the retriever to the list of documents in the dataframe
_ = cdqa_pipeline.fit_retriever(df)

# Sending a question to the pipeline and getting prediction
query = 'Since when does the Excellence Program of BNP Paribas exist?'
prediction = cdqa_pipeline.predict(query)

print('query: {}\n'.format(query))
print('answer: {}\n'.format(prediction[0]))
print('title: {}\n'.format(prediction[1]))
print('paragraph: {}\n'.format(prediction[2]))
Example no. 14
        else:
            default = "The use of artificial intelligence in public action is often identified as an opportunity to interrogate documentary texts and to create automatic question / answer tools for users. Querying natural language work code, providing a conversational agent for a given service, developing high-performance search engines, improving knowledge management, all activities that require quality training data corpus to develop question and answer algorithms. Today, there are no public and open French training data sets that would train these algorithms. The ambition of the PIAF project is to build this set of Francophone data for AI in an open and contributive way."
            default_query = 'What is the aim of PIAF?'

        para = st.text_area('Ecrivez ici le paragraphe source', default)
        df = pd.DataFrame([[0, 'My paragraph', [para]]],
                          columns=['id', 'title', 'paragraphs'])

    ### MODEL TRAINING SECTION ###

    s1 = time.time()

    if "Français" not in langu:
        download_model(model='bert-squad_1.1', dir='./models')
        cdqa_pipeline = QAPipeline(reader='models/bert_qa.joblib',
                                   max_df=1.0,
                                   min_df=1)
    else:
        cdqa_pipeline = QAPipeline(reader='models/bert_qa_fr.joblib',
                                   max_df=1.0,
                                   min_df=1)

    # cdqa_pipeline.cuda()
    t1 = time.time() - s1

    s2 = time.time()
    # Fitting the retriever to the list of documents in the dataframe
    cdqa_pipeline.fit_retriever(df)
    t2 = time.time() - s2

    # Querying and displaying
Example no. 15
from flask import Flask, request, jsonify
from ast import literal_eval
import pandas as pd
from cdqa.utils.filters import filter_paragraphs
from cdqa.utils.download import download_model, download_bnpp_data
from cdqa.pipeline.cdqa_sklearn import QAPipeline
from cdqa.retriever import BM25Retriever
from ETRI import *
import time
from khaiii_def import *
app = Flask(__name__)
df = pd.read_csv('jungchat_result_191102.csv', converters={'paragraphs': literal_eval})
cdqa_pipeline = QAPipeline(reader='bert_qa_korquad_vCPU.joblib')  # load the model
retriever = BM25Retriever(ngram_range=(1, 2), max_df=1.00, min_df=1, stop_words=None)  # retriever for document-level similarity
retriever_temp = BM25Retriever(ngram_range=(1, 2), max_df=1.00, min_df=1, stop_words=None)  # retriever for sentence-level similarity
retriever.fit(df)  # fit on the content of every document
df = filter_paragraphs(df)
best_idx_scores = ''

def text_tranform(text):
    return '\n'.join(text.split(', '))


def make_query(text):
    dataSend = {
          "version": "2.0",
          "template": {
             "outputs": [{
                    "simpleText":{
                       "text" : text}
               }]
           }
Example no. 16
File: qa_index.py Project: viqee/qa
cfgs = reader.read()  # `reader` is a file handle opened earlier in the original file (not shown)
configs = json.loads(cfgs)

question = configs['question']

data_directory = '/data/'
models_directory = '/models/'

# download_squad(dir = './' + data_directory)
download_bnpp_data(dir = './' + data_directory)
# download_model('distilbert-squad_1.1', dir = './' + models_directory)
download_model('bert-squad_1.1', dir = './' + models_directory)

df = pandas.read_csv(data_directory + '/bnpp_paribas/-??-.csv',
                     converters = {'paragraphs': ast.literal_eval})
df = filter_paragraphs(df)
cdqa_pipeline = QAPipeline(reader = models_directory + '/bert_qa/bert_qa.joblib')
cdqa_pipeline.fit_retriever(df)
# cdqa_pipeline.fit_reader('path to squad like dataset . json')
prediction = cdqa_pipeline.predict(question)  # optionally pass n_predictions=<number of predictions>
# cdqa_pipeline.dump_reader('path to save . joblib') # save reader model

query = 'query: {}\n'.format(question)
answer = 'answer: {}\n'.format(prediction[0])
title = 'title: {}\n'.format(prediction[1])
paragraph = 'paragraph: {}\n'.format(prediction[2])

result = '\n'.join([query, answer, title, paragraph])

notify2.init('question answer')
notif = notify2.Notification('qa', result)
# notif.set_urgency(notify2.URGENCY_CRITICAL)
Example no. 17
import os
from ast import literal_eval

import pandas as pd
from flask import Flask, request, jsonify
from flask_cors import CORS

from cdqa.utils.filters import filter_paragraphs
from cdqa.pipeline.cdqa_sklearn import QAPipeline

app = Flask(__name__)
CORS(app)

dataset_path = os.environ["dataset_path"]
reader_path = os.environ["reader_path"]

df = pd.read_csv(dataset_path, converters={"paragraphs": literal_eval})
df = filter_paragraphs(df)

cdqa_pipeline = QAPipeline(reader=reader_path)
cdqa_pipeline.fit(X=df)


@app.route("/api", methods=["GET"])
def api():

    query = request.args.get("query")
    prediction = cdqa_pipeline.predict(X=query)

    return jsonify(query=query,
                   answer=prediction[0],
                   title=prediction[1],
                   paragraph=prediction[2])
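# A minimal client sketch for the /api route above (assumes the service runs
# locally on port 5000; requires the `requests` package):
import requests

resp = requests.get('http://localhost:5000/api',
                    params={'query': 'Since when does the Excellence Program of BNP Paribas exist?'})
print(resp.json()['answer'])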
Example no. 18
import requests
from flask import Flask, request, Response
from flask_ngrok import run_with_ngrok
import pandas as pd
from ast import literal_eval
from cdqa.utils.filters import filter_paragraphs
from cdqa.utils.download import download_model, download_bnpp_data
from cdqa.pipeline.cdqa_sklearn import QAPipeline

API_KEY = '936714777:AAGFCBbeOAClrTsgmMMOsYG3HkaV7Ck5p-w'

app = Flask(__name__)
run_with_ngrok(app)
df = pd.read_csv('data/bnpp_newsroom_v1.1/jungchat_result.csv',
                 converters={'paragraphs': literal_eval})
cdqa_pipeline = QAPipeline(reader='models/bert_qa_korquad_vCPU.joblib')
cdqa_pipeline.fit_retriever(df)


def parse_message(message):
    chat_id = message['message']['chat']['id']
    msg = message['message']['text']

    return chat_id, msg


def send_message(chat_id, query):
    url = 'https://api.telegram.org/bot{token}/sendMessage'.format(
        token=API_KEY)
    # bundle the variables into a dictionary
Example no. 19
import pandas as pd
import streamlit as st
from ast import literal_eval
from cdqa.utils.filters import filter_paragraphs
from cdqa.utils.download import download_squad, download_model
from cdqa.pipeline.cdqa_sklearn import QAPipeline
from nltk import tokenize

# `directory` is defined earlier in the original file (not shown here)
download_squad(dir=directory)
download_model('bert-squad_1.1', dir=directory)
download_model('distilbert-squad_1.1', dir=directory)


def load_from_csv(file):
    df = pd.read_csv(file)
    df = df.rename(str.lower, axis='columns')
    df['paragraphs'] = df['paragraphs'].apply(
        lambda x: x.replace("'s", " "
                            "s").replace("\n", " "))
    df['paragraphs'] = df['paragraphs'].apply(
        lambda x: tokenize.sent_tokenize(x))
    return df


df = load_from_csv('./data/test.csv')
#make sure bert_qa.joblib is in the same directory (cdQA); if not, move it here from data
cdqa_pipeline = QAPipeline(reader='bert_qa.joblib')
cdqa_pipeline.fit_retriever(df=df)

query = st.text_area('enter message', 'type')
if st.button('analyze'):
    message = cdqa_pipeline.predict(query=query, n_predictions=2)
    st.success(message)
Example no. 20
import pandas as pd
from ast import literal_eval

from cdqa.utils.filters import filter_paragraphs
from cdqa.utils.download import download_model, download_bnpp_data
from cdqa.pipeline.cdqa_sklearn import QAPipeline

# Download data and models
download_bnpp_data(dir='./data/bnpp_newsroom_v1.1/')
# download_model(model='bert-squad_1.1', dir='./models')

# Loading data and filtering / preprocessing the documents
# df = pd.read_csv('data/bnpp_newsroom_v1.1/bnpp_newsroom-v1.1.csv', converters={'paragraphs': literal_eval})
df = pd.read_csv('data/bnpp_newsroom_v1.1/custom_tax_jlb.csv',
                 converters={'paragraphs': literal_eval})
df = filter_paragraphs(df)

# Loading QAPipeline with CPU version of BERT Reader pretrained on SQuAD 1.1
# cdqa_pipeline = QAPipeline(reader='models/bert_qa_vCPU-sklearn.joblib')
cdqa_pipeline = QAPipeline(reader='models/bert_qa.joblib')

# Fitting the retriever to the list of documents in the dataframe
# cdqa_pipeline.fit_retriever(X=df)
cdqa_pipeline.fit_retriever(df=df)

# Sending a question to the pipeline and getting prediction
# query = 'Since when does the Excellence Program of BNP Paribas exist?'
# query = 'Who should investors  consult with prior to investing?'
# query = 'Who do custom animal farmers need to consult with before buying fertilizer?'
queries = [
    'Who do custom animal farmers need to consult with before buying fertilizer?',
    'do I qualify for an automatic extension of time to file without filing Form 4868?',
    'Did the coronavirus pandemic extend the deadline to pay taxes?',
    'What is the new tax deadline?', 'What is the tax payer advocate service?',
    'What is the job of the taxpayer advocate service?',
Example no. 21
import pandas as pd
from ast import literal_eval
from cdqa.pipeline.cdqa_sklearn import QAPipeline
from rasa_sdk import Action

# read the csv file
df = pd.read_csv(
    '/Users/ashutoshvishnoi/Data_Science/intern_2/products/BankCurrupcy/qa_system/sample_data2/'
    'answs.csv',
    converters={'paragraphs': literal_eval})

# Load the bert qa model
cdqa_pipeline = QAPipeline(
    reader='/Users/ashutoshvishnoi/Data_Science/intern_2/products/BankCurrupcy/'
    'qa_system/models/bert_qa.joblib')

ques_dict = []

cdqa_pipeline.fit_retriever(df)
print('-----Model loaded successfully and fit successfully----')


class ActionGetNewst(Action):
    def name(self):
        return 'action_get_bertAns'

    def run(self, dispatcher, tracker, domain):
        query = tracker.latest_message['text']
        prediction = cdqa_pipeline.predict(query, n_predictions=3)

        # dispatcher.utter_message('query: {}\n'.format(query))
Example no. 22
#11915052	Kapil Bindal

import pandas as pd
from ast import literal_eval

from cdqa.utils.filters import filter_paragraphs
from cdqa.utils.download import download_model, download_bnpp_data
from cdqa.pipeline.cdqa_sklearn import QAPipeline

#read the cleaned dataset and just take question and context for our model
df = pd.read_csv('data/dataset_collected.csv', usecols=['question', 'context'])

#convert paragraphs to a list
df['paragraphs'] = df[df.columns[1:]].apply(
    lambda x: x.dropna().values.tolist(), axis=1)

df.rename(columns={"question": "title"}, inplace=True)
df.drop(columns='context', inplace=True)
df.to_csv('df_corona.csv', index=False)

#use a lighter pipeline model to build the pipeline on top of it
cdqa_pipeline = QAPipeline(reader='models/model.joblib')
cdqa_pipeline.fit_retriever(df=df)

query = "Can I travel ?"
prediction = cdqa_pipeline.predict(query=query)

print('Query : {}\n'.format(query))
print('Answer from Bot: {}\n'.format(prediction[0]))
print('Matched to Question : {}\n'.format(prediction[1]))
print('Paragraph pickup from : {}\n'.format(prediction[2]))
Example no. 23
reader.fit(X=(train_examples, train_features))

# Output fine-tuned model
reader.model.to('cpu')
reader.device = torch.device('cpu')
joblib.dump(reader, os.path.join(reader.output_dir, 'bert_tim_qa_vCPU.joblib'))

#%% [markdown]
# ### Training

#%%
from cdqa.pipeline.cdqa_sklearn import QAPipeline

# Load standard model
cdqa_pipeline = QAPipeline(reader='./cdqa/bert_qa_vCPU-sklearn.joblib', max_answer_length=60)
cdqa_pipeline.fit_retriever(X=df_X)


#%%
# Evaluate QnA system
from cdqa.utils.evaluation import evaluate_pipeline
evaluate_pipeline(cdqa_pipeline, 'cdqa-v1.1-tim_qna.json')

# Standard pre trained model: {'exact_match': 0.0, 'f1': 5.025362668068075}
# Fine-tuned model: {'exact_match': 0.0, 'f1': 5.684362620078064}

#%% [markdown]
# ### Inference

#%%