Example #1
import asyncio
import logging

import aioredis
import ujson
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

from pipelines import pipeline


async def task():
    tokenizer = AutoTokenizer.from_pretrained(
        "ozcangundes/mt5-multitask-qa-qg-turkish")
    model = AutoModelForSeq2SeqLM.from_pretrained(
        "ozcangundes/mt5-multitask-qa-qg-turkish")
    multimodel = pipeline("multitask-qa-qg", tokenizer=tokenizer, model=model)

    queue = await aioredis.create_redis_pool(
        "redis://redis:6379/0?encoding=utf-8")
    logging.warning("Connected to Redis")

    logging.warning("QAQG task is running asynchronously...")
    while True:
        # Atomically read up to 8 queued requests and drop them from the list.
        pipe = queue.pipeline()
        pipe.lrange("qaqg", 0, 7)
        pipe.ltrim("qaqg", 8, -1)
        requests, _ = await pipe.execute()

        for r in requests:
            r = ujson.loads(r)
            if r.get("question") is None:
                # No question supplied: generate question-answer pairs.
                results = multimodel(r["text"])
            else:
                # Question supplied: answer it against the given context.
                results = multimodel({
                    "context": r["text"],
                    "question": r["question"]
                })

            await queue.set(r["id"], ujson.dumps(results))

        await asyncio.sleep(0.1)
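A producer for this worker only needs to push a JSON payload with the fields the loop above reads ("id", "text", and optionally "question") onto the "qaqg" list, then poll the key it chose as "id". A minimal sketch against the same Redis instance; the function name and the use of uuid are illustrative, not part of the original:

import uuid


async def enqueue_request(queue, text, question=None):
    # Push a request shaped the way the worker above expects.
    payload = {"id": str(uuid.uuid4()), "text": text}
    if question is not None:
        payload["question"] = question
    await queue.rpush("qaqg", ujson.dumps(payload))
    return payload["id"]  # poll queue.get(<id>) for the result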
Example #2
import sys

from flask import jsonify, make_response, request


def generate_questions():
    from pipelines import pipeline
    print('Getting the text to analyse', file=sys.stderr)
    text2 = request.form['togenerate']
    print('Got the text to analyse, shown below', file=sys.stderr)
    print(text2, file=sys.stderr)
    print('Starting to generate questions', file=sys.stderr)
    nlp = pipeline("question-generation")
    res = nlp(text2)
    response = make_response(jsonify(res), 200)
    response.headers["Content-Type"] = "application/json"
    return response
Example #3
from flask import make_response, request

from pipelines import pipeline


def generate_questions():
    # getting the request data
    data = request.json
    text = data['text']

    nlp = pipeline("multitask-qa-qg", model="valhalla/t5-small-qa-qg-hl")
    result = nlp(text)

    response = make_response({
        'status': True,
        'message': 'Success',
        'data': result
    })
    response.mimetype = 'application/json'

    return response
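A hypothetical client call for the endpoint above; the route path and port are assumptions, and only the request body and response shape come from the handler itself:

import requests

resp = requests.post("http://localhost:5000/generate_questions",  # assumed route
                     json={"text": "Python was created by Guido van Rossum."})
print(resp.json()["data"])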
Example #4
import os

import pymongo
from dotenv import load_dotenv
load_dotenv()

db_ip = os.environ.get('PUBQUAIZ_DB_IP')
db_port = os.environ.get('PUBQUAIZ_DB_PORT')
db_user = os.environ.get('PUBQUAIZ_DB_ADMIN_USER')
db_pwd = os.environ.get('PUBQUAIZ_DB_ADMIN_PWD')

client = pymongo.MongoClient(f"mongodb://{db_user}:{db_pwd}@{db_ip}:{db_port}/")
db = client["PubQuAIZ"]
m_dataset_names = db['CollectionLookup'].distinct("dataset")
print('Test db connection...', m_dataset_names)

from pipelines import pipeline
nlp = pipeline("question-generation",model="valhalla/t5-small-qg-prepend", qg_format="prepend")
#nlp = pipeline("e2e-qg")
#nlp = pipeline("multitask-qa-qg")


def insert_into_db(qa, topic):
    dataset = db["WikiNLP"]
    # Strip surrounding quotes from the topic name, if present.
    if topic.startswith('"') and topic.endswith('"'):
        topic = topic[1:-1]
    for x in qa:
        x['topic'] = topic
    dataset.insert_many(qa)

    # Get all the ids we just created.
    ids = list(dataset.find({'topic': topic}, {'_id': True}))
    print(ids)
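A hypothetical driver tying the two halves of this example together; the sample text and topic are illustrative. nlp returns a list of dicts with "question" and "answer" keys, which insert_into_db tags with the topic and stores:

qa_pairs = nlp("Gravity is a natural phenomenon by which all things "
               "with mass or energy are attracted to one another.")
insert_into_db(qa_pairs, "Gravity")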
Example #5
from pipelines import pipeline

nlp = pipeline("multitask-qa-qg")
qg = pipeline("e2e-qg")
print("preload finished.")
Example #6
from pipelines import pipeline
import wikipedia


def filter_length_answers(json_result):
    # Keep only question-answer pairs with reasonably short answers.
    result = []
    for qa_pair in json_result:
        length_of_answer = len(qa_pair["answer"])
        if length_of_answer > 30:
            continue
        result.append(qa_pair)
    return result


nlp = pipeline("question-generation")

topic = input("Give a Topic: ")
topics = wikipedia.search(topic)
print(topics)
topic = input("Choose a Topic: ")

summary = wikipedia.summary(topic)
summary = summary.replace("\n", "")

json_result = nlp(summary)

json_result = filter_length_answers(json_result)

for idx, qa_pair in enumerate(json_result):
    print("Question " + str(idx + 1) + ":")
    print(qa_pair["question"] + "\n")
    input("Press Enter to see the answer\n")
    print(qa_pair["answer"] + "\n")
Example #7
from flask import Flask, jsonify, request
from werkzeug.utils import secure_filename
from queue import Queue, Empty
import time
import threading
from pipelines import pipeline
import pandas as pd

app = Flask(__name__, template_folder='templates')
app.config['MAX_CONTENT_LENGTH'] = 1024 * 1024

requests_queue = Queue()
BATCH_SIZE = 10
CHECK_INTERVAL = 0.1

# Preload the models once at startup so requests don't pay the loading cost.
nlp = pipeline("e2e-qg", model="valhalla/t5-base-e2e-qg")
qg = pipeline("e2e-qg")


def handle_requests_by_batch():
    while True:
        # Collect up to BATCH_SIZE queued requests before processing them.
        requests_batch = []
        while len(requests_batch) < BATCH_SIZE:
            try:
                requests_batch.append(
                    requests_queue.get(timeout=CHECK_INTERVAL))
            except Empty:
                continue

        # Run the model over the completed batch.
        batch_outputs = []
        for req in requests_batch:
            batch_outputs.append(run(req['input'][0]))
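        # The original snippet is truncated above; the lines from here on are
        # a plausible completion of this batching pattern, not the original.
        # Presumed tail of the loop: hand each output back to its caller.
        for req, output in zip(requests_batch, batch_outputs):
            req['output'] = output


# Start the single background worker that drains the queue (assumed setup).
threading.Thread(target=handle_requests_by_batch, daemon=True).start()


@app.route('/generate', methods=['POST'])  # hypothetical endpoint name
def generate():
    # Enqueue the input, then block until the worker attaches 'output'.
    req = {'input': [request.json['text']]}
    requests_queue.put(req)
    while 'output' not in req:
        time.sleep(CHECK_INTERVAL)
    return jsonify(req['output'])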
Example #8
def Question_Generation(text):
    from pipelines import pipeline
    nlp = pipeline("question-generation")
    return nlp(text)
Example #9
from flask import Flask, request, jsonify
from pipelines import pipeline

nlp = pipeline("question-generation",
               model="valhalla/t5-base-qg-hl",
               qg_format="prepend")


def get_free_questions(request):
    """Responds to any HTTP request.
    Args:
        request (flask.Request): HTTP request object.
    Returns:
        The response text or any set of values that can be turned into a
        Response object using
        `make_response <http://flask.pocoo.org/docs/1.0/api/#flask.Flask.make_response>`.
    """
    # get_data() returns bytes; decode to str so ord() sees characters.
    text = request.get_data(as_text=True)
    # Replace any non-ASCII characters with spaces.
    text = ''.join([i if ord(i) < 128 else ' ' for i in text])
    questions = nlp(text)
    corrects = []
    for question in questions:
        corrects.append(question['answer'].strip())
    types = ['free_response'] * len(questions)
    return questions, corrects, types
Example #10
import spacy
import json
import random
import numpy as np
from tqdm import tqdm
from pipelines import pipeline

nlp_sent = spacy.load("en_core_web_sm")
doc = nlp_sent(
    'European authorities fined Google a record $5.1 billion on Wednesday for abusing its power in the mobile phone market and ordered the company to alter its practices'
)
print([(X.text, X.label_) for X in doc.ents])

# Please train your own model on SQuAD and load as below
nlp = pipeline("multitask-qa-qg",
               model="t5-large-multi-hl/checkpoint-3500",
               qg_format="highlight")

data_path = '/home/data/squad/train-v1.1.json'
sample = False
print(f'reading {data_path} with sampling: {sample}')
train_set = json.load(open(data_path))
new_train_set = {'data': []}
cnt = 0
answer_stats = []
bs = 16
tmp_path = data_path.replace('.json', '_qg_t5l35-sqd_tmp.json')
tmp_file = open(tmp_path, 'a')

for article in tqdm(train_set['data']):
    new_article = {'title': article['title'], 'paragraphs': []}
Example #11
from flask import Flask, render_template, request

from pipelines import pipeline

app = Flask(__name__)

print("Loading model...")
model = pipeline("question-generation")
print("Model loaded!")


@app.route('/')
def index():
    return render_template("index.html", data="hey")


@app.route("/qselect", methods=["POST"])
def qselect():
    text = request.form['sometext']
    data = model(text)
    return render_template("q-select.html", data=data, text=text)


if __name__ == "__main__":
    app.run(debug=True, use_reloader=False)
Example #12
from pipelines import pipeline
from utils import score_questions
from tableqa.agent import Agent
from tableqa.nlp import qa
import os
qg = pipeline("e2e-qg")


class insi:
    """
    Get insights from texts.
    """
    def get_scores(self, questions):
        """
        Parameters
        ----------
        questions : `list` or `tuple` of `str`
            Each string is a question.

        Returns
        -------
        `dict`
            Maps each question to its score.
        """
        self.qmaps = score_questions(questions)
        return self.qmaps

    def get_questions(self, text, csv=False):
Example #13
#!/usr/bin/env python3
"""Create a testing bench callable without arguments."""
from models import Baseline, CNN
from pipelines import pipeline

ROUNDS = 20
EPOCHS = 25
MBS = 100
LR = 1e-3
pipeline(Baseline,
         "Baseline",
         rounds=ROUNDS,
         epochs=EPOCHS,
         mini_batch_size=MBS,
         lr=LR)

pipeline(Baseline,
         "Baseline with auxiliary",
         rounds=ROUNDS,
         epochs=EPOCHS,
         mini_batch_size=MBS,
         lr=LR,
         auxiliary=True)

LR = 5e-4
pipeline(CNN, "CNN", rounds=ROUNDS, epochs=EPOCHS, mini_batch_size=MBS, lr=LR)

pipeline(CNN,
         "CNN with auxiliary",
         rounds=ROUNDS,
         epochs=EPOCHS,
         mini_batch_size=MBS,
         lr=LR,
         auxiliary=True)
Example #14
from pipelines import pipeline
nlp = pipeline("e2e-qg")
nlp("Python is a programming language. Created by Guido van Rossum and first released in 1991.")

#### INFERENCE DATABASE  ###
from nltk.sem import Expression
from nltk.inference import Prover9

read_expr = Expression.fromstring  # parse logic-syntax strings into Expressions

inference_test_list = ['Socrates is a man', 'All men are mortal']
p1 = read_expr('man(socrates)')
p2 = read_expr('all x.(man(x) -> mortal(x))')
c = read_expr('mortal(socrates)')
Prover9().prove(c, [p1, p2])  # uses Prover9 to logically infer that Socrates is mortal
## TODO: Need a way to convert Strings into Expressions


## TODO: make a copy of database_list, make each item an expression, set up Prover9 and equiv()
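The mechanical half of these TODOs is straightforward once the database items are written in Prover9-compatible logic syntax; turning free-form English like 'Socrates is a man' into that syntax is the genuinely open part. A minimal sketch, assuming a hypothetical database_list of logic-syntax strings (the names and contents below are illustrative, not from the original):

from nltk.sem import Expression
from nltk.inference import Prover9  # requires the external Prover9 binary on PATH

# Hypothetical database of logic-syntax strings, not free-form English.
database_list = ['man(socrates)', 'all x.(man(x) -> mortal(x))']

# Make each item an Expression, as the TODO suggests.
database_exprs = [Expression.fromstring(s) for s in database_list]

# Is the goal entailed by the database?
goal = Expression.fromstring('mortal(socrates)')
print(Prover9().prove(goal, database_exprs))  # True

# Equivalence: two expressions are equivalent iff each entails the other.
a = Expression.fromstring('all x.(man(x) -> mortal(x))')
b = Expression.fromstring('all x.(-mortal(x) -> -man(x))')
prover = Prover9()
print(prover.prove(b, [a]) and prover.prove(a, [b]))  # True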
Example #15
from pipelines import pipeline
import wikipedia
from nltk import sent_tokenize

FILE = "first_topic.txt"  # insert the document to direct the results to

nlp = pipeline("multitask-qa-qg")

# nlp = pipeline("question-generation", model="valhalla/t5-small-qg-prepend",
#                qg_format="prepend")


def prepare():
    """segment the text into chunks that meet the max_length criterium"""
    text = wikipedia.page("Cat").content
    index = 252
    result = []

    sentences = sent_tokenize(text)  # segment text into list of sentences

    position = 0
    container = []

    for sentence in sentences:
        if (position <= index):
            number_words = len(sentence.split())
            number_tokens = number_words + number_words - 1
            container.append(sentence)
            position += number_tokens
            continue
        container = " ".join(container)
Example #16
text3 = "42 is the answer to life, universe and everything."

text4 = "Forrest Gump is a 1994 American comedy-drama film directed by Robert Zemeckis and written by Eric Roth. \
It is based on the 1986 novel of the same name by Winston Groom and stars Tom Hanks, Robin Wright, Gary Sinise, \
Mykelti Williamson and Sally Field. The story depicts several decades in the life of Forrest Gump (Hanks), \
a slow-witted but kind-hearted man from Alabama who witnesses and unwittingly influences several defining \
historical events in the 20th century United States. The film differs substantially from the novel."

"""## Single task QA"""

# Commented out IPython magic to ensure Python compatibility.
# %cd question_generation

from pipelines import pipeline

nlp = pipeline("question-generation")

nlp(text3)

"""If you want to use the t5-base model, then pass the path through model parameter"""

nlp = pipeline("question-generation", model="valhalla/t5-base-qg-hl")

nlp(text3)

nlp(text4)

nlp(text2)  # text2 is defined in an earlier cell of the original notebook


Example #17
from pipelines import pipeline
from text2text.text_generator import TextGenerator
import nltk
from nltk.stem.porter import PorterStemmer
import spacy
from sense2vec import Sense2VecComponent

spacy_nlp = spacy.load("en_core_web_sm")
s2v = Sense2VecComponent(spacy_nlp.vocab).from_disk("./s2v_old")
spacy_nlp.add_pipe(s2v)

t5_generator = pipeline("question-generation")
t2t_generator = TextGenerator(output_type="question")


def generate_from_T5(context, n=5):
    res = t5_generator(context)
    ans = []
    que = []
    for i, r in enumerate(res):
        if i < n:
            ans.append(r['answer'])
            que.append(r['question'])
    return que, ans


def generate_from_t2t(context, n=5):
    res = t2t_generator.predict([context] * n)
    ans = []
    que = []
    for r in res: