async def task():
    """Serve QA/QG requests queued in the Redis list ``qaqg``, forever.

    Loads the ``ozcangundes/mt5-multitask-qa-qg-turkish`` tokenizer and model,
    wraps them in a ``multitask-qa-qg`` pipeline and connects to Redis. Each
    polling round reads up to eight queued entries and trims them off the list
    (both commands are sent in one pipeline), runs the model on every entry
    and stores the JSON-encoded result under the entry's ``id`` key.

    Every queued entry is a JSON object with ``id`` and ``text``; when it also
    carries a non-null ``question`` the model is called with a
    ``{"context", "question"}`` dict, otherwise with the bare text.

    This coroutine never returns.
    """
    tokenizer = AutoTokenizer.from_pretrained(
        "ozcangundes/mt5-multitask-qa-qg-turkish")
    model = AutoModelForSeq2SeqLM.from_pretrained(
        "ozcangundes/mt5-multitask-qa-qg-turkish")
    multimodel = pipeline("multitask-qa-qg", tokenizer=tokenizer, model=model)

    queue = await aioredis.create_redis_pool(
        "redis://redis:6379/0?encoding=utf-8")
    logging.warning("Connected to Redis")
    logging.warning("QAQG task is running asynchronously...")

    while True:
        # Fetch the first eight entries and drop them from the list.
        pipe = queue.pipeline()
        pipe.lrange("qaqg", 0, 7)
        pipe.ltrim("qaqg", 8, -1)
        requests, _ = await pipe.execute()

        for r in requests:
            r = ujson.loads(r)
            if r.get("question") is None:
                results = multimodel(r["text"])
            else:
                results = multimodel({
                    "context": r["text"],
                    "question": r["question"]
                })
            await queue.set(r["id"], ujson.dumps(results))

        # The sleep must be awaited: a bare asyncio.sleep() call only creates
        # a coroutine object, so the loop would spin without ever pausing.
        await asyncio.sleep(0.1)
def generate_questions():
    """Generate questions for the ``togenerate`` form field of the request.

    Progress messages and the submitted text are written to stderr. A
    ``question-generation`` pipeline is built on every call, run on the text,
    and its output is returned as a JSON response with status 200.
    """
    from pipelines import pipeline

    print('Getting the text to analyse', file=sys.stderr)
    source_text = request.form['togenerate']
    print('got the get to analyse below', file=sys.stderr)
    print(source_text, file=sys.stderr)

    print('Starting to generate questions', file=sys.stderr)
    generator = pipeline("question-generation")
    payload = jsonify(generator(source_text))

    response = make_response(payload, 200)
    response.headers["Content-Type"] = "application/json"
    return response
def generate_questions():
    """Run the multitask QA/QG model on the ``text`` field of the JSON body.

    A ``multitask-qa-qg`` pipeline backed by ``valhalla/t5-small-qa-qg-hl`` is
    built on every call. The model output is wrapped in a
    ``{'status', 'message', 'data'}`` envelope and returned with the
    ``application/json`` mimetype.
    """
    # Pull the input text out of the JSON request body.
    text = request.json['text']

    qa_qg = pipeline("multitask-qa-qg", model="valhalla/t5-small-qa-qg-hl")
    envelope = {
        'status': True,
        'message': 'Success',
        'data': qa_qg(text),
    }

    response = make_response(envelope)
    response.mimetype = 'application/json'
    return response
from dotenv import load_dotenv

# Load the PUBQUAIZ_* settings from a .env file into the environment.
load_dotenv()

db_ip = os.environ.get('PUBQUAIZ_DB_IP')
db_port = os.environ.get('PUBQUAIZ_DB_PORT')
db_user = os.environ.get('PUBQUAIZ_DB_ADMIN_USER')
db_pwd = os.environ.get('PUBQUAIZ_DB_ADMIN_PWD')

client = pymongo.MongoClient(f"mongodb://{db_user}:{db_pwd}@{db_ip}:{db_port}/")
db = client["PubQuAIZ"]

# Smoke test: querying the lookup collection fails early if the connection is bad.
m_dataset_names = db['CollectionLookup'].distinct("dataset")
print ('Test db connection...',m_dataset_names)

from pipelines import pipeline

nlp = pipeline("question-generation",model="valhalla/t5-small-qg-prepend", qg_format="prepend")
#nlp = pipeline("e2e-qg")
#nlp = pipeline("multitask-qa-qg")


def insert_into_db(qa,topic):
    """Tag each QA document with ``topic`` and insert them into ``WikiNLP``.

    Args:
        qa: list of dict documents; each one is mutated in place to carry a
            ``topic`` key (and receives an ``_id`` from the insert).
        topic: topic label; one pair of surrounding double quotes is stripped.

    The ids of the documents inserted by this call are printed as a list of
    ``{'_id': ...}`` dicts. An empty ``qa`` inserts nothing and prints ``[]``.
    """
    dataset = db["WikiNLP"]

    # Length guard: an empty topic must not raise IndexError, and a lone
    # quote character is not a quoted string.
    if len(topic) >= 2 and topic[0] == '"' and topic[-1] == '"':
        topic = topic[1:-1]

    ids = []
    if qa:  # insert_many rejects an empty document list
        for x in qa:
            x['topic'] = topic
        inserted = dataset.insert_many(qa)
        # Take the ids from the insert result: querying by topic would also
        # return documents stored by earlier calls for the same topic.
        ids = [{'_id': new_id} for new_id in inserted.inserted_ids]
    print (ids)
from pipelines import pipeline

# Build both pipelines up front so the models are loaded before first use.
nlp = pipeline("multitask-qa-qg")  # combined question answering / generation
qg = pipeline("e2e-qg")            # end-to-end question generation

print("preload finished.")
from pipelines import pipeline
import wikipedia


def filter_length_answers(json_result):
    """Return the QA pairs whose ``answer`` is at most 30 characters long.

    Args:
        json_result: iterable of dicts, each with an ``answer`` string.

    Returns:
        A new list with the short-answer pairs, in their original order.
    """
    return [qa_pair for qa_pair in json_result if len(qa_pair["answer"]) <= 30]


nlp = pipeline("question-generation")

# Let the user search first, then pick one of the printed page titles.
topic = input("Give a Topic: ")
# Keep the search results: assigning the result of print() stored None.
topics = wikipedia.search(topic)
print(topics)
topic = input("Choose a Topic: ")

summary = wikipedia.summary(topic)
summary = summary.replace("\n", "")

json_result = nlp(summary)
json_result = filter_length_answers(json_result)

for idx, qa_pair in enumerate(json_result):
    print("Question " + str(idx + 1) + ":")
    print(qa_pair["question"] + "\n")
    print("Press Enter to see the answer\n")
from werkzeug.utils import secure_filename from queue import Queue, Empty import time import threading from pipelines import pipeline import pandas as pd app = Flask(__name__, template_folder='templates') app.config['MAX_CONTENT_LENGTH'] = 1024 * 1024 requests_queue = Queue() BATCH_SIZE = 10 CHECK_INTERVAL = 0.1 #preload model nlp = pipeline("e2e-qg", model="valhalla/t5-base-e2e-qg") qg = pipeline("e2e-qg") def handle_requests_by_batch(): while True: requests_batch = [] while not (len(requests_batch) >= BATCH_SIZE): try: requests_batch.append( requests_queue.get(timeout=CHECK_INTERVAL)) except Empty: continue batch_outputs = [] for request in requests_batch: batch_outputs.append(run(request['input'][0]))
def Question_Generation(text):
    """Run a freshly built ``question-generation`` pipeline on ``text``.

    The pipeline is imported and constructed on every call; whatever it
    returns for ``text`` is passed back unchanged.
    """
    from pipelines import pipeline

    generator = pipeline("question-generation")
    questions = generator(text)
    return questions
from flask import Flask, request, jsonify
from pipelines import pipeline

nlp = pipeline("question-generation", model="valhalla/t5-base-qg-hl", qg_format="prepend")


def get_free_questions(request):
    """Generate free-response questions from the body of an HTTP request.

    Args:
        request (flask.Request): HTTP request object whose body is the source
            text.

    Returns:
        A ``(questions, corrects, types)`` tuple:
        ``questions`` is the pipeline output, ``corrects`` holds the stripped
        ``answer`` of every question, and ``types`` is ``'free_response'``
        repeated once per question.
    """
    # get_data() returns bytes by default; iterating bytes yields ints, on
    # which ord() raises TypeError. Ask for the decoded text instead.
    text = request.get_data(as_text=True)
    # Replace every non-ASCII character with a space before running the model.
    text = ''.join(ch if ord(ch) < 128 else ' ' for ch in text)

    questions = nlp(text)
    corrects = [question['answer'].strip() for question in questions]
    types = ['free_response'] * len(questions)
    return questions, corrects, types
import spacy
import json
import random
import numpy as np
from tqdm import tqdm
from pipelines import pipeline

# Quick check of the spaCy model: print the named entities of a sample sentence.
nlp_sent = spacy.load("en_core_web_sm")
doc = nlp_sent(
    'European authorities fined Google a record $5.1 billion on Wednesday for abusing its power in the mobile phone market and ordered the company to alter its practices'
)
print([(X.text, X.label_) for X in doc.ents])

# Please train your own model on SQuAD and load as below
nlp = pipeline("multitask-qa-qg",
               model="t5-large-multi-hl/checkpoint-3500",
               qg_format="highlight")

data_path = '/home/data/squad/train-v1.1.json'
sample = False
print(f'reading {data_path} with sampling: {sample}')
# Read through a context manager so the handle is closed right after parsing,
# and decode explicitly as UTF-8 instead of the platform default.
with open(data_path, encoding='utf-8') as squad_file:
    train_set = json.load(squad_file)

new_train_set = {'data': []}
cnt = 0
answer_stats = []
bs = 16

# Side file next to the input, opened in append mode and kept open.
tmp_path = data_path.replace('.json', '_qg_t5l35-sqd_tmp.json')
tmp_file = open(tmp_path, 'a')

for article in tqdm(train_set['data']):
    new_article = {'title': article['title'], 'paragraphs': []}
from flask import Flask, render_template, request
from pipelines import pipeline

app = Flask(__name__)

# Build the pipeline once at import time; every request reuses it.
print("Loading model...")
model = pipeline("question-generation")
print("Model loaded!")


@app.route('/')
def index():
    """Render the landing page."""
    return render_template("index.html", data="hey")


@app.route("/qselect", methods=["POST"])
def qselect():
    """Run the model on the ``sometext`` form field and render the results."""
    submitted = request.form['sometext']
    generated = model(submitted)
    return render_template("q-select.html", data=generated, text=submitted)


if __name__ == "__main__":
    app.run(debug=True, use_reloader=False)
from pipelines import pipeline from utils import score_questions from tableqa.agent import Agent from tableqa.nlp import qa import os qg = pipeline("e2e-qg") class insi: """ Get insights from texts """ def get_scores(self, questions): """ Parameters ---------- questions : `list` or `tuple` of `str` Each string is a question. Returns ------- `dict` Maps questions with scores """ self.qmaps = score_questions(questions) return self.qmaps def get_questions(self, text, csv=False):
#!/usr/bin/env python3 """Create a testing bench callable without arguments.""" from models import Baseline, CNN from pipelines import pipeline ROUNDS = 20 EPOCHS = 25 MBS = 100 LR = 1e-3 pipeline(Baseline, "Baseline", rounds=ROUNDS, epochs=EPOCHS, mini_batch_size=MBS, lr=LR) pipeline(Baseline, "Baseline with auxiliary", rounds=ROUNDS, epochs=EPOCHS, mini_batch_size=MBS, lr=LR, auxiliary=True) LR = 5e-4 pipeline(CNN, "CNN", rounds=ROUNDS, epochs=EPOCHS, mini_batch_size=MBS, lr=LR) pipeline(CNN, "CNN with auxiliary", rounds=ROUNDS, epochs=EPOCHS,
from pipelines import pipeline

# End-to-end question generation, run once on a sample passage
# (the result is discarded).
nlp = pipeline("e2e-qg")
nlp("Python is a programming language. Created by Guido van Rossum and first released in 1991.")

#### INFERENCE DATABASE ###
# NOTE(review): read_expr and Prover9 are not imported in the visible part of
# this script -- presumably they come from nltk; confirm.
inference_test_list = ['Socrates is a man', 'All men are mortal']

p1 = read_expr('man(socrates)')
p2 = read_expr('all x.(man(x) -> mortal(x))')
c = read_expr('mortal(socrates)')

# Uses Prover9 to logically infer that Socrates is mortal
Prover9().prove(c, [p1,p2])

## TODO: Need a way to convert Strings into Expressions
## TODO: make a copy of database_list, make each item an expression, set up Prover9 and equiv()
from pipelines import pipeline import wikipedia from nltk import sent_tokenize FILE = "first_topic.txt" # insert the document to direct the results to nlp = pipeline("multitask-qa-qg") # nlp = pipeline("question-generation", model="valhalla/t5-small-qg-prepend", # qg_format="prepend") def prepare(): """segment the text into chunks that meet the max_length criterium""" text = wikipedia.page("Cat").content index = 252 result = [] sentences = sent_tokenize(text) # segment text into list of sentences position = 0 container = [] for sentence in sentences: if (position <= index): number_words = len(sentence.split()) number_tokens = number_words + number_words - 1 container.append(sentence) position += number_tokens continue container = " ".join(container)
# Sample passages fed to the question-generation pipelines below.
text3 = "42 is the answer to life, universe and everything."

text4 = "Forrest Gump is a 1994 American comedy-drama film directed by Robert Zemeckis and written by Eric Roth. \
It is based on the 1986 novel of the same name by Winston Groom and stars Tom Hanks, Robin Wright, Gary Sinise, \
Mykelti Williamson and Sally Field. The story depicts several decades in the life of Forrest Gump (Hanks), \
a slow-witted but kind-hearted man from Alabama who witnesses and unwittingly influences several defining \
historical events in the 20th century United States. The film differs substantially from the novel."

"""## Single task QA"""

# Commented out IPython magic to ensure Python compatibility.
# %cd question_generation

from pipelines import pipeline

# Default question-generation pipeline.
nlp = pipeline("question-generation")

nlp(text3)

"""If you want to use the t5-base model, then pass the path through model parameter"""

nlp = pipeline("question-generation", model="valhalla/t5-base-qg-hl")

nlp(text3)

nlp(text4)

# NOTE(review): text2 is not defined in the visible part of this script --
# presumably it is assigned earlier in the file; confirm.
nlp(text2)
from pipelines import pipeline from text2text.text_generator import TextGenerator import nltk from nltk.stem.porter import * import spacy from sense2vec import Sense2VecComponent spacy_nlp = spacy.load("en_core_web_sm") s2v = Sense2VecComponent(spacy_nlp.vocab).from_disk("./s2v_old") spacy_nlp.add_pipe(s2v) t5_generator = pipeline("question-generation") t2t_generator = TextGenerator(output_type="question") def generate_from_T5(context, n=5): res = t5_generator(context) ans = [] que = [] for i, r in enumerate(res): if i < n: ans.append(r['answer']) que.append(r['question']) return que, ans def generate_from_t2t(context, n=5): res = t2t_generator.predict([context] * n) ans = [] que = [] for r in res: