Example 1
# Assumed imports for this excerpt; wiki_search is a project-specific helper not shown here.
import en_core_web_sm
from nltk.corpus import stopwords
from bert import QA


def get_model_api():

    model = QA('model')
    nlp = en_core_web_sm.load()
    stop_words = set(stopwords.words('english'))

    def model_api(question):
        try:
            question = [w.capitalize() for w in question.split(" ")]
            question = " ".join(question)
            doc = nlp(question)
            search = []

            for chunk in doc.noun_chunks:
                query = chunk.text
                check_query = nlp(query.lower())

                if 'PROPN' in [token.pos_ for token in check_query]:
                    querywords = query.split()
                    query_sentence = [
                        w.lower() for w in querywords
                        if not w.lower() in stop_words
                    ]
                    query_sentence = ' '.join(query_sentence)
                    search.append(query_sentence)

            search = [w for w in search if w != '']
            print(search)

            all_content = ''
            if len(search) != 0:
                for i in search:
                    i = [w.capitalize() for w in i.split(" ")]
                    i = " ".join(i)

                    for j in wiki_search(i.capitalize()):
                        all_content = all_content + j + '.'

            answer = model.predict(all_content, question)

            return answer['answer']

        except Exception:
            return ("Sorry, I don't know. Could you be more specific? "
                    "(The Wikipedia server may also be busy and not responding.)")

    return model_api
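A minimal usage sketch of the factory above (the question text is hypothetical; it assumes the 'model' weights, en_core_web_sm, and the wiki_search helper are available):

api = get_model_api()
# Ask a free-form question; the closure retrieves Wikipedia context and runs the QA model on it.
print(api("Who designed the Eiffel Tower?"))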
Example 2
    def __init__(self, locs, objs, relations, args):
        self.graph = nx.Graph()
        self.graph.add_nodes_from(locs,
                                  type='location',
                                  fillcolor="yellow",
                                  style="filled")
        self.graph.add_nodes_from(objs, type='object')
        self.graph.add_edges_from(relations)

        self.locations = {v for v in locs}
        self.objects = {v for v in objs}
        self.edge_labels = {}

        self.args = args

        # init GPT-2
        with open(args.input_text) as f:
            self.input_text = f.read()

        self.model = QA('model/albert-large-squad')
Example 3
from flask import Flask, render_template, request, send_file
import warnings
warnings.filterwarnings("ignore")
from bert import QA

DEBUG = False
app = Flask(__name__)
app.config.from_object(__name__)


model = QA("model")

@app.route('/')
def single_people_code():
    return render_template('index.html')

@app.route("/",methods=['POST','GET'])
def predict():
    if request.method == 'POST':

        results = request.form

        doc = results['passage']

        q = results['question']

        try:


            out = model.predict(doc, q)

            # Completion of the truncated excerpt (assumed): render the answer back into the page.
            return render_template('index.html', result=out['answer'])
        except Exception:
            return render_template('index.html', result="Model Failed")
Example 4
from flask import Flask, request, jsonify
from flask_cors import CORS

from bert import QA

app = Flask(__name__)
CORS(app)

model = QA("bert-large-cased-whole-word-masking-finetuned-squad")


@app.route("/predict", methods=['POST'])
def predict():
    doc = request.json["document"]
    q = request.json["question"]
    try:
        out = model.predict(doc, q)
        return jsonify({"result": out})
    except Exception as e:
        print(e)
        return jsonify({"result": "Model Failed"})


if __name__ == "__main__":
    app.run('0.0.0.0', port=8000)
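For reference, a minimal client sketch for the /predict endpoint above (hypothetical host and payload, matching the app.run call):

import requests

# POST a document/question pair and print the model's answer payload.
resp = requests.post(
    "http://localhost:8000/predict",
    json={
        "document": "The Space Shuttle was a partially reusable launch system.",
        "question": "What was the Space Shuttle?",
    },
)
print(resp.json()["result"])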
Example 5
def test(use_jit=False,
         fp16=False,
         onnx_runtime=False,
         export_onnx=False,
         tf_onnx=False,
         tf_version=False,
         vsl='none',
         min_batch=0,
         max_batch=1,
         num_predicts=300):
    document1 = 'Two partially reusable launch systems were developed, the Space Shuttle and Falcon 9. ' \
               'The Space Shuttle was partially reusable: the orbiter (which included the Space Shuttle ' \
               'main engines and the Orbital Maneuvering System engines), and the two solid rocket boosters ' \
               'were reused after several months of refitting work for each launch. The external tank was ' \
               'discarded after each flight. and the two solid rocket boosters were reused after several ' \
               'months of refitting work for each launch. The external tank was discarded after each flight.'
    document2 = 'This contrasts with expendable launch systems, where each launch vehicle is launched once ' \
                'and then discarded. No completely reusable orbital launch system has ever been created.'
    document3 = 'A reusable launch system (RLS, or reusable launch vehicle, RLV) is a launch system which is ' \
                'capable of launching a payload into space more than once. This contrasts with expendable ' \
                'launch systems, where each launch vehicle is launched once and then discarded. No completely ' \
                'reusable orbital launch system has ever been created.'
    question = 'How many partially reusable launch systems were developed?'
    # passages = [document1, document2, document3, document1, document2, document3, document1, document2, document3]
    # passages = [document1, document2, document3]
    # passages = [document1]

    if tf_onnx or tf_version:
        from multiprocessing import Pool

        convert_onnx_to_tf = False
        if tf_onnx and convert_onnx_to_tf:
            onnx_model = onnx.load(ONNX_PATH)
            # prepare tf representation
            tf_exp = onnx_tf.backend.prepare(onnx_model)
            # export the model
            tf_exp.export_graph(ONNX_TF_PB_PATH)

        onnx_pb_graph = tf.Graph()
        with onnx_pb_graph.as_default():
            tf_pb_path = ONNX_TF_PB_PATH if tf_onnx else TF_PB_PATH
            onnx_pb_graph_def = tf.GraphDef()
            with tf.gfile.GFile(tf_pb_path, 'rb') as fid:
                serialized_graph = fid.read()

            onnx_pb_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(onnx_pb_graph_def, name='')

            config = tf.ConfigProto()
            config.gpu_options.allow_growth = True
            if use_jit:
                # config.gpu_options.per_process_gpu_memory_fraction = 0.5
                config.log_device_placement = False
                config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1

            with tf.Session(config=config) as sess:
                # INFERENCE using session.run
                model = QA(MODEL_PATH,
                           use_jit=use_jit,
                           fp16=fp16,
                           onnx=onnx_runtime,
                           sess=sess,
                           vsl=vsl,
                           tf_onnx=tf_onnx)

                print('-- BENCHMARKING: JIT={} | FP16={} | ONNX_RUNTIME={} | '
                      'TF_ONNX_VERSION={} | TF_VERSION={} | EXACT_VSL={} --'.
                      format(use_jit, fp16, onnx_runtime, tf_onnx, tf_version,
                             vsl))
                for passage_batch in range(min_batch, max_batch):
                    passage_batch = pow(3, passage_batch - 1)
                    if passage_batch < 1:
                        passages = [document1]
                    else:
                        passages = []
                        for i in range(passage_batch):
                            passages.append(document1)
                            passages.append(document2)
                            passages.append(document3)

                    if max_batch > 2:
                        num_predicts = 50
                    time_taken, rps = measure_inference(
                        model, passages, question, num_predicts)
                    # print('Time taken for test: {} s'.format(time_taken))
                    print('RPS: {}'.format(rps))

                sess.close()

            del model, sess
    else:
        model = QA(MODEL_PATH,
                   use_jit=use_jit,
                   fp16=fp16,
                   onnx=onnx_runtime,
                   export_onnx=export_onnx,
                   vsl=vsl,
                   onnx_path=ONNX_PATH)

        if not export_onnx:
            print(
                '-- BENCHMARKING: JIT={} | FP16={} | ONNX_RUNTIME={} | '
                'TF_ONNX_VERSION={} | TF_VERSION={} | EXACT_VSL={} --'.format(
                    use_jit, fp16, onnx_runtime, tf_onnx, tf_version, vsl))
            for passage_batch in range(min_batch, max_batch):
                passage_batch = pow(3, passage_batch - 1)
                if passage_batch < 1:
                    passages = [document1]
                else:
                    passages = []
                    for i in range(passage_batch):
                        passages.append(document1)
                        passages.append(document2)
                        passages.append(document3)

                if max_batch > 2:
                    num_predicts = 50
                time_taken, rps = measure_inference(model, passages, question,
                                                    num_predicts)
                # print('Time taken for test: {} s'.format(time_taken))
                print('RPS: {}'.format(rps))
        del model
        torch.cuda.empty_cache()
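A hedged invocation sketch for the benchmark above (MODEL_PATH, the ONNX/TF path constants, and measure_inference are assumed to be defined elsewhere in the module):

if __name__ == '__main__':
    # Hypothetical run: plain PyTorch path, batches of 1, 3 and 9 passages.
    test(use_jit=False, fp16=False, onnx_runtime=False, min_batch=0, max_batch=3)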
Example 6
import os

from flask import Flask, request, jsonify
from flask_cors import CORS
from dotenv import load_dotenv

from bert import QA

app = Flask(__name__)
CORS(app)

load_dotenv()
model = QA(os.getenv("OUTPUT_DIR"))


@app.route("/predict", methods=['POST'])
def predict():
    doc = request.json["document"]
    q = request.json["question"]
    try:
        out = model.predict(doc, q)
        return jsonify({"result": out})
    except Exception as e:
        app.logger.warning(e)
        return jsonify({"result": "Model Failed"})


if __name__ == "__main__":
    app.run('0.0.0.0', port=8000, debug=True)
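Here the model directory is read from the OUTPUT_DIR environment variable via python-dotenv; a hedged sketch of providing it without a .env file (the './model' path is an assumption, not from the source):

import os

# Hypothetical: equivalent to a .env file containing the line OUTPUT_DIR=./model
os.environ.setdefault("OUTPUT_DIR", "./model")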
Example 7
import gradio as gr
import os, sys
sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), "utils"))
from bert import QA

model = QA('bert-large-uncased-whole-word-masking-finetuned-squad')
def qa_func(paragraph, question):
    return model.predict(paragraph, question)["answer"]

iface = gr.Interface(qa_func, 
    [
        gr.inputs.Textbox(lines=7, label="Context", default="Victoria has a written constitution enacted in 1975, but based on the 1855 colonial constitution, passed by the United Kingdom Parliament as the Victoria Constitution Act 1855, which establishes the Parliament as the state's law-making body for matters coming under state responsibility. The Victorian Constitution can be amended by the Parliament of Victoria, except for certain 'entrenched' provisions that require either an absolute majority in both houses, a three-fifths majority in both houses, or the approval of the Victorian people in a referendum, depending on the provision."), 
        gr.inputs.Textbox(lines=1, label="Question", default="When did Victoria enact its constitution?"), 
    ], 
    gr.outputs.Textbox(label="Answer"))
if __name__ == "__main__":
    iface.launch()
Example 8
from bert import QA

model = QA('model')

doc = "Victoria has a written constitution enacted in 1975, but based on the 1855 colonial constitution, passed by the United Kingdom Parliament as the Victoria Constitution Act 1855, which establishes the Parliament as the state's law-making body for matters coming under state responsibility. The Victorian Constitution can be amended by the Parliament of Victoria, except for certain 'entrenched' provisions that require either an absolute majority in both houses, a three-fifths majority in both houses, or the approval of the Victorian people in a referendum, depending on the provision."
doc = "According to the Indian census of 2001, there were 30,803,747 speakers of Malayalam in Kerala, making up 93.2% of the total number of Malayalam speakers in India, and 96.7% of the total population of the state. There were a further 701,673 (2.1% of the total number) in Karnataka, 557,705 (1.7%) in Tamil Nadu and 406,358 (1.2%) in Maharashtra. The number of Malayalam speakers in Lakshadweep is 51,100, which is only 0.15% of the total number, but is as much as about 84% of the population of Lakshadweep. In all, Malayalis made up 3.22% of the total Indian population in 2001. Of the total 33,066,392 Malayalam speakers in India in 2001, 33,015,420 spoke the standard dialects, 19,643 spoke the Yerava dialect and 31,329 spoke non-standard regional variations like Eranadan. As per the 1991 census data, 28.85% of all Malayalam speakers in India spoke a second language and 19.64% of the total knew three or more languages.  Large numbers of Malayalis have settled in Bangalore, Mangalore, Delhi, Coimbatore, Hyderabad, Mumbai (Bombay), Ahmedabad, Pune, and Chennai (Madras). A large number of Malayalis have also emigrated to the Middle East, the United States, and Europe. Accessed November 22, 2014.</ref> including a large number of professionals. There were 7,093 Malayalam speakers in Australia in 2006. The 2001 Canadian census reported 7,070 people who listed Malayalam as their mother tongue, mostly in the Greater Toronto Area and Southern Ontario. In 2010, the Census of Population of Singapore reported that there were 26,348 Malayalees in Singapore. The 2006 New Zealand census reported 2,139 speakers. 134 Malayalam speaking households were reported in 1956 in Fiji. There is also a considerable Malayali population in the Persian Gulf regions, especially in Bahrain, Muscat, Doha, Dubai, Abu Dhabi, Kuwait and European region mainly in London.  World Malayalee Council, the organisation working with the Malayali diaspora across the Globe has embarked upon a project for making a data bank of the diaspora. CANNOTANSWER"
q = 'When did Victoria enact its constitution?'
q = "What other languages are spoken there?"
answer = model.predict(doc, q)

print(answer['answer'])
# 1975

# dict_keys(['answer', 'start', 'end', 'confidence', 'document'])
Example 9
from word2vec_repo.DocSim import DocSim
from gensim.models import KeyedVectors  # needed for KeyedVectors.load below
from nltk.corpus import stopwords
import nltk
import os
import numpy as np

# Some of the code is from https://stackoverflow.com/a/8897648

model_path = 'word2vec_repo/model.bin'
stopwords = stopwords.words('english')

model = KeyedVectors.load(model_path)
ds = DocSim(model, stopwords=stopwords)

from bert import QA
model = QA('BERTap/model')


# Fetch the top 3 documents using the tf-idf weighting method (for a large corpus, set it to 10);
# matching_score is a project-specific helper that is not shown in this excerpt.
def get_doc(qu):
    final_docs = matching_score(10, qu)
    #print(final_docs)
    nd = len(final_docs)
    if len(final_docs) > 3:
        nd = 3
    answer_doc = []
    for i in range(nd):
        file = open(final_docs[i], 'r', encoding='cp1250')
        text = file.read().strip()
        file.close()
        # Completion of the truncated excerpt (assumed): collect and return the passages.
        answer_doc.append(text)
    return answer_doc
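A hedged sketch of combining the retrieval and QA pieces above (the question string is hypothetical; it assumes matching_score and the model files exist):

question = "What is the capital of Kerala?"
passages = get_doc(question)
# Concatenate the retrieved passages and let the BERT QA model extract the answer span.
prediction = model.predict(' '.join(passages), question)
print(prediction['answer'])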
Example 10
import os
import time

# Assumed imports for this excerpt; load_data and standardize_data are project helpers not shown here.
from flask import Flask
from sklearn.feature_extraction.text import TfidfVectorizer
from bert import QA

basedir = os.path.abspath(os.path.dirname(__file__))
app = Flask(__name__)
SECRET_KEY = os.urandom(32)
app.config['SECRET_KEY'] = SECRET_KEY

MODEL_PATH = os.path.join(basedir, 'model')
DATA_PATH = os.path.join(basedir, 'dataUIT')
data = load_data(DATA_PATH)
stopwords = set(open(os.path.join(basedir, 'stopwords.txt'), encoding="utf-8").read().split(' ')[:-1])

# Load the QA model
print("Loading model...")
start = time.time()
model = QA(MODEL_PATH)  # path to the fine-tuned model
end = time.time()
print("Time to load model: " + str(round(end - start, 2)) + "s")

#Building index
print('Building index...')
start = time.time()
data_standard = standardize_data(data, stopwords)
vect = TfidfVectorizer(min_df=1, max_df=0.8, max_features=5000, sublinear_tf=True, ngram_range=(1, 3))
vect.fit(data_standard)
end = time.time()
print("Time building index: "+str(round((end - start),2)))



@app.route('/', methods=['GET','POST'])
Example 11
from flask import Flask,request,jsonify
from flask_cors import CORS

from bert import QA

app = Flask(__name__)
CORS(app)

#model = QA("model")
model = QA("/home/k3ijo/bert/nlp_model/multi_cased_L-12_H-768_A-12/")

@app.route("/predict",methods=['POST'])
def predict():
    doc = request.json["document"]
    q = request.json["question"]
    try:
        out = model.predict(doc,q)
        return jsonify({"result":out})
    except Exception as e:
        print(e)
        return jsonify({"result":"Model Failed"})

if __name__ == "__main__":
    app.run('0.0.0.0',port=8000)
Example 12
from flask import Flask, request, jsonify
import flask_cors
import os

from bert import QA
from gcp import GCP

os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "fyp-qa-eb7816dfb87e.json"
os.environ["CUDA_VISIBLE_DEVICES"] = ""

app = Flask(__name__, static_url_path='/static')
flask_cors.CORS(app)

model = QA()
gcp = GCP()


@app.route("/file/raw", methods=['POST'])
def file_upload_raw():
    try:
        filename = request.json["filename"]
        text = request.json["text"]

        if filename == '' or text == '':
            return 'File name or text is empty.', 400

        doc_id = gcp.upload_raw(filename, text)

        return jsonify({'success': True, 'id': doc_id})
    except Exception as e:
        print(e)
        # Completion of the truncated excerpt (assumed): report the failure to the client.
        return jsonify({'success': False}), 500
Example 13
def load_model(_):

    model = QA('model')
    return model
Example 14
from flask import Flask, request, jsonify
from flask_cors import CORS

from bert import QA

app = Flask(__name__)
CORS(app)
modelName = "mrm8488/bert-small-finetuned-squadv2"
model = QA(modelName)


@app.route("/predict", methods=['POST'])
def predict():
    doc = request.json["document"]
    q = request.json["question"]
    try:
        out = model.predict(doc, q)
        return jsonify({"result": out})
    except Exception as e:
        print(e)
        return jsonify({"result": "Model Failed"})


if __name__ == "__main__":
    app.run('0.0.0.0', port=8000)