def __init__(self):
    """Load the spaCy pipeline, the OpenNRE model and the normalized UMM table.

    Attributes set:
        nlp: spaCy pipeline loaded from "en_core_web_lg".
        model: OpenNRE model loaded as 'wiki80_cnn_softmax'.
        umm_normalized_path: Location of the normalized CMR CSV file.
        df_umm: DataFrame read from ``umm_normalized_path``.
        sentences_extend: Starts as an empty list.
        filter_entity_labels: The labels "CARDINAL" and "DATE"
            (presumably entity labels to exclude; confirm against the callers).
    """
    self.nlp = spacy.load("en_core_web_lg")
    self.model = opennre.get_model('wiki80_cnn_softmax')
    self.umm_normalized_path = "/content/drive/MyDrive/Archive/NASA-CMR/UMM_DATA/cmr_normalized.csv"
    # The path only exists as an attribute; a bare `umm_normalized_path`
    # is not a local name in this method.
    self.df_umm = pd.read_csv(self.umm_normalized_path)
    self.sentences_extend = []
    self.filter_entity_labels = ["CARDINAL", "DATE"]
def main():
    """Prompt for a URL and report subsidiary / ownership relations on the page.

    The page is fetched, converted to text, split into sentences and passed
    through ``two_sent_maker``. Every resulting item with more than one
    recognized entity is scored pair by pair with the
    'wiki80_bertentity_softmax' model. Items producing a 'subsidiary' or
    'owned by' prediction above 0.85 are printed, collected, and finally
    written to 'pars_BERT.csv'.
    """
    url = input('Enter a URL: ')
    page_text = html2text(get_html(url))
    sentence_groups = two_sent_maker(nltk.sent_tokenize(page_text))
    model = opennre.get_model('wiki80_bertentity_softmax')

    matches = []
    for sent in sentence_groups:
        ents = run_re(sent)
        if not (len(ents) > 1 and ents != 'Empty'):
            print('No relevant entities were recognized')
            continue

        pairs = entities_combos(ents)
        if len(pairs) > 10:
            # Many entities produce a very large number of pairs, so only
            # the first 11 are scored.
            pairs = pairs[:11]

        for pair in pairs:
            prediction = model.infer({
                'text': sent,
                'h': {'pos': pair[0]},
                't': {'pos': pair[1]},
            })
            if prediction[0] == 'subsidiary' and prediction[1] > 0.85:
                print(sent)
                print(prediction)
                parent = ents[pair[0]]
                child = ents[pair[1]]
                print(f'{child} is a subsidiary company of {parent}')
                matches.append(sent)
            elif prediction[0] == 'owned by' and prediction[1] > 0.85:
                child = ents[pair[0]]
                parent = ents[pair[1]]
                print(sent)
                print(prediction)
                matches.append(sent)
                print(f'{child} is a subsidiary company of {parent}')

    pd.DataFrame(matches).to_csv('pars_BERT.csv')
def test_wiki80_cnn_softmax(self):
    """Check that 'wiki80_cnn_softmax' labels the sample sentence as 'father'.

    The head span (18, 46) covers "Máel Dúin mac Máele Fithrich" and the
    tail span (78, 91) covers "Áed Uaridnach"; the score must match the
    recorded value to within 1e-6.
    """
    sample = {
        'text': 'He was the son of Máel Dúin mac Máele Fithrich, and grandson of the high king Áed Uaridnach (died 612).',
        'h': {'pos': (18, 46)},
        't': {'pos': (78, 91)},
    }
    expected_score = 0.7500484585762024

    model = opennre.get_model('wiki80_cnn_softmax')
    result = model.infer(sample)
    print(result)

    self.assertEqual(result[0], 'father')
    self.assertTrue(abs(result[1] - expected_score) < 1e-6)
def nre(text, head, tail):
    """Infer the relation between two entity mentions in ``text``.

    The first occurrence of each mention is located with ``str.find`` and
    the resulting (start, end) character spans are handed to the
    'wiki80_bert_softmax' model.

    Args:
        text: Sentence that contains both mentions.
        head: Surface string of the head entity.
        tail: Surface string of the tail entity.

    Returns:
        The value returned by ``model.infer`` (it is also printed).

    Raises:
        ValueError: If ``head`` or ``tail`` does not occur in ``text``.
    """
    h_start = text.find(head)
    t_start = text.find(tail)
    # str.find reports a missing substring as -1; turning that into a span
    # would silently point the model at the wrong characters. Checking
    # before the model is loaded also avoids the expensive load on bad input.
    if h_start == -1:
        raise ValueError(f'head entity {head!r} not found in text')
    if t_start == -1:
        raise ValueError(f'tail entity {tail!r} not found in text')

    model = opennre.get_model('wiki80_bert_softmax')
    relation = model.infer({
        'text': text,
        'h': {'pos': (h_start, h_start + len(head))},
        't': {'pos': (t_start, t_start + len(tail))},
    })
    print(relation)
    return relation
def main():
    """Prompt for a URL and return the passages with ownership relations.

    The page text is passed to ``get_prediction`` together with the pickled
    vectorizer and regression model. Every returned passage with more than
    one recognized entity is then scored pair by pair with the
    'wiki80_bertentity_softmax' model. Passages producing a 'subsidiary' or
    'owned by' prediction above 0.85 are collected, written to
    'pars_comb_BOW_BERT.csv' and returned.

    Returns:
        list: The collected passages, one entry per qualifying prediction.
    """
    url = input('Please enter URL of choice: ')
    page_html = get_html(url)
    text = html2text(page_html)

    # NOTE: pickle.load can execute arbitrary code; these files must come
    # from a trusted source. Context managers close the files promptly.
    with open('paragraph_reg_model.pkl', 'rb') as model_file:
        reg_model = pickle.load(model_file)
    with open('paragraph_vectorizer.pkl', 'rb') as vectorizer_file:
        vectorizer = pickle.load(vectorizer_file)

    pars = get_prediction(text, vectorizer, reg_model)
    model = opennre.get_model('wiki80_bertentity_softmax')

    final_pars = []
    for sent in pars:
        ents = run_re(sent)
        if len(ents) > 1 and ents != 'Empty':
            ent_combos = entities_combos(ents)
            if len(ent_combos) > 10:
                # Many entities produce a very large number of pairs, so
                # only the first 11 are scored.
                ent_combos = ent_combos[0:11]
            for combo in ent_combos:
                prediction = model.infer({
                    'text': sent,
                    'h': {'pos': combo[0]},
                    't': {'pos': combo[1]},
                })
                if prediction[0] == 'subsidiary' and prediction[1] > 0.85:
                    head_ent = ents[combo[0]]
                    tail_ent = ents[combo[1]]
                    print(f'{tail_ent} is a subsidiary company of {head_ent}')
                    final_pars.append(sent)
                elif prediction[0] == 'owned by' and prediction[1] > 0.85:
                    head_ent = ents[combo[0]]
                    tail_ent = ents[combo[1]]
                    final_pars.append(sent)
                    print(f'{head_ent} is a subsidiary company of {tail_ent}')
        else:
            print('No relevant entities were recognized')

    df = pd.DataFrame(final_pars)
    df.to_csv('pars_comb_BOW_BERT.csv')
    return final_pars
import html
''' __author__ = "Shahan A. Memon" __copyright__ = "Copyright 2020, Carnegie Mellon" import sys import opennre from nltk.corpus import stopwords from nltk.tokenize import WhitespaceTokenizer import spacy import itertools from itertools import combinations MODEL_NRE = 'wiki80_bert_softmax' model = opennre.get_model(MODEL_NRE) def infer(text, pos_one_st, pos_one_end, pos_two_st, pos_two_end): """ This function will be used to infer the relationship between two entities in text[pos_one_st:post_one_end] and text[pos_two_st:pos_two_end] Args: text: Any line of text pos_one_st: start index of first entitity pos_one_end: end index of first entity pos_two_st: start index of second entity pos_two_end: end index of second entity
from flask import Flask, send_file, request, make_response
from flask_cors import CORS, cross_origin
import os
import opennre

app = Flask(__name__)
cors = CORS(app)

# Both models are loaded once at import time and shared by every call.
modelBERT = opennre.get_model('wiki80_bert_softmax')
modelCNN = opennre.get_model('wiki80_cnn_softmax')


@app.route('/')
def run():
    """Root endpoint: returns a fixed status string."""
    return 'Relation running'


def extract_relation(ner_output):
    """Label each entity pair with the more confident of the two models.

    Every item's 'h' and 't' positions are converted to tuples in place,
    then the item is scored by the CNN and the BERT model. When at least
    one score exceeds 0.5, the label of the higher-scoring model (CNN on a
    tie) is stored under 'relation' and the item is kept.

    Args:
        ner_output: Iterable of dicts accepted by ``model.infer``; each must
            have 'h' and 't' entries containing a 'pos' value.

    Returns:
        list: The (mutated) items that received a 'relation'.
    """
    labelled = []
    for item in ner_output:
        for side in ('h', 't'):
            item[side]['pos'] = tuple(item[side]['pos'])

        cnn_pred = modelCNN.infer(item)
        bert_pred = modelBERT.infer(item)
        if not (cnn_pred[1] > 0.5 or bert_pred[1] > 0.5):
            continue

        winner = cnn_pred if cnn_pred[1] >= bert_pred[1] else bert_pred
        item['relation'] = winner[0]
        labelled.append(item)
    return labelled
#!D:\ProgramData\Anaconda3\python3.6.5 (3.7.3) # coding: utf-8 # @Author: Mark Clemens # @Date: 2021/02/23 # @File: nre.py * '''@notes: /TEST/ERR/OK; Scheme: ''' import opennre import torch print(torch.cuda.is_available()) model = opennre.get_model('wiki80_cnn_softmax') res = model.infer({ 'text': 'He was the son of Máel Dúin mac Máele Fithrich, and grandson of the high king Áed Uaridnach (died 612).', 'h': { 'pos': (18, 46) }, 't': { 'pos': (78, 91) } }) # ('father', 0.5108704566955566) print(res) # model = opennre.get_model('wiki80_bert_softmax') # data = { # "text": "one embodiment includes combining sensors that measure changes in physiological signals of stress such as speech rate and pitch, galvanic skin response, or heart rate variability, and, using a machine learning algorithm on personalized data, can determine whether these changes are likely to benefit from regulation.", # # "relation": "NA", # "h": {"token": "sensor", "pos": (35, 41), "id": "entity_1007"},
def load_model(self):
    """Load the OpenNRE model named by ``self.model_name`` into ``self._model``.

    ``opennre`` is imported inside the method, so the package is only
    imported when a model is actually loaded.

    Returns:
        This instance, so the call can be chained.
    """
    import opennre

    loaded = opennre.get_model(self.model_name)
    self._model = loaded
    return self
def model_fn(model_dir):
    """Return the 'finre_pretrain_entity' OpenNRE model rooted at '/opt/ml/code'.

    ``model_dir`` is accepted but not used; the root path is fixed.
    """
    return opennre.get_model('finre_pretrain_entity', '/opt/ml/code')
# -*- coding: utf-8 -*- import sys import json import os import warnings import flask import opennre # The flask app for serving predictions app = flask.Flask(__name__) model = opennre.get_model('finre_pretrain_entity', '/opt/ml/code') @app.route('/ping', methods=['GET']) def ping(): """Determine if the container is working and healthy. In this sample container, we declare it healthy if we can load the model successfully.""" # health = ScoringService.get_model() is not None # You can insert a health check here health = 1 status = 200 if health else 404 # print("===================== PING ===================") return flask.Response(response="{'status': 'Healthy'}\n", status=status, mimetype='application/json') @app.route('/invocations', methods=['POST']) def invocations(): """Do an inference on a single batch of data. In this sample server, we take data as CSV, convert it to a pandas data frame for internal use and then convert the predictions back to CSV (which really
import spacy from flask import Flask, request import neuralcoref import opennre ENTITY_TYPES = ["human", "person", "company", "enterprise", "business", "geographic region", "human settlement", "geographic entity", "territorial entity type", "organization"] # Load SpaCy nlp = spacy.load('en') # Add neural coref to SpaCy's pipe neuralcoref.add_to_pipe(nlp) # Load opennre relation_model = opennre.get_model('wiki80_bert_softmax') # Load NLTK nltk.download('punkt') def wikifier(text, lang="en", threshold=0.8): """Function that fetches entity linking results from wikifier.com API""" # Prepare the URL. data = urllib.parse.urlencode([ ("text", text), ("lang", lang), ("userKey", "tgbdmkpmkluegqfbawcwjywieevmza"), ("pageRankSqThreshold", "%g" % threshold), ("applyPageRankSqThreshold", "true"), ("nTopDfValuesToIgnore", "100"), ("nWordsToIgnoreFromList", "100"), ("wikiDataClasses", "true"), ("wikiDataClassIds", "false"),
def infer(text: str, head: str, tail: str, model_id: str = 'wiki80_cnn_softmax'):
    """Run OpenNRE relation inference for ``head`` and ``tail`` within ``text``.

    Args:
        text: Sentence containing both entities.
        head: Head entity string, located via ``start_end_indices``.
        tail: Tail entity string, located via ``start_end_indices``.
        model_id: Name of the OpenNRE model to load; the model is rooted at
            'opennre_downloads' under the current working directory.

    Returns:
        The value returned by the model's ``infer`` method.
    """
    download_root = os.path.join(os.getcwd(), 'opennre_downloads')
    model = opennre.get_model(model_id, root_path=download_root)

    head_span = start_end_indices(text, head)
    tail_span = start_end_indices(text, tail)
    query = {'text': text, 'h': {'pos': head_span}, 't': {'pos': tail_span}}
    return model.infer(query)