Exemple #1
0
 def runNER(text):
     """Run the NER model over ``text`` chunk by chunk and join the results.

     ``text`` is split with ``ContactFinding.splitIntoChunks``; each chunk is
     passed to ``Ner.predict`` and the per-chunk outputs are concatenated with
     ``+`` in chunk order.

     Returns:
         The concatenated predictions, or an empty list when the splitter
         produces no chunks (previously this raised ``IndexError`` on
         ``chunks[0]``).
     """
     # NOTE(review): the model is loaded from "out_large/" on every call;
     # consider hoisting/caching it if this function is called repeatedly.
     model = Ner("out_large/")
     chunks = ContactFinding.splitIntoChunks(text)
     if len(chunks) == 0:
         # Nothing to tag. Assumes predict() returns a list — TODO confirm.
         return []
     output = model.predict(chunks[0])
     for chunk in chunks[1:]:
         output = output + model.predict(chunk)
     return output
Exemple #2
0
    def __init__(self,
                 model_dir,
                 dim_relation_embed,
                 n_classes,
                 activation: str = "relu"):
        """Build a relation-extraction head on top of a fine-tuned NER BERT.

        Args:
            model_dir: Passed straight to ``Ner`` to load the fine-tuned model.
            dim_relation_embed: Width of the relation embedding; second
                dimension of ``_U``/``_W``, first dimension of ``_V`` and the
                length of ``_b``.
            n_classes: Number of output classes; second dimension of ``_V``.
            activation: Name of the activation, stored as ``_activation``
                (how it is applied is not visible in this constructor).
        """
        super(relation_extracter, self).__init__()
        # Reuse the encoder, tokenizer and sequence limit of the NER wrapper.
        self.ner = Ner(model_dir)  # load Ner fine-tuned bert
        self.bert = self.ner.model.bert
        self.config = self.bert.config
        self.dropout = nn.Dropout(self.config.hidden_dropout_prob)
        self.tokeniser = self.ner.tokenizer
        self.hidden_size = self.config.hidden_size
        self.max_seq_length = self.ner.max_seq_length
        self._activation = activation
        self._n_classes = n_classes

        # torch.Tensor(...) only allocates storage; the values are undefined
        # until init_weights() below runs (presumably it fills them — its body
        # is defined elsewhere).
        self._U = nn.Parameter(
            torch.Tensor(self.hidden_size, dim_relation_embed))
        self._W = nn.Parameter(
            torch.Tensor(self.hidden_size, dim_relation_embed))
        self._V = nn.Parameter(torch.Tensor(dim_relation_embed, n_classes))
        self._b = nn.Parameter(torch.Tensor(dim_relation_embed))

        self.init_weights()

        self._relation_metric = RelationF1Measure()

        # Loss takes raw logits (sigmoid is applied inside BCEWithLogitsLoss).
        self._loss_fn = nn.BCEWithLogitsLoss()
Exemple #3
0
 def __init__(self, model_dir):
     """Build a POS classifier on top of the BERT encoder of a fine-tuned NER model.

     Args:
         model_dir: Passed straight to ``Ner`` to load the fine-tuned model.
     """
     super(ner2pos, self).__init__()
     # Reuse the encoder, its config and the tokenizer from the NER wrapper.
     self.ner_module = Ner(model_dir)
     self.bert = self.ner_module.model.bert
     self.config = self.bert.config
     self.dropout = nn.Dropout(self.config.hidden_dropout_prob)
     self.tokeniser = self.ner_module.tokenizer
     self.PosProcessor = PosProcessor()
     # One more output than the processor's label list.
     # NOTE(review): presumably the extra slot is for a padding/ignore label — confirm.
     self.num_labels = len(self.PosProcessor.get_labels()) + 1
     # Linear head mapping encoder hidden states to label scores.
     self.classifier = nn.Linear(self.config.hidden_size, self.num_labels)
from bert import Ner

model = Ner("out_base")

# Unpunctuated sample sentence; the model is fed one character per token,
# so the characters are joined with single spaces before prediction.
_t = '我是一个很好的记录者这大概也是我孤独的由来我又被谁记录着呢到最后我才明白所有的他们拼凑出来就是一个完整的我'
_t = ' '.join(_t)
_o = model.predict(_t)

# Rebuild the text: every token contributes its word, and any tag other than
# the plain 'word' tag is emitted right after it.
res = []
for _item in _o:
    if _item['tag'] == 'word':
        res.append(_item['word'])
    else:
        res.extend((_item['word'], _item['tag']))

# "Before segmentation" line.
print(f'断句前: {_t}')

# "After segmentation" line; the "#other#" placeholder tag is stripped out.
_joined = ''.join(res)
print('断句后: ' + _joined.replace("#other#", ""))
Exemple #5
0
from bert import Ner
import pprint
import pandas as pd
import re
import os
import json
import glob
import numpy as np
import nltk
# Make sure the NLTK 'punkt' resource is available; this runs at import time.
nltk.download('punkt')

# Shared pretty-printer (4-space indent) for dumping prediction structures.
pp = pprint.PrettyPrinter(indent=4)
# NER model loaded once at module level from the "output/" directory.
model = Ner("output/")


# mylist = []

# for chunk in  pd.read_csv('emails.csv', chunksize=50000):
#     mylist.append(chunk)
#     break

# df = pd.concat(mylist, axis= 0)
# del mylist
# print(df.head())


# output = model.predict("In 1949, an Italian Jesuit priest named Roberto Busa presented a pitch to Thomas J. Watson, of I.B.M. Busa was trained in philosophy, and had just published his thesis on St. Thomas Aquinas, the Catholic theologian with a famously unmanageable œuvre. ")
# output = model.predict("Cecil,	Can you take a look at this presentation	and see if the numbers make sense?	thanks,	Monika") 
# pp.pprint(output)

Exemple #6
0
# -*- coding: utf-8 -*-
"""
Created on Tue Jun 16 10:45:41 2020

@author: dohertyguirand
"""

from bert import Ner
import PyPDF2 as p2

import sys
# Redirect everything printed from here on into log32.txt.
# NOTE(review): the handle is never closed/flushed explicitly in the visible code.
sys.stdout = open('log32.txt', 'w')

# NOTE(review): the PDF handle is likewise left open in the visible code.
PDFfile = open("PDACW260.pdf", "rb")
pdfread = p2.PdfFileReader(PDFfile)
model = Ner("out_large/")

# Concatenate the extracted text of every page, in page order.
i = 0
fullText = ""
while i < pdfread.getNumPages():
    pageinfo = pdfread.getPage(i)
    fullText += str(pageinfo.extractText())
    i = i + 1

dic = {}
# Fixed window size in characters; windows are cut purely by offset, so a
# word or entity can be split across two chunks.
n = 500
chunks = [fullText[i:i + n] for i in range(0, len(fullText), n)]
seen = []
for c in chunks:
    output = model.predict(c)
    line = ""
Exemple #7
0
import json
from collections import defaultdict
from bert import Ner
model = Ner("out/")


def main():
    """Evaluate one tag key on the first 1000 loaded rows and print the result."""
    key = 'B-INV'
    # key = 'B-ADDR'
    rows = load_data()
    sample = rows[:1000]
    percent = test(sample, key)
    print(key, percent)


def test(data, key):
    total = 0
    checked = 0
    failed = []
    for row in data:
        valid = validate(row['Content'], key, row[key])
        checked += 1
        if valid is True:
            total += 1
            percent = total / checked
            print('OK ', percent)
        else:
            failed.append(row)
            print('F')

    with open('data/failed.json', 'w') as outfile:
Exemple #8
0
import json
from bert import Ner
import snoop

model = Ner('out_base/')


def convert_entities(entities):
    """Collapse per-token ``(word, tag)`` pairs into a set of entities.

    Tags are expected to be ``"O"`` or ``"<position>-<type>"``:

    * ``U-<type>`` emits the single word as an entity.
    * ``B-<type>`` starts a new entity, ``I-<type>`` appends the word to the
      current one (space separated).
    * An entity is emitted as soon as the following token's tag does not start
      with ``I``.

    Args:
        entities: Sequence (any iterable) of ``(word, tag)`` pairs.

    Returns:
        A set of ``(text, type)`` tuples.

    Raises:
        ValueError: If a non-``"O"`` tag contains no ``"-"`` separator.
    """
    # Materialise so tuples/generators work too (list + list below).
    tokens = list(entities)
    ents = set()
    w = ''
    # Sentinel "O" token so the last real token also has a successor.
    lookahead = tokens[1:] + [(".", "O")]
    for (word, tag), (_, next_tag) in zip(tokens, lookahead):
        if tag == "O":
            continue
        # maxsplit=1 keeps entity types that themselves contain a hyphen.
        ent_position, ent_type = tag.split("-", 1)
        if ent_position == "U":
            ents.add((word, ent_type))
            continue
        if ent_position == "B":
            w = word
        elif ent_position == "I":
            # A stray I- tag with no open entity starts one instead of
            # producing a leading space.
            w = w + " " + word if w else word
        if next_tag.split("-", 1)[0] != "I":
            if w:
                ents.add((w, ent_type))
            # Reset so a later stray I- tag cannot extend a finished entity.
            w = ''
    return ents


def get_store_results(text):
    output = model.predict(text)
    for entity in convert_entities([(sin_put["word"], sin_put["tag"])
                                    for sin_put in output]):
        if entity[1] == "ORG":
Exemple #9
0
from bert import Ner

model = Ner("out/")

# output = model.predict("KD13921 RG19049640, 19049641")
output = model.predict("Rg.19048725 31.01.2019, 3.183,98")

# Each prediction is a mapping; only its first (token -> info) pair is used.
# Tokens tagged 'O' are skipped, everything else is printed as "<token> <tag>".
for word in output:
    key, value = list(word.items())[0]
    tag = value['tag']
    if tag == 'O':
        continue
    print("%s %s" % (key, tag))
Exemple #10
0
from bert import Ner
import nltk

model = Ner("out_base/")


def extract_location(text):
    """Return the words whose predicted tag contains ``'B-LOC'``.

    Runs the module-level ``model`` on ``text`` and keeps, in prediction
    order, the ``'word'`` of every entry whose ``'tag'`` includes the
    substring ``'B-LOC'``.
    """
    return [
        ent['word']
        for ent in model.predict(text)
        if 'B-LOC' in ent['tag']
    ]
Exemple #11
0
# -*- coding: utf-8 -*-

#BERT
from bert import Ner

# Load the NER model from the "out_base/" directory.
model = Ner("out_base/")

text = "My name is Gabriel and I live in Brazil"

# Tag the sample sentence and dump the raw prediction structure.
out = model.predict(text)

print(out)

#Spacy
from bert import Ner

# Load the NER model from the "model_sep20/" directory.
model = Ner("model_sep20/")

# Tag a short sample sentence and dump the raw prediction structure.
output = model.predict("Steve went to Paris")

print(output)
Exemple #13
0
from flask import Flask, request, jsonify
from flask_cors import CORS

from bert import Ner

app = Flask(__name__)
CORS(app)

model = Ner("out_base")


@app.route("/predict", methods=['POST'])
def predict():
    """Tag the ``text`` field of a JSON POST body with the NER model.

    Request: JSON object containing a ``"text"`` key.

    Responses (all JSON with a single ``"result"`` key):
        * success: the output of ``model.predict``;
        * prediction/serialisation failure: ``"Model Failed"`` (status
          unchanged, for backward compatibility with existing clients);
        * malformed request: an explanatory message with HTTP 400 (this used
          to escape as an unhandled exception, i.e. HTTP 500).
    """
    # silent=True yields None instead of raising on a missing/invalid JSON body.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict) or "text" not in payload:
        return jsonify({
            "result": "Invalid request: expected a JSON object with a 'text' field"
        }), 400
    try:
        out = model.predict(payload["text"])
        return jsonify({"result": out})
    except Exception:
        # Top-level request boundary: keep the service alive, but record the
        # full traceback instead of printing only the message.
        app.logger.exception("NER prediction failed")
        return jsonify({"result": "Model Failed"})


if __name__ == "__main__":
    app.run('0.0.0.0', port=8000)
Exemple #14
0
            if ent is not None:
                ents.append(ent.lower())
            ent = k
        if v['tag'].startswith('I') or v['tag'].startswith('X'):
            if ent is None:
                ent = k
            elif k.startswith("'"):
                ent += k
            else:
                ent += ' ' + k
    return ents


if __name__ == '__main__':

    model = Ner("../../../ner/BERT-NER/out/")
    device = torch.device("cuda")
    model.model.to(device)

    files = [
        "../../../Squad2Generative/data/train-v2.0.json",
        "../../../Squad2Generative/data/dev-v2.0.json"
    ]
    for file in files:
        questions = []
        contexts = []
        examples = []
        labels = []
        id2idx = {}
        with open(file, 'r') as handle:
            jdata = json.load(handle)
Exemple #15
0
from bert import Ner

model = Ner("out_base/")

# Two restaurant-description sentences used as smoke-test inputs.
sentence1 = "The customer rating is average for the child friendly fast food place is average and its in Riverside near the Café Rouge and The Golden Curry"
sentence2 = "Located in the City Centre by Burger King with a one star rating from customers is a coffee shop by the name of The Eagle It's cheap and for adults only"

# Predict both first, then print, so the output order matches the inputs.
output1 = model.predict(sentence1)
output2 = model.predict(sentence2)
print(output1)
print(output2)
"""NER Web application"""

from bert import Ner
from flask import Flask, jsonify, request
from flask_cors import CORS

app = Flask(__name__)
CORS(app)

model = Ner("out_large")


@app.route("/predict", methods=["POST"])
def predict():
    """Tag the ``text`` field of a JSON POST body with the NER model.

    Request: JSON object containing a ``"text"`` key.

    Responses (all JSON with a single ``"result"`` key):
        * success: the output of ``model.predict``;
        * prediction/serialisation failure: ``"Model Failed"`` (status
          unchanged, for backward compatibility with existing clients);
        * malformed request: an explanatory message with HTTP 400 (this used
          to escape as an unhandled exception, i.e. HTTP 500).
    """
    # silent=True yields None instead of raising on a missing/invalid JSON body.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict) or "text" not in payload:
        return jsonify({
            "result": "Invalid request: expected a JSON object with a 'text' field"
        }), 400
    try:
        out = model.predict(payload["text"])
        return jsonify({"result": out})
    except Exception:
        # Top-level request boundary: keep the service alive, but record the
        # full traceback instead of printing only the message.
        app.logger.exception("NER prediction failed")
        return jsonify({"result": "Model Failed"})


if __name__ == "__main__":
    app.run("0.0.0.0", port=8000)
Exemple #17
0
import torch
import pandas as pd
import re
from transformers import *
# `sys` was used here without being imported in this script, which raises
# NameError before anything else runs; import it right where it is needed.
import sys

# Make the local BERT-NER checkout importable so `from bert import Ner` resolves.
sys.path.append('/home/long8v/BERT-NER')
from bert import Ner

## Data loading
# NOTE(review): torch.load unpickles the file; only load checkpoints from a
# trusted source.
path = '/home/long8v/ICDAR-2019-SROIE/task3/data/data_dict.pth'
raw_dict = torch.load(path)
# Keep only the first element of every stored value.
data_dict = {name: entry[0] for name, entry in raw_dict.items()}

## Inference

## Model loading
# The experiment directory name is asked for interactively.
experiment_name = input('your experiment name is ...')
model = Ner('/home/long8v/sroie_bert/experiment/{}'.format(experiment_name))
pretrained_weights = 'bert-base-uncased'
tokenizer = BertTokenizer.from_pretrained(pretrained_weights)

# Patterns are written as raw strings: sequences such as "\d" in a normal
# string literal are invalid escapes (SyntaxWarning on Python 3.12+). The
# compiled pattern text is unchanged.
re_int = re.compile(r'\d+')
re_float = re.compile(r'(\d+\.\d+)')
# NOTE(review): the "." here is unescaped (matches any character) and the
# pattern needs at least two digits, so "5%" does not match. Left as-is
# because downstream consumers may depend on the current behaviour.
re_percent = re.compile(r'(\d+.?\d+%)')
re_date = re.compile(r'(\d{2}[/-]\d{2}[/-]\d{2,4})')
re_row = re.compile(r'\n')
# Maps each pattern to its placeholder token. Dicts keep insertion order, so
# any code iterating this table sees float, percent, date, int, row in turn.
re_dict = {
    re_float: ' float ',
    re_percent: ' percent ',
    re_date: ' date ',
    re_int: ' int ',
    re_row: ' row ',
}
Exemple #18
0
model_path = "/home/jilei/Bert_Model/Bert_Pytorch_Model"

from bert import Ner

# Load the NER model from the directory configured in model_path.
model = Ner(model_path)

# Tag a space-separated list of Chinese medical terms.
output = model.predict("功能失调性子宫出血 高血压 糖尿病病")

print(output)
# Illustration of the per-token output format. NOTE(review): these sample
# lines appear to come from the English sentence "Steve went to Paris",
# not from the input above.
# ('Steve', {'tag': 'B-PER', 'confidence': 0.9981840252876282})
# ('went', {'tag': 'O', 'confidence': 0.9998939037322998})
# ('to', {'tag': 'O', 'confidence': 0.999891996383667})
# ('Paris', {'tag': 'B-LOC', 'confidence': 0.9991968274116516})
Exemple #19
0
# -*- coding:utf-8 -*-

from bert import Ner

# Load the NER model from the "output/" directory.
model = Ner('output/')
# The sample sentence is passed with its characters separated by spaces.
output = model.predict('2 0 1 4 年 新 的 开 始 ,王 兴 很 高 兴')
print(output)
from bert import Ner

# Load the NER model from the "out/" directory.
model = Ner("out/")

# With ok=True the call is unpacked into two values; only the first is used.
# NOTE(review): the meaning of `ok` and of the discarded second value is not
# visible here — confirm against this project's Ner.predict.
output,_ = model.predict("Barack Obama the former president of US went to France",ok=True)

print(output)
import preprocess
from bert import Ner

model = Ner("bert_base_cased_2")


def predict(input):

    for text in input:
        text_cleaned = preprocess.clean_data(text['address'])
        print(text_cleaned)
        out = model.predict(text_cleaned)
        print(out)
        # print(type(mobilebert_uncased))
        # return jsonify({"result":mobilebert_uncased})
        words = {}
        a1 = []
        a2 = []
        a3 = []
        s = []
        c = []
        ps = []

        for item in out:

            tag = item['tag'].split('-')
            word = item['word']

            if len(tag) == 2:
                if tag[1] == 'A1':
                    # print(word)
Exemple #22
0
from flask import Flask,request,jsonify
from flask_cors import CORS

from bert import Ner

app = Flask(__name__)
CORS(app)

model = Ner("out_!x")

@app.route("/predict",methods=['POST'])
def predict():
    """Tag the ``text`` field of a JSON POST body with the NER model.

    Request: JSON object containing a ``"text"`` key.

    Responses (all JSON with a single ``"result"`` key):
        * success: the output of ``model.predict``;
        * prediction/serialisation failure: ``"Model Failed"`` (status
          unchanged, for backward compatibility with existing clients);
        * malformed request: an explanatory message with HTTP 400 (this used
          to escape as an unhandled exception, i.e. HTTP 500).
    """
    # silent=True yields None instead of raising on a missing/invalid JSON body.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict) or "text" not in payload:
        return jsonify({
            "result": "Invalid request: expected a JSON object with a 'text' field"
        }), 400
    try:
        out = model.predict(payload["text"])
        return jsonify({"result": out})
    except Exception:
        # Top-level request boundary: keep the service alive, but record the
        # full traceback instead of printing only the message.
        app.logger.exception("NER prediction failed")
        return jsonify({"result": "Model Failed"})

if __name__ == "__main__":
    app.run('0.0.0.0',port=8000)