コード例 #1
0
class TestAbbreviationDetector(unittest.TestCase):
    """Tests for the abbreviation helpers and ``AbbreviationDetector``.

    Every test runs text through the ``en_blackstone_proto`` spaCy model
    loaded in ``setUp`` and checks the results of ``contains``,
    ``find_abbreviation``, ``filter_matches`` and ``AbbreviationDetector.find``.

    ``unittest`` assertion methods are used instead of bare ``assert``
    statements so the checks still run under ``python -O`` and failures
    report the compared values.
    """

    def setUp(self):
        """Load the model, build a detector and prepare the shared sample text."""
        super().setUp()
        self.nlp = spacy.load("en_blackstone_proto")
        self.detector = AbbreviationDetector(self.nlp)
        # NOTE: the literal is kept verbatim. The line continuations keep the
        # indentation of the following lines inside the string, and test_find
        # slices the resulting doc by token index.
        self.text = "The European Court of Human Rights (ECtHR) is \
            responsible for applying the European Convention of Human \
            Rights. The Proceeds of Crime Act 2002 has is nothing to the point."

    def test_containsQuotes(self):
        """``contains`` reports whether the short form includes a quote mark."""
        quotes = ['"', "'", "‘", "’", "“", "”"]
        cases = [
            # (description, text, expected result of contains())
            ("straight double quote", 'abbreviation ("abbrn")', True),
            ("straight single quote", "abbreviation ('abbrn')", True),
            ("opening and closing single quotes", "abbreviation (‘abbrn’)", True),
            ("opening and closing double quotes", "abbreviation (“abbrn”)", True),
            ("no quotes", "abbreviation (abbrn)", False),
        ]
        for description, text, expected in cases:
            with self.subTest(description):
                doc = self.nlp(text)
                long_span = doc[0:1]
                short_span = doc[2:5]
                # The result is not inspected here; the call is retained from
                # the original test so it still exercises find_abbreviation
                # on quoted short forms.
                _, _long_form = find_abbreviation(long_span, short_span)
                self.assertIs(contains(short_span.text, quotes), expected)

    def test_find_abbreviation(self):
        """``find_abbreviation`` returns the matching long form, or ``None``."""
        cases = [
            # (description, text, long slice, short slice, expected long form)
            (
                "basic case",
                "abbreviation (abbrn)",
                slice(0, 1),
                slice(2, 3),
                "abbreviation",
            ),
            (
                "hyphenation and numbers within abbreviation",
                "abbreviation (ab-b9rn)",
                slice(0, 1),
                slice(2, 3),
                "abbreviation",
            ),
            (
                "no match",
                "abbreviation (aeb-b9rn)",
                slice(0, 1),
                slice(2, 3),
                None,
            ),
            (
                "first letter must match start of word",
                "aaaabbreviation (ab-b9rn)",
                slice(0, 1),
                slice(2, 3),
                "aaaabbreviation",
            ),
            (
                # Matching is greedy for the first letter ("are" is not included).
                "greedy first-letter matching",
                "more words are considered aaaabbreviation (ab-b9rn)",
                slice(0, 5),
                slice(6, 7),
                "aaaabbreviation",
            ),
        ]
        for description, text, long_slice, short_slice, expected in cases:
            with self.subTest(description):
                doc = self.nlp(text)
                _, long_form = find_abbreviation(doc[long_slice], doc[short_slice])
                if expected is None:
                    self.assertIsNone(long_form)
                else:
                    # Fail with a clear message rather than an AttributeError.
                    self.assertIsNotNone(long_form)
                    self.assertEqual(long_form.text, expected)

    def test_filter_matches(self):
        """``filter_matches`` yields (long form, short form) candidate spans."""
        cases = [
            # (description, text, matches passed to filter_matches)
            # NOTE(review): the tuples look like (match id, start, end) token
            # offsets of the parenthesised part -- confirm against the matcher.
            (
                "act and year separated with a space",
                "Companies Act 2006 (CA 2006)",
                [(1, 4, 6)],
            ),
            (
                "act and year separated with a space, with quote marks",
                "Companies Act 2006 ('CA 2006')",
                [(1, 4, 8)],
            ),
        ]
        for description, text, matches in cases:
            with self.subTest(description):
                doc = self.nlp(text)
                filtered = filter_matches(matches, doc)
                long_form_candidate = filtered[0][0]
                short_form_candidate = filtered[0][1]
                self.assertEqual(long_form_candidate.text, "Companies Act 2006")
                self.assertEqual(short_form_candidate.text, "CA 2006")

    def test_find(self):
        """``AbbreviationDetector.find`` returns the long-form span for a slice."""
        doc = self.nlp(self.text)
        long_form, _shorts = self.detector.find(doc[1:6], doc)
        # Span.string is a deprecated alias of text_with_ws (removed in
        # spaCy v3); text_with_ws keeps the trailing space the test expects.
        self.assertEqual(long_form.text_with_ws, "European Court of Human Rights ")
コード例 #2
0
 def setUp(self):
     super().setUp()
     self.nlp = spacy.load("en_blackstone_proto")
     self.detector = AbbreviationDetector(self.nlp)
     self.text = "The European Court of Human Rights (ECtHR) is \
コード例 #3
0
from blackstone.utils.legislation_linker import extract_legislation_relations
from models.Leg import Leg
from models.NamedEntity import NamedEntity
from models.Abrv import Abrv
from models.Sentence import Sentence
from blackstone.pipeline.sentence_segmenter import SentenceSegmenter
from blackstone.rules import CITATION_PATTERNS


# Request body model with a single string field, `text`.
# The visible POST handler receives it as `item` and passes `item.text` to `nlp`.
class Request(BaseModel):
    text: str


# Load the "en_blackstone_proto" spaCy model once, at import time; the
# handlers below use this module-level pipeline.
nlp = spacy.load("en_blackstone_proto")

# Build the abbreviation detector from the loaded pipeline and register it
# as a pipeline component.
abbreviation_pipe = AbbreviationDetector(nlp)
nlp.add_pipe(abbreviation_pipe)

# FastAPI application object that the route decorators below attach to.
app = FastAPI()


@app.get("/")
def read_root():
    # Root endpoint: always responds with the same fixed status payload.
    status_payload = {"Status": "Working"}
    return status_payload


@app.post("/abbreviation")
def Abbreviation(item: Request):
    abbreviation = []

    doc = nlp(item.text)