Ejemplo n.º 1
0
import re

from lexnlp.extract.en.utils import NPExtractor
# Chunk grammar handed to NPExtractor for locating act names.
# (The tag names look like Penn Treebank POS tags -- confirm against the
# tagger NPExtractor uses.)
#   NBAR: any run of NNP*/JJ/bracket/comma tags that ends with an NNP* tag.
#   IN:   a single CC, IN or comma tag.
#   NP:   one or more NBAR chunks joined by IN chunks, optionally followed
#         by an IN chunk and a CD tag.
# The "#" remarks inside the string are part of the grammar text itself.
act_grammar = r"""
    NBAR:
        {<NNP.*|JJ|\(|\)|,>*<NNP.*>}  # Nouns, Adj-s, brackets, terminated with Nouns
    IN:
        {<CC|IN|,>}   # &, and, of
    NP:
        {(<NBAR><IN>)*<NBAR>(<IN><CD>)?}
"""

# Noun-phrase extractor configured with the act grammar above.
ACT_NPE = NPExtractor(act_grammar)
# Add 'And' and 'Of' to this extractor's exception_sym list.
# NOTE(review): "+=" extends a list in place; if exception_sym is a
# class-level list on NPExtractor, this also changes it for every other
# instance -- confirm against the NPExtractor definition.
ACT_NPE.exception_sym += ['And', 'Of']
# Matches the word "Act" when it is preceded by whitespace and followed by a
# non-word character or the end of the string. A raw string is used so that
# "\s" and "\W" reach the regex engine without relying on Python's handling
# of unrecognised string escapes (a SyntaxWarning on recent Python versions).
ACT_RE = re.compile(r'\s+Act(?:\W|$)')


def get_acts(text):
    """
    Yield every occurrence of an act name found in the text.

    Noun phrases are pulled from the text with ACT_NPE, de-duplicated, and
    kept only when ACT_RE matches them. Each surviving phrase is then searched
    for literally in the original text, and one dict is yielded per hit.

    :param text: text to scan
    :return: generator of dicts with keys 'location_start', 'location_end'
        (the span of the hit in ``text``) and 'value' (the phrase itself)
    """
    unique_phrases = set(ACT_NPE.get_np(text))
    for phrase in unique_phrases:
        # Only phrases containing the standalone word "Act" are of interest
        if not ACT_RE.search(phrase):
            continue
        # Escape the phrase so it is located as literal text, not as a pattern
        for hit in re.finditer(re.escape(phrase), text):
            yield {'location_start': hit.start(),
                   'location_end': hit.end(),
                   'value': phrase}


def get_act_list(*args, **kwargs):
    """
    Collect the results of get_acts into a list.

    All positional and keyword arguments are forwarded to get_acts unchanged.

    :return: list of the dicts yielded by get_acts
    """
    return [*get_acts(*args, **kwargs)]
# Module metadata: version string, maintainer name and contact e-mail
# (the address is masked in this copy of the file).
__version__ = "0.1.6"
__maintainer__ = "LexPredict, LLC"
__email__ = "*****@*****.**"

# Trademark pattern, assembled from adjacent raw-string pieces so that each
# part can carry its own remark; the concatenated text is a single regex.
TRADEMARK_PTN = (
    r"[A-Z0-9]"            # first character: capital letter or digit
    r"[^\)]+"              # then one or more characters other than ")"
    r"(?:"                 # followed by one of the trademark markers:
    r"[a-z]TM"             # lowercase letter directly followed by "TM"
    r"|[ \(]TM(?:\W|$)"    # " TM" or "(TM", then a non-word char or end
    r"|™"                  # trademark sign
    r"|\s*\(R\)"           # "(R)", optionally preceded by whitespace
    r"|Ⓡ"                  # circled capital R
    r"|®"                  # registered sign
    r")"
)
# Compiled once at import time for reuse.
TRADEMARK_PTN_RE = re.compile(TRADEMARK_PTN)

# Chunk grammar handed to NPExtractor for locating trademark candidates.
# (The tag names look like Penn Treebank POS tags -- confirm against the
# tagger NPExtractor uses.)
#   NBAR: any run of NNP*/JJ/"("/comma tags that ends with an NNP* tag or ")".
#   IN:   a single CC or IN tag.
#   NP:   one or more NBAR chunks joined by IN chunks.
# The "#" remarks inside the string are part of the grammar text itself.
grammar = r"""
    NBAR:
        {<NNP.*|JJ|\(|,>*<NNP.*|\)>}  # Nouns, Adj-s, brackets, terminated with Nouns or brackets
    IN:
        {<CC|IN>}   # &, and, of
    NP:
        {(<NBAR><IN>)*<NBAR>}
"""
# Noun-phrase extractor configured with the grammar above.
np_extractor = NPExtractor(grammar=grammar)


def get_trademarks(text) -> Generator:
    """
    Yield trademark mentions found in the text.

    The whole text is first tested against TRADEMARK_PTN_RE. Only when that
    search hits is the text split into sentences, noun phrases extracted from
    each sentence, and every pattern match inside a phrase yielded.

    :param text: text to scan
    :return: generator over the TRADEMARK_PTN_RE matches found in the
        extracted noun phrases
    """
    # Cheap whole-text check: skip sentence splitting when nothing can match
    if not TRADEMARK_PTN_RE.search(text):
        return
    for sentence in get_sentence_list(text):
        for noun_phrase in np_extractor.get_np(sentence):
            yield from TRADEMARK_PTN_RE.findall(noun_phrase)