import re

from lexnlp.extract.en.utils import NPExtractor

# Chunk grammar describing candidate act names.
# NOTE(review): assumes NPExtractor feeds this to an NLTK-style RegexpParser,
# where a "#" comment runs to the end of the line. Each rule therefore has to
# live on its own line, otherwise the first comment swallows the IN and NP
# rules -- confirm against NPExtractor.
act_grammar = r"""
    NBAR: {<NNP.*|JJ|\(|\)|,>*<NNP.*>}  # Nouns, Adj-s, brackets, terminated with Nouns
    IN: {<CC|IN|,>}  # &, and, of
    NP: {(<NBAR><IN>)*<NBAR>(<IN><CD>)?}
"""

ACT_NPE = NPExtractor(act_grammar)
# Rebind a new list instead of using "+=": in-place extension would also
# modify the original list object, which matters if exception_sym is a
# class-level list shared by every NPExtractor instance (not visible here).
ACT_NPE.exception_sym = list(ACT_NPE.exception_sym) + ['And', 'Of']

# A phrase is kept only if it contains the word "Act" preceded by whitespace
# and followed by a non-word character or the end of the phrase.
# Raw string: "\s" and "\W" are regex escapes, not Python string escapes.
ACT_RE = re.compile(r'\s+Act(?:\W|$)')


def get_acts(text):
    """Yield every occurrence of an act name found in *text*.

    Noun phrases are extracted with ``ACT_NPE``; those matching ``ACT_RE``
    are treated as act names, and each literal occurrence of such a name
    in *text* is reported.

    :param text: text to search
    :return: generator of dicts with keys ``location_start`` and
        ``location_end`` (the span of the occurrence in *text*) and
        ``value`` (the act name)
    """
    # De-duplicate while keeping first-seen order so that the output order
    # is deterministic (iterating a plain set is not stable between runs).
    unique_phrases = dict.fromkeys(ACT_NPE.get_np(text))
    act_names = [phrase for phrase in unique_phrases if ACT_RE.search(phrase)]

    for act_name in act_names:
        # The name is escaped so it is searched for as a literal string.
        for match in re.finditer(re.escape(act_name), text):
            location_start, location_end = match.span()
            yield {
                'location_start': location_start,
                'location_end': location_end,
                'value': act_name,
            }


def get_act_list(*args, **kwargs):
    """Return the results of :func:`get_acts` as a list.

    All arguments are passed through to :func:`get_acts` unchanged.
    """
    return list(get_acts(*args, **kwargs))
__version__ = "0.1.6"
__maintainer__ = "LexPredict, LLC"
__email__ = "*****@*****.**"

# A trademark candidate starts with an uppercase letter or digit, continues
# with one or more characters other than ")", and ends with a trademark
# marker: "TM" directly after a lowercase letter, "TM" after a space or "("
# and followed by a non-word character or end of string, "™", "(R)"
# (optionally preceded by whitespace), "Ⓡ" or "®".
TRADEMARK_PTN = r"[A-Z0-9][^\)]+(?:[a-z]TM|[ \(]TM(?:\W|$)|™|\s*\(R\)|Ⓡ|®)"
TRADEMARK_PTN_RE = re.compile(TRADEMARK_PTN)

# Chunk grammar for the noun phrases that may contain a trademark.
# NOTE(review): assumes NPExtractor feeds this to an NLTK-style RegexpParser,
# where a "#" comment runs to the end of the line. Each rule therefore has to
# live on its own line, otherwise the first comment swallows the IN and NP
# rules -- confirm against NPExtractor.
grammar = r"""
    NBAR: {<NNP.*|JJ|\(|,>*<NNP.*|\)>}  # Nouns, Adj-s, brackets, terminated with Nouns or brackets
    IN: {<CC|IN>}  # &, and, of
    NP: {(<NBAR><IN>)*<NBAR>}
"""

np_extractor = NPExtractor(grammar=grammar)


def get_trademarks(text) -> Generator:
    """Find trademarks in text.

    The text is split into sentences, noun phrases are extracted from each
    sentence, and every ``TRADEMARK_PTN_RE`` match inside a noun phrase is
    yielded.

    :param text: text to search
    :return: generator of matched trademark strings
    """
    # Cheap pre-check: skip sentence splitting and noun-phrase extraction
    # when the pattern does not occur anywhere in the text.
    if not TRADEMARK_PTN_RE.search(text):
        return

    for sentence in get_sentence_list(text):
        for phrase in np_extractor.get_np(sentence):
            # The pattern has no capturing groups, so findall() returns the
            # full matched strings.
            yield from TRADEMARK_PTN_RE.findall(phrase)