def check_clause_sub(sents: Text, lang: Text, domain: Text, cla: Text, rel: Text,
                     cats: Union[Text, Set, List]) -> bool:
    """Check whether the word filling dependency `rel` inside clause `cla`
    of the parsed sentence matches any of the given category chains.

    >>> from sagas.nlu.inspector_clauses import check_clause_sub
    >>> check_clause_sub('Ela negou ser minha mãe.', 'pt', 'verb_domains', 'obl', 'cop', {'be'})

    :param sents: sentence text to analyse
    :param lang: language code, e.g. 'pt'
    :param domain: chunk domain name, e.g. 'verb_domains'
    :param cla: clause / dependency path to extract, e.g. 'obl'
    :param rel: dependency relation of the target word, e.g. 'cop'
    :param cats: a single category, or a collection of candidate categories
    :return: True if the located word matches any category chain, else False
    """
    from sagas.nlu.uni_chunks import get_chunk
    from sagas.nlu.ruleset_procs import cached_chunks

    chunks = cached_chunks(sents, lang, cf.engine(lang))
    result = get_chunk(chunks, domain, cla,
                       lambda w: {'rel': w.dependency_relation,
                                  'pos': w.upos.lower(),
                                  'word': f"{w.text}/{w.lemma}"})
    # first word whose relation matches (e.g. the copula of an 'obl' clause)
    word = next((w['word'] for w in result if w['rel'] == rel), None)
    if word is None:
        return False
    # normalize a bare string into a one-element collection, then test lazily
    # (generator avoids building an intermediate list inside any())
    targets = [cats] if isinstance(cats, str) else cats
    return any(check_chain(cat, word, '*', lang) for cat in targets)
def test_class_matcher():
    """Match chunk words (as dataclass instances) against pampy patterns."""
    from sagas.nlu.uni_chunks import get_chunk
    from pampy import match, _
    from dataclasses import dataclass

    @dataclass
    class WordData:
        index: int
        rel: str
        pos: str
        word: str

    # She denied being my mother
    sentence = 'Ela negou ser minha mãe.'
    parsed = cached_chunks(sentence, 'pt', cf.engine('pt'))
    words = get_chunk(
        parsed, 'verb_domains', 'obl',
        lambda w: WordData(index=w.index,
                           rel=w.dependency_relation,
                           pos=w.upos.lower(),
                           word=f"{w.text}/{w.lemma}"))

    def classify(wd):
        # positional patterns: (index, rel, pos, word)
        return match(wd,
                     WordData(_, _, 'aux', _), lambda *arg: f"aux: {arg[2]}",
                     WordData(_, 'obl', 'noun', _), lambda *arg: arg,
                     _, None)

    outcomes = [classify(wd) for wd in words]
    assert outcomes == ['aux: ser/ser', None, (5, 'mãe/mãe')]
def ex_chunk(key: Text, cnt: Text, comp: Text, ctx: cla_meta_intf, clo):
    """Extract a chunk identified by '<domain>:<path>' and record it on ctx.

    :param key: extraction spec, e.g. 'verb:xcomp/obj'
    :param cnt: unused here; kept for the extractor calling convention
    :param comp: component name recorded with the result
    :param ctx: rule-meta context carrying sents/lang and the result sink
    :param clo: closure applied to each word of the chunk
    :return: True when at least one item was extracted
    """
    from sagas.nlu.uni_chunks import get_chunk
    from sagas.nlu.ruleset_procs import list_words, cached_chunks
    from sagas.conf.conf import cf

    chunk_map = cached_chunks(ctx.sents, ctx.lang, cf.engine(ctx.lang))
    domain, path = key.split(':')
    # 'predicts' is addressed directly; every other domain gets a suffix
    target = domain if domain == 'predicts' else f'{domain}_domains'
    result = get_chunk(chunk_map, target, path, clo=clo)
    logger.debug(f"extract chunk: {domain}, {path}, {result}")
    if len(result) == 0:
        return False
    ctx.add_result(extractor, comp, key, result)
    return True
def has_pos_in_part(part: Text, pos: Union[list, str]):
    """Test whether any word in the chunk at `part` carries one of the POS tags.

    NOTE(review): relies on `ctx` and `self` from the enclosing scope —
    this appears to be defined inside a method; confirm against the caller.

    :param part: extraction spec '<domain>:<path>'
    :param pos: a single POS tag or a list of tags (lower-case)
    :return: True when at least one match was found (matches recorded on ctx)
    """
    from sagas.nlu.uni_chunks import get_chunk
    from sagas.nlu.ruleset_procs import list_words, cached_chunks
    from sagas.conf.conf import cf

    chunk_map = cached_chunks(ctx.sents, ctx.lang, cf.engine(ctx.lang))
    domain, path = part.split(':')
    target = domain if domain == 'predicts' else f'{domain}_domains'
    words = get_chunk(chunk_map, target, path,
                      lambda w: (w.upos.lower(), w.text))
    wanted = [pos] if isinstance(pos, str) else pos
    found = False
    for tag, text in words:
        if tag in wanted:
            ctx.add_result(self.name(), f'has_pos_{"_or_".join(wanted)}', part, text)
            found = True
    return found
def chunks(self, sents, lang, domain, path):
    """Print the (text, pos) pairs extracted from a chunk path.

    $ python -m sagas.nlu.extractor_cli chunks 'I like to eat sweet corn.' en verb 'xcomp/obj'
    $ python -m sagas.nlu.extractor_cli chunks 'A casa tem dezenove quartos.' pt verb 'obj'
        ☇ [('dezenove', 'num'), ('quartos', 'noun')]

    :param sents: sentence text
    :param lang: language code
    :param domain: domain prefix, e.g. 'verb' (or the literal 'predicts')
    :param path: dependency path inside the domain, e.g. 'xcomp/obj'
    :return: None — the result is printed
    """
    from sagas.nlu.uni_chunks import get_chunk
    from sagas.nlu.ruleset_procs import list_words, cached_chunks
    from sagas.conf.conf import cf

    chunk_map = cached_chunks(sents, lang, cf.engine(lang))
    # 'predicts' is addressed directly; every other domain gets a suffix
    target = domain if domain == 'predicts' else f'{domain}_domains'
    extracted = get_chunk(chunk_map, target, path,
                          lambda w: (w.text, w.upos.lower()))
    print(extracted)
def test_chunk_matcher():
    """Match a rel-indexed chunk dict against nested pampy dict patterns."""
    from sagas.nlu.uni_chunks import get_chunk
    from pampy import match, _

    # She denied being my mother
    sentence = 'Ela negou ser minha mãe.'
    parsed = cached_chunks(sentence, 'pt', cf.engine('pt'))
    rows = get_chunk(parsed, 'verb_domains', 'obl',
                     lambda w: {'rel': w.dependency_relation,
                                'pos': w.upos.lower(),
                                'word': f"{w.text}/{w.lemma}"})
    by_rel = {row['rel']: row for row in rows}

    matched = match(by_rel,
                    {'cop': {'word': _},
                     'obl': {'pos': 'noun', 'word': _}},
                    lambda *arg: arg,
                    _, "anything else")
    assert matched == ('ser/ser', 'mãe/mãe')

    matched = match(by_rel,
                    {_: {'pos': 'aux'},
                     'obl': {'pos': 'noun', 'word': _}},
                    lambda *arg: arg,
                    _, "anything else")
    assert matched == ('cop', 'mãe/mãe')