def check_clause_sub(sents: Text, lang: Text, domain: Text, cla: Text,
                     rel: Text, cats: Union[Text, Set, List]):
    """
    >>> from sagas.nlu.inspector_clauses import check_clause_sub
    >>> check_clause_sub(sents, 'pt', 'verb_domains', 'obl', 'cop', {'be'})
    :param sents:
    :param lang:
    :param domain:
    :param cla:
    :param rel:
    :param cats:
    :return:
    """
    from sagas.nlu.uni_chunks import get_chunk
    from sagas.nlu.ruleset_procs import cached_chunks
    from sagas.conf.conf import cf

    # e.g. cla = 'obl', rel = 'cop', cats = {'be'}
    chunks = cached_chunks(sents, lang, cf.engine(lang))
    result = get_chunk(chunks, domain, cla,
                       lambda w: {'rel': w.dependency_relation,
                                  'pos': w.upos.lower(),
                                  'word': f"{w.text}/{w.lemma}"})

    word = next((w['word'] for w in result if w['rel'] == rel), None)
    if word:
        # check_chain comes from the enclosing sagas module; it tests whether
        # the word's category chain covers the given category
        if isinstance(cats, str):
            return check_chain(cats, word, '*', lang)
        return any(check_chain(cat, word, '*', lang) for cat in cats)
    return False
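
A minimal usage sketch for the function above, assuming the sagas package and a Portuguese parser backend are installed; the arguments follow the docstring and the sentence is borrowed from the tests below (the exact result depends on the parse and on the category data behind check_chain):

from sagas.nlu.inspector_clauses import check_clause_sub

# 'Ela negou ser minha mãe.' -- its 'obl' chunk carries a 'cop' word 'ser' ("to be"),
# so checking that word against the category 'be' is expected to succeed
print(check_clause_sub('Ela negou ser minha mãe.', 'pt',
                       'verb_domains', 'obl', 'cop', {'be'}))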
Example #2
def test_class_matcher():
    from sagas.nlu.uni_chunks import get_chunk
    from sagas.nlu.ruleset_procs import cached_chunks
    from sagas.conf.conf import cf
    from pampy import match, _
    from dataclasses import dataclass

    @dataclass
    class WordData:
        index: int
        rel: str
        pos: str
        word: str

    # She denied being my mother
    sents = 'Ela negou ser minha mãe.'
    lang = 'pt'
    domain = 'verb_domains'
    chunks = cached_chunks(sents, lang, cf.engine(lang))

    cla = 'obl'
    ana = get_chunk(chunks, domain, cla,
                    lambda w: WordData(index=w.index,
                                       rel=w.dependency_relation,
                                       pos=w.upos.lower(),
                                       word=f"{w.text}/{w.lemma}"))
    t_rs = []
    for word_data in ana:
        r = match(word_data,
                  WordData(_, _, 'aux', _), lambda *arg: f"aux: {arg[2]}",
                  WordData(_, 'obl', 'noun', _), lambda *arg: arg,
                  _, None)
        t_rs.append(r)
    assert t_rs == ['aux: ser/ser', None, (5, 'mãe/mãe')]
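
The compactness of the test comes from pampy's dataclass patterns: each `_` placed in a field position captures that field, and the captured values are passed to the action in declaration order. A self-contained toy sketch (the Pet class is invented for illustration):

from dataclasses import dataclass
from pampy import match, _

@dataclass
class Pet:
    name: str
    kind: str

# Pet(_, 'dog') captures only `name`; Pet(_, _) would capture `name` and `kind`
label = match(Pet('Rex', 'dog'),
              Pet(_, 'dog'), lambda name: f"dog called {name}",
              Pet(_, _), lambda name, kind: f"{kind} called {name}",
              _, 'not a pet')
assert label == 'dog called Rex'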
Example #3
def ex_chunk(key: Text, cnt: Text, comp: Text, ctx: cla_meta_intf, clo):
    from sagas.nlu.uni_chunks import get_chunk
    from sagas.nlu.ruleset_procs import list_words, cached_chunks
    from sagas.conf.conf import cf
    # e.g. key = 'verb:xcomp/obj' -> get_chunk(chunks, 'verb_domains', 'xcomp/obj', clo)
    chunks = cached_chunks(ctx.sents, ctx.lang, cf.engine(ctx.lang))
    domain, path = key.split(':')
    result = get_chunk(chunks,
                       f'{domain}_domains' if domain != 'predicts' else domain,
                       path,
                       clo=clo)
    logger.debug(f"extract chunk: {domain}, {path}, {result}")
    if len(result) > 0:
        ctx.add_result(extractor, comp, key, result)
        return True
    return False
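
The only non-obvious step in ex_chunk is how the key string is resolved into get_chunk arguments; a standalone sketch of just that mapping (resolve_key is a hypothetical helper written for illustration):

def resolve_key(key: str):
    # mirrors the key handling in ex_chunk
    domain, path = key.split(':')
    return (f'{domain}_domains' if domain != 'predicts' else domain, path)

assert resolve_key('verb:xcomp/obj') == ('verb_domains', 'xcomp/obj')
assert resolve_key('predicts:obj') == ('predicts', 'obj')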
Example #4
def has_pos_in_part(part: Text, pos: Union[list, str]):
    # `ctx` and `self` come from the enclosing inspector method
    from sagas.nlu.uni_chunks import get_chunk
    from sagas.nlu.ruleset_procs import list_words, cached_chunks
    from sagas.conf.conf import cf
    chunks = cached_chunks(ctx.sents, ctx.lang, cf.engine(ctx.lang))
    domain, path = part.split(':')
    result = get_chunk(
        chunks,
        f'{domain}_domains' if domain != 'predicts' else domain, path,
        lambda w: (w.upos.lower(), w.text))
    if isinstance(pos, str):
        pos = [pos]
    succ = False
    for el in result:
        if el[0] in pos:
            ctx.add_result(self.name(), f'has_pos_{"_or_".join(pos)}',
                           part, el[1])
            succ = True
    return succ
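
The POS filtering at the end reduces to a small pure function; a sketch under a hypothetical name match_pos, fed with the kind of (upos, text) pairs the lambda above produces (the sample words echo the chunk shown in the docstring of Example #5 below):

def match_pos(result, pos):
    # illustrative core of has_pos_in_part: keep words whose POS is accepted
    if isinstance(pos, str):
        pos = [pos]
    return [text for upos, text in result if upos in pos]

assert match_pos([('num', 'dezenove'), ('noun', 'quartos')], 'noun') == ['quartos']
assert match_pos([('num', 'dezenove'), ('noun', 'quartos')], ['num', 'noun']) == ['dezenove', 'quartos']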
Example #5
    def chunks(self, sents, lang, domain, path):
        """
        $ python -m sagas.nlu.extractor_cli chunks 'I like to eat sweet corn.' en verb 'xcomp/obj'
        $ python -m sagas.nlu.extractor_cli chunks 'A casa tem dezenove quartos.' pt verb 'obj'
            ☇ [('dezenove', 'num'), ('quartos', 'noun')]

        :param sents: sentence text
        :param lang: language code, e.g. 'en' or 'pt'
        :param domain: domain name, e.g. 'verb' (expanded to 'verb_domains')
        :param path: chunk path, e.g. 'xcomp/obj'
        :return:
        """
        from sagas.nlu.uni_chunks import get_chunk
        from sagas.nlu.ruleset_procs import list_words, cached_chunks
        from sagas.conf.conf import cf
        # get_chunk(f'verb_domains', 'xcomp/obj', lambda w: w.upos)
        # get_chunk(f'domain_domains', path, lambda w: w.upos)
        chunks = cached_chunks(sents, lang, cf.engine(lang))
        result = get_chunk(
            chunks, f'{domain}_domains' if domain != 'predicts' else domain,
            path, lambda w: (w.text, w.upos.lower()))
        print(result)
Example #6
def test_chunk_matcher():
    from sagas.nlu.uni_chunks import get_chunk
    from sagas.nlu.ruleset_procs import cached_chunks
    from sagas.conf.conf import cf
    from pampy import match, _

    # She denied being my mother
    sents = 'Ela negou ser minha mãe.'
    lang = 'pt'
    domain = 'verb_domains'
    chunks = cached_chunks(sents, lang, cf.engine(lang))

    cla = 'obl'
    raw = get_chunk(
        chunks, domain, cla, lambda w: {
            'rel': w.dependency_relation,
            'pos': w.upos.lower(),
            'word': f"{w.text}/{w.lemma}"
        })
    rs = {e['rel']: e for e in raw}
    r = match(rs,
              {'cop': {'word': _}, 'obl': {'pos': 'noun', 'word': _}},
              lambda *arg: arg,
              _, "anything else")
    assert r == ('ser/ser', 'mãe/mãe')

    r = match(rs,
              {_: {'pos': 'aux'}, 'obl': {'pos': 'noun', 'word': _}},
              lambda *arg: arg,
              _, "anything else")
    assert r == ('cop', 'mãe/mãe')
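
The dict patterns used above are plain pampy behaviour: a `_` in a value position captures that value, a `_` used as a key captures the matching key, and keys of the target dict that the pattern does not mention are ignored. A self-contained sketch, with a hand-written dict standing in for the chunk extraction:

from pampy import match, _

rs = {'cop': {'pos': 'aux', 'word': 'ser/ser'},
      'obl': {'pos': 'noun', 'word': 'mãe/mãe'}}

# capture the copula's word and the oblique noun's word
pair = match(rs,
             {'cop': {'word': _}, 'obl': {'pos': 'noun', 'word': _}},
             lambda *caps: caps,
             _, 'anything else')
assert pair == ('ser/ser', 'mãe/mãe')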