Example #1
    def run(self, key, ctx: Context):
        from jsonpath_ng import parse
        from sagas.nlu.inspector_wordnet import predicate
        from sagas.nlu.ruleset_procs import cached_chunks
        from sagas.conf.conf import cf

        lang = ctx.lang
        domain_name = f"{self.domains}_domains"  # like: 'verb_domains'
        parsers = [parse(normal_path(expr)) for expr in self.paths]
        results = []
        engine = cf.engine(lang) if self.engine is None else self.engine
        chunks = cached_chunks(ctx.sents, lang, engine)
        for chunk in chunks[domain_name]:
            for idx, parser in enumerate(parsers):
                # join all jsonpath matches into a single '/'-separated word
                word = '/'.join(
                    [match.value for match in parser.find(chunk)])
                pred_r = predicate(self.kind, word, lang, self.pos)
                logger.debug(f".. {word} is {self.kind}: {pred_r}")
                results.append(pred_r)
                if pred_r:
                    ctx.add_result(self.name(),
                                   'default',
                                   f"{self.domains}:{self.paths[idx]}", {
                                       'category': self.kind,
                                       'pos': self.pos,
                                       **word_values(word, lang)
                                   },
                                   delivery_type='sentence')

        logger.debug(f"{results}")
        return any(results) if self.match_method == 'any' else all(results)
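
For reference, a minimal self-contained sketch of the jsonpath extraction pattern used above, with a hypothetical chunk dict standing in for one entry of cached_chunks(...)['verb_domains'] (the dict shape is an illustration only):

from jsonpath_ng import parse

# hypothetical chunk shaped like a verb_domains entry
chunk = {'lemma': 'play', 'obj': [{'text': 'music', 'lemma': 'music'}]}
parser = parse('obj[*].lemma')
word = '/'.join(match.value for match in parser.find(chunk))
print(word)  # -> 'music'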
Example #2
def vis_domains(sents, lang, domain=None, engine=None, all_subsents=False):
    """
    >>> from sagas.kit.analysis_kit import vis_domains
    >>> sents='What do you think about the war?'
    >>> lang='en'
    >>> domain='subj_domains' # 'verb_domains', 'aux_domains'
    >>> vis_domains(sents, lang, domain)

    :param sents:
    :param lang:
    :param domain:
    :param engine:
    :param all_subsents:
    :return:
    """
    from sagas.nlu.ruleset_procs import cached_chunks, get_main_domains
    from sagas.conf.conf import cf

    engine = cf.engine(lang) if engine is None else engine
    if domain is None:
        domain, domains = get_main_domains(sents, lang, engine)
    else:
        chunks = cached_chunks(sents, lang, engine)
        domains = chunks[domain]

    if len(domains) == 0:
        return None

    if not all_subsents:
        el = domains[0]
        return vis_domains_data(domain, el)
    else:
        return [vis_domains_data(domain, el) for el in domains]
Example #3
def check_clause_sub(sents: Text, lang: Text, domain: Text, cla: Text,
                     rel: Text, cats: Union[Text, Set, List]):
    """
    >>> from sagas.nlu.inspector_clauses import check_clause_sub
    >>> check_clause_sub(sents, 'pt', 'verb_domains', 'obl', 'cop', {'be'})
    :param sents:
    :param lang:
    :param domain:
    :param cla:
    :param rel:
    :param cats:
    :return:
    """
    from sagas.nlu.uni_chunks import get_chunk
    from sagas.nlu.ruleset_procs import cached_chunks

    # example arguments: cla='obl', rel='cop', cats={'be'}
    chunks = cached_chunks(sents, lang, cf.engine(lang))
    result = get_chunk(chunks, domain, cla,
                       lambda w: {'rel': w.dependency_relation,
                                  'pos': w.upos.lower(),
                                  'word': f"{w.text}/{w.lemma}"})

    word = next((w['word'] for w in result if w['rel'] == rel), None)
    if word:
        if isinstance(cats, str):
            return check_chain(cats, word, '*', lang)
        else:
            return any([check_chain(cat, word, '*', lang) for cat in cats])
    return False
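
The extractor callback receives the engine's token objects; a minimal mock (illustration only, assuming just the attributes used above) shows what get_chunk hands to it per word:

from dataclasses import dataclass

@dataclass
class MockWord:  # stand-in for the engine's word/token type
    dependency_relation: str
    upos: str
    text: str
    lemma: str

clo = lambda w: {'rel': w.dependency_relation,
                 'pos': w.upos.lower(),
                 'word': f"{w.text}/{w.lemma}"}
print(clo(MockWord('cop', 'AUX', 'ser', 'ser')))
# -> {'rel': 'cop', 'pos': 'aux', 'word': 'ser/ser'}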
Example #4
def vis_doc(sents, lang):
    from sagas.nlu.ruleset_procs import cached_chunks
    from sagas.nlu.uni_remote_viz import list_contrast, display_doc_deps
    from sagas.conf.conf import cf

    chunks = cached_chunks(sents, lang, cf.engine(lang))
    return display_doc_deps(chunks['doc'], None)
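
A hedged usage sketch: display_doc_deps returns a graph object in the other examples here, so the result can presumably be rendered the same way:

gv = vis_doc('I want to play music.', 'en')
# in a streamlit app: st.graphviz_chart(gv); in a CLI session: tc.gv(gv)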
Example #5
def test_class_matcher():
    from sagas.nlu.uni_chunks import get_chunk
    from sagas.nlu.ruleset_procs import cached_chunks
    from sagas.conf.conf import cf
    from pampy import match, _
    from dataclasses import dataclass

    @dataclass
    class WordData:
        index: int
        rel: str
        pos: str
        word: str


    # translation: 'She denied being my mother.'
    sents = 'Ela negou ser minha mãe.'
    lang = 'pt'
    domain = 'verb_domains'
    chunks = cached_chunks(sents, lang, cf.engine(lang))

    cla = 'obl'
    ana = get_chunk(
        chunks, domain, cla, lambda w: WordData(index=w.index,
                                                rel=w.dependency_relation,
                                                pos=w.upos.lower(),
                                                word=f"{w.text}/{w.lemma}"))
    t_rs = []
    for word_data in ana:
        r = match(word_data, WordData(_, _, 'aux',
                                      _), lambda *arg: f"aux: {arg[2]}",
                  WordData(_, 'obl', 'noun', _), lambda *arg: arg, _, None)
        t_rs.append(r)
    assert t_rs == ['aux: ser/ser', None, (5, 'mãe/mãe')]
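
The pampy match above binds each wildcard _ positionally and passes the bound values to the handler; a minimal standalone sketch of the same dataclass-matching idiom, independent of sagas:

from dataclasses import dataclass
from pampy import match, _

@dataclass
class Pet:
    kind: str
    name: str

r = match(Pet('cat', 'Mia'),
          Pet('cat', _), lambda name: f"a cat called {name}",
          _, None)
print(r)  # -> 'a cat called Mia'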
Example #6
    def run(self, key, ctx: Context):
        from sagas.nlu.ruleset_procs import list_words, cached_chunks, get_main_domains
        from sagas.conf.conf import cf

        logger.debug(f".. check against {key}")
        if key not in ctx.indexes:
            return False

        sents = ctx.sents
        lang = ctx.lang
        chunks = cached_chunks(sents, lang, cf.engine(lang))
        doc = chunks['doc']
        ents = get_entities(sents)

        prt = ctx.indexes[key]
        indexes = get_children_index(doc, prt)
        idx_ent = {
            el['index']: el['entity']
            for el in get_entity_mapping(sents, doc, ents)
        }
        children_ents = [(idx, idx_ent[idx] if idx in idx_ent else '_')
                         for idx in indexes]

        result = self.test_ent in {e[1] for e in children_ents}
        if result:
            ctx.add_result(self.name(), 'default', key, idx_ent)
        return result
Example #7
def parse(data):
    if 'engine' not in data:
        data['engine'] = cf.engine(data['lang'])
    engine = data['engine']
    response = requests.post(f'{cf.servant(engine)}/verb_domains', json=data)
    if response.status_code == 200:
        return response.json()
    return None
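
A hedged invocation sketch; the payload mirrors the data dicts used throughout these examples, and the servant endpoint answers with the verb-domains JSON (or the call yields None):

rs = parse({'lang': 'en', 'sents': 'I want to play music.'})
if rs is not None:
    print(rs)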
Example #8
    def get_domains(self, ctx:Context):
        from sagas.nlu.ruleset_procs import cached_chunks
        from sagas.conf.conf import cf

        chunks = cached_chunks(ctx.sents, ctx.lang, cf.engine(ctx.lang))
        domains = chunks[ctx.domain_type]
        return domains
Example #9
def is_noun_desc(ctx: Context, domain):
    sents, lang = ctx.sents, ctx.lang
    chunks = cached_chunks(sents, lang, cf.engine(lang))
    domains = chunks[domain]
    head = domains[0]
    comps = [k for k, v in head.items() if isinstance(v, list)]
    logger.debug(f'.. {comps}')
    # a noun descriptor: the head word is a noun and every component
    # is a modifier ('amod', 'nmod', ...) or punctuation
    return head['upos'] == 'NOUN' and \
        all(c.endswith('mod') or c == 'punct' for c in comps)
Example #10
    def run(self, key, ctx: Context):
        from sagas.nlu.ruleset_procs import cached_chunks
        from sagas.conf.conf import cf

        chunks = cached_chunks(ctx.sents, ctx.lang, cf.engine(ctx.lang))
        index = next((x[1] for x in ctx.domains if x[0] == self.part), -1)
        if index != -1:
            rs = self.collect_children(chunks, ctx.lang, index + 1)
            if rs:
                ctx.add_result(self.name(), 'default', self.part, rs)
        return True
Example #11
def parse_comps(sents, source):
    sents = fix_sents(sents, source)

    engine = cf.engine(source)
    doc_jsonify, resp = dep_parse(sents, source, engine, ['predicts'])
    if len(resp['predicts']) > 0:
        rs = resp['predicts']
    else:
        rs = get_chunks(doc_jsonify)
    return rs
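
A hedged usage sketch, assuming the servant for the language is reachable; parse_comps prefers the server-side 'predicts' result and only falls back to locally computed chunks:

for chunk in parse_comps('I want to play music.', 'en'):
    print(chunk)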
Example #12
    def analyse_domains(self, sents, lang, engine=None, domain=None):
        from sagas.nlu.ruleset_procs import cached_chunks, get_main_domains
        from sagas.conf.conf import cf

        engine = cf.engine(lang) if engine is None else engine
        if domain is None:
            domain, domains = get_main_domains(sents, lang, engine)
        else:
            chunks = cached_chunks(sents, lang, engine)
            domains = chunks[domain]
        return domains
Example #13
def get_feats_map(sents, lang, domain, path):
    domain_name = f'{domain}_domains' if domain != 'predicts' else domain
    from sagas.nlu.ruleset_procs import cached_chunks
    chunks = cached_chunks(sents, lang, cf.engine(lang))
    parser = parse(feats_for_path(path))
    results = []
    for chunk in chunks[domain_name]:
        vals = [match.value for match in parser.find(chunk)]
        if vals:
            results.extend([feats_map(val) for val in vals])
    return results
Example #14
    def check_aux(self, sents, lang):
        """
        $ python -m sagas.nlu.predicts_cli check_aux 'what will be the weather in three days?' en
        :param sents:
        :param lang:
        :return:
        """
        data = {'lang': lang, "sents": sents, 'engine': cf.engine(lang)}
        doc_jsonify, resp = parse_sents(data)
        domains = get_aux_domain(doc_jsonify)
        ps = PredictSamples()
        tc.emp('cyan', f"result: {ps.check_domains(domains, lang)}")
Example #15
    def check_rule(self, sents, lang, rule, engine=None):
        """
        $ nlu check_rule '彼のパソコンは便利じゃない。' ja \
            "subj('adj',ガ=kindof('artifact', 'n'))"

        :param sents:
        :param lang:
        :param rule:
        :return:
        """
        from sagas.tool.dynamic_rules import DynamicRules
        data = {'lang': lang, "sents": sents}
        DynamicRules().predict(data, rule, engine=engine or cf.engine(lang))
Example #16
    def check_subj(self, sents, lang):
        """
        $ python -m sagas.nlu.predicts_cli check_subj 'Яблоко - это здоровый фрукт.' ru
        :param sents:
        :param lang:
        :return:
        """
        data = {'lang': lang, "sents": sents, 'engine': cf.engine(lang)}
        doc_jsonify, resp = parse_sents(data)

        domains = get_subj_domain(doc_jsonify)
        ps = PredictSamples()
        tc.emp('cyan', f"result: {ps.check_domains(domains, lang)}")
Example #17
def parse_subj():
    lang = 'ru'
    sents = 'Яблоко - это здоровый фрукт.'
    st.write(sents)
    data = {'lang': lang, "sents": sents, 'engine': cf.engine(lang)}
    doc_jsonify, resp = parse_sents(data)

    domains = get_subj_domain(doc_jsonify)
    testing(domains, 'ru')

    gv = display_doc_deps(doc_jsonify, resp)
    st.graphviz_chart(gv)

    st.write(domains)
Example #18
def parse_aux():
    lang = 'en'
    sents = 'what will be the weather in three days?'
    st.write(sents)
    data = {'lang': lang, "sents": sents, 'engine': cf.engine(lang)}
    doc_jsonify, resp = parse_sents(data)

    domains = get_aux_domain(doc_jsonify)
    testing(domains, 'en')

    # show analyse graph
    gv = display_doc_deps(doc_jsonify, resp)
    st.graphviz_chart(gv)

    st.write(domains)
Example #19
def parse_deps(text, lang, translit=None):
    text = fix_sents(text, lang)
    engine = cf.engine(lang)
    doc_jsonify, resp = dep_parse(text, lang, engine, ['predicts'])
    if doc_jsonify is not None:
        list_chunks(doc_jsonify, resp, lang, enable_contrast=True)
        g = display_doc_deps(doc_jsonify, resp, translit_lang=lang)

        st.graphviz_chart(g)
        if translit is not None:
            st.text(f"♤ {translit}")

        words = [word.text for word in doc_jsonify.words]
        tools.contrast(text, lang, word_map=words)
Example #20
def get_source(sents, lang, domain_type=None) -> Observable:
    from sagas.nlu.ruleset_procs import cached_chunks, get_main_domains
    from sagas.conf.conf import cf
    import rx

    engine = cf.engine(lang)
    if domain_type is None:
        domain_type, domains = get_main_domains(sents, lang, engine)
    else:
        chunks = cached_chunks(sents, lang, engine)
        domains = chunks[domain_type]
    table_rs = []
    for ds in domains:
        flat_table(ds, '', table_rs)
    return rx.of(*table_rs)
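
A hedged consumption sketch (RxPY 3 API assumed): rx.of(*table_rs) emits one flattened table row at a time, so a subscriber sees each row in order:

get_source('I want to play music.', 'en').subscribe(
    on_next=lambda row: print(row),
    on_completed=lambda: print('done'))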
Example #21
    def descrip(self, sents, lang, engine=None):
        """
        $ python -m sagas.nlu.anal_corpus descrip 'Karpet di kantor saya abu-abu.' id
        $ sid 'Celana ini bisa diperbesar.'

        :param sents:
        :param lang:
        :param engine:
        :return:
        """
        from sagas.nlu.anal import build_anal_tree
        from sagas.conf.conf import cf
        f = build_anal_tree(sents, lang, cf.engine(lang))
        f.draw()
        model = f.model()
        model_info(model)
Example #22
def ex_chunk(key: Text, cnt: Text, comp: Text, ctx: cla_meta_intf, clo):
    from sagas.nlu.uni_chunks import get_chunk
    from sagas.nlu.ruleset_procs import list_words, cached_chunks
    from sagas.conf.conf import cf
    # get_chunk(f'verb_domains', 'xcomp/obj', lambda w: w.upos)
    chunks = cached_chunks(ctx.sents, ctx.lang, cf.engine(ctx.lang))
    domain, path = key.split(':')
    result = get_chunk(chunks,
                       f'{domain}_domains' if domain != 'predicts' else domain,
                       path,
                       clo=clo)
    logger.debug(f"extract chunk: {domain}, {path}, {result}")
    if len(result) > 0:
        ctx.add_result(extractor, comp, key, result)
        return True
    return False
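
A hedged invocation sketch, assuming a populated ctx supplied by the surrounding rule pipeline; the key packs domain and path, and clo picks what to extract per word (mirroring the comment above):

# hypothetical call: ctx comes from the ruleset machinery
ok = ex_chunk('verb:xcomp/obj', '', 'pos', ctx, clo=lambda w: w.upos.lower())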
Example #23
    def clip_parse(self, source, sents='', specified='default', do_test=False):
        """
        >> clip text: ‫یک آبجو مى خواهم.‬
        $ nlu clip_parse fa
        $ engine='stanford' nluc ar
        $ nlu clip_parse fi 'Tuolla ylhäällä asuu vanha nainen.'
        $ nluc nl 'De vrouw heeft verschillende appels.'
        $ nluc id 'Ini adalah judul buku yang saya baca.' aux
        $ nluc fi 'Voiko täältä lainata aurinkovarjoa?' default True

        :param source:
        :param sents:
        :param specified:
        :param do_test:
        :return:
        """
        from sagas.nlu.uni_remote import dep_parse
        from sagas.nlu.common import get_from_clip
        from sagas.conf.conf import cf
        from sagas.nlu.uni_remote_viz import list_chunks
        from sagas.nlu.utils import fix_sents

        if sents == '':
            sents = get_from_clip()
            if sents.strip() == '':
                tc.info('no text available in clipboard.')
                return
        sents = fix_sents(sents, source)
        tc.info(sents)

        # Parse the sentence and display its chunks, domains and contrast translations.
        engine = cf.engine(source)
        doc_jsonify, resp = dep_parse(sents, source, engine, ['predicts'])
        if doc_jsonify is None:
            raise Exception(f'Cannot parse sentence for lang {source}')

        list_chunks(doc_jsonify, resp, source,
                    enable_contrast=True,
                    specified=None if specified=='default' else specified)
        words = [word.text for word in doc_jsonify.words]
        self.contrast(sents, source, word_map=words)

        # visualize the domain tree
        self.main_domains(sents, source, engine, False)
        # run ruleset inference procs
        from sagas.nlu.inferencer import do_infers
        cli_cmd, pats = do_infers(sents, source)
        if do_test:
            for pat in pats:
                self.check_rule(sents, source, pat)
Example #24
    def print_sents(self, sents, lang, engine=None):
        """
        $ python -m sagas.nlu.ruleset_procs print_sents 'I want to play music.' en
        $ python -m sagas.nlu.ruleset_procs print_sents "クモは4つの右の目をしています。" ja corenlp

        :param sents:
        :param lang:
        :return:
        """
        if engine is None:
            engine = cf.engine(lang)
        data = {'lang': lang, "sents": sents, 'engine': engine}
        doc_jsonify, resp = parse_sents(data)
        rs = get_chunks(doc_jsonify)

        if lang in non_spaces:
            delim = ''
        else:
            delim = ' '
        for serial, r in enumerate(rs):
            meta = build_meta(r, data)
            domains = r['domains']
            keys = {x[0] for x in domains}
            grp = lambda p, idx: [x[idx] for x in domains if x[0] == p]
            tokens = {x: grp(x, 2) for x in keys}
            words = {x: delim.join(grp(x, 2)) for x in keys}
            lemmas = {x: delim.join(grp(x, 3)) for x in keys}
            print('meta keys', meta.keys())
            print('tokens', tokens)
            print('words', meta['word'], words)
            print('lemmas', lemmas)
            ctx = Context(meta, domains)
            print('chunks', ctx._chunks)

        g = display_doc_deps(doc_jsonify, resp, translit_lang=lang)
        print(*[(w.index, w.text, w.governor,
                 doc_jsonify.words[w.governor - 1].text)
                for w in doc_jsonify.words],
              sep='\n')
        tc.gv(g)
Example #25
    def doc(self, sents, lang, engine=None):
        """
        $ nlu doc 'これを作ってあげました。' ja analspa
        $ nlu doc '主FAX番号はありますか' ja analspa
        $ nlu doc '你在北京的公司的主要传真号码是什么' zh analz
        $ nlu doc '你在北京的公司的主要传真号码是什么' zh analspa
        $ nlu doc "Alex Smith was working at Acme Corp Inc." en spacy
        $ nlu doc 'this is a digital good' en

        :param sents:
        :param lang:
        :param engine:
        :return:
        """
        from sagas.nlu.ruleset_procs import parse_sents
        data = {'lang': lang, "sents": sents, 'engine': engine or cf.engine(lang)}
        doc_jsonify, resp = parse_sents(data)
        pprint(doc_jsonify.as_json)
Example #26
    def run(self, key: Text, ctx: Context) -> bool:
        from sagas.nlu.predicts import predicate
        from sagas.nlu.operators import ud
        from sagas.nlu.ruleset_procs import cached_chunks
        from sagas.conf.conf import cf

        final_rs = []

        sents, lang = ctx.sents, ctx.lang
        chunks = cached_chunks(sents, lang, cf.engine(lang))
        domains = chunks[self.domain]
        for el in domains:
            # logger.debug(f"`{el['lemma']}` >> *{el['dc']['lemma']}*")
            # r1 = predicate(el, ud.__text('will') >> [ud.nsubj('what'), ud.dc_cat('weather')], lang)
            rs: List[Any] = predicate(el, self.checker, lang)
            # r2=predicate(el, ud.__cat('be') >> [ud.nsubj('what'), ud.dc_cat('animal/object')], lang)
            result = all([r[0] for r in rs])
            final_rs.append(result)
            logger.debug(f'{[r[0] for r in rs]}, {result}')
        return any(final_rs)
Example #27
    def asserts(self, sents, lang='en'):
        """
        $ python -m sagas.nlu.ruleset_procs asserts 'I want to play music.' en

        :param sents:
        :param lang:
        :return:
        """
        import sagas.nlu.ruleset_fixtures as rf

        data = {'lang': lang, "sents": sents, 'engine': cf.engine(lang)}
        doc_jsonify, resp = parse_sents(data)
        v_domains = get_verb_domain(doc_jsonify)

        host = create_host()
        for v in v_domains:
            r1 = host.assert_fact('verbs', v)
            pprint(r1)
Example #28
    def verb_domains(self, sents, lang='en', engine=None):
        """
        $ python -m sagas.tool.misc verb_domains 'Мы написали три книги за год.' ru
        $ python -m sagas.tool.misc verb_domains 'Ivan is the best dancer .' en
        $ python -m sagas.tool.misc verb_domains 'Ivan is the best dancer .' en spacy
        $ domains 'Die Aufnahmen begannen im November.' de
        $ domains '伊万是最好的舞者' zh ltp
        $ domains '现在是几点' zh ltp
        $ domains '现在是几点?' zh corenlp
        :param sents:
        :param lang:
        :param engine:
        :return:
        """
        data = {
            'lang': lang,
            "sents": sents,
            'engine': engine or cf.engine(lang)
        }
        get_verb_domains(data)
Example #29
    def has_pos_in_part(part: Text, pos: Union[list, str]):
        from sagas.nlu.uni_chunks import get_chunk
        from sagas.nlu.ruleset_procs import cached_chunks
        from sagas.conf.conf import cf

        chunks = cached_chunks(ctx.sents, ctx.lang, cf.engine(ctx.lang))
        domain, path = part.split(':')
        result = get_chunk(
            chunks,
            f'{domain}_domains' if domain != 'predicts' else domain, path,
            lambda w: (w.upos.lower(), w.text))
        if isinstance(pos, str):
            pos = [pos]
        succ = False
        for el in result:
            if el[0] in pos:
                ctx.add_result(self.name(), f'has_pos_{"_or_".join(pos)}',
                               part, el[1])
                succ = True
        return succ
Example #30
    def __init__(self, meta, domains, name=''):
        self.meta = meta
        self.name = name
        self.domains = domains
        self.domain_type = meta['domain_type']
        self.domain_name = self.domain_type.replace('_domains', '')

        self._chunks = [Chunk(x[0], x[4]) for x in domains]
        # all universal syntactic relations
        self.rels = {x[0] for x in domains}
        self._stems = meta['stems']
        if len(self._stems) == 0:
            self._stems = [(x[0], x[4]) for x in domains]

        self._lang = meta['lang']
        if self._lang in non_spaces:
            self.delim = ''
        else:
            self.delim = ' '
        self._sents = meta['sents'] if 'sents' in meta else ''
        self._engine = meta['engine'] if 'engine' in meta else cf.engine(
            self._lang)

        # support repeated relation keys: group all words/lemmas per relation
        keys = {x[0] for x in domains}
        self.indexes = {x[0]: x[1] for x in domains}

        grp = lambda p, idx: [x[idx] for x in domains if x[0] == p]
        grp_join = lambda p, idx1, idx2: [
            f"{x[idx1]}/{x[idx2]}" for x in domains if x[0] == p
        ]
        self.tokens = {x: grp_join(x, 2, 3) for x in keys}
        self.words = {x: self.delim.join(grp(x, 2)) for x in keys}
        self.lemmas = {x: self.delim.join(grp(x, 3)) for x in keys}

        self.feats = {x[0]: x[5] for x in domains}
        self._results = []
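
A minimal sketch of the repeated-key grouping this constructor performs, using plain tuples shaped like the domains rows above (rel, index, word, lemma, chunk, feats):

domains = [('obj', 2, 'books', 'book', ['three', 'books'], {}),
           ('obj', 5, 'pens', 'pen', ['two', 'pens'], {})]
keys = {x[0] for x in domains}
grp = lambda p, idx: [x[idx] for x in domains if x[0] == p]
words = {x: ' '.join(grp(x, 2)) for x in keys}
print(words)  # -> {'obj': 'books pens'}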