Example #1
    def langspec_id(self, sents, engine='corenlp'):
        """
        $ python -m sagas.nlu.rules_lang_spec langspec_id 'Berapa umur kamu?'
        :param sents:
        :param engine:
        :return:
        """
        from sagas.nlu.uni_remote import dep_parse
        from sagas.nlu.uni_parser import get_chunks
        from sagas.nlu.rules_lang_spec_id import Rules_id

        pipelines = ['predicts']
        lang = 'id'
        doc_jsonify, resp = dep_parse(sents, lang, engine, pipelines)
        rs = get_chunks(doc_jsonify)
        # rs_repr(rs, data={'lang': lang, "sents": sents, 'engine': engine, 'pipelines': pipelines})
        data = {'lang': lang, "sents": sents, 'engine': engine, 'pipelines': pipelines}
        for serial, r in enumerate(rs):
            common = {'lemma': r['lemma'], 'word': r['word'],
                      'stems': r['stems']}
            meta = {'rel': r['rel'], **common, **data}
            c = Rules_id(meta, r['domains'], doc=doc_jsonify)
            c.root_rules()
            c.execute()
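
All of the examples on this page share the same core pattern: dep_parse returns a jsonified document plus a response dict, and get_chunks derives chunk dictionaries from the document when the server did not send predicts. A minimal sketch of that pattern, assuming the sagas parsing backend is reachable; the printed keys ('type', 'word', 'lemma') are the ones the other examples below rely on:

from sagas.nlu.uni_remote import dep_parse
from sagas.nlu.uni_parser import get_chunks

sents = 'Berapa umur kamu?'
# dep_parse returns (doc_jsonify, resp); resp may already carry 'predicts'.
doc_jsonify, resp = dep_parse(sents, 'id', 'corenlp', ['predicts'])
if doc_jsonify is not None:
    chunks = resp['predicts'] if resp and resp.get('predicts') else get_chunks(doc_jsonify)
    for r in chunks:
        print(r['type'], r['word'], r['lemma'])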
Example #2
def viz_sample(lang,
               sents,
               engine='corenlp',
               translit_lang=None,
               enable_contrast=False):
    """
    >>> from sagas.nlu.uni_remote_viz import viz_sample
    >>> sents='what time is it ?'
    >>> viz_sample('en', sents)

    en="I have to turn off the lights in the room."
    zh="我必须关掉房间里的灯。"
    ja="部屋の明かりを消さなければなりません。"
    viz_sample('en', en)

    :param lang:
    :param sents:
    :param engine:
    :return:
    """
    # uni=UniCli()
    # doc=uni.parsers[engine](lang, sents)
    from sagas.nlu.uni_remote import dep_parse

    doc_jsonify, resp = dep_parse(sents, lang, engine, ['predicts'])
    if doc_jsonify is None:
        raise Exception(f'Cannot parse sentence for lang {lang}')

    display_root_predicate(doc_jsonify, resp)
    list_chunks(doc_jsonify, resp, lang, enable_contrast=enable_contrast)

    return display_doc_deps(doc_jsonify, resp, translit_lang=translit_lang)
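
A usage sketch based on the docstring above; the return value is the graph produced by display_doc_deps, which other examples on this page hand to st.graphviz_chart, so how it is rendered depends on the caller's environment:

from sagas.nlu.uni_remote_viz import viz_sample

# Prints the root predicate and the chunk listing as side effects,
# then returns the dependency graph for display.
g = viz_sample('en', 'I have to turn off the lights in the room.',
               engine='corenlp', enable_contrast=True)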
Example #3
def build_anal_tree(sents: Text,
                    lang: Text,
                    engine: Text,
                    nodecls=None,
                    docimpl=None):
    """
    >>> from sagas.nlu.anal import build_anal_tree
    >>> from anytree.search import findall, findall_by_attr
    >>> f=build_anal_tree(sents, lang, engine)
    >>> words = findall_by_attr(f, name='upos', value='VERB')
    >>> objs = findall(words[0], filter_=lambda n: n.dependency_relation in ("obj",))

    :param sents:
    :param lang:
    :param engine:
    :return:
    """
    from sagas.nlu.uni_remote import dep_parse
    from sagas.nlu.utils import fix_sents

    sents = fix_sents(sents, lang)
    # doc is SentenceIntf
    doc, resp = dep_parse(sents,
                          lang=lang,
                          engine=engine,
                          pipelines=['predicts'],
                          doc_impl=docimpl)
    predicts = resp['predicts'] if resp and 'predicts' in resp else []
    return from_doc(doc, lang, engine, nodecls, predicts)
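
The returned tree is an anytree structure (the docstring searches it with anytree.search), so a generic traversal such as the sketch below should work; the node attributes upos and dependency_relation come from the docstring, and the sample sentence is illustrative only:

from anytree import PreOrderIter
from sagas.nlu.anal import build_anal_tree

f = build_anal_tree('I saw the girl with a telescope.', 'en', 'corenlp')
# Walk every node and show its dependency relation and part of speech.
for node in PreOrderIter(f):
    print(node.dependency_relation, getattr(node, 'upos', None))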
Example #4
def parse_comps(sents, source):
    sents = fix_sents(sents, source)

    engine = cf.engine(source)
    doc_jsonify, resp = dep_parse(sents, source, engine, ['predicts'])
    if len(resp['predicts']) > 0:
        rs = resp['predicts']
    else:
        rs = get_chunks(doc_jsonify)
    return rs
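
parse_comps prefers the server-side predicts and falls back to locally computed chunks. A sketch of consuming its result, assuming parse_comps is imported from its module (not shown in this listing) and that each chunk carries the 'type', 'word' and 'domains' keys seen in the other examples:

rs = parse_comps('De vrouw heeft verschillende appels.', 'nl')
for r in rs:
    # Each chunk names its pattern type, head word and dependency domains.
    print(r['type'], r['word'], len(r['domains']))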
Example #5
def get_domains(sents, lang, engine='corenlp', options=None):
    """
    >>> from sagas.nlu.legacy.aiobj_kit import get_domains
    >>> get_domains('你有几台笔记本电脑?', 'zh', 'ltp')
    >>> get_domains('列出上周编辑的文件。', 'zh', 'ltp', DomainGetOptions(enable_predicts=True))

    :param sents:
    :param lang:
    :param engine:
    :param options:
    :return:
    """
    # from IPython.display import display

    if options is None:
        options=DomainGetOptions()
    pipelines=['predicts'] if options.enable_predicts else []
    doc_jsonify, resp = dep_parse(sents, lang, engine, pipelines)
    result_set=[]
    if doc_jsonify is not None:
        tc.emp('cyan', resp)
        if resp is not None and 'predicts' in resp and len(resp['predicts'])>0:
            rs=resp['predicts']
            # print(rs)
        else:
            # print(doc_jsonify.words_string())
            rs = get_chunks(doc_jsonify)
        if len(rs)>0:
            if options.list_chunks:
                list_rs(rs, lang)
            if options.deps_graph:
                # display(display_doc_deps(doc_jsonify, resp))
                tc.gv(display_doc_deps(doc_jsonify, resp,
                                       translit_lang=lang if lang in ('ja', 'ko', 'zh', 'fa', 'ar', 'he') else None))
            # rs_represent(rs, data = {'lang': lang, "sents": sents, 'engine': engine,
            #                         'pipelines':pipelines})
            data = {'lang': lang, "sents": sents, 'engine': engine,
                    'pipelines': pipelines}
            for r in rs:
                # fixture.print_table(r, False)
                # print(f"lemma: {r['lemma']}")
                # df = sagas.to_df(r['domains'], ['rel', 'index', 'text', 'lemma', 'children', 'features'])
                # display(df)
                domains = r['domains']
                common = {'lemma': r['lemma'], 'word': r['word'],
                          'stems': r['stems']}
                meta = {'rel': r['rel'], **common, **data}
                result_set.append((domains, meta))
        else:
            tc.emp('red', '.. no predefined chunk-patterns found.')
            tc.info(doc_jsonify.words_string())
            tc.info(doc_jsonify.dependencies_string())
    return result_set
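
A sketch of consuming the (domains, meta) tuples collected above, following the docstring's example call; meta merges the chunk's rel/lemma/word/stems with the lang/sents/engine/pipelines data dict:

from sagas.nlu.legacy.aiobj_kit import get_domains

for domains, meta in get_domains('你有几台笔记本电脑?', 'zh', 'ltp'):
    print(meta['rel'], meta['word'], len(domains))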
Example #6
    def exec_rules(self, sents, lang='en', engine='corenlp'):
        """
        $ python -m sagas.tool.misc exec_rules "今何時ですか?" ja
        $ python -m sagas.tool.misc exec_rules "今何時ですか?" ja knp
        $ python -m sagas.tool.misc exec_rules "望遠鏡で泳いでいる少女を見た。" ja knp
        $ python -m sagas.tool.misc exec_rules 'Мы написали три книги за год.' ru
        $ python -m sagas.tool.misc exec_rules "现在是几点?" zh ltp
        $ rules '我在臺灣開計程車。' zh
        $ rules '我在台湾开出租车。' zh ltp
        $ rules "吸烟对你的健康有害。" zh ltp
        $ rules 'Tini berumur sepuluh tahun.' id
        $ rules 'Berapa umur kamu?' id  (no predefined chunk patterns are found, so all words and dependency relations are printed)

        :param sents:
        :param lang:
        :param engine:
        :return:
        """
        from sagas.nlu.uni_parser import get_chunks
        from sagas.nlu.uni_remote import dep_parse

        pipelines = ['predicts']
        doc_jsonify, resp = dep_parse(sents, lang, engine, pipelines)
        if doc_jsonify is not None:
            color_print('cyan', resp)
            if len(resp['predicts']) > 0:
                rs_represent(resp['predicts'],
                             data={
                                 'lang': lang,
                                 "sents": sents,
                                 'engine': engine,
                                 'pipelines': pipelines
                             })
            else:
                rs = get_chunks(doc_jsonify)
                if len(rs) > 0:
                    # rs_summary(rs)
                    rs_represent(rs,
                                 data={
                                     'lang': lang,
                                     "sents": sents,
                                     'engine': engine,
                                     'pipelines': pipelines
                                 })
                else:
                    color_print('red',
                                '.. no predefined chunk-patterns found.')
                    print(doc_jsonify.words_string())
                    print(doc_jsonify.dependencies_string())
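
exec_rules is documented as a CLI entry point; a sketch of driving that documented invocation from a script, assuming the sagas package and its parsing services are installed:

import subprocess

# Mirrors the documented call: python -m sagas.tool.misc exec_rules "现在是几点?" zh ltp
subprocess.run(['python', '-m', 'sagas.tool.misc',
                'exec_rules', '现在是几点?', 'zh', 'ltp'], check=True)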
Example #7
def parse_deps(text, lang, translit=None):
    text = fix_sents(text, lang)
    engine = cf.engine(lang)
    # g = sentence_view(lang, text, engine=engine, translit_lang=lang, enable_contrast=True)
    doc_jsonify, resp = dep_parse(text, lang, engine, ['predicts'])
    if doc_jsonify is not None:
        list_chunks(doc_jsonify, resp, lang, enable_contrast=True)
        g = display_doc_deps(doc_jsonify, resp, translit_lang=lang)

        st.graphviz_chart(g)
        if translit is not None:
            st.text(f"♤ {translit}")

        words = [word.text for word in doc_jsonify.words]
        tools.contrast(text, lang, word_map=words)
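
parse_deps is written against Streamlit (st.graphviz_chart, st.text); a minimal page wiring it up might look like the sketch below, where st.text_input and the page layout are assumptions rather than part of the original module:

import streamlit as st

text = st.text_input('Sentence', 'what time is it ?')
if text:
    # parse_deps (defined above) renders chunks, the dependency graph
    # and contrast translations for the given sentence.
    parse_deps(text, 'en')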
Example #8
    def clip_parse(self, source, sents='', specified='default', do_test=False):
        """
        >> clip text: یک آبجو مى خواهم.
        $ nlu clip_parse fa
        $ engine='stanford' nluc ar
        $ nlu clip_parse fi 'Tuolla ylhäällä asuu vanha nainen.'
        $ nluc nl 'De vrouw heeft verschillende appels.'
        $ nluc id 'Ini adalah judul buku yang saya baca.' aux
        $ nluc fi 'Voiko täältä lainata aurinkovarjoa?' default True

        :param source:
        :return:
        """
        from sagas.nlu.uni_remote import dep_parse
        from sagas.nlu.common import get_from_clip
        from sagas.conf.conf import cf
        from sagas.nlu.uni_remote_viz import list_chunks
        from sagas.nlu.utils import fix_sents

        if sents=='':
            sents = get_from_clip()
            if sents.strip()=='':
                tc.info('no text available in clipboard.')
                return
        sents=fix_sents(sents, source)
        tc.info(sents)

        # Parse the sentence and display its chunks, domains and contrast translations.
        engine=cf.engine(source)
        doc_jsonify, resp = dep_parse(sents, source, engine, ['predicts'])
        if doc_jsonify is None:
            raise Exception(f'Cannot parse sentence for lang {source}')

        list_chunks(doc_jsonify, resp, source,
                    enable_contrast=True,
                    specified=None if specified=='default' else specified)
        words = [word.text for word in doc_jsonify.words]
        self.contrast(sents, source, word_map=words)

        ## visual tree
        self.main_domains(sents, source, engine, False)
        ## add rulesets procs
        from sagas.nlu.inferencer import do_infers
        cli_cmd, pats = do_infers(sents, source)
        if do_test:
            for pat in pats:
                self.check_rule(sents, source, pat)
Example #9
def row_view(row):
    text = row[1]
    if display_translit and len(row) > 2:
        label = row[2]
    else:
        label = text
    if st.button(f"{label} ✁ {row[0]}"):
        text = fix_sents(text, lang)
        engine = get_engine(lang)
        # g = sentence_view(lang, text, engine=engine, translit_lang=lang, enable_contrast=True)
        doc_jsonify, resp = dep_parse(text, lang, engine, ['predicts'])
        if doc_jsonify is not None:
            list_chunks(doc_jsonify, resp, lang, enable_contrast=True)
            g=display_doc_deps(doc_jsonify, resp, translit_lang=lang)

            st.graphviz_chart(g)
            if len(row) > 2:
                st.text(f"♤ {row[2]}")

            words = [word.text for word in doc_jsonify.words]
            tools.contrast(text, lang, word_map=words)
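
row_view expects each row to carry an id, the sentence text and, optionally, a transliteration, and it reads the lang and display_translit globals of its module. A sketch of feeding it rows inside a Streamlit page; the sample rows are illustrative only:

# Illustrative rows: (id, text, optional transliteration).
rows = [
    (1, '今何時ですか?', 'ima nanji desu ka'),
    (2, '自由を手に入れる', 'jiyuu o te ni ireru'),
]
for row in rows:
    row_view(row)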
Example #10
def sents_summary(sents, source):
    from sagas.nlu.uni_remote import dep_parse
    from sagas.nlu.uni_remote_viz import list_contrast
    from sagas.conf.conf import cf
    from sagas.nlu.utils import fix_sents
    from sagas.nlu.uni_parser import get_chunks

    sents=fix_sents(sents, source)
    engine=cf.engine(source)
    doc_jsonify, resp = dep_parse(sents, source, engine, ['predicts'])
    types=[]
    if doc_jsonify is None:
        raise Exception(f'Cannot parse sentence for lang {source}')
    if len(resp['predicts']) > 0:
        rs=resp['predicts']
    else:
        rs = get_chunks(doc_jsonify)

    for serial, r in enumerate(rs):
        print(f"{serial}. {r['type']} -> {r['word']}")
        types.append(f"{source}:{r['type']}")
    list_contrast(rs, source)
    return types
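
A sketch of calling sents_summary, assuming it is imported from its module (not shown here); the printed chunk summary is a side effect and the returned list holds one '<lang>:<chunk type>' entry per matched chunk:

types = sents_summary('Tini berumur sepuluh tahun.', 'id')
# One entry per chunk, e.g. 'id:<chunk type>'.
print(types)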
Example #11
    def dep_parse(self, sents, lang='en', engine='corenlp'):
        """
        $ python -m sagas.tool.misc dep_parse 'Мы написали три книги за год.' ru
        $ python -m sagas.tool.misc dep_parse "今何時ですか?" ja
        $ python -m sagas.tool.misc dep_parse "今何時ですか?" ja knp
        $ python -m sagas.tool.misc dep_parse "私の趣味は、多くの小旅行をすることです。" ja knp
        $ python -m sagas.tool.misc dep_parse "自由を手に入れる" ja
        $ python -m sagas.tool.misc dep_parse "现在是几点?" zh ltp
        :param sents:
        :param lang:
        :param engine:
        :return:
        """
        from sagas.nlu.uni_jsonifier import rs_summary
        from sagas.nlu.uni_parser import get_chunks
        from sagas.nlu.uni_remote import dep_parse

        doc_jsonify, resp = dep_parse(sents, lang, engine, ['predicts'])
        rs = get_chunks(doc_jsonify)
        rs_summary(rs)
        print('-' * 25, 'predicts')
        pprint(resp)
        print('-' * 25, 'doc')
        pprint(doc_jsonify.as_json)
Example #12
    def predict(self,
                data: Dict[Text, Any],
                rule_str: Text,
                name='_none_',
                engine=None,
                graph=False,
                operator=all) -> bool:
        """
        >>> from sagas.tool.dynamic_rules import DynamicRules
        >>> data = {'lang': 'ja', "sents": '彼のパソコンは便利じゃない。'}
        >>> DynamicRules().predict(data, "subj('adj',ガ=kindof('artifact', 'n'))", engine='knp')

        :param data:
        :param rule_str:
        :param name:
        :param engine:
        :return:
        """
        import sagas.tracker_fn as tc
        from sagas.kit.analysis_kit import AnalysisKit

        # ft=InspectorFixture()
        # domains, meta=ft.request_domains(data, engine=engine)
        if engine is None:
            engine = cf.engine(data['lang'])
        pipelines = ['predicts']

        tc.emp('magenta', f"({data['lang']}) {data['sents']}")
        doc_jsonify, resp = dep_parse(data['sents'], data['lang'], engine,
                                      pipelines)
        if doc_jsonify is not None:
            if len(resp['predicts']) > 0:
                domains_set = resp['predicts']
            else:
                domains_set = get_chunks(doc_jsonify)

            if graph:
                AnalysisKit().console_vis(data['sents'], data['lang'])

            check_r = []
            for r in domains_set:
                domains = r['domains']
                meta = build_meta(r, data)
                print(r['type'], meta['index'], meta['word'], meta['lemma'],
                      list(meta.keys()))
                position = doc_jsonify.get_position(meta['index'])
                pprint(domains)
                # agency = ['c_pron', 'c_noun']
                pat = lambda p, name='': Patterns(
                    domains, meta, p, name=name, doc=doc_jsonify)
                # rs = interp(f"[Patterns(domains, meta, {self.priority}, name='{name}').{rule_str}]",
                if rule_str.startswith('pat('):
                    pattern_text = f"[{rule_str}]"
                else:
                    pattern_text = f"[pat({self.priority}, name='{name}').{rule_str}]"
                rs = interp(pattern_text, domains, meta, pat)
                print_result(rs)

                # collect matched context's results
                # r[1] is true/false, r[3] is context
                results = [el for r in rs for el in r[3].results if r[1]]
                # r[2] is priority
                succ = [abs(r[2]) for r in rs if r[1]]
                priority = max(succ) if len(succ) > 0 else 0
                self.priority_list.append(priority)

                self.result_set.extend(results)
                self.rasa_ents.append({
                    'confidence': None,
                    'start': position[0],
                    'end': position[1],
                    'entity': r['type'],
                    'extractor': 'ruleset',
                    'value': f"{meta['word']}/{meta['lemma']}",
                    'additional_info': results,
                })

                check_r.append(operator([r[1] for r in rs]))

            return operator(check_r)

        return False
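
Building on the docstring example, a sketch of inspecting the state predict accumulates on the instance; result_set, rasa_ents and priority_list are the attributes appended to in the code above, and their initialization in __init__ is assumed:

from sagas.tool.dynamic_rules import DynamicRules

dr = DynamicRules()
data = {'lang': 'ja', "sents": '彼のパソコンは便利じゃない。'}
matched = dr.predict(data, "subj('adj',ガ=kindof('artifact', 'n'))", engine='knp')
if matched:
    # Matched rule results and the entity records built for downstream consumers.
    print(dr.result_set)
    print(dr.rasa_ents)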