Example 1
    def run(self, key, ctx: Context):
        from sagas.nlu.ruleset_procs import cached_chunks
        from sagas.conf.conf import cf

        logger.debug(f".. check against {key}")
        if key not in ctx.indexes:
            return False

        # lemma = ctx.lemmas[key]
        sents = ctx.sents
        lang = ctx.lang
        chunks = cached_chunks(sents, lang, cf.engine(lang))
        doc = chunks['doc']
        ents = get_entities(sents)

        prt = ctx.indexes[key]
        indexes = get_children_index(doc, prt)
        idx_ent = {
            el['index']: el['entity']
            for el in get_entity_mapping(sents, doc, ents)
        }
        children_ents = [(idx, idx_ent.get(idx, '_')) for idx in indexes]

        result = self.test_ent in {e[1] for e in children_ents}
        if result:
            ctx.add_result(self.name(), 'default', key, idx_ent)
        return result
Example 2
    def run(self, key, ctx: Context):
        logger.debug(f"check key: {key}")
        lang = ctx.lang
        words = self.extract_specs(key, ctx)
        pos = self.pos_indicator

        self.check_opts(key, ctx)

        resultset: List[bool] = []
        valid_words = set()
        for kind in self.cats:
            for word in words:
                result = self.subs_fn(kind, word, lang, pos)
                logger.debug(
                    f"check word {word} against {kind}, result is {result}")
                resultset.append(result)
                if result:
                    valid_words.add(word)

        fin = any(resultset)
        if fin:
            ctx.add_result(self.name(),
                           'default',
                           '_' if '/' in key else key, {
                               **self.result_base, 'pos': pos,
                               'words': list(valid_words)
                           },
                           delivery_type='sentence')

        return fin
Example 3
    def run(self, key, ctx: Context):
        from jsonpath_ng import parse
        from sagas.nlu.inspector_wordnet import predicate
        from sagas.nlu.ruleset_procs import cached_chunks
        from sagas.conf.conf import cf

        lang = ctx.lang
        domain_name = f"{self.domains}_domains"  # like: 'verb_domains'
        parsers = [parse(normal_path(expr)) for expr in self.paths]
        results = []
        engine = cf.engine(lang) if self.engine is None else self.engine
        chunks = cached_chunks(ctx.sents, lang, engine)
        for chunk in chunks[domain_name]:
            json_data = chunk
            # for expr in exprs:
            for idx, parser in enumerate(parsers):
                # print([(match.value, str(match.full_path)) for match in parser.find(json_data)])
                word = '/'.join(
                    [match.value for match in parser.find(json_data)])
                pred_r = predicate(self.kind, word, lang, self.pos)
                # tc.emp('yellow' if not pred_r else 'green', f".. {word} is {self.kind}: {pred_r}")
                logger.debug(f".. {word} is {self.kind}: {pred_r}")
                results.append(pred_r)
                if pred_r:
                    ctx.add_result(self.name(),
                                   'default',
                                   f"{self.domains}:{self.paths[idx]}", {
                                       'category': self.kind,
                                       'pos': self.pos,
                                       **word_values(word, lang)
                                   },
                                   delivery_type='sentence')

        logger.debug(f"{results}")
        return any(results) if self.match_method == 'any' else all(results)
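
The path extraction above leans on jsonpath_ng: each expression is compiled once with parse(), and every match value is joined with '/' into a single candidate word before the predicate test. A minimal standalone sketch of that step, with an invented chunk dict for illustration:

from jsonpath_ng import parse

chunk = {'rel': 'xcomp', 'children': [{'lemma': 'play'}, {'lemma': 'music'}]}
parser = parse('children[*].lemma')
# join all matched values into one '/'-separated candidate word
word = '/'.join(match.value for match in parser.find(chunk))
print(word)  # -> play/music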
Example 4
def extract_nouns(key: Text, ctx: Context, check_fn) -> bool:
    # rs=extract_ko('nouns', ctx.words[key])
    rs = extract_ko('nouns', ctx.get_single_chunk_text(key))
    if rs:
        ctx.add_result('cust', 'nouns', key, rs)
        return True
    return False
Example 5
    def run(self, key, ctx:Context):
        import fnmatch, re

        if '/' in key:
            lemma=key.split('/')[-1]  # the key is formatted like 'word/lemma'
        else:
            lemma=ctx.lemmas[key]

        if self.match_method=='equals':
            return lemma==self.target
        elif self.match_method=='in':
            return lemma in self.target
        elif self.match_method=='chunk':
            if isinstance(self.target, list):
                for t in self.target:
                    if t in ctx.chunk_pieces(key, lowercase=True):
                        return True
                return False
            else:
                return self.target in ctx.chunk_pieces(key, lowercase=True)
        elif self.match_method=='glob':
            regex = fnmatch.translate(self.target)
            reobj = re.compile(regex)
            return reobj.match(lemma) is not None
        elif self.match_method=='regex':
            reobj = re.compile(self.target)
            return reobj.match(lemma) is not None
        else:
            raise ValueError(f"Cannot support match method {self.match_method}")
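
A quick aside on the 'glob' branch: fnmatch.translate turns a shell-style wildcard into a regular-expression string, after which matching proceeds exactly as in the 'regex' branch. A standalone illustration (the sample lemmas are invented):

import fnmatch, re

# 'play*' becomes an anchored regex such as '(?s:play.*)\\Z'
reobj = re.compile(fnmatch.translate('play*'))
print(reobj.match('playing') is not None)  # True: 'play*' covers 'playing'
print(reobj.match('replay') is not None)   # False: no leading 'play'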
Example 6
def extract_datetime(key: Text, ctx: Context, check_fn):
    from sagas.nlu.content_representers import cnt_repr
    rs = cnt_repr.parse_snips(ctx.get_single_chunk_text(key), 'ko')
    if rs:
        ctx.add_result('cust', 'datetime', key, rs)
        return True
    return False
Example 7
def expand(dispatcher: DispatcherIntf, data, keys, specific_domains):
    fixt = InspectorFixture()
    domains, meta = fixt.request_domains(data)
    ctx = Context(meta, domains)
    for key in keys:
        for chunk in ctx.chunk_pieces(key):
            dispatcher.execute(chunk)
Example 8
def fn_inherit(key:Text, ctx:Context, *args, **kwargs):
    lemma=ctx.lemmas[key]
    pos=ctx.get_feat_pos(key)
    logger.debug(f"predicate {lemma}, {pos} : {args[0]}")
    succ= inherit_axis(lemma, pos, args[0])
    if succ:
        ctx.add_result('axis', fn_inherit.__name__, key,
                       val={'lemma':lemma, 'pos':pos, 'axis':args[0]})
    return succ
Example 9
    def run_simp(self, key, ctx: Context):
        if ctx.meta['lang'] == 'da':
            # if 'ikke' in ctx.chunks[key] or 'ikke'==ctx.lemmas[key]:
            if ctx.chunk_contains(key, ['ikke']) or ctx.lemmas[key] in ['ikke']:
                return True
        elif ctx.meta['lang'] == 'de':
            if ctx.chunk_contains(key, ['nicht']) or ctx.lemmas[key] in ['nicht']:
                return True
        return False
Example 10
    def run(self, key, ctx: Context):
        from sagas.nlu.ruleset_procs import cached_chunks
        from sagas.conf.conf import cf
        chunks = cached_chunks(ctx.sents, ctx.lang, cf.engine(ctx.lang))
        index = next((x[1] for x in ctx.domains if x[0] == self.part), -1)
        if index != -1:
            rs = self.collect_children(chunks, ctx.lang, index + 1)
            if rs:
                ctx.add_result(self.name(), 'default', self.part, rs)
        return True
Example 11
    def run(self, key, ctx: Context):
        from sagas.nlu.signals import signals
        results = signals.fire(self.name(),
                               self.signal,
                               key=key,
                               ctx=ctx,
                               **self.parameters)
        for r in results:
            ctx.add_result(self.name(),
                           provider=r['name'],
                           part_name=key,
                           val=r['result'])
        return True
Example 12
def extract_noun_chunk(key: Text, ctx: Context):
    rs = extract_ko('nouns', ctx.get_single_chunk_text(key))
    if rs:
        # return rs[0]['text']
        # any one noun chunk matching the condition is enough, so join them with '/'
        return '/'.join([w['text'] for w in rs])
    return ctx.words[key]
Example 13
    def run(self, key, ctx: Context):
        lang = ctx.meta['lang']
        # word=key  # the key == word
        word = self.extract_word(key, ctx)
        if self.pos_indicator == '~':
            pos = 'v'
        else:
            pos = self.pos_indicator

        result = self.substitute(word, lang, pos)
        logger.debug(
            f"check word {word} against {self.kind}, result is {result}")
        if result:
            ctx.add_result(self.name(),
                           'default',
                           self.norm_path(key), {
                               **self.result_base, 'pos': pos,
                               'word': word
                           },
                           delivery_type='sentence')
        return result
Example 14
    def run(self, key, ctx:Context):
        from sagas.nlu.inspectors_dataset import interrogative_maps, trans_val

        lang=ctx.meta['lang']

        if lang in interrogative_maps:
            data_map=interrogative_maps[lang][self.cat]
            if self.is_part:
                # val=ctx.lemmas[key]
                word_full=ctx.get_word(key)
                val=trans_val(word_full, lang)
                succ= ctx.chunk_contains(key, data_map) or val in data_map
                if succ:
                    ctx.add_result(self.name(), 'default', key,
                                   {'category': self.cat, **word_values(word_full, lang)},
                                   delivery_type='sentence')
                return succ
            else:
                word_val=trans_val(key, lang)
                logger.debug(f"*** {key} -- {word_val}, {data_map}")

                succ= word_val in data_map
                if succ:
                    ctx.add_result(self.name(), 'default', 'head',
                                   {'category': self.cat, **word_values(key, lang)},
                                   delivery_type='sentence')
                return succ
        return False
Example 15
def display_synsets(theme, meta, r, lang, collect=False):
    from sagas.nlu.nlu_cli import retrieve_word_info
    # from termcolor import colored

    from sagas.nlu.inspector_common import Context
    ctx = Context(meta, r['domains'])

    resp = []

    # word = r['lemma']
    def retrieve(word, indicator, pos='*'):
        from sagas.nlu.synonyms import synonyms
        word_syn = synonyms.match(word, lang)
        # print(f".. subs {word}: {word_syn}")
        if word_syn is not None:
            rs = retrieve_word_info('get_synsets', word_syn, 'en', pos=pos)
        else:
            rs = retrieve_word_info('get_synsets', word, lang, pos=pos)
        if len(rs) > 0:
            mean = get_possible_mean(rs)
            if collect:
                resp.append({
                    'word': word,
                    'indicator': indicator,
                    'spec': mean,
                    'comments': rs
                })
            else:
                comments = ', '.join(rs)[:25]
                # tc.info('♥ %s(%s): %s...' % (colored(word, 'magenta'), indicator, comments))
                tc.emp(
                    'magenta',
                    '♥ %s(%s, %s): %s...' % (word, indicator, mean, comments))
                resp.append('♥ %s(%s): %s...' % (word, indicator, comments))
            return True
        return False

    retrieve(f"{r['word']}/{r['lemma']}", theme,
             'v' if theme == '[verb]' else '*')
    if 'head' in meta:
        # print('.........')
        retrieve(meta['head'], 'head')
    # print(f'.. lemmas: {ctx.lemmas.keys()}')
    for opt in display_synsets_opts:
        if opt in ctx.lemmas:
            # print(f".. retrieve {ctx.lemmas[opt]}, tokens: {ctx.tokens[opt]}")
            # retrieve(ctx.lemmas[opt], opt)
            for tok in ctx.tokens[opt]:
                retrieve(tok, opt)
    return resp
Example 16
    def run(self, key, ctx:Context):
        checkers = []
        lang = ctx.meta['lang']
        # cnt = ' '.join(ctx.chunks['obl'])
        # cnt = ' '.join(ctx.chunks[key])

        if self.entire:
            checkers.append(self.providers[self.provider](key, lang, ctx, 'sents'))
        else:
            for cnt in ctx.chunk_pieces(key):
                checkers.append(self.providers[self.provider](cnt, lang, ctx, key))
        # print('... put %s'%self.cache_key(key))
        # print(ctx.meta['intermedia'])
        return any(checkers)
Example 17
    def run(self, key, ctx: Context):
        # result=False
        lang = ctx.meta['lang']
        # word=ctx.lemmas[key]
        word = self.extract_word(key, ctx)
        # print(f".. predicate {word}")
        if self.pos_indicator == '~':
            pos = self.get_pos_by_feat(ctx.feats[key])
        else:
            pos = self.pos_indicator

        # result= predicate(self.kind, word, lang, pos, self.only_first)
        result = self.substitute(word, lang, pos)
        logger.debug(f"result base: {self.result_base}")
        if result:
            ctx.add_result(self.name(),
                           'default',
                           self.norm_path(key), {
                               **self.result_base, 'pos': pos,
                               'word': word
                           },
                           delivery_type='sentence')
        return result
Example 18
    def process_result(self, ctx: Context, results: List[Dict[Text, Any]]) -> bool:
        """
        Results sample:
        [{'name': 'collect_verb',
            'result': [token_data(word='think/think', pos='verb', path='/root')]}]
        :param results:
        :return:
        """
        has_result = False
        for result in results:
            logger.debug(result)
            vals = result['result']
            if vals:
                # if any predicate pipe ends up with output, the match succeeds;
                # to require every pipe to match, write them as separate pipes
                # (i.e. separate inspectors), since a pattern only matches when
                # all of its inspectors are true
                has_result = True
                path_val = ctx.domain_name + ':' + vals[0]['path'][1:] if vals else '_'
                ctx.add_result(self.name(),
                               provider=f"{result['sig']}/{result['name']}",
                               part_name=path_val,
                               val=vals)
        return has_result
Example 19
    def run(self, key, ctx: Context):
        from sagas.nlu.rasa_procs import invoke_nlu

        lang = ctx.meta['lang']
        if lang not in default_projects:
            return False
        # proj=default_projects[lang]
        proj = lang

        def proc(cnt: Text) -> bool:
            succ = False
            logger.debug('query with rasa-nlu: %s', cnt)
            # print(('query with rasa-nlu: %s'%cnt))
            resp = invoke_nlu(self.endpoint, proj, "current", cnt)
            if resp is not None:
                intent = resp["intent"]
                entities = resp['entities']

                ent_names = {ent['entity'] for ent in entities}
                intent_name = intent['name']
                intent_confidence = float(intent['confidence'])
                self._result = intent_confidence
                logger.info('%s(%s) -> %f, with entities %s' %
                            (cnt, intent_name, intent_confidence,
                             ', '.join(ent_names)))
                # print(f'{self.intent}, {self.confidence}')
                if self.intent == intent_name and intent_confidence > self.confidence:
                    # print('... matched intent and confidence')
                    ctx.add_result(self.name(), 'default', key, {
                        'intent': intent_name,
                        'confidence': intent_confidence
                    })
                    if self.contains_entity is None:
                        succ = True
                    elif ent_names.issubset(self.contains_entity):
                        succ = True
            return succ

        if self.entire:
            # print('proc -> %s'%key)
            return proc(key)
        else:
            for cnt in ctx.stem_pieces(key):
                result = proc(cnt)
                if result:
                    return True

        return False
Example 20
    def run(self, key, ctx:Context):
        from sagas.nlu.inspectors_dataset import negative_maps
        from sagas.nlu.inspectors_dataset import translit_langs
        from sagas.nlu.transliterations import translits

        lang=ctx.meta['lang']
        if lang in negative_maps:
            data_map=negative_maps[lang]
            if lang in translit_langs:
                word_val=translits.translit(ctx.words[key], lang)
            else:
                word_val=ctx.lemmas[key]
            if ctx.chunk_contains(key, data_map) or word_val in data_map:
                return True
        return False
Example 21
def build_context(data: Dict[Text, Text],
                  dominator: Text,
                  name='_noname_',
                  **kwargs):
    from sagas.nlu.inferencer import parse

    rs = parse(data)
    for serial, r in enumerate(rs):
        # type_name = r['type']
        # theme = type_name.split('_')[0]
        domains = r['domains']
        # print(type_name)
        meta = build_meta(r, data)
        ctx = Context(meta, domains, name=name)
        pat = Patterns(domains, meta, 5, name=name).opts(**kwargs)
        serv = pat.prepare(dominator)
        yield ctx, serv
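
Since build_context is a generator, a caller iterates the yielded (ctx, serv) pairs, one per parse result. A hedged usage sketch, assuming a payload shaped like the ones elsewhere on this page ({'lang': ..., 'sents': ...}) and 'verb' as the dominator; both values are illustrative only:

data = {'lang': 'en', 'sents': 'I want to play music.'}  # hypothetical payload
for ctx, serv in build_context(data, dominator='verb'):
    print(ctx.lemmas)  # per-relation lemma map for this parse result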
Example 22
    def print_sents(self, sents, lang, engine=None):
        """
        $ python -m sagas.nlu.ruleset_procs print_sents 'I want to play music.' en
        $ python -m sagas.nlu.ruleset_procs print_sents "クモは4つの右の目をしています。" ja corenlp

        :param sents:
        :param lang:
        :return:
        """
        # lang = 'en'
        if engine is None:
            engine = cf.engine(lang)
        data = {'lang': lang, "sents": sents, 'engine': engine}
        doc_jsonify, resp = parse_sents(data)
        rs = get_chunks(doc_jsonify)

        if lang in non_spaces:
            delim = ''
        else:
            delim = ' '
        for serial, r in enumerate(rs):
            meta = build_meta(r, data)
            domains = r['domains']
            # print([(x[0], x[2]) for x in domains])
            #
            keys = {x[0] for x in domains}
            grp = lambda p, idx: [x[idx] for x in domains if x[0] == p]
            tokens = {x: grp(x, 2) for x in keys}
            words = {x: delim.join(grp(x, 2)) for x in keys}
            lemmas = {x: delim.join(grp(x, 3)) for x in keys}
            print('meta keys', meta.keys())
            print('tokens', tokens)
            print('words', meta['word'], words)
            print('lemmas', lemmas)
            #
            ctx = Context(meta, domains)
            # print(ctx.lemmas)
            print('chunks', ctx._chunks)

        g = display_doc_deps(doc_jsonify, resp, translit_lang=lang)
        print(*[(w.index, w.text, w.governor,
                 doc_jsonify.words[w.governor - 1].text)
                for w in doc_jsonify.words],
              sep='\n')
        tc.gv(g)
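
The tokens/words/lemmas maps in print_sents come from one small idiom: collect the distinct relation keys, then project a tuple column per key. The same idiom in isolation, with made-up domain tuples:

domains = [('obj', 3, 'music', 'music'), ('obj', 4, 'box', 'box'),
           ('nsubj', 1, 'I', 'I')]
keys = {x[0] for x in domains}
# project column idx of every tuple whose relation is p
grp = lambda p, idx: [x[idx] for x in domains if x[0] == p]
tokens = {x: grp(x, 2) for x in keys}           # {'obj': ['music', 'box'], 'nsubj': ['I']}
words = {x: ' '.join(grp(x, 2)) for x in keys}  # {'obj': 'music box', 'nsubj': 'I'}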
Example 23
    def run(self, key, ctx: Context):
        result = False
        lang = ctx.meta['lang']
        # cnt = ' '.join(ctx.chunks[key])
        # cnt=ctx.get_single_chunk_text(key)
        requestors = {
            'ru': lambda rc: query_entities_by_url(cf.ensure('ner_ru'), rc),
        }
        for cnt in ctx.chunk_pieces(key):
            data = {'lang': lang, 'sents': cnt}
            if lang in requestors:
                resp = requestors[lang](data)
            else:
                resp = query_entities(data)
            if resp['result'] == 'success':
                dims = [d['entity'] for d in resp['data']]
                # print('entities ->', ', '.join(dims))
                logger.info('entities -> %s, self.dim -> %s', ', '.join(dims), self.dim)
                if self.dim in dims:
                    print('\t%s ∈' % cnt, self.dim)
                    result = True
        return result
Example 24
def check_interr(key: Text, ctx: Context, check_fn, lang='pt') -> bool:
    for stem in ctx.stem_pieces(key):
        interr = get_interrogative(stem, lang)
        if interr and check_fn(interr):
            return True
    return False
Example 25
def ex_word(key: Text, cnt: Text, comp: Text, ctx: Context):
    ctx.add_result(extractor, comp, key, {
        'text': cnt,
        'lemma': ctx.lemmas[key]
    })
    return True
Example 26
    def run(self, key, ctx: Context):
        # when pickup is '_', the key itself is the value
        comp_val = key if self.pickup == '_' else ''
        key = self.pickup or key

        ex_map = {
            'date_search':
            lambda cnt, comp: ex_date_search(key, cnt, comp, ctx),
            # .. extract_for('plain+date_search+date_parse', '時間'),
            'date_parse':
            lambda cnt, comp: ex_date_parse(key, cnt, comp, ctx),
            'plain':
            lambda cnt, comp: ex_plain(key, cnt, comp, ctx),
            'word':
            lambda cnt, comp: ex_word(key, cnt, comp, ctx),
            # .. extract_for('plain+translit', 'obj'),
            'translit':
            lambda cnt, comp: ex_translit(key, cnt, comp, ctx),
            'email':
            lambda cnt, comp: ex_dims(key, cnt, comp, ctx, 'email'),
            # .. extract_for('number', 'obl'),
            'number':
            lambda cnt, comp: ex_dims(key, cnt, comp, ctx, 'number'),
            # .. extract_for('time', 'advmod'),
            'time':
            lambda cnt, comp: ex_dims(key, cnt, comp, ctx, 'time'),
            # .. extract_for('plain+temperature', 'ニ'),
            'temperature':
            lambda cnt, comp: ex_dims(key, cnt, comp, ctx, 'temperature'),
            # example: extract_for('rasa', '_')
            'rasa':
            lambda cnt, comp: ex_rasa(key, cnt, comp, ctx),
            # example: extract_for('chunk', 'verb:xcomp/obj')
            'chunk':
            lambda cnt, comp: ex_chunk(key, cnt, comp, ctx, lambda w:
                                       (w.text, w.upos.lower())),
            # example: extract_for('chunk_text', 'verb:xcomp/obj')
            'chunk_text':
            lambda cnt, comp: ex_chunk(key, cnt, comp, ctx, lambda w: w.text),
            'chunk_feats':
            lambda cnt, comp: ex_chunk(key, cnt, comp, ctx, lambda w: w.feats),
            # .. extract_for('feats', 'verb:_'),
            #        extract_for('feats', 'verb:obj')
            'feats':
            lambda cnt, comp: ex_feats(key, cnt, comp, ctx),
            # example: extract_for('ner', '_'), extract_for('ner', 'xcomp')
            'ner':
            lambda cnt, comp: ex_ner(key, cnt, comp, ctx),
        }

        if self.pickup == '_' or is_full_domain_path(self.pickup):
            self.results['_'] = []
            for comp in self.comp_as:
                op = ex_map[comp](comp_val, comp)
                self.results['_'].append((comp, op))
        else:
            for cnt in ctx.chunk_pieces(key):
                self.results[key] = []
                for comp in self.comp_as:
                    ex = ex_map[comp]
                    op = ex(cnt, comp)
                    # self.results[comp] = op
                    self.results[key].append((comp, op))

        return True  # extraction only; it takes no part in the verdict, so always return True
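
ex_map is a plain dispatch table: every extractor shares the (cnt, comp) call shape, so a composite spec such as 'plain+date_search+date_parse' can be split on '+' and dispatched uniformly. A stripped-down sketch of the pattern with invented handlers:

ex_map = {
    'upper': lambda cnt, comp: cnt.upper(),
    'length': lambda cnt, comp: len(cnt),
}
# split a composite spec and dispatch each component against the same content
results = [(comp, ex_map[comp]('music box', comp))
           for comp in 'upper+length'.split('+')]
print(results)  # [('upper', 'MUSIC BOX'), ('length', 9)]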
Example 27
        def service_method(*args_, **kwargs_):
            """Return the result of the check request."""
            result = True
            options = []

            ctx = Context(self.meta, self.domains, name=self.name)
            if not self.verify(ctx):
                options.append(f"verify fail: {self._opts}")
                return "%s with %s" % (method, ', '.join(options)), \
                       False, \
                       self.priority, \
                       ctx

            def_args = (self._opts[ctx.domain_name].args
                        if ctx.domain_name in self._opts else [])
            def_kwargs = (self._opts[ctx.domain_name].kwargs
                          if ctx.domain_name in self._opts else {})
            args = [*args_, *def_args]
            kwargs = {**kwargs_, **def_kwargs}

            # the args has been checked as pos or inspector or callable functor
            if self.meta is not None and len(args) > 0:
                # opt_ret=check_item(self.meta, 'pos', args, ctx)
                # if not opt_ret:
                #     result = False
                # options.append('{} is {}: {}'.format('pos', args, opt_ret))
                if not self.funcs[method](args, ctx, options):
                    result = False

            # rel_feats = {x[0]: x[5] for x in self.domains}
            rel_feats = ctx.feats

            for key, value in kwargs.items():
                if not key.startswith('head_'):
                    key = key.replace('_', ':')
                key = trip_number_suffix(key)
                if key.startswith('::'):
                    # originally started with '__', like '__engine'
                    opt_name = key[2:]
                    opt_ret = self.meta[opt_name] == value
                    if not opt_ret:
                        logger.debug('%s=%s checker fail, skip this pattern.' %
                                     (key, value))
                elif key.startswith(':'):
                    opt_ret = check_item(self.meta, key[1:], value, ctx)
                else:
                    opt_ret = check_item(rel_feats, key, value, ctx)

                if not opt_ret:
                    result = False
                options.append('{} is {}: {}'.format(key, value, opt_ret))

            single_insps = [
                insp for insp in args if isinstance(insp, Inspector)
            ]
            pair_insps = {
                k: insp
                for k, insp in kwargs.items() if isinstance(insp, Inspector)
            }
            if len(self.after_evs) > 0:
                logger.debug(
                    f".. after_evs {[(el[0].name(), el[1]) for el in self.after_evs]}"
                )
                for arg, key_val in self.after_evs:
                    if not result and arg.when_succ:
                        continue

                    arg.infer(single_insps, pair_insps)
                    opt_ret = arg.check(key_val, ctx)
                    # written this way so that once result is False it is never
                    # flipped back to True; i.e. result latches at False
                    if not opt_ret:
                        result = False
                    options.append('{} is {}: {}'.format('pos', arg, opt_ret))

                self.after_evs.clear()

            return "%s with %s" % (method, ', '.join(options)), \
                   result, \
                   self.priority, \
                   ctx
Example 28
    def run(self, key, ctx: Context):
        ctx.add_result(self.name(), 'default', 'sents', list(self.tags))
        return True
Example 29
    def run(self, key, ctx: Context):
        self.fields.update(self.result_map)
        ctx.add_result(self.name(), self.provider, 'defined', self.fields)
        return True
Example 30
    def run(self, key, ctx: Context):
        ctx.add_result(self.name(), 'default', 'defined', list(self.tools))
        return True