def extract_datetime(key: Text, ctx: Context, check_fn) -> bool:
    from sagas.nlu.content_representers import cnt_repr

    rs = cnt_repr.parse_snips(ctx.get_single_chunk_text(key), 'ko')
    if rs:
        ctx.add_result('cust', 'datetime', key, rs)
        return True
    return False

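# All of the helpers in this listing report their findings through
# Context.add_result(inspector, provider, part_name, val, delivery_type=...).
# Below is a minimal sketch of that contract using a hypothetical stand-in
# class; the real sagas Context carries much more state (sents, lang, lemmas,
# indexes, feats, ...), so this is only an illustration of the accumulator
# behaviour these snippets rely on, not the actual implementation.
class _SketchContext:
    def __init__(self):
        self.results = []

    def add_result(self, inspector, provider, part_name, val, delivery_type='part'):
        # Each successful check appends one record; downstream consumers can
        # then group the records by inspector and provider.
        self.results.append({'inspector': inspector,
                             'provider': provider,
                             'part': part_name,
                             'value': val,
                             'delivery': delivery_type})
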
def run(self, key, ctx: Context):
    from sagas.nlu.ruleset_procs import cached_chunks
    from sagas.conf.conf import cf

    logger.debug(f".. check against {key}")
    if key not in ctx.indexes:
        return False

    sents = ctx.sents
    lang = ctx.lang
    chunks = cached_chunks(sents, lang, cf.engine(lang))
    doc = chunks['doc']
    ents = get_entities(sents)

    prt = ctx.indexes[key]
    indexes = get_children_index(doc, prt)
    idx_ent = {el['index']: el['entity']
               for el in get_entity_mapping(sents, doc, ents)}
    children_ents = [(idx, idx_ent.get(idx, '_')) for idx in indexes]

    result = self.test_ent in {e[1] for e in children_ents}
    if result:
        ctx.add_result(self.name(), 'default', key, idx_ent)
    return result

def run(self, key, ctx: Context):
    from jsonpath_ng import parse
    from sagas.nlu.inspector_wordnet import predicate
    from sagas.nlu.ruleset_procs import cached_chunks
    from sagas.conf.conf import cf

    lang = ctx.lang
    domain_name = f"{self.domains}_domains"  # like: 'verb_domains'
    parsers = [parse(normal_path(expr)) for expr in self.paths]
    results = []

    engine = cf.engine(lang) if self.engine is None else self.engine
    chunks = cached_chunks(ctx.sents, lang, engine)
    for chunk in chunks[domain_name]:
        json_data = chunk
        for idx, parser in enumerate(parsers):
            word = '/'.join(match.value for match in parser.find(json_data))
            pred_r = predicate(self.kind, word, lang, self.pos)
            logger.debug(f".. {word} is {self.kind}: {pred_r}")
            results.append(pred_r)
            if pred_r:
                ctx.add_result(self.name(), 'default',
                               f"{self.domains}:{self.paths[idx]}",
                               {'category': self.kind,
                                'pos': self.pos,
                                **word_values(word, lang)},
                               delivery_type='sentence')

    logger.debug(f"{results}")
    return any(results) if self.match_method == 'any' else all(results)

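# The inspector above joins every match of a jsonpath expression into one
# candidate word before handing it to predicate(...). A standalone sketch of
# that step with jsonpath_ng, on a made-up chunk document; the field names
# ('domains', 'lemma') are assumptions for illustration, not the real
# verb_domains layout:
from jsonpath_ng import parse

sample_chunk = {'rel': 'root',
                'lemma': 'think',
                'domains': [{'rel': 'nsubj', 'lemma': 'i'},
                            {'rel': 'obj', 'lemma': 'idea'}]}
expr = parse("domains[*].lemma")
word = '/'.join(match.value for match in expr.find(sample_chunk))
# word == 'i/idea'; this joined form is what the predicate check receives
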
def run(self, key, ctx: Context): logger.debug(f"check key: {key}") lang = ctx.lang words = self.extract_specs(key, ctx) pos = self.pos_indicator self.check_opts(key, ctx) resultset: List[bool] = [] valid_words = set() for kind in self.cats: for word in words: result = self.subs_fn(kind, word, lang, pos) logger.debug( f"check word {word} against {kind}, result is {result}") resultset.append(result) if result: valid_words.add(word) fin = any(resultset) if fin: ctx.add_result(self.name(), 'default', '_' if '/' in key else key, { **self.result_base, 'pos': pos, 'words': list(valid_words) }, delivery_type='sentence') return fin
def run(self, key, ctx: Context):
    from sagas.nlu.inspectors_dataset import interrogative_maps, trans_val

    lang = ctx.meta['lang']
    if lang in interrogative_maps:
        data_map = interrogative_maps[lang][self.cat]
        if self.is_part:
            word_full = ctx.get_word(key)
            val = trans_val(word_full, lang)
            succ = ctx.chunk_contains(key, data_map) or val in data_map
            if succ:
                ctx.add_result(self.name(), 'default', key,
                               {'category': self.cat,
                                **word_values(word_full, lang)},
                               delivery_type='sentence')
            return succ
        else:
            word_val = trans_val(key, lang)
            logger.debug(f"*** {key} -- {word_val}, {data_map}")
            succ = word_val in data_map
            if succ:
                ctx.add_result(self.name(), 'default', 'head',
                               {'category': self.cat,
                                **word_values(key, lang)},
                               delivery_type='sentence')
            return succ
    return False

def extract_nouns(key: Text, ctx: Context, check_fn) -> bool:
    rs = extract_ko('nouns', ctx.get_single_chunk_text(key))
    if rs:
        ctx.add_result('cust', 'nouns', key, rs)
        return True
    return False

def fn_inherit(key: Text, ctx: Context, *args, **kwargs):
    lemma = ctx.lemmas[key]
    pos = ctx.get_feat_pos(key)
    logger.debug(f"predicate {lemma}, {pos} : {args[0]}")
    succ = inherit_axis(lemma, pos, args[0])
    if succ:
        ctx.add_result('axis', fn_inherit.__name__, key,
                       val={'lemma': lemma, 'pos': pos, 'axis': args[0]})
    return succ

def run(self, key, ctx: Context):
    from sagas.nlu.ruleset_procs import cached_chunks
    from sagas.conf.conf import cf

    chunks = cached_chunks(ctx.sents, ctx.lang, cf.engine(ctx.lang))
    index = next((x[1] for x in ctx.domains if x[0] == self.part), -1)
    if index != -1:
        rs = self.collect_children(chunks, ctx.lang, index + 1)
        if rs:
            ctx.add_result(self.name(), 'default', self.part, rs)
            return True
    return False

def run(self, key, ctx: Context):
    from sagas.nlu.signals import signals

    results = signals.fire(self.name(), self.signal,
                           key=key, ctx=ctx, **self.parameters)
    for r in results:
        ctx.add_result(self.name(), provider=r['name'],
                       part_name=key, val=r['result'])
    return True

def run(self, key, ctx: Context):
    lang = ctx.meta['lang']
    # here the key itself is the word to check
    word = self.extract_word(key, ctx)
    pos = 'v' if self.pos_indicator == '~' else self.pos_indicator

    result = self.substitute(word, lang, pos)
    logger.debug(f"check word {word} against {self.kind}, result is {result}")
    if result:
        ctx.add_result(self.name(), 'default', self.norm_path(key),
                       {**self.result_base,
                        'pos': pos,
                        'word': word},
                       delivery_type='sentence')
    return result

def run(self, key, ctx: Context):
    lang = ctx.meta['lang']
    word = self.extract_word(key, ctx)
    if self.pos_indicator == '~':
        pos = self.get_pos_by_feat(ctx.feats[key])
    else:
        pos = self.pos_indicator

    result = self.substitute(word, lang, pos)
    logger.debug(f"result base: {self.result_base}")
    if result:
        ctx.add_result(self.name(), 'default', self.norm_path(key),
                       {**self.result_base,
                        'pos': pos,
                        'word': word},
                       delivery_type='sentence')
    return result

def process_result(self, ctx: Context, results: List[Dict[Text, Any]]) -> bool:
    """
    Results sample:
        [{'name': 'collect_verb',
          'result': [token_data(word='think/think', pos='verb', path='/root')]}]

    :param results:
    :return:
    """
    has_result = False
    for result in results:
        logger.debug(result)
        vals = result['result']
        if vals:
            # If any evaluation pipe produces output, the match succeeds.
            # If every pipe is required to match, split them into separate
            # pipes (i.e. separate inspectors), since a pattern only matches
            # when all of its inspectors evaluate to true.
            has_result = True
        path_val = (ctx.domain_name + ':' + vals[0]['path'][1:]) if vals else '_'
        ctx.add_result(self.name(),
                       provider=f"{result['sig']}/{result['name']}",
                       part_name=path_val,
                       val=vals)
    return has_result

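# A worked example of the path_val derivation above, following the docstring
# sample. The 'verb_domains' value for ctx.domain_name is an assumption, and
# plain dicts stand in for the token_data records:
vals = [{'word': 'think/think', 'pos': 'verb', 'path': '/root'}]
path_val = ('verb_domains' + ':' + vals[0]['path'][1:]) if vals else '_'
assert path_val == 'verb_domains:root'  # pipes with no output are filed under '_'
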
def ex_word(key: Text, cnt: Text, comp: Text, ctx: Context):
    ctx.add_result(extractor, comp, key,
                   {'text': cnt, 'lemma': ctx.lemmas[key]})
    return True

def run(self, key, ctx: Context):
    ctx.add_result(self.name(), type(self.arg).__name__,
                   self.arg.part, self.arg.__dict__)
    return True

def run(self, key, ctx: Context):
    ctx.add_result(self.name(), 'default', 'defined', list(self.tools))
    return True

def run(self, key, ctx: Context):
    self.fields.update(self.result_map)
    ctx.add_result(self.name(), self.provider, 'defined', self.fields)
    return True

def run(self, key, ctx: Context):
    ctx.add_result(self.name(), 'default', 'sents', list(self.tags))
    return True