def run(self, key, ctx: Context):
    from sagas.nlu.ruleset_procs import cached_chunks
    from sagas.conf.conf import cf

    logger.debug(f".. check against {key}")
    if key not in ctx.indexes:
        return False

    sents = ctx.sents
    lang = ctx.lang

    chunks = cached_chunks(sents, lang, cf.engine(lang))
    doc = chunks['doc']
    ents = get_entities(sents)
    prt = ctx.indexes[key]
    indexes = get_children_index(doc, prt)
    # map token index -> entity tag for the whole sentence
    idx_ent = {el['index']: el['entity']
               for el in get_entity_mapping(sents, doc, ents)}
    # pair each child index with its entity tag ('_' when untagged)
    children_ents = [(idx, idx_ent.get(idx, '_')) for idx in indexes]
    result = self.test_ent in {e[1] for e in children_ents}
    if result:
        ctx.add_result(self.name(), 'default', key, idx_ent)
    return result
def run(self, key, ctx: Context):
    logger.debug(f"check key: {key}")
    lang = ctx.lang
    words = self.extract_specs(key, ctx)
    pos = self.pos_indicator
    self.check_opts(key, ctx)

    resultset: List[bool] = []
    valid_words = set()
    for kind in self.cats:
        for word in words:
            result = self.subs_fn(kind, word, lang, pos)
            logger.debug(f"check word {word} against {kind}, result is {result}")
            resultset.append(result)
            if result:
                valid_words.add(word)

    fin = any(resultset)
    if fin:
        ctx.add_result(self.name(), 'default',
                       '_' if '/' in key else key,
                       {**self.result_base, 'pos': pos, 'words': list(valid_words)},
                       delivery_type='sentence')
    return fin
def run(self, key, ctx: Context):
    from jsonpath_ng import parse
    from sagas.nlu.inspector_wordnet import predicate
    from sagas.nlu.ruleset_procs import cached_chunks

    lang = ctx.lang
    domain_name = f"{self.domains}_domains"  # like: 'verb_domains'
    parsers = [parse(normal_path(expr)) for expr in self.paths]

    results = []
    engine = cf.engine(lang) if self.engine is None else self.engine
    chunks = cached_chunks(ctx.sents, lang, engine)
    for chunk in chunks[domain_name]:
        for idx, parser in enumerate(parsers):
            word = '/'.join(match.value for match in parser.find(chunk))
            pred_r = predicate(self.kind, word, lang, self.pos)
            logger.debug(f".. {word} is {self.kind}: {pred_r}")
            results.append(pred_r)
            if pred_r:
                ctx.add_result(self.name(), 'default',
                               f"{self.domains}:{self.paths[idx]}",
                               {'category': self.kind, 'pos': self.pos,
                                **word_values(word, lang)},
                               delivery_type='sentence')

    logger.debug(f"{results}")
    return any(results) if self.match_method == 'any' else all(results)
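# A minimal, standalone jsonpath_ng sketch of the matching step above. The
# chunk dict is illustrative only (real chunks come from cached_chunks);
# parser.find() returns match objects exposing .value and .full_path.
from jsonpath_ng import parse as _parse

_chunk = {'rel': {'obj': [{'text': 'music'}, {'text': 'games'}]}}
_matches = _parse('rel.obj[*].text').find(_chunk)
print('/'.join(m.value for m in _matches))  # -> music/games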
def extract_nouns(key: Text, ctx: Context, check_fn) -> bool:
    rs = extract_ko('nouns', ctx.get_single_chunk_text(key))
    if rs:
        ctx.add_result('cust', 'nouns', key, rs)
        return True
    return False
def run(self, key, ctx: Context):
    import fnmatch
    import re

    if '/' in key:
        lemma = key.split('/')[-1]  # the key is formatted like 'word/lemma'
    else:
        lemma = ctx.lemmas[key]

    if self.match_method == 'equals':
        return lemma == self.target
    elif self.match_method == 'in':
        return lemma in self.target
    elif self.match_method == 'chunk':
        if isinstance(self.target, list):
            return any(t in ctx.chunk_pieces(key, lowercase=True)
                       for t in self.target)
        return self.target in ctx.chunk_pieces(key, lowercase=True)
    elif self.match_method == 'glob':
        regex = fnmatch.translate(self.target)
        return re.compile(regex).match(lemma) is not None
    elif self.match_method == 'regex':
        return re.compile(self.target).match(lemma) is not None
    else:
        raise ValueError(f"Unsupported match method: {self.match_method}")
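# How the 'glob' branch turns a shell-style pattern into a regex (standard
# library only; the pattern and lemmas are illustrative):
import fnmatch
import re

_regex = fnmatch.translate('play*')          # e.g. '(?s:play.*)\\Z'
assert re.compile(_regex).match('playing') is not None
assert re.compile(_regex).match('display') is None  # anchored at the start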
def extract_datetime(key: Text, ctx: Context, check_fn):
    from sagas.nlu.content_representers import cnt_repr

    rs = cnt_repr.parse_snips(ctx.get_single_chunk_text(key), 'ko')
    if rs:
        ctx.add_result('cust', 'datetime', key, rs)
        return True
    return False
def expand(dispatcher: DispatcherIntf, data, keys, specific_domains):
    fixt = InspectorFixture()
    domains, meta = fixt.request_domains(data)
    ctx = Context(meta, domains)
    for key in keys:
        for chunk in ctx.chunk_pieces(key):
            dispatcher.execute(chunk)
def fn_inherit(key: Text, ctx: Context, *args, **kwargs):
    lemma = ctx.lemmas[key]
    pos = ctx.get_feat_pos(key)
    logger.debug(f"predicate {lemma}, {pos} : {args[0]}")
    succ = inherit_axis(lemma, pos, args[0])
    if succ:
        ctx.add_result('axis', fn_inherit.__name__, key,
                       val={'lemma': lemma, 'pos': pos, 'axis': args[0]})
    return succ
def run_simp(self, key, ctx: Context):
    if ctx.meta['lang'] == 'da':
        if ctx.chunk_contains(key, ['ikke']) or ctx.lemmas[key] == 'ikke':
            return True
    elif ctx.meta['lang'] == 'de':
        if ctx.chunk_contains(key, ['nicht']) or ctx.lemmas[key] == 'nicht':
            return True
    return False
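# An equivalent table-driven sketch of run_simp: the per-language branches
# collapse into one lookup ('ikke'/'nicht' are the markers used above; any
# extra language would just be a new dict entry, which is an assumption here).
NEGATION_MARKERS = {'da': ['ikke'], 'de': ['nicht']}

def run_simp_table(self, key, ctx: Context):
    markers = NEGATION_MARKERS.get(ctx.meta['lang'], [])
    return bool(markers) and (ctx.chunk_contains(key, markers)
                              or ctx.lemmas[key] in markers)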
def run(self, key, ctx: Context):
    from sagas.nlu.ruleset_procs import cached_chunks
    from sagas.conf.conf import cf

    chunks = cached_chunks(ctx.sents, ctx.lang, cf.engine(ctx.lang))
    index = next((x[1] for x in ctx.domains if x[0] == self.part), -1)
    if index != -1:
        rs = self.collect_children(chunks, ctx.lang, index + 1)
        if rs:
            ctx.add_result(self.name(), 'default', self.part, rs)
            return True
    return False
def run(self, key, ctx: Context):
    from sagas.nlu.signals import signals

    results = signals.fire(self.name(), self.signal,
                           key=key, ctx=ctx, **self.parameters)
    for r in results:
        ctx.add_result(self.name(), provider=r['name'],
                       part_name=key, val=r['result'])
    return True
def extract_noun_chunk(key: Text, ctx: Context):
    rs = extract_ko('nouns', ctx.get_single_chunk_text(key))
    if rs:
        # any one of the noun chunks may satisfy the condition,
        # so join them all with '/'
        return '/'.join([w['text'] for w in rs])
    return ctx.words[key]
def run(self, key, ctx: Context):
    lang = ctx.meta['lang']
    word = self.extract_word(key, ctx)
    pos = 'v' if self.pos_indicator == '~' else self.pos_indicator

    result = self.substitute(word, lang, pos)
    logger.debug(f"check word {word} against {self.kind}, result is {result}")
    if result:
        ctx.add_result(self.name(), 'default', self.norm_path(key),
                       {**self.result_base, 'pos': pos, 'word': word},
                       delivery_type='sentence')
    return result
def run(self, key, ctx: Context):
    from sagas.nlu.inspectors_dataset import interrogative_maps, trans_val

    lang = ctx.meta['lang']
    if lang in interrogative_maps:
        data_map = interrogative_maps[lang][self.cat]
        if self.is_part:
            word_full = ctx.get_word(key)
            val = trans_val(word_full, lang)
            succ = ctx.chunk_contains(key, data_map) or val in data_map
            if succ:
                ctx.add_result(self.name(), 'default', key,
                               {'category': self.cat,
                                **word_values(word_full, lang)},
                               delivery_type='sentence')
            return succ
        else:
            word_val = trans_val(key, lang)
            logger.debug(f"*** {key} -- {word_val}, {data_map}")
            succ = word_val in data_map
            if succ:
                ctx.add_result(self.name(), 'default', 'head',
                               {'category': self.cat,
                                **word_values(key, lang)},
                               delivery_type='sentence')
            return succ
    return False
def display_synsets(theme, meta, r, lang, collect=False):
    from sagas.nlu.nlu_cli import retrieve_word_info
    from sagas.nlu.inspector_common import Context

    ctx = Context(meta, r['domains'])
    resp = []

    def retrieve(word, indicator, pos='*'):
        from sagas.nlu.synonyms import synonyms

        # prefer the synonym-mapped form (looked up in English) when one exists
        word_syn = synonyms.match(word, lang)
        if word_syn is not None:
            rs = retrieve_word_info('get_synsets', word_syn, 'en', pos=pos)
        else:
            rs = retrieve_word_info('get_synsets', word, lang, pos=pos)
        if len(rs) > 0:
            mean = get_possible_mean(rs)
            if collect:
                resp.append({'word': word, 'indicator': indicator,
                             'spec': mean, 'comments': rs})
            else:
                comments = ', '.join(rs)[:25]
                tc.emp('magenta',
                       '♥ %s(%s, %s): %s...' % (word, indicator, mean, comments))
                resp.append('♥ %s(%s): %s...' % (word, indicator, comments))
            return True
        return False

    retrieve(f"{r['word']}/{r['lemma']}", theme,
             'v' if theme == '[verb]' else '*')
    if 'head' in meta:
        retrieve(meta['head'], 'head')
    for opt in display_synsets_opts:
        if opt in ctx.lemmas:
            for tok in ctx.tokens[opt]:
                retrieve(tok, opt)
    return resp
def run(self, key, ctx: Context):
    checkers = []
    lang = ctx.meta['lang']
    if self.entire:
        checkers.append(self.providers[self.provider](key, lang, ctx, 'sents'))
    else:
        for cnt in ctx.chunk_pieces(key):
            checkers.append(self.providers[self.provider](cnt, lang, ctx, key))
    return any(checkers)
def run(self, key, ctx: Context):
    lang = ctx.meta['lang']
    word = self.extract_word(key, ctx)
    if self.pos_indicator == '~':
        pos = self.get_pos_by_feat(ctx.feats[key])
    else:
        pos = self.pos_indicator

    result = self.substitute(word, lang, pos)
    logger.debug(f"result base: {self.result_base}")
    if result:
        ctx.add_result(self.name(), 'default', self.norm_path(key),
                       {**self.result_base, 'pos': pos, 'word': word},
                       delivery_type='sentence')
    return result
def process_result(self, ctx: Context, results: List[Dict[Text, Any]]) -> bool:
    """
    Results sample:
        [{'name': 'collect_verb',
          'result': [token_data(word='think/think', pos='verb', path='/root')]}]

    :param results:
    :return:
    """
    has_result = False
    for result in results:
        logger.debug(result)
        vals = result['result']
        if vals:
            # If any pipe ends up with output, the match succeeds. To require
            # every pipe to match, split them into separate pipes (i.e. separate
            # inspectors): a pattern only matches when all its inspectors are true.
            has_result = True
        path_val = ctx.domain_name + ':' + vals[0]['path'][1:] if vals else '_'
        ctx.add_result(self.name(),
                       provider=f"{result['sig']}/{result['name']}",
                       part_name=path_val,
                       val=vals)
    return has_result
def run(self, key, ctx: Context):
    from sagas.nlu.rasa_procs import invoke_nlu

    lang = ctx.meta['lang']
    if lang not in default_projects:
        return False
    proj = lang

    def proc(cnt: Text) -> bool:
        succ = False
        logger.debug('query with rasa-nlu: %s', cnt)
        resp = invoke_nlu(self.endpoint, proj, "current", cnt)
        if resp is not None:
            intent = resp["intent"]
            entities = resp['entities']
            ent_names = {ent['entity'] for ent in entities}
            intent_name = intent['name']
            intent_confidence = float(intent['confidence'])
            self._result = intent_confidence
            logger.info('%s(%s) -> %f, with entities %s' %
                        (cnt, intent_name, intent_confidence, ', '.join(ent_names)))
            if self.intent == intent_name and intent_confidence > self.confidence:
                ctx.add_result(self.name(), 'default', key,
                               {'intent': intent_name,
                                'confidence': intent_confidence})
                if self.contains_entity is None:
                    succ = True
                elif ent_names.issubset(self.contains_entity):
                    succ = True
        return succ

    if self.entire:
        return proc(key)
    for cnt in ctx.stem_pieces(key):
        if proc(cnt):
            return True
    return False
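# Shape of the rasa-nlu response that proc() above relies on, inferred from
# its field accesses (the values here are illustrative, not from a server):
_sample_resp = {
    'intent': {'name': 'play_music', 'confidence': 0.93},
    'entities': [{'entity': 'genre'}, {'entity': 'artist'}],
}
# The check passes when the intent name matches, the confidence clears the
# threshold, and (if set) the returned entity names are a subset of
# self.contains_entity.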
def run(self, key, ctx: Context):
    from sagas.nlu.inspectors_dataset import negative_maps, translit_langs
    from sagas.nlu.transliterations import translits

    lang = ctx.meta['lang']
    if lang in negative_maps:
        data_map = negative_maps[lang]
        if lang in translit_langs:
            word_val = translits.translit(ctx.words[key], lang)
        else:
            word_val = ctx.lemmas[key]
        if ctx.chunk_contains(key, data_map) or word_val in data_map:
            return True
    return False
def build_context(data: Dict[Text, Text], dominator: Text, name='_noname_', **kwargs):
    from sagas.nlu.inferencer import parse

    rs = parse(data)
    for serial, r in enumerate(rs):
        domains = r['domains']
        meta = build_meta(r, data)
        ctx = Context(meta, domains, name=name)
        pat = Patterns(domains, meta, 5, name=name).opts(**kwargs)
        serv = pat.prepare(dominator)
        yield ctx, serv
def print_sents(self, sents, lang, engine=None):
    """
    $ python -m sagas.nlu.ruleset_procs print_sents 'I want to play music.' en
    $ python -m sagas.nlu.ruleset_procs print_sents "クモは4つの右の目をしています。" ja corenlp

    :param sents:
    :param lang:
    :return:
    """
    if engine is None:
        engine = cf.engine(lang)
    data = {'lang': lang, "sents": sents, 'engine': engine}
    doc_jsonify, resp = parse_sents(data)
    rs = get_chunks(doc_jsonify)

    delim = '' if lang in non_spaces else ' '
    for serial, r in enumerate(rs):
        meta = build_meta(r, data)
        domains = r['domains']
        keys = {x[0] for x in domains}
        grp = lambda p, idx: [x[idx] for x in domains if x[0] == p]
        tokens = {x: grp(x, 2) for x in keys}
        words = {x: delim.join(grp(x, 2)) for x in keys}
        lemmas = {x: delim.join(grp(x, 3)) for x in keys}
        print('meta keys', meta.keys())
        print('tokens', tokens)
        print('words', meta['word'], words)
        print('lemmas', lemmas)

        ctx = Context(meta, domains)
        print('chunks', ctx._chunks)

    g = display_doc_deps(doc_jsonify, resp, translit_lang=lang)
    print(*[(w.index, w.text, w.governor, doc_jsonify.words[w.governor - 1].text)
            for w in doc_jsonify.words], sep='\n')
    tc.gv(g)
def run(self, key, ctx: Context):
    result = False
    lang = ctx.meta['lang']
    requestors = {'ru': lambda rc: query_entities_by_url(cf.ensure('ner_ru'), rc)}
    for cnt in ctx.chunk_pieces(key):
        data = {'lang': lang, 'sents': cnt}
        if lang in requestors:
            resp = requestors[lang](data)
        else:
            resp = query_entities(data)
        if resp['result'] == 'success':
            dims = [d['entity'] for d in resp['data']]
            logger.info('entities -> %s, self.dim -> %s',
                        ', '.join(dims), self.dim)
            if self.dim in dims:
                print('\t%s ∈' % cnt, self.dim)
                result = True
    return result
def check_interr(key: Text, ctx: Context, check_fn, lang='pt') -> bool:
    for stem in ctx.stem_pieces(key):
        interr = get_interrogative(stem, lang)
        if interr and check_fn(interr):
            return True
    return False
def ex_word(key: Text, cnt: Text, comp: Text, ctx: Context):
    ctx.add_result(extractor, comp, key,
                   {'text': cnt, 'lemma': ctx.lemmas[key]})
    return True
def run(self, key, ctx: Context):
    # when pickup is '_', the key itself is the value
    comp_val = key if self.pickup == '_' else ''
    key = self.pickup or key
    ex_map = {
        'date_search': lambda cnt, comp: ex_date_search(key, cnt, comp, ctx),
        # .. extract_for('plain+date_search+date_parse', '時間'),
        'date_parse': lambda cnt, comp: ex_date_parse(key, cnt, comp, ctx),
        'plain': lambda cnt, comp: ex_plain(key, cnt, comp, ctx),
        'word': lambda cnt, comp: ex_word(key, cnt, comp, ctx),
        # .. extract_for('plain+translit', 'obj'),
        'translit': lambda cnt, comp: ex_translit(key, cnt, comp, ctx),
        'email': lambda cnt, comp: ex_dims(key, cnt, comp, ctx, 'email'),
        # .. extract_for('number', 'obl'),
        'number': lambda cnt, comp: ex_dims(key, cnt, comp, ctx, 'number'),
        # .. extract_for('time', 'advmod'),
        'time': lambda cnt, comp: ex_dims(key, cnt, comp, ctx, 'time'),
        # .. extract_for('plain+temperature', 'ニ'),
        'temperature': lambda cnt, comp: ex_dims(key, cnt, comp, ctx, 'temperature'),
        # example: extract_for('rasa', '_')
        'rasa': lambda cnt, comp: ex_rasa(key, cnt, comp, ctx),
        # example: extract_for('chunk', 'verb:xcomp/obj')
        'chunk': lambda cnt, comp: ex_chunk(key, cnt, comp, ctx,
                                            lambda w: (w.text, w.upos.lower())),
        # example: extract_for('chunk_text', 'verb:xcomp/obj')
        'chunk_text': lambda cnt, comp: ex_chunk(key, cnt, comp, ctx,
                                                 lambda w: w.text),
        'chunk_feats': lambda cnt, comp: ex_chunk(key, cnt, comp, ctx,
                                                  lambda w: w.feats),
        # .. extract_for('feats', 'verb:_'), extract_for('feats', 'verb:obj')
        'feats': lambda cnt, comp: ex_feats(key, cnt, comp, ctx),
        # example: extract_for('ner', '_'), extract_for('ner', 'xcomp')
        'ner': lambda cnt, comp: ex_ner(key, cnt, comp, ctx),
    }

    if self.pickup == '_' or is_full_domain_path(self.pickup):
        self.results['_'] = []
        for comp in self.comp_as:
            op = ex_map[comp](comp_val, comp)
            self.results['_'].append((comp, op))
    else:
        for cnt in ctx.chunk_pieces(key):
            self.results[key] = []
            for comp in self.comp_as:
                op = ex_map[comp](cnt, comp)
                self.results[key].append((comp, op))

    # extraction only: this inspector takes no part in the verdict,
    # so it always returns True
    return True
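# Usage as suggested by the inline examples above (the extract_for call is
# assumed from those comments, not defined in this file): the first argument
# names one or more ex_map extractors joined with '+', the second names the
# pickup part -- a dependency relation, '_' for the whole key, or a full
# domain path like 'verb:xcomp/obj':
#   extract_for('plain+date_search+date_parse', '時間')
#   extract_for('chunk_text', 'verb:xcomp/obj')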
def service_method(*args_, **kwargs_):
    """Return the result of the check request."""
    result = True
    options = []
    ctx = Context(self.meta, self.domains, name=self.name)
    if not self.verify(ctx):
        options.append(f"verify fail: {self._opts}")
        return "%s with %s" % (method, ', '.join(options)), \
               False, self.priority, ctx

    def_args = self._opts[ctx.domain_name].args \
        if ctx.domain_name in self._opts else []
    def_kwargs = self._opts[ctx.domain_name].kwargs \
        if ctx.domain_name in self._opts else {}
    args = [*args_, *def_args]
    kwargs = {**kwargs_, **def_kwargs}

    # each arg has already been checked as a pos, an inspector
    # or a callable functor
    if self.meta is not None and len(args) > 0:
        if not self.funcs[method](args, ctx, options):
            result = False

    rel_feats = ctx.feats
    for key, value in kwargs.items():
        if not key.startswith('head_'):
            key = key.replace('_', ':')
        key = trip_number_suffix(key)
        if key.startswith('::'):
            # originally started with '__', like '__engine'
            opt_name = key[2:]
            opt_ret = self.meta[opt_name] == value
            if not opt_ret:
                logger.debug('%s=%s checker fail, skip this pattern.'
                             % (key, value))
        elif key.startswith(':'):
            opt_ret = check_item(self.meta, key[1:], value, ctx)
        else:
            opt_ret = check_item(rel_feats, key, value, ctx)
        if not opt_ret:
            result = False
            options.append('{} is {}: {}'.format(key, value, opt_ret))

    single_insps = [insp for insp in args if isinstance(insp, Inspector)]
    pair_insps = {k: insp for k, insp in kwargs.items()
                  if isinstance(insp, Inspector)}
    if len(self.after_evs) > 0:
        logger.debug(f".. after_evs "
                     f"{[(el[0].name(), el[1]) for el in self.after_evs]}")
    for arg, key_val in self.after_evs:
        if not result and arg.when_succ:
            continue
        arg.infer(single_insps, pair_insps)
        opt_ret = arg.check(key_val, ctx)
        # once result turns False it must stay False:
        # never let a later True overwrite it
        if not opt_ret:
            result = False
            options.append('{} is {}: {}'.format('pos', arg, opt_ret))
    self.after_evs.clear()

    return "%s with %s" % (method, ', '.join(options)), \
           result, self.priority, ctx
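# The 4-tuple returned above reads as (description, result, priority, ctx);
# a caller would unpack it like this (hypothetical variable names):
#   desc, ok, priority, ctx = service_method(...)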
def run(self, key, ctx: Context):
    ctx.add_result(self.name(), 'default', 'sents', list(self.tags))
    return True
def run(self, key, ctx: Context):
    self.fields.update(self.result_map)
    ctx.add_result(self.name(), self.provider, 'defined', self.fields)
    return True
def run(self, key, ctx: Context):
    ctx.add_result(self.name(), 'default', 'defined', list(self.tools))
    return True