def nlu_parse(self, sents, lang='en'):
    """Parse *sents* with the multilang NLU servant and print intent/entities.

    $ python -m saai.saai_cli nlu_parse "Shenzhen ist das Silicon Valley für Hardware-Firmen" de
    $ python -m saai.saai_cli nlu_parse '附近有什么好吃的' zh
    $ python -m saai.saai_cli nlu_parse '六安市公安局裕安分局平桥派出所接到辖区居民戴某报警' zh
    $ python -m saai.saai_cli nlu_parse '一直打喷嚏怎么办' zh
    $ python -m saai.saai_cli nlu_parse "I was born in Beijing." en
    $ python -m saai.saai_cli nlu_parse "Я хочу поехать в москву" ru
    $ python -m saai.saai_cli nlu_parse "Jokowi pergi ke Singapura." id

    :param sents: sentence text to parse
    :param lang: language code for the sentence (default 'en')
    :return: None (results are printed to stdout)
    """
    from sagas.conf.conf import cf
    from sagas.nlu.rasa_procs import invoke_nlu
    import json
    import sagas.tracker_fn as tc

    endpoint = cf.ensure('nlu_multilang_servant')
    print('.. with endpoint', endpoint)  # fix: message typo "endpoing"
    result = invoke_nlu(endpoint, lang, "current", sents)
    tc.emp('yellow', result)
    # invoke_nlu may return None or an empty payload on failure
    if result is not None and len(result) > 0:
        print(json.dumps(result, indent=4, ensure_ascii=False))
        intent = result["intent"]
        print('%s -> %f' % (intent['name'], intent['confidence']))
        entities = result['entities']
        print('entities ->', [ent['entity'] for ent in entities])
def trans_google(self, ctx: TransContext):
    """Translate the context's text into each configured target language.

    For every target in ctx.target_list this appends a line of the form
    ``xx="translation"`` (plus ``v0=.., v1=..`` pronounce variants when
    requested) to ctx.target_sents and records the translation in
    ctx.sents_map, retrying empty results up to self.retries times.

    :param ctx: translation context providing source/targets/text
    :return: True when every target translated, False on the first failure
    """
    from tqdm import tqdm
    from sagas.nlu.translator import translate
    import time

    source, targets, text, says = ctx.pars()
    print('.. translate to', ctx.target_list)
    for target in tqdm(ctx.target_list):
        # default options; `cf` is expected to be imported at module level
        options = set(cf.ensure('translator_opts'))
        if says == target:
            options.add('get_pronounce')
        if says == source and target == 'en':
            options.add('get_pronounce')
        trans, tracker = translate(text, source=source, target=target,
                                   options=options)
        count = 0
        while trans == '':
            print('wait a second, try again ...')
            # fix: was bare `sleep(1)` which raised NameError (only the
            # `time` module is imported here)
            time.sleep(1)
            trans, tracker = translate(text, source=source, target=target,
                                       options=options)
            count = count + 1
            if count > self.retries:
                break
        if trans != '':
            line = '%s="%s"' % (target[:2], trans)
            ctx.target_sents.append(line)
            for i, p in enumerate(tracker.pronounce):
                ps = p[2:]
                ctx.target_sents.append(f'v{i}="{ps}"')
            ctx.sents_map[target[:2]] = trans
        else:
            print(
                'translate fail, the clipboard content has not been changed.'
            )
            # abort on the first failed target
            return False
    return True
def ex_rasa(key: Text, cnt: Text, comp: Text, ctx: cla_meta_intf):
    """Run the rasa NLU servant over ctx.sents and store the parse result.

    :param key: chunk key (unused here; kept for the extractor signature)
    :param cnt: chunk content (unused here; kept for the extractor signature)
    :param comp: component name recorded alongside the result
    :param ctx: meta context providing lang/sents and add_result
    :return: True when the servant returned a parse, False otherwise
    """
    from sagas.conf.conf import cf
    from sagas.nlu.rasa_procs import invoke_nlu

    endpoint = cf.ensure('nlu_multilang_servant')
    result = invoke_nlu(endpoint, ctx.lang, "current", ctx.sents)
    if result is not None:
        # NOTE(review): `extractor` is a free variable — presumably a
        # module-level constant naming this extractor; confirm in the file.
        ctx.add_result(extractor, comp, 'sents', result)
        return True
    return False
def query_data_by_url(conf_item, fn_path, data):
    """Post *data* as JSON to ``<url>/<fn_path>`` and wrap the reply.

    *conf_item* is used directly when it is a full url (contains '://');
    otherwise it is treated as a conf key and resolved via cf.ensure.

    :return: {'result': 'success', 'data': ...} on HTTP 200,
             {'result': 'fail', 'cause': 'error response'} otherwise
    """
    url = conf_item if '://' in conf_item else cf.ensure(conf_item)
    # the url format: 'http://localhost:8092/entities'
    response = requests.post(f"{url}/{fn_path}", json=data)
    if response.status_code != 200:
        return {'result': 'fail', 'cause': 'error response'}
    return {'result': 'success', 'data': response.json()}
def __init__(self, intent: str, confidence: float, entire=False, contains_entity: list = None):
    """Hold the expected intent/confidence used when checking an NLU result.

    :param intent: expected intent name
    :param confidence: expected confidence threshold/value
    :param entire: flag controlling how the match is applied
    :param contains_entity: optional list of entities that must be present
    """
    self.intent = intent
    self.confidence = confidence
    self.entire = entire
    self.contains_entity = contains_entity
    # servant endpoint resolved from configuration
    self.endpoint = cf.ensure('nlu_multilang_servant')
    self._result = None
def translate(text, source: Text = 'auto', target: Text = 'zh-CN',
              trans_verbose: bool = False, options: Set[Text] = None,
              tracker: TransTracker = None) -> (Text, TransTracker):
    """Translate *text* from *source* to *target*.

    Results are served from the translation cache when enabled; otherwise the
    translator implementation selected by conf key 'translator_impl' is used.

    :param text: text to translate
    :param source: source language code ('auto' to detect)
    :param target: target language code
    :param trans_verbose: verbose flag forwarded to the implementation
    :param options: option flags such as 'get_pronounce' / 'disable_cache'
    :param tracker: reused tracker; created (with cache observer when the
        cache is enabled) if None
    :return: (translated text, tracker); the text is '' on failure
    """
    from sagas.nlu.trans_cacher import cacher

    if options is None:
        # fix: was `options = {}` (an empty dict) — the annotation declares
        # Set[Text] and callers use set operations like .add()
        options = set()
    meta = {'text': text, 'source': source, 'target': target}
    if tracker is None:
        tracker = TransTracker()
        if cf.is_enabled('trans_cache'):
            tracker.add_observer(cacher)
    if 'disable_cache' not in options:
        # try to serve from the cache first
        r = cacher.retrieve(meta)
        if r:
            cnt = r['content']
            res = join_sentence(cnt)
            process_result(meta, cnt, trans_verbose, options, tracker)
            logger.debug(f'get {text} from cacher')
            return res, tracker

    def impl():
        from sagas.nlu.translator_impl import TranslatorImpl
        time.sleep(random.uniform(0.05, 0.20))
        return TranslatorImpl().execute(text, source, target, trans_verbose,
                                        options, tracker,
                                        process_result=process_result)

    def impl2():
        from sagas.nlu.translator2_impl import translator_impl
        time.sleep(random.uniform(0.05, 0.20))
        translator_impl.update_TKK()  # refresh the token before calling
        time.sleep(random.uniform(0.05, 0.20))
        try:
            return translator_impl.execute(text, source, target, trans_verbose,
                                           options, tracker,
                                           process_result=process_result)
        except Exception as e:
            logger.error(f"translate fail: {text}, error message: {e}")
            return ''

    trans_fn = {'impl': impl, 'impl2': impl2}
    trans_text = trans_fn[cf.ensure('translator_impl')]()
    return trans_text, tracker
def get_entities(sents: Text, rasa_entry='rasa_default'):
    """Parse *sents* through a rasa service and return its entities.

    Start rasa service: simple/rasa-serv.sh
    >>> from sagas.nlu.rasa_procs import get_entities
    >>> get_entities('id like to find an expensive restaurant', 'rasa_simple')
    [Entity(start=19, end=28, extractor='CRFEntityExtractor', value='hi', entity='price', confidence=0.930441018)]

    :param sents: sentence text to parse
    :param rasa_entry: conf key of the rasa endpoint to use
    :return: list of Entity objects (empty when nothing was found)
    """
    result = rasa_nlu_parse(sents, cf.ensure(rasa_entry))
    if result and 'entities' in result:
        ents = result['entities']
    else:
        ents = []
    return [Entity.from_dict(e) for e in ents]
def translate_try(text: Text, source: Text, target: Text,
                  options: Set[Text] = None,
                  tracker: TransTracker = None) -> (Text, TransTracker):
    """Translate with retries, waiting briefly between empty results.

    The retry count comes from conf key 'translator_retries'.

    :param text: text to translate
    :param source: source language code
    :param target: target language code
    :param options: option flags forwarded to translate()
    :param tracker: optional tracker reused across attempts
    :return: (translated text, tracker); text is '' when all retries failed
    """
    trans, tracker = translate(text, source=source, target=target,
                               options=options, tracker=tracker)
    count = 0
    retries = cf.ensure('translator_retries')
    while trans == '':
        print('wait a second, try again ...')
        time.sleep(random.uniform(0.5, 1.20))
        # fix: keep passing the tracker on retries — the original dropped it,
        # so retried translations lost the tracker state/observers
        trans, tracker = translate(text, source=source, target=target,
                                   options=options, tracker=tracker)
        count = count + 1
        if count > retries:
            break
    return trans, tracker
def parse(self, sents, lang):
    """Parse *sents* with the multilang NLU servant and print the result.

    $ python -m sagas.nlu.rasa_procs parse "Shenzhen ist das Silicon Valley für Hardware-Firmen" de
    $ python -m sagas.nlu.rasa_procs parse 'what restaurants can you recommend?' en

    :param sents: sentence text to parse
    :param lang: language code for the sentence
    :return: None (results are printed to stdout)
    """
    from sagas.conf.conf import cf

    endpoint = cf.ensure('nlu_multilang_servant')
    print('.. with endpoint', endpoint)  # fix: message typo "endpoing"
    result = invoke_nlu(endpoint, lang, "current", sents)
    if result is not None:
        # ensure_ascii=False keeps non-latin text readable, matching the
        # sibling nlu_parse routine
        print(json.dumps(result, indent=4, ensure_ascii=False))
        intent = result["intent"]
        print('%s -> %f' % (intent['name'], intent['confidence']))
        entities = result['entities']
        print([ent['entity'] for ent in entities])
def query_duckling(text: Text, lang: Text) -> Dict[Text, Any]:
    """Post *text* to the duckling service and wrap its dimensions.

    resp = query_duckling('tomorrow at eight', 'en')
    print([d['dim'] for d in resp['data']])

    :param text: text to analyse
    :param lang: language code; must exist in locale_mappings
    :return: {'result': 'success'|'fail', 'data': [...], ['cause': ...]}
    """
    if lang not in locale_mappings:
        # fix: include an (empty) 'data' key so all failure paths share the
        # same shape; the cause text is kept as-is for compatibility
        return {'result': 'fail', 'cause': "unsupport lang", 'data': []}
    data = {'locale': locale_mappings[lang],
            'text': text,
            'reftime': current_milli_time()}
    response = requests.post(cf.ensure('duckling'), data=data)
    if response.status_code == 200:
        r = response.json()
        return {'result': 'success', 'data': r}
    return {'result': 'fail', 'cause': 'error response', 'data': []}
def run(self, key, ctx: Context):
    """Return True when any chunk under *key* contains an entity of self.dim.

    Entity extraction goes through a per-language service when one is
    configured (currently only 'ru'), otherwise the default NER service.

    :param key: chunk key to inspect
    :param ctx: processing context providing lang metadata and chunk pieces
    :return: True when self.dim was found in at least one chunk
    """
    matched = False
    lang = ctx.meta['lang']
    # per-language entity requestors; anything else uses query_entities
    requestors = {
        'ru': lambda rc: query_entities_by_url(cf.ensure('ner_ru'), rc),
    }
    for piece in ctx.chunk_pieces(key):
        payload = {'lang': lang, 'sents': piece}
        requestor = requestors.get(lang)
        resp = requestor(payload) if requestor else query_entities(payload)
        if resp['result'] != 'success':
            continue
        dims = [d['entity'] for d in resp['data']]
        logger.info('entities -> %s, self.dim -> %s',
                    ', '.join(dims), self.dim)
        if self.dim in dims:
            print('\t%s ∈' % piece, self.dim)
            matched = True
    return matched
def __init__(self):
    """Open the mongo connection and bind the translation collection."""
    self.client = MongoClient(cf.ensure('mongo'))
    # database 'langs', collection 'trans'
    self.db = self.client.langs
    self.coll = self.db.trans
def __init__(self):
    """Bind the extended OMW wordnet and the configured default languages."""
    from sagas.nlu.omw_extended import omw_ext
    from sagas.conf.conf import cf

    self.omw = omw_ext
    self.default_langs = cf.ensure('default_word_sets_langs')
def query_entities(data):
    """Send *data* to the default NER service (conf key 'ner')."""
    ner_url = cf.ensure('ner')
    return query_entities_by_url(ner_url, data)
def __init__(self):
    """Connect to the redis instance named by conf key 'redis'."""
    redis_host = cf.ensure('redis')
    self.r = redis.StrictRedis(redis_host)