Beispiel #1
0
    def nlu_parse(self, sents, lang='en'):
        """Send a sentence to the multi-language NLU endpoint and print the parse.

        Usage examples::

            $ python -m saai.saai_cli nlu_parse "Shenzhen ist das Silicon Valley für Hardware-Firmen" de
            $ python -m saai.saai_cli nlu_parse '附近有什么好吃的' zh
            $ python -m saai.saai_cli nlu_parse '六安市公安局裕安分局平桥派出所接到辖区居民戴某报警' zh
            $ python -m saai.saai_cli nlu_parse '一直打喷嚏怎么办' zh
            $ python -m saai.saai_cli nlu_parse "I was born in Beijing." en
            $ python -m saai.saai_cli nlu_parse "Я хочу поехать в москву" ru
            $ python -m saai.saai_cli nlu_parse "Jokowi pergi ke Singapura." id

        :param sents: the sentence text handed to ``invoke_nlu``
        :param lang: language code handed to ``invoke_nlu`` (defaults to ``'en'``)
        :return: None; the raw result, the intent and the entity names are
            written to stdout
        """
        from sagas.conf.conf import cf
        from sagas.nlu.rasa_procs import invoke_nlu
        import json
        import sagas.tracker_fn as tc

        endpoint = cf.ensure('nlu_multilang_servant')
        print('.. with endpoing', endpoint)
        result = invoke_nlu(endpoint, lang, "current", sents)
        tc.emp('yellow', result)

        # A missing or empty answer has no intent/entities to report.
        if not result:
            return

        # ensure_ascii=False keeps non-ASCII sentences readable in the dump.
        print(json.dumps(result, indent=4, ensure_ascii=False))

        intent = result["intent"]
        print(f"{intent['name']} -> {intent['confidence']:f}")
        print('entities ->', [ent['entity'] for ent in result['entities']])
Beispiel #2
0
    def trans_google(self, ctx: TransContext):
        """Translate the context's text into every target language.

        For each entry of ``ctx.target_list`` the text is translated; an empty
        translation is retried after a one-second pause until the retry count
        exceeds ``self.retries``.  Successful translations are appended to
        ``ctx.target_sents`` (as ``xx="..."`` lines, followed by one
        ``vN="..."`` line per pronunciation reported by the tracker) and
        stored in ``ctx.sents_map`` under the first two characters of the
        target code.

        :param ctx: translation context; supplies ``pars()``, ``target_list``,
            ``target_sents`` and ``sents_map``
        :return: True when every target was translated, False as soon as one
            target still yields an empty translation after the retries
        """
        from tqdm import tqdm
        from sagas.nlu.translator import translate
        import time

        source, _targets, text, says = ctx.pars()

        print('.. translate to', ctx.target_list)
        for target in tqdm(ctx.target_list):
            # Start from the configured default options for every target.
            options = set(cf.ensure('translator_opts'))
            # Ask for pronunciation when the spoken language is the target,
            # or when it is the source and the target is English.
            if says == target or (says == source and target == 'en'):
                options.add('get_pronounce')

            trans, tracker = translate(text,
                                       source=source,
                                       target=target,
                                       options=options)
            count = 0
            while trans == '':
                print('wait a second, try again ...')
                # The block only imports the ``time`` module, so the pause
                # must be called through it; a bare ``sleep`` is not bound here.
                time.sleep(1)
                trans, tracker = translate(text,
                                           source=source,
                                           target=target,
                                           options=options)
                count += 1
                if count > self.retries:
                    break

            if trans == '':
                print(
                    'translate fail, the clipboard content has not been changed.'
                )
                # Abort the whole run on the first target that cannot be translated.
                return False

            lang_key = target[:2]
            ctx.target_sents.append('%s="%s"' % (lang_key, trans))
            for i, p in enumerate(tracker.pronounce):
                # Each pronunciation entry is emitted without its first two
                # characters (presumably a marker prefix; verify against the tracker).
                ps = p[2:]
                ctx.target_sents.append(f'v{i}="{ps}"')
            ctx.sents_map[lang_key] = trans

        return True
Beispiel #3
0
def ex_rasa(key: Text, cnt: Text, comp: Text, ctx: cla_meta_intf):
    """Run the multi-language NLU service on the context sentence and record the result.

    :param key: not used by this function
    :param cnt: not used by this function
    :param comp: component name forwarded to ``ctx.add_result``
    :param ctx: supplies ``lang`` and ``sents`` for the request and receives
        the result through ``add_result``
    :return: True when the service returned a result (which is then stored
        under the ``'sents'`` slot), False when it returned ``None``
    """
    from sagas.conf.conf import cf
    from sagas.nlu.rasa_procs import invoke_nlu

    endpoint = cf.ensure('nlu_multilang_servant')
    result = invoke_nlu(endpoint, ctx.lang, "current", ctx.sents)
    # Identity test: only a genuinely absent result counts as failure.
    if result is None:
        return False

    ctx.add_result(extractor, comp, 'sents', result)
    return True
Beispiel #4
0
def query_data_by_url(conf_item, fn_path, data):
    """POST ``data`` as JSON to ``<base url>/<fn_path>`` and wrap the outcome.

    :param conf_item: either a full base URL (anything containing ``://``,
        e.g. ``'http://localhost:8092/entities'``) or a configuration key that
        is resolved through ``cf.ensure``
    :param fn_path: path segment appended to the base URL
    :param data: payload sent as the JSON body
    :return: ``{'result': 'success', 'data': <decoded JSON>}`` on HTTP 200,
        otherwise ``{'result': 'fail', 'cause': 'error response'}``
    """
    # A scheme separator marks a literal URL; anything else is a config key.
    base_url = conf_item if '://' in conf_item else cf.ensure(conf_item)

    reply = requests.post(f"{base_url}/{fn_path}", json=data)
    if reply.status_code != 200:
        return {'result': 'fail', 'cause': 'error response'}
    return {'result': 'success', 'data': reply.json()}
Beispiel #5
0
    def __init__(self,
                 intent: str,
                 confidence: float,
                 entire=False,
                 contains_entity: list = None):
        """Store the matching criteria and resolve the NLU endpoint.

        :param intent: stored as ``self.intent``
        :param confidence: stored as ``self.confidence``
        :param entire: stored as ``self.entire`` (defaults to False)
        :param contains_entity: stored as ``self.contains_entity``
            (defaults to None)
        """
        self.intent, self.confidence = intent, confidence
        self.entire, self.contains_entity = entire, contains_entity

        # The endpoint is read from configuration rather than hard-coded.
        self.endpoint = cf.ensure('nlu_multilang_servant')
        # Starts out empty; nothing in the constructor fills it.
        self._result = None
Beispiel #6
0
def translate(text, source:Text='auto', target:Text='zh-CN',
              trans_verbose:bool=False, options:Set[Text]=None,
              tracker:TransTracker=None) -> (Text, TransTracker):
    """Translate ``text``, consulting the translation cache first.

    :param text: the text to translate
    :param source: source language code (``'auto'`` by default)
    :param target: target language code (``'zh-CN'`` by default)
    :param trans_verbose: forwarded to the backend and to ``process_result``
    :param options: set of option flags; ``'disable_cache'`` skips the cache
        lookup, the rest is forwarded to the backend.  ``None`` means no options.
    :param tracker: tracker to report into; when omitted a new
        ``TransTracker`` is created, observed by the cacher if the
        ``trans_cache`` setting is enabled
    :return: ``(translated_text, tracker)``; with the ``impl2`` backend the
        text is ``''`` when the backend raised
    """
    from sagas.nlu.trans_cacher import cacher

    if options is None:
        # Must be a set (not ``{}``, which is a dict) to match the declared
        # type and what callers pass in.
        options = set()

    meta = {'text': text, 'source': source, 'target': target}

    if tracker is None:
        tracker = TransTracker()
        if cf.is_enabled('trans_cache'):
            tracker.add_observer(cacher)

    if 'disable_cache' not in options:
        # Serve from the cacher when it already holds this request.
        r = cacher.retrieve(meta)
        if r:
            cnt = r['content']
            res = join_sentence(cnt)
            process_result(meta, cnt, trans_verbose, options, tracker)
            logger.debug(f'get {text} from cacher')
            return res, tracker

    def impl():
        from sagas.nlu.translator_impl import TranslatorImpl
        # Small random pause before hitting the remote service.
        time.sleep(random.uniform(0.05, 0.20))
        return TranslatorImpl().execute(text, source, target, trans_verbose,
                                        options, tracker, process_result=process_result)

    def impl2():
        from sagas.nlu.translator2_impl import translator_impl
        time.sleep(random.uniform(0.05, 0.20))
        translator_impl.update_TKK()  # update kk value
        time.sleep(random.uniform(0.05, 0.20))
        try:
            return translator_impl.execute(text, source, target, trans_verbose,
                                           options, tracker, process_result=process_result)
        except Exception as e:
            # Best effort: report the failure as an empty translation so
            # callers can retry.
            logger.error(f"translate fail: {text}, error message: {e}")
            return ''

    # The backend is selected by the 'translator_impl' configuration value.
    trans_fn = {'impl': impl, 'impl2': impl2}
    trans_text = trans_fn[cf.ensure('translator_impl')]()
    return trans_text, tracker
Beispiel #7
0
def get_entities(sents: Text, rasa_entry='rasa_default'):
    """Parse ``sents`` with a rasa service and return the entities it reports.

    Start rasa service: simple/rasa-serv.sh
    >>> from sagas.nlu.rasa_procs import get_entities
    >>> get_entities('id like to find an expensive restaurant', 'rasa_simple')
        [Entity(start=19, end=28, extractor='CRFEntityExtractor', value='hi', entity='price', confidence=0.930441018)]

    :param sents: the sentence text to parse
    :param rasa_entry: configuration key resolved through ``cf.ensure`` and
        handed to ``rasa_nlu_parse``
    :return: a list of ``Entity`` objects; empty when the parse result is
        falsy or carries no ``'entities'`` key
    """
    parsed = rasa_nlu_parse(sents, cf.ensure(rasa_entry))
    if not parsed or 'entities' not in parsed:
        return []
    return [Entity.from_dict(item) for item in parsed['entities']]
Beispiel #8
0
def translate_try(text:Text, source: Text, target: Text,
                  options: Set[Text] = None,
                  tracker: TransTracker = None) -> (Text, TransTracker):
    """Translate ``text``, retrying while the translation comes back empty.

    After each empty result the function waits a random 0.5-1.2 seconds and
    tries again; it gives up once the number of retries exceeds the
    ``translator_retries`` configuration value.

    :param text: the text to translate
    :param source: source language code
    :param target: target language code
    :param options: option flags forwarded to ``translate``
    :param tracker: tracker forwarded to ``translate`` on every attempt
    :return: ``(translated_text, tracker)``; the text is ``''`` when every
        attempt failed
    """
    trans, tracker = translate(text, source=source,
                               target=target,
                               options=options,
                               tracker=tracker)
    count = 0
    retries = cf.ensure('translator_retries')
    while trans == '':
        print('wait a second, try again ...')
        time.sleep(random.uniform(0.5, 1.20))
        # Keep reporting into the same tracker on retries; without it a
        # caller-supplied tracker would be replaced by a fresh one.
        trans, tracker = translate(text, source=source,
                                   target=target,
                                   options=options,
                                   tracker=tracker)
        count += 1
        if count > retries:
            break
    return trans, tracker
Beispiel #9
0
    def parse(self, sents, lang):
        """Send a sentence to the multi-language NLU endpoint and print the parse.

        Usage examples::

            $ python -m sagas.nlu.rasa_procs parse "Shenzhen ist das Silicon Valley für Hardware-Firmen" de
            $ python -m sagas.nlu.rasa_procs parse 'what restaurants can you recommend?' en

        :param sents: the sentence text handed to ``invoke_nlu``
        :param lang: language code handed to ``invoke_nlu``
        :return: None; the raw result, the intent and the entity names are
            written to stdout
        """
        from sagas.conf.conf import cf

        endpoint = cf.ensure('nlu_multilang_servant')
        print('.. with endpoing', endpoint)
        result = invoke_nlu(endpoint, lang, "current", sents)

        # A missing or empty answer has no intent/entities to report.
        if not result:
            return

        # ensure_ascii=False keeps non-ASCII input (e.g. the German example
        # above) readable instead of \\u-escaped.
        print(json.dumps(result, indent=4, ensure_ascii=False))
        intent = result["intent"]
        print(f"{intent['name']} -> {intent['confidence']:f}")
        print([ent['entity'] for ent in result['entities']])
Beispiel #10
0
def query_duckling(text:Text, lang:Text) -> Dict[Text, Any]:
    """Query the configured duckling service for ``text``.

    Example::

        resp=query_duckling('tomorrow at eight', 'en')
        print([d['dim'] for d in resp['data']])

    :param text: the text to analyse
    :param lang: language code; must be a key of ``locale_mappings``
    :return: ``{'result': 'success', 'data': <decoded JSON>}`` on HTTP 200;
        otherwise ``{'result': 'fail', 'cause': ..., 'data': []}`` so that
        ``resp['data']`` is always safe to read
    """
    if lang not in locale_mappings:
        # Carry an empty 'data' list like the other failure path does.
        return {'result': 'fail', 'cause': "unsupport lang", 'data': []}

    data = {'locale': locale_mappings[lang],
            'text': text,
            'reftime': current_milli_time()}
    response = requests.post(cf.ensure('duckling'), data=data)
    if response.status_code == 200:
        return {'result': 'success', 'data': response.json()}
    return {'result': 'fail', 'cause': 'error response', 'data': []}
Beispiel #11
0
 def run(self, key, ctx:Context):
     """Check whether any chunk piece under ``key`` contains an entity of ``self.dim``.

     Every piece from ``ctx.chunk_pieces(key)`` is sent to an entity service;
     all pieces are checked (and matches printed) even after the first hit.

     :param key: chunk key handed to ``ctx.chunk_pieces``
     :param ctx: supplies ``meta['lang']`` and the chunk pieces
     :return: True when at least one piece yielded an entity equal to ``self.dim``
     """
     lang = ctx.meta['lang']
     # Languages with a dedicated endpoint; all others use the default query.
     requestors = {
         'ru': lambda rc: query_entities_by_url(cf.ensure('ner_ru'), rc),
     }
     fetch = requestors.get(lang, query_entities)

     matched = False
     for piece in ctx.chunk_pieces(key):
         resp = fetch({'lang': lang, 'sents': piece})
         if resp['result'] != 'success':
             continue
         dims = [d['entity'] for d in resp['data']]
         logger.info('entities -> %s, self.dim -> %s', ', '.join(dims), self.dim)
         if self.dim in dims:
             print('\t%s ∈' % piece, self.dim)
             matched = True
     return matched
Beispiel #12
0
 def __init__(self):
     """Create the Mongo client and bind the ``trans`` collection of the ``langs`` database."""
     # The connection target comes from the 'mongo' configuration entry.
     client = MongoClient(cf.ensure('mongo'))
     database = client.langs

     self.client = client
     self.db = database
     self.coll = database.trans
Beispiel #13
0
 def __init__(self):
     """Bind the shared ``omw_ext`` object and read the default word-set languages."""
     from sagas.nlu.omw_extended import omw_ext
     from sagas.conf.conf import cf

     # Value of the 'default_word_sets_langs' configuration entry.
     configured_langs = cf.ensure('default_word_sets_langs')
     self.omw = omw_ext
     self.default_langs = configured_langs
Beispiel #14
0
def query_entities(data):
    """Query entities for ``data`` using the endpoint configured under ``'ner'``.

    :param data: payload forwarded unchanged to ``query_entities_by_url``
    :return: whatever ``query_entities_by_url`` returns
    """
    ner_endpoint = cf.ensure('ner')
    return query_entities_by_url(ner_endpoint, data)
Beispiel #15
0
 def __init__(self):
     """Create the Redis client stored as ``self.r``."""
     # The 'redis' configuration value is passed as the first positional
     # argument (presumably the host; verify against the configuration).
     redis_target = cf.ensure('redis')
     self.r = redis.StrictRedis(redis_target)