async def parse_location(location_word: Union[str, List[str]]) -> Location:
    """
    Build a Location from a place description such as "江苏省常州市武进区".

    A plain string is segmented with ``jieba_fast.lcut`` first; a list is
    taken as already segmented. Each segment (with any leading/trailing
    "省", "市", "区", "县" characters stripped) is looked up through
    ``heweather.find`` and matched against the first returned entry to
    fill in province, city and district. Scanning stops once all three
    are known, and the segments not yet scanned are joined into ``other``.

    :param location_word: location text, either raw or already segmented
    :return: Location object (a fresh, unfilled one for empty input)
    """
    if not location_word:
        return Location()

    tokens = (
        jieba_fast.lcut(location_word)
        if isinstance(location_word, str)
        else location_word
    )
    logger.debug(f'Parsing location: {tokens}')

    location = Location()
    cursor = 0
    while cursor < len(tokens):
        if location.province and location.city and location.district:
            # province, city and district are all resolved; whatever is
            # left from the cursor onwards goes into "other"
            break

        name = tokens[cursor].strip('省市区县')
        # Every path below consumes this token, so advance right away.
        cursor += 1
        if not name:
            continue

        lookup = await heweather.find(name)
        if not (lookup and lookup.get('status') == 'ok'):
            continue

        # The code relies on an 'ok' status implying at least one entry
        # under 'basic'; only the first entry is consulted.
        info = lookup.get('basic')[0]
        is_province = name == info.get('admin_area')
        is_city = name == info.get('parent_city')
        if is_province:
            location.province = name
        if is_city:
            # Deliberately independent of the province match: for a name
            # like "北京" the province and the city are the same.
            location.city = name
        if not (is_province or is_city) and name == info.get('location'):
            location.district = name

    location.other = ''.join(tokens[cursor:]) or None
    return location
async def lexer(text: str) -> LexerResult_T:
    """
    Segment the input text and run POS tagging and NER on it.

    The text is stripped first. One backend is then drawn at random from
    the weighted vendor table and awaited with the stripped text.

    :param text: the input text (may have multiple paragraphs)
    :return: the lexical analysis result; an empty list if the text is
             empty after stripping
    """
    stripped = text.strip()
    if not stripped:
        return []

    # (backend coroutine function, selection weight)
    weighted_backends = [
        (_lexer_baidu_aip, 0.6),
        # (_lexer_ltp_cloud, 0.4),
    ]
    backends = [backend for backend, _ in weighted_backends]
    weights = [weight for _, weight in weighted_backends]

    # random.choices returns a list of k=1 picks; take the only element.
    chosen = random.choices(backends, weights)[0]
    logger.debug(f'Lexer chosen: {chosen}')
    return await chosen(stripped)
async def tuling(session: CommandSession):
    """
    Relay the user's message to the Tuling API and send back its replies.

    The plain text and the image URLs of the message are passed to
    ``call_tuling_api``. Each reply is sent escaped, with a short delay
    after it; when there are no replies a fallback message is sent.

    If the session carries a truthy ``one_time`` argument, the replies
    are checked for a question about a location or a time, and the
    detected entity type is stored in ``tuling_sessions`` under the
    current context id. Otherwise the session is paused.

    :param session: the command session being handled
    """
    message = session.get('message', prompt=__(e.I_AM_READY))

    ctx_id = context_id(session.ctx)
    # Drop any entity type remembered for this context by an earlier call.
    if ctx_id in tuling_sessions:
        del tuling_sessions[ctx_id]

    parsed_msg = Message(message)
    text = parsed_msg.extract_plain_text()
    images = []
    for segment in parsed_msg:
        if segment.type == 'image' and 'url' in segment.data:
            images.append(segment.data['url'])

    replies = await call_tuling_api(session, text, images)
    logger.debug(f"Got tuling's replies: {replies}")

    if not replies:
        await session.send(__(e.I_DONT_UNDERSTAND))
    else:
        for reply in replies:
            await session.send(escape(reply))
            await asyncio.sleep(0.8)

    if not session.get_optional('one_time', False):
        session.pause()
        return

    # On a one-time call tuling123 may have opened a session of its own
    # and be asking the user for more information. To keep things simple
    # only named-entity questions are recognized; some ambiguity is fine
    # here because the user's next input is checked again later.
    wanted_patterns = {
        'LOC': ('哪里', '哪儿', re.compile(r'哪\S城市'), '位置'),
        'TIME': ('什么时候', ),
    }
    ne_type = tuling_ne_type(replies, wanted_patterns)
    if ne_type:
        logger.debug(
            f'One time call, and there is a tuling session for {ne_type}'
        )
        tuling_sessions[ctx_id] = ne_type