コード例 #1
0
async def parse_location(location_word: Union[str, List[str]]) -> Location:
    """
    Parse location like "江苏省常州市武进区".

    Each word is looked up through ``heweather.find`` and compared with
    the ``admin_area`` (province), ``parent_city`` (city) and ``location``
    (district) fields of the first entry of the response. Parsing stops as
    soon as province, city and district are all known; the words that were
    not reached are joined into ``Location.other`` (``None`` if there are
    no such words).

    :param location_word: location word (segmented or not)
    :return: Location object
    """
    if not location_word:
        return Location()

    if isinstance(location_word, str):
        location_words = jieba_fast.lcut(location_word)
    else:
        location_words = location_word

    logger.debug(f'Parsing location: {location_words}')

    location = Location()
    # index of the first word that was never examined; stays at the end
    # of the list unless we stop early
    rest_start = len(location_words)
    for idx, raw_word in enumerate(location_words):
        if all((location.province, location.city, location.district)):
            # we are done with "省"、"市"、"区/县级市"
            rest_start = idx
            break

        w = raw_word.strip('省市区县')
        if not w:
            # the word consisted of suffix characters only
            continue

        result = await heweather.find(w)
        if not result or result.get('status') != 'ok':
            continue

        # an "ok" status should come with at least one location info,
        # but do not crash on a response whose "basic" list is missing
        # or empty -- just treat the word as not found
        entries = result.get('basic')
        if not entries:
            continue

        basic = entries[0]
        parsed = False
        if w == basic.get('admin_area'):
            location.province = w
            parsed = True
        if w == basic.get('parent_city'):
            # don't check parsed here, because we may encounter "北京",
            # of which city and province are the same
            location.city = w
            parsed = True
        if not parsed and w == basic.get('location'):
            location.district = w

    location.other = ''.join(location_words[rest_start:]) or None
    return location
コード例 #2
0
async def lexer(text: str) -> LexerResult_T:
    """
    Run lexical analysis (segmentation, POS tagging and NER) on a text.

    The text is stripped first; if nothing is left, an empty list is
    returned without calling any vendor. Otherwise one vendor is picked
    at random, weighted by the number paired with it, and its result is
    returned as is.

    :param text: the input text (may have multiple paragraphs)
    :return: the lexical analysis result
    """
    stripped = text.strip()
    if stripped == '':
        return []

    # pairs of (vendor coroutine function, selection weight)
    vendors = [
        (_lexer_baidu_aip, 0.6),
        # (_lexer_ltp_cloud, 0.4),
    ]
    candidates = tuple(vendor for vendor, _ in vendors)
    weights = tuple(weight for _, weight in vendors)

    # random.choices returns a list of k=1 elements by default
    chosen = random.choices(candidates, weights)[0]
    logger.debug(f'Lexer chosen: {chosen}')
    return await chosen(stripped)
コード例 #3
0
async def tuling(session: CommandSession):
    """
    Send the user's message to the tuling API and relay its replies.

    The plain text and the image URLs of the message are passed to
    ``call_tuling_api``; every reply is sent back escaped, with a short
    delay after each one. When there is no reply, a fallback message is
    sent instead.

    For a one-time call (``one_time`` argument is truthy) the replies are
    checked for a follow-up question about a named entity, and the entity
    type is recorded in ``tuling_sessions`` for this context. Otherwise
    ``session.pause()`` is called.

    :param session: the command session being handled
    """
    message = session.get('message', prompt=__(e.I_AM_READY))

    # drop any entity type recorded for this context by an earlier call
    ctx_id = context_id(session.ctx)
    if ctx_id in tuling_sessions:
        del tuling_sessions[ctx_id]

    parsed_message = Message(message)
    text = parsed_message.extract_plain_text()
    images = []
    for segment in parsed_message:
        if segment.type == 'image' and 'url' in segment.data:
            images.append(segment.data['url'])

    # call tuling api
    replies = await call_tuling_api(session, text, images)
    logger.debug(f'Got tuling\'s replies: {replies}')

    if not replies:
        await session.send(__(e.I_DONT_UNDERSTAND))
    else:
        for reply in replies:
            await session.send(escape(reply))
            await asyncio.sleep(0.8)

    if not session.get_optional('one_time', False):
        session.pause()
        return

    # tuling123 may have opened a session on its side, so we should
    # recognize the situation where it wants more information from the
    # user. To keep things simple only named entities are recognized,
    # and since the user's input is checked again later, some ambiguity
    # is acceptable here.
    ne_keywords = {
        'LOC': ('哪里', '哪儿', re.compile(r'哪\S城市'), '位置'),
        'TIME': ('什么时候', ),
    }
    ne_type = tuling_ne_type(replies, ne_keywords)
    if ne_type:
        logger.debug(f'One time call, '
                     f'and there is a tuling session for {ne_type}')
        tuling_sessions[ctx_id] = ne_type