def wordtype(word, encoding=None, fast=False):
    '''
    Classify a word as a URL, an English word, a number, or none of these.

    Parameters
    ----------
    word : str or unicode
        The input word
    encoding : str or None
        When not None, ``word.decode(encoding)`` is applied before the checks
    fast : bool
        Specify to use ``fast_dbc2sbc`` instead of ``dbc2sbc`` for the
        per-character conversion

    Return
    ------
    wordtype : int
        ``URL``, ``ENG`` or ``DIG`` for the first check that matches (tried
        in that order), otherwise ``NONE``
    '''
    if encoding is not None:
        word = word.decode(encoding)

    # Pick the per-character converter once, then rebuild the word from it.
    convert = fast_dbc2sbc if fast else dbc2sbc
    normalized = "".join(convert(ch) for ch in word)

    # The order of the checks is significant: URL wins over ENG, ENG over DIG.
    if _is_url(normalized):
        kind = URL
    elif _is_eng_word(normalized):
        kind = ENG
    elif _is_digit(normalized):
        kind = DIG
    else:
        kind = NONE
    return kind
Exemple #2
0
def wordtype(token, flags=0xffff, encoding=None, d2s=False, fast=False):
    '''
    Detect word's type

    Parameters
    ----------
    token : str or unicode
        The input string token
    flags : int
        Bitmask of the activated checks; it is tested against ``CHK_URL``,
        ``CHK_ENGLISH`` and ``CHK_NUMERICAL``
    encoding : str or None
        When not None, ``token.decode(encoding)`` is applied before the checks
    d2s : bool
        Specify to conduct double byte to single byte conversion
    fast : bool
        Specify to use fast ``dbc2sbc``

    Return
    ------
    wordtype : int
        The type of word: ``URL``, ``ENG`` or ``DIG`` for the first activated
        check that matches (tried in that order), otherwise ``NONE``
    '''
    # Work on a local copy of the ``token`` parameter; the body previously
    # read an unbound name ``word`` and failed on every call.
    word = token
    if encoding is not None:
        word = word.decode(encoding)

    # Gate on the ``d2s`` parameter. Testing the ``dbc2sbc`` function object
    # is always truthy, which made the switch meaningless.
    if d2s:
        convert = fast_dbc2sbc if fast else dbc2sbc
        # The token is already decoded at this point, so the converter is
        # told not to decode again.
        word = "".join([convert(ch, encoding=None) for ch in word])

    # Each check only runs when its bit is set in ``flags``.
    if (flags & CHK_URL) and _is_url(word):
        return URL

    if (flags & CHK_ENGLISH) and _is_eng_word(word):
        return ENG

    if (flags & CHK_NUMERICAL) and _is_digit(word):
        return DIG

    return NONE
Exemple #3
0
def wordtype(token, flags=0xffff, encoding=None, d2s=False, fast=False):
    '''
    Detect word's type

    Parameters
    ----------
    token : str or unicode
        The input string token
    flags : int
        Bitmask of the activated checks; it is tested against ``CHK_URL``,
        ``CHK_ENGLISH`` and ``CHK_NUMERICAL``
    encoding : str or None
        When not None, ``token.decode(encoding)`` is applied before the checks
    d2s : bool
        Specify to conduct double byte to single byte conversion
    fast : bool
        Specify to use fast ``dbc2sbc``

    Return
    ------
    wordtype : int
        The type of word: ``URL``, ``ENG`` or ``DIG`` for the first activated
        check that matches (tried in that order), otherwise ``NONE``
    '''
    # The parameter is named ``token``; bind it to the working name instead
    # of reading an undefined ``word`` (which raised on every call).
    word = token if encoding is None else token.decode(encoding)

    # Conversion is opt-in through ``d2s``; the previous ``if dbc2sbc:`` test
    # checked the helper function itself and was therefore always true.
    if d2s:
        if fast:
            word = "".join([fast_dbc2sbc(ch, encoding=None) for ch in word])
        else:
            word = "".join([dbc2sbc(ch, encoding=None) for ch in word])

    # ``flags`` (not the undefined ``flag``) selects which checks may fire.
    if (flags & CHK_URL) and _is_url(word):
        return URL

    if (flags & CHK_ENGLISH) and _is_eng_word(word):
        return ENG

    if (flags & CHK_NUMERICAL) and _is_digit(word):
        return DIG

    return NONE