def wordtype(word, encoding=None, fast=False):
    '''
    Detect word's type

    Parameters
    ----------
    word : str or unicode
        The input word. It is decoded with ``encoding`` first when an
        encoding is given.
    encoding : str or None
        Codec name passed to ``word.decode``; ``None`` skips decoding.
    fast : bool
        Convert each character with ``fast_dbc2sbc`` instead of
        ``dbc2sbc``.

    Return
    ------
    wordtype : int
        ``URL``, ``ENG`` or ``DIG`` for the first matching check (tried
        in that order), otherwise ``NONE``.
        NOTE(review): the int type is assumed from the later revisions'
        docstrings -- confirm against the constant definitions.
    '''
    text = word if encoding is None else word.decode(encoding)

    # Every character is normalised before classification; presumably a
    # double-byte to single-byte conversion (helpers are defined elsewhere).
    if fast:
        text = "".join(fast_dbc2sbc(char) for char in text)
    else:
        text = "".join(dbc2sbc(char) for char in text)

    # The order of the checks is significant: the first hit wins.
    if _is_url(text):
        kind = URL
    elif _is_eng_word(text):
        kind = ENG
    elif _is_digit(text):
        kind = DIG
    else:
        kind = NONE
    return kind
def wordtype(token, flags=0xffff, encoding=None, d2s=False, fast=False):
    '''
    Detect word's type

    Parameters
    ----------
    token : str or unicode
        The input string token
    flags : int
        Bitmask of the activated checks. It is tested against
        ``CHK_URL``, ``CHK_ENGLISH`` and ``CHK_NUMERICAL``; a check whose
        bit is cleared is skipped.
    encoding : str or None
        Codec name used to decode ``token``; ``None`` skips decoding.
    d2s : bool
        Specify to conduct double byte to single byte conversion
    fast : bool
        Specify to use fast ``dbc2sbc`` (only relevant when ``d2s`` is
        true)

    Return
    ------
    wordtype : int
        The type of word: ``URL``, ``ENG`` or ``DIG`` for the first
        enabled check that matches (tried in that order), otherwise
        ``NONE``.
    '''
    # Work on a local copy; the original body used ``word`` without ever
    # binding it from ``token``, so every call failed.
    word = token
    if encoding is not None:
        word = word.decode(encoding)

    # Gate on the ``d2s`` argument, not on the ``dbc2sbc`` function object
    # (which is always truthy and made the flag meaningless).
    if d2s:
        if fast:
            word = "".join(
                [fast_dbc2sbc(ch, encoding=None) for ch in word])
        else:
            word = "".join([dbc2sbc(ch, encoding=None) for ch in word])

    # The order of the checks is significant: the first enabled hit wins.
    if (flags & CHK_URL) and _is_url(word):
        return URL
    if (flags & CHK_ENGLISH) and _is_eng_word(word):
        return ENG
    if (flags & CHK_NUMERICAL) and _is_digit(word):
        return DIG
    return NONE
def wordtype(token, flags=0xffff, encoding=None, d2s=False, fast=False):
    '''
    Detect word's type

    Parameters
    ----------
    token : str or unicode
        The input string token
    flags : int
        Bitmask of the activated checks, tested against ``CHK_URL``,
        ``CHK_ENGLISH`` and ``CHK_NUMERICAL``. A check runs only when its
        bit is set.
    encoding : str or None
        When not ``None``, ``token`` is decoded with this codec before
        any further processing.
    d2s : bool
        Specify to conduct double byte to single byte conversion
    fast : bool
        Specify to use fast ``dbc2sbc``; ignored unless ``d2s`` is true

    Return
    ------
    wordtype : int
        The type of word: ``URL``, ``ENG``, ``DIG``, or ``NONE`` when no
        enabled check matches. Checks are tried in that order.
    '''
    # Bind the working string from the ``token`` parameter. Previously the
    # body referred to an unbound local ``word`` and raised on every call.
    word = token if encoding is None else token.decode(encoding)

    # Honour the ``d2s`` switch. The earlier ``if dbc2sbc:`` tested the
    # helper function itself, which is always truthy.
    if d2s:
        convert = fast_dbc2sbc if fast else dbc2sbc
        word = "".join([convert(ch, encoding=None) for ch in word])

    # ``flags`` (not the undefined ``flag``) selects the active checks;
    # the first enabled check that matches decides the result.
    if (flags & CHK_URL) and _is_url(word):
        return URL
    if (flags & CHK_ENGLISH) and _is_eng_word(word):
        return ENG
    if (flags & CHK_NUMERICAL) and _is_digit(word):
        return DIG
    return NONE