Example #1
File: __init__.py  Project: gloob/pyenchant
def get_tokenizer(tag,chunkers=None,filters=None):
    """Locate an appropriate tokenizer by language tag.

    This requires importing the function 'tokenize' from an
    appropriate module.  Modules tried are named after the
    language tag, tried in the following order:
        * the entire tag (e.g. "en_AU.py")
        * the base language code of the tag (e.g. "en.py")

    If a suitable function cannot be found, raises TokenizerNotFoundError.
    
    If given and not None, 'chunkers' and 'filters' must be lists of chunker
    classes and filter classes respectively.  These will be applied to the
    tokenizer during creation.
    """
    # "filters" used to be the second argument.  Try to catch cases
    # where it is given positionally and issue a DeprecationWarning.
    if chunkers is not None and filters is None:
        chunkers = list(chunkers)
        if chunkers:
            try:
                chunkers_are_filters = issubclass(chunkers[0],Filter)
            except TypeError:
                pass
            else:
                if chunkers_are_filters:
                    msg = "passing 'filters' as a non-keyword argument "\
                          "to get_tokenizer() is deprecated"
                    warnings.warn(msg,category=DeprecationWarning)
                    filters = chunkers
                    chunkers = None
    # Ensure only '_' used as separator
    tag = tag.replace("-","_")
    # First try the whole tag
    tkFunc = _try_tokenizer(tag)
    if tkFunc is None:
        # Try just the base
        base = tag.split("_")[0]
        tkFunc = _try_tokenizer(base)
        if tkFunc is None:
            msg = "No tokenizer found for language '%s'" % (tag,)
            raise TokenizerNotFoundError(msg)
    # Given the language-specific tokenizer, we now build up the
    # end result as follows:
    #    * chunk the text using any given chunkers in turn
    #    * begin with basic whitespace tokenization
    #    * apply each of the given filters in turn
    #    * apply language-specific rules
    tokenizer = basic_tokenize
    if chunkers is not None:
        chunkers = list(chunkers)
        for i in xrange(len(chunkers)-1,-1,-1):
            tokenizer = wrap_tokenizer(chunkers[i],tokenizer)
    if filters is not None:
        for f in filters:
            tokenizer = f(tokenizer)
    tokenizer = wrap_tokenizer(tokenizer,tkFunc)
    return tokenizer
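
A minimal usage sketch for the function above, assuming the standard pyenchant layout where this code lives in enchant.tokenize alongside the EmailFilter and URLFilter classes; the returned tokenizer yields (word, offset) pairs:

from enchant.tokenize import get_tokenizer, EmailFilter, URLFilter

# Build an English tokenizer that skips e-mail addresses and URLs
# (each filter wraps the basic whitespace tokenizer, as described above).
tknzr = get_tokenizer("en_US", filters=[EmailFilter, URLFilter])
for word, offset in tknzr("Mail john@example.com about http://example.com today"):
    print(word, offset)  # the e-mail address and the URL are filtered out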
Example #2
def get_tokenizer(tag=None, chunkers=None, filters=None):
    """Locate an appropriate tokenizer by language tag.

    This requires importing the function 'tokenize' from an appropriate
    module.  Modules tried are named after the language tag, tried in the
    following order:
        * the entire tag (e.g. "en_AU.py")
        * the base language code of the tag (e.g. "en.py")

    If the language tag is None, a default tokenizer (actually the English
    one) is returned.  It's unicode aware and should work OK for most
    latin-derived languages.

    If a suitable function cannot be found, raises TokenizerNotFoundError.
    
    If given and not None, 'chunkers' and 'filters' must be lists of chunker
    classes and filter classes respectively.  These will be applied to the
    tokenizer during creation.
    """
    if tag is None:
        tag = "en"
    # "filters" used to be the second argument.  Try to catch cases
    # where it is given positionally and issue a DeprecationWarning.
    if chunkers is not None and filters is None:
        chunkers = list(chunkers)
        if chunkers:
            try:
                chunkers_are_filters = issubclass(chunkers[0], Filter)
            except TypeError:
                pass
            else:
                if chunkers_are_filters:
                    msg = "passing 'filters' as a non-keyword argument "\
                          "to get_tokenizer() is deprecated"
                    warnings.warn(msg,
                                  category=DeprecationWarning,
                                  stacklevel=2)
                    filters = chunkers
                    chunkers = None
    # Ensure only '_' used as separator
    tag = tag.replace("-", "_")
    # First try the whole tag
    tkFunc = _try_tokenizer(tag)
    if tkFunc is None:
        # Try just the base
        base = tag.split("_")[0]
        tkFunc = _try_tokenizer(base)
        if tkFunc is None:
            msg = "No tokenizer found for language '%s'" % (tag, )
            raise TokenizerNotFoundError(msg)
    # Given the language-specific tokenizer, we now build up the
    # end result as follows:
    #    * chunk the text using any given chunkers in turn
    #    * begin with basic whitespace tokenization
    #    * apply each of the given filters in turn
    #    * apply language-specific rules
    tokenizer = basic_tokenize
    if chunkers is not None:
        chunkers = list(chunkers)
        for i in xrange(len(chunkers) - 1, -1, -1):
            tokenizer = wrap_tokenizer(chunkers[i], tokenizer)
    if filters is not None:
        for f in filters:
            tokenizer = f(tokenizer)
    tokenizer = wrap_tokenizer(tokenizer, tkFunc)
    return tokenizer
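
This second variant also accepts tag=None and falls back to the English tokenizer. A sketch of that default path, again assuming the chunker and filter classes shipped with pyenchant (HTMLChunker, WikiWordFilter); the sample text is made up:

from enchant.tokenize import get_tokenizer, HTMLChunker, WikiWordFilter

# No language tag: the default English ("en") tokenizer is used.
# HTMLChunker splits out markup before tokenizing; WikiWordFilter drops
# CamelCase wiki words such as "FrontPage".
tknzr = get_tokenizer(None, chunkers=[HTMLChunker], filters=[WikiWordFilter])
words = [w for w, pos in tknzr("<p>Visit the FrontPage for details</p>")]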