Code example #1
File: phystokens.py Project: fantasy0901/blog
 def generate_tokens(self, text):
     """A stand-in for `tokenize.generate_tokens`."""
     if text != self.last_text:
         self.last_text = text
         readline = iternext(text.splitlines(True))
         self.last_tokens = list(tokenize.generate_tokens(readline))
     return self.last_tokens
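This earliest revision keys the cache on a plain text comparison. `iternext` is a helper defined elsewhere in the project; below is a self-contained sketch of the caching class (the `CachedTokenizer` name and the `iternext` stand-in are assumptions here, not necessarily the project's exact definitions):

import tokenize

def iternext(seq):
    # Assumed helper: a callable that yields successive items of `seq`,
    # usable as the `readline` argument that tokenize functions expect.
    return iter(seq).__next__

class CachedTokenizer:
    """Cache the most recent tokenization, keyed on the source text."""
    def __init__(self):
        self.last_text = None
        self.last_tokens = None

    def generate_tokens(self, text):
        """A stand-in for `tokenize.generate_tokens`."""
        if text != self.last_text:
            self.last_text = text
            readline = iternext(text.splitlines(True))
            self.last_tokens = list(tokenize.generate_tokens(readline))
        return self.last_tokens

# Usage: repeated calls with identical source cost one string comparison
# instead of a full re-tokenization.
tokens = CachedTokenizer().generate_tokens("x = 1\n")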
Code example #2
File: phystokens.py Project: th0/test2
 def generate_tokens(self, text):
     """A stand-in for `tokenize.generate_tokens`."""
     # Check the type first so we don't compare bytes to unicode and get
     # warnings.
     if type(text) != type(self.last_text) or text != self.last_text:
         self.last_text = text
         readline = iternext(text.splitlines(True))
         self.last_tokens = list(tokenize.generate_tokens(readline))
     return self.last_tokens
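This later revision guards the equality test with a type check. In Python 2, comparing a byte string containing non-ASCII bytes with a unicode string raises UnicodeWarning, and CPython 3's -b flag warns on any bytes/str comparison; because `or` short-circuits, the cross-type comparison is never evaluated when the types differ. A minimal illustration of the short-circuit:

text, last_text = b"x = 1\n", u"x = 1\n"
if type(text) != type(last_text) or text != last_text:
    # Cache miss: the types differ, so the left operand is already True
    # and the bytes/str comparison on the right is skipped entirely.
    pass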
Code example #3
File: phystokens.py Project: fantasy0901/blog
def _source_encoding_py3(source):
    """Determine the encoding for `source`, according to PEP 263.

    `source` is a byte string: the text of the program.

    Returns a string, the name of the encoding.

    """
    readline = iternext(source.splitlines(True))
    return tokenize.detect_encoding(readline)[0]
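On Python 3 the standard library already implements PEP 263 detection; this wrapper only adapts the byte string into the line-at-a-time `readline` interface that `tokenize.detect_encoding` expects. A stdlib-only usage sketch, inlining the iterator trick in place of `iternext`:

import tokenize

source = b"# -*- coding: iso-8859-1 -*-\nname = 'caf\xe9'\n"
readline = iter(source.splitlines(True)).__next__
print(tokenize.detect_encoding(readline)[0])  # -> 'iso-8859-1'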
Code example #4
File: phystokens.py Project: th0/test2
def _source_encoding_py3(source):
    """Determine the encoding for `source`, according to PEP 263.

    Arguments:
        source (byte string): the text of the program.

    Returns:
        string: the name of the encoding.

    """
    assert isinstance(source, bytes)
    readline = iternext(source.splitlines(True))
    return tokenize.detect_encoding(readline)[0]
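The only functional change in this revision is the `isinstance` assertion: `tokenize.detect_encoding` works on byte lines, so passing already-decoded text now fails fast at the call boundary rather than with a less obvious error inside the tokenizer.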
Code example #5
def _source_encoding_py2(source):
    """Determine the encoding for `source`, according to PEP 263.

    Arguments:
        source (byte string): the text of the program.

    Returns:
        string: the name of the encoding.

    """
    assert isinstance(source, bytes)

    # Do this so the detect_encoding code we copied will work.
    readline = iternext(source.splitlines(True))

    # This is mostly code adapted from Py3.2's tokenize module.

    def _get_normal_name(orig_enc):
        """Imitates get_normal_name in tokenizer.c."""
        # Only care about the first 12 characters.
        enc = orig_enc[:12].lower().replace("_", "-")
        if re.match(r"^utf-8($|-)", enc):
            return "utf-8"
        if re.match(r"^(latin-1|iso-8859-1|iso-latin-1)($|-)", enc):
            return "iso-8859-1"
        return orig_enc

    # From detect_encoding():
    # It detects the encoding from the presence of a UTF-8 BOM or an encoding
    # cookie as specified in PEP-0263.  If both a BOM and a cookie are present,
    # but disagree, a SyntaxError will be raised.  If the encoding cookie is an
    # invalid charset, raise a SyntaxError.  Note that if a UTF-8 BOM is found,
    # 'utf-8-sig' is returned.

    # If no encoding is specified, then the default will be returned.
    default = 'ascii'

    bom_found = False
    encoding = None

    def read_or_stop():
        """Get the next source line, or ''."""
        try:
            return readline()
        except StopIteration:
            return ''

    def find_cookie(line):
        """Find an encoding cookie in `line`."""
        try:
            line_string = line.decode('ascii')
        except UnicodeDecodeError:
            return None

        matches = COOKIE_RE.findall(line_string)
        if not matches:
            return None
        encoding = _get_normal_name(matches[0])
        try:
            codec = codecs.lookup(encoding)
        except LookupError:
            # This behavior mimics the Python interpreter
            raise SyntaxError("unknown encoding: " + encoding)

        if bom_found:
            # codecs in 2.3 were raw tuples of functions, assume the best.
            codec_name = getattr(codec, 'name', encoding)
            if codec_name != 'utf-8':
                # This behavior mimics the Python interpreter
                raise SyntaxError('encoding problem: utf-8')
            encoding += '-sig'
        return encoding

    first = read_or_stop()
    if first.startswith(codecs.BOM_UTF8):
        bom_found = True
        first = first[3:]
        default = 'utf-8-sig'
    if not first:
        return default

    encoding = find_cookie(first)
    if encoding:
        return encoding

    second = read_or_stop()
    if not second:
        return default

    encoding = find_cookie(second)
    if encoding:
        return encoding

    return default
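The excerpt relies on names defined elsewhere in phystokens.py: the `re` and `codecs` imports and the module-level `COOKIE_RE` pattern. Below is a plausible reconstruction of that pattern (an assumption shaped by the PEP 263 cookie syntax, not the project's verbatim definition), followed by a quick check of the three return paths. The function targets Python 2, but this excerpt happens to run unchanged under Python 3:

import codecs
import re

# Assumed definition: match a PEP 263 coding cookie and capture the name,
# e.g. "# -*- coding: utf-8 -*-" or "# coding=latin-1".
COOKIE_RE = re.compile(r"^[ \t]*#.*coding[:=][ \t]*([-\w.]+)")

print(_source_encoding_py2(b"# coding: utf-8\nx = 1\n"))   # 'utf-8' (cookie)
print(_source_encoding_py2(b"x = 1\n"))                    # 'ascii' (default)
print(_source_encoding_py2(codecs.BOM_UTF8 + b"x = 1\n"))  # 'utf-8-sig' (BOM)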