Esempio n. 1
0
    def lex(self, text: Union[str, bytes], first_line: int) -> None:
        """Tokenize ``text``, appending the resulting tokens to ``self.tok``.

        Args:
            text: Source to analyze. A ``bytes`` value is decoded first: a
                leading UTF-8 BOM selects 'utf8', otherwise the encoding is
                whatever ``find_python_encoding`` reports.
            first_line: Line number assigned to the first line of ``text``.

        If decoding fails, the problem is reported through
        ``report_unicode_decode_error`` or ``report_unknown_encoding`` and
        the method returns without lexing anything.
        """
        self.i = 0
        self.line = first_line
        self.column = 0

        if isinstance(text, bytes):
            has_bom = text.startswith(b'\xef\xbb\xbf')
            if has_bom:
                self.enc = 'utf8'
            else:
                self.enc, enc_line = find_python_encoding(text, self.pyversion)
            try:
                decoded = text.decode(self.enc)
            except UnicodeDecodeError as err:
                self.report_unicode_decode_error(err, text)
                return
            except LookupError:
                # Only reachable when the encoding came from
                # find_python_encoding, so enc_line is bound here.
                self.report_unknown_encoding(enc_line)
                return
            text = decoded
            if has_bom:
                # The decoded BOM character becomes a token of its own.
                # NOTE(review): presumably add_token also advances self.i
                # past it -- confirm against add_token.
                self.add_token(Bom(text[0]))
        self.s = text

        # Handle the indentation of the very first line up front; without
        # this an indented first line would not produce an error.
        self.lex_indent()

        # Cache lookups in locals as a simple optimization for the loop.
        dispatch = self.map
        fallback = self.unknown_character
        text_length = len(text)

        # Main loop: hand the current character to its lexer method. That
        # method consumes some characters, adds a token to self.tok and
        # increments self.i.
        while self.i < text_length:
            dispatch.get(text[self.i], fallback)()

        # Make sure the token stream ends with a statement/block terminator.
        if self.tok and not isinstance(self.tok[-1], (Break, Dedent)):
            self.add_token(Break(''))

        # Any pending comments/whitespace are attached to a final Break.
        if self.tok:
            last_token = self.tok[-1]
            if isinstance(last_token, Break):
                last_token.string += self.pre_whitespace
                self.pre_whitespace = ''

        # Emit Dedent tokens for blocks that are still open.
        self.lex_indent()

        self.add_token(Eof(''))
Esempio n. 2
0
File: build.py Progetto: darjus/mypy
def read_with_python_encoding(path: str, pyversion: Tuple[int, int]) -> str:
    """Read a Python source file, honouring PEP 263 encoding detection.

    Args:
        path: Location of the file to read.
        pyversion: Target Python version; its major component picks the
            default encoding ('utf8' for 3 and above, 'ascii' otherwise).

    Returns:
        The decoded file contents, with a leading UTF-8 BOM removed.
    """
    if pyversion[0] >= 3:
        encoding = 'utf8'
    else:
        encoding = 'ascii'

    with open(path, 'rb') as source_file:
        # Only the first two lines are inspected for a PEP 263 coding
        # declaration, so read just those before deciding on the encoding.
        buf = bytearray(source_file.readline())
        buf += source_file.readline()

        if buf.startswith(b'\xef\xbb\xbf'):
            # A UTF-8 BOM settles the encoding; drop it from the data.
            encoding = 'utf8'
            del buf[:3]
        else:
            declared, _ = util.find_python_encoding(buf, pyversion)
            # The 'mypy' coding is skipped because it may not have been
            # registered yet; the default encoding is used instead.
            if declared != 'mypy':
                encoding = declared

        buf += source_file.read()
        return buf.decode(encoding)
Esempio n. 3
0
def read_with_python_encoding(path: str, pyversion: Tuple[int, int]) -> str:
    """Return the text of a Python file, decoded per PEP 263 detection.

    The default encoding is 'utf8' when ``pyversion`` has major version 3
    or higher and 'ascii' otherwise. A UTF-8 BOM at the start of the file
    forces 'utf8' and is stripped from the result; without a BOM, the
    encoding reported by ``util.find_python_encoding`` for the first two
    lines is used, unless that encoding is 'mypy'.
    """
    encoding = 'utf8' if pyversion[0] >= 3 else 'ascii'

    with open(path, 'rb') as f:
        # Pull in the first two lines; they are what gets checked for a
        # PEP 263 coding declaration.
        data = bytearray()
        for _line_no in range(2):
            data.extend(f.readline())

        has_bom = data.startswith(b'\xef\xbb\xbf')
        if has_bom:
            # Strip the BOM and decode as UTF-8.
            encoding = 'utf8'
            data = data[3:]
        else:
            detected, _ignored = util.find_python_encoding(data, pyversion)
            # Skip the 'mypy' coding, since registering it may not have
            # happened yet.
            if detected != 'mypy':
                encoding = detected

        # Append the remainder of the file and decode everything at once.
        data.extend(f.read())
        return data.decode(encoding)